Cycles: Use dedicated debug passes for traversed nodes and intersection tests
[blender-staging.git] / intern / cycles / kernel / kernel_path.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef __OSL__
18 #  include "osl_shader.h"
19 #endif
20
21 #include "kernel_random.h"
22 #include "kernel_projection.h"
23 #include "kernel_montecarlo.h"
24 #include "kernel_differential.h"
25 #include "kernel_camera.h"
26
27 #include "geom/geom.h"
28 #include "bvh/bvh.h"
29
30 #include "kernel_accumulate.h"
31 #include "kernel_shader.h"
32 #include "kernel_light.h"
33 #include "kernel_passes.h"
34
35 #ifdef __SUBSURFACE__
36 #  include "kernel_subsurface.h"
37 #endif
38
39 #ifdef __VOLUME__
40 #  include "kernel_volume.h"
41 #endif
42
43 #include "kernel_path_state.h"
44 #include "kernel_shadow.h"
45 #include "kernel_emission.h"
46 #include "kernel_path_common.h"
47 #include "kernel_path_surface.h"
48 #include "kernel_path_volume.h"
49
50 #ifdef __KERNEL_DEBUG__
51 #  include "kernel_debug.h"
52 #endif
53
54 CCL_NAMESPACE_BEGIN
55
/* Accumulate one ambient occlusion sample for the surface point in sd.
 *
 * A 2D random sample is drawn on the PRNG_BSDF_U dimension and handed to
 * sample_cos_hemisphere() together with the normal ao_N returned by
 * shader_bsdf_ao(). If the sampled direction ao_D lies on the positive side
 * of sd's Ng and its pdf is non-zero, a shadow ray limited to
 * kernel_data.background.ao_distance is traced; when shadow_blocked() reports
 * it as not blocked, the result is added to L via path_radiance_accum_ao().
 *
 * kg          - kernel globals, forwarded to all helpers
 * sd          - shader data of the point being shaded (P, Ng, dP and, with
 *               __OBJECT_MOTION__, time are read from it)
 * emission_sd - separate shader data forwarded to shadow_blocked()
 * L           - radiance accumulator that receives the AO contribution
 * state       - path state; used for random numbers and state->bounce
 * rng         - random number generator state
 * throughput  - forwarded to path_radiance_accum_ao()
 * ao_alpha    - forwarded to path_radiance_accum_ao()
 */
56 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
57                                         ShaderData *sd,
58                                         ShaderData *emission_sd,
59                                         PathRadiance *L,
60                                         PathState *state,
61                                         RNG *rng,
62                                         float3 throughput,
63                                         float3 ao_alpha)
64 {
65         /* todo: solve correlation */
66         float bsdf_u, bsdf_v;
67
           /* NOTE(review): the PRNG_BSDF_U dimension is also used for the
            * BSSRDF sample elsewhere in this file; presumably this sharing
            * is the correlation the todo above refers to - confirm. */
68         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
69
70         float ao_factor = kernel_data.background.ao_factor;
71         float3 ao_N;
72         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
73         float3 ao_D;
74         float ao_pdf;
75
76         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
77
           /* Only trace when the sampled direction is on the positive side of
            * Ng and the sample has a non-zero pdf. */
78         if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
79                 Ray light_ray;
80                 float3 ao_shadow;
81
                   /* Shadow ray from the position returned by ray_offset(),
                    * along ao_D, limited to the configured AO distance. */
82                 light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
83                 light_ray.D = ao_D;
84                 light_ray.t = kernel_data.background.ao_distance;
85 #ifdef __OBJECT_MOTION__
86                 light_ray.time = ccl_fetch(sd, time);
87 #endif  /* __OBJECT_MOTION__ */
88                 light_ray.dP = ccl_fetch(sd, dP);
89                 light_ray.dD = differential3_zero();
90
                   /* ao_shadow is filled in by shadow_blocked() and forwarded
                    * to the accumulation when the ray is not blocked. */
91                 if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
92                         path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
93                 }
94         }
95 }
96
/* Trace an indirect path starting at *ray and accumulate its radiance in L.
 *
 * Every loop iteration intersects the scene and then, depending on the
 * enabled kernel features:
 *  - adds lamp emission along the ray (__LAMP_MIS__),
 *  - integrates the active volume along the segment and possibly scatters
 *    inside it (__VOLUME__),
 *  - on a miss, adds the background and ends the path,
 *  - on a hit, evaluates the surface shader, adds surface emission, applies
 *    probabilistic path termination, ambient occlusion, a subsurface scatter
 *    step, direct light sampling and finally the next surface bounce.
 * The loop ends when the ray misses, the path is terminated, or one of the
 * bounce helpers returns false.
 *
 * kg          - kernel globals, forwarded to all helpers
 * sd          - shader data reused for both volume and surface shading
 * emission_sd - separate shader data forwarded to emission/shadow helpers
 * rng         - random number generator state
 * ray         - current ray; passed by pointer to the bounce helpers, which
 *               presumably write the next ray segment into it (the loop
 *               re-intersects *ray after they return true)
 * throughput  - path throughput at the start of the indirect path (local copy)
 * num_samples - only used to scale the throughput that is handed to
 *               path_state_terminate_probability()
 * state       - path state, modified in place
 * L           - radiance accumulator
 */
97 ccl_device void kernel_path_indirect(KernelGlobals *kg,
98                                      ShaderData *sd,
99                                      ShaderData *emission_sd,
100                                      RNG *rng,
101                                      Ray *ray,
102                                      float3 throughput,
103                                      int num_samples,
104                                      PathState *state,
105                                      PathRadiance *L)
106 {
107         /* path iteration */
108         for(;;) {
109                 /* intersect scene */
110                 Intersection isect;
111                 uint visibility = path_state_ray_visibility(kg, state);
                    /* The last three arguments are NULL/0 here; the __HAIR__
                     * branch of kernel_path_integrate() passes an LCG state and
                     * curve width values in the same positions. */
112                 bool hit = scene_intersect(kg,
113                                            *ray,
114                                            visibility,
115                                            &isect,
116                                            NULL,
117                                            0.0f, 0.0f);
118
119 #ifdef __LAMP_MIS__
120                 if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
121                         /* ray starting from previous non-transparent bounce */
122                         Ray light_ray;
123
                            /* Step back along the ray by the distance stored in
                             * state->ray_t to get the origin, then extend
                             * ray_t by this segment's isect.t so the lamp ray
                             * covers the whole span. */
124                         light_ray.P = ray->P - state->ray_t*ray->D;
125                         state->ray_t += isect.t;
126                         light_ray.D = ray->D;
127                         light_ray.t = state->ray_t;
128                         light_ray.time = ray->time;
129                         light_ray.dD = ray->dD;
130                         light_ray.dP = ray->dP;
131
132                         /* intersect with lamp */
133                         float3 emission;
134                         if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
135                                 path_radiance_accum_emission(L,
136                                                              throughput,
137                                                              emission,
138                                                              state->bounce);
139                         }
140                 }
141 #endif  /* __LAMP_MIS__ */
142
143 #ifdef __VOLUME__
144                 /* Sanitize volume stack. */
145                 if(!hit) {
146                         kernel_volume_clean_stack(kg, state->volume_stack);
147                 }
148                 /* volume attenuation, emission, scatter */
149                 if(state->volume_stack[0].shader != SHADER_NONE) {
                            /* Integrate up to the surface hit, or up to FLT_MAX
                             * when nothing was hit. */
150                         Ray volume_ray = *ray;
151                         volume_ray.t = (hit)? isect.t: FLT_MAX;
152
153                         bool heterogeneous =
154                                 volume_stack_is_heterogeneous(kg,
155                                                               state->volume_stack);
156
157 #  ifdef __VOLUME_DECOUPLED__
158                         int sampling_method =
159                                 volume_stack_sampling_method(kg,
160                                                              state->volume_stack);
                            /* Third argument is false here; kernel_path_integrate()
                             * passes true in the same call. */
161                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
162
163                         if(decoupled) {
164                                 /* cache steps along volume for repeated sampling */
165                                 VolumeSegment volume_segment;
166
167                                 shader_setup_from_volume(kg,
168                                                          sd,
169                                                          &volume_ray);
170                                 kernel_volume_decoupled_record(kg,
171                                                                state,
172                                                                &volume_ray,
173                                                                sd,
174                                                                &volume_segment,
175                                                                heterogeneous);
176
177                                 volume_segment.sampling_method = sampling_method;
178
179                                 /* emission */
180                                 if(volume_segment.closure_flag & SD_EMISSION) {
181                                         path_radiance_accum_emission(L,
182                                                                      throughput,
183                                                                      volume_segment.accum_emission,
184                                                                      state->bounce);
185                                 }
186
187                                 /* scattering */
                                    /* Stays VOLUME_PATH_ATTENUATED unless the segment
                                     * has a scatter closure and the scatter call below
                                     * returns something else. */
188                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
189
190                                 if(volume_segment.closure_flag & SD_SCATTER) {
191                                         int all = kernel_data.integrator.sample_all_lights_indirect;
192
193                                         /* direct light sampling */
194                                         kernel_branched_path_volume_connect_light(kg,
195                                                                                   rng,
196                                                                                   sd,
197                                                                                   emission_sd,
198                                                                                   throughput,
199                                                                                   state,
200                                                                                   L,
201                                                                                   all,
202                                                                                   &volume_ray,
203                                                                                   &volume_segment);
204
205                                         /* indirect sample. if we use distance sampling and take just
206                                          * one sample for direct and indirect light, we could share
207                                          * this computation, but makes code a bit complex */
208                                         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
209                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
210
211                                         result = kernel_volume_decoupled_scatter(kg,
212                                                                                  state,
213                                                                                  &volume_ray,
214                                                                                  sd,
215                                                                                  &throughput,
216                                                                                  rphase,
217                                                                                  rscatter,
218                                                                                  &volume_segment,
219                                                                                  NULL,
220                                                                                  true);
221                                 }
222
223                                 /* free cached steps */
224                                 kernel_volume_decoupled_free(kg, &volume_segment);
225
226                                 if(result == VOLUME_PATH_SCATTERED) {
                                            /* Scattered inside the volume: continue the
                                             * path loop if the volume bounce succeeds,
                                             * otherwise end the path. */
227                                         if(kernel_path_volume_bounce(kg,
228                                                                      rng,
229                                                                      sd,
230                                                                      &throughput,
231                                                                      state,
232                                                                      L,
233                                                                      ray))
234                                         {
235                                                 continue;
236                                         }
237                                         else {
238                                                 break;
239                                         }
240                                 }
241                                 else {
                                            /* Not scattered: attenuate the throughput by
                                             * the transmittance accumulated over the
                                             * recorded segment. */
242                                         throughput *= volume_segment.accum_transmittance;
243                                 }
244                         }
245                         else
246 #  endif  /* __VOLUME_DECOUPLED__ */
247                         {
                                    /* The `else` above exists only when
                                     * __VOLUME_DECOUPLED__ is defined; without it this
                                     * block runs unconditionally. */
248                                 /* integrate along volume segment with distance sampling */
249                                 VolumeIntegrateResult result = kernel_volume_integrate(
250                                         kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);
251
252 #  ifdef __VOLUME_SCATTER__
253                                 if(result == VOLUME_PATH_SCATTERED) {
254                                         /* direct lighting */
255                                         kernel_path_volume_connect_light(kg,
256                                                                          rng,
257                                                                          sd,
258                                                                          emission_sd,
259                                                                          throughput,
260                                                                          state,
261                                                                          L);
262
263                                         /* indirect light bounce */
264                                         if(kernel_path_volume_bounce(kg,
265                                                                      rng,
266                                                                      sd,
267                                                                      &throughput,
268                                                                      state,
269                                                                      L,
270                                                                      ray))
271                                         {
272                                                 continue;
273                                         }
274                                         else {
275                                                 break;
276                                         }
277                                 }
278 #  endif  /* __VOLUME_SCATTER__ */
279                         }
280                 }
281 #endif  /* __VOLUME__ */
282
                    /* Nothing was hit: add the background (if enabled) and end
                     * the path. */
283                 if(!hit) {
284 #ifdef __BACKGROUND__
285                         /* sample background shader */
286                         float3 L_background = indirect_background(kg, emission_sd, state, ray);
287                         path_radiance_accum_background(L,
288                                                        throughput,
289                                                        L_background,
290                                                        state->bounce);
291 #endif  /* __BACKGROUND__ */
292
293                         break;
294                 }
295
296                 /* setup shading */
297                 shader_setup_from_ray(kg,
298                                       sd,
299                                       &isect,
300                                       ray);
301                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
302                 shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
303 #ifdef __BRANCHED_PATH__
304                 shader_merge_closures(sd);
305 #endif  /* __BRANCHED_PATH__ */
306
307                 /* blurring of bsdf after bounces, for rays that have a small likelihood
308                  * of following this particular path (diffuse, rough glossy) */
                    /* filter_glossy == FLT_MAX skips the blur entirely. */
309                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
310                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
311
312                         if(blur_pdf < 1.0f) {
313                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
314                                 shader_bsdf_blur(kg, sd, blur_roughness);
315                         }
316                 }
317
318 #ifdef __EMISSION__
319                 /* emission */
320                 if(sd->flag & SD_EMISSION) {
321                         float3 emission = indirect_primitive_emission(kg,
322                                                                       sd,
323                                                                       isect.t,
324                                                                       state->flag,
325                                                                       state->ray_pdf);
326                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
327                 }
328 #endif  /* __EMISSION__ */
329
330                 /* path termination. this is a strange place to put the termination, it's
331                  * mainly due to the mixed in MIS that we use. gives too many unneeded
332                  * shader evaluations, only need emission if we are going to terminate */
333                 float probability =
334                         path_state_terminate_probability(kg,
335                                                          state,
336                                                          throughput*num_samples);
337
                    /* probability == 0: always terminate. probability == 1:
                     * always continue, without drawing a random number.
                     * Otherwise draw one number, terminate when it is
                     * >= probability, and divide the throughput of surviving
                     * paths by probability. */
338                 if(probability == 0.0f) {
339                         break;
340                 }
341                 else if(probability != 1.0f) {
342                         float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
343
344                         if(terminate >= probability)
345                                 break;
346
347                         throughput /= probability;
348                 }
349
350 #ifdef __AO__
351                 /* ambient occlusion */
                    /* ao_alpha is passed as zero for indirect paths. */
352                 if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
353                         kernel_path_ao(kg, sd, emission_sd, L, state, rng, throughput, make_float3(0.0f, 0.0f, 0.0f));
354                 }
355 #endif  /* __AO__ */
356
357 #ifdef __SUBSURFACE__
358                 /* bssrdf scatter to a different location on the same object, replacing
359                  * the closures with a diffuse BSDF */
360                 if(sd->flag & SD_BSSRDF) {
361                         float bssrdf_probability;
362                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
363
364                         /* modify throughput for picking bssrdf or bsdf */
365                         throughput *= bssrdf_probability;
366
367                         /* do bssrdf scatter step if we picked a bssrdf closure */
368                         if(sc) {
369                                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
370
371                                 float bssrdf_u, bssrdf_v;
372                                 path_state_rng_2D(kg,
373                                                   rng,
374                                                   state,
375                                                   PRNG_BSDF_U,
376                                                   &bssrdf_u, &bssrdf_v);
                                    /* Single scatter step that rewrites sd in place;
                                     * unlike kernel_path_subsurface_scatter() no
                                     * extra rays are queued here. */
377                                 subsurface_scatter_step(kg,
378                                                         sd,
379                                                         state,
380                                                         state->flag,
381                                                         sc,
382                                                         &lcg_state,
383                                                         bssrdf_u, bssrdf_v,
384                                                         false);
385                         }
386                 }
387 #endif  /* __SUBSURFACE__ */
388
389 #if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
                    /* Direct lighting; `all` selects whether all lights are
                     * sampled for indirect bounces. */
390                 if(kernel_data.integrator.use_direct_light) {
391                         int all = kernel_data.integrator.sample_all_lights_indirect;
392                         kernel_branched_path_surface_connect_light(kg,
393                                                                    rng,
394                                                                    sd,
395                                                                    emission_sd,
396                                                                    state,
397                                                                    throughput,
398                                                                    1.0f,
399                                                                    L,
400                                                                    all);
401                 }
402 #endif  /* defined(__EMISSION__) && defined(__BRANCHED_PATH__) */
403
                    /* Set up the next path segment; a false return ends the
                     * path. */
404                 if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
405                         break;
406         }
407 }
408
409 #ifdef __SUBSURFACE__
/* Perform a BSSRDF scatter at the surface point in sd and queue the
 * resulting continuation rays in ss_indirect.
 *
 * A closure is picked with subsurface_scatter_pick_closure() and *throughput
 * is always multiplied by the returned bssrdf_probability. If a BSSRDF
 * closure was picked, the hits found by subsurface_scatter_multi_intersect()
 * are processed one by one: sd is set up for the hit, direct lighting is
 * evaluated into a per-hit PathRadiance, and a surface bounce is attempted.
 * When the bounce succeeds the copied state/ray/throughput/radiance stay in
 * the ss_indirect arrays (num_rays is incremented); otherwise the per-hit
 * radiance is accumulated into L immediately.
 *
 * The function is declared ccl_device_inline when __KERNEL_CUDA__ is defined
 * and ccl_device otherwise.
 *
 * Returns true when a BSSRDF closure was picked (sd has then been reused for
 * the hits, see the NOTE in the loop), false otherwise.
 */
410 #  ifndef __KERNEL_CUDA__
411 ccl_device
412 #  else
413 ccl_device_inline
414 #  endif
415 bool kernel_path_subsurface_scatter(
416         KernelGlobals *kg,
417         ShaderData *sd,
418         ShaderData *emission_sd,
419         PathRadiance *L,
420         PathState *state,
421         RNG *rng,
422         Ray *ray,
423         float3 *throughput,
424         SubsurfaceIndirectRays *ss_indirect)
425 {
426         float bssrdf_probability;
427         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
428
429         /* modify throughput for picking bssrdf or bsdf */
430         *throughput *= bssrdf_probability;
431
432         /* do bssrdf scatter step if we picked a bssrdf closure */
433         if(sc) {
434                 /* We should never have two consecutive BSSRDF bounces,
435                  * the second one should be converted to a diffuse BSDF to
436                  * avoid this.
437                  */
438                 kernel_assert(!ss_indirect->tracing);
439
440                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
441
                    /* Hits are collected in ss_isect; num_hits is the number of
                     * entries processed by the loop below. */
442                 SubsurfaceIntersection ss_isect;
443                 float bssrdf_u, bssrdf_v;
444                 path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
445                 int num_hits = subsurface_scatter_multi_intersect(kg,
446                                                                   &ss_isect,
447                                                                   sd,
448                                                                   sc,
449                                                                   &lcg_state,
450                                                                   bssrdf_u, bssrdf_v,
451                                                                   false);
452 #  ifdef __VOLUME__
                    /* The volume stack is only updated when volumes are enabled
                     * and sd has SD_OBJECT_INTERSECTS_VOLUME set. */
453                 ss_indirect->need_update_volume_stack =
454                         kernel_data.integrator.use_volumes &&
455                         ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
456 #  endif  /* __VOLUME__ */
457
458                 /* compute lighting with the BSDF closure */
459                 for(int hit = 0; hit < num_hits; hit++) {
460                         /* NOTE: We reuse the existing ShaderData, we assume the path
461                          * integration loop stops when this function returns true.
462                          */
463                         subsurface_scatter_multi_setup(kg,
464                                                        &ss_isect,
465                                                        hit,
466                                                        sd,
467                                                        state,
468                                                        state->flag,
469                                                        sc,
470                                                        false);
471
                            /* Next free slot in the ss_indirect arrays. The slot
                             * is only kept (num_rays++) when the bounce below
                             * succeeds; otherwise the next hit overwrites it. */
472                         PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
473                         Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
474                         float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
475                         PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
476
477                         *hit_state = *state;
478                         *hit_ray = *ray;
479                         *hit_tp = *throughput;
480
                            /* Advance the copied state's RNG dimension offset by
                             * PRNG_BOUNCE_NUM. */
481                         hit_state->rng_offset += PRNG_BOUNCE_NUM;
482
483                         path_radiance_init(hit_L, kernel_data.film.use_light_pass);
484                         hit_L->direct_throughput = L->direct_throughput;
485                         path_radiance_copy_indirect(hit_L, L);
486
                            /* Direct lighting uses the caller's state (not
                             * hit_state) and accumulates into the per-hit
                             * radiance. */
487                         kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
488
489                         if(kernel_path_surface_bounce(kg,
490                                                       rng,
491                                                       sd,
492                                                       hit_tp,
493                                                       hit_state,
494                                                       hit_L,
495                                                       hit_ray))
496                         {
497 #  ifdef __LAMP_MIS__
498                                 hit_state->ray_t = 0.0f;
499 #  endif  /* __LAMP_MIS__ */
500
501 #  ifdef __VOLUME__
502                                 if(ss_indirect->need_update_volume_stack) {
503                                         Ray volume_ray = *ray;
504                                         /* Setup ray from previous surface point to the new one. */
505                                         volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
506                                                                      &volume_ray.t);
507
508                                         kernel_volume_stack_update_for_subsurface(
509                                             kg,
510                                             emission_sd,
511                                             &volume_ray,
512                                             hit_state->volume_stack);
513                                 }
514 #  endif  /* __VOLUME__ */
                                    /* Keep the slot: the continuation ray is now
                                     * queued for kernel_path_subsurface_setup_indirect(). */
515                                 path_radiance_reset_indirect(L);
516                                 ss_indirect->num_rays++;
517                         }
518                         else {
                                    /* No continuation ray for this hit: fold its
                                     * radiance into L right away. */
519                                 path_radiance_accum_sample(L, hit_L, 1);
520                         }
521                 }
522                 return true;
523         }
524         return false;
525 }
526
/* Reset the subsurface indirect-ray queue: not tracing, no queued rays.
 *
 * Only `tracing` and `num_rays` are written; the per-ray arrays are left
 * untouched.
 */
527 ccl_device_inline void kernel_path_subsurface_init_indirect(
528         SubsurfaceIndirectRays *ss_indirect)
529 {
530         ss_indirect->tracing = false;
531         ss_indirect->num_rays = 0;
532 }
533
/* Fold the radiance of a finished subsurface indirect ray into the saved
 * direct radiance.
 *
 * Does nothing unless ss_indirect->tracing is set. Otherwise L's indirect
 * part is summed with path_radiance_sum_indirect(), L is accumulated into
 * ss_indirect->direct_L, and once no queued rays remain (num_rays == 0)
 * the combined result is copied back into *L.
 */
534 ccl_device void kernel_path_subsurface_accum_indirect(
535         SubsurfaceIndirectRays *ss_indirect,
536         PathRadiance *L)
537 {
538         if(ss_indirect->tracing) {
539                 path_radiance_sum_indirect(L);
540                 path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
                    /* Last queued ray done: hand the accumulated total back
                     * to the caller through L. */
541                 if(ss_indirect->num_rays == 0) {
542                         *L = ss_indirect->direct_L;
543                 }
544         }
545 }
546
/* Pop the most recently queued subsurface indirect ray and make it the
 * current path.
 *
 * On the first call (ss_indirect->tracing still false) the caller's *L is
 * saved in ss_indirect->direct_L. Then num_rays is decremented and the
 * state, ray, radiance and throughput stored at that index overwrite
 * *state, *ray, *L and *throughput.
 *
 * NOTE(review): num_rays is decremented without a check, so this assumes
 * at least one ray is queued on entry - confirm against the caller.
 *
 * kg is not used in the body.
 */
547 ccl_device void kernel_path_subsurface_setup_indirect(
548         KernelGlobals *kg,
549         SubsurfaceIndirectRays *ss_indirect,
550         PathState *state,
551         Ray *ray,
552         PathRadiance *L,
553         float3 *throughput)
554 {
            /* Save the radiance accumulated so far only once, before the
             * first queued ray replaces *L. */
555         if(!ss_indirect->tracing) {
556                 ss_indirect->direct_L = *L;
557         }
558         ss_indirect->tracing = true;
559
560         /* Setup state, ray and throughput for indirect SSS rays. */
561         ss_indirect->num_rays--;
562
563         Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
564         PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
565
566         *state = ss_indirect->state[ss_indirect->num_rays];
567         *ray = *indirect_ray;
568         *L = *indirect_L;
569         *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
570
            /* Shift the RNG dimension offset by the popped slot index times
             * PRNG_BOUNCE_NUM, so each queued ray gets a different offset. */
571         state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
572 }
573
574 #endif  /* __SUBSURFACE__ */
575
576 ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
577                                                RNG *rng,
578                                                int sample,
579                                                Ray ray,
580                                                ccl_global float *buffer)
581 {
582         /* initialize */
583         PathRadiance L;
584         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
585         float L_transparent = 0.0f;
586
587         path_radiance_init(&L, kernel_data.film.use_light_pass);
588
589         /* shader data memory used for both volumes and surfaces, saves stack space */
590         ShaderData sd;
591         /* shader data used by emission, shadows, volume stacks */
592         ShaderData emission_sd;
593
594         PathState state;
595         path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
596
597 #ifdef __KERNEL_DEBUG__
598         DebugData debug_data;
599         debug_data_init(&debug_data);
600 #endif  /* __KERNEL_DEBUG__ */
601
602 #ifdef __SUBSURFACE__
603         SubsurfaceIndirectRays ss_indirect;
604         kernel_path_subsurface_init_indirect(&ss_indirect);
605
606         for(;;) {
607 #endif  /* __SUBSURFACE__ */
608
609         /* path iteration */
610         for(;;) {
611                 /* intersect scene */
612                 Intersection isect;
613                 uint visibility = path_state_ray_visibility(kg, &state);
614
615 #ifdef __HAIR__
616                 float difl = 0.0f, extmax = 0.0f;
617                 uint lcg_state = 0;
618
619                 if(kernel_data.bvh.have_curves) {
620                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
621                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
622                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
623                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
624                         }
625
626                         extmax = kernel_data.curve.maximum_width;
627                         lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
628                 }
629
630                 bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
631 #else
632                 bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
633 #endif  /* __HAIR__ */
634
635 #ifdef __KERNEL_DEBUG__
636                 if(state.flag & PATH_RAY_CAMERA) {
637                         debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
638                         debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
639                         debug_data.num_bvh_intersections += isect.num_intersections;
640                 }
641                 debug_data.num_ray_bounces++;
642 #endif  /* __KERNEL_DEBUG__ */
643
644 #ifdef __LAMP_MIS__
645                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
646                         /* ray starting from previous non-transparent bounce */
647                         Ray light_ray;
648
649                         light_ray.P = ray.P - state.ray_t*ray.D;
650                         state.ray_t += isect.t;
651                         light_ray.D = ray.D;
652                         light_ray.t = state.ray_t;
653                         light_ray.time = ray.time;
654                         light_ray.dD = ray.dD;
655                         light_ray.dP = ray.dP;
656
657                         /* intersect with lamp */
658                         float3 emission;
659
660                         if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
661                                 path_radiance_accum_emission(&L, throughput, emission, state.bounce);
662                 }
663 #endif  /* __LAMP_MIS__ */
664
665 #ifdef __VOLUME__
666                 /* Sanitize volume stack. */
667                 if(!hit) {
668                         kernel_volume_clean_stack(kg, state.volume_stack);
669                 }
670                 /* volume attenuation, emission, scatter */
671                 if(state.volume_stack[0].shader != SHADER_NONE) {
672                         Ray volume_ray = ray;
673                         volume_ray.t = (hit)? isect.t: FLT_MAX;
674
675                         bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
676
677 #  ifdef __VOLUME_DECOUPLED__
678                         int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
679                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
680
681                         if(decoupled) {
682                                 /* cache steps along volume for repeated sampling */
683                                 VolumeSegment volume_segment;
684
685                                 shader_setup_from_volume(kg, &sd, &volume_ray);
686                                 kernel_volume_decoupled_record(kg, &state,
687                                         &volume_ray, &sd, &volume_segment, heterogeneous);
688
689                                 volume_segment.sampling_method = sampling_method;
690
691                                 /* emission */
692                                 if(volume_segment.closure_flag & SD_EMISSION)
693                                         path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
694
695                                 /* scattering */
696                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
697
698                                 if(volume_segment.closure_flag & SD_SCATTER) {
699                                         int all = false;
700
701                                         /* direct light sampling */
702                                         kernel_branched_path_volume_connect_light(kg, rng, &sd,
703                                                 &emission_sd, throughput, &state, &L, all,
704                                                 &volume_ray, &volume_segment);
705
706                                         /* indirect sample. if we use distance sampling and take just
707                                          * one sample for direct and indirect light, we could share
708                                          * this computation, but makes code a bit complex */
709                                         float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
710                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
711
712                                         result = kernel_volume_decoupled_scatter(kg,
713                                                 &state, &volume_ray, &sd, &throughput,
714                                                 rphase, rscatter, &volume_segment, NULL, true);
715                                 }
716
717                                 /* free cached steps */
718                                 kernel_volume_decoupled_free(kg, &volume_segment);
719
720                                 if(result == VOLUME_PATH_SCATTERED) {
721                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
722                                                 continue;
723                                         else
724                                                 break;
725                                 }
726                                 else {
727                                         throughput *= volume_segment.accum_transmittance;
728                                 }
729                         }
730                         else
731 #  endif  /* __VOLUME_DECOUPLED__ */
732                         {
733                                 /* integrate along volume segment with distance sampling */
734                                 VolumeIntegrateResult result = kernel_volume_integrate(
735                                         kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
736
737 #  ifdef __VOLUME_SCATTER__
738                                 if(result == VOLUME_PATH_SCATTERED) {
739                                         /* direct lighting */
740                                         kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
741
742                                         /* indirect light bounce */
743                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
744                                                 continue;
745                                         else
746                                                 break;
747                                 }
748 #  endif  /* __VOLUME_SCATTER__ */
749                         }
750                 }
751 #endif  /* __VOLUME__ */
752
753                 if(!hit) {
754                         /* eval background shader if nothing hit */
755                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
756                                 L_transparent += average(throughput);
757
758 #ifdef __PASSES__
759                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
760 #endif  /* __PASSES__ */
761                                         break;
762                         }
763
764 #ifdef __BACKGROUND__
765                         /* sample background shader */
766                         float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
767                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
768 #endif  /* __BACKGROUND__ */
769
770                         break;
771                 }
772
773                 /* setup shading */
774                 shader_setup_from_ray(kg, &sd, &isect, &ray);
775                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
776                 shader_eval_surface(kg, &sd, rng, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
777
778                 /* holdout */
779 #ifdef __HOLDOUT__
780                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
781                         if(kernel_data.background.transparent) {
782                                 float3 holdout_weight;
783                                 
784                                 if(sd.flag & SD_HOLDOUT_MASK)
785                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
786                                 else
787                                         holdout_weight = shader_holdout_eval(kg, &sd);
788
789                                 /* any throughput is ok, should all be identical here */
790                                 L_transparent += average(holdout_weight*throughput);
791                         }
792
793                         if(sd.flag & SD_HOLDOUT_MASK)
794                                 break;
795                 }
796 #endif  /* __HOLDOUT__ */
797
798                 /* holdout mask objects do not write data passes */
799                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
800
801                 /* blurring of bsdf after bounces, for rays that have a small likelihood
802                  * of following this particular path (diffuse, rough glossy) */
803                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
804                         float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
805
806                         if(blur_pdf < 1.0f) {
807                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
808                                 shader_bsdf_blur(kg, &sd, blur_roughness);
809                         }
810                 }
811
812 #ifdef __EMISSION__
813                 /* emission */
814                 if(sd.flag & SD_EMISSION) {
815                         /* todo: is isect.t wrong here for transparent surfaces? */
816                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
817                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
818                 }
819 #endif  /* __EMISSION__ */
820
821                 /* path termination. this is a strange place to put the termination, it's
822                  * mainly due to the mixed in MIS that we use. gives too many unneeded
823                  * shader evaluations, only need emission if we are going to terminate */
824                 float probability = path_state_terminate_probability(kg, &state, throughput);
825
826                 if(probability == 0.0f) {
827                         break;
828                 }
829                 else if(probability != 1.0f) {
830                         float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
831                         if(terminate >= probability)
832                                 break;
833
834                         throughput /= probability;
835                 }
836
837 #ifdef __AO__
838                 /* ambient occlusion */
839                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
840                         kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput, shader_bsdf_alpha(kg, &sd));
841                 }
842 #endif  /* __AO__ */
843
844 #ifdef __SUBSURFACE__
845                 /* bssrdf scatter to a different location on the same object, replacing
846                  * the closures with a diffuse BSDF */
847                 if(sd.flag & SD_BSSRDF) {
848                         if(kernel_path_subsurface_scatter(kg,
849                                                           &sd,
850                                                           &emission_sd,
851                                                           &L,
852                                                           &state,
853                                                           rng,
854                                                           &ray,
855                                                           &throughput,
856                                                           &ss_indirect))
857                         {
858                                 break;
859                         }
860                 }
861 #endif  /* __SUBSURFACE__ */
862
863                 /* direct lighting */
864                 kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
865
866                 /* compute direct lighting and next bounce */
867                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
868                         break;
869         }
870
871 #ifdef __SUBSURFACE__
872                 kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
873
874                 /* Trace indirect subsurface rays by restarting the loop. this uses less
875                  * stack memory than invoking kernel_path_indirect.
876                  */
877                 if(ss_indirect.num_rays) {
878                         kernel_path_subsurface_setup_indirect(kg,
879                                                               &ss_indirect,
880                                                               &state,
881                                                               &ray,
882                                                               &L,
883                                                               &throughput);
884                 }
885                 else {
886                         break;
887                 }
888         }
889 #endif  /* __SUBSURFACE__ */
890
891         float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
892
893         kernel_write_light_passes(kg, buffer, &L, sample);
894
895 #ifdef __KERNEL_DEBUG__
896         kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
897 #endif  /* __KERNEL_DEBUG__ */
898
899         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
900 }
901
902 ccl_device void kernel_path_trace(KernelGlobals *kg,
903         ccl_global float *buffer, ccl_global uint *rng_state,
904         int sample, int x, int y, int offset, int stride)
905 {
906         /* buffer offset */
907         int index = offset + x + y*stride;
908         int pass_stride = kernel_data.film.pass_stride;
909
910         rng_state += index;
911         buffer += index*pass_stride;
912
913         /* initialize random numbers and ray */
914         RNG rng;
915         Ray ray;
916
917         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
918
919         /* integrate */
920         float4 L;
921
922         if(ray.t != 0.0f)
923                 L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
924         else
925                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
926
927         /* accumulate result in output buffer */
928         kernel_write_pass_float4(buffer, sample, L);
929
930         path_rng_end(kg, rng_state, rng);
931 }
932
933 CCL_NAMESPACE_END
934