Fix Cycles CUDA adaptive megakernel build error.
[blender-staging.git] / intern / cycles / kernel / kernel_path.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef __OSL__
18 #  include "kernel/osl/osl_shader.h"
19 #endif
20
21 #include "kernel/kernel_random.h"
22 #include "kernel/kernel_projection.h"
23 #include "kernel/kernel_montecarlo.h"
24 #include "kernel/kernel_differential.h"
25 #include "kernel/kernel_camera.h"
26
27 #include "kernel/geom/geom.h"
28 #include "kernel/bvh/bvh.h"
29
30 #include "kernel/kernel_accumulate.h"
31 #include "kernel/kernel_shader.h"
32 #include "kernel/kernel_light.h"
33 #include "kernel/kernel_passes.h"
34
35 #ifdef __SUBSURFACE__
36 #  include "kernel/kernel_subsurface.h"
37 #endif
38
39 #ifdef __VOLUME__
40 #  include "kernel/kernel_volume.h"
41 #endif
42
43 #include "kernel/kernel_path_state.h"
44 #include "kernel/kernel_shadow.h"
45 #include "kernel/kernel_emission.h"
46 #include "kernel/kernel_path_common.h"
47 #include "kernel/kernel_path_surface.h"
48 #include "kernel/kernel_path_volume.h"
49 #include "kernel/kernel_path_subsurface.h"
50
51 #ifdef __KERNEL_DEBUG__
52 #  include "kernel/kernel_debug.h"
53 #endif
54
55 CCL_NAMESPACE_BEGIN
56
57 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
58                                         ShaderData *sd,
59                                         ShaderData *emission_sd,
60                                         PathRadiance *L,
61                                         ccl_addr_space PathState *state,
62                                         RNG *rng,
63                                         float3 throughput,
64                                         float3 ao_alpha)
65 {
66         /* todo: solve correlation */
67         float bsdf_u, bsdf_v;
68
69         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
70
71         float ao_factor = kernel_data.background.ao_factor;
72         float3 ao_N;
73         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
74         float3 ao_D;
75         float ao_pdf;
76
77         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
78
79         if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
80                 Ray light_ray;
81                 float3 ao_shadow;
82
83                 light_ray.P = ray_offset(sd->P, sd->Ng);
84                 light_ray.D = ao_D;
85                 light_ray.t = kernel_data.background.ao_distance;
86 #ifdef __OBJECT_MOTION__
87                 light_ray.time = sd->time;
88 #endif  /* __OBJECT_MOTION__ */
89                 light_ray.dP = sd->dP;
90                 light_ray.dD = differential3_zero();
91
92                 if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
93                         path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
94                 }
95                 else {
96                         path_radiance_accum_total_ao(L, state, throughput, ao_bsdf);
97                 }
98         }
99 }
100
101 #ifndef __SPLIT_KERNEL__
102
103 #if defined(__BRANCHED_PATH__) || defined(__BAKING__)
104
/* Trace an indirect path iteratively, accumulating its contribution into L.
 *
 * Compiled only when __SPLIT_KERNEL__ is undefined and __BRANCHED_PATH__ or
 * __BAKING__ is defined.
 *
 * Each loop iteration: intersect the scene, add lamp emission (lamp MIS),
 * integrate volumes, add background on a miss, evaluate the surface shader,
 * add surface emission, decide on probabilistic termination, then sample AO,
 * subsurface and direct light, and finally pick the next bounce. The loop
 * ends when nothing is hit, when something is hit beyond ao_bounces, when
 * termination is chosen, or when a bounce helper returns false.
 *
 * kg:          kernel globals, forwarded to every helper.
 * sd:          shader data set up for both volume segments and surface hits.
 * emission_sd: separate shader data handed to the emission/shadow helpers.
 * rng:         random number generator state forwarded to the samplers.
 * ray:         ray to trace; passed by pointer to the bounce helpers
 *              (presumably updated there for the next iteration - confirm).
 * throughput:  path throughput so far; taken by value, so changes made here
 *              are not visible to the caller.
 * num_samples: scales the throughput used for the termination probability.
 * state:       path state, read and modified in place (ray_t, flag, ...).
 * L:           radiance accumulator written by the path_radiance_accum_*()
 *              calls.
 */
105 ccl_device void kernel_path_indirect(KernelGlobals *kg,
106                                      ShaderData *sd,
107                                      ShaderData *emission_sd,
108                                      RNG *rng,
109                                      Ray *ray,
110                                      float3 throughput,
111                                      int num_samples,
112                                      PathState *state,
113                                      PathRadiance *L)
114 {
115         /* path iteration */
116         for(;;) {
117                 /* intersect scene */
118                 Intersection isect;
119                 uint visibility = path_state_ray_visibility(kg, state);
                    /* Past ao_bounces the ray is traced with shadow visibility only
                     * and its length is limited to the AO distance. */
120                 if(state->bounce > kernel_data.integrator.ao_bounces) {
121                         visibility = PATH_RAY_SHADOW;
122                         ray->t = kernel_data.background.ao_distance;
123                 }
                    /* The last three arguments are the ones kernel_path_integrate()
                     * fills with lcg_state/difl/extmax under __HAIR__; they are left
                     * empty here. */
124                 bool hit = scene_intersect(kg,
125                                            *ray,
126                                            visibility,
127                                            &isect,
128                                            NULL,
129                                            0.0f, 0.0f);
130
131 #ifdef __LAMP_MIS__
132                 if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
133                         /* ray starting from previous non-transparent bounce */
134                         Ray light_ray;
135
                            /* Move the origin back by the distance already stored in
                             * state->ray_t, then extend ray_t by this segment so the
                             * light ray spans up to the current intersection distance. */
136                         light_ray.P = ray->P - state->ray_t*ray->D;
137                         state->ray_t += isect.t;
138                         light_ray.D = ray->D;
139                         light_ray.t = state->ray_t;
140                         light_ray.time = ray->time;
141                         light_ray.dD = ray->dD;
142                         light_ray.dP = ray->dP;
143
144                         /* intersect with lamp */
145                         float3 emission;
146                         if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
147                                 path_radiance_accum_emission(L,
148                                                              throughput,
149                                                              emission,
150                                                              state->bounce);
151                         }
152                 }
153 #endif  /* __LAMP_MIS__ */
154
155 #ifdef __VOLUME__
156                 /* Sanitize volume stack. */
157                 if(!hit) {
158                         kernel_volume_clean_stack(kg, state->volume_stack);
159                 }
160                 /* volume attenuation, emission, scatter */
161                 if(state->volume_stack[0].shader != SHADER_NONE) {
                            /* Volume segment ends at the hit distance, or is left
                             * unbounded (FLT_MAX) when nothing was hit. */
162                         Ray volume_ray = *ray;
163                         volume_ray.t = (hit)? isect.t: FLT_MAX;
164
165                         bool heterogeneous =
166                                 volume_stack_is_heterogeneous(kg,
167                                                               state->volume_stack);
168
169 #  ifdef __VOLUME_DECOUPLED__
170                         int sampling_method =
171                                 volume_stack_sampling_method(kg,
172                                                              state->volume_stack);
                            /* NOTE(review): the third argument is false here but true in
                             * kernel_path_integrate(); presumably it distinguishes direct
                             * from indirect paths - confirm against
                             * kernel_volume_use_decoupled(). */
173                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
174
175                         if(decoupled) {
176                                 /* cache steps along volume for repeated sampling */
177                                 VolumeSegment volume_segment;
178
179                                 shader_setup_from_volume(kg,
180                                                          sd,
181                                                          &volume_ray);
182                                 kernel_volume_decoupled_record(kg,
183                                                                state,
184                                                                &volume_ray,
185                                                                sd,
186                                                                &volume_segment,
187                                                                heterogeneous);
188
189                                 volume_segment.sampling_method = sampling_method;
190
191                                 /* emission */
192                                 if(volume_segment.closure_flag & SD_EMISSION) {
193                                         path_radiance_accum_emission(L,
194                                                                      throughput,
195                                                                      volume_segment.accum_emission,
196                                                                      state->bounce);
197                                 }
198
199                                 /* scattering */
                                    /* Stays VOLUME_PATH_ATTENUATED unless the segment has a
                                     * scatter closure and the scatter call below says otherwise. */
200                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
201
202                                 if(volume_segment.closure_flag & SD_SCATTER) {
203                                         int all = kernel_data.integrator.sample_all_lights_indirect;
204
205                                         /* direct light sampling */
206                                         kernel_branched_path_volume_connect_light(kg,
207                                                                                   rng,
208                                                                                   sd,
209                                                                                   emission_sd,
210                                                                                   throughput,
211                                                                                   state,
212                                                                                   L,
213                                                                                   all,
214                                                                                   &volume_ray,
215                                                                                   &volume_segment);
216
217                                         /* indirect sample. if we use distance sampling and take just
218                                          * one sample for direct and indirect light, we could share
219                                          * this computation, but makes code a bit complex */
220                                         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
221                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
222
223                                         result = kernel_volume_decoupled_scatter(kg,
224                                                                                  state,
225                                                                                  &volume_ray,
226                                                                                  sd,
227                                                                                  &throughput,
228                                                                                  rphase,
229                                                                                  rscatter,
230                                                                                  &volume_segment,
231                                                                                  NULL,
232                                                                                  true);
233                                 }
234
235                                 /* free cached steps */
236                                 kernel_volume_decoupled_free(kg, &volume_segment);
237
238                                 if(result == VOLUME_PATH_SCATTERED) {
                                            /* Scattered inside the volume: either restart the
                                             * loop with the bounced ray or end the path. */
239                                         if(kernel_path_volume_bounce(kg,
240                                                                      rng,
241                                                                      sd,
242                                                                      &throughput,
243                                                                      state,
244                                                                      L,
245                                                                      ray))
246                                         {
247                                                 continue;
248                                         }
249                                         else {
250                                                 break;
251                                         }
252                                 }
253                                 else {
                                            /* Not scattered: attenuate by the transmittance
                                             * accumulated over the recorded segment. */
254                                         throughput *= volume_segment.accum_transmittance;
255                                 }
256                         }
257                         else
258 #  endif  /* __VOLUME_DECOUPLED__ */
259                         {
                                    /* This block is the else-branch of if(decoupled) when
                                     * __VOLUME_DECOUPLED__ is defined, and runs unconditionally
                                     * when it is not. */
260                                 /* integrate along volume segment with distance sampling */
261                                 VolumeIntegrateResult result = kernel_volume_integrate(
262                                         kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);
263
264 #  ifdef __VOLUME_SCATTER__
265                                 if(result == VOLUME_PATH_SCATTERED) {
266                                         /* direct lighting */
267                                         kernel_path_volume_connect_light(kg,
268                                                                          rng,
269                                                                          sd,
270                                                                          emission_sd,
271                                                                          throughput,
272                                                                          state,
273                                                                          L);
274
275                                         /* indirect light bounce */
276                                         if(kernel_path_volume_bounce(kg,
277                                                                      rng,
278                                                                      sd,
279                                                                      &throughput,
280                                                                      state,
281                                                                      L,
282                                                                      ray))
283                                         {
284                                                 continue;
285                                         }
286                                         else {
287                                                 break;
288                                         }
289                                 }
290 #  endif  /* __VOLUME_SCATTER__ */
291                         }
292                 }
293 #endif  /* __VOLUME__ */
294
                    /* A miss always ends the path, with the background contribution
                     * added first when __BACKGROUND__ is enabled. */
295                 if(!hit) {
296 #ifdef __BACKGROUND__
297                         /* sample background shader */
298                         float3 L_background = indirect_background(kg, emission_sd, state, ray);
299                         path_radiance_accum_background(L,
300                                                        state,
301                                                        throughput,
302                                                        L_background);
303 #endif  /* __BACKGROUND__ */
304
305                         break;
306                 }
                    /* Beyond ao_bounces a hit ends the path without shading it. */
307                 else if(state->bounce > kernel_data.integrator.ao_bounces) {
308                         break;
309                 }
310
311                 /* setup shading */
312                 shader_setup_from_ray(kg,
313                                       sd,
314                                       &isect,
315                                       ray);
316                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
317                 shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
                    /* Guarded separately because this function is also built when only
                     * __BAKING__ is defined. */
318 #ifdef __BRANCHED_PATH__
319                 shader_merge_closures(sd);
320 #endif  /* __BRANCHED_PATH__ */
321
322 #ifdef __SHADOW_TRICKS__
                    /* Clear the catcher-only flag as soon as the path hits an object
                     * that is not a shadow catcher. */
323                 if(!(sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
324                         state->flag &= ~PATH_RAY_SHADOW_CATCHER_ONLY;
325                 }
326 #endif  /* __SHADOW_TRICKS__ */
327
328                 /* blurring of bsdf after bounces, for rays that have a small likelihood
329                  * of following this particular path (diffuse, rough glossy) */
                    /* filter_glossy == FLT_MAX skips the blur entirely. */
330                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
331                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
332
                            /* Only blur when the scaled minimum pdf is below one; the
                             * roughness grows as blur_pdf approaches zero (max 0.5). */
333                         if(blur_pdf < 1.0f) {
334                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
335                                 shader_bsdf_blur(kg, sd, blur_roughness);
336                         }
337                 }
338
339 #ifdef __EMISSION__
340                 /* emission */
341                 if(sd->flag & SD_EMISSION) {
342                         float3 emission = indirect_primitive_emission(kg,
343                                                                       sd,
344                                                                       isect.t,
345                                                                       state->flag,
346                                                                       state->ray_pdf);
347                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
348                 }
349 #endif  /* __EMISSION__ */
350
351                 /* path termination. this is a strange place to put the termination, it's
352                  * mainly due to the mixed in MIS that we use. gives too many unneeded
353                  * shader evaluations, only need emission if we are going to terminate */
354                 float probability =
355                         path_state_terminate_probability(kg,
356                                                          state,
357                                                          throughput*num_samples);
358
                    /* probability == 0: always stop. probability == 1: always continue
                     * with throughput untouched. Otherwise draw a random number, stop
                     * when it is >= probability, and divide the throughput of the
                     * surviving path by probability. */
359                 if(probability == 0.0f) {
360                         break;
361                 }
362                 else if(probability != 1.0f) {
363                         float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
364
365                         if(terminate >= probability)
366                                 break;
367
368                         throughput /= probability;
369                 }
370
371                 kernel_update_denoising_features(kg, sd, state, L);
372
373 #ifdef __AO__
374                 /* ambient occlusion */
                    /* Runs when AO is enabled globally or the shader sets SD_AO; a zero
                     * ao_alpha is passed to kernel_path_ao(). */
375                 if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
376                         kernel_path_ao(kg, sd, emission_sd, L, state, rng, throughput, make_float3(0.0f, 0.0f, 0.0f));
377                 }
378 #endif  /* __AO__ */
379
380 #ifdef __SUBSURFACE__
381                 /* bssrdf scatter to a different location on the same object, replacing
382                  * the closures with a diffuse BSDF */
383                 if(sd->flag & SD_BSSRDF) {
384                         float bssrdf_probability;
385                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
386
387                         /* modify throughput for picking bssrdf or bsdf */
                            /* Applied whether or not a closure was actually picked. */
388                         throughput *= bssrdf_probability;
389
390                         /* do bssrdf scatter step if we picked a bssrdf closure */
391                         if(sc) {
                                    /* LCG state derived from the path's rng offset and sample
                                     * plus a fixed constant. */
392                                 uint lcg_state = lcg_state_init(rng, state->rng_offset, state->sample, 0x68bc21eb);
393
394                                 float bssrdf_u, bssrdf_v;
395                                 path_state_rng_2D(kg,
396                                                   rng,
397                                                   state,
398                                                   PRNG_BSDF_U,
399                                                   &bssrdf_u, &bssrdf_v);
                                    /* NOTE(review): meaning of the trailing false argument is
                                     * not visible here - check subsurface_scatter_step(). */
400                                 subsurface_scatter_step(kg,
401                                                         sd,
402                                                         state,
403                                                         state->flag,
404                                                         sc,
405                                                         &lcg_state,
406                                                         bssrdf_u, bssrdf_v,
407                                                         false);
408                         }
409                 }
410 #endif  /* __SUBSURFACE__ */
411
412 #if defined(__EMISSION__)
                    /* Direct lighting at the surface hit. */
413                 if(kernel_data.integrator.use_direct_light) {
                            /* Sample all lights when the integrator requests it for
                             * indirect paths or when the path carries the shadow catcher
                             * flag. */
414                         int all = (kernel_data.integrator.sample_all_lights_indirect) ||
415                                   (state->flag & PATH_RAY_SHADOW_CATCHER);
416                         kernel_branched_path_surface_connect_light(kg,
417                                                                    rng,
418                                                                    sd,
419                                                                    emission_sd,
420                                                                    state,
421                                                                    throughput,
422                                                                    1.0f,
423                                                                    L,
424                                                                    all);
425                 }
426 #endif  /* defined(__EMISSION__) */
427
                    /* Choose the next bounce; the path ends when the helper returns
                     * false. */
428                 if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
429                         break;
430         }
431 }
432
433 #endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
434
435 ccl_device_inline float kernel_path_integrate(KernelGlobals *kg,
436                                               RNG *rng,
437                                               int sample,
438                                               Ray ray,
439                                               ccl_global float *buffer,
440                                               PathRadiance *L,
441                                               bool *is_shadow_catcher)
442 {
443         /* initialize */
444         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
445         float L_transparent = 0.0f;
446
447         path_radiance_init(L, kernel_data.film.use_light_pass);
448
449         /* shader data memory used for both volumes and surfaces, saves stack space */
450         ShaderData sd;
451         /* shader data used by emission, shadows, volume stacks */
452         ShaderData emission_sd;
453
454         PathState state;
455         path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
456
457 #ifdef __KERNEL_DEBUG__
458         DebugData debug_data;
459         debug_data_init(&debug_data);
460 #endif  /* __KERNEL_DEBUG__ */
461
462 #ifdef __SUBSURFACE__
463         SubsurfaceIndirectRays ss_indirect;
464         kernel_path_subsurface_init_indirect(&ss_indirect);
465
466         for(;;) {
467 #endif  /* __SUBSURFACE__ */
468
469         /* path iteration */
470         for(;;) {
471                 /* intersect scene */
472                 Intersection isect;
473                 uint visibility = path_state_ray_visibility(kg, &state);
474
475 #ifdef __HAIR__
476                 float difl = 0.0f, extmax = 0.0f;
477                 uint lcg_state = 0;
478
479                 if(kernel_data.bvh.have_curves) {
480                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
481                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
482                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
483                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
484                         }
485
486                         extmax = kernel_data.curve.maximum_width;
487                         lcg_state = lcg_state_init(rng, state.rng_offset, state.sample, 0x51633e2d);
488                 }
489
490                 if(state.bounce > kernel_data.integrator.ao_bounces) {
491                         visibility = PATH_RAY_SHADOW;
492                         ray.t = kernel_data.background.ao_distance;
493                 }
494
495                 bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
496 #else
497                 bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
498 #endif  /* __HAIR__ */
499
500 #ifdef __KERNEL_DEBUG__
501                 if(state.flag & PATH_RAY_CAMERA) {
502                         debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
503                         debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
504                         debug_data.num_bvh_intersections += isect.num_intersections;
505                 }
506                 debug_data.num_ray_bounces++;
507 #endif  /* __KERNEL_DEBUG__ */
508
509 #ifdef __LAMP_MIS__
510                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
511                         /* ray starting from previous non-transparent bounce */
512                         Ray light_ray;
513
514                         light_ray.P = ray.P - state.ray_t*ray.D;
515                         state.ray_t += isect.t;
516                         light_ray.D = ray.D;
517                         light_ray.t = state.ray_t;
518                         light_ray.time = ray.time;
519                         light_ray.dD = ray.dD;
520                         light_ray.dP = ray.dP;
521
522                         /* intersect with lamp */
523                         float3 emission;
524
525                         if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
526                                 path_radiance_accum_emission(L, throughput, emission, state.bounce);
527                 }
528 #endif  /* __LAMP_MIS__ */
529
530 #ifdef __VOLUME__
531                 /* Sanitize volume stack. */
532                 if(!hit) {
533                         kernel_volume_clean_stack(kg, state.volume_stack);
534                 }
535                 /* volume attenuation, emission, scatter */
536                 if(state.volume_stack[0].shader != SHADER_NONE) {
537                         Ray volume_ray = ray;
538                         volume_ray.t = (hit)? isect.t: FLT_MAX;
539
540                         bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
541
542 #  ifdef __VOLUME_DECOUPLED__
543                         int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
544                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
545
546                         if(decoupled) {
547                                 /* cache steps along volume for repeated sampling */
548                                 VolumeSegment volume_segment;
549
550                                 shader_setup_from_volume(kg, &sd, &volume_ray);
551                                 kernel_volume_decoupled_record(kg, &state,
552                                         &volume_ray, &sd, &volume_segment, heterogeneous);
553
554                                 volume_segment.sampling_method = sampling_method;
555
556                                 /* emission */
557                                 if(volume_segment.closure_flag & SD_EMISSION)
558                                         path_radiance_accum_emission(L, throughput, volume_segment.accum_emission, state.bounce);
559
560                                 /* scattering */
561                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
562
563                                 if(volume_segment.closure_flag & SD_SCATTER) {
564                                         int all = false;
565
566                                         /* direct light sampling */
567                                         kernel_branched_path_volume_connect_light(kg, rng, &sd,
568                                                 &emission_sd, throughput, &state, L, all,
569                                                 &volume_ray, &volume_segment);
570
571                                         /* indirect sample. if we use distance sampling and take just
572                                          * one sample for direct and indirect light, we could share
573                                          * this computation, but makes code a bit complex */
574                                         float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
575                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
576
577                                         result = kernel_volume_decoupled_scatter(kg,
578                                                 &state, &volume_ray, &sd, &throughput,
579                                                 rphase, rscatter, &volume_segment, NULL, true);
580                                 }
581
582                                 /* free cached steps */
583                                 kernel_volume_decoupled_free(kg, &volume_segment);
584
585                                 if(result == VOLUME_PATH_SCATTERED) {
586                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, L, &ray))
587                                                 continue;
588                                         else
589                                                 break;
590                                 }
591                                 else {
592                                         throughput *= volume_segment.accum_transmittance;
593                                 }
594                         }
595                         else
596 #  endif  /* __VOLUME_DECOUPLED__ */
597                         {
598                                 /* integrate along volume segment with distance sampling */
599                                 VolumeIntegrateResult result = kernel_volume_integrate(
600                                         kg, &state, &sd, &volume_ray, L, &throughput, rng, heterogeneous);
601
602 #  ifdef __VOLUME_SCATTER__
603                                 if(result == VOLUME_PATH_SCATTERED) {
604                                         /* direct lighting */
605                                         kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, L);
606
607                                         /* indirect light bounce */
608                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, L, &ray))
609                                                 continue;
610                                         else
611                                                 break;
612                                 }
613 #  endif  /* __VOLUME_SCATTER__ */
614                         }
615                 }
616 #endif  /* __VOLUME__ */
617
618                 if(!hit) {
619                         /* eval background shader if nothing hit */
620                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
621                                 L_transparent += average(throughput);
622
623 #ifdef __PASSES__
624                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
625 #endif  /* __PASSES__ */
626                                         break;
627                         }
628
629 #ifdef __BACKGROUND__
630                         /* sample background shader */
631                         float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
632                         path_radiance_accum_background(L, &state, throughput, L_background);
633 #endif  /* __BACKGROUND__ */
634
635                         break;
636                 }
637                 else if(state.bounce > kernel_data.integrator.ao_bounces) {
638                         break;
639                 }
640
641                 /* setup shading */
642                 shader_setup_from_ray(kg, &sd, &isect, &ray);
643                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
644                 shader_eval_surface(kg, &sd, rng, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
645
646 #ifdef __SHADOW_TRICKS__
647                 if((sd.object_flag & SD_OBJECT_SHADOW_CATCHER)) {
648                         if(state.flag & PATH_RAY_CAMERA) {
649                                 state.flag |= (PATH_RAY_SHADOW_CATCHER |
650                                                PATH_RAY_SHADOW_CATCHER_ONLY |
651                                                PATH_RAY_STORE_SHADOW_INFO);
652                                 state.catcher_object = sd.object;
653                                 if(!kernel_data.background.transparent) {
654                                         L->shadow_background_color =
655                                                 indirect_background(kg, &emission_sd, &state, &ray);
656                                 }
657                                 L->shadow_radiance_sum = path_radiance_clamp_and_sum(kg, L);
658                                 L->shadow_throughput = average(throughput);
659                         }
660                 }
661                 else {
662                         state.flag &= ~PATH_RAY_SHADOW_CATCHER_ONLY;
663                 }
664 #endif  /* __SHADOW_TRICKS__ */
665
666                 /* holdout */
667 #ifdef __HOLDOUT__
668                 if(((sd.flag & SD_HOLDOUT) ||
669                     (sd.object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
670                    (state.flag & PATH_RAY_CAMERA))
671                 {
672                         if(kernel_data.background.transparent) {
673                                 float3 holdout_weight;
674                                 if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
675                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
676                                 }
677                                 else {
678                                         holdout_weight = shader_holdout_eval(kg, &sd);
679                                 }
680                                 /* any throughput is ok, should all be identical here */
681                                 L_transparent += average(holdout_weight*throughput);
682                         }
683
684                         if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
685                                 break;
686                         }
687                 }
688 #endif  /* __HOLDOUT__ */
689
690                 /* holdout mask objects do not write data passes */
691                 kernel_write_data_passes(kg, buffer, L, &sd, sample, &state, throughput);
692
693                 /* blurring of bsdf after bounces, for rays that have a small likelihood
694                  * of following this particular path (diffuse, rough glossy) */
695                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
696                         float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
697
698                         if(blur_pdf < 1.0f) {
699                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
700                                 shader_bsdf_blur(kg, &sd, blur_roughness);
701                         }
702                 }
703
704 #ifdef __EMISSION__
705                 /* emission */
706                 if(sd.flag & SD_EMISSION) {
707                         /* todo: is isect.t wrong here for transparent surfaces? */
708                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
709                         path_radiance_accum_emission(L, throughput, emission, state.bounce);
710                 }
711 #endif  /* __EMISSION__ */
712
713                 /* path termination. this is a strange place to put the termination, it's
714                  * mainly due to the mixed in MIS that we use. gives too many unneeded
715                  * shader evaluations, only need emission if we are going to terminate */
716                 float probability = path_state_terminate_probability(kg, &state, throughput);
717
718                 if(probability == 0.0f) {
719                         break;
720                 }
721                 else if(probability != 1.0f) {
722                         float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
723                         if(terminate >= probability)
724                                 break;
725
726                         throughput /= probability;
727                 }
728
729                 kernel_update_denoising_features(kg, &sd, &state, L);
730
731 #ifdef __AO__
732                 /* ambient occlusion */
733                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
734                         kernel_path_ao(kg, &sd, &emission_sd, L, &state, rng, throughput, shader_bsdf_alpha(kg, &sd));
735                 }
736 #endif  /* __AO__ */
737
738 #ifdef __SUBSURFACE__
739                 /* bssrdf scatter to a different location on the same object, replacing
740                  * the closures with a diffuse BSDF */
741                 if(sd.flag & SD_BSSRDF) {
742                         if(kernel_path_subsurface_scatter(kg,
743                                                           &sd,
744                                                           &emission_sd,
745                                                           L,
746                                                           &state,
747                                                           rng,
748                                                           &ray,
749                                                           &throughput,
750                                                           &ss_indirect))
751                         {
752                                 break;
753                         }
754                 }
755 #endif  /* __SUBSURFACE__ */
756
757                 /* direct lighting */
758                 kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, L);
759
760                 /* compute direct lighting and next bounce */
761                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, L, &ray))
762                         break;
763         }
764
765 #ifdef __SUBSURFACE__
766                 kernel_path_subsurface_accum_indirect(&ss_indirect, L);
767
768                 /* Trace indirect subsurface rays by restarting the loop. this uses less
769                  * stack memory than invoking kernel_path_indirect.
770                  */
771                 if(ss_indirect.num_rays) {
772                         kernel_path_subsurface_setup_indirect(kg,
773                                                               &ss_indirect,
774                                                               &state,
775                                                               &ray,
776                                                               L,
777                                                               &throughput);
778                 }
779                 else {
780                         break;
781                 }
782         }
783 #endif  /* __SUBSURFACE__ */
784
785 #ifdef __SHADOW_TRICKS__
786         *is_shadow_catcher = (state.flag & PATH_RAY_SHADOW_CATCHER);
787 #endif  /* __SHADOW_TRICKS__ */
788
789 #ifdef __KERNEL_DEBUG__
790         kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
791 #endif  /* __KERNEL_DEBUG__ */
792
793         return 1.0f - L_transparent;
794 }
795
796 ccl_device void kernel_path_trace(KernelGlobals *kg,
797         ccl_global float *buffer, ccl_global uint *rng_state,
798         int sample, int x, int y, int offset, int stride)
799 {
800         /* buffer offset */
801         int index = offset + x + y*stride;
802         int pass_stride = kernel_data.film.pass_stride;
803
804         rng_state += index;
805         buffer += index*pass_stride;
806
807         /* initialize random numbers and ray */
808         RNG rng;
809         Ray ray;
810
811         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
812
813         /* integrate */
814         PathRadiance L;
815         bool is_shadow_catcher;
816
817         if(ray.t != 0.0f) {
818                 float alpha = kernel_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher);
819                 kernel_write_result(kg, buffer, sample, &L, alpha, is_shadow_catcher);
820         }
821         else {
822                 kernel_write_result(kg, buffer, sample, NULL, 0.0f, false);
823         }
824
825         path_rng_end(kg, rng_state, rng);
826 }
827
828 #endif  /* __SPLIT_KERNEL__ */
829
830 CCL_NAMESPACE_END
831