Fix #36741: cycles AO pass giving values > 1.0 with transparency.
[blender-staging.git] / intern / cycles / kernel / kernel_path.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16
17 #ifdef __OSL__
18 #include "osl_shader.h"
19 #endif
20
21 #include "kernel_differential.h"
22 #include "kernel_montecarlo.h"
23 #include "kernel_projection.h"
24 #include "kernel_object.h"
25 #include "kernel_triangle.h"
26 #include "kernel_curve.h"
27 #include "kernel_primitive.h"
28 #include "kernel_projection.h"
29 #include "kernel_random.h"
30 #include "kernel_bvh.h"
31 #include "kernel_accumulate.h"
32 #include "kernel_camera.h"
33 #include "kernel_shader.h"
34 #include "kernel_light.h"
35 #include "kernel_emission.h"
36 #include "kernel_passes.h"
37 #include "kernel_path_state.h"
38
39 #ifdef __SUBSURFACE__
40 #include "kernel_subsurface.h"
41 #endif
42
43 CCL_NAMESPACE_BEGIN
44
/* Trace a shadow ray and report whether it is blocked.
 *
 * kg:     kernel globals, passed through to the intersection and shader calls.
 * state:  path state; only transparent_bounce and bounce are read here.
 * ray:    shadow ray to trace. NOTE: when transparent surfaces are marched
 *         through, ray->P, ray->D and ray->t are overwritten in place, so the
 *         caller must not rely on the ray being unchanged afterwards.
 * shadow: output. Set to (1, 1, 1) on entry; when the ray gets through
 *         transparent surfaces it is multiplied by the product of the
 *         shader_bsdf_transparency() values of the surfaces crossed.
 *
 * Returns true if the ray is blocked and false if it reaches its end point.
 * A ray with t == 0 is reported as not blocked without tracing anything. */
45 __device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
46 {
47         *shadow = make_float3(1.0f, 1.0f, 1.0f);
48
           /* zero length ray: nothing to trace, report as unblocked */
49         if(ray->t == 0.0f)
50                 return false;
51         
           /* first cast with opaque shadow visibility; without transparent
            * shadows this result is returned unchanged */
52         Intersection isect;
53 #ifdef __HAIR__
54         bool result = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
55 #else
56         bool result = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect);
57 #endif
58
59 #ifdef __TRANSPARENT_SHADOWS__
60         if(result && kernel_data.integrator.transparent_shadows) {
61                 /* transparent shadows work in such a way to try to minimize overhead
62                  * in cases where we don't need them. after a regular shadow ray is
63                  * cast we check if the hit primitive was potentially transparent, and
64                  * only in that case start marching. this gives one extra ray cast for
65                  * the cases where we do want transparency.
66                  *
67                  * also note that for this to work correctly, multi closure sampling must
68                  * be used, since we don't pass a random number to shader_eval_surface */
69                 if(shader_transparent_shadow(kg, &isect)) {
70                         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
                           /* end point of the original ray, used to re-aim the ray
                            * after each transparent surface */
71                         float3 Pend = ray->P + ray->D*ray->t;
                           /* continue counting from the transparent bounces already
                            * recorded in the path state */
72                         int bounce = state->transparent_bounce;
73
74                         for(;;) {
                                   /* out of transparent bounces: treat as blocked */
75                                 if(bounce >= kernel_data.integrator.transparent_max_bounce) {
76                                         return true;
77                                 }
78                                 else if(bounce >= kernel_data.integrator.transparent_min_bounce) {
79                                         /* todo: get random number somewhere for probabilistic terminate */
                                           /* the code below is compiled out; this branch
                                            * currently does nothing */
80 #if 0
81                                         float probability = average(throughput);
82                                         float terminate = 0.0f;
83
84                                         if(terminate >= probability)
85                                                 return true;
86
87                                         throughput /= probability;
88 #endif
89                                 }
90
                                   /* no further hit: the ray is unblocked, apply the
                                    * accumulated transparency to the output */
91 #ifdef __HAIR__
92                                 if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f)) {
93 #else
94                                 if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect)) {
95 #endif
96                                         *shadow *= throughput;
97                                         return false;
98                                 }
99
                                   /* hit a surface that is not flagged as having a
                                    * transparent shadow: blocked */
100                                 if(!shader_transparent_shadow(kg, &isect))
101                                         return true;
102
                                    /* evaluate the surface shader in the shadow context, with
                                     * a fixed random number of 0.0f (see note above) */
103                                 ShaderData sd;
104                                 shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1);
105                                 shader_eval_surface(kg, &sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
106
107                                 throughput *= shader_bsdf_transparency(kg, &sd);
108
                                    /* restart the ray from the hit point, offset along -Ng.
                                     * rays with t == FLT_MAX keep their direction and length,
                                     * all others are re-aimed at the original end point */
109                                 ray->P = ray_offset(sd.P, -sd.Ng);
110                                 if(ray->t != FLT_MAX)
111                                         ray->D = normalize_len(Pend - ray->P, &ray->t);
112
113                                 bounce++;
114                         }
115                 }
116         }
117 #endif
118
119         return result;
120 }
121
122
123 #if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)
124
/* Follow a path starting at `ray` and accumulate its contributions into L.
 *
 * Each iteration intersects the scene, shades the hit, accumulates emission,
 * ambient occlusion and direct light, then samples the BSDF to set up the
 * next ray. The loop ends when the ray misses the scene (after accumulating
 * the background), when path termination stops it, when the surface has no
 * BSDF, or when the BSDF sample has zero pdf or zero value.
 *
 * kg, rng, sample:   kernel globals and random number state for this sample.
 * ray:               starting ray (by value, modified locally per bounce).
 * buffer:            not referenced in this function body.
 * throughput:        path throughput so far (by value).
 * num_samples:       only used to scale the throughput that is passed to
 *                    path_state_terminate_probability().
 * num_total_samples: sample count passed to the path_rng_1D/2D calls.
 * min_ray_pdf:       smallest BSDF pdf so far, used for filter_glossy blurring.
 * ray_pdf:           pdf of the last non-transparent BSDF sample.
 * state:             path state (by value, advanced locally per bounce).
 * rng_offset:        first random number dimension; advanced by
 *                    PRNG_BOUNCE_NUM on every iteration.
 * L:                 output radiance accumulator. */
125 __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer,
126         float3 throughput, int num_samples, int num_total_samples,
127         float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L)
128 {
129 #ifdef __LAMP_MIS__
            /* distance accumulated since the last non-transparent bounce; reset
             * to zero below whenever a non-transparent BSDF label is sampled */
130         float ray_t = 0.0f;
131 #endif
132
133         /* path iteration */
134         for(;; rng_offset += PRNG_BOUNCE_NUM) {
135                 /* intersect scene */
136                 Intersection isect;
137                 uint visibility = path_state_ray_visibility(kg, &state);
138 #ifdef __HAIR__
139                 bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
140 #else
141                 bool hit = scene_intersect(kg, &ray, visibility, &isect);
142 #endif
143
144 #ifdef __LAMP_MIS__
145                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
146                         /* ray starting from previous non-transparent bounce */
147                         Ray light_ray;
148
149                         light_ray.P = ray.P - ray_t*ray.D;
150                         ray_t += isect.t;
151                         light_ray.D = ray.D;
152                         light_ray.t = ray_t;
153                         light_ray.time = ray.time;
154                         light_ray.dD = ray.dD;
155                         light_ray.dP = ray.dP;
156
157                         /* intersect with lamp */
158                         float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
159                         float3 emission;
160
161                         if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission, state.bounce))
162                                 path_radiance_accum_emission(L, throughput, emission, state.bounce);
163                 }
164 #endif
165
166                 if(!hit) {
167 #ifdef __BACKGROUND__
168                         /* sample background shader */
169                         float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
170                         path_radiance_accum_background(L, throughput, L_background, state.bounce);
171 #endif
172
173                         break;
174                 }
175
176                 /* setup shading */
177                 ShaderData sd;
178                 shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
179                 float rbsdf = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF);
180                 shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
181 #ifdef __BRANCHED_PATH__
182                 shader_merge_closures(kg, &sd);
183 #endif
184
185                 /* blurring of bsdf after bounces, for rays that have a small likelihood
186                  * of following this particular path (diffuse, rough glossy) */
187                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
188                         float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf;
189
190                         if(blur_pdf < 1.0f) {
191                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
192                                 shader_bsdf_blur(kg, &sd, blur_roughness);
193                         }
194                 }
195
196 #ifdef __EMISSION__
197                 /* emission */
198                 if(sd.flag & SD_EMISSION) {
199                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, ray_pdf);
200                         path_radiance_accum_emission(L, throughput, emission, state.bounce);
201                 }
202 #endif
203
204                 /* path termination. this is a strange place to put the termination, it's
205                  * mainly due to the mixed in MIS that we use. gives too many unneeded
206                  * shader evaluations, only need emission if we are going to terminate */
207                 float probability = path_state_terminate_probability(kg, &state, throughput*num_samples);
208
                    /* probability 0 ends the path, probability 1 always continues,
                     * anything in between is decided by a random number and the
                     * surviving path's throughput is divided by the probability */
209                 if(probability == 0.0f) {
210                         break;
211                 }
212                 else if(probability != 1.0f) {
213                         float terminate = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_TERMINATE);
214
215                         if(terminate >= probability)
216                                 break;
217
218                         throughput /= probability;
219                 }
220
221 #ifdef __AO__
222                 /* ambient occlusion */
223                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
224                         float bsdf_u, bsdf_v;
225                         path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
226
227                         float ao_factor = kernel_data.background.ao_factor;
228                         float3 ao_N;
229                         float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
230                         float3 ao_D;
231                         float ao_pdf;
                            /* alpha passed to the AO accumulation is always zero here */
232                         float3 ao_alpha = make_float3(0.0f, 0.0f, 0.0f);
233
234                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
235
                            /* only trace directions on the outer side of the geometric
                             * normal that have a non-zero pdf */
236                         if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
237                                 Ray light_ray;
238                                 float3 ao_shadow;
239
240                                 light_ray.P = ray_offset(sd.P, sd.Ng);
241                                 light_ray.D = ao_D;
242                                 light_ray.t = kernel_data.background.ao_distance;
243 #ifdef __OBJECT_MOTION__
244                                 light_ray.time = sd.time;
245 #endif
246                                 light_ray.dP = sd.dP;
247                                 light_ray.dD = differential3_zero();
248
249                                 if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
250                                         path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
251                         }
252                 }
253 #endif
254
255 #ifdef __SUBSURFACE__
256                 /* bssrdf scatter to a different location on the same object, replacing
257                  * the closures with a diffuse BSDF */
258                 if(sd.flag & SD_BSSRDF) {
259                         float bssrdf_probability;
260                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
261
262                         /* modify throughput for picking bssrdf or bsdf */
263                         throughput *= bssrdf_probability;
264
265                         /* do bssrdf scatter step if we picked a bssrdf closure */
266                         if(sc) {
267                                 uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
268
269                                 if(old_subsurface_scatter_use(&sd)) {
270                                         old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
271                                 }
272                                 else {
273                                         float bssrdf_u, bssrdf_v;
274                                         path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
275                                         subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
276                                 }
277
                                    /* mark the rest of this path as following a bssrdf scatter */
278                                 state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
279                         }
280                 }
281 #endif
282
283 #ifdef __EMISSION__
284                 if(kernel_data.integrator.use_direct_light) {
285                         /* sample illumination from lights to find path contribution */
286                         if(sd.flag & SD_BSDF_HAS_EVAL) {
287                                 float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
288 #ifdef __MULTI_CLOSURE__
289                                 float light_o = 0.0f;
290 #else
291                                 float light_o = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_F);
292 #endif
293                                 float light_u, light_v;
294                                 path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
295
296                                 Ray light_ray;
297                                 BsdfEval L_light;
298                                 bool is_lamp;
299
300 #ifdef __OBJECT_MOTION__
301                                 light_ray.time = sd.time;
302 #endif
303
304                                 /* sample random light */
305                                 if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
306                                         /* trace shadow ray */
307                                         float3 shadow;
308
309                                         if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
310                                                 /* accumulate */
311                                                 path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp);
312                                         }
313                                 }
314                         }
315                 }
316 #endif
317
318                 /* no BSDF? we can stop here */
319                 if(!(sd.flag & SD_BSDF))
320                         break;
321
322                 /* sample BSDF */
323                 float bsdf_pdf;
324                 BsdfEval bsdf_eval;
325                 float3 bsdf_omega_in;
326                 differential3 bsdf_domega_in;
327                 float bsdf_u, bsdf_v;
328                 path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
329                 int label;
330
331                 label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
332                         &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
333
                    /* a sample with zero pdf or zero value contributes nothing: stop */
334                 if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
335                         break;
336
337                 /* modify throughput */
338                 path_radiance_bsdf_bounce(L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label);
339
340                 /* set labels */
                    /* transparent bounces keep the pdf and accumulated distance of the
                     * previous non-transparent bounce */
341                 if(!(label & LABEL_TRANSPARENT)) {
342                         ray_pdf = bsdf_pdf;
343 #ifdef __LAMP_MIS__
344                         ray_t = 0.0f;
345 #endif
346                         min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf);
347                 }
348
349                 /* update path state */
350                 path_state_next(kg, &state, label);
351
352                 /* setup ray */
                    /* offset along -Ng for transmission and along +Ng otherwise */
353                 ray.P = ray_offset(sd.P, (label & LABEL_TRANSMIT)? -sd.Ng: sd.Ng);
354                 ray.D = bsdf_omega_in;
355                 ray.t = FLT_MAX;
356 #ifdef __RAY_DIFFERENTIALS__
357                 ray.dP = sd.dP;
358                 ray.dD = bsdf_domega_in;
359 #endif
360         }
361 }
362
363 #endif
364
365 #ifdef __SUBSURFACE__
366
/* Direct lighting and BSDF sampling for a single shading point.
 *
 * Accumulates direct light at `sd` into L (when __EMISSION__ is defined and
 * kernel_data.integrator.use_direct_light is set), then samples the BSDF and
 * sets up `ray` for the next bounce.
 *
 * kg, rng, sample, num_samples, rng_offset: kernel globals and the arguments
 *              forwarded to the path_rng_1D/2D calls.
 * sd:          shading point to light and to sample the BSDF from.
 * throughput:  in/out, updated by path_radiance_bsdf_bounce().
 * min_ray_pdf: in/out, lowered to the sampled pdf on non-transparent bounces.
 * ray_pdf:     in/out, set to the sampled pdf on non-transparent bounces.
 * state:       in/out, advanced with path_state_next().
 * L:           radiance accumulator.
 * ray:         in/out, receives the origin, direction, length and (with
 *              __RAY_DIFFERENTIALS__) the differentials of the next ray.
 * ray_t:       in/out, reset to zero on non-transparent bounces; only
 *              written when __LAMP_MIS__ is defined.
 *
 * Returns false when the path can not continue (the surface has no BSDF, or
 * the sample has zero pdf or zero value), true when `ray` has been set up. */
367 __device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rng,
368         int sample, int num_samples,
369         ShaderData *sd, float3 *throughput,
370         float *min_ray_pdf, float *ray_pdf, PathState *state,
371         int rng_offset, PathRadiance *L, Ray *ray, float *ray_t)
372 {
373 #ifdef __EMISSION__
374         if(kernel_data.integrator.use_direct_light) {
375                 /* sample illumination from lights to find path contribution */
376                 if(sd->flag & SD_BSDF_HAS_EVAL) {
377                         float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
378 #ifdef __MULTI_CLOSURE__
379                         float light_o = 0.0f;
380 #else
381                         float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
382 #endif
383                         float light_u, light_v;
384                         path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
385
386                         Ray light_ray;
387                         BsdfEval L_light;
388                         bool is_lamp;
389
390 #ifdef __OBJECT_MOTION__
391                         light_ray.time = sd->time;
392 #endif
393
394                         if(direct_emission(kg, sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) {
395                                 /* trace shadow ray */
396                                 float3 shadow;
397
398                                 if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
399                                         /* accumulate */
400                                         path_radiance_accum_light(L, *throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
401                                 }
402                         }
403                 }
404         }
405 #endif
406
407         /* no BSDF? we can stop here */
408         if(!(sd->flag & SD_BSDF))
409                 return false;
410
411         /* sample BSDF */
412         float bsdf_pdf;
413         BsdfEval bsdf_eval;
414         float3 bsdf_omega_in;
415         differential3 bsdf_domega_in;
416         float bsdf_u, bsdf_v;
417         path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
418         int label;
419
420         label = shader_bsdf_sample(kg, sd, bsdf_u, bsdf_v, &bsdf_eval,
421                 &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
422
            /* a sample with zero pdf or zero value contributes nothing: stop */
423         if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
424                 return false;
425
426         /* modify throughput */
427         path_radiance_bsdf_bounce(L, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
428
429         /* set labels */
            /* transparent bounces keep the pdf and accumulated distance of the
             * previous non-transparent bounce */
430         if(!(label & LABEL_TRANSPARENT)) {
431                 *ray_pdf = bsdf_pdf;
432 #ifdef __LAMP_MIS__
433                 *ray_t = 0.0f;
434 #endif
435                 *min_ray_pdf = fminf(bsdf_pdf, *min_ray_pdf);
436         }
437
438         /* update path state */
439         path_state_next(kg, state, label);
440
441         /* setup ray */
            /* offset along -Ng for transmission and along +Ng otherwise */
442         ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
443         ray->D = bsdf_omega_in;
444
            /* while the updated state is still at bounce 0 the remaining ray length
             * is kept (shortened by the distance already travelled); otherwise the
             * ray is unbounded */
445         if(state->bounce == 0)
446                 ray->t -= sd->ray_length; /* clipping works through transparent */
447         else
448                 ray->t = FLT_MAX;
449
450 #ifdef __RAY_DIFFERENTIALS__
451         ray->dP = sd->dP;
452         ray->dD = bsdf_domega_in;
453 #endif
454         
455         return true;
456 }
457
458 #endif
459
460 __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
461 {
462         /* initialize */
463         PathRadiance L;
464         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
465         float L_transparent = 0.0f;
466
467         path_radiance_init(&L, kernel_data.film.use_light_pass);
468
469         float min_ray_pdf = FLT_MAX;
470         float ray_pdf = 0.0f;
471         float ray_t = 0.0f;
472         PathState state;
473         int rng_offset = PRNG_BASE_NUM;
474 #ifdef __CMJ__
475         int num_samples = kernel_data.integrator.aa_samples;
476 #else
477         int num_samples = 0;
478 #endif
479
480         path_state_init(&state);
481
482         /* path iteration */
483         for(;; rng_offset += PRNG_BOUNCE_NUM) {
484                 /* intersect scene */
485                 Intersection isect;
486                 uint visibility = path_state_ray_visibility(kg, &state);
487
488 #ifdef __HAIR__
489                 float difl = 0.0f, extmax = 0.0f;
490                 uint lcg_state = 0;
491
492                 if(kernel_data.bvh.have_curves) {
493                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
494                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
495                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
496                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
497                         }
498
499                         extmax = kernel_data.curve.maximum_width;
500                         lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
501                 }
502
503                 bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
504 #else
505                 bool hit = scene_intersect(kg, &ray, visibility, &isect);
506 #endif
507
508 #ifdef __LAMP_MIS__
509                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
510                         /* ray starting from previous non-transparent bounce */
511                         Ray light_ray;
512
513                         light_ray.P = ray.P - ray_t*ray.D;
514                         ray_t += isect.t;
515                         light_ray.D = ray.D;
516                         light_ray.t = ray_t;
517                         light_ray.time = ray.time;
518                         light_ray.dD = ray.dD;
519                         light_ray.dP = ray.dP;
520
521                         /* intersect with lamp */
522                         float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
523                         float3 emission;
524
525                         if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission, state.bounce))
526                                 path_radiance_accum_emission(&L, throughput, emission, state.bounce);
527                 }
528 #endif
529
530                 if(!hit) {
531                         /* eval background shader if nothing hit */
532                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
533                                 L_transparent += average(throughput);
534
535 #ifdef __PASSES__
536                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
537 #endif
538                                         break;
539                         }
540
541 #ifdef __BACKGROUND__
542                         /* sample background shader */
543                         float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
544                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
545 #endif
546
547                         break;
548                 }
549
550                 /* setup shading */
551                 ShaderData sd;
552                 shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
553                 float rbsdf = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF);
554                 shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
555
556                 /* holdout */
557 #ifdef __HOLDOUT__
558                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
559                         if(kernel_data.background.transparent) {
560                                 float3 holdout_weight;
561                                 
562                                 if(sd.flag & SD_HOLDOUT_MASK)
563                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
564                                 else
565                                         holdout_weight = shader_holdout_eval(kg, &sd);
566
567                                 /* any throughput is ok, should all be identical here */
568                                 L_transparent += average(holdout_weight*throughput);
569                         }
570
571                         if(sd.flag & SD_HOLDOUT_MASK)
572                                 break;
573                 }
574 #endif
575
576                 /* holdout mask objects do not write data passes */
577                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
578
579                 /* blurring of bsdf after bounces, for rays that have a small likelihood
580                  * of following this particular path (diffuse, rough glossy) */
581                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
582                         float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf;
583
584                         if(blur_pdf < 1.0f) {
585                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
586                                 shader_bsdf_blur(kg, &sd, blur_roughness);
587                         }
588                 }
589
590 #ifdef __EMISSION__
591                 /* emission */
592                 if(sd.flag & SD_EMISSION) {
593                         /* todo: is isect.t wrong here for transparent surfaces? */
594                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, ray_pdf);
595                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
596                 }
597 #endif
598
599                 /* path termination. this is a strange place to put the termination, it's
600                  * mainly due to the mixed in MIS that we use. gives too many unneeded
601                  * shader evaluations, only need emission if we are going to terminate */
602                 float probability = path_state_terminate_probability(kg, &state, throughput);
603
604                 if(probability == 0.0f) {
605                         break;
606                 }
607                 else if(probability != 1.0f) {
608                         float terminate = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_TERMINATE);
609
610                         if(terminate >= probability)
611                                 break;
612
613                         throughput /= probability;
614                 }
615
616 #ifdef __AO__
617                 /* ambient occlusion */
618                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
619                         /* todo: solve correlation */
620                         float bsdf_u, bsdf_v;
621                         path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
622
623                         float ao_factor = kernel_data.background.ao_factor;
624                         float3 ao_N;
625                         float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
626                         float3 ao_D;
627                         float ao_pdf;
628                         float3 ao_alpha = shader_bsdf_alpha(kg, &sd);
629
630                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
631
632                         if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
633                                 Ray light_ray;
634                                 float3 ao_shadow;
635
636                                 light_ray.P = ray_offset(sd.P, sd.Ng);
637                                 light_ray.D = ao_D;
638                                 light_ray.t = kernel_data.background.ao_distance;
639 #ifdef __OBJECT_MOTION__
640                                 light_ray.time = sd.time;
641 #endif
642                                 light_ray.dP = sd.dP;
643                                 light_ray.dD = differential3_zero();
644
645                                 if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
646                                         path_radiance_accum_ao(&L, throughput, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
647                         }
648                 }
649 #endif
650
651 #ifdef __SUBSURFACE__
652                 /* bssrdf scatter to a different location on the same object, replacing
653                  * the closures with a diffuse BSDF */
654                 if(sd.flag & SD_BSSRDF) {
655                         float bssrdf_probability;
656                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
657
658                         /* modify throughput for picking bssrdf or bsdf */
659                         throughput *= bssrdf_probability;
660
661                         /* do bssrdf scatter step if we picked a bssrdf closure */
662                         if(sc) {
663                                 uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
664
665                                 if(old_subsurface_scatter_use(&sd)) {
666                                         old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
667                                 }
668                                 else {
669                                         ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
670                                         float bssrdf_u, bssrdf_v;
671                                         path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
672                                         int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
673
674                                         /* compute lighting with the BSDF closure */
675                                         for(int hit = 0; hit < num_hits; hit++) {
676                                                 float3 tp = throughput;
677                                                 PathState hit_state = state;
678                                                 Ray hit_ray = ray;
679                                                 float hit_ray_t = ray_t;
680                                                 float hit_ray_pdf = ray_pdf;
681                                                 float hit_min_ray_pdf = min_ray_pdf;
682
683                                                 hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
684                                                 
685                                                 if(kernel_path_integrate_lighting(kg, rng, sample, num_samples, &bssrdf_sd[hit],
686                                                         &tp, &hit_min_ray_pdf, &hit_ray_pdf, &hit_state, rng_offset+PRNG_BOUNCE_NUM, &L, &hit_ray, &hit_ray_t)) {
687                                                         kernel_path_indirect(kg, rng, sample, hit_ray, buffer,
688                                                                 tp, num_samples, num_samples,
689                                                                 hit_min_ray_pdf, hit_ray_pdf, hit_state, rng_offset+PRNG_BOUNCE_NUM*2, &L);
690
691                                                         /* for render passes, sum and reset indirect light pass variables
692                                                          * for the next samples */
693                                                         path_radiance_sum_indirect(&L);
694                                                         path_radiance_reset_indirect(&L);
695                                                 }
696                                         }
697
698                                         break;
699                                 }
700                         }
701                 }
702 #endif
703                 
704                 /* The following code is the same as in kernel_path_integrate_lighting(),
705                    but for CUDA the function call is slower. */
706 #ifdef __EMISSION__
707                 if(kernel_data.integrator.use_direct_light) {
708                         /* sample illumination from lights to find path contribution */
709                         if(sd.flag & SD_BSDF_HAS_EVAL) {
710                                 float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
711 #ifdef __MULTI_CLOSURE__
712                                 float light_o = 0.0f;
713 #else
714                                 float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
715 #endif
716                                 float light_u, light_v;
717                                 path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
718
719                                 Ray light_ray;
720                                 BsdfEval L_light;
721                                 bool is_lamp;
722
723 #ifdef __OBJECT_MOTION__
724                                 light_ray.time = sd.time;
725 #endif
726
727                                 if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
728                                         /* trace shadow ray */
729                                         float3 shadow;
730
731                                         if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
732                                                 /* accumulate */
733                                                 path_radiance_accum_light(&L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp);
734                                         }
735                                 }
736                         }
737                 }
738 #endif
739
740                 /* no BSDF? we can stop here */
741                 if(!(sd.flag & SD_BSDF))
742                         break;
743
744                 /* sample BSDF */
745                 float bsdf_pdf;
746                 BsdfEval bsdf_eval;
747                 float3 bsdf_omega_in;
748                 differential3 bsdf_domega_in;
749                 float bsdf_u, bsdf_v;
750                 path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
751                 int label;
752
753                 label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
754                         &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
755
756                 if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
757                         break;
758
759                 /* modify throughput */
760                 path_radiance_bsdf_bounce(&L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label);
761
762                 /* set labels */
763                 if(!(label & LABEL_TRANSPARENT)) {
764                         ray_pdf = bsdf_pdf;
765 #ifdef __LAMP_MIS__
766                         ray_t = 0.0f;
767 #endif
768                         min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf);
769                 }
770
771                 /* update path state */
772                 path_state_next(kg, &state, label);
773
774                 /* setup ray */
775                 ray.P = ray_offset(sd.P, (label & LABEL_TRANSMIT)? -sd.Ng: sd.Ng);
776                 ray.D = bsdf_omega_in;
777
778                 if(state.bounce == 0)
779                         ray.t -= sd.ray_length; /* clipping works through transparent */
780                 else
781                         ray.t = FLT_MAX;
782
783 #ifdef __RAY_DIFFERENTIALS__
784                 ray.dP = sd.dP;
785                 ray.dD = bsdf_domega_in;
786 #endif
787         }
788
789         float3 L_sum = path_radiance_sum(kg, &L);
790
791 #ifdef __CLAMP_SAMPLE__
792         path_radiance_clamp(&L, &L_sum, kernel_data.integrator.sample_clamp);
793 #endif
794
795         kernel_write_light_passes(kg, buffer, &L, sample);
796
797         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
798 }
799
800 #ifdef __BRANCHED_PATH__
801
802 __device_noinline void kernel_branched_path_integrate_lighting(KernelGlobals *kg, RNG *rng,
803         int sample, int aa_samples,
804         ShaderData *sd, float3 throughput, float num_samples_adjust,
805         float min_ray_pdf, float ray_pdf, PathState state,
806         int rng_offset, PathRadiance *L, __global float *buffer)
807 {
808 #ifdef __EMISSION__
809         /* sample illumination from lights to find path contribution */
810         if(sd->flag & SD_BSDF_HAS_EVAL) {
811                 Ray light_ray;
812                 BsdfEval L_light;
813                 bool is_lamp;
814
815 #ifdef __OBJECT_MOTION__
816                 light_ray.time = sd->time;
817 #endif
818
819                 /* lamp sampling */
820                 for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
821                         int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
822                         float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
823                         RNG lamp_rng = cmj_hash(*rng, i);
824
825                         if(kernel_data.integrator.pdf_triangles != 0.0f)
826                                 num_samples_inv *= 0.5f;
827
828                         for(int j = 0; j < num_samples; j++) {
829                                 float light_u, light_v;
830                                 path_rng_2D(kg, &lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
831
832                                 if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
833                                         /* trace shadow ray */
834                                         float3 shadow;
835
836                                         if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
837                                                 /* accumulate */
838                                                 path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp);
839                                         }
840                                 }
841                         }
842                 }
843
844                 /* mesh light sampling */
845                 if(kernel_data.integrator.pdf_triangles != 0.0f) {
846                         int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
847                         float num_samples_inv = num_samples_adjust/num_samples;
848
849                         if(kernel_data.integrator.num_all_lights)
850                                 num_samples_inv *= 0.5f;
851
852                         for(int j = 0; j < num_samples; j++) {
853                                 float light_t = path_rng_1D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT);
854                                 float light_u, light_v;
855                                 path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
856
857                                 /* only sample triangle lights */
858                                 if(kernel_data.integrator.num_all_lights)
859                                         light_t = 0.5f*light_t;
860
861                                 if(direct_emission(kg, sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
862                                         /* trace shadow ray */
863                                         float3 shadow;
864
865                                         if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
866                                                 /* accumulate */
867                                                 path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp);
868                                         }
869                                 }
870                         }
871                 }
872         }
873 #endif
874
875         for(int i = 0; i< sd->num_closure; i++) {
876                 const ShaderClosure *sc = &sd->closure[i];
877
878                 if(!CLOSURE_IS_BSDF(sc->type))
879                         continue;
880                 /* transparency is not handled here, but in outer loop */
881                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
882                         continue;
883
884                 int num_samples;
885
886                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
887                         num_samples = kernel_data.integrator.diffuse_samples;
888                 else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
889                         num_samples = 1;
890                 else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
891                         num_samples = kernel_data.integrator.glossy_samples;
892                 else
893                         num_samples = kernel_data.integrator.transmission_samples;
894
895                 num_samples = ceil_to_int(num_samples_adjust*num_samples);
896
897                 float num_samples_inv = num_samples_adjust/num_samples;
898                 RNG bsdf_rng = cmj_hash(*rng, i);
899
900                 for(int j = 0; j < num_samples; j++) {
901                         /* sample BSDF */
902                         float bsdf_pdf;
903                         BsdfEval bsdf_eval;
904                         float3 bsdf_omega_in;
905                         differential3 bsdf_domega_in;
906                         float bsdf_u, bsdf_v;
907                         path_rng_2D(kg, &bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
908                         int label;
909
910                         label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
911                                 &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
912
913                         if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
914                                 continue;
915
916                         /* modify throughput */
917                         float3 tp = throughput;
918                         path_radiance_bsdf_bounce(L, &tp, &bsdf_eval, bsdf_pdf, state.bounce, label);
919
920                         /* set labels */
921                         float min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
922
923                         /* modify path state */
924                         PathState ps = state;
925                         path_state_next(kg, &ps, label);
926
927                         /* setup ray */
928                         Ray bsdf_ray;
929
930                         bsdf_ray.P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
931                         bsdf_ray.D = bsdf_omega_in;
932                         bsdf_ray.t = FLT_MAX;
933 #ifdef __RAY_DIFFERENTIALS__
934                         bsdf_ray.dP = sd->dP;
935                         bsdf_ray.dD = bsdf_domega_in;
936 #endif
937 #ifdef __OBJECT_MOTION__
938                         bsdf_ray.time = sd->time;
939 #endif
940
941                         kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer,
942                                 tp*num_samples_inv, num_samples, aa_samples*num_samples,
943                                 min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, L);
944
945                         /* for render passes, sum and reset indirect light pass variables
946                          * for the next samples */
947                         path_radiance_sum_indirect(L);
948                         path_radiance_reset_indirect(L);
949                 }
950         }
951 }
952
953 __device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
954 {
955         /* initialize */
956         PathRadiance L;
957         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
958         float L_transparent = 0.0f;
959
960         path_radiance_init(&L, kernel_data.film.use_light_pass);
961
962         float ray_pdf = 0.0f;
963         PathState state;
964         int rng_offset = PRNG_BASE_NUM;
965 #ifdef __CMJ__
966         int aa_samples = kernel_data.integrator.aa_samples;
967 #else
968         int aa_samples = 0;
969 #endif
970
971         path_state_init(&state);
972
973         for(;; rng_offset += PRNG_BOUNCE_NUM) {
974                 /* intersect scene */
975                 Intersection isect;
976                 uint visibility = path_state_ray_visibility(kg, &state);
977
978 #ifdef __HAIR__
979                 float difl = 0.0f, extmax = 0.0f;
980                 uint lcg_state = 0;
981
982                 if(kernel_data.bvh.have_curves) {
983                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
984                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
985                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
986                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
987                         }
988
989                         extmax = kernel_data.curve.maximum_width;
990                         lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
991                 }
992
993                 if(!scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax)) {
994 #else
995                 if(!scene_intersect(kg, &ray, visibility, &isect)) {
996 #endif
997                         /* eval background shader if nothing hit */
998                         if(kernel_data.background.transparent) {
999                                 L_transparent += average(throughput);
1000
1001 #ifdef __PASSES__
1002                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
1003 #endif
1004                                         break;
1005                         }
1006
1007 #ifdef __BACKGROUND__
1008                         /* sample background shader */
1009                         float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
1010                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
1011 #endif
1012
1013                         break;
1014                 }
1015
1016                 /* setup shading */
1017                 ShaderData sd;
1018                 shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
1019                 shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
1020                 shader_merge_closures(kg, &sd);
1021
1022                 /* holdout */
1023 #ifdef __HOLDOUT__
1024                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK))) {
1025                         if(kernel_data.background.transparent) {
1026                                 float3 holdout_weight;
1027                                 
1028                                 if(sd.flag & SD_HOLDOUT_MASK)
1029                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
1030                                 else
1031                                         holdout_weight = shader_holdout_eval(kg, &sd);
1032
1033                                 /* any throughput is ok, should all be identical here */
1034                                 L_transparent += average(holdout_weight*throughput);
1035                         }
1036
1037                         if(sd.flag & SD_HOLDOUT_MASK)
1038                                 break;
1039                 }
1040 #endif
1041
1042                 /* holdout mask objects do not write data passes */
1043                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
1044
1045 #ifdef __EMISSION__
1046                 /* emission */
1047                 if(sd.flag & SD_EMISSION) {
1048                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, ray_pdf);
1049                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
1050                 }
1051 #endif
1052
1053                 /* transparency termination */
1054                 if(state.flag & PATH_RAY_TRANSPARENT) {
1055                         /* path termination. this is a strange place to put the termination, it's
1056                          * mainly due to the mixed in MIS that we use. gives too many unneeded
1057                          * shader evaluations, only need emission if we are going to terminate */
1058                         float probability = path_state_terminate_probability(kg, &state, throughput);
1059
1060                         if(probability == 0.0f) {
1061                                 break;
1062                         }
1063                         else if(probability != 1.0f) {
1064                                 float terminate = path_rng_1D(kg, rng, sample, aa_samples, rng_offset + PRNG_TERMINATE);
1065
1066                                 if(terminate >= probability)
1067                                         break;
1068
1069                                 throughput /= probability;
1070                         }
1071                 }
1072
1073 #ifdef __AO__
1074                 /* ambient occlusion */
1075                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
1076                         int num_samples = kernel_data.integrator.ao_samples;
1077                         float num_samples_inv = 1.0f/num_samples;
1078                         float ao_factor = kernel_data.background.ao_factor;
1079                         float3 ao_N;
1080                         float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
1081                         float3 ao_alpha = shader_bsdf_alpha(kg, &sd);
1082
1083                         for(int j = 0; j < num_samples; j++) {
1084                                 float bsdf_u, bsdf_v;
1085                                 path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
1086
1087                                 float3 ao_D;
1088                                 float ao_pdf;
1089
1090                                 sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
1091
1092                                 if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
1093                                         Ray light_ray;
1094                                         float3 ao_shadow;
1095
1096                                         light_ray.P = ray_offset(sd.P, sd.Ng);
1097                                         light_ray.D = ao_D;
1098                                         light_ray.t = kernel_data.background.ao_distance;
1099 #ifdef __OBJECT_MOTION__
1100                                         light_ray.time = sd.time;
1101 #endif
1102                                         light_ray.dP = sd.dP;
1103                                         light_ray.dD = differential3_zero();
1104
1105                                         if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
1106                                                 path_radiance_accum_ao(&L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
1107                                 }
1108                         }
1109                 }
1110 #endif
1111
1112 #ifdef __SUBSURFACE__
1113                 /* bssrdf scatter to a different location on the same object */
1114                 if(sd.flag & SD_BSSRDF) {
1115                         for(int i = 0; i< sd.num_closure; i++) {
1116                                 ShaderClosure *sc = &sd.closure[i];
1117
1118                                 if(!CLOSURE_IS_BSSRDF(sc->type))
1119                                         continue;
1120
1121                                 /* set up random number generator */
1122                                 uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
1123                                 int num_samples = kernel_data.integrator.subsurface_samples;
1124                                 float num_samples_inv = 1.0f/num_samples;
1125                                 RNG bssrdf_rng = cmj_hash(*rng, i);
1126
1127                                 state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
1128
1129                                 /* do subsurface scatter step with copy of shader data, this will
1130                                  * replace the BSSRDF with a diffuse BSDF closure */
1131                                 for(int j = 0; j < num_samples; j++) {
1132                                         if(old_subsurface_scatter_use(&sd)) {
1133                                                 ShaderData bssrdf_sd = sd;
1134                                                 old_subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true);
1135
1136                                                 /* compute lighting with the BSDF closure */
1137                                                 kernel_branched_path_integrate_lighting(kg, rng, sample*num_samples + j,
1138                                                         aa_samples*num_samples,
1139                                                         &bssrdf_sd, throughput, num_samples_inv,
1140                                                         ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
1141                                         }
1142                                         else {
1143                                                 ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
1144                                                 float bssrdf_u, bssrdf_v;
1145                                                 path_rng_2D(kg, &bssrdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
1146                                                 int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
1147
1148                                                 /* compute lighting with the BSDF closure */
1149                                                 for(int hit = 0; hit < num_hits; hit++)
1150                                                         kernel_branched_path_integrate_lighting(kg, rng, sample*num_samples + j,
1151                                                                 aa_samples*num_samples,
1152                                                                 &bssrdf_sd[hit], throughput, num_samples_inv,
1153                                                                 ray_pdf, ray_pdf, state, rng_offset+PRNG_BOUNCE_NUM, &L, buffer);
1154                                         }
1155                                 }
1156
1157                                 state.flag &= ~PATH_RAY_BSSRDF_ANCESTOR;
1158                         }
1159                 }
1160 #endif
1161
1162                 /* lighting */
1163                 kernel_branched_path_integrate_lighting(kg, rng, sample, aa_samples,
1164                         &sd, throughput, 1.0f, ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
1165
1166                 /* continue in case of transparency */
1167                 throughput *= shader_bsdf_transparency(kg, &sd);
1168
1169                 if(is_zero(throughput))
1170                         break;
1171
1172                 path_state_next(kg, &state, LABEL_TRANSPARENT);
1173                 ray.P = ray_offset(sd.P, -sd.Ng);
1174                 ray.t -= sd.ray_length; /* clipping works through transparent */
1175         }
1176
1177         float3 L_sum = path_radiance_sum(kg, &L);
1178
1179 #ifdef __CLAMP_SAMPLE__
1180         path_radiance_clamp(&L, &L_sum, kernel_data.integrator.sample_clamp);
1181 #endif
1182
1183         kernel_write_light_passes(kg, buffer, &L, sample);
1184
1185         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
1186 }
1187
1188 #endif
1189
1190 __device_inline void kernel_path_trace_setup(KernelGlobals *kg, __global uint *rng_state, int sample, int x, int y, RNG *rng, Ray *ray)
1191 {
1192         float filter_u;
1193         float filter_v;
1194 #ifdef __CMJ__
1195         int num_samples = kernel_data.integrator.aa_samples;
1196 #else
1197         int num_samples = 0;
1198 #endif
1199
1200         path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
1201
1202         /* sample camera ray */
1203
1204         float lens_u = 0.0f, lens_v = 0.0f;
1205
1206         if(kernel_data.cam.aperturesize > 0.0f)
1207                 path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
1208
1209         float time = 0.0f;
1210
1211 #ifdef __CAMERA_MOTION__
1212         if(kernel_data.cam.shuttertime != -1.0f)
1213                 time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
1214 #endif
1215
1216         camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
1217 }
1218
1219 __device void kernel_path_trace(KernelGlobals *kg,
1220         __global float *buffer, __global uint *rng_state,
1221         int sample, int x, int y, int offset, int stride)
1222 {
1223         /* buffer offset */
1224         int index = offset + x + y*stride;
1225         int pass_stride = kernel_data.film.pass_stride;
1226
1227         rng_state += index;
1228         buffer += index*pass_stride;
1229
1230         /* initialize random numbers and ray */
1231         RNG rng;
1232         Ray ray;
1233
1234         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
1235
1236         /* integrate */
1237         float4 L;
1238
1239         if (ray.t != 0.0f)
1240                 L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
1241         else
1242                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
1243
1244         /* accumulate result in output buffer */
1245         kernel_write_pass_float4(buffer, sample, L);
1246
1247         path_rng_end(kg, rng_state, rng);
1248 }
1249
1250 #ifdef __BRANCHED_PATH__
1251 __device void kernel_branched_path_trace(KernelGlobals *kg,
1252         __global float *buffer, __global uint *rng_state,
1253         int sample, int x, int y, int offset, int stride)
1254 {
1255         /* buffer offset */
1256         int index = offset + x + y*stride;
1257         int pass_stride = kernel_data.film.pass_stride;
1258
1259         rng_state += index;
1260         buffer += index*pass_stride;
1261
1262         /* initialize random numbers and ray */
1263         RNG rng;
1264         Ray ray;
1265
1266         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
1267
1268         /* integrate */
1269         float4 L;
1270
1271         if (ray.t != 0.0f)
1272                 L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer);
1273         else
1274                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
1275
1276         /* accumulate result in output buffer */
1277         kernel_write_pass_float4(buffer, sample, L);
1278
1279         path_rng_end(kg, rng_state, rng);
1280 }
1281 #endif
1282
1283 CCL_NAMESPACE_END
1284