Cycles: change __device and similar qualifiers to ccl_device in kernel code.
[blender.git] / intern / cycles / kernel / kernel_path.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef __OSL__
18 #include "osl_shader.h"
19 #endif
20
21 #include "kernel_differential.h"
22 #include "kernel_montecarlo.h"
23 #include "kernel_projection.h"
24 #include "kernel_object.h"
25 #include "kernel_triangle.h"
26 #include "kernel_curve.h"
27 #include "kernel_primitive.h"
28 #include "kernel_projection.h"
29 #include "kernel_random.h"
30 #include "kernel_bvh.h"
31 #include "kernel_accumulate.h"
32 #include "kernel_camera.h"
33 #include "kernel_shader.h"
34 #include "kernel_light.h"
35 #include "kernel_emission.h"
36 #include "kernel_passes.h"
37 #include "kernel_path_state.h"
38
39 #ifdef __SUBSURFACE__
40 #include "kernel_subsurface.h"
41 #endif
42
43 CCL_NAMESPACE_BEGIN
44
45 ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
46 {
47         *shadow = make_float3(1.0f, 1.0f, 1.0f);
48
49         if(ray->t == 0.0f)
50                 return false;
51         
52         Intersection isect;
53 #ifdef __HAIR__
54         bool result = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
55 #else
56         bool result = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect);
57 #endif
58
59 #ifdef __TRANSPARENT_SHADOWS__
60         if(result && kernel_data.integrator.transparent_shadows) {
61                 /* transparent shadows work in such a way to try to minimize overhead
62                  * in cases where we don't need them. after a regular shadow ray is
63                  * cast we check if the hit primitive was potentially transparent, and
64                  * only in that case start marching. this gives on extra ray cast for
65                  * the cases were we do want transparency.
66                  *
67                  * also note that for this to work correct, multi close sampling must
68                  * be used, since we don't pass a random number to shader_eval_surface */
69                 if(shader_transparent_shadow(kg, &isect)) {
70                         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
71                         float3 Pend = ray->P + ray->D*ray->t;
72                         int bounce = state->transparent_bounce;
73
74                         for(;;) {
75                                 if(bounce >= kernel_data.integrator.transparent_max_bounce) {
76                                         return true;
77                                 }
78                                 else if(bounce >= kernel_data.integrator.transparent_min_bounce) {
79                                         /* todo: get random number somewhere for probabilistic terminate */
80 #if 0
81                                         float probability = average(throughput);
82                                         float terminate = 0.0f;
83
84                                         if(terminate >= probability)
85                                                 return true;
86
87                                         throughput /= probability;
88 #endif
89                                 }
90
91 #ifdef __HAIR__
92                                 if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f)) {
93 #else
94                                 if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect)) {
95 #endif
96                                         *shadow *= throughput;
97                                         return false;
98                                 }
99
100                                 if(!shader_transparent_shadow(kg, &isect))
101                                         return true;
102
103                                 ShaderData sd;
104                                 shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1);
105                                 shader_eval_surface(kg, &sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
106
107                                 throughput *= shader_bsdf_transparency(kg, &sd);
108
109                                 ray->P = ray_offset(sd.P, -sd.Ng);
110                                 if(ray->t != FLT_MAX)
111                                         ray->D = normalize_len(Pend - ray->P, &ray->t);
112
113                                 bounce++;
114                         }
115                 }
116         }
117 #endif
118
119         return result;
120 }
121
122
123 #if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)
124
125 ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer,
126         float3 throughput, int num_samples, int num_total_samples,
127         float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L)
128 {
129 #ifdef __LAMP_MIS__
130         float ray_t = 0.0f;
131 #endif
132
133         /* path iteration */
134         for(;; rng_offset += PRNG_BOUNCE_NUM) {
135                 /* intersect scene */
136                 Intersection isect;
137                 uint visibility = path_state_ray_visibility(kg, &state);
138 #ifdef __HAIR__
139                 bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
140 #else
141                 bool hit = scene_intersect(kg, &ray, visibility, &isect);
142 #endif
143
144 #ifdef __LAMP_MIS__
145                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
146                         /* ray starting from previous non-transparent bounce */
147                         Ray light_ray;
148
149                         light_ray.P = ray.P - ray_t*ray.D;
150                         ray_t += isect.t;
151                         light_ray.D = ray.D;
152                         light_ray.t = ray_t;
153                         light_ray.time = ray.time;
154                         light_ray.dD = ray.dD;
155                         light_ray.dP = ray.dP;
156
157                         /* intersect with lamp */
158                         float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
159                         float3 emission;
160
161                         if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission, state.bounce))
162                                 path_radiance_accum_emission(L, throughput, emission, state.bounce);
163                 }
164 #endif
165
166                 if(!hit) {
167 #ifdef __BACKGROUND__
168                         /* sample background shader */
169                         float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
170                         path_radiance_accum_background(L, throughput, L_background, state.bounce);
171 #endif
172
173                         break;
174                 }
175
176                 /* setup shading */
177                 ShaderData sd;
178                 shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
179                 float rbsdf = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF);
180                 shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
181 #ifdef __BRANCHED_PATH__
182                 shader_merge_closures(kg, &sd);
183 #endif
184
185                 /* blurring of bsdf after bounces, for rays that have a small likelihood
186                  * of following this particular path (diffuse, rough glossy) */
187                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
188                         float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf;
189
190                         if(blur_pdf < 1.0f) {
191                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
192                                 shader_bsdf_blur(kg, &sd, blur_roughness);
193                         }
194                 }
195
196 #ifdef __EMISSION__
197                 /* emission */
198                 if(sd.flag & SD_EMISSION) {
199                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, ray_pdf);
200                         path_radiance_accum_emission(L, throughput, emission, state.bounce);
201                 }
202 #endif
203
204                 /* path termination. this is a strange place to put the termination, it's
205                  * mainly due to the mixed in MIS that we use. gives too many unneeded
206                  * shader evaluations, only need emission if we are going to terminate */
207                 float probability = path_state_terminate_probability(kg, &state, throughput*num_samples);
208
209                 if(probability == 0.0f) {
210                         break;
211                 }
212                 else if(probability != 1.0f) {
213                         float terminate = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_TERMINATE);
214
215                         if(terminate >= probability)
216                                 break;
217
218                         throughput /= probability;
219                 }
220
221 #ifdef __AO__
222                 /* ambient occlusion */
223                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
224                         float bsdf_u, bsdf_v;
225                         path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
226
227                         float ao_factor = kernel_data.background.ao_factor;
228                         float3 ao_N;
229                         float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
230                         float3 ao_D;
231                         float ao_pdf;
232                         float3 ao_alpha = make_float3(0.0f, 0.0f, 0.0f);
233
234                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
235
236                         if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
237                                 Ray light_ray;
238                                 float3 ao_shadow;
239
240                                 light_ray.P = ray_offset(sd.P, sd.Ng);
241                                 light_ray.D = ao_D;
242                                 light_ray.t = kernel_data.background.ao_distance;
243 #ifdef __OBJECT_MOTION__
244                                 light_ray.time = sd.time;
245 #endif
246                                 light_ray.dP = sd.dP;
247                                 light_ray.dD = differential3_zero();
248
249                                 if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
250                                         path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
251                         }
252                 }
253 #endif
254
255 #ifdef __SUBSURFACE__
256                 /* bssrdf scatter to a different location on the same object, replacing
257                  * the closures with a diffuse BSDF */
258                 if(sd.flag & SD_BSSRDF) {
259                         float bssrdf_probability;
260                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
261
262                         /* modify throughput for picking bssrdf or bsdf */
263                         throughput *= bssrdf_probability;
264
265                         /* do bssrdf scatter step if we picked a bssrdf closure */
266                         if(sc) {
267                                 uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
268
269                                 float bssrdf_u, bssrdf_v;
270                                 path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
271                                 subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
272
273                                 state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
274                         }
275                 }
276 #endif
277
278 #ifdef __EMISSION__
279                 if(kernel_data.integrator.use_direct_light) {
280                         /* sample illumination from lights to find path contribution */
281                         if(sd.flag & SD_BSDF_HAS_EVAL) {
282                                 float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
283 #ifdef __MULTI_CLOSURE__
284                                 float light_o = 0.0f;
285 #else
286                                 float light_o = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_F);
287 #endif
288                                 float light_u, light_v;
289                                 path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
290
291                                 Ray light_ray;
292                                 BsdfEval L_light;
293                                 bool is_lamp;
294
295 #ifdef __OBJECT_MOTION__
296                                 light_ray.time = sd.time;
297 #endif
298
299                                 /* sample random light */
300                                 if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
301                                         /* trace shadow ray */
302                                         float3 shadow;
303
304                                         if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
305                                                 /* accumulate */
306                                                 path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp);
307                                         }
308                                 }
309                         }
310                 }
311 #endif
312
313                 /* no BSDF? we can stop here */
314                 if(!(sd.flag & SD_BSDF))
315                         break;
316
317                 /* sample BSDF */
318                 float bsdf_pdf;
319                 BsdfEval bsdf_eval;
320                 float3 bsdf_omega_in;
321                 differential3 bsdf_domega_in;
322                 float bsdf_u, bsdf_v;
323                 path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
324                 int label;
325
326                 label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
327                         &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
328
329                 if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
330                         break;
331
332                 /* modify throughput */
333                 path_radiance_bsdf_bounce(L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label);
334
335                 /* set labels */
336                 if(!(label & LABEL_TRANSPARENT)) {
337                         ray_pdf = bsdf_pdf;
338 #ifdef __LAMP_MIS__
339                         ray_t = 0.0f;
340 #endif
341                         min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf);
342                 }
343
344                 /* update path state */
345                 path_state_next(kg, &state, label);
346
347                 /* setup ray */
348                 ray.P = ray_offset(sd.P, (label & LABEL_TRANSMIT)? -sd.Ng: sd.Ng);
349                 ray.D = bsdf_omega_in;
350                 ray.t = FLT_MAX;
351 #ifdef __RAY_DIFFERENTIALS__
352                 ray.dP = sd.dP;
353                 ray.dD = bsdf_domega_in;
354 #endif
355         }
356 }
357
358 #endif
359
360 #ifdef __SUBSURFACE__
361
362 ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rng,
363         int sample, int num_samples,
364         ShaderData *sd, float3 *throughput,
365         float *min_ray_pdf, float *ray_pdf, PathState *state,
366         int rng_offset, PathRadiance *L, Ray *ray, float *ray_t)
367 {
368 #ifdef __EMISSION__
369         if(kernel_data.integrator.use_direct_light) {
370                 /* sample illumination from lights to find path contribution */
371                 if(sd->flag & SD_BSDF_HAS_EVAL) {
372                         float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
373 #ifdef __MULTI_CLOSURE__
374                         float light_o = 0.0f;
375 #else
376                         float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
377 #endif
378                         float light_u, light_v;
379                         path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
380
381                         Ray light_ray;
382                         BsdfEval L_light;
383                         bool is_lamp;
384
385 #ifdef __OBJECT_MOTION__
386                         light_ray.time = sd->time;
387 #endif
388
389                         if(direct_emission(kg, sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) {
390                                 /* trace shadow ray */
391                                 float3 shadow;
392
393                                 if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
394                                         /* accumulate */
395                                         path_radiance_accum_light(L, *throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
396                                 }
397                         }
398                 }
399         }
400 #endif
401
402         /* no BSDF? we can stop here */
403         if(!(sd->flag & SD_BSDF))
404                 return false;
405
406         /* sample BSDF */
407         float bsdf_pdf;
408         BsdfEval bsdf_eval;
409         float3 bsdf_omega_in;
410         differential3 bsdf_domega_in;
411         float bsdf_u, bsdf_v;
412         path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
413         int label;
414
415         label = shader_bsdf_sample(kg, sd, bsdf_u, bsdf_v, &bsdf_eval,
416                 &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
417
418         if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
419                 return false;
420
421         /* modify throughput */
422         path_radiance_bsdf_bounce(L, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label);
423
424         /* set labels */
425         if(!(label & LABEL_TRANSPARENT)) {
426                 *ray_pdf = bsdf_pdf;
427 #ifdef __LAMP_MIS__
428                 *ray_t = 0.0f;
429 #endif
430                 *min_ray_pdf = fminf(bsdf_pdf, *min_ray_pdf);
431         }
432
433         /* update path state */
434         path_state_next(kg, state, label);
435
436         /* setup ray */
437         ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
438         ray->D = bsdf_omega_in;
439
440         if(state->bounce == 0)
441                 ray->t -= sd->ray_length; /* clipping works through transparent */
442         else
443                 ray->t = FLT_MAX;
444
445 #ifdef __RAY_DIFFERENTIALS__
446         ray->dP = sd->dP;
447         ray->dD = bsdf_domega_in;
448 #endif
449         
450         return true;
451 }
452
453 #endif
454
455 ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
456 {
457         /* initialize */
458         PathRadiance L;
459         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
460         float L_transparent = 0.0f;
461
462         path_radiance_init(&L, kernel_data.film.use_light_pass);
463
464         float min_ray_pdf = FLT_MAX;
465         float ray_pdf = 0.0f;
466 #ifdef __LAMP_MIS__
467         float ray_t = 0.0f;
468 #endif
469         PathState state;
470         int rng_offset = PRNG_BASE_NUM;
471 #ifdef __CMJ__
472         int num_samples = kernel_data.integrator.aa_samples;
473 #else
474         int num_samples = 0;
475 #endif
476
477         path_state_init(&state);
478
479         /* path iteration */
480         for(;; rng_offset += PRNG_BOUNCE_NUM) {
481                 /* intersect scene */
482                 Intersection isect;
483                 uint visibility = path_state_ray_visibility(kg, &state);
484
485 #ifdef __HAIR__
486                 float difl = 0.0f, extmax = 0.0f;
487                 uint lcg_state = 0;
488
489                 if(kernel_data.bvh.have_curves) {
490                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
491                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
492                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
493                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
494                         }
495
496                         extmax = kernel_data.curve.maximum_width;
497                         lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
498                 }
499
500                 bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
501 #else
502                 bool hit = scene_intersect(kg, &ray, visibility, &isect);
503 #endif
504
505 #ifdef __LAMP_MIS__
506                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
507                         /* ray starting from previous non-transparent bounce */
508                         Ray light_ray;
509
510                         light_ray.P = ray.P - ray_t*ray.D;
511                         ray_t += isect.t;
512                         light_ray.D = ray.D;
513                         light_ray.t = ray_t;
514                         light_ray.time = ray.time;
515                         light_ray.dD = ray.dD;
516                         light_ray.dP = ray.dP;
517
518                         /* intersect with lamp */
519                         float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
520                         float3 emission;
521
522                         if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission, state.bounce))
523                                 path_radiance_accum_emission(&L, throughput, emission, state.bounce);
524                 }
525 #endif
526
527                 if(!hit) {
528                         /* eval background shader if nothing hit */
529                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
530                                 L_transparent += average(throughput);
531
532 #ifdef __PASSES__
533                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
534 #endif
535                                         break;
536                         }
537
538 #ifdef __BACKGROUND__
539                         /* sample background shader */
540                         float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
541                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
542 #endif
543
544                         break;
545                 }
546
547                 /* setup shading */
548                 ShaderData sd;
549                 shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
550                 float rbsdf = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF);
551                 shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
552
553                 /* holdout */
554 #ifdef __HOLDOUT__
555                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
556                         if(kernel_data.background.transparent) {
557                                 float3 holdout_weight;
558                                 
559                                 if(sd.flag & SD_HOLDOUT_MASK)
560                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
561                                 else
562                                         holdout_weight = shader_holdout_eval(kg, &sd);
563
564                                 /* any throughput is ok, should all be identical here */
565                                 L_transparent += average(holdout_weight*throughput);
566                         }
567
568                         if(sd.flag & SD_HOLDOUT_MASK)
569                                 break;
570                 }
571 #endif
572
573                 /* holdout mask objects do not write data passes */
574                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
575
576                 /* blurring of bsdf after bounces, for rays that have a small likelihood
577                  * of following this particular path (diffuse, rough glossy) */
578                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
579                         float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf;
580
581                         if(blur_pdf < 1.0f) {
582                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
583                                 shader_bsdf_blur(kg, &sd, blur_roughness);
584                         }
585                 }
586
587 #ifdef __EMISSION__
588                 /* emission */
589                 if(sd.flag & SD_EMISSION) {
590                         /* todo: is isect.t wrong here for transparent surfaces? */
591                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, ray_pdf);
592                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
593                 }
594 #endif
595
596                 /* path termination. this is a strange place to put the termination, it's
597                  * mainly due to the mixed in MIS that we use. gives too many unneeded
598                  * shader evaluations, only need emission if we are going to terminate */
599                 float probability = path_state_terminate_probability(kg, &state, throughput);
600
601                 if(probability == 0.0f) {
602                         break;
603                 }
604                 else if(probability != 1.0f) {
605                         float terminate = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_TERMINATE);
606
607                         if(terminate >= probability)
608                                 break;
609
610                         throughput /= probability;
611                 }
612
613 #ifdef __AO__
614                 /* ambient occlusion */
615                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
616                         /* todo: solve correlation */
617                         float bsdf_u, bsdf_v;
618                         path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
619
620                         float ao_factor = kernel_data.background.ao_factor;
621                         float3 ao_N;
622                         float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
623                         float3 ao_D;
624                         float ao_pdf;
625                         float3 ao_alpha = shader_bsdf_alpha(kg, &sd);
626
627                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
628
629                         if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
630                                 Ray light_ray;
631                                 float3 ao_shadow;
632
633                                 light_ray.P = ray_offset(sd.P, sd.Ng);
634                                 light_ray.D = ao_D;
635                                 light_ray.t = kernel_data.background.ao_distance;
636 #ifdef __OBJECT_MOTION__
637                                 light_ray.time = sd.time;
638 #endif
639                                 light_ray.dP = sd.dP;
640                                 light_ray.dD = differential3_zero();
641
642                                 if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
643                                         path_radiance_accum_ao(&L, throughput, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
644                         }
645                 }
646 #endif
647
648 #ifdef __SUBSURFACE__
649                 /* bssrdf scatter to a different location on the same object, replacing
650                  * the closures with a diffuse BSDF */
651                 if(sd.flag & SD_BSSRDF) {
652                         float bssrdf_probability;
653                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
654
655                         /* modify throughput for picking bssrdf or bsdf */
656                         throughput *= bssrdf_probability;
657
658                         /* do bssrdf scatter step if we picked a bssrdf closure */
659                         if(sc) {
660                                 uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
661
662                                 ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
663                                 float bssrdf_u, bssrdf_v;
664                                 path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
665                                 int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
666
667                                 /* compute lighting with the BSDF closure */
668                                 for(int hit = 0; hit < num_hits; hit++) {
669                                         float3 tp = throughput;
670                                         PathState hit_state = state;
671                                         Ray hit_ray = ray;
672                                         float hit_ray_t = ray_t;
673                                         float hit_ray_pdf = ray_pdf;
674                                         float hit_min_ray_pdf = min_ray_pdf;
675
676                                         hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
677                                         
678                                         if(kernel_path_integrate_lighting(kg, rng, sample, num_samples, &bssrdf_sd[hit],
679                                                 &tp, &hit_min_ray_pdf, &hit_ray_pdf, &hit_state, rng_offset+PRNG_BOUNCE_NUM, &L, &hit_ray, &hit_ray_t)) {
680                                                 kernel_path_indirect(kg, rng, sample, hit_ray, buffer,
681                                                         tp, num_samples, num_samples,
682                                                         hit_min_ray_pdf, hit_ray_pdf, hit_state, rng_offset+PRNG_BOUNCE_NUM*2, &L);
683
684                                                 /* for render passes, sum and reset indirect light pass variables
685                                                  * for the next samples */
686                                                 path_radiance_sum_indirect(&L);
687                                                 path_radiance_reset_indirect(&L);
688                                         }
689                                 }
690                                 break;
691                         }
692                 }
693 #endif
694                 
695                 /* The following code is the same as in kernel_path_integrate_lighting(),
696                    but for CUDA the function call is slower. */
697 #ifdef __EMISSION__
698                 if(kernel_data.integrator.use_direct_light) {
699                         /* sample illumination from lights to find path contribution */
700                         if(sd.flag & SD_BSDF_HAS_EVAL) {
701                                 float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
702 #ifdef __MULTI_CLOSURE__
703                                 float light_o = 0.0f;
704 #else
705                                 float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
706 #endif
707                                 float light_u, light_v;
708                                 path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
709
710                                 Ray light_ray;
711                                 BsdfEval L_light;
712                                 bool is_lamp;
713
714 #ifdef __OBJECT_MOTION__
715                                 light_ray.time = sd.time;
716 #endif
717
718                                 if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
719                                         /* trace shadow ray */
720                                         float3 shadow;
721
722                                         if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
723                                                 /* accumulate */
724                                                 path_radiance_accum_light(&L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp);
725                                         }
726                                 }
727                         }
728                 }
729 #endif
730
731                 /* no BSDF? we can stop here */
732                 if(!(sd.flag & SD_BSDF))
733                         break;
734
735                 /* sample BSDF */
736                 float bsdf_pdf;
737                 BsdfEval bsdf_eval;
738                 float3 bsdf_omega_in;
739                 differential3 bsdf_domega_in;
740                 float bsdf_u, bsdf_v;
741                 path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
742                 int label;
743
744                 label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
745                         &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
746
747                 if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
748                         break;
749
750                 /* modify throughput */
751                 path_radiance_bsdf_bounce(&L, &throughput, &bsdf_eval, bsdf_pdf, state.bounce, label);
752
753                 /* set labels */
754                 if(!(label & LABEL_TRANSPARENT)) {
755                         ray_pdf = bsdf_pdf;
756 #ifdef __LAMP_MIS__
757                         ray_t = 0.0f;
758 #endif
759                         min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf);
760                 }
761
762                 /* update path state */
763                 path_state_next(kg, &state, label);
764
765                 /* setup ray */
766                 ray.P = ray_offset(sd.P, (label & LABEL_TRANSMIT)? -sd.Ng: sd.Ng);
767                 ray.D = bsdf_omega_in;
768
769                 if(state.bounce == 0)
770                         ray.t -= sd.ray_length; /* clipping works through transparent */
771                 else
772                         ray.t = FLT_MAX;
773
774 #ifdef __RAY_DIFFERENTIALS__
775                 ray.dP = sd.dP;
776                 ray.dD = bsdf_domega_in;
777 #endif
778         }
779
780         float3 L_sum = path_radiance_sum(kg, &L);
781
782 #ifdef __CLAMP_SAMPLE__
783         path_radiance_clamp(&L, &L_sum, kernel_data.integrator.sample_clamp);
784 #endif
785
786         kernel_write_light_passes(kg, buffer, &L, sample);
787
788         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
789 }
790
791 #ifdef __BRANCHED_PATH__
792
/* Lighting at a single shading point for the branched path integrator.
 *
 * Direct light (only with __EMISSION__ and when sd->flag has SD_BSDF_HAS_EVAL):
 * every light index below kernel_data.integrator.num_all_lights is sampled in
 * turn, and, when kernel_data.integrator.pdf_triangles is non-zero, additional
 * samples are drawn with light index -1. Each sample accepted by
 * direct_emission() and not rejected by shadow_blocked() is accumulated into L.
 *
 * Indirect light: every BSDF closure of sd except the transparent one is
 * sampled a per-type number of times, and each sampled direction is traced
 * with kernel_path_indirect(), which accumulates into L as well.
 *
 * num_samples_adjust scales both the sample counts and the per-sample weights.
 * state is received by value; copies of it are advanced for the indirect rays.
 *
 * NOTE(review): the min_ray_pdf and ray_pdf parameters are never read in this
 * body; the local min_ray_pdf declared in the BSDF loop shadows the parameter. */
793 ccl_device_noinline void kernel_branched_path_integrate_lighting(KernelGlobals *kg, RNG *rng,
794         int sample, int aa_samples,
795         ShaderData *sd, float3 throughput, float num_samples_adjust,
796         float min_ray_pdf, float ray_pdf, PathState state,
797         int rng_offset, PathRadiance *L, ccl_global float *buffer)
798 {
799 #ifdef __EMISSION__
800         /* sample illumination from lights to find path contribution */
801         if(sd->flag & SD_BSDF_HAS_EVAL) {
802                 Ray light_ray;
803                 BsdfEval L_light;
804                 bool is_lamp;
805
806 #ifdef __OBJECT_MOTION__
807                 light_ray.time = sd->time;
808 #endif
809
810                 /* lamp sampling: loop over every light index, each with its own sample count */
811                 for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
812                         int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
813                         float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
                            /* RNG state hashed with the light index, used for this light's samples */
814                         RNG lamp_rng = cmj_hash(*rng, i);
815
                            /* the mesh light section below runs under this same condition;
                             * each of the two sections then carries half the weight */
816                         if(kernel_data.integrator.pdf_triangles != 0.0f)
817                                 num_samples_inv *= 0.5f;
818
819                         for(int j = 0; j < num_samples; j++) {
820                                 float light_u, light_v;
                                    /* sample index and total are expanded by num_samples per AA sample */
821                                 path_rng_2D(kg, &lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
822
823                                 if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
824                                         /* trace shadow ray */
825                                         float3 shadow;
826
827                                         if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
828                                                 /* accumulate */
829                                                 path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp);
830                                         }
831                                 }
832                         }
833                 }
834
835                 /* mesh light sampling */
836                 if(kernel_data.integrator.pdf_triangles != 0.0f) {
837                         int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
838                         float num_samples_inv = num_samples_adjust/num_samples;
839
                            /* counterpart of the 0.5 factor applied in the lamp loop above */
840                         if(kernel_data.integrator.num_all_lights)
841                                 num_samples_inv *= 0.5f;
842
843                         for(int j = 0; j < num_samples; j++) {
844                                 float light_t = path_rng_1D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT);
845                                 float light_u, light_v;
846                                 path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
847
848                                 /* only sample triangle lights: light_t is scaled by one half when lamps exist
849                                  * (presumably selecting the triangle part of the light distribution - TODO confirm) */
849                                 if(kernel_data.integrator.num_all_lights)
850                                         light_t = 0.5f*light_t;
851
                                    /* light index -1 here, as opposed to the explicit index i in the lamp loop */
852                                 if(direct_emission(kg, sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
853                                         /* trace shadow ray */
854                                         float3 shadow;
855
856                                         if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
857                                                 /* accumulate */
858                                                 path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp);
859                                         }
860                                 }
861                         }
862                 }
863         }
864 #endif
865
            /* indirect light: sample each BSDF closure separately */
866         for(int i = 0; i< sd->num_closure; i++) {
867                 const ShaderClosure *sc = &sd->closure[i];
868
869                 if(!CLOSURE_IS_BSDF(sc->type))
870                         continue;
871                 /* transparency is not handled here, but in outer loop */
872                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
873                         continue;
874
                    /* base sample count depends on the closure category */
875                 int num_samples;
876
877                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
878                         num_samples = kernel_data.integrator.diffuse_samples;
879                 else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
880                         num_samples = 1;
881                 else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
882                         num_samples = kernel_data.integrator.glossy_samples;
883                 else
884                         num_samples = kernel_data.integrator.transmission_samples;
885
886                 num_samples = ceil_to_int(num_samples_adjust*num_samples);
887
888                 float num_samples_inv = num_samples_adjust/num_samples;
                    /* RNG state hashed with the closure index, used for the BSDF samples below */
889                 RNG bsdf_rng = cmj_hash(*rng, i);
890
891                 for(int j = 0; j < num_samples; j++) {
892                         /* sample BSDF */
893                         float bsdf_pdf;
894                         BsdfEval bsdf_eval;
895                         float3 bsdf_omega_in;
896                         differential3 bsdf_domega_in;
897                         float bsdf_u, bsdf_v;
898                         path_rng_2D(kg, &bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
899                         int label;
900
901                         label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
902                                 &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
903
                            /* a sample with zero pdf or zero evaluation is skipped */
904                         if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval))
905                                 continue;
906
907                         /* modify throughput (on a copy, so the next sample starts from the original) */
908                         float3 tp = throughput;
909                         path_radiance_bsdf_bounce(L, &tp, &bsdf_eval, bsdf_pdf, state.bounce, label);
910
911                         /* set labels; this local shadows the min_ray_pdf parameter */
912                         float min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
913
914                         /* modify path state (again on a copy) */
915                         PathState ps = state;
916                         path_state_next(kg, &ps, label);
917
918                         /* setup ray: offset along -Ng for transmission, +Ng otherwise */
919                         Ray bsdf_ray;
920
921                         bsdf_ray.P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng);
922                         bsdf_ray.D = bsdf_omega_in;
923                         bsdf_ray.t = FLT_MAX;
924 #ifdef __RAY_DIFFERENTIALS__
925                         bsdf_ray.dP = sd->dP;
926                         bsdf_ray.dD = bsdf_domega_in;
927 #endif
928 #ifdef __OBJECT_MOTION__
929                         bsdf_ray.time = sd->time;
930 #endif
931
                            /* trace the indirect path; the throughput is pre-weighted by num_samples_inv */
932                         kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer,
933                                 tp*num_samples_inv, num_samples, aa_samples*num_samples,
934                                 min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, L);
935
936                         /* for render passes, sum and reset indirect light pass variables
937                          * for the next samples */
938                         path_radiance_sum_indirect(L);
939                         path_radiance_reset_indirect(L);
940                 }
941         }
942 }
943
/* Branched path integrator for one camera ray.
 *
 * Each loop iteration intersects the scene with the current ray. On a miss the
 * background is handled and the loop ends. On a hit the surface shader is
 * evaluated, then holdout, data passes, emission, probabilistic termination
 * (only for rays flagged PATH_RAY_TRANSPARENT), ambient occlusion, subsurface
 * scattering and kernel_branched_path_integrate_lighting() are processed in
 * that order. The loop then continues straight through the surface with the
 * throughput multiplied by shader_bsdf_transparency(), and stops as soon as
 * that throughput is zero. rng_offset advances by PRNG_BOUNCE_NUM per iteration.
 *
 * Light passes are written to buffer through kernel_write_light_passes().
 * Returns (L_sum.x, L_sum.y, L_sum.z, 1 - L_transparent).
 *
 * Note: ray_pdf is initialised to 0.0f and never reassigned in this function. */
944 ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
945 {
946         /* initialize */
947         PathRadiance L;
948         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
949         float L_transparent = 0.0f;
950
951         path_radiance_init(&L, kernel_data.film.use_light_pass);
952
953         float ray_pdf = 0.0f;
954         PathState state;
955         int rng_offset = PRNG_BASE_NUM;
            /* without __CMJ__ a total sample count of 0 is passed to the RNG helpers */
956 #ifdef __CMJ__
957         int aa_samples = kernel_data.integrator.aa_samples;
958 #else
959         int aa_samples = 0;
960 #endif
961
962         path_state_init(&state);
963
964         for(;; rng_offset += PRNG_BOUNCE_NUM) {
965                 /* intersect scene */
966                 Intersection isect;
967                 uint visibility = path_state_ray_visibility(kg, &state);
968
969 #ifdef __HAIR__
                    /* extra intersection arguments, left at zero unless the BVH contains curves */
970                 float difl = 0.0f, extmax = 0.0f;
971                 uint lcg_state = 0;
972
973                 if(kernel_data.bvh.have_curves) {
                            /* difl is only derived from the ray differentials for camera rays
                             * when kernel_data.cam.resolution is 1 */
974                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
975                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
976                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
977                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
978                         }
979
980                         extmax = kernel_data.curve.maximum_width;
981                         lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
982                 }
983
                    /* both preprocessor branches open the same "nothing hit" block */
984                 if(!scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax)) {
985 #else
986                 if(!scene_intersect(kg, &ray, visibility, &isect)) {
987 #endif
988                         /* eval background shader if nothing hit */
989                         if(kernel_data.background.transparent) {
990                                 L_transparent += average(throughput);
991
                                    /* with __PASSES__ the loop only ends here when no background pass
                                     * is requested; without __PASSES__ it always ends here */
992 #ifdef __PASSES__
993                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
994 #endif
995                                         break;
996                         }
997
998 #ifdef __BACKGROUND__
999                         /* sample background shader */
1000                         float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
1001                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
1002 #endif
1003
1004                         break;
1005                 }
1006
1007                 /* setup shading */
1008                 ShaderData sd;
1009                 shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
1010                 shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
1011                 shader_merge_closures(kg, &sd);
1012
1013                 /* holdout */
1014 #ifdef __HOLDOUT__
1015                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK))) {
                             /* the holdout weight only feeds L_transparent when the background is transparent */
1016                         if(kernel_data.background.transparent) {
1017                                 float3 holdout_weight;
1018                                 
1019                                 if(sd.flag & SD_HOLDOUT_MASK)
1020                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
1021                                 else
1022                                         holdout_weight = shader_holdout_eval(kg, &sd);
1023
1024                                 /* any throughput is ok, should all be identical here */
1025                                 L_transparent += average(holdout_weight*throughput);
1026                         }
1027
                             /* a holdout mask ends the path; a plain holdout closure does not */
1028                         if(sd.flag & SD_HOLDOUT_MASK)
1029                                 break;
1030                 }
1031 #endif
1032
1033                 /* holdout mask objects do not write data passes */
1034                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
1035
1036 #ifdef __EMISSION__
1037                 /* emission */
1038                 if(sd.flag & SD_EMISSION) {
1039                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, ray_pdf);
1040                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
1041                 }
1042 #endif
1043
1044                 /* transparency termination */
1045                 if(state.flag & PATH_RAY_TRANSPARENT) {
1046                         /* path termination. this is a strange place to put the termination, it's
1047                          * mainly due to the mixed in MIS that we use. gives too many unneeded
1048                          * shader evaluations, only need emission if we are going to terminate */
1049                         float probability = path_state_terminate_probability(kg, &state, throughput);
1050
1051                         if(probability == 0.0f) {
1052                                 break;
1053                         }
1054                         else if(probability != 1.0f) {
1055                                 float terminate = path_rng_1D(kg, rng, sample, aa_samples, rng_offset + PRNG_TERMINATE);
1056
1057                                 if(terminate >= probability)
1058                                         break;
1059
                                     /* surviving paths are re-weighted by the survival probability */
1060                                 throughput /= probability;
1061                         }
1062                 }
1063
1064 #ifdef __AO__
1065                 /* ambient occlusion */
1066                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
1067                         int num_samples = kernel_data.integrator.ao_samples;
1068                         float num_samples_inv = 1.0f/num_samples;
1069                         float ao_factor = kernel_data.background.ao_factor;
1070                         float3 ao_N;
1071                         float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
1072                         float3 ao_alpha = shader_bsdf_alpha(kg, &sd);
1073
1074                         for(int j = 0; j < num_samples; j++) {
1075                                 float bsdf_u, bsdf_v;
1076                                 path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
1077
1078                                 float3 ao_D;
1079                                 float ao_pdf;
1080
1081                                 sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
1082
                                     /* only directions on the positive side of the geometric normal are traced */
1083                                 if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
1084                                         Ray light_ray;
1085                                         float3 ao_shadow;
1086
1087                                         light_ray.P = ray_offset(sd.P, sd.Ng);
1088                                         light_ray.D = ao_D;
1089                                         light_ray.t = kernel_data.background.ao_distance;
1090 #ifdef __OBJECT_MOTION__
1091                                         light_ray.time = sd.time;
1092 #endif
                                             /* NOTE(review): dP/dD are assigned here without the
                                              * __RAY_DIFFERENTIALS__ guard that wraps the same fields
                                              * elsewhere in this file - confirm they always exist */
1093                                         light_ray.dP = sd.dP;
1094                                         light_ray.dD = differential3_zero();
1095
1096                                         if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
1097                                                 path_radiance_accum_ao(&L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
1098                                 }
1099                         }
1100                 }
1101 #endif
1102
1103 #ifdef __SUBSURFACE__
1104                 /* bssrdf scatter to a different location on the same object */
1105                 if(sd.flag & SD_BSSRDF) {
1106                         for(int i = 0; i< sd.num_closure; i++) {
1107                                 ShaderClosure *sc = &sd.closure[i];
1108
1109                                 if(!CLOSURE_IS_BSSRDF(sc->type))
1110                                         continue;
1111
1112                                 /* set up random number generator */
1113                                 uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
1114                                 int num_samples = kernel_data.integrator.subsurface_samples;
1115                                 float num_samples_inv = 1.0f/num_samples;
1116                                 RNG bssrdf_rng = cmj_hash(*rng, i);
1117
                                     /* flag is set only while this closure is processed, cleared again below */
1118                                 state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
1119
1120                                 /* do subsurface scatter step with copy of shader data, this will
1121                                  * replace the BSSRDF with a diffuse BSDF closure */
1122                                 for(int j = 0; j < num_samples; j++) {
1123                                                 ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
1124                                                 float bssrdf_u, bssrdf_v;
1125                                                 path_rng_2D(kg, &bssrdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
1126                                                 int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
1127
1128                                                 /* compute lighting with the BSDF closure, once per scatter hit;
1129                                                  * num_samples_inv is passed as the num_samples_adjust argument */
1129                                                 for(int hit = 0; hit < num_hits; hit++)
1130                                                         kernel_branched_path_integrate_lighting(kg, rng, sample*num_samples + j,
1131                                                                 aa_samples*num_samples,
1132                                                                 &bssrdf_sd[hit], throughput, num_samples_inv,
1133                                                                 ray_pdf, ray_pdf, state, rng_offset+PRNG_BOUNCE_NUM, &L, buffer);
1134                                 }
1135
1136                                 state.flag &= ~PATH_RAY_BSSRDF_ANCESTOR;
1137                         }
1138                 }
1139 #endif
1140
1141                 /* lighting at the hit point itself; ray_pdf is passed for both pdf arguments */
1142                 kernel_branched_path_integrate_lighting(kg, rng, sample, aa_samples,
1143                         &sd, throughput, 1.0f, ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
1144
1145                 /* continue in case of transparency */
1146                 throughput *= shader_bsdf_transparency(kg, &sd);
1147
1148                 if(is_zero(throughput))
1149                         break;
1150
                     /* same direction, new origin on the far side of the surface (ray.D is left unchanged) */
1151                 path_state_next(kg, &state, LABEL_TRANSPARENT);
1152                 ray.P = ray_offset(sd.P, -sd.Ng);
1153                 ray.t -= sd.ray_length; /* clipping works through transparent */
1154         }
1155
1156         float3 L_sum = path_radiance_sum(kg, &L);
1157
1158 #ifdef __CLAMP_SAMPLE__
1159         path_radiance_clamp(&L, &L_sum, kernel_data.integrator.sample_clamp);
1160 #endif
1161
1162         kernel_write_light_passes(kg, buffer, &L, sample);
1163
             /* the fourth component is one minus the accumulated transparency */
1164         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
1165 }
1166
1167 #endif
1168
1169 ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int x, int y, RNG *rng, Ray *ray)
1170 {
1171         float filter_u;
1172         float filter_v;
1173 #ifdef __CMJ__
1174         int num_samples = kernel_data.integrator.aa_samples;
1175 #else
1176         int num_samples = 0;
1177 #endif
1178
1179         path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
1180
1181         /* sample camera ray */
1182
1183         float lens_u = 0.0f, lens_v = 0.0f;
1184
1185         if(kernel_data.cam.aperturesize > 0.0f)
1186                 path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
1187
1188         float time = 0.0f;
1189
1190 #ifdef __CAMERA_MOTION__
1191         if(kernel_data.cam.shuttertime != -1.0f)
1192                 time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
1193 #endif
1194
1195         camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
1196 }
1197
1198 ccl_device void kernel_path_trace(KernelGlobals *kg,
1199         ccl_global float *buffer, ccl_global uint *rng_state,
1200         int sample, int x, int y, int offset, int stride)
1201 {
1202         /* buffer offset */
1203         int index = offset + x + y*stride;
1204         int pass_stride = kernel_data.film.pass_stride;
1205
1206         rng_state += index;
1207         buffer += index*pass_stride;
1208
1209         /* initialize random numbers and ray */
1210         RNG rng;
1211         Ray ray;
1212
1213         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
1214
1215         /* integrate */
1216         float4 L;
1217
1218         if (ray.t != 0.0f)
1219                 L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
1220         else
1221                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
1222
1223         /* accumulate result in output buffer */
1224         kernel_write_pass_float4(buffer, sample, L);
1225
1226         path_rng_end(kg, rng_state, rng);
1227 }
1228
1229 #ifdef __BRANCHED_PATH__
1230 ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
1231         ccl_global float *buffer, ccl_global uint *rng_state,
1232         int sample, int x, int y, int offset, int stride)
1233 {
1234         /* buffer offset */
1235         int index = offset + x + y*stride;
1236         int pass_stride = kernel_data.film.pass_stride;
1237
1238         rng_state += index;
1239         buffer += index*pass_stride;
1240
1241         /* initialize random numbers and ray */
1242         RNG rng;
1243         Ray ray;
1244
1245         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
1246
1247         /* integrate */
1248         float4 L;
1249
1250         if (ray.t != 0.0f)
1251                 L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer);
1252         else
1253                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
1254
1255         /* accumulate result in output buffer */
1256         kernel_write_pass_float4(buffer, sample, L);
1257
1258         path_rng_end(kg, rng_state, rng);
1259 }
1260 #endif
1261
1262 CCL_NAMESPACE_END
1263