Cycles / SSS:
[blender.git] / intern / cycles / kernel / kernel_path.h
index 3e815ffe40054a413e199890ed08770388c29d22..2806296ba9337520d470ebeba9c91fa74749b3a6 100644 (file)
@@ -44,7 +44,7 @@
 CCL_NAMESPACE_BEGIN
 
 typedef struct PathState {
-       uint flag;
+       int flag;
        int bounce;
 
        int diffuse_bounce;
@@ -215,14 +215,14 @@ __device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ra
                                        return true;
 
                                ShaderData sd;
-                               shader_setup_from_ray(kg, &sd, &isect, ray);
+                               shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1);
                                shader_eval_surface(kg, &sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
 
                                throughput *= shader_bsdf_transparency(kg, &sd);
 
                                ray->P = ray_offset(sd.P, -sd.Ng);
                                if(ray->t != FLT_MAX)
-                                       ray->D = normalize_length(Pend - ray->P, &ray->t);
+                                       ray->D = normalize_len(Pend - ray->P, &ray->t);
 
                                bounce++;
                        }
@@ -249,6 +249,11 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 #endif
        PathState state;
        int rng_offset = PRNG_BASE_NUM;
+#ifdef __CMJ__
+       int num_samples = kernel_data.integrator.aa_samples;
+#else
+       int num_samples = 0;
+#endif
 
        path_state_init(&state);
 
@@ -266,7 +271,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                        if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
                                float3 pixdiff = ray.dD.dx + ray.dD.dy;
                                /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
-                               difl = kernel_data.curve_kernel_data.minimum_width * length(pixdiff) * 0.5f;
+                               difl = kernel_data.curve_kernel_data.minimum_width * len(pixdiff) * 0.5f;
                        }
 
                        extmax = kernel_data.curve_kernel_data.maximum_width;
@@ -292,10 +297,10 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                        light_ray.dP = ray.dP;
 
                        /* intersect with lamp */
-                       float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
+                       float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
                        float3 emission;
 
-                       if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission))
+                       if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission, state.bounce))
                                path_radiance_accum_emission(&L, throughput, emission, state.bounce);
                }
 #endif
@@ -313,7 +318,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 
 #ifdef __BACKGROUND__
                        /* sample background shader */
-                       float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf);
+                       float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
                        path_radiance_accum_background(&L, throughput, L_background, state.bounce);
 #endif
 
@@ -322,23 +327,10 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 
                /* setup shading */
                ShaderData sd;
-               shader_setup_from_ray(kg, &sd, &isect, &ray);
-               float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
+               shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
+               float rbsdf = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF);
                shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
 
-               kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
-
-               /* blurring of bsdf after bounces, for rays that have a small likelihood
-                * of following this particular path (diffuse, rough glossy) */
-               if(kernel_data.integrator.filter_glossy != FLT_MAX) {
-                       float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf;
-
-                       if(blur_pdf < 1.0f) {
-                               float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
-                               shader_bsdf_blur(kg, &sd, blur_roughness);
-                       }
-               }
-
                /* holdout */
 #ifdef __HOLDOUT__
                if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
@@ -359,6 +351,20 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                }
 #endif
 
+               /* holdout mask objects do not write data passes */
+               kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
+
+               /* blurring of bsdf after bounces, for rays that have a small likelihood
+                * of following this particular path (diffuse, rough glossy) */
+               if(kernel_data.integrator.filter_glossy != FLT_MAX) {
+                       float blur_pdf = kernel_data.integrator.filter_glossy*min_ray_pdf;
+
+                       if(blur_pdf < 1.0f) {
+                               float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
+                               shader_bsdf_blur(kg, &sd, blur_roughness);
+                       }
+               }
+
 #ifdef __EMISSION__
                /* emission */
                if(sd.flag & SD_EMISSION) {
@@ -372,12 +378,18 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                 * mainly due to the mixed in MIS that we use. gives too many unneeded
                 * shader evaluations, only need emission if we are going to terminate */
                float probability = path_state_terminate_probability(kg, &state, throughput);
-               float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
 
-               if(terminate >= probability)
+               if(probability == 0.0f) {
                        break;
+               }
+               else if(probability != 1.0f) {
+                       float terminate = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_TERMINATE);
 
-               throughput /= probability;
+                       if(terminate >= probability)
+                               break;
+
+                       throughput /= probability;
+               }
 
 #ifdef __SUBSURFACE__
                /* bssrdf scatter to a different location on the same object, replacing
@@ -401,8 +413,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
                        /* todo: solve correlation */
-                       float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-                       float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+                       float bsdf_u, bsdf_v;
+                       path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
 
                        float ao_factor = kernel_data.background.ao_factor;
                        float3 ao_N;
@@ -435,10 +447,14 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                if(kernel_data.integrator.use_direct_light) {
                        /* sample illumination from lights to find path contribution */
                        if(sd.flag & SD_BSDF_HAS_EVAL) {
-                               float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
-                               float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F);
-                               float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U);
-                               float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V);
+                               float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
+#ifdef __MULTI_CLOSURE__
+                               float light_o = 0.0f;
+#else
+                               float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
+#endif
+                               float light_u, light_v;
+                               path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
 
                                Ray light_ray;
                                BsdfEval L_light;
@@ -448,7 +464,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                                light_ray.time = sd.time;
 #endif
 
-                               if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
+                               if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
                                        /* trace shadow ray */
                                        float3 shadow;
 
@@ -470,8 +486,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                BsdfEval bsdf_eval;
                float3 bsdf_omega_in;
                differential3 bsdf_domega_in;
-               float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-               float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+               float bsdf_u, bsdf_v;
+               path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
                int label;
 
                label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -524,7 +540,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 #ifdef __NON_PROGRESSIVE__
 
 __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer,
-       float3 throughput, float num_samples_adjust,
+       float3 throughput, int num_samples, int num_total_samples,
        float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L)
 {
 #ifdef __LAMP_MIS__
@@ -556,10 +572,10 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                        light_ray.dP = ray.dP;
 
                        /* intersect with lamp */
-                       float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
+                       float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
                        float3 emission;
 
-                       if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission))
+                       if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission, state.bounce))
                                path_radiance_accum_emission(L, throughput, emission, state.bounce);
                }
 #endif
@@ -567,7 +583,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                if(!hit) {
 #ifdef __BACKGROUND__
                        /* sample background shader */
-                       float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf);
+                       float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
                        path_radiance_accum_background(L, throughput, L_background, state.bounce);
 #endif
 
@@ -576,8 +592,8 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 
                /* setup shading */
                ShaderData sd;
-               shader_setup_from_ray(kg, &sd, &isect, &ray);
-               float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
+               shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
+               float rbsdf = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF);
                shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
                shader_merge_closures(kg, &sd);
 
@@ -603,13 +619,19 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                /* path termination. this is a strange place to put the termination, it's
                 * mainly due to the mixed in MIS that we use. gives too many unneeded
                 * shader evaluations, only need emission if we are going to terminate */
-               float probability = path_state_terminate_probability(kg, &state, throughput*num_samples_adjust);
-               float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
+               float probability = path_state_terminate_probability(kg, &state, throughput*num_samples);
 
-               if(terminate >= probability)
+               if(probability == 0.0f) {
                        break;
+               }
+               else if(probability != 1.0f) {
+                       float terminate = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_TERMINATE);
 
-               throughput /= probability;
+                       if(terminate >= probability)
+                               break;
+
+                       throughput /= probability;
+               }
 
 #ifdef __SUBSURFACE__
                /* bssrdf scatter to a different location on the same object, replacing
@@ -632,9 +654,8 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 #ifdef __AO__
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
-                       /* todo: solve correlation */
-                       float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-                       float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+                       float bsdf_u, bsdf_v;
+                       path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
 
                        float ao_factor = kernel_data.background.ao_factor;
                        float3 ao_N;
@@ -667,10 +688,14 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                if(kernel_data.integrator.use_direct_light) {
                        /* sample illumination from lights to find path contribution */
                        if(sd.flag & SD_BSDF_HAS_EVAL) {
-                               float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
-                               float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F);
-                               float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U);
-                               float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V);
+                               float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
+#ifdef __MULTI_CLOSURE__
+                               float light_o = 0.0f;
+#else
+                               float light_o = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_F);
+#endif
+                               float light_u, light_v;
+                               path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
 
                                Ray light_ray;
                                BsdfEval L_light;
@@ -681,7 +706,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 #endif
 
                                /* sample random light */
-                               if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
+                               if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
                                        /* trace shadow ray */
                                        float3 shadow;
 
@@ -703,8 +728,8 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                BsdfEval bsdf_eval;
                float3 bsdf_omega_in;
                differential3 bsdf_domega_in;
-               float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-               float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+               float bsdf_u, bsdf_v;
+               path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
                int label;
 
                label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -744,19 +769,24 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
        float min_ray_pdf, float ray_pdf, PathState state,
        int rng_offset, PathRadiance *L, __global float *buffer)
 {
+#ifdef __CMJ__
+       int aa_samples = kernel_data.integrator.aa_samples;
+#else
+       int aa_samples = 0;
+#endif
+
 #ifdef __AO__
        /* ambient occlusion */
        if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
-               int num_samples = ceil(kernel_data.integrator.ao_samples*num_samples_adjust);
+               int num_samples = ceil_to_int(kernel_data.integrator.ao_samples*num_samples_adjust);
                float num_samples_inv = num_samples_adjust/num_samples;
                float ao_factor = kernel_data.background.ao_factor;
                float3 ao_N;
                float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
 
                for(int j = 0; j < num_samples; j++) {
-                       /* todo: solve correlation */
-                       float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U);
-                       float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V);
+                       float bsdf_u, bsdf_v;
+                       path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
 
                        float3 ao_D;
                        float ao_pdf;
@@ -797,17 +827,18 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
                /* lamp sampling */
                for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-                       int num_samples = ceil(num_samples_adjust*light_select_num_samples(kg, i));
+                       int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
                        float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
+                       RNG lamp_rng = cmj_hash(*rng, i);
 
                        if(kernel_data.integrator.pdf_triangles != 0.0f)
                                num_samples_inv *= 0.5f;
 
                        for(int j = 0; j < num_samples; j++) {
-                               float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
-                               float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
+                               float light_u, light_v;
+                               path_rng_2D(kg, &lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
 
-                               if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
+                               if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
                                        /* trace shadow ray */
                                        float3 shadow;
 
@@ -821,22 +852,22 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
                /* mesh light sampling */
                if(kernel_data.integrator.pdf_triangles != 0.0f) {
-                       int num_samples = ceil(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
+                       int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
                        float num_samples_inv = num_samples_adjust/num_samples;
 
                        if(kernel_data.integrator.num_all_lights)
                                num_samples_inv *= 0.5f;
 
                        for(int j = 0; j < num_samples; j++) {
-                               float light_t = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT);
-                               float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
-                               float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
+                               float light_t = path_rng_1D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT);
+                               float light_u, light_v;
+                               path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U, &light_u, &light_v);
 
                                /* only sample triangle lights */
                                if(kernel_data.integrator.num_all_lights)
                                        light_t = 0.5f*light_t;
 
-                               if(direct_emission(kg, sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
+                               if(direct_emission(kg, sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
                                        /* trace shadow ray */
                                        float3 shadow;
 
@@ -861,16 +892,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
                int num_samples;
 
-               if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+               if(CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
                        num_samples = kernel_data.integrator.diffuse_samples;
                else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
                        num_samples = kernel_data.integrator.glossy_samples;
                else
                        num_samples = kernel_data.integrator.transmission_samples;
 
-               num_samples = ceil(num_samples_adjust*num_samples);
+               num_samples = ceil_to_int(num_samples_adjust*num_samples);
 
                float num_samples_inv = num_samples_adjust/num_samples;
+               RNG bsdf_rng = cmj_hash(*rng, i);
 
                for(int j = 0; j < num_samples; j++) {
                        /* sample BSDF */
@@ -878,8 +910,8 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
                        BsdfEval bsdf_eval;
                        float3 bsdf_omega_in;
                        differential3 bsdf_domega_in;
-                       float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U);
-                       float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V);
+                       float bsdf_u, bsdf_v;
+                       path_rng_2D(kg, &bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bsdf_u, &bsdf_v);
                        int label;
 
                        label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
@@ -893,10 +925,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
                        path_radiance_bsdf_bounce(L, &tp, &bsdf_eval, bsdf_pdf, state.bounce, label);
 
                        /* set labels */
-                       float min_ray_pdf = FLT_MAX;
-
-                       if(!(label & LABEL_TRANSPARENT))
-                               min_ray_pdf = fminf(bsdf_pdf, min_ray_pdf);
+                       float min_ray_pdf = fminf(bsdf_pdf, FLT_MAX);
 
                        /* modify path state */
                        PathState ps = state;
@@ -917,7 +946,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 #endif
 
                        kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer,
-                               tp*num_samples_inv, num_samples,
+                               tp*num_samples_inv, num_samples, aa_samples*num_samples,
                                min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, L);
 
                        /* for render passes, sum and reset indirect light pass variables
@@ -940,6 +969,11 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
        float ray_pdf = 0.0f;
        PathState state;
        int rng_offset = PRNG_BASE_NUM;
+#ifdef __CMJ__
+       int aa_samples = kernel_data.integrator.aa_samples;
+#else
+       int aa_samples = 0;
+#endif
 
        path_state_init(&state);
 
@@ -956,7 +990,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
                        if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
                                float3 pixdiff = ray.dD.dx + ray.dD.dy;
                                /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
-                               difl = kernel_data.curve_kernel_data.minimum_width * length(pixdiff) * 0.5f;
+                               difl = kernel_data.curve_kernel_data.minimum_width * len(pixdiff) * 0.5f;
                        }
 
                        extmax = kernel_data.curve_kernel_data.maximum_width;
@@ -979,7 +1013,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
 
 #ifdef __BACKGROUND__
                        /* sample background shader */
-                       float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf);
+                       float3 L_background = indirect_background(kg, &ray, state.flag, ray_pdf, state.bounce);
                        path_radiance_accum_background(&L, throughput, L_background, state.bounce);
 #endif
 
@@ -988,13 +1022,10 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
 
                /* setup shading */
                ShaderData sd;
-               shader_setup_from_ray(kg, &sd, &isect, &ray);
-               float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
-               shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
+               shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
+               shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
                shader_merge_closures(kg, &sd);
 
-               kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
-
                /* holdout */
 #ifdef __HOLDOUT__
                if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK))) {
@@ -1015,6 +1046,9 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
                }
 #endif
 
+               /* holdout mask objects do not write data passes */
+               kernel_write_data_passes(kg, buffer, &L, &sd, sample, state.flag, throughput);
+
 #ifdef __EMISSION__
                /* emission */
                if(sd.flag & SD_EMISSION) {
@@ -1029,12 +1063,18 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
                         * mainly due to the mixed in MIS that we use. gives too many unneeded
                         * shader evaluations, only need emission if we are going to terminate */
                        float probability = path_state_terminate_probability(kg, &state, throughput);
-                       float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
 
-                       if(terminate >= probability)
+                       if(probability == 0.0f) {
                                break;
+                       }
+                       else if(probability != 1.0f) {
+                               float terminate = path_rng_1D(kg, rng, sample, aa_samples, rng_offset + PRNG_TERMINATE);
 
-                       throughput /= probability;
+                               if(terminate >= probability)
+                                       break;
+
+                               throughput /= probability;
+                       }
                }
 
 #ifdef __SUBSURFACE__
@@ -1110,22 +1150,27 @@ __device void kernel_path_trace(KernelGlobals *kg,
 
        float filter_u;
        float filter_v;
+#ifdef __CMJ__
+       int num_samples = kernel_data.integrator.aa_samples;
+#else
+       int num_samples = 0;
+#endif
 
-       path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v);
+       path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v);
 
        /* sample camera ray */
        Ray ray;
-       
+
        float lens_u = 0.0f, lens_v = 0.0f;
+
+       if(kernel_data.cam.aperturesize > 0.0f)
+               path_rng_2D(kg, &rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
+
        float time = 0.0f;
-       
-       if(kernel_data.cam.aperturesize > 0.0f) {
-               lens_u = path_rng(kg, &rng, sample, PRNG_LENS_U);
-               lens_v = path_rng(kg, &rng, sample, PRNG_LENS_V);
-       }
+
 #ifdef __CAMERA_MOTION__
        if(kernel_data.cam.shuttertime != -1.0f)
-               time = path_rng(kg, &rng, sample, PRNG_TIME);
+               time = path_rng_1D(kg, &rng, sample, num_samples, PRNG_TIME);
 #endif
 
        camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray);