Cycles CUDA: reduce stack memory by reusing ShaderData.
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Sun, 22 May 2016 20:35:47 +0000 (22:35 +0200)
committer Brecht Van Lommel <brechtvanlommel@gmail.com>
Mon, 23 May 2016 20:29:24 +0000 (22:29 +0200)
Stack memory usage is 57% less for path and 48% less for branched path.

14 files changed:
intern/cycles/kernel/kernel_bake.h
intern/cycles/kernel/kernel_emission.h
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_path_branched.h
intern/cycles/kernel/kernel_path_state.h
intern/cycles/kernel/kernel_path_surface.h
intern/cycles/kernel/kernel_path_volume.h
intern/cycles/kernel/kernel_shadow.h
intern/cycles/kernel/kernel_volume.h
intern/cycles/kernel/split/kernel_background_buffer_update.h
intern/cycles/kernel/split/kernel_data_init.h
intern/cycles/kernel/split/kernel_direct_lighting.h
intern/cycles/kernel/split/kernel_lamp_emission.h
intern/cycles/kernel/split/kernel_shadow_blocked.h

index 392cff9c2816c10d0b5ad87aa63b4cb2c4ad8760..77982ee548a0d208afd6447bd5e8dd1b099b3f01 100644 (file)
@@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
        Ray ray;
        float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
 
+       /* emission shader data memory used by various functions */
+       ShaderData emission_sd;
+
        ray.P = sd->P + sd->Ng;
        ray.D = -sd->Ng;
        ray.t = FLT_MAX;
@@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
        path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
 
        /* init path state */
-       path_state_init(kg, &state, &rng, sample, NULL);
+       path_state_init(kg, &emission_sd, &state, &rng, sample, NULL);
 
        /* evaluate surface shader */
        float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
@@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 
                /* sample ambient occlusion */
                if(pass_filter & BAKE_FILTER_AO) {
-                       kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+                       kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
                }
 
                /* sample emission */
@@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
                        kernel_path_subsurface_init_indirect(&ss_indirect);
                        if(kernel_path_subsurface_scatter(kg,
                                                          sd,
+                                                         &emission_sd,
                                                          &L_sample,
                                                          &state,
                                                          &rng,
@@ -90,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
                                                                              &L_sample,
                                                                              &throughput);
                                        kernel_path_indirect(kg,
+                                                            &emission_sd,
                                                             &rng,
                                                             &ray,
                                                             throughput,
@@ -105,14 +110,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 
                /* sample light and BSDF */
                if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
-                       kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample);
+                       kernel_path_surface_connect_light(kg, &rng, sd, &emission_sd, throughput, &state, &L_sample);
 
                        if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
 #ifdef __LAMP_MIS__
                                state.ray_t = 0.0f;
 #endif
                                /* compute indirect light */
-                               kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);
+                               kernel_path_indirect(kg, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);
 
                                /* sum and reset indirect light pass variables for the next samples */
                                path_radiance_sum_indirect(&L_sample);
@@ -126,7 +131,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
 
                /* sample ambient occlusion */
                if(pass_filter & BAKE_FILTER_AO) {
-                       kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+                       kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
                }
 
                /* sample emission */
@@ -139,7 +144,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
                /* sample subsurface scattering */
                if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
                        /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
-                       kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput);
+                       kernel_branched_path_subsurface_scatter(kg, sd, &emission_sd, &L_sample, &state, &rng, &ray, throughput);
                }
 #endif
 
@@ -150,13 +155,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
                        if(kernel_data.integrator.use_direct_light) {
                                int all = kernel_data.integrator.sample_all_lights_direct;
                                kernel_branched_path_surface_connect_light(kg, &rng,
-                                       sd, &state, throughput, 1.0f, &L_sample, all);
+                                       sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
                        }
 #endif
 
                        /* indirect light */
                        kernel_branched_path_surface_indirect_light(kg, &rng,
-                               sd, throughput, 1.0f, &state, &L_sample);
+                               sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
                }
        }
 #endif
index 5cf52f9d176292eaf912029c714f704f9cd0f52f..4de8e0f698a1e0ed01e3a2f07a72ee2c9ad2098c 100644 (file)
@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Direction Emission */
 ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
+                                                ShaderData *emission_sd,
                                                 LightSample *ls,
                                                 ccl_addr_space PathState *state,
                                                 float3 I,
@@ -26,12 +27,6 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
                                                 float time)
 {
        /* setup shading at emitter */
-#ifdef __SPLIT_KERNEL__
-       ShaderData *sd = kg->sd_input;
-#else
-       ShaderData sd_object;
-       ShaderData *sd = &sd_object;
-#endif
        float3 eval;
 
 #ifdef __BACKGROUND_MIS__
@@ -46,28 +41,28 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
                ray.dP = differential3_zero();
                ray.dD = dI;
 
-               shader_setup_from_background(kg, sd, &ray);
+               shader_setup_from_background(kg, emission_sd, &ray);
 
                path_state_modify_bounce(state, true);
-               eval = shader_eval_background(kg, sd, state, 0, SHADER_CONTEXT_EMISSION);
+               eval = shader_eval_background(kg, emission_sd, state, 0, SHADER_CONTEXT_EMISSION);
                path_state_modify_bounce(state, false);
        }
        else
 #endif
        {
-               shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
+               shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
 
-               ls->Ng = ccl_fetch(sd, Ng);
+               ls->Ng = ccl_fetch(emission_sd, Ng);
 
                /* no path flag, we're evaluating this for all closures. that's weak but
                 * we'd have to do multiple evaluations otherwise */
                path_state_modify_bounce(state, true);
-               shader_eval_surface(kg, sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
+               shader_eval_surface(kg, emission_sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
                path_state_modify_bounce(state, false);
 
                /* evaluate emissive closure */
-               if(ccl_fetch(sd, flag) & SD_EMISSION)
-                       eval = shader_emissive_eval(kg, sd);
+               if(ccl_fetch(emission_sd, flag) & SD_EMISSION)
+                       eval = shader_emissive_eval(kg, emission_sd);
                else
                        eval = make_float3(0.0f, 0.0f, 0.0f);
        }
@@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
 
 ccl_device_noinline bool direct_emission(KernelGlobals *kg,
                                          ShaderData *sd,
+                                         ShaderData *emission_sd,
                                          LightSample *ls,
                                          ccl_addr_space PathState *state,
                                          Ray *ray,
@@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
        /* evaluate closure */
 
        float3 light_eval = direct_emissive_eval(kg,
+                                                emission_sd,
                                                 ls,
                                                 state,
                                                 -ls->D,
@@ -198,6 +195,7 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
 /* Indirect Lamp Emission */
 
 ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
+                                                ShaderData *emission_sd,
                                                 ccl_addr_space PathState *state,
                                                 Ray *ray,
                                                 float3 *emission)
@@ -225,6 +223,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 #endif
 
                float3 L = direct_emissive_eval(kg,
+                                               emission_sd,
                                                &ls,
                                                state,
                                                -ray->D,
@@ -238,7 +237,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
                        Ray volume_ray = *ray;
                        volume_ray.t = ls.t;
                        float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
-                       kernel_volume_shadow(kg, state, &volume_ray, &volume_tp);
+                       kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
                        L *= volume_tp;
                }
 #endif
@@ -260,6 +259,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 /* Indirect Background */
 
 ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
+                                               ShaderData *emission_sd,
                                                ccl_addr_space PathState *state,
                                                ccl_addr_space Ray *ray)
 {
@@ -280,19 +280,14 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
        /* evaluate background closure */
 #  ifdef __SPLIT_KERNEL__
        Ray priv_ray = *ray;
-       shader_setup_from_background(kg, kg->sd_input, &priv_ray);
-
-       path_state_modify_bounce(state, true);
-       float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
-       path_state_modify_bounce(state, false);
+       shader_setup_from_background(kg, emission_sd, &priv_ray);
 #  else
-       ShaderData sd;
-       shader_setup_from_background(kg, &sd, ray);
+       shader_setup_from_background(kg, emission_sd, ray);
+#  endif
 
        path_state_modify_bounce(state, true);
-       float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION);
+       float3 L = shader_eval_background(kg, emission_sd, state, state->flag, SHADER_CONTEXT_EMISSION);
        path_state_modify_bounce(state, false);
-#  endif
 
 #ifdef __BACKGROUND_MIS__
        /* check if background light exists or if we should skip pdf */
index c136c85df59854608560ed58f5efb1267dc6f59b..5527d8aa86141e129fdf5418f57e268d8bcac8e3 100644 (file)
@@ -53,6 +53,7 @@
 CCL_NAMESPACE_BEGIN
 
 ccl_device void kernel_path_indirect(KernelGlobals *kg,
+                                     ShaderData *emission_sd,
                                      RNG *rng,
                                      Ray *ray,
                                      float3 throughput,
@@ -60,6 +61,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                      PathState *state,
                                      PathRadiance *L)
 {
+       /* shader data memory used for both volumes and surfaces, saves stack space */
+       ShaderData sd;
+
        /* path iteration */
        for(;;) {
                /* intersect scene */
@@ -87,7 +91,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 
                        /* intersect with lamp */
                        float3 emission;
-                       if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
+                       if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
                                path_radiance_accum_emission(L,
                                                             throughput,
                                                             emission,
@@ -115,15 +119,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                        if(decoupled) {
                                /* cache steps along volume for repeated sampling */
                                VolumeSegment volume_segment;
-                               ShaderData volume_sd;
 
                                shader_setup_from_volume(kg,
-                                                        &volume_sd,
+                                                        &sd,
                                                         &volume_ray);
                                kernel_volume_decoupled_record(kg,
                                                               state,
                                                               &volume_ray,
-                                                              &volume_sd,
+                                                              &sd,
                                                               &volume_segment,
                                                               heterogeneous);
 
@@ -146,7 +149,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                        /* direct light sampling */
                                        kernel_branched_path_volume_connect_light(kg,
                                                                                  rng,
-                                                                                 &volume_sd,
+                                                                                 &sd,
+                                                                                 emission_sd,
                                                                                  throughput,
                                                                                  state,
                                                                                  L,
@@ -163,7 +167,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                        result = kernel_volume_decoupled_scatter(kg,
                                                                                 state,
                                                                                 &volume_ray,
-                                                                                &volume_sd,
+                                                                                &sd,
                                                                                 &throughput,
                                                                                 rphase,
                                                                                 rscatter,
@@ -178,7 +182,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                if(result == VOLUME_PATH_SCATTERED) {
                                        if(kernel_path_volume_bounce(kg,
                                                                     rng,
-                                                                    &volume_sd,
+                                                                    &sd,
                                                                     &throughput,
                                                                     state,
                                                                     L,
@@ -198,16 +202,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 #  endif
                        {
                                /* integrate along volume segment with distance sampling */
-                               ShaderData volume_sd;
                                VolumeIntegrateResult result = kernel_volume_integrate(
-                                       kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
+                                       kg, state, &sd, &volume_ray, L, &throughput, rng, heterogeneous);
 
 #  ifdef __VOLUME_SCATTER__
                                if(result == VOLUME_PATH_SCATTERED) {
                                        /* direct lighting */
                                        kernel_path_volume_connect_light(kg,
                                                                         rng,
-                                                                        &volume_sd,
+                                                                        &sd,
+                                                                        emission_sd,
                                                                         throughput,
                                                                         state,
                                                                         L);
@@ -215,7 +219,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                        /* indirect light bounce */
                                        if(kernel_path_volume_bounce(kg,
                                                                     rng,
-                                                                    &volume_sd,
+                                                                    &sd,
                                                                     &throughput,
                                                                     state,
                                                                     L,
@@ -235,7 +239,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                if(!hit) {
 #ifdef __BACKGROUND__
                        /* sample background shader */
-                       float3 L_background = indirect_background(kg, state, ray);
+                       float3 L_background = indirect_background(kg, emission_sd, state, ray);
                        path_radiance_accum_background(L,
                                                       throughput,
                                                       L_background,
@@ -246,7 +250,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                }
 
                /* setup shading */
-               ShaderData sd;
                shader_setup_from_ray(kg,
                                      &sd,
                                      &isect,
@@ -328,7 +331,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                light_ray.dP = sd.dP;
                                light_ray.dD = differential3_zero();
 
-                               if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) {
+                               if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
                                        path_radiance_accum_ao(L,
                                                               throughput,
                                                               ao_alpha,
@@ -378,6 +381,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                        kernel_branched_path_surface_connect_light(kg,
                                                                   rng,
                                                                   &sd,
+                                                                  emission_sd,
                                                                   state,
                                                                   throughput,
                                                                   1.0f,
@@ -393,6 +397,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 
 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
                                         ShaderData *sd,
+                                        ShaderData *emission_sd,
                                         PathRadiance *L,
                                         PathState *state,
                                         RNG *rng,
@@ -425,7 +430,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
                light_ray.dP = ccl_fetch(sd, dP);
                light_ray.dD = differential3_zero();
 
-               if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+               if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
                        path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
        }
 }
@@ -435,6 +440,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
 ccl_device bool kernel_path_subsurface_scatter(
         KernelGlobals *kg,
         ShaderData *sd,
+        ShaderData *emission_sd,
         PathRadiance *L,
         PathState *state,
         RNG *rng,
@@ -503,7 +509,7 @@ ccl_device bool kernel_path_subsurface_scatter(
                        hit_L->direct_throughput = L->direct_throughput;
                        path_radiance_copy_indirect(hit_L, L);
 
-                       kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
+                       kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
 
                        if(kernel_path_surface_bounce(kg,
                                                      rng,
@@ -526,6 +532,7 @@ ccl_device bool kernel_path_subsurface_scatter(
 
                                        kernel_volume_stack_update_for_subsurface(
                                            kg,
+                                           emission_sd,
                                            &volume_ray,
                                            hit_state->volume_stack);
                                }
@@ -604,8 +611,13 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 
        path_radiance_init(&L, kernel_data.film.use_light_pass);
 
+       /* shader data memory used for both volumes and surfaces, saves stack space */
+       ShaderData sd;
+       /* shader data used by emission, shadows, volume stacks */
+       ShaderData emission_sd;
+
        PathState state;
-       path_state_init(kg, &state, rng, sample, &ray);
+       path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
 
 #ifdef __KERNEL_DEBUG__
        DebugData debug_data;
@@ -669,7 +681,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                        /* intersect with lamp */
                        float3 emission;
 
-                       if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
+                       if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
                                path_radiance_accum_emission(&L, throughput, emission, state.bounce);
                }
 #endif
@@ -689,11 +701,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                        if(decoupled) {
                                /* cache steps along volume for repeated sampling */
                                VolumeSegment volume_segment;
-                               ShaderData volume_sd;
 
-                               shader_setup_from_volume(kg, &volume_sd, &volume_ray);
+                               shader_setup_from_volume(kg, &sd, &volume_ray);
                                kernel_volume_decoupled_record(kg, &state,
-                                       &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+                                       &volume_ray, &sd, &volume_segment, heterogeneous);
 
                                volume_segment.sampling_method = sampling_method;
 
@@ -708,8 +719,9 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                                        int all = false;
 
                                        /* direct light sampling */
-                                       kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
-                                               throughput, &state, &L, all, &volume_ray, &volume_segment);
+                                       kernel_branched_path_volume_connect_light(kg, rng, &sd,
+                                               &emission_sd, throughput, &state, &L, all,
+                                               &volume_ray, &volume_segment);
 
                                        /* indirect sample. if we use distance sampling and take just
                                         * one sample for direct and indirect light, we could share
@@ -718,7 +730,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                                        float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
 
                                        result = kernel_volume_decoupled_scatter(kg,
-                                               &state, &volume_ray, &volume_sd, &throughput,
+                                               &state, &volume_ray, &sd, &throughput,
                                                rphase, rscatter, &volume_segment, NULL, true);
                                }
 
@@ -726,7 +738,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                                kernel_volume_decoupled_free(kg, &volume_segment);
 
                                if(result == VOLUME_PATH_SCATTERED) {
-                                       if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+                                       if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
                                                continue;
                                        else
                                                break;
@@ -739,17 +751,16 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #  endif
                        {
                                /* integrate along volume segment with distance sampling */
-                               ShaderData volume_sd;
                                VolumeIntegrateResult result = kernel_volume_integrate(
-                                       kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
+                                       kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
 
 #  ifdef __VOLUME_SCATTER__
                                if(result == VOLUME_PATH_SCATTERED) {
                                        /* direct lighting */
-                                       kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
+                                       kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
 
                                        /* indirect light bounce */
-                                       if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
+                                       if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
                                                continue;
                                        else
                                                break;
@@ -772,7 +783,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 
 #ifdef __BACKGROUND__
                        /* sample background shader */
-                       float3 L_background = indirect_background(kg, &state, &ray);
+                       float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
                        path_radiance_accum_background(&L, throughput, L_background, state.bounce);
 #endif
 
@@ -780,7 +791,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                }
 
                /* setup shading */
-               ShaderData sd;
                shader_setup_from_ray(kg, &sd, &isect, &ray);
                float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
                shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
@@ -848,7 +858,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #ifdef __AO__
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
-                       kernel_path_ao(kg, &sd, &L, &state, rng, throughput);
+                       kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
                }
 #endif
 
@@ -858,6 +868,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                if(sd.flag & SD_BSSRDF) {
                        if(kernel_path_subsurface_scatter(kg,
                                                          &sd,
+                                                         &emission_sd,
                                                          &L,
                                                          &state,
                                                          rng,
@@ -871,7 +882,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #endif  /* __SUBSURFACE__ */
 
                /* direct lighting */
-               kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
+               kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
 
                /* compute direct lighting and next bounce */
                if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
index 13ae4cf669bff1e21110f00d0cf66c846861d6f3..b4dee220aa5a6b84a802999649adce52b8c4647d 100644 (file)
@@ -18,7 +18,13 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BRANCHED_PATH__
 
-ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
+ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
+                                        ShaderData *sd,
+                                        ShaderData *emission_sd,
+                                        PathRadiance *L,
+                                        PathState *state,
+                                        RNG *rng,
+                                        float3 throughput)
 {
        int num_samples = kernel_data.integrator.ao_samples;
        float num_samples_inv = 1.0f/num_samples;
@@ -49,7 +55,7 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
                        light_ray.dP = ccl_fetch(sd, dP);
                        light_ray.dD = differential3_zero();
 
-                       if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
+                       if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
                                path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
                }
        }
@@ -58,8 +64,8 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
 
 /* bounce off surface and integrate indirect light */
 ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
-       RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust,
-       PathState *state, PathRadiance *L)
+       RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput,
+       float num_samples_adjust, PathState *state, PathRadiance *L)
 {
        for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
                const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
@@ -106,6 +112,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
                        }
 
                        kernel_path_indirect(kg,
+                                            emission_sd,
                                             rng,
                                             &bsdf_ray,
                                             tp*num_samples_inv,
@@ -124,6 +131,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
 #ifdef __SUBSURFACE__
 ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                                         ShaderData *sd,
+                                                        ShaderData *emission_sd,
                                                         PathRadiance *L,
                                                         PathState *state,
                                                         RNG *rng,
@@ -186,6 +194,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
 
                                        kernel_volume_stack_update_for_subsurface(
                                            kg,
+                                           emission_sd,
                                            &volume_ray,
                                            hit_state.volume_stack);
                                }
@@ -199,6 +208,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                                kg,
                                                rng,
                                                &bssrdf_sd,
+                                               emission_sd,
                                                &hit_state,
                                                throughput,
                                                num_samples_inv,
@@ -212,6 +222,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                        kg,
                                        rng,
                                        &bssrdf_sd,
+                                       emission_sd,
                                        throughput,
                                        num_samples_inv,
                                        &hit_state,
@@ -231,8 +242,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 
        path_radiance_init(&L, kernel_data.film.use_light_pass);
 
+       /* shader data memory used for both volumes and surfaces, saves stack space */
+       ShaderData sd;
+       /* shader data used by emission, shadows, volume stacks */
+       ShaderData emission_sd;
+
        PathState state;
-       path_state_init(kg, &state, rng, sample, &ray);
+       path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
 
 #ifdef __KERNEL_DEBUG__
        DebugData debug_data;
@@ -287,11 +303,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 
                        /* cache steps along volume for repeated sampling */
                        VolumeSegment volume_segment;
-                       ShaderData volume_sd;
 
-                       shader_setup_from_volume(kg, &volume_sd, &volume_ray);
+                       shader_setup_from_volume(kg, &sd, &volume_ray);
                        kernel_volume_decoupled_record(kg, &state,
-                               &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+                               &volume_ray, &sd, &volume_segment, heterogeneous);
 
                        /* direct light sampling */
                        if(volume_segment.closure_flag & SD_SCATTER) {
@@ -299,8 +314,9 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 
                                int all = kernel_data.integrator.sample_all_lights_direct;
 
-                               kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
-                                       throughput, &state, &L, all, &volume_ray, &volume_segment);
+                               kernel_branched_path_volume_connect_light(kg, rng, &sd,
+                                       &emission_sd, throughput, &state, &L, all,
+                                       &volume_ray, &volume_segment);
 
                                /* indirect light sampling */
                                int num_samples = kernel_data.integrator.volume_samples;
@@ -326,20 +342,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
                                        float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
 
                                        VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-                                               &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+                                               &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
 
                                        (void)result;
                                        kernel_assert(result == VOLUME_PATH_SCATTERED);
 
                                        if(kernel_path_volume_bounce(kg,
                                                                     rng,
-                                                                    &volume_sd,
+                                                                    &sd,
                                                                     &tp,
                                                                     &ps,
                                                                     &L,
                                                                     &pray))
                                        {
                                                kernel_path_indirect(kg,
+                                                                    &emission_sd,
                                                                     rng,
                                                                     &pray,
                                                                     tp*num_samples_inv,
@@ -373,30 +390,30 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
                        for(int j = 0; j < num_samples; j++) {
                                PathState ps = state;
                                Ray pray = ray;
-                               ShaderData volume_sd;
                                float3 tp = throughput * num_samples_inv;
 
                                /* branch RNG state */
                                path_state_branch(&ps, j, num_samples);
 
                                VolumeIntegrateResult result = kernel_volume_integrate(
-                                       kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
+                                       kg, &ps, &sd, &volume_ray, &L, &tp, rng, heterogeneous);
 
 #ifdef __VOLUME_SCATTER__
                                if(result == VOLUME_PATH_SCATTERED) {
                                        /* todo: support equiangular, MIS and all light sampling.
                                         * alternatively get decoupled ray marching working on the GPU */
-                                       kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
+                                       kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, &L);
 
                                        if(kernel_path_volume_bounce(kg,
                                                                     rng,
-                                                                    &volume_sd,
+                                                                    &sd,
                                                                     &tp,
                                                                     &ps,
                                                                     &L,
                                                                     &pray))
                                        {
                                                kernel_path_indirect(kg,
+                                                                    &emission_sd,
                                                                     rng,
                                                                     &pray,
                                                                     tp,
@@ -414,7 +431,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
                        }
 
                        /* todo: avoid this calculation using decoupled ray marching */
-                       kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
+                       kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
 #endif
                }
 #endif
@@ -432,7 +449,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 
 #ifdef __BACKGROUND__
                        /* sample background shader */
-                       float3 L_background = indirect_background(kg, &state, &ray);
+                       float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
                        path_radiance_accum_background(&L, throughput, L_background, state.bounce);
 #endif
 
@@ -440,7 +457,6 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
                }
 
                /* setup shading */
-               ShaderData sd;
                shader_setup_from_ray(kg, &sd, &isect, &ray);
                shader_eval_surface(kg, &sd, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
                shader_merge_closures(&sd);
@@ -499,14 +515,14 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 #ifdef __AO__
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
-                       kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput);
+                       kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
                }
 #endif
 
 #ifdef __SUBSURFACE__
                /* bssrdf scatter to a different location on the same object */
                if(sd.flag & SD_BSSRDF) {
-                       kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state,
+                       kernel_branched_path_subsurface_scatter(kg, &sd, &emission_sd, &L, &state,
                                                                rng, &ray, throughput);
                }
 #endif
@@ -519,13 +535,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
                        if(kernel_data.integrator.use_direct_light) {
                                int all = kernel_data.integrator.sample_all_lights_direct;
                                kernel_branched_path_surface_connect_light(kg, rng,
-                                       &sd, &hit_state, throughput, 1.0f, &L, all);
+                                       &sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all);
                        }
 #endif
 
                        /* indirect light */
                        kernel_branched_path_surface_indirect_light(kg, rng,
-                               &sd, throughput, 1.0f, &hit_state, &L);
+                               &sd, &emission_sd, throughput, 1.0f, &hit_state, &L);
 
                        /* continue in case of transparency */
                        throughput *= shader_bsdf_transparency(kg, &sd);
index ef3765f7d89676f642e9a264c7e044a87bf8e720..e0e35d792abc39b3c7497b9048256bedc1bee96a 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space RNG *rng, int sample, ccl_addr_space Ray *ray)
+ccl_device_inline void path_state_init(KernelGlobals *kg,
+                                       ShaderData *stack_sd,
+                                       ccl_addr_space PathState *state,
+                                       ccl_addr_space RNG *rng,
+                                       int sample,
+                                       ccl_addr_space Ray *ray)
 {
        state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP;
 
@@ -41,7 +46,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathSta
 
        if(kernel_data.integrator.use_volumes) {
                /* initialize volume stack with volume we are inside of */
-               kernel_volume_stack_init(kg, ray, state->volume_stack);
+               kernel_volume_stack_init(kg, stack_sd, ray, state->volume_stack);
                /* seed RNG for cases where we can't use stratified samples */
                state->rng_congruential = lcg_init(*rng + sample*0x51633e2d);
        }
index 1818c4fd2da4a1dc949582015598cff601569101..74b1ae0ca3241b4629682495331e5b0efc14b271 100644 (file)
@@ -20,7 +20,8 @@ CCL_NAMESPACE_BEGIN
 
 /* branched path tracing: connect path directly to position on one or more lights and add it to L */
 ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
-       ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, int sample_all_lights)
+       ShaderData *sd, ShaderData *emission_sd, PathState *state, float3 throughput,
+       float num_samples_adjust, PathRadiance *L, int sample_all_lights)
 {
 #ifdef __EMISSION__
        /* sample illumination from lights to find path contribution */
@@ -55,11 +56,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
                                LightSample ls;
                                lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls);
 
-                               if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+                               if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                                        /* trace shadow ray */
                                        float3 shadow;
 
-                                       if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+                                       if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
                                                /* accumulate */
                                                path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
                                        }
@@ -87,11 +88,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
                                LightSample ls;
                                light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
 
-                               if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+                               if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                                        /* trace shadow ray */
                                        float3 shadow;
 
-                                       if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+                                       if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
                                                /* accumulate */
                                                path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
                                        }
@@ -109,11 +110,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
                light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
 
                /* sample random light */
-               if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+               if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                        /* trace shadow ray */
                        float3 shadow;
 
-                       if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+                       if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
                                /* accumulate */
                                path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
                        }
@@ -184,7 +185,8 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
 #ifndef __SPLIT_KERNEL__
 /* path tracing: connect path directly to position on a light and add it to L */
 ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
-       ShaderData *sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L)
+       ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
+       PathRadiance *L)
 {
 #ifdef __EMISSION__
        if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
@@ -206,11 +208,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
        LightSample ls;
        light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
 
-       if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+       if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                /* trace shadow ray */
                float3 shadow;
 
-               if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+               if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
                        /* accumulate */
                        path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
                }
index 9eb8b240b8833b44243a72e6cd89a135d4b0bc13..e45522a4641a1a5def1273bab963dd9bb86086df 100644 (file)
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
 #ifdef __VOLUME_SCATTER__
 
 ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
-       ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L)
+       ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L)
 {
 #ifdef __EMISSION__
        if(!kernel_data.integrator.use_direct_light)
@@ -44,11 +44,11 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
        if(ls.pdf == 0.0f)
                return;
        
-       if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+       if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                /* trace shadow ray */
                float3 shadow;
 
-               if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+               if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
                        /* accumulate */
                        path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
                }
@@ -106,7 +106,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
 }
 
 ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
-       ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L,
+       ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L,
        bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
 {
 #ifdef __EMISSION__
@@ -160,11 +160,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
                                if(ls.pdf == 0.0f)
                                        continue;
 
-                               if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+                               if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                                        /* trace shadow ray */
                                        float3 shadow;
 
-                                       if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+                                       if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
                                                /* accumulate */
                                                path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
                                        }
@@ -211,11 +211,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
                                if(ls.pdf == 0.0f)
                                        continue;
 
-                               if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+                               if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                                        /* trace shadow ray */
                                        float3 shadow;
 
-                                       if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+                                       if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
                                                /* accumulate */
                                                path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
                                        }
@@ -251,11 +251,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
                        return;
 
                /* sample random light */
-               if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+               if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                        /* trace shadow ray */
                        float3 shadow;
 
-                       if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+                       if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
                                /* accumulate */
                                path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
                        }
index 504ac2e40bc3b8da53bb3b09754fa69ffe7cfe44..c8f6503cf58bfb6ab38ba48af5e58a61f71c21b1 100644 (file)
@@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN
 
 #define STACK_MAX_HITS 64
 
-ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
+ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow)
 {
        *shadow = make_float3(1.0f, 1.0f, 1.0f);
 
@@ -107,21 +107,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
                                if(ps.volume_stack[0].shader != SHADER_NONE) {
                                        Ray segment_ray = *ray;
                                        segment_ray.t = isect->t;
-                                       kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+                                       kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
                                }
 #endif
 
                                /* setup shader data at surface */
-                               ShaderData sd;
-                               shader_setup_from_ray(kg, &sd, isect, ray);
+                               shader_setup_from_ray(kg, shadow_sd, isect, ray);
 
                                /* attenuation from transparent surface */
-                               if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+                               if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
                                        path_state_modify_bounce(state, true);
-                                       shader_eval_surface(kg, &sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+                                       shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
                                        path_state_modify_bounce(state, false);
 
-                                       throughput *= shader_bsdf_transparency(kg, &sd);
+                                       throughput *= shader_bsdf_transparency(kg, shadow_sd);
                                }
 
                                /* stop if all light is blocked */
@@ -133,13 +132,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
                                }
 
                                /* move ray forward */
-                               ray->P = sd.P;
+                               ray->P = shadow_sd->P;
                                if(ray->t != FLT_MAX)
                                        ray->D = normalize_len(Pend - ray->P, &ray->t);
 
 #ifdef __VOLUME__
                                /* exit/enter volume */
-                               kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack);
+                               kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
 #endif
 
                                bounce++;
@@ -148,7 +147,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
 #ifdef __VOLUME__
                        /* attenuation for last line segment towards light */
                        if(ps.volume_stack[0].shader != SHADER_NONE)
-                               kernel_volume_shadow(kg, &ps, ray, &throughput);
+                               kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
 #endif
 
                        *shadow = throughput;
@@ -164,7 +163,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
 #ifdef __VOLUME__
        if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
                /* apply attenuation from current volume shader */
-               kernel_volume_shadow(kg, state, ray, shadow);
+               kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
        }
 #endif
 
@@ -184,6 +183,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
  * one extra ray cast for the cases were we do want transparency. */
 
 ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
+                                        ShaderData *shadow_sd,
                                         ccl_addr_space PathState *state,
                                         ccl_addr_space Ray *ray_input,
                                         float3 *shadow)
@@ -228,7 +228,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
 #ifdef __VOLUME__
                                        /* attenuation for last line segment towards light */
                                        if(ps.volume_stack[0].shader != SHADER_NONE)
-                                               kernel_volume_shadow(kg, &ps, ray, &throughput);
+                                               kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
 #endif
 
                                        *shadow *= throughput;
@@ -244,39 +244,33 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
                                if(ps.volume_stack[0].shader != SHADER_NONE) {
                                        Ray segment_ray = *ray;
                                        segment_ray.t = isect->t;
-                                       kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+                                       kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
                                }
 #endif
 
                                /* setup shader data at surface */
-#ifdef __SPLIT_KERNEL__
-                               ShaderData *sd = kg->sd_input;
-#else
-                               ShaderData sd_object;
-                               ShaderData *sd = &sd_object;
-#endif
-                               shader_setup_from_ray(kg, sd, isect, ray);
+                               shader_setup_from_ray(kg, shadow_sd, isect, ray);
 
                                /* attenuation from transparent surface */
-                               if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) {
+                               if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) {
                                        path_state_modify_bounce(state, true);
-                                       shader_eval_surface(kg, sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+                                       shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
                                        path_state_modify_bounce(state, false);
 
-                                       throughput *= shader_bsdf_transparency(kg, sd);
+                                       throughput *= shader_bsdf_transparency(kg, shadow_sd);
                                }
 
                                if(is_zero(throughput))
                                        return true;
 
                                /* move ray forward */
-                               ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng));
+                               ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
                                if(ray->t != FLT_MAX)
                                        ray->D = normalize_len(Pend - ray->P, &ray->t);
 
 #ifdef __VOLUME__
                                /* exit/enter volume */
-                               kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack);
+                               kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
 #endif
 
                                bounce++;
@@ -286,7 +280,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
 #ifdef __VOLUME__
        else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
                /* apply attenuation from current volume shader */
-               kernel_volume_shadow(kg, state, ray, shadow);
+               kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
        }
 #endif
 #endif
index 30a978f6c9e79c90669bd3066f85e2335fc10688..e1ea60f372e05468d9ddc213cf986aae359efa66 100644 (file)
@@ -219,15 +219,14 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
 
 /* get the volume attenuation over line segment defined by ray, with the
  * assumption that there are no surfaces blocking light between the endpoints */
-ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput)
+ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
 {
-       ShaderData sd;
-       shader_setup_from_volume(kg, &sd, ray);
+       shader_setup_from_volume(kg, shadow_sd, ray);
 
        if(volume_stack_is_heterogeneous(kg, state->volume_stack))
-               kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput);
+               kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
        else
-               kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput);
+               kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
 }
 
 /* Equi-angular sampling as in:
@@ -1000,6 +999,7 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
  * is inside of. */
 
 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
+                                         ShaderData *stack_sd,
                                          Ray *ray,
                                          VolumeStack *stack)
 {
@@ -1040,28 +1040,27 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
                qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
 
                for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
-                       ShaderData sd;
-                       shader_setup_from_ray(kg, &sd, isect, &volume_ray);
-                       if(sd.flag & SD_BACKFACING) {
+                       shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+                       if(stack_sd->flag & SD_BACKFACING) {
                                bool need_add = true;
                                for(int i = 0; i < enclosed_index && need_add; ++i) {
                                        /* If ray exited the volume and never entered to that volume
                                         * it means that camera is inside such a volume.
                                         */
-                                       if(enclosed_volumes[i] == sd.object) {
+                                       if(enclosed_volumes[i] == stack_sd->object) {
                                                need_add = false;
                                        }
                                }
                                for(int i = 0; i < stack_index && need_add; ++i) {
                                        /* Don't add intersections twice. */
-                                       if(stack[i].object == sd.object) {
+                                       if(stack[i].object == stack_sd->object) {
                                                need_add = false;
                                                break;
                                        }
                                }
                                if(need_add) {
-                                       stack[stack_index].object = sd.object;
-                                       stack[stack_index].shader = sd.shader;
+                                       stack[stack_index].object = stack_sd->object;
+                                       stack[stack_index].shader = stack_sd->shader;
                                        ++stack_index;
                                }
                        }
@@ -1069,7 +1068,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
                                /* If ray from camera enters the volume, this volume shouldn't
                                 * be added to the stack on exit.
                                 */
-                               enclosed_volumes[enclosed_index++] = sd.object;
+                               enclosed_volumes[enclosed_index++] = stack_sd->object;
                        }
                }
        }
@@ -1086,9 +1085,8 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
                        break;
                }
 
-               ShaderData sd;
-               shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
-               if(sd.flag & SD_BACKFACING) {
+               shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+               if(stack_sd->flag & SD_BACKFACING) {
                        /* If ray exited the volume and never entered to that volume
                         * it means that camera is inside such a volume.
                         */
@@ -1097,20 +1095,20 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
                                /* If ray exited the volume and never entered to that volume
                                 * it means that camera is inside such a volume.
                                 */
-                               if(enclosed_volumes[i] == sd.object) {
+                               if(enclosed_volumes[i] == stack_sd->object) {
                                        need_add = false;
                                }
                        }
                        for(int i = 0; i < stack_index && need_add; ++i) {
                                /* Don't add intersections twice. */
-                               if(stack[i].object == sd.object) {
+                               if(stack[i].object == stack_sd->object) {
                                        need_add = false;
                                        break;
                                }
                        }
                        if(need_add) {
-                               stack[stack_index].object = sd.object;
-                               stack[stack_index].shader = sd.shader;
+                               stack[stack_index].object = stack_sd->object;
+                               stack[stack_index].shader = stack_sd->shader;
                                ++stack_index;
                        }
                }
@@ -1118,11 +1116,11 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
                        /* If ray from camera enters the volume, this volume shouldn't
                         * be added to the stack on exit.
                         */
-                       enclosed_volumes[enclosed_index++] = sd.object;
+                       enclosed_volumes[enclosed_index++] = stack_sd->object;
                }
 
                /* Move ray forward. */
-               volume_ray.P = ray_offset(sd.P, -sd.Ng);
+               volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
                ++step;
        }
 #endif
@@ -1190,6 +1188,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
 
 #ifdef __SUBSURFACE__
 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
+                                                          ShaderData *stack_sd,
                                                           Ray *ray,
                                                           VolumeStack *stack)
 {
@@ -1210,9 +1209,8 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
                qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
 
                for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
-                       ShaderData sd;
-                       shader_setup_from_ray(kg, &sd, isect, &volume_ray);
-                       kernel_volume_stack_enter_exit(kg, &sd, stack);
+                       shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+                       kernel_volume_stack_enter_exit(kg, stack_sd, stack);
                }
        }
 #  else
@@ -1224,13 +1222,12 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
                                     &isect,
                                     PATH_RAY_ALL_VISIBILITY))
        {
-               ShaderData sd;
-               shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
-               kernel_volume_stack_enter_exit(kg, &sd, stack);
+               shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+               kernel_volume_stack_enter_exit(kg, stack_sd, stack);
 
                /* Move ray forward. */
-               volume_ray.P = ray_offset(sd.P, -sd.Ng);
-               volume_ray.t -= sd.ray_length;
+               volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+               volume_ray.t -= stack_sd->ray_length;
                ++step;
        }
 #  endif
index 3d12a3dd9933d6799396611c85b68891eb3abd8c..f42d0a985bb11090c30a9bc52c666a14a65cb785 100644 (file)
@@ -157,7 +157,7 @@ ccl_device char kernel_background_buffer_update(
                if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
 #ifdef __BACKGROUND__
                        /* sample background shader */
-                       float3 L_background = indirect_background(kg, state, ray);
+                       float3 L_background = indirect_background(kg, kg->sd_input, state, ray);
                        path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
 #endif
                        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
@@ -226,7 +226,7 @@ ccl_device char kernel_background_buffer_update(
                                *throughput = make_float3(1.0f, 1.0f, 1.0f);
                                *L_transparent = 0.0f;
                                path_radiance_init(L, kernel_data.film.use_light_pass);
-                               path_state_init(kg, state, rng, sample, ray);
+                               path_state_init(kg, kg->sd_input, state, rng, sample, ray);
 #ifdef __KERNEL_DEBUG__
                                debug_data_init(debug_data);
 #endif
index 9891391a3a377c8bad36973f0ad45cb3c9a79c55..e3dbc43757e5f1618da900cd200bc0baa6b21806 100644 (file)
@@ -207,6 +207,7 @@ ccl_device void kernel_data_init(
                        L_transparent_coop[ray_index] = 0.0f;
                        path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
                        path_state_init(kg,
+                                       kg->sd_input,
                                        &PathState_coop[ray_index],
                                        &rng_coop[ray_index],
                                        my_sample,
index c7a2aa6426c098ac6b3a35279b0d26702c310d9d..ebe91097496cd82b68338aa012984a3f71f0ebd3 100644 (file)
@@ -88,7 +88,7 @@ ccl_device char kernel_direct_lighting(
 
                        BsdfEval L_light;
                        bool is_lamp;
-                       if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
+                       if(direct_emission(kg, sd, kg->sd_input, &ls, state, &light_ray, &L_light, &is_lamp)) {
                                /* Write intermediate data to global memory to access from
                                 * the next kernel.
                                 */
index dc3b4b34d4eff6923640b17e3248a6b884bf36fd..3bd0e361078fca19756e144d2d7d07741dc629ad 100644 (file)
@@ -74,7 +74,7 @@ ccl_device void kernel_lamp_emission(
                        /* intersect with lamp */
                        float3 emission;
 
-                       if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
+                       if(indirect_lamp_emission(kg, kg->sd_input, state, &light_ray, &emission)) {
                                path_radiance_accum_emission(L, throughput, emission, state->bounce);
                        }
                }
index 0c989861eef44f50ca9783ee996b3916609c675d..6153af47f96f877faf18607a25d48407a3b3e587 100644 (file)
@@ -71,6 +71,7 @@ ccl_device void kernel_shadow_blocked(
 
                float3 shadow;
                update_path_radiance = !(shadow_blocked(kg,
+                                                       kg->sd_input,
                                                        state,
                                                        light_ray_global,
                                                        &shadow));