Cycles: reduce closure memory usage for emission/shadow shader data.
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Wed, 1 Nov 2017 20:02:28 +0000 (21:02 +0100)
committer Brecht Van Lommel <brechtvanlommel@gmail.com>
Sun, 5 Nov 2017 19:48:33 +0000 (20:48 +0100)
With a Titan Xp, reduces path trace local memory from 1092MB to 840MB.
Benchmark performance was within 1% with both RX 480 and Titan Xp.

Original patch was implemented by Sergey.

Differential Revision: https://developer.blender.org/D2249

20 files changed:
intern/cycles/kernel/closure/alloc.h
intern/cycles/kernel/kernel_bake.h
intern/cycles/kernel/kernel_emission.h
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_path_branched.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/kernel/kernel_shadow.h
intern/cycles/kernel/kernel_subsurface.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/kernel_volume.h
intern/cycles/kernel/split/kernel_buffer_update.h
intern/cycles/kernel/split/kernel_direct_lighting.h
intern/cycles/kernel/split/kernel_do_volume.h
intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
intern/cycles/kernel/split/kernel_path_init.h
intern/cycles/kernel/split/kernel_shader_eval.h
intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
intern/cycles/kernel/split/kernel_split_data_types.h
intern/cycles/kernel/split/kernel_subsurface_scatter.h

index e799855..48a6040 100644 (file)
@@ -20,17 +20,16 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty
 {
        kernel_assert(size <= sizeof(ShaderClosure));
 
-       int num_closure = sd->num_closure;
-       int num_closure_extra = sd->num_closure_extra;
-       if(num_closure + num_closure_extra >= MAX_CLOSURE)
+       if(sd->num_closure_left == 0)
                return NULL;
 
-       ShaderClosure *sc = &sd->closure[num_closure];
+       ShaderClosure *sc = &sd->closure[sd->num_closure];
 
        sc->type = type;
        sc->weight = weight;
 
        sd->num_closure++;
+       sd->num_closure_left--;
 
        return sc;
 }
@@ -44,18 +43,16 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
         * This lets us keep the same fast array iteration over closures, as we
         * found linked list iteration and iteration with skipping to be slower. */
        int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure));
-       int num_closure = sd->num_closure;
-       int num_closure_extra = sd->num_closure_extra + num_extra;
 
-       if(num_closure + num_closure_extra > MAX_CLOSURE) {
+       if(num_extra > sd->num_closure_left) {
                /* Remove previous closure. */
                sd->num_closure--;
-               sd->num_closure_extra++;
+               sd->num_closure_left++;
                return NULL;
        }
 
-       sd->num_closure_extra = num_closure_extra;
-       return (ccl_addr_space void*)(sd->closure + MAX_CLOSURE - num_closure_extra);
+       sd->num_closure_left -= num_extra;
+       return (ccl_addr_space void*)(sd->closure + sd->num_closure + sd->num_closure_left);
 }
 
 ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
index 84d8d84..9ce1035 100644 (file)
@@ -51,7 +51,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
        path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
 
        /* evaluate surface shader */
-       shader_eval_surface(kg, sd, &state, state.flag);
+       shader_eval_surface(kg, sd, &state, state.flag, MAX_CLOSURE);
 
        /* TODO, disable more closures we don't need besides transparent */
        shader_bsdf_disable_transparency(kg, sd);
@@ -239,12 +239,12 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
                }
                else {
                        /* surface color of the pass only */
-                       shader_eval_surface(kg, sd, state, 0);
+                       shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
                        return kernel_bake_shader_bsdf(kg, sd, type);
                }
        }
        else {
-               shader_eval_surface(kg, sd, state, 0);
+               shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
                color = kernel_bake_shader_bsdf(kg, sd, type);
        }
 
@@ -337,7 +337,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
                {
                        float3 N = sd.N;
                        if((sd.flag & SD_HAS_BUMP)) {
-                               shader_eval_surface(kg, &sd, &state, 0);
+                               shader_eval_surface(kg, &sd, &state, 0, MAX_CLOSURE);
                                N = shader_bsdf_average_normal(kg, &sd);
                        }
 
@@ -352,7 +352,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
                }
                case SHADER_EVAL_EMISSION:
                {
-                       shader_eval_surface(kg, &sd, &state, 0);
+                       shader_eval_surface(kg, &sd, &state, 0, 0);
                        out = shader_emissive_eval(kg, &sd);
                        break;
                }
index 45b8c63..94b0a37 100644 (file)
@@ -70,14 +70,11 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
                /* no path flag, we're evaluating this for all closures. that's weak but
                 * we'd have to do multiple evaluations otherwise */
                path_state_modify_bounce(state, true);
-               shader_eval_surface(kg, emission_sd, state, 0);
+               shader_eval_surface(kg, emission_sd, state, 0, 0);
                path_state_modify_bounce(state, false);
 
                /* evaluate emissive closure */
-               if(emission_sd->flag & SD_EMISSION)
-                       eval = shader_emissive_eval(kg, emission_sd);
-               else
-                       eval = make_float3(0.0f, 0.0f, 0.0f);
+               eval = shader_emissive_eval(kg, emission_sd);
        }
        
        eval *= ls->eval_fac;
index 1099064..8519e06 100644 (file)
@@ -443,7 +443,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                      sd,
                                      &isect,
                                      ray);
-               shader_eval_surface(kg, sd, state, state->flag);
+               shader_eval_surface(kg, sd, state, state->flag, MAX_CLOSURE);
                shader_prepare_closures(sd, state);
 
                /* Apply shadow catcher, holdout, emission. */
@@ -561,7 +561,7 @@ ccl_device_forceinline void kernel_path_integrate(
                bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
 
                /* Find intersection with lamps and compute emission for MIS. */
-               kernel_path_lamp_emission(kg, state, ray, throughput, &isect, emission_sd, L);
+               kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L);
 
 #ifdef __VOLUME__
                /* Volume integration. */
@@ -585,7 +585,7 @@ ccl_device_forceinline void kernel_path_integrate(
 
                /* Shade background. */
                if(!hit) {
-                       kernel_path_background(kg, state, ray, throughput, emission_sd, L);
+                       kernel_path_background(kg, state, ray, throughput, &sd, L);
                        break;
                }
                else if(path_state_ao_bounce(kg, state)) {
@@ -594,7 +594,7 @@ ccl_device_forceinline void kernel_path_integrate(
 
                /* Setup and evaluate shader. */
                shader_setup_from_ray(kg, &sd, &isect, ray);
-               shader_eval_surface(kg, &sd, state, state->flag);
+               shader_eval_surface(kg, &sd, state, state->flag, MAX_CLOSURE);
                shader_prepare_closures(&sd, state);
 
                /* Apply shadow catcher, holdout, emission. */
@@ -706,9 +706,11 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
        PathRadiance L;
        path_radiance_init(&L, kernel_data.film.use_light_pass);
 
-       ShaderData emission_sd;
+       ShaderDataTinyStorage emission_sd_storage;
+       ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+
        PathState state;
-       path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
+       path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
 
        /* Integrate. */
        kernel_path_integrate(kg,
@@ -717,7 +719,7 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
                              &ray,
                              &L,
                              buffer,
-                             &emission_sd);
+                             emission_sd);
 
        kernel_write_result(kg, buffer, sample, &L);
 }
index 3877e4f..f93366e 100644 (file)
@@ -436,10 +436,12 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
        /* shader data memory used for both volumes and surfaces, saves stack space */
        ShaderData sd;
        /* shader data used by emission, shadows, volume stacks, indirect path */
-       ShaderData emission_sd, indirect_sd;
+       ShaderDataTinyStorage emission_sd_storage;
+       ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+       ShaderData indirect_sd;
 
        PathState state;
-       path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
+       path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
 
        /* Main Loop
         * Here we only handle transparency intersections from the camera ray.
@@ -460,7 +462,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
                                            &isect,
                                            hit,
                                            &indirect_sd,
-                                           &emission_sd,
+                                           emission_sd,
                                            L);
 #endif  /* __VOLUME__ */
 
@@ -472,7 +474,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 
                /* Setup and evaluate shader. */
                shader_setup_from_ray(kg, &sd, &isect, &ray);
-               shader_eval_surface(kg, &sd, &state, state.flag);
+               shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE);
                shader_merge_closures(&sd);
 
                /* Apply shadow catcher, holdout, emission. */
@@ -481,7 +483,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
                                             &state,
                                             &ray,
                                             throughput,
-                                            &emission_sd,
+                                            emission_sd,
                                             L,
                                             buffer))
                {
@@ -513,14 +515,14 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
 #ifdef __AO__
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
-                       kernel_branched_path_ao(kg, &sd, &emission_sd, L, &state, throughput);
+                       kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
                }
 #endif  /* __AO__ */
 
 #ifdef __SUBSURFACE__
                /* bssrdf scatter to a different location on the same object */
                if(sd.flag & SD_BSSRDF) {
-                       kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd,
+                       kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd,
                                                                L, &state, &ray, throughput);
                }
 #endif  /* __SUBSURFACE__ */
@@ -534,13 +536,13 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
                                int all = (kernel_data.integrator.sample_all_lights_direct) ||
                                          (state.flag & PATH_RAY_SHADOW_CATCHER);
                                kernel_branched_path_surface_connect_light(kg,
-                                       &sd, &emission_sd, &hit_state, throughput, 1.0f, L, all);
+                                       &sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
                        }
 #endif  /* __EMISSION__ */
 
                        /* indirect light */
                        kernel_branched_path_surface_indirect_light(kg,
-                               &sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, L);
+                               &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
 
                        /* continue in case of transparency */
                        throughput *= shader_bsdf_transparency(kg, &sd);
index 1ba37ed..42f8737 100644 (file)
@@ -955,10 +955,10 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
 /* Surface Evaluation */
 
 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
-       ccl_addr_space PathState *state, int path_flag)
+       ccl_addr_space PathState *state, int path_flag, int max_closure)
 {
        sd->num_closure = 0;
-       sd->num_closure_extra = 0;
+       sd->num_closure_left = max_closure;
 
 #ifdef __OSL__
        if(kg->osl)
@@ -988,7 +988,7 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
        ccl_addr_space PathState *state, int path_flag)
 {
        sd->num_closure = 0;
-       sd->num_closure_extra = 0;
+       sd->num_closure_left = 0;
 
 #ifdef __SVM__
 #  ifdef __OSL__
@@ -1129,12 +1129,13 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
                                           ShaderData *sd,
                                           ccl_addr_space PathState *state,
                                           ccl_addr_space VolumeStack *stack,
-                                          int path_flag)
+                                          int path_flag,
+                                          int max_closure)
 {
        /* reset closures once at the start, we will be accumulating the closures
         * for all volumes in the stack into a single array of closures */
        sd->num_closure = 0;
-       sd->num_closure_extra = 0;
+       sd->num_closure_left = max_closure;
        sd->flag = 0;
        sd->object_flag = 0;
 
@@ -1184,7 +1185,7 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state)
 {
        sd->num_closure = 0;
-       sd->num_closure_extra = 0;
+       sd->num_closure_left = 0;
 
        /* this will modify sd->P */
 #ifdef __SVM__
index 8a0da6c..ab364d3 100644 (file)
@@ -86,7 +86,8 @@ ccl_device_forceinline bool shadow_handle_transparent_isect(
                shader_eval_surface(kg,
                                    shadow_sd,
                                    state,
-                                   PATH_RAY_SHADOW);
+                                   PATH_RAY_SHADOW,
+                                   0);
                path_state_modify_bounce(state, false);
                *throughput *= shader_bsdf_transparency(kg, shadow_sd);
        }
index 23a09e5..6f75601 100644 (file)
@@ -80,7 +80,7 @@ ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, const Shad
 {
        sd->flag &= ~SD_CLOSURE_FLAGS;
        sd->num_closure = 0;
-       sd->num_closure_extra = 0;
+       sd->num_closure_left = MAX_CLOSURE;
 
        if(hit) {
                Bssrdf *bssrdf = (Bssrdf *)sc;
@@ -154,7 +154,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
 
        if(bump || texture_blur > 0.0f) {
                /* average color and normal at incoming point */
-               shader_eval_surface(kg, sd, state, state_flag);
+               shader_eval_surface(kg, sd, state, state_flag, MAX_CLOSURE);
                float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
 
                /* we simply divide out the average color and multiply with the average
index cac3ef2..6d17781 100644 (file)
@@ -984,7 +984,7 @@ typedef ccl_addr_space struct ShaderData {
 
        /* Closure data, we store a fixed array of closures */
        int num_closure;
-       int num_closure_extra;
+       int num_closure_left;
        float randb_closure;
        float3 svm_closure_weight;
 
@@ -997,6 +997,11 @@ typedef ccl_addr_space struct ShaderData {
        struct ShaderClosure closure[MAX_CLOSURE];
 } ShaderData;
 
+typedef ccl_addr_space struct ShaderDataTinyStorage {
+       char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
+} ShaderDataTinyStorage;
+#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData*)shader_data_tiny_storage)
+
 /* Path State */
 
 #ifdef __VOLUME__
index 89af16a..fb3c543 100644 (file)
@@ -43,7 +43,7 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
                                                        float3 *extinction)
 {
        sd->P = P;
-       shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
+       shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, 0);
 
        if(sd->flag & SD_EXTINCTION) {
                *extinction = sd->closure_transparent_extinction;
@@ -62,7 +62,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
                                             VolumeShaderCoefficients *coeff)
 {
        sd->P = P;
-       shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
+       shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, MAX_CLOSURE);
 
        if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
                return false;
index 511334e..180c0b5 100644 (file)
@@ -122,7 +122,12 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
                                 */
                                *throughput = make_float3(1.0f, 1.0f, 1.0f);
                                path_radiance_init(L, kernel_data.film.use_light_pass);
-                               path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng_hash, sample, ray);
+                               path_state_init(kg,
+                                               AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+                                               state,
+                                               rng_hash,
+                                               sample,
+                                               ray);
 #ifdef __SUBSURFACE__
                                kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
 #endif
index 2aac66e..832b0e5 100644 (file)
@@ -98,7 +98,16 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
 
                                BsdfEval L_light;
                                bool is_lamp;
-                               if(direct_emission(kg, sd, &kernel_split_state.sd_DL_shadow[ray_index], &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+                               if(direct_emission(kg,
+                                                  sd,
+                                                  AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+                                                  &ls,
+                                                  state,
+                                                  &light_ray,
+                                                  &L_light,
+                                                  &is_lamp,
+                                                  terminate))
+                               {
                                        /* Write intermediate data to global memory to access from
                                         * the next kernel.
                                         */
index 491487f..02881da 100644 (file)
@@ -31,7 +31,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
 
        ShaderData *sd = &kernel_split_state.sd[ray_index];
        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-       ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+       ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
 
        /* GPU: no decoupled ray marching, scatter probalistically */
        int num_samples = kernel_data.integrator.volume_samples;
@@ -141,7 +141,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
                ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
                ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
                ShaderData *sd = &kernel_split_state.sd[ray_index];
-               ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+               ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
 
                bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
 
index 906bad8..bc8ca3a 100644 (file)
@@ -101,7 +101,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
                ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
 
                ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-               ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+               ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
                PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
 
                throughput = kernel_split_state.throughput[ray_index];
index 5ad62b5..fdd5422 100644 (file)
@@ -64,7 +64,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
                kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
                path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
                path_state_init(kg,
-                               &kernel_split_state.sd_DL_shadow[ray_index],
+                               AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
                                &kernel_split_state.path_state[ray_index],
                                rng_hash,
                                sample,
index 7032461..2260253 100644 (file)
@@ -50,7 +50,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
        if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
                ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
 
-               shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag);
+               shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag, MAX_CLOSURE);
 #ifdef __BRANCHED_PATH__
                if(kernel_data.integrator.branched) {
                        shader_merge_closures(&kernel_split_state.sd[ray_index]);
index 79aa2c9..b50de61 100644 (file)
@@ -34,7 +34,7 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
        }
 
        ShaderData *sd = &kernel_split_state.sd[ray_index];
-       ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+       ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
        ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
        float3 throughput = kernel_split_state.throughput[ray_index];
index b52f9a5..9a6bdfb 100644 (file)
@@ -47,7 +47,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
        float3 throughput = kernel_split_state.throughput[ray_index];
 
        BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
-       ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+       ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
        bool is_lamp = kernel_split_state.is_lamp[ray_index];
 
 #  if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
index b0e6e5f..d3464fe 100644 (file)
@@ -111,7 +111,7 @@ typedef ccl_global struct SplitBranchedState {
        SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
        SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
        SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
-       SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+       SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
        SPLIT_DATA_SUBSURFACE_ENTRIES \
        SPLIT_DATA_VOLUME_ENTRIES \
        SPLIT_DATA_BRANCHED_ENTRIES \
@@ -127,7 +127,7 @@ typedef ccl_global struct SplitBranchedState {
        SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
        SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
        SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
-       SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+       SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
        SPLIT_DATA_SUBSURFACE_ENTRIES \
        SPLIT_DATA_VOLUME_ENTRIES \
        SPLIT_DATA_BRANCHED_ENTRIES \
index 3b95785..8d774c0 100644 (file)
@@ -39,7 +39,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
 
        ShaderData *sd = &branched_state->sd;
        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-       ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+       ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
 
        for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
                ShaderClosure *sc = &sd->closure[i];
@@ -229,7 +229,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
                ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
                ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
                ShaderData *sd = &kernel_split_state.sd[ray_index];
-               ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+               ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
 
                if(sd->flag & SD_BSSRDF) {