Cycles: Remove few function arguments needed only for the split kernel
authorSergey Sharybin <sergey.vfx@gmail.com>
Thu, 14 Jan 2016 14:58:22 +0000 (19:58 +0500)
committerSergey Sharybin <sergey.vfx@gmail.com>
Thu, 28 Jan 2016 17:59:27 +0000 (18:59 +0100)
Use KernelGlobals to access all the global arrays for the intermediate
storage instead of passing all this storage things explicitly.

Tested here with Intel OpenCL, NVIDIA GTX580 and AMD Fiji, didn't see
any artifacts, so guess it's all good.

Reviewers: juicyfruit, dingto, lukasstockner97

Differential Revision: https://developer.blender.org/D1736

14 files changed:
intern/cycles/device/device_opencl.cpp
intern/cycles/kernel/kernel_emission.h
intern/cycles/kernel/kernel_globals.h
intern/cycles/kernel/kernel_shadow.h
intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
intern/cycles/kernel/split/kernel_background_buffer_update.h
intern/cycles/kernel/split/kernel_data_init.h
intern/cycles/kernel/split/kernel_direct_lighting.h
intern/cycles/kernel/split/kernel_lamp_emission.h
intern/cycles/kernel/split/kernel_shadow_blocked.h

index 6c7f5b49a77fcce3da07d49f9378d442ea5aef39..4eeec4003ff799ae2efcf9f295cfe5f7077fb980 100644 (file)
@@ -1968,8 +1968,7 @@ public:
        cl_mem AOAlpha_coop;
        cl_mem AOBSDF_coop;
        cl_mem AOLightRay_coop;
-       cl_mem Intersection_coop_AO;
-       cl_mem Intersection_coop_DL;
+       cl_mem Intersection_coop_shadow;
 
 #ifdef WITH_CYCLES_DEBUG
        /* DebugData memory */
@@ -2133,8 +2132,7 @@ public:
                BSDFEval_coop = NULL;
                ISLamp_coop = NULL;
                LightRay_coop = NULL;
-               Intersection_coop_AO = NULL;
-               Intersection_coop_DL = NULL;
+               Intersection_coop_shadow = NULL;
 
 #ifdef WITH_CYCLES_DEBUG
                debugdata_coop = NULL;
@@ -2259,6 +2257,8 @@ public:
        ccl_global type *name;
 #include "kernel_textures.h"
 #undef KERNEL_TEX
+                       void *sd_input;
+                       void *isect_shadow;
                } KernelGlobals;
 
                return sizeof(KernelGlobals);
@@ -2475,8 +2475,7 @@ public:
                release_mem_object_safe(BSDFEval_coop);
                release_mem_object_safe(ISLamp_coop);
                release_mem_object_safe(LightRay_coop);
-               release_mem_object_safe(Intersection_coop_AO);
-               release_mem_object_safe(Intersection_coop_DL);
+               release_mem_object_safe(Intersection_coop_shadow);
 #ifdef WITH_CYCLES_DEBUG
                release_mem_object_safe(debugdata_coop);
 #endif
@@ -2672,8 +2671,7 @@ public:
                        BSDFEval_coop = mem_alloc(num_global_elements * sizeof(BsdfEval));
                        ISLamp_coop = mem_alloc(num_global_elements * sizeof(int));
                        LightRay_coop = mem_alloc(num_global_elements * sizeof(Ray));
-                       Intersection_coop_AO = mem_alloc(num_global_elements * sizeof(Intersection));
-                       Intersection_coop_DL = mem_alloc(num_global_elements * sizeof(Intersection));
+                       Intersection_coop_shadow = mem_alloc(2 * num_global_elements * sizeof(Intersection));
 
 #ifdef WITH_CYCLES_DEBUG
                        debugdata_coop = mem_alloc(num_global_elements * sizeof(DebugData));
@@ -2779,6 +2777,7 @@ public:
                                        PathRadiance_coop,
                                        Ray_coop,
                                        PathState_coop,
+                                       Intersection_coop_shadow,
                                        ray_state);
 
 /* TODO(sergey): Avoid map lookup here. */
@@ -2838,7 +2837,6 @@ public:
                                0,
                                kgbuffer,
                                d_data,
-                               sd,
                                throughput_coop,
                                PathRadiance_coop,
                                Ray_coop,
@@ -2864,7 +2862,6 @@ public:
                                 0,
                                 kgbuffer,
                                 d_data,
-                                sd,
                                 per_sample_output_buffers,
                                 d_rng_state,
                                 rng_coop,
@@ -2946,7 +2943,6 @@ public:
                                kgbuffer,
                                d_data,
                                sd,
-                               sd_DL_shadow,
                                rng_coop,
                                PathState_coop,
                                ISLamp_coop,
@@ -2965,8 +2961,6 @@ public:
                                PathState_coop,
                                LightRay_coop,
                                AOLightRay_coop,
-                               Intersection_coop_AO,
-                               Intersection_coop_DL,
                                ray_state,
                                Queue_data,
                                Queue_index,
index 47d357215cfeb06046d043812c7ae5b5c4018f14..4e662f52150e9d420771924278da395b702e05bb 100644 (file)
@@ -18,15 +18,16 @@ CCL_NAMESPACE_BEGIN
 
 /* Direction Emission */
 ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
-       LightSample *ls, ccl_addr_space PathState *state, float3 I, differential3 dI, float t, float time
-#ifdef __SPLIT_KERNEL__
-       ,ShaderData *sd_input
-#endif
-)
+                                                LightSample *ls,
+                                                ccl_addr_space PathState *state,
+                                                float3 I,
+                                                differential3 dI,
+                                                float t,
+                                                float time)
 {
        /* setup shading at emitter */
 #ifdef __SPLIT_KERNEL__
-       ShaderData *sd = sd_input;
+       ShaderData *sd = kg->sd_input;
 #else
        ShaderData sd_object;
        ShaderData *sd = &sd_object;
@@ -76,12 +77,13 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
        return eval;
 }
 
-ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
-       LightSample *ls, ccl_addr_space PathState *state, Ray *ray, BsdfEval *eval, bool *is_lamp
-#ifdef __SPLIT_KERNEL__
-       , ShaderData *sd_DL
-#endif
-       )
+ccl_device_noinline bool direct_emission(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         LightSample *ls,
+                                         ccl_addr_space PathState *state,
+                                         Ray *ray,
+                                         BsdfEval *eval,
+                                         bool *is_lamp)
 {
        if(ls->pdf == 0.0f)
                return false;
@@ -91,11 +93,13 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
 
        /* evaluate closure */
 
-       float3 light_eval = direct_emissive_eval(kg, ls, state, -ls->D, dD, ls->t, ccl_fetch(sd, time)
-#ifdef __SPLIT_KERNEL__
-                                                ,sd_DL
-#endif
-                                                );
+       float3 light_eval = direct_emissive_eval(kg,
+                                                ls,
+                                                state,
+                                                -ls->D,
+                                                dD,
+                                                ls->t,
+                                                ccl_fetch(sd, time));
 
        if(is_zero(light_eval))
                return false;
@@ -193,11 +197,10 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
 
 /* Indirect Lamp Emission */
 
-ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, ccl_addr_space PathState *state, Ray *ray, float3 *emission
-#ifdef __SPLIT_KERNEL__
-                                                ,ShaderData *sd
-#endif
-                                                )
+ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
+                                                ccl_addr_space PathState *state,
+                                                Ray *ray,
+                                                float3 *emission)
 {
        bool hit_lamp = false;
 
@@ -221,11 +224,13 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, ccl_addr_spac
                }
 #endif
 
-               float3 L = direct_emissive_eval(kg, &ls, state, -ray->D, ray->dD, ls.t, ray->time
-#ifdef __SPLIT_KERNEL__
-                                               ,sd
-#endif
-                                               );
+               float3 L = direct_emissive_eval(kg,
+                                               &ls,
+                                               state,
+                                               -ray->D,
+                                               ray->dD,
+                                               ls.t,
+                                               ray->time);
 
 #ifdef __VOLUME__
                if(state->volume_stack[0].shader != SHADER_NONE) {
@@ -254,11 +259,9 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, ccl_addr_spac
 
 /* Indirect Background */
 
-ccl_device_noinline float3 indirect_background(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space Ray *ray
-#ifdef __SPLIT_KERNEL__
-                                               ,ShaderData *sd_global
-#endif
-                                               )
+ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
+                                               ccl_addr_space PathState *state,
+                                               ccl_addr_space Ray *ray)
 {
 #ifdef __BACKGROUND__
        int shader = kernel_data.background.surface_shader;
@@ -274,13 +277,13 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, ccl_addr_space
                        return make_float3(0.0f, 0.0f, 0.0f);
        }
 
-#ifdef __SPLIT_KERNEL__
        /* evaluate background closure */
+#ifdef __SPLIT_KERNEL__
        Ray priv_ray = *ray;
-       shader_setup_from_background(kg, sd_global, &priv_ray);
+       shader_setup_from_background(kg, kg->sd_input, &priv_ray);
 
        path_state_modify_bounce(state, true);
-       float3 L = shader_eval_background(kg, sd_global, state, state->flag, SHADER_CONTEXT_EMISSION);
+       float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
        path_state_modify_bounce(state, false);
 #else
        ShaderData sd;
index 17fa18909c46cf5f38e4ba9c90e09797cbeead57..49f6122f3f4e70980a9d56b5d16e38ff87581fa4 100644 (file)
@@ -86,6 +86,11 @@ typedef ccl_addr_space struct KernelGlobals {
 #define KERNEL_TEX(type, ttype, name) \
        ccl_global type *name;
 #include "kernel_textures.h"
+
+#ifdef __SPLIT_KERNEL__
+       ShaderData *sd_input;
+       Intersection *isect_shadow;
+#endif
 } KernelGlobals;
 
 #endif
index e86051095a50cb0e3b93c95172f378a2c0e70131..aefa5d5be85a7796a4bc2173503401d8e417972c 100644 (file)
@@ -186,11 +186,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
 ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
                                         ccl_addr_space PathState *state,
                                         ccl_addr_space Ray *ray_input,
-                                        float3 *shadow
-#ifdef __SPLIT_KERNEL__
-                                        , ShaderData *sd_mem, Intersection *isect_mem
-#endif
-                                       )
+                                        float3 *shadow)
 {
        *shadow = make_float3(1.0f, 1.0f, 1.0f);
 
@@ -205,7 +201,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
 #endif
 
 #ifdef __SPLIT_KERNEL__
-       Intersection *isect = isect_mem;
+       Intersection *isect = &kg->isect_shadow[TIDX];
 #else
        Intersection isect_object;
        Intersection *isect = &isect_object;
@@ -254,7 +250,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
 
                                /* setup shader data at surface */
 #ifdef __SPLIT_KERNEL__
-                               ShaderData *sd = sd_mem;
+                               ShaderData *sd = kg->sd_input;
 #else
                                ShaderData sd_object;
                                ShaderData *sd = &sd_object;
index a3eecd3128b998d91f307d60e6927331667bc56e..1914d241eb1ca8fe7373456b7dc4c120ab6ad16b 100644 (file)
@@ -19,7 +19,6 @@
 __kernel void kernel_ocl_path_trace_background_buffer_update(
         ccl_global char *kg,
         ccl_constant KernelData *data,
-        ccl_global char *sd,
         ccl_global float *per_sample_output_buffers,
         ccl_global uint *rng_state,
         ccl_global uint *rng_coop,             /* Required for buffer Update */
@@ -84,7 +83,6 @@ __kernel void kernel_ocl_path_trace_background_buffer_update(
 #endif
                enqueue_flag =
                        kernel_background_buffer_update((KernelGlobals *)kg,
-                                                       (ShaderData *)sd,
                                                        per_sample_output_buffers,
                                                        rng_state,
                                                        rng_coop,
index b8138676acd37625038de86b30f8648a8ab7e5a4..401c4467afa7338541fca42674aed01d8044feee 100644 (file)
@@ -111,6 +111,7 @@ __kernel void kernel_ocl_path_trace_data_init(
         PathRadiance *PathRadiance_coop,             /* PathRadiance array to store PathRadiance values for all rays */
         ccl_global Ray *Ray_coop,                    /* Ray array to store Ray information for all rays */
         ccl_global PathState *PathState_coop,        /* PathState array to store PathState information for all rays */
+        Intersection *Intersection_coop_shadow,
         ccl_global char *ray_state,                  /* Stores information on current state of a ray */
 
 #define KERNEL_TEX(type, ttype, name)                                   \
@@ -206,6 +207,7 @@ __kernel void kernel_ocl_path_trace_data_init(
                         PathRadiance_coop,
                         Ray_coop,
                         PathState_coop,
+                        Intersection_coop_shadow,
                         ray_state,
 
 #define KERNEL_TEX(type, ttype, name) name,
index d4a7cffb4035e0806c43bdf4a3365b33bb9fbeae..c6a2c8d050c390b7a43b5ddd528f40e5dbc99ac5 100644 (file)
@@ -20,7 +20,6 @@ __kernel void kernel_ocl_path_trace_direct_lighting(
         ccl_global char *kg,
         ccl_constant KernelData *data,
         ccl_global char *sd,                    /* Required for direct lighting */
-        ccl_global char *sd_DL,                 /* Required for direct lighting */
         ccl_global uint *rng_coop,              /* Required for direct lighting */
         ccl_global PathState *PathState_coop,   /* Required for direct lighting */
         ccl_global int *ISLamp_coop,            /* Required for direct lighting */
@@ -63,7 +62,6 @@ __kernel void kernel_ocl_path_trace_direct_lighting(
 #endif
                enqueue_flag = kernel_direct_lighting((KernelGlobals *)kg,
                                                      (ShaderData *)sd,
-                                                     (ShaderData *)sd_DL,
                                                      rng_coop,
                                                      PathState_coop,
                                                      ISLamp_coop,
index 5215a0e0827aac59646e7f3b775e1308a48abe56..3ad9fe596170733d3ce5d7dda5820a6256ed0e04 100644 (file)
@@ -19,7 +19,6 @@
 __kernel void kernel_ocl_path_trace_lamp_emission(
         ccl_global char *kg,
         ccl_constant KernelData *data,
-        ccl_global char *sd,                   /* Required for lamp emission */
         ccl_global float3 *throughput_coop,    /* Required for lamp emission */
         PathRadiance *PathRadiance_coop,       /* Required for lamp emission */
         ccl_global Ray *Ray_coop,              /* Required for lamp emission */
@@ -69,7 +68,6 @@ __kernel void kernel_ocl_path_trace_lamp_emission(
        }
 
        kernel_lamp_emission((KernelGlobals *)kg,
-                            (ShaderData *)sd,
                             throughput_coop,
                             PathRadiance_coop,
                             Ray_coop,
index 260a601946f2a05ab17e4fe9f014959868362772..ba0a9a80c0753326ed8944991e609324eebf1ddf 100644 (file)
@@ -23,8 +23,6 @@ __kernel void kernel_ocl_path_trace_shadow_blocked(
         ccl_global PathState *PathState_coop,  /* Required for shadow blocked */
         ccl_global Ray *LightRay_dl_coop,      /* Required for direct lighting's shadow blocked */
         ccl_global Ray *LightRay_ao_coop,      /* Required for AO's shadow blocked */
-        Intersection *Intersection_coop_AO,
-        Intersection *Intersection_coop_DL,
         ccl_global char *ray_state,
         ccl_global int *Queue_data,            /* Queue memory */
         ccl_global int *Queue_index,           /* Tracks the number of elements in each queue */
@@ -73,8 +71,6 @@ __kernel void kernel_ocl_path_trace_shadow_blocked(
                              PathState_coop,
                              LightRay_dl_coop,
                              LightRay_ao_coop,
-                             Intersection_coop_AO,
-                             Intersection_coop_DL,
                              ray_state,
                              total_num_rays,
                              shadow_blocked_type,
index e02e55b5f186aa92797c2aeaf1e47ffd26458887..3d12a3dd9933d6799396611c85b68891eb3abd8c 100644 (file)
@@ -71,7 +71,6 @@
  */
 ccl_device char kernel_background_buffer_update(
         KernelGlobals *kg,
-        ShaderData *sd,
         ccl_global float *per_sample_output_buffers,
         ccl_global uint *rng_state,
         ccl_global uint *rng_coop,             /* Required for buffer Update */
@@ -158,7 +157,7 @@ ccl_device char kernel_background_buffer_update(
                if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
 #ifdef __BACKGROUND__
                        /* sample background shader */
-                       float3 L_background = indirect_background(kg, state, ray, sd);
+                       float3 L_background = indirect_background(kg, state, ray);
                        path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
 #endif
                        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
index 27defe2fb9aafb830120cbd9d4abb46071455176..6993a78a7891ff4d3dabd5280c5a5d0f436f1881 100644 (file)
@@ -145,6 +145,7 @@ ccl_device void kernel_data_init(
         PathRadiance *PathRadiance_coop,             /* PathRadiance array to store PathRadiance values for all rays */
         ccl_global Ray *Ray_coop,                    /* Ray array to store Ray information for all rays */
         ccl_global PathState *PathState_coop,        /* PathState array to store PathState information for all rays */
+        Intersection *Intersection_coop_shadow,
         ccl_global char *ray_state,                  /* Stores information on current state of a ray */
 
 #define KERNEL_TEX(type, ttype, name)                                   \
@@ -170,6 +171,8 @@ ccl_device void kernel_data_init(
         int parallel_samples)                        /* Number of samples to be processed in parallel */
 {
        kg->data = data;
+       kg->sd_input = sd_DL_shadow;
+       kg->isect_shadow = Intersection_coop_shadow;
 #define KERNEL_TEX(type, ttype, name) \
        kg->name = name;
 #include "../kernel_textures.h"
index 20d1728f9de216b8689aac7fd8fcd020c041579a..c7a2aa6426c098ac6b3a35279b0d26702c310d9d 100644 (file)
@@ -36,7 +36,6 @@
  * kg (globals) -------------------------------------|                             |
  * queuesize ----------------------------------------|                             |
  *
- * note on sd_DL : sd_DL is neither input nor output to this kernel; sd_DL is filled and consumed in this kernel itself.
  * Note on Queues :
  * This kernel only reads from the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue and processes
  * only the rays of state RAY_ACTIVE; If a ray needs to execute the corresponding shadow_blocked
@@ -51,7 +50,6 @@
 ccl_device char kernel_direct_lighting(
         KernelGlobals *kg,
         ShaderData *sd,                         /* Required for direct lighting */
-        ShaderData *sd_DL,                      /* Required for direct lighting */
         ccl_global uint *rng_coop,              /* Required for direct lighting */
         ccl_global PathState *PathState_coop,   /* Required for direct lighting */
         ccl_global int *ISLamp_coop,            /* Required for direct lighting */
@@ -90,9 +88,7 @@ ccl_device char kernel_direct_lighting(
 
                        BsdfEval L_light;
                        bool is_lamp;
-                       if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp,
-                                          sd_DL))
-                       {
+                       if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
                                /* Write intermediate data to global memory to access from
                                 * the next kernel.
                                 */
index 6329f3ae943dc3a2da894a86d95111dc67d81e9a..b651f79d536b4c1e7f9d61d0b053789aa06e0210 100644 (file)
  * sw -------------------------------------------------|                           |
  * sh -------------------------------------------------|                           |
  * parallel_samples -----------------------------------|                           |
- *
- * note : sd is neither input nor output. Its just filled and consumed in the same, kernel_lamp_emission, kernel.
  */
 ccl_device void kernel_lamp_emission(
         KernelGlobals *kg,
-        ShaderData *sd,                        /* Required for lamp emission */
         ccl_global float3 *throughput_coop,    /* Required for lamp emission */
         PathRadiance *PathRadiance_coop,       /* Required for lamp emission */
         ccl_global Ray *Ray_coop,              /* Required for lamp emission */
@@ -79,7 +76,7 @@ ccl_device void kernel_lamp_emission(
                        /* intersect with lamp */
                        float3 emission;
 
-                       if(indirect_lamp_emission(kg, state, &light_ray, &emission, sd)) {
+                       if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
                                path_radiance_accum_emission(L, throughput, emission, state->bounce);
                        }
                }
index 5f515a98783007cc6e6863508397de7eb355d3eb..fce554179e3a088c4a0f797f4cb8f2f9fed992e8 100644 (file)
@@ -51,8 +51,6 @@ ccl_device void kernel_shadow_blocked(
         ccl_global PathState *PathState_coop,  /* Required for shadow blocked */
         ccl_global Ray *LightRay_dl_coop,      /* Required for direct lighting's shadow blocked */
         ccl_global Ray *LightRay_ao_coop,      /* Required for AO's shadow blocked */
-        Intersection *Intersection_coop_AO,
-        Intersection *Intersection_coop_DL,
         ccl_global char *ray_state,
         int total_num_rays,
         char shadow_blocked_type,
@@ -67,25 +65,17 @@ ccl_device void kernel_shadow_blocked(
                ccl_global PathState *state = &PathState_coop[ray_index];
                ccl_global Ray *light_ray_dl_global = &LightRay_dl_coop[ray_index];
                ccl_global Ray *light_ray_ao_global = &LightRay_ao_coop[ray_index];
-               Intersection *isect_ao_global = &Intersection_coop_AO[ray_index];
-               Intersection *isect_dl_global = &Intersection_coop_DL[ray_index];
 
                ccl_global Ray *light_ray_global =
                        shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO
                                ? light_ray_ao_global
                                : light_ray_dl_global;
-               Intersection *isect_global =
-                       shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO
-                               ? isect_ao_global
-                               : isect_dl_global;
 
                float3 shadow;
                update_path_radiance = !(shadow_blocked(kg,
                                                        state,
                                                        light_ray_global,
-                                                       &shadow,
-                                                       sd_shadow,
-                                                       isect_global));
+                                                       &shadow));
 
                /* We use light_ray_global's P and t to store shadow and
                 * update_path_radiance.