Code refactor: split displace/background into separate kernels, remove luma.
authorBrecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 5 Oct 2017 13:17:09 +0000 (15:17 +0200)
committerBrecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 5 Oct 2017 15:57:58 +0000 (17:57 +0200)
19 files changed:
intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_multi.cpp
intern/cycles/device/device_network.cpp
intern/cycles/device/device_network.h
intern/cycles/device/device_task.cpp
intern/cycles/device/device_task.h
intern/cycles/device/opencl/opencl_base.cpp
intern/cycles/kernel/kernel_bake.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/kernel/kernels/cpu/kernel_cpu.h
intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
intern/cycles/kernel/kernels/cuda/kernel.cu
intern/cycles/kernel/kernels/opencl/kernel.cl
intern/cycles/kernel/osl/osl_shader.cpp
intern/cycles/kernel/osl/osl_shader.h
intern/cycles/render/bake.cpp
intern/cycles/render/light.cpp
intern/cycles/render/mesh_displace.cpp

index ff34f4f9ce471d7c7ef7fd8e9655121491f32667..19e3c0a9075b2a3eb0ee20361b69eaf5ab055b72 100644 (file)
@@ -171,10 +171,10 @@ public:
 
        DeviceRequestedFeatures requested_features;
 
-       KernelFunctions<void(*)(KernelGlobals *, float *, int, int, int, int, int)>   path_trace_kernel;
-       KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>       convert_to_half_float_kernel;
-       KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>       convert_to_byte_kernel;
-       KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
+       KernelFunctions<void(*)(KernelGlobals *, float *, int, int, int, int, int)>             path_trace_kernel;
+       KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
+       KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
+       KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>   shader_kernel;
 
        KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel;
        KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int)>               filter_get_feature_kernel;
@@ -756,7 +756,6 @@ public:
                                shader_kernel()(&kg,
                                                (uint4*)task.shader_input,
                                                (float4*)task.shader_output,
-                                               (float*)task.shader_output_luma,
                                                task.shader_eval_type,
                                                task.shader_filter,
                                                x,
index 8cfc5332e94a155a1a1b64bdc05367714dd6baef..734edcff5034ce826d90f4c38608d7c774e328eb 100644 (file)
@@ -1424,14 +1424,16 @@ public:
                CUfunction cuShader;
                CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
                CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
-               CUdeviceptr d_output_luma = cuda_device_ptr(task.shader_output_luma);
 
                /* get kernel function */
                if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
                        cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
                }
+               else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+                       cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
+               }
                else {
-                       cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"));
+                       cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
                }
 
                /* do tasks in smaller chunks, so we can cancel it */
@@ -1450,9 +1452,6 @@ public:
                                int arg = 0;
                                args[arg++] = &d_input;
                                args[arg++] = &d_output;
-                               if(task.shader_eval_type < SHADER_EVAL_BAKE) {
-                                       args[arg++] = &d_output_luma;
-                               }
                                args[arg++] = &task.shader_eval_type;
                                if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
                                        args[arg++] = &task.shader_filter;
index 164ed50bdf69d4ac0fd8f1b544e56473c6183431..b17b972b06fd773f22147c42eaa7f8d776ab508a 100644 (file)
@@ -383,7 +383,6 @@ public:
                                if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
                                if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
                                if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
-                               if(task.shader_output_luma) subtask.shader_output_luma = sub.ptr_map[task.shader_output_luma];
 
                                sub.device->task_add(subtask);
                        }
index 4ff8647f66b446e4d94f10cdb9a27ce2c4d55609..deea59f1d23c1a020a846bb18a31e7c960e68d9f 100644 (file)
@@ -660,10 +660,6 @@ protected:
                        if(task.shader_output)
                                task.shader_output = device_ptr_from_client_pointer(task.shader_output);
 
-                       if(task.shader_output_luma)
-                               task.shader_output_luma = device_ptr_from_client_pointer(task.shader_output_luma);
-
-
                        task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
                        task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
                        task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);
index 7bfebaf5aec2d578b748f5a70c9248d68463b82a..3d3bd99dfe7df54647bfb985cd8a83429ef670ac 100644 (file)
@@ -132,7 +132,7 @@ public:
                archive & type & task.x & task.y & task.w & task.h;
                archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
                archive & task.offset & task.stride;
-               archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type;
+               archive & task.shader_input & task.shader_output & task.shader_eval_type;
                archive & task.shader_x & task.shader_w;
                archive & task.need_finish_queue;
        }
@@ -291,7 +291,7 @@ public:
                *archive & type & task.x & task.y & task.w & task.h;
                *archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
                *archive & task.offset & task.stride;
-               *archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type;
+               *archive & task.shader_input & task.shader_output & task.shader_eval_type;
                *archive & task.shader_x & task.shader_w;
                *archive & task.need_finish_queue;
 
index 3bc4c3102833172690b7e3c0bc88d9acfa7844f9..3c7d24fb5b790f34b6b102cebfc94f1f96714590 100644 (file)
@@ -31,7 +31,7 @@ CCL_NAMESPACE_BEGIN
 DeviceTask::DeviceTask(Type type_)
 : type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
   sample(0), num_samples(1),
-  shader_input(0), shader_output(0), shader_output_luma(0),
+  shader_input(0), shader_output(0),
   shader_eval_type(0), shader_filter(0), shader_x(0), shader_w(0)
 {
        last_update_time = time_dt();
index 44a1efff1f5eb546e75b672a7f56d62474b2cab2..b9658eb978fb95e59ce9a931bfe969a6f21613e4 100644 (file)
@@ -46,7 +46,7 @@ public:
        int offset, stride;
 
        device_ptr shader_input;
-       device_ptr shader_output, shader_output_luma;
+       device_ptr shader_output;
        int shader_eval_type;
        int shader_filter;
        int shader_x, shader_w;
index 8095611f09977e7c03357878662d1eeb26d8a325..3db3efd110329d899a1511d40429065cefab5b01 100644 (file)
@@ -228,7 +228,8 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
        base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
        base_program.add_kernel(ustring("convert_to_byte"));
        base_program.add_kernel(ustring("convert_to_half_float"));
-       base_program.add_kernel(ustring("shader"));
+       base_program.add_kernel(ustring("displace"));
+       base_program.add_kernel(ustring("background"));
        base_program.add_kernel(ustring("bake"));
        base_program.add_kernel(ustring("zero_buffer"));
 
@@ -1112,7 +1113,6 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
        cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
        cl_mem d_input = CL_MEM_PTR(task.shader_input);
        cl_mem d_output = CL_MEM_PTR(task.shader_output);
-       cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma);
        cl_int d_shader_eval_type = task.shader_eval_type;
        cl_int d_shader_filter = task.shader_filter;
        cl_int d_shader_x = task.shader_x;
@@ -1121,10 +1121,15 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
 
        cl_kernel kernel;
 
-       if(task.shader_eval_type >= SHADER_EVAL_BAKE)
+       if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
                kernel = base_program(ustring("bake"));
-       else
-               kernel = base_program(ustring("shader"));
+       }
+       else if(task.shader_eval_type >= SHADER_EVAL_DISPLACE) {
+               kernel = base_program(ustring("displace"));
+       }
+       else {
+               kernel = base_program(ustring("background"));
+       }
 
        cl_uint start_arg_index =
                kernel_set_args(kernel,
@@ -1133,12 +1138,6 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
                                d_input,
                                d_output);
 
-       if(task.shader_eval_type < SHADER_EVAL_BAKE) {
-               start_arg_index += kernel_set_args(kernel,
-                                                  start_arg_index,
-                                                  d_output_luma);
-       }
-
        set_kernel_arg_buffers(kernel, &start_arg_index);
 
        start_arg_index += kernel_set_args(kernel,
index 0d10e17a5936d98008563ff08b24513dc4c51a29..84d8d84d48633db7fbadef761c376a576e2ee543 100644 (file)
@@ -493,78 +493,69 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 
 #endif  /* __BAKING__ */
 
-ccl_device void kernel_shader_evaluate(KernelGlobals *kg,
-                                       ccl_global uint4 *input,
-                                       ccl_global float4 *output,
-                                       ccl_global float *output_luma,
-                                       ShaderEvalType type,
-                                       int i,
-                                       int sample)
+ccl_device void kernel_displace_evaluate(KernelGlobals *kg,
+                                         ccl_global uint4 *input,
+                                         ccl_global float4 *output,
+                                         int i)
 {
        ShaderData sd;
        PathState state = {0};
        uint4 in = input[i];
-       float3 out;
 
-       if(type == SHADER_EVAL_DISPLACE) {
-               /* setup shader data */
-               int object = in.x;
-               int prim = in.y;
-               float u = __uint_as_float(in.z);
-               float v = __uint_as_float(in.w);
+       /* setup shader data */
+       int object = in.x;
+       int prim = in.y;
+       float u = __uint_as_float(in.z);
+       float v = __uint_as_float(in.w);
 
-               shader_setup_from_displace(kg, &sd, object, prim, u, v);
+       shader_setup_from_displace(kg, &sd, object, prim, u, v);
 
-               /* evaluate */
-               float3 P = sd.P;
-               shader_eval_displacement(kg, &sd, &state);
-               out = sd.P - P;
+       /* evaluate */
+       float3 P = sd.P;
+       shader_eval_displacement(kg, &sd, &state);
+       float3 D = sd.P - P;
 
-               object_inverse_dir_transform(kg, &sd, &out);
-       }
-       else { // SHADER_EVAL_BACKGROUND
-               /* setup ray */
-               Ray ray;
-               float u = __uint_as_float(in.x);
-               float v = __uint_as_float(in.y);
-
-               ray.P = make_float3(0.0f, 0.0f, 0.0f);
-               ray.D = equirectangular_to_direction(u, v);
-               ray.t = 0.0f;
+       object_inverse_dir_transform(kg, &sd, &D);
+
+       /* write output */
+       output[i] += make_float4(D.x, D.y, D.z, 0.0f);
+}
+
+ccl_device void kernel_background_evaluate(KernelGlobals *kg,
+                                           ccl_global uint4 *input,
+                                           ccl_global float4 *output,
+                                           int i)
+{
+       ShaderData sd;
+       PathState state = {0};
+       uint4 in = input[i];
+
+       /* setup ray */
+       Ray ray;
+       float u = __uint_as_float(in.x);
+       float v = __uint_as_float(in.y);
+
+       ray.P = make_float3(0.0f, 0.0f, 0.0f);
+       ray.D = equirectangular_to_direction(u, v);
+       ray.t = 0.0f;
 #ifdef __CAMERA_MOTION__
-               ray.time = 0.5f;
+       ray.time = 0.5f;
 #endif
 
 #ifdef __RAY_DIFFERENTIALS__
-               ray.dD = differential3_zero();
-               ray.dP = differential3_zero();
+       ray.dD = differential3_zero();
+       ray.dP = differential3_zero();
 #endif
 
-               /* setup shader data */
-               shader_setup_from_background(kg, &sd, &ray);
+       /* setup shader data */
+       shader_setup_from_background(kg, &sd, &ray);
+
+       /* evaluate */
+       int flag = 0; /* we can't know which type of BSDF this is for */
+       float3 color = shader_eval_background(kg, &sd, &state, flag);
 
-               /* evaluate */
-               int flag = 0; /* we can't know which type of BSDF this is for */
-               out = shader_eval_background(kg, &sd, &state, flag);
-       }
-       
        /* write output */
-       if(sample == 0) {
-               if(output != NULL) {
-                       output[i] = make_float4(out.x, out.y, out.z, 0.0f);
-               }
-               if(output_luma != NULL) {
-                       output_luma[i] = average(out);
-               }
-       }
-       else {
-               if(output != NULL) {
-                       output[i] += make_float4(out.x, out.y, out.z, 0.0f);
-               }
-               if(output_luma != NULL) {
-                       output_luma[i] += average(out);
-               }
-       }
+       output[i] += make_float4(color.x, color.y, color.z, 0.0f);
 }
 
 CCL_NAMESPACE_END
index eeb4eb0097f876c8d6140ed5f113686ba3b07e8c..695d4fc380a0d1292f5e5a0ecba3cda3bc5a92ee 100644 (file)
@@ -1204,7 +1204,7 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_
 #ifdef __SVM__
 #  ifdef __OSL__
        if(kg->osl)
-               OSLShader::eval_displacement(kg, sd);
+               OSLShader::eval_displacement(kg, sd, state);
        else
 #  endif
        {
index f5ebf4ad73fc40478176530adfb982078fb9ca3d..6bdb8546a2487bce066407532dedd532de5169f4 100644 (file)
@@ -41,7 +41,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
 void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
                                        uint4 *input,
                                        float4 *output,
-                                       float *output_luma,
                                        int type,
                                        int filter,
                                        int i,
index 3fefc1b7e9c6da9cdccf23a58e657c2c3a4380da..fdeb7dcd3e409703cb3ae6c0cec52af133b14db4 100644 (file)
@@ -149,7 +149,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
 void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
                                        uint4 *input,
                                        float4 *output,
-                                       float *output_luma,
                                        int type,
                                        int filter,
                                        int i,
@@ -160,7 +159,6 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
        STUB_ASSERT(KERNEL_ARCH, shader);
 #else
        if(type >= SHADER_EVAL_BAKE) {
-               kernel_assert(output_luma == NULL);
 #  ifdef __BAKING__
                kernel_bake_evaluate(kg,
                                     input,
@@ -172,14 +170,11 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
                                     sample);
 #  endif
        }
+       else if(type == SHADER_EVAL_DISPLACE) {
+               kernel_displace_evaluate(kg, input, output, i);
+       }
        else {
-               kernel_shader_evaluate(kg,
-                                      input,
-                                      output,
-                                      output_luma,
-                                      (ShaderEvalType)type,
-                                      i,
-                                      sample);
+               kernel_background_evaluate(kg, input, output, i);
        }
 #endif /* KERNEL_STUB */
 }
index e72edfa7bdf6c08e1727ec0ba58d34e079eaefe4..1ac6afd167a4a160f763d469144396da775dfc9a 100644 (file)
@@ -91,26 +91,37 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input,
-                   float4 *output,
-                   float *output_luma,
-                   int type,
-                   int sx,
-                   int sw,
-                   int offset,
-                   int sample)
+kernel_cuda_displace(uint4 *input,
+                     float4 *output,
+                     int type,
+                     int sx,
+                     int sw,
+                     int offset,
+                     int sample)
 {
        int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
 
        if(x < sx + sw) {
                KernelGlobals kg;
-               kernel_shader_evaluate(&kg,
-                                      input,
-                                      output,
-                                      output_luma,
-                                      (ShaderEvalType)type, 
-                                      x,
-                                      sample);
+               kernel_displace_evaluate(&kg, input, output, x);
+       }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_background(uint4 *input,
+                       float4 *output,
+                       int type,
+                       int sx,
+                       int sw,
+                       int offset,
+                       int sample)
+{
+       int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+
+       if(x < sx + sw) {
+               KernelGlobals kg;
+               kernel_background_evaluate(&kg, input, output, x);
        }
 }
 
index 521b86121ff550ea0497d9a54e8432bcf452776b..66b6e19de8428d8c8ad6079a1277fe07151d5c79 100644 (file)
@@ -72,11 +72,10 @@ __kernel void kernel_ocl_path_trace(
 
 #else  /* __COMPILE_ONLY_MEGAKERNEL__ */
 
-__kernel void kernel_ocl_shader(
+__kernel void kernel_ocl_displace(
        ccl_constant KernelData *data,
        ccl_global uint4 *input,
        ccl_global float4 *output,
-       ccl_global float *output_luma,
 
        KERNEL_BUFFER_PARAMS,
 
@@ -92,13 +91,29 @@ __kernel void kernel_ocl_shader(
        int x = sx + ccl_global_id(0);
 
        if(x < sx + sw) {
-               kernel_shader_evaluate(kg,
-                                      input,
-                                      output,
-                                      output_luma,
-                                      (ShaderEvalType)type,
-                                      x,
-                                      sample);
+               kernel_displace_evaluate(kg, input, output, x);
+       }
+}
+__kernel void kernel_ocl_background(
+       ccl_constant KernelData *data,
+       ccl_global uint4 *input,
+       ccl_global float4 *output,
+
+       KERNEL_BUFFER_PARAMS,
+
+       int type, int sx, int sw, int offset, int sample)
+{
+       KernelGlobals kglobals, *kg = &kglobals;
+
+       kg->data = data;
+
+       kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+       kernel_set_buffer_info(kg);
+
+       int x = sx + ccl_global_id(0);
+
+       if(x < sx + sw) {
+               kernel_background_evaluate(kg, input, output, x);
        }
 }
 
index 9a37e0987aa096554e1cd94ed52b2a0fd21e155d..6b3a996ca12a56254c051e61a1767d0a68ad325e 100644 (file)
@@ -348,14 +348,12 @@ void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state,
 
 /* Displacement */
 
-void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd)
+void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state)
 {
        /* setup shader globals from shader data */
        OSLThreadData *tdata = kg->osl_tdata;
 
-       PathState state = {0};
-
-       shaderdata_to_shaderglobals(kg, sd, &state, 0, tdata);
+       shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
 
        /* execute shader */
        OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
index f7020d1223d09a5a7e9f73318331ab491048ed39..6b392b25cf77cf1224dc5d7a12510f247cfbd16c 100644 (file)
@@ -56,7 +56,7 @@ public:
        static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
        static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
        static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag);
-       static void eval_displacement(KernelGlobals *kg, ShaderData *sd);
+       static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state);
 
        /* attributes */
        static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc);
index c0fcd517390908adf644744d29be5ac97c373694..2bedf3668f7af9343f7b30e3839170bb5f30c6b5 100644 (file)
@@ -174,6 +174,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
                device->mem_alloc("bake_input", d_input, MEM_READ_ONLY);
                device->mem_copy_to(d_input);
                device->mem_alloc("bake_output", d_output, MEM_READ_WRITE);
+               device->mem_zero(d_output);
 
                DeviceTask task(DeviceTask::SHADER);
                task.shader_input = d_input.device_pointer;
index 4adc00bc8391ccc69d048ec346953393d9018754..6a7f985b756f51cd623f3003a34c19b22ca1462d 100644 (file)
@@ -60,6 +60,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
        device->mem_alloc("shade_background_pixels_input", d_input, MEM_READ_ONLY);
        device->mem_copy_to(d_input);
        device->mem_alloc("shade_background_pixels_output", d_output, MEM_WRITE_ONLY);
+       device->mem_zero(d_output);
 
        DeviceTask main_task(DeviceTask::SHADER);
        main_task.shader_input = d_input.device_pointer;
index 4ca20cf7ef3e2de95db0a91948aed42f188a1113..350a56bf185a3fb1ec0514ef7c1bd3fa1b4979e8 100644 (file)
@@ -124,6 +124,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
        device->mem_alloc("displace_input", d_input, MEM_READ_ONLY);
        device->mem_copy_to(d_input);
        device->mem_alloc("displace_output", d_output, MEM_WRITE_ONLY);
+       device->mem_zero(d_output);
 
        DeviceTask task(DeviceTask::SHADER);
        task.shader_input = d_input.device_pointer;