Cycles Denoising: Add more robust outlier heuristic to avoid artifacts
author Lukas Stockner <lukas.stockner@freenet.de>
Thu, 18 May 2017 01:03:18 +0000 (03:03 +0200)
committer Lukas Stockner <lukas.stockner@freenet.de>
Thu, 18 May 2017 19:55:56 +0000 (21:55 +0200)
Extremely bright pixels in the rendered image cause the denoising algorithm
to produce extremely noticeable artifacts. Therefore, a heuristic is needed
to exclude these pixels from the filtering process.

The new approach calculates the 75th percentile of the brightness in the 5x5
neighborhood of each pixel and flags the pixel if it is more than twice as
bright as that value.

During the reconstruction process, flagged pixels are skipped. Therefore,
they don't cause any problems for neighboring pixels, and the outlier pixels
themselves are replaced by a prediction of their actual value based on their
feature pass values and the neighboring pixels.

Therefore, the denoiser now also works as a smarter despeckling filter that
uses a more accurate prediction of the pixel instead of a simple average.
This can be used even if denoising isn't wanted by setting the denoising
radius to 1.

14 files changed:
intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_denoising.cpp
intern/cycles/device/device_denoising.h
intern/cycles/device/opencl/opencl.h
intern/cycles/device/opencl/opencl_base.cpp
intern/cycles/kernel/filter/filter_features.h
intern/cycles/kernel/filter/filter_features_sse.h
intern/cycles/kernel/filter/filter_prefilter.h
intern/cycles/kernel/filter/filter_reconstruction.h
intern/cycles/kernel/kernels/cpu/filter_cpu.h
intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
intern/cycles/kernel/kernels/cuda/filter.cu
intern/cycles/kernel/kernels/opencl/filter.cl

index e219ce3cb8e8ee0560b9183d492db85d1a6c0f03..fea5504904906bd90e62418322a3a0ff757b994a 100644 (file)
@@ -176,6 +176,7 @@ public:
 
        KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
        KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)>               filter_get_feature_kernel;
+       KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)>                                     filter_detect_outliers_kernel;
        KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)>                                     filter_combine_halves_kernel;
 
        KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
@@ -210,6 +211,7 @@ public:
          REGISTER_KERNEL(shader),
          REGISTER_KERNEL(filter_divide_shadow),
          REGISTER_KERNEL(filter_get_feature),
+         REGISTER_KERNEL(filter_detect_outliers),
          REGISTER_KERNEL(filter_combine_halves),
          REGISTER_KERNEL(filter_nlm_calc_difference),
          REGISTER_KERNEL(filter_nlm_blur),
@@ -594,6 +596,26 @@ public:
                return true;
        }
 
+       bool denoising_detect_outliers(device_ptr image_ptr,
+                                      device_ptr variance_ptr,
+                                      device_ptr depth_ptr,
+                                      device_ptr output_ptr,
+                                      DenoisingTask *task)
+       {
+               for(int y = task->rect.y; y < task->rect.w; y++) {
+                       for(int x = task->rect.x; x < task->rect.z; x++) {
+                               filter_detect_outliers_kernel()(x, y,
+                                                               (float*) image_ptr,
+                                                               (float*) variance_ptr,
+                                                               (float*) depth_ptr,
+                                                               (float*) output_ptr,
+                                                               &task->rect.x,
+                                                               task->buffer.pass_stride);
+                       }
+               }
+               return true;
+       }
+
        void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
        {
                float *render_buffer = (float*)tile.buffer;
@@ -632,6 +654,7 @@ public:
                denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
                denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
                denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+               denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
                denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising);
 
                denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
index 968ee5bc487e6e664e6a1a3f0d3c033732c1c976..4d8ca487799ff0334968d942d9aec4a81beb6f7a 100644 (file)
@@ -1248,6 +1248,38 @@ public:
                return !have_error();
        }
 
+       bool denoising_detect_outliers(device_ptr image_ptr,
+                                      device_ptr variance_ptr,
+                                      device_ptr depth_ptr,
+                                      device_ptr output_ptr,
+                                      DenoisingTask *task)
+       {
+               if(have_error())
+                       return false;
+
+               cuda_push_context();
+
+               CUfunction cuFilterDetectOutliers;
+               cuda_assert(cuModuleGetFunction(&cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
+               cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
+               CUDA_GET_BLOCKSIZE(cuFilterDetectOutliers,
+                                  task->rect.z-task->rect.x,
+                                  task->rect.w-task->rect.y);
+
+               void *args[] = {&image_ptr,
+                               &variance_ptr,
+                               &depth_ptr,
+                               &output_ptr,
+                               &task->rect,
+                               &task->buffer.pass_stride};
+
+               CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
+               cuda_assert(cuCtxSynchronize());
+
+               cuda_pop_context();
+               return !have_error();
+       }
+
        void denoise(RenderTile &rtile, const DeviceTask &task)
        {
                DenoisingTask denoising(this);
@@ -1258,6 +1290,7 @@ public:
                denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
                denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
                denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+               denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
                denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising);
 
                denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
index f5543d7c82dd030377aaaf608a79ceb8819dbe87..613bd9112cf3b890adbd9c4664e2211fde8084bc 100644 (file)
@@ -159,11 +159,25 @@ bool DenoisingTask::run_denoising()
                int mean_to[]       = { 8,  9, 10};
                int variance_to[]   = {11, 12, 13};
                int num_color_passes = 3;
+
+               device_only_memory<float> temp_color;
+               temp_color.resize(3*buffer.pass_stride);
+               device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE);
+
                for(int pass = 0; pass < num_color_passes; pass++) {
-                       device_sub_ptr color_pass    (device, buffer.mem,     mean_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
+                       device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
                        device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
                        functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
                }
+
+               {
+                       device_sub_ptr depth_pass    (device, buffer.mem,                                 0,   buffer.pass_stride, MEM_READ_WRITE);
+                       device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+                       device_sub_ptr output_pass   (device, buffer.mem,     mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
+                       functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
+               }
+
+               device->mem_free(temp_color);
        }
 
        storage.w = filter_area.z;
index 86d8eb643860843049b367c4eb575e242c11c6e4..25b93c2ad74d0c253d4577ae8d1c1d83055b152a 100644 (file)
@@ -82,6 +82,11 @@ public:
                              device_ptr mean_ptr,
                              device_ptr variance_ptr
                              )> get_feature;
+               function<bool(device_ptr image_ptr,
+                             device_ptr variance_ptr,
+                             device_ptr depth_ptr,
+                             device_ptr output_ptr
+                             )> detect_outliers;
                function<bool(device_ptr*)> set_tiles;
        } functions;
 
index a458ca6bf646bf7b0b1f3b5dd594e577e24873df..27e196d1e684bd84a0a0f65b1e56ae1233d321eb 100644 (file)
@@ -411,6 +411,11 @@ protected:
                                   device_ptr mean_ptr,
                                   device_ptr variance_ptr,
                                   DenoisingTask *task);
+       bool denoising_detect_outliers(device_ptr image_ptr,
+                                      device_ptr variance_ptr,
+                                      device_ptr depth_ptr,
+                                      device_ptr output_ptr,
+                                      DenoisingTask *task);
        bool denoising_set_tiles(device_ptr *buffers,
                                 DenoisingTask *task);
 
index ae1a7b917c39b8c89da3a2c1483b9268f85f354e..8f33014a00e10628a6423854b2252474388c0123 100644 (file)
@@ -216,6 +216,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
        denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
        denoising_program.add_kernel(ustring("filter_divide_shadow"));
        denoising_program.add_kernel(ustring("filter_get_feature"));
+       denoising_program.add_kernel(ustring("filter_detect_outliers"));
        denoising_program.add_kernel(ustring("filter_combine_halves"));
        denoising_program.add_kernel(ustring("filter_construct_transform"));
        denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
@@ -910,6 +911,33 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
        return true;
 }
 
+bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
+                                                 device_ptr variance_ptr,
+                                                 device_ptr depth_ptr,
+                                                 device_ptr output_ptr,
+                                                 DenoisingTask *task)
+{
+       cl_mem image_mem = CL_MEM_PTR(image_ptr);
+       cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
+       cl_mem depth_mem = CL_MEM_PTR(depth_ptr);
+       cl_mem output_mem = CL_MEM_PTR(output_ptr);
+
+       cl_kernel ckFilterDetectOutliers = denoising_program(ustring("filter_detect_outliers"));
+
+       kernel_set_args(ckFilterDetectOutliers, 0,
+                       image_mem,
+                       variance_mem,
+                       depth_mem,
+                       output_mem,
+                       task->rect,
+                       task->buffer.pass_stride);
+       enqueue_kernel(ckFilterDetectOutliers,
+                      task->rect.z-task->rect.x,
+                      task->rect.w-task->rect.y);
+
+       return true;
+}
+
 bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
                                            DenoisingTask *task)
 {
@@ -942,6 +970,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task)
        denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
        denoising.functions.combine_halves = function_bind(&OpenCLDeviceBase::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
        denoising.functions.get_feature = function_bind(&OpenCLDeviceBase::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
+       denoising.functions.detect_outliers = function_bind(&OpenCLDeviceBase::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
 
        denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
        denoising.render_buffer.samples = rtile.sample;
index f5a40d4999743c9e4bd54c7d4fe4649dde08f225..41998c792b69c8c32e2d5593c5728fc48add7448 100644 (file)
@@ -16,7 +16,7 @@
 
  CCL_NAMESPACE_BEGIN
 
-#define ccl_get_feature(buffer, pass) buffer[(pass)*pass_stride]
+#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
 
 /* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).
  * pixel_buffer always points to the current pixel in the first pass. */
@@ -32,7 +32,7 @@ ccl_device_inline void filter_get_features(int2 pixel, ccl_global float ccl_rest
 {
        features[0] = pixel.x;
        features[1] = pixel.y;
-       features[2] = ccl_get_feature(buffer, 0);
+       features[2] = fabsf(ccl_get_feature(buffer, 0));
        features[3] = ccl_get_feature(buffer, 1);
        features[4] = ccl_get_feature(buffer, 2);
        features[5] = ccl_get_feature(buffer, 3);
@@ -50,7 +50,7 @@ ccl_device_inline void filter_get_feature_scales(int2 pixel, ccl_global float cc
 {
        scales[0] = fabsf(pixel.x - mean[0]);
        scales[1] = fabsf(pixel.y - mean[1]);
-       scales[2] = fabsf(ccl_get_feature(buffer, 0) - mean[2]);
+       scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
        scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
                                            ccl_get_feature(buffer, 2) - mean[4],
                                            ccl_get_feature(buffer, 3) - mean[5]));
@@ -107,7 +107,7 @@ ccl_device_inline void filter_get_design_row_transform(int2 p_pixel,
        math_vector_zero(design_row+1, rank);
        design_row_add(design_row, rank, transform, stride, 0, q_pixel.x - p_pixel.x);
        design_row_add(design_row, rank, transform, stride, 1, q_pixel.y - p_pixel.y);
-       design_row_add(design_row, rank, transform, stride, 2, ccl_get_feature(q_buffer, 0) - ccl_get_feature(p_buffer, 0));
+       design_row_add(design_row, rank, transform, stride, 2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
        design_row_add(design_row, rank, transform, stride, 3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
        design_row_add(design_row, rank, transform, stride, 4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
        design_row_add(design_row, rank, transform, stride, 5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));
index 303c8f482e3ad64e2b077ced9994110ea7e87b22..a242a8ed0a118246ffe67662b289e58f8dce5e1a 100644 (file)
@@ -37,7 +37,7 @@ ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y, __m128 active
 {
        features[0] = x;
        features[1] = y;
-       features[2] = ccl_get_feature_sse(0);
+       features[2] = _mm_fabs_ps(ccl_get_feature_sse(0));
        features[3] = ccl_get_feature_sse(1);
        features[4] = ccl_get_feature_sse(2);
        features[5] = ccl_get_feature_sse(3);
@@ -58,7 +58,7 @@ ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y, __m128
        scales[0] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(x, mean[0])), active_pixels);
        scales[1] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(y, mean[1])), active_pixels);
 
-       scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(ccl_get_feature_sse(0), mean[2])), active_pixels);
+       scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(_mm_fabs_ps(ccl_get_feature_sse(0)), mean[2])), active_pixels);
 
        __m128 diff, scale;
        diff = _mm_sub_ps(ccl_get_feature_sse(1), mean[3]);
index 54bcf8880523626d130082e29740806d7755e50d..252bcc5e675ee88f1322de78d80611476a62bde6 100644 (file)
@@ -104,6 +104,57 @@ ccl_device void kernel_filter_get_feature(int sample,
        }
 }
 
+ccl_device void kernel_filter_detect_outliers(int x, int y,
+                                              ccl_global float *image,
+                                              ccl_global float *variance,
+                                              ccl_global float *depth,
+                                              ccl_global float *out,
+                                              int4 rect,
+                                              int pass_stride)
+{
+       int buffer_w = align_up(rect.z - rect.x, 4);
+
+       int n = 0;
+       float values[25];
+       for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
+               for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
+                       int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
+                       float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
+
+                       /* Find the position of L. */
+                       int i;
+                       for(i = 0; i < n; i++) {
+                               if(values[i] > L) break;
+                       }
+                       /* Make space for L by shifting all following values to the right. */
+                       for(int j = n; j > i; j--) {
+                               values[j] = values[j-1];
+                       }
+                       /* Insert L. */
+                       values[i] = L;
+                       n++;
+               }
+       }
+
+       int idx = (y-rect.y)*buffer_w + (x-rect.x);
+       float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
+
+       float ref = 2.0f*values[(int)(n*0.75f)];
+       float fac = 1.0f;
+       if(L > ref) {
+               /* If the pixel is an outlier, negate the depth value to mark it as one.
+                * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
+               depth[idx] = -depth[idx];
+               fac = ref/L;
+               variance[idx              ] *= fac*fac;
+               variance[idx + pass_stride] *= fac*fac;
+               variance[idx+2*pass_stride] *= fac*fac;
+       }
+       out[idx              ] = fac*image[idx];
+       out[idx + pass_stride] = fac*image[idx + pass_stride];
+       out[idx+2*pass_stride] = fac*image[idx+2*pass_stride];
+}
+
 /* Combine A/B buffers.
  * Calculates the combined mean and the buffer variance. */
 ccl_device void kernel_filter_combine_halves(int x, int y,
index 02f3802fa0cda2897c11323662d2a77d806f6f17..6a7c86e401299a01519022fd310aa0425a751af5 100644 (file)
@@ -54,7 +54,10 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
        float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
        float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
 
-       if(average(fabs(p_color - q_color)) > 3.0f*(p_std_dev + q_std_dev + 1e-3f)) {
+       /* If the pixel was flagged as an outlier during prefiltering, skip it.
+        * Otherwise, perform the regular confidence interval test. */
+       if(ccl_get_feature(buffer + q_offset, 0) < 0.0f ||
+          average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)) {
                return;
        }
 
index 10007ee2635870fe7a04031bb8dd3647a06dc187..9708b4b5b58f7dd2b1f692d7f54cff71dff99530 100644 (file)
@@ -43,6 +43,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
                                                    int buffer_denoising_offset,
                                                    bool use_split_variance);
 
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,                 /* pixel to examine */
+                                                       ccl_global float *image,      /* input color, three channels pass_stride apart */
+                                                       ccl_global float *variance,   /* color variance, scaled in place for outliers */
+                                                       ccl_global float *depth,      /* depth, negated in place for outliers */
+                                                       ccl_global float *output,     /* receives the (possibly scaled) color */
+                                                       int *rect,                    /* rect, converted to an int4 by the implementation */
+                                                       int pass_stride);             /* offset between consecutive channels */
+
 void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
                                                       float *mean,
                                                       float *variance,
index 3b71e50ca3b2493a4e45abfbfd7b30568878d991..15325abdccde637321c20c0d7d98c2fa2658436a 100644 (file)
@@ -91,6 +91,21 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
 #endif
 }
 
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+                                                       ccl_global float *image,
+                                                       ccl_global float *variance,
+                                                       ccl_global float *depth,
+                                                       ccl_global float *output,
+                                                       int *rect,
+                                                       int pass_stride)
+{
+#ifdef KERNEL_STUB
+       STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
+#else
+       kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
+#endif
+}
+
 void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
                                                       float *mean,
                                                       float *variance,
index 50f73f9728d98fe33738c4794abcd78ac885f371..f812a6601c6c93891313038b69b4a8d10a99ab5d 100644 (file)
@@ -86,6 +86,22 @@ kernel_cuda_filter_get_feature(int sample,
        }
 }
 
+/* CUDA entry point for outlier detection.
+ * Expects a 2D launch; each thread handles the pixel at its global (x, y) index,
+ * offset by the origin of prefilter_rect. */
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_filter_detect_outliers(float *image,
+                                   float *variance,
+                                   float *depth,
+                                   float *output,
+                                   int4 prefilter_rect,
+                                   int pass_stride)
+{
+       const int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
+       const int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
+
+       /* Threads that fall outside the rect have nothing to do. */
+       if(x >= prefilter_rect.z || y >= prefilter_rect.w) {
+               return;
+       }
+
+       kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
+}
+
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
 kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect, int r)
index 3d82bff9892303c55b70f509f6bf6cd77b746f22..fbc3daa62b9eb5aa3f8a83a41e246fd65b779121 100644 (file)
@@ -78,6 +78,20 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
        }
 }
 
+/* OpenCL entry point for outlier detection.
+ * Each work-item handles the pixel at its 2D global id, offset by the origin
+ * of prefilter_rect. */
+__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
+                                                ccl_global float *variance,
+                                                ccl_global float *depth,
+                                                ccl_global float *output,
+                                                int4 prefilter_rect,
+                                                int pass_stride)
+{
+       const int x = prefilter_rect.x + get_global_id(0);
+       const int y = prefilter_rect.y + get_global_id(1);
+
+       /* Work-items that fall outside the rect have nothing to do. */
+       if(x >= prefilter_rect.z || y >= prefilter_rect.w) {
+               return;
+       }
+
+       kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
+}
+
 __kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
                                                ccl_global float *variance,
                                                ccl_global float *a,