Cycles: Improve denoising speed on GPUs with small tile sizes
[blender.git] / intern / cycles / kernel / filter / filter_reconstruction.h
index d6d7639a22d0461d4baec9f7c5a62715e2209c38..b7bf322f9ceb12f889d6690e9b5c5e2bb9fea9bd 100644 (file)
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
 ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
                                                        int storage_stride,
                                                        int dx, int dy,
-                                                       int w, int h,
+                                                       int buffer_stride,
                                                        int pass_stride,
                                                        const ccl_global float *ccl_restrict buffer,
                                                        const ccl_global float *ccl_restrict transform,
@@ -33,8 +33,8 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
                return;
        }
 
-       int p_offset =  y    *w +  x;
-       int q_offset = (y+dy)*w + (x+dx);
+       int p_offset =  y     * buffer_stride +  x;
+       int q_offset = (y+dy) * buffer_stride + (x+dx);
 
 #ifdef __KERNEL_GPU__
        const int stride = storage_stride;
@@ -65,7 +65,7 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
        math_vec3_add_strided(XtWY, (*rank)+1, design_row, weight * q_color, stride);
 }
 
-ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
+ccl_device_inline void kernel_filter_finalize(int x, int y,
                                               ccl_global float *buffer,
                                               ccl_global int *rank,
                                               int storage_stride,
@@ -81,6 +81,12 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
        (void) storage_stride;
 #endif
 
+       if(XtWX[0] < 1e-3f) {
+               /* There is not enough information to determine a denoised result.
+                * As a fallback, keep the original value of the pixel. */
+                return;
+       }
+
        /* The weighted average of pixel colors (essentially, the NLM-filtered image).
         * In case the solution of the linear model fails due to numerical issues,
         * fall back to this value. */
@@ -93,6 +99,9 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
                final_color = mean_color;
        }
 
+       /* Clamp pixel value to non-negative values. */
+       final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f));
+
        ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
        final_color *= sample;
        if(buffer_params.w) {