2 * Copyright 2011-2017 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
/* Accumulate the contribution of one neighbor pixel q = (x+dx, y+dy) into the
 * weighted least-squares system of the center pixel p = (x, y).
 *
 * For an accepted neighbor, a design row is built for the (p, q) pair and then
 * added, scaled by weight, to the matrix XtWX and, scaled by weight * q_color,
 * to the right-hand side XtWY. Both are treated as having (*rank)+1 entries
 * per dimension and are addressed with the given stride.
 *
 * NOTE(review): parts of the parameter list and of the body (braces,
 * preprocessor conditions, the body of the rejection branch) are not visible
 * in this excerpt; the comments below only describe the visible statements. */
19 ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
24 const ccl_global float *ccl_restrict buffer,
25 ccl_global float *color_pass,
26 ccl_global float *variance_pass,
27 const ccl_global float *ccl_restrict transform,
30 ccl_global float *XtWX,
31 ccl_global float3 *XtWY,
/* Linear offsets of p and q, using w as the row length. */
34 int p_offset = y *w + x;
35 int q_offset = (y+dy)*w + (x+dx);
/* Scratch storage for the design row (DENOISE_FEATURES+1 floats) and the
 * stride used for XtWX/XtWY are chosen per backend. Under __KERNEL_CUDA__ the
 * row is a slice of a ccl_local array selected by localIdx; the other visible
 * variants use a plain local array.
 * NOTE(review): the conditions guarding the first and last variant are not
 * visible here -- presumably CPU and OpenCL; confirm against the full file. */
41 float design_row[DENOISE_FEATURES+1];
42 #elif defined(__KERNEL_CUDA__)
43 const int stride = storage_stride;
44 ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE];
45 ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1);
47 const int stride = storage_stride;
48 float design_row[DENOISE_FEATURES+1];
/* Colors of the center pixel p and the neighbor q. */
51 float3 p_color = filter_get_pixel_color(color_pass + p_offset, pass_stride);
52 float3 q_color = filter_get_pixel_color(color_pass + q_offset, pass_stride);
/* Standard deviations of p and q, as the square root of the stored variance. */
54 float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
55 float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
/* The test below triggers when feature 0 of q is negative, or when the average
 * absolute color difference between p and q exceeds twice the sum of both
 * standard deviations plus 1e-3f.
 * NOTE(review): the 1e-3f term presumably keeps the threshold positive when
 * both variances are zero -- confirm. */
57 /* If the pixel was flagged as an outlier during prefiltering, skip it.
58 * Otherwise, perform the regular confidence interval test. */
59 if(ccl_get_feature(buffer + q_offset, 0) < 0.0f ||
60 average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)) {
/* Fill design_row for the (p, q) pair, passing transform and *rank through to
 * filter_get_design_row_transform. */
64 filter_get_design_row_transform(make_int2(x, y), buffer + p_offset,
65 make_int2(x+dx, y+dy), buffer + q_offset,
66 pass_stride, *rank, design_row, transform, stride);
/* Accumulate into the system: XtWX receives the design row with weight,
 * XtWY receives it with weight * q_color. */
68 math_trimatrix_add_gramian_strided(XtWX, (*rank)+1, design_row, weight, stride);
69 math_vec3_add_strided(XtWY, (*rank)+1, design_row, weight * q_color, stride);
/* Solve the accumulated system for pixel (x, y) and write the resulting color
 * into buffer.
 *
 * The color is read from XtWY[0] after math_trimatrix_vec3_solve has run on
 * XtWX and XtWY. If it is not finite, the quotient XtWY[0]/XtWX[0] taken
 * before the solve is used instead. The result is multiplied by sample,
 * optionally has three floats from the output buffer added to it, and is
 * stored in the first three floats of the pixel's slot in the output buffer.
 *
 * NOTE(review): parts of the parameter list (e.g. rank, sample, buffer_params,
 * storage_stride) and the braces are not visible in this excerpt. */
72 ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
73 ccl_global float *buffer,
76 ccl_global float *XtWX,
77 ccl_global float3 *XtWY,
85 const int stride = storage_stride;
88 /* The weighted average of pixel colors (essentially, the NLM-filtered image).
89 * In case the solution of the linear model fails due to numerical issues,
90 * fall back to this value. */
/* NOTE(review): no guard against XtWX[0] == 0 is visible here. In that case
 * mean_color is itself non-finite, so the fallback below would also write a
 * non-finite color -- confirm whether callers guarantee a positive weight sum. */
91 float3 mean_color = XtWY[0]/XtWX[0];
/* Solve for (*rank)+1 unknowns; the solution is read back from XtWY[0] below.
 * NOTE(review): presumably the solve works in place on XtWY -- confirm. */
93 math_trimatrix_vec3_solve(XtWX, XtWY, (*rank)+1, stride);
95 float3 final_color = XtWY[0];
96 if(!isfinite3_safe(final_color)) {
97 final_color = mean_color;
/* Address of this pixel's output slot, computed from buffer_params.
 * NOTE(review): presumably .x is a pixel offset, .y the row stride and .z the
 * number of floats per pixel -- confirm against the caller. */
100 ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
/* NOTE(review): scaling by sample suggests the buffer stores values summed
 * over samples rather than averages -- confirm. */
101 final_color *= sample;
/* When buffer_params.w is non-zero, add the three floats located at that
 * offset within the pixel's slot to the result.
 * NOTE(review): the meaning of this extra pass is not visible here. */
102 if(buffer_params.w) {
103 final_color.x += combined_buffer[buffer_params.w+0];
104 final_color.y += combined_buffer[buffer_params.w+1];
105 final_color.z += combined_buffer[buffer_params.w+2];
/* Store the final color in the first three floats of the pixel's slot. */
107 combined_buffer[0] = final_color.x;
108 combined_buffer[1] = final_color.y;
109 combined_buffer[2] = final_color.z;