2 * Copyright 2011-2017 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
/* Compute a variance-normalized difference between pixel (x, y) and the pixel
 * shifted by (dx, dy), averaged over the channels, and store it in
 * difference_image[y*w + x].
 *
 * NOTE(review): the declarations of dx, dy, w, channel_offset, a, k_2 and of the
 * accumulator diff are on lines not visible in this excerpt - confirm their
 * types and initial values against the full file. */
19 ccl_device_inline void kernel_filter_nlm_calc_difference(int x, int y,
21 const ccl_global float *ccl_restrict weight_image,
22 const ccl_global float *ccl_restrict variance_image,
23 ccl_global float *difference_image,
/* A non-zero channel_offset selects three channels, each stored channel_offset
 * floats apart; a zero channel_offset selects a single channel. */
29 int numChannels = channel_offset? 3 : 1;
30 for(int c = 0; c < numChannels; c++) {
/* Value difference between the pixel and its shifted counterpart. */
31 float cdiff = weight_image[c*channel_offset + y*w+x] - weight_image[c*channel_offset + (y+dy)*w+(x+dx)];
/* Variance at the pixel (pvar) and at the shifted pixel (qvar). */
32 float pvar = variance_image[c*channel_offset + y*w+x];
33 float qvar = variance_image[c*channel_offset + (y+dy)*w+(x+dx)];
/* Squared difference minus a variance term scaled by a, divided by k_2 times the
 * summed variances. The 1e-8f term keeps the denominator non-zero when
 * pvar + qvar is zero. */
34 diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar));
/* Turn the per-channel sum into an average. */
37 diff *= 1.0f/numChannels;
39 difference_image[y*w+x] = diff;
/* Average difference_image along column x over the rows within f of y, and
 * write the result to out_image[y*w + x].
 *
 * The row range is clamped to [rect.y, rect.w), so the window shrinks near the
 * top and bottom of rect.
 *
 * NOTE(review): the declaration of the accumulator sum is on a line not visible
 * in this excerpt. */
42 ccl_device_inline void kernel_filter_nlm_blur(int x, int y,
43 const ccl_global float *ccl_restrict difference_image,
44 ccl_global float *out_image,
45 int4 rect, int w, int f)
/* Half-open row range [low, high). */
48 const int low = max(rect.y, y-f);
49 const int high = min(rect.w, y+f+1);
50 for(int y1 = low; y1 < high; y1++) {
51 sum += difference_image[y1*w+x];
/* Divide by the number of rows actually summed, not by the nominal 2*f + 1. */
53 sum *= 1.0f/(high-low);
54 out_image[y*w+x] = sum;
/* Average difference_image along row y over the columns within f of x, then
 * write fast_expf(-max(average, 0)) to out_image[y*w + x].
 *
 * The column range is clamped to [rect.x, rect.z), so the window shrinks near
 * the left and right of rect.
 *
 * NOTE(review): the declaration of the accumulator sum is on a line not visible
 * in this excerpt. */
57 ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y,
58 const ccl_global float *ccl_restrict difference_image,
59 ccl_global float *out_image,
60 int4 rect, int w, int f)
/* Half-open column range [low, high). */
63 const int low = max(rect.x, x-f);
64 const int high = min(rect.z, x+f+1);
65 for(int x1 = low; x1 < high; x1++) {
66 sum += difference_image[y*w+x1];
/* Divide by the number of columns actually summed. */
68 sum *= 1.0f/(high-low);
/* Negative averages are clamped to zero, so the argument passed to fast_expf is
 * never positive. */
69 out_image[y*w+x] = fast_expf(-max(sum, 0.0f));
/* Average difference_image along row y over the columns within f of x (clamped
 * to [rect.x, rect.z)) and use that average as the weight for accumulating into
 * accum_image and out_image at (x, y).
 *
 * NOTE(review): the dx and dy parameters, the declaration of sum, and the
 * conditional structure around the final three statements are on lines not
 * visible in this excerpt. */
72 ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
74 const ccl_global float *ccl_restrict difference_image,
75 const ccl_global float *ccl_restrict image,
76 ccl_global float *out_image,
77 ccl_global float *accum_image,
78 int4 rect, int w, int f)
/* Half-open column range [low, high). */
81 const int low = max(rect.x, x-f);
82 const int high = min(rect.z, x+f+1);
83 for(int x1 = low; x1 < high; x1++) {
84 sum += difference_image[y*w+x1];
/* Divide by the number of columns actually summed. */
86 sum *= 1.0f/(high-low);
/* Accumulate the weight and the weighted value of the pixel shifted by (dx, dy).
 * NOTE(review): these two statements are presumably guarded by a condition that
 * is not visible here - confirm. */
88 accum_image[y*w+x] += sum;
89 out_image[y*w+x] += sum*image[(y+dy)*w+(x+dx)];
/* Overwrites rather than accumulates.
 * NOTE(review): presumably the alternative branch of the same condition - confirm. */
92 accum_image[y*w+x] = sum;
/* Compute a weight for filter-window pixel (fx, fy) by averaging
 * difference_image along its row, then pass it on to
 * kernel_filter_construct_gramian.
 *
 * NOTE(review): several parameters (including rect, filter_rect, w and f), the
 * declaration of sum, and most of the arguments of the final call are on lines
 * not visible in this excerpt. */
96 ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy,
98 const ccl_global float *ccl_restrict difference_image,
99 const ccl_global float *ccl_restrict buffer,
100 const ccl_global float *ccl_restrict transform,
101 ccl_global int *rank,
102 ccl_global float *XtWX,
103 ccl_global float3 *XtWY,
/* Offset the filter-local coordinates (fx, fy) by the origin of filter_rect. */
110 int y = fy + filter_rect.y;
111 int x = fx + filter_rect.x;
/* Half-open column range [low, high), clamped to [rect.x, rect.z). */
112 const int low = max(rect.x, x-f);
113 const int high = min(rect.z, x+f+1);
115 for(int x1 = low; x1 < high; x1++) {
116 sum += difference_image[y*w+x1];
/* Average over the number of columns actually summed. */
118 float weight = sum * (1.0f/(high - low));
/* Per-pixel storage index: filter_rect.z is used as the row length here.
 * NOTE(review): presumably filter_rect.z and filter_rect.w are the filter
 * window's width and height - confirm against the caller. */
120 int storage_ofs = fy*filter_rect.z + fx;
121 transform += storage_ofs;
/* The remaining arguments of this call are not visible in this excerpt. */
126 kernel_filter_construct_gramian(x, y,
127 filter_rect.z*filter_rect.w,
/* Divide out_image at (x, y) in place by the matching entry of accum_image.
 *
 * NOTE(review): the remaining parameters (including w) are on lines not visible
 * in this excerpt, and no check for a zero accum_image entry is visible here -
 * confirm whether callers guarantee a non-zero accumulated weight. */
136 ccl_device_inline void kernel_filter_nlm_normalize(int x, int y,
137 ccl_global float *out_image,
138 const ccl_global float *ccl_restrict accum_image,
141 out_image[y*w+x] /= accum_image[y*w+x];