1a314b100be07f3b83bbf9cb744ef3d5016d1412
[blender.git] / intern / cycles / kernel / filter / filter_nlm_cpu.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy, float ccl_restrict_ptr weightImage, float ccl_restrict_ptr varianceImage, float *differenceImage, int4 rect, int w, int channel_offset, float a, float k_2)
20 {
21         for(int y = rect.y; y < rect.w; y++) {
22                 for(int x = rect.x; x < rect.z; x++) {
23                         float diff = 0.0f;
24                         int numChannels = channel_offset? 3 : 1;
25                         for(int c = 0; c < numChannels; c++) {
26                                 float cdiff = weightImage[c*channel_offset + y*w+x] - weightImage[c*channel_offset + (y+dy)*w+(x+dx)];
27                                 float pvar = varianceImage[c*channel_offset + y*w+x];
28                                 float qvar = varianceImage[c*channel_offset + (y+dy)*w+(x+dx)];
29                                 diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar));
30                         }
31                         if(numChannels > 1) {
32                                 diff *= 1.0f/numChannels;
33                         }
34                         differenceImage[y*w+x] = diff;
35                 }
36         }
37 }
38
39 ccl_device_inline void kernel_filter_nlm_blur(float ccl_restrict_ptr differenceImage, float *outImage, int4 rect, int w, int f)
40 {
41 #ifdef __KERNEL_SSE3__
42         int aligned_lowx = (rect.x & ~(3));
43         int aligned_highx = ((rect.z + 3) & ~(3));
44 #endif
45         for(int y = rect.y; y < rect.w; y++) {
46                 const int low = max(rect.y, y-f);
47                 const int high = min(rect.w, y+f+1);
48                 for(int x = rect.x; x < rect.z; x++) {
49                         outImage[y*w+x] = 0.0f;
50                 }
51                 for(int y1 = low; y1 < high; y1++) {
52 #ifdef __KERNEL_SSE3__
53                         for(int x = aligned_lowx; x < aligned_highx; x+=4) {
54                                 _mm_store_ps(outImage + y*w+x, _mm_add_ps(_mm_load_ps(outImage + y*w+x), _mm_load_ps(differenceImage + y1*w+x)));
55                         }
56 #else
57                         for(int x = rect.x; x < rect.z; x++) {
58                                 outImage[y*w+x] += differenceImage[y1*w+x];
59                         }
60 #endif
61                 }
62                 for(int x = rect.x; x < rect.z; x++) {
63                         outImage[y*w+x] *= 1.0f/(high - low);
64                 }
65         }
66 }
67
68 ccl_device_inline void kernel_filter_nlm_calc_weight(float ccl_restrict_ptr differenceImage, float *outImage, int4 rect, int w, int f)
69 {
70         for(int y = rect.y; y < rect.w; y++) {
71                 for(int x = rect.x; x < rect.z; x++) {
72                         outImage[y*w+x] = 0.0f;
73                 }
74         }
75         for(int dx = -f; dx <= f; dx++) {
76                 int pos_dx = max(0, dx);
77                 int neg_dx = min(0, dx);
78                 for(int y = rect.y; y < rect.w; y++) {
79                         for(int x = rect.x-neg_dx; x < rect.z-pos_dx; x++) {
80                                 outImage[y*w+x] += differenceImage[y*w+dx+x];
81                         }
82                 }
83         }
84         for(int y = rect.y; y < rect.w; y++) {
85                 for(int x = rect.x; x < rect.z; x++) {
86                         const int low = max(rect.x, x-f);
87                         const int high = min(rect.z, x+f+1);
88                         outImage[y*w+x] = expf(-max(outImage[y*w+x] * (1.0f/(high - low)), 0.0f));
89                 }
90         }
91 }
92
93 ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, float ccl_restrict_ptr differenceImage, float ccl_restrict_ptr image, float *outImage, float *accumImage, int4 rect, int w, int f)
94 {
95         for(int y = rect.y; y < rect.w; y++) {
96                 for(int x = rect.x; x < rect.z; x++) {
97                         const int low = max(rect.x, x-f);
98                         const int high = min(rect.z, x+f+1);
99                         float sum = 0.0f;
100                         for(int x1 = low; x1 < high; x1++) {
101                                 sum += differenceImage[y*w+x1];
102                         }
103                         float weight = sum * (1.0f/(high - low));
104                         accumImage[y*w+x] += weight;
105                         outImage[y*w+x] += weight*image[(y+dy)*w+(x+dx)];
106                 }
107         }
108 }
109
110 ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
111                                                            float ccl_restrict_ptr differenceImage,
112                                                            float ccl_restrict_ptr buffer,
113                                                            float *color_pass,
114                                                            float *variance_pass,
115                                                            float *transform,
116                                                            int *rank,
117                                                            float *XtWX,
118                                                            float3 *XtWY,
119                                                            int4 rect,
120                                                            int4 filter_rect,
121                                                            int w, int h, int f,
122                                                            int pass_stride)
123 {
124         /* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
125         for(int fy = max(0, rect.y-filter_rect.y); fy < min(filter_rect.w, rect.w-filter_rect.y); fy++) {
126                 int y = fy + filter_rect.y;
127                 for(int fx = max(0, rect.x-filter_rect.x); fx < min(filter_rect.z, rect.z-filter_rect.x); fx++) {
128                         int x = fx + filter_rect.x;
129                         const int low = max(rect.x, x-f);
130                         const int high = min(rect.z, x+f+1);
131                         float sum = 0.0f;
132                         for(int x1 = low; x1 < high; x1++) {
133                                 sum += differenceImage[y*w+x1];
134                         }
135                         float weight = sum * (1.0f/(high - low));
136
137                         int storage_ofs = fy*filter_rect.z + fx;
138                         float  *l_transform = transform + storage_ofs*TRANSFORM_SIZE;
139                         float  *l_XtWX = XtWX + storage_ofs*XTWX_SIZE;
140                         float3 *l_XtWY = XtWY + storage_ofs*XTWY_SIZE;
141                         int    *l_rank = rank + storage_ofs;
142
143                         kernel_filter_construct_gramian(x, y, 1,
144                                                         dx, dy, w, h,
145                                                         pass_stride,
146                                                         buffer,
147                                                         color_pass, variance_pass,
148                                                         l_transform, l_rank,
149                                                         weight, l_XtWX, l_XtWY, 0);
150                 }
151         }
152 }
153
154 ccl_device_inline void kernel_filter_nlm_normalize(float *outImage, float ccl_restrict_ptr accumImage, int4 rect, int w)
155 {
156         for(int y = rect.y; y < rect.w; y++) {
157                 for(int x = rect.x; x < rect.z; x++) {
158                         outImage[y*w+x] /= accumImage[y*w+x];
159                 }
160         }
161 }
162
163 CCL_NAMESPACE_END