Cycles: Cleanup, use ccl_restrict instead of ccl_restrict_ptr
[blender.git] / intern / cycles / kernel / filter / filter_nlm_cpu.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
20                                                          const float *ccl_restrict weightImage,
21                                                          const float *ccl_restrict varianceImage,
22                                                          float *differenceImage,
23                                                          int4 rect,
24                                                          int w,
25                                                          int channel_offset,
26                                                          float a,
27                                                          float k_2)
28 {
29         for(int y = rect.y; y < rect.w; y++) {
30                 for(int x = rect.x; x < rect.z; x++) {
31                         float diff = 0.0f;
32                         int numChannels = channel_offset? 3 : 1;
33                         for(int c = 0; c < numChannels; c++) {
34                                 float cdiff = weightImage[c*channel_offset + y*w+x] - weightImage[c*channel_offset + (y+dy)*w+(x+dx)];
35                                 float pvar = varianceImage[c*channel_offset + y*w+x];
36                                 float qvar = varianceImage[c*channel_offset + (y+dy)*w+(x+dx)];
37                                 diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar));
38                         }
39                         if(numChannels > 1) {
40                                 diff *= 1.0f/numChannels;
41                         }
42                         differenceImage[y*w+x] = diff;
43                 }
44         }
45 }
46
47 ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict differenceImage,
48                                               float *outImage,
49                                               int4 rect,
50                                               int w,
51                                               int f)
52 {
53 #ifdef __KERNEL_SSE3__
54         int aligned_lowx = (rect.x & ~(3));
55         int aligned_highx = ((rect.z + 3) & ~(3));
56 #endif
57         for(int y = rect.y; y < rect.w; y++) {
58                 const int low = max(rect.y, y-f);
59                 const int high = min(rect.w, y+f+1);
60                 for(int x = rect.x; x < rect.z; x++) {
61                         outImage[y*w+x] = 0.0f;
62                 }
63                 for(int y1 = low; y1 < high; y1++) {
64 #ifdef __KERNEL_SSE3__
65                         for(int x = aligned_lowx; x < aligned_highx; x+=4) {
66                                 _mm_store_ps(outImage + y*w+x, _mm_add_ps(_mm_load_ps(outImage + y*w+x), _mm_load_ps(differenceImage + y1*w+x)));
67                         }
68 #else
69                         for(int x = rect.x; x < rect.z; x++) {
70                                 outImage[y*w+x] += differenceImage[y1*w+x];
71                         }
72 #endif
73                 }
74                 for(int x = rect.x; x < rect.z; x++) {
75                         outImage[y*w+x] *= 1.0f/(high - low);
76                 }
77         }
78 }
79
80 ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict differenceImage,
81                                                      float *outImage,
82                                                      int4 rect,
83                                                      int w,
84                                                      int f)
85 {
86         for(int y = rect.y; y < rect.w; y++) {
87                 for(int x = rect.x; x < rect.z; x++) {
88                         outImage[y*w+x] = 0.0f;
89                 }
90         }
91         for(int dx = -f; dx <= f; dx++) {
92                 int pos_dx = max(0, dx);
93                 int neg_dx = min(0, dx);
94                 for(int y = rect.y; y < rect.w; y++) {
95                         for(int x = rect.x-neg_dx; x < rect.z-pos_dx; x++) {
96                                 outImage[y*w+x] += differenceImage[y*w+dx+x];
97                         }
98                 }
99         }
100         for(int y = rect.y; y < rect.w; y++) {
101                 for(int x = rect.x; x < rect.z; x++) {
102                         const int low = max(rect.x, x-f);
103                         const int high = min(rect.z, x+f+1);
104                         outImage[y*w+x] = expf(-max(outImage[y*w+x] * (1.0f/(high - low)), 0.0f));
105                 }
106         }
107 }
108
109 ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
110                                                        const float *ccl_restrict differenceImage,
111                                                        const float *ccl_restrict image,
112                                                        float *outImage,
113                                                        float *accumImage,
114                                                        int4 rect,
115                                                        int w,
116                                                        int f)
117 {
118         for(int y = rect.y; y < rect.w; y++) {
119                 for(int x = rect.x; x < rect.z; x++) {
120                         const int low = max(rect.x, x-f);
121                         const int high = min(rect.z, x+f+1);
122                         float sum = 0.0f;
123                         for(int x1 = low; x1 < high; x1++) {
124                                 sum += differenceImage[y*w+x1];
125                         }
126                         float weight = sum * (1.0f/(high - low));
127                         accumImage[y*w+x] += weight;
128                         outImage[y*w+x] += weight*image[(y+dy)*w+(x+dx)];
129                 }
130         }
131 }
132
133 ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
134                                                            const float *ccl_restrict differenceImage,
135                                                            const float *ccl_restrict buffer,
136                                                            float *color_pass,
137                                                            float *variance_pass,
138                                                            float *transform,
139                                                            int *rank,
140                                                            float *XtWX,
141                                                            float3 *XtWY,
142                                                            int4 rect,
143                                                            int4 filter_rect,
144                                                            int w, int h, int f,
145                                                            int pass_stride)
146 {
147         /* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
148         for(int fy = max(0, rect.y-filter_rect.y); fy < min(filter_rect.w, rect.w-filter_rect.y); fy++) {
149                 int y = fy + filter_rect.y;
150                 for(int fx = max(0, rect.x-filter_rect.x); fx < min(filter_rect.z, rect.z-filter_rect.x); fx++) {
151                         int x = fx + filter_rect.x;
152                         const int low = max(rect.x, x-f);
153                         const int high = min(rect.z, x+f+1);
154                         float sum = 0.0f;
155                         for(int x1 = low; x1 < high; x1++) {
156                                 sum += differenceImage[y*w+x1];
157                         }
158                         float weight = sum * (1.0f/(high - low));
159
160                         int storage_ofs = fy*filter_rect.z + fx;
161                         float  *l_transform = transform + storage_ofs*TRANSFORM_SIZE;
162                         float  *l_XtWX = XtWX + storage_ofs*XTWX_SIZE;
163                         float3 *l_XtWY = XtWY + storage_ofs*XTWY_SIZE;
164                         int    *l_rank = rank + storage_ofs;
165
166                         kernel_filter_construct_gramian(x, y, 1,
167                                                         dx, dy, w, h,
168                                                         pass_stride,
169                                                         buffer,
170                                                         color_pass, variance_pass,
171                                                         l_transform, l_rank,
172                                                         weight, l_XtWX, l_XtWY, 0);
173                 }
174         }
175 }
176
177 ccl_device_inline void kernel_filter_nlm_normalize(float *outImage,
178                                                    const float *ccl_restrict accumImage,
179                                                    int4 rect,
180                                                    int w)
181 {
182         for(int y = rect.y; y < rect.w; y++) {
183                 for(int x = rect.x; x < rect.z; x++) {
184                         outImage[y*w+x] /= accumImage[y*w+x];
185                 }
186         }
187 }
188
189 CCL_NAMESPACE_END