Cycles: Cleanup, braces after function definition
[blender.git] / intern / cycles / kernel / kernels / opencl / filter.cl
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* OpenCL kernel entry points */
18
19 #include "kernel/kernel_compat_opencl.h"
20
21 #include "kernel/filter/filter_kernel.h"
22
23 /* kernels */
24
/* Entry point for kernel_filter_divide_shadow().
 *
 * Each work-item offsets its 2D global id by prefilter_rect.x/.y to obtain
 * (x, y) and forwards every argument unchanged. Work-items whose coordinate
 * is not below prefilter_rect.z (for x) and prefilter_rect.w (for y) return
 * without calling the filter function. */
__kernel void kernel_ocl_filter_divide_shadow(int sample,
                                              ccl_global TilesInfo *tiles,
                                              ccl_global float *unfilteredA,
                                              ccl_global float *unfilteredB,
                                              ccl_global float *sampleVariance,
                                              ccl_global float *sampleVarianceV,
                                              ccl_global float *bufferVariance,
                                              int4 prefilter_rect,
                                              int buffer_pass_stride,
                                              int buffer_denoising_offset,
                                              char use_split_variance)
{
        const int x = prefilter_rect.x + get_global_id(0);
        const int y = prefilter_rect.y + get_global_id(1);

        /* Work-items past the upper bounds of the rectangle have nothing to do. */
        if(x >= prefilter_rect.z || y >= prefilter_rect.w) {
                return;
        }

        kernel_filter_divide_shadow(sample, tiles,
                                    x, y,
                                    unfilteredA, unfilteredB,
                                    sampleVariance, sampleVarianceV,
                                    bufferVariance,
                                    prefilter_rect,
                                    buffer_pass_stride,
                                    buffer_denoising_offset,
                                    use_split_variance);
}
54
/* Entry point for kernel_filter_get_feature().
 *
 * Derives (x, y) from the 2D global id plus prefilter_rect.x/.y and, when the
 * coordinate is below prefilter_rect.z/.w, forwards all arguments to the
 * filter function. Other work-items return immediately. */
__kernel void kernel_ocl_filter_get_feature(int sample,
                                            ccl_global TilesInfo *tiles,
                                            int m_offset,
                                            int v_offset,
                                            ccl_global float *mean,
                                            ccl_global float *variance,
                                            int4 prefilter_rect,
                                            int buffer_pass_stride,
                                            int buffer_denoising_offset,
                                            char use_split_variance)
{
        const int x = prefilter_rect.x + get_global_id(0);
        const int y = prefilter_rect.y + get_global_id(1);

        /* Skip work-items that fall outside the rectangle's upper bounds. */
        if(x >= prefilter_rect.z || y >= prefilter_rect.w) {
                return;
        }

        kernel_filter_get_feature(sample, tiles,
                                  m_offset, v_offset,
                                  x, y,
                                  mean, variance,
                                  prefilter_rect,
                                  buffer_pass_stride,
                                  buffer_denoising_offset,
                                  use_split_variance);
}
80
/* Entry point for kernel_filter_detect_outliers().
 *
 * (x, y) is the 2D global id offset by prefilter_rect.x/.y; the filter
 * function is only called while x < prefilter_rect.z and
 * y < prefilter_rect.w. */
__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
                                                ccl_global float *variance,
                                                ccl_global float *depth,
                                                ccl_global float *output,
                                                int4 prefilter_rect,
                                                int pass_stride)
{
        const int x = prefilter_rect.x + get_global_id(0);
        const int y = prefilter_rect.y + get_global_id(1);

        /* Nothing to do beyond the rectangle's upper bounds. */
        if(x >= prefilter_rect.z || y >= prefilter_rect.w) {
                return;
        }

        kernel_filter_detect_outliers(x, y,
                                      image, variance, depth, output,
                                      prefilter_rect,
                                      pass_stride);
}
94
/* Entry point for kernel_filter_combine_halves().
 *
 * Computes (x, y) from the 2D global id and prefilter_rect.x/.y, then calls
 * the filter function for coordinates below prefilter_rect.z/.w. */
__kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
                                               ccl_global float *variance,
                                               ccl_global float *a,
                                               ccl_global float *b,
                                               int4 prefilter_rect,
                                               int r)
{
        const int x = prefilter_rect.x + get_global_id(0);
        const int y = prefilter_rect.y + get_global_id(1);

        /* Work-items outside the rectangle's upper bounds do no work. */
        if(x >= prefilter_rect.z || y >= prefilter_rect.w) {
                return;
        }

        kernel_filter_combine_halves(x, y,
                                     mean, variance,
                                     a, b,
                                     prefilter_rect,
                                     r);
}
108
/* Entry point for kernel_filter_construct_transform().
 *
 * Here (x, y) is the raw 2D global id, bounded by filter_area.z/.w. The
 * transform and rank pointers are advanced by y*filter_area.z + x before
 * being handed on, and the coordinate passed to the filter function is
 * offset by filter_area.x/.y. The last two arguments are the product
 * filter_area.z*filter_area.w and the work-item's flattened index within
 * its work-group. */
__kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
                                                    ccl_global float *transform,
                                                    ccl_global int *rank,
                                                    int4 filter_area,
                                                    int4 rect,
                                                    int pass_stride,
                                                    int radius,
                                                    float pca_threshold)
{
        const int x = get_global_id(0);
        const int y = get_global_id(1);

        /* Work-items outside the filter area's extent do nothing. */
        if(x >= filter_area.z || y >= filter_area.w) {
                return;
        }

        /* Per-work-item starting positions inside the output arrays. */
        ccl_global int *rank_at_xy = rank + y*filter_area.z + x;
        ccl_global float *transform_at_xy = transform + y*filter_area.z + x;

        const int area_size = filter_area.z*filter_area.w;
        const size_t local_index = get_local_id(1)*get_local_size(0) + get_local_id(0);

        kernel_filter_construct_transform(buffer,
                                          x + filter_area.x, y + filter_area.y,
                                          rect, pass_stride,
                                          transform_at_xy, rank_at_xy,
                                          radius, pca_threshold,
                                          area_size,
                                          local_index);
}
132
/* Entry point for kernel_filter_nlm_calc_difference().
 *
 * (x, y) is the 2D global id offset by rect.x/.y; the filter function runs
 * only for coordinates below rect.z/.w. */
__kernel void kernel_ocl_filter_nlm_calc_difference(int dx,
                                                    int dy,
                                                    const ccl_global float *ccl_restrict weightImage,
                                                    const ccl_global float *ccl_restrict varianceImage,
                                                    ccl_global float *differenceImage,
                                                    int4 rect,
                                                    int w,
                                                    int channel_offset,
                                                    float a,
                                                    float k_2)
{
        const int x = get_global_id(0) + rect.x;
        const int y = get_global_id(1) + rect.y;

        /* Skip work-items beyond the rectangle's upper bounds. */
        if(x >= rect.z || y >= rect.w) {
                return;
        }

        kernel_filter_nlm_calc_difference(x, y,
                                          dx, dy,
                                          weightImage, varianceImage,
                                          differenceImage,
                                          rect, w,
                                          channel_offset,
                                          a, k_2);
}
150
/* Entry point for kernel_filter_nlm_blur().
 *
 * Offsets the 2D global id by rect.x/.y and calls the filter function for
 * coordinates below rect.z/.w. */
__kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict differenceImage,
                                         ccl_global float *outImage,
                                         int4 rect,
                                         int w,
                                         int f)
{
        const int x = get_global_id(0) + rect.x;
        const int y = get_global_id(1) + rect.y;

        /* Work-items outside the rectangle's upper bounds return early. */
        if(x >= rect.z || y >= rect.w) {
                return;
        }

        kernel_filter_nlm_blur(x, y,
                               differenceImage, outImage,
                               rect, w, f);
}
163
/* Entry point for kernel_filter_nlm_calc_weight().
 *
 * Offsets the 2D global id by rect.x/.y and calls the filter function for
 * coordinates below rect.z/.w. */
__kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_restrict differenceImage,
                                                ccl_global float *outImage,
                                                int4 rect,
                                                int w,
                                                int f)
{
        const int x = get_global_id(0) + rect.x;
        const int y = get_global_id(1) + rect.y;

        /* Work-items outside the rectangle's upper bounds return early. */
        if(x >= rect.z || y >= rect.w) {
                return;
        }

        kernel_filter_nlm_calc_weight(x, y,
                                      differenceImage, outImage,
                                      rect, w, f);
}
176
/* Entry point for kernel_filter_nlm_update_output().
 *
 * (x, y) is the 2D global id offset by rect.x/.y; all arguments are
 * forwarded to the filter function when the coordinate is below
 * rect.z/.w. */
__kernel void kernel_ocl_filter_nlm_update_output(int dx,
                                                  int dy,
                                                  const ccl_global float *ccl_restrict differenceImage,
                                                  const ccl_global float *ccl_restrict image,
                                                  ccl_global float *outImage,
                                                  ccl_global float *accumImage,
                                                  int4 rect,
                                                  int w,
                                                  int f)
{
        const int x = get_global_id(0) + rect.x;
        const int y = get_global_id(1) + rect.y;

        /* Skip work-items beyond the rectangle's upper bounds. */
        if(x >= rect.z || y >= rect.w) {
                return;
        }

        kernel_filter_nlm_update_output(x, y,
                                        dx, dy,
                                        differenceImage, image,
                                        outImage, accumImage,
                                        rect, w, f);
}
193
/* Entry point for kernel_filter_nlm_normalize().
 *
 * Offsets the 2D global id by rect.x/.y and calls the filter function for
 * coordinates below rect.z/.w. */
__kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *outImage,
                                              const ccl_global float *ccl_restrict accumImage,
                                              int4 rect,
                                              int w)
{
        const int x = get_global_id(0) + rect.x;
        const int y = get_global_id(1) + rect.y;

        /* Work-items outside the rectangle's upper bounds return early. */
        if(x >= rect.z || y >= rect.w) {
                return;
        }

        kernel_filter_nlm_normalize(x, y,
                                    outImage, accumImage,
                                    rect, w);
}
205
/* Entry point for kernel_filter_nlm_construct_gramian().
 *
 * The 2D global id is offset by max(0, rect.{x,y} - filter_rect.{x,y}) to
 * obtain (x, y). The filter function is called only while
 *   x < min(filter_rect.z, rect.z - filter_rect.x) and
 *   y < min(filter_rect.w, rect.w - filter_rect.y).
 * The final argument is the work-item's flattened index within its
 * work-group. */
__kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
                                                      int dy,
                                                      const ccl_global float *ccl_restrict differenceImage,
                                                      const ccl_global float *ccl_restrict buffer,
                                                      ccl_global float *color_pass,
                                                      ccl_global float *variance_pass,
                                                      const ccl_global float *ccl_restrict transform,
                                                      ccl_global int *rank,
                                                      ccl_global float *XtWX,
                                                      ccl_global float3 *XtWY,
                                                      int4 rect,
                                                      int4 filter_rect,
                                                      int w,
                                                      int h,
                                                      int f,
                                                      int pass_stride)
{
        const int x = get_global_id(0) + max(0, rect.x-filter_rect.x);
        const int y = get_global_id(1) + max(0, rect.y-filter_rect.y);

        /* Exclusive upper bounds for this work-item's coordinate. */
        const int x_end = min(filter_rect.z, rect.z-filter_rect.x);
        const int y_end = min(filter_rect.w, rect.w-filter_rect.y);

        if(x >= x_end || y >= y_end) {
                return;
        }

        const size_t local_index = get_local_id(1)*get_local_size(0) + get_local_id(0);

        kernel_filter_nlm_construct_gramian(x, y,
                                            dx, dy,
                                            differenceImage,
                                            buffer,
                                            color_pass, variance_pass,
                                            transform, rank,
                                            XtWX, XtWY,
                                            rect, filter_rect,
                                            w, h, f,
                                            pass_stride,
                                            local_index);
}
239
/* Entry point for kernel_filter_finalize().
 *
 * (x, y) is the raw 2D global id, bounded by filter_area.z/.w. The rank,
 * XtWX and XtWY pointers are each advanced by y*filter_area.z + x before
 * being passed on, together with the product filter_area.z*filter_area.w. */
__kernel void kernel_ocl_filter_finalize(int w,
                                         int h,
                                         ccl_global float *buffer,
                                         ccl_global int *rank,
                                         ccl_global float *XtWX,
                                         ccl_global float3 *XtWY,
                                         int4 filter_area,
                                         int4 buffer_params,
                                         int sample)
{
        const int x = get_global_id(0);
        const int y = get_global_id(1);

        /* Work-items outside the filter area's extent do nothing. */
        if(x >= filter_area.z || y >= filter_area.w) {
                return;
        }

        /* Offset of this work-item's entry in the per-coordinate arrays. */
        const int storage_ofs = y*filter_area.z+x;
        const int area_size = filter_area.z*filter_area.w;

        kernel_filter_finalize(x, y,
                               w, h,
                               buffer,
                               rank + storage_ofs,
                               area_size,
                               XtWX + storage_ofs,
                               XtWY + storage_ofs,
                               buffer_params,
                               sample);
}
260
/* Stores the nine buffer pointers into tiles->buffers[0..8].
 *
 * Only the work-item whose global id is (0, 0) performs the writes; every
 * other work-item returns without touching tiles. */
__kernel void kernel_ocl_filter_set_tiles(ccl_global TilesInfo *tiles,
                                          ccl_global float *buffer_1,
                                          ccl_global float *buffer_2,
                                          ccl_global float *buffer_3,
                                          ccl_global float *buffer_4,
                                          ccl_global float *buffer_5,
                                          ccl_global float *buffer_6,
                                          ccl_global float *buffer_7,
                                          ccl_global float *buffer_8,
                                          ccl_global float *buffer_9)
{
        /* A single work-item is enough to fill in the table. */
        if((get_global_id(0) != 0) || (get_global_id(1) != 0)) {
                return;
        }

        /* buffer_N goes into slot N-1. */
        tiles->buffers[0] = buffer_1;
        tiles->buffers[1] = buffer_2;
        tiles->buffers[2] = buffer_3;
        tiles->buffers[3] = buffer_4;
        tiles->buffers[4] = buffer_5;
        tiles->buffers[5] = buffer_6;
        tiles->buffers[6] = buffer_7;
        tiles->buffers[7] = buffer_8;
        tiles->buffers[8] = buffer_9;
}