2 * Copyright 2011-2017 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* OpenCL kernel entry points */
19 #include "kernel/kernel_compat_opencl.h"
21 #include "kernel/filter/filter_kernel.h"
/* OpenCL entry point wrapping kernel_filter_divide_shadow().
 *
 * Each work-item derives a pixel coordinate (x, y) from its 2D global ID,
 * offset by prefilter_rect.x / prefilter_rect.y, and forwards to the shared
 * implementation only when that pixel passes the bounds check below.
 *
 * NOTE(review): prefilter_rect is used here but its declaration is not
 * visible in this excerpt; presumably it is a kernel parameter - confirm.
 * The full argument list of the forwarded call is likewise not visible. */
25 __kernel void kernel_ocl_filter_divide_shadow(int sample,
26 ccl_global TilesInfo *tiles,
27 ccl_global float *unfilteredA,
28 ccl_global float *unfilteredB,
29 ccl_global float *sampleVariance,
30 ccl_global float *sampleVarianceV,
31 ccl_global float *bufferVariance,
33 int buffer_pass_stride,
34 int buffer_denoising_offset,
35 char use_split_variance)
/* Global IDs start at zero, so shift them to the rectangle's origin. */
37 int x = prefilter_rect.x + get_global_id(0);
38 int y = prefilter_rect.y + get_global_id(1);
/* prefilter_rect.z / .w act as exclusive upper bounds; work-items beyond
 * them do nothing. */
39 if(x < prefilter_rect.z && y < prefilter_rect.w) {
40 kernel_filter_divide_shadow(sample,
50 buffer_denoising_offset,
/* OpenCL entry point wrapping kernel_filter_get_feature().
 *
 * Maps the 2D global ID to a pixel (x, y) offset by prefilter_rect.x / .y
 * and calls the shared implementation for pixels that pass the bounds check.
 *
 * NOTE(review): prefilter_rect is referenced but not declared in the visible
 * lines; presumably a kernel parameter - confirm. Some parameters and call
 * arguments of this kernel are also not visible in this excerpt. */
55 __kernel void kernel_ocl_filter_get_feature(int sample,
56 ccl_global TilesInfo *tiles,
59 ccl_global float *mean,
60 ccl_global float *variance,
62 int buffer_pass_stride,
63 int buffer_denoising_offset,
64 char use_split_variance)
/* Translate the zero-based global ID into rectangle coordinates. */
66 int x = prefilter_rect.x + get_global_id(0);
67 int y = prefilter_rect.y + get_global_id(1);
/* Only pixels below the exclusive bounds prefilter_rect.z / .w are handled. */
68 if(x < prefilter_rect.z && y < prefilter_rect.w) {
69 kernel_filter_get_feature(sample,
76 buffer_denoising_offset,
/* OpenCL entry point wrapping kernel_filter_detect_outliers().
 *
 * One work-item per pixel: (x, y) is the 2D global ID shifted by
 * prefilter_rect.x / .y. The image, variance, depth and output pointers are
 * passed through unchanged together with prefilter_rect and pass_stride.
 *
 * NOTE(review): prefilter_rect and pass_stride are not declared in the
 * visible lines; presumably trailing kernel parameters - confirm. */
81 __kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
82 ccl_global float *variance,
83 ccl_global float *depth,
84 ccl_global float *output,
88 int x = prefilter_rect.x + get_global_id(0);
89 int y = prefilter_rect.y + get_global_id(1);
/* Work-items at or past prefilter_rect.z / .w are idle. */
90 if(x < prefilter_rect.z && y < prefilter_rect.w) {
91 kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
/* OpenCL entry point wrapping kernel_filter_combine_halves().
 *
 * Computes the pixel (x, y) from the 2D global ID plus the prefilter_rect
 * origin and forwards mean, variance, a, b, prefilter_rect and r to the
 * shared implementation for pixels that pass the bounds check.
 *
 * NOTE(review): a, b, prefilter_rect and r are not declared in the visible
 * lines; presumably the remaining kernel parameters - confirm. */
95 __kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
96 ccl_global float *variance,
102 int x = prefilter_rect.x + get_global_id(0);
103 int y = prefilter_rect.y + get_global_id(1);
/* prefilter_rect.z / .w are treated as exclusive upper bounds. */
104 if(x < prefilter_rect.z && y < prefilter_rect.w) {
105 kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r);
/* OpenCL entry point wrapping kernel_filter_construct_transform().
 *
 * Unlike the prefilter kernels, x and y here are the raw 2D global ID and
 * are compared against filter_area.z / .w directly; the filter_area.x / .y
 * offset is added only when forming the coordinates passed to the shared
 * implementation.
 *
 * NOTE(review): filter_area, radius and pca_threshold are not declared in
 * the visible lines, and part of the forwarded argument list is missing
 * from this excerpt - confirm against the full file. */
109 __kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
110 ccl_global float *transform,
111 ccl_global int *rank,
118 int x = get_global_id(0);
119 int y = get_global_id(1);
120 if(x < filter_area.z && y < filter_area.w) {
/* Advance both output pointers by this pixel's row-major index within the
 * filter area (row length filter_area.z). */
121 ccl_global int *l_rank = rank + y*filter_area.z + x;
122 ccl_global float *l_transform = transform + y*filter_area.z + x;
123 kernel_filter_construct_transform(buffer,
124 x + filter_area.x, y + filter_area.y,
127 radius, pca_threshold,
/* Total pixel count of the filter area, followed by the flattened index of
 * this work-item inside its work-group. */
128 filter_area.z*filter_area.w,
129 get_local_id(1)*get_local_size(0) + get_local_id(0));
/* OpenCL entry point wrapping kernel_filter_nlm_calc_difference().
 *
 * Each work-item handles the pixel (x, y) obtained by adding rect.x / rect.y
 * to its 2D global ID, and forwards the offsets (dx, dy), the three image
 * pointers and the remaining scalars unchanged.
 *
 * NOTE(review): dy, rect, w, channel_offset, a and k_2 are not declared in
 * the visible lines; presumably kernel parameters - confirm. */
133 __kernel void kernel_ocl_filter_nlm_calc_difference(int dx,
135 const ccl_global float *ccl_restrict weightImage,
136 const ccl_global float *ccl_restrict varianceImage,
137 ccl_global float *differenceImage,
143 int x = get_global_id(0) + rect.x;
144 int y = get_global_id(1) + rect.y;
/* rect.z / rect.w are exclusive upper bounds; extra work-items are idle. */
145 if(x < rect.z && y < rect.w) {
146 kernel_filter_nlm_calc_difference(x, y, dx, dy, weightImage, varianceImage, differenceImage, rect, w, channel_offset, a, k_2);
/* OpenCL entry point wrapping kernel_filter_nlm_blur().
 *
 * Reads from differenceImage and writes to outImage via the shared
 * implementation, one work-item per pixel of rect.
 *
 * NOTE(review): rect, w and f are not declared in the visible lines;
 * presumably kernel parameters - confirm. */
150 __kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict differenceImage,
151 ccl_global float *outImage,
/* Shift the zero-based global ID to the rectangle's origin. */
155 int x = get_global_id(0) + rect.x;
156 int y = get_global_id(1) + rect.y;
157 if(x < rect.z && y < rect.w) {
158 kernel_filter_nlm_blur(x, y, differenceImage, outImage, rect, w, f);
/* OpenCL entry point wrapping kernel_filter_nlm_calc_weight().
 *
 * Same dispatch pattern as the other NLM kernels in this file: the pixel is
 * the 2D global ID offset by rect.x / rect.y, and pixels at or beyond
 * rect.z / rect.w are skipped.
 *
 * NOTE(review): rect, w and f are not declared in the visible lines;
 * presumably kernel parameters - confirm. */
162 __kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_restrict differenceImage,
163 ccl_global float *outImage,
167 int x = get_global_id(0) + rect.x;
168 int y = get_global_id(1) + rect.y;
169 if(x < rect.z && y < rect.w) {
170 kernel_filter_nlm_calc_weight(x, y, differenceImage, outImage, rect, w, f);
/* OpenCL entry point wrapping kernel_filter_nlm_update_output().
 *
 * Forwards the offsets (dx, dy), the two read-only inputs (differenceImage,
 * image) and the two writable buffers (outImage, accumImage) to the shared
 * implementation for the pixel owned by this work-item.
 *
 * NOTE(review): dy, rect, w and f are not declared in the visible lines;
 * presumably kernel parameters - confirm. */
174 __kernel void kernel_ocl_filter_nlm_update_output(int dx,
176 const ccl_global float *ccl_restrict differenceImage,
177 const ccl_global float *ccl_restrict image,
178 ccl_global float *outImage,
179 ccl_global float *accumImage,
183 int x = get_global_id(0) + rect.x;
184 int y = get_global_id(1) + rect.y;
/* Work-items outside the exclusive bounds rect.z / rect.w do nothing. */
185 if(x < rect.z && y < rect.w) {
186 kernel_filter_nlm_update_output(x, y, dx, dy, differenceImage, image, outImage, accumImage, rect, w, f);
/* OpenCL entry point wrapping kernel_filter_nlm_normalize().
 *
 * outImage is writable and accumImage is read-only; both are passed to the
 * shared implementation for the pixel at the 2D global ID offset by
 * rect.x / rect.y.
 *
 * NOTE(review): rect and w are not declared in the visible lines;
 * presumably kernel parameters - confirm. */
190 __kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *outImage,
191 const ccl_global float *ccl_restrict accumImage,
194 int x = get_global_id(0) + rect.x;
195 int y = get_global_id(1) + rect.y;
196 if(x < rect.z && y < rect.w) {
197 kernel_filter_nlm_normalize(x, y, outImage, accumImage, rect, w);
/* OpenCL entry point wrapping kernel_filter_nlm_construct_gramian().
 *
 * The work-item's pixel is expressed relative to filter_rect: the global ID
 * is shifted by the non-negative part of (rect.x - filter_rect.x) and
 * (rect.y - filter_rect.y), then limited by the smaller of the filter_rect
 * extent and the rect bound re-based to filter_rect's origin.
 *
 * NOTE(review): rect and filter_rect are not declared in the visible lines,
 * and most of the forwarded argument list is missing from this excerpt -
 * confirm against the full file. */
201 __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
203 const ccl_global float *ccl_restrict differenceImage,
204 const ccl_global float *ccl_restrict buffer,
205 ccl_global float *color_pass,
206 ccl_global float *variance_pass,
207 const ccl_global float *ccl_restrict transform,
208 ccl_global int *rank,
209 ccl_global float *XtWX,
210 ccl_global float3 *XtWY,
/* max(0, ...) keeps the starting offset from going negative when rect begins
 * before filter_rect. */
217 int x = get_global_id(0) + max(0, rect.x-filter_rect.x);
218 int y = get_global_id(1) + max(0, rect.y-filter_rect.y);
/* Upper bound is whichever ends first: filter_rect.z / .w or rect's bound
 * measured from filter_rect's origin. */
219 if(x < min(filter_rect.z, rect.z-filter_rect.x) && y < min(filter_rect.w, rect.w-filter_rect.y)) {
220 kernel_filter_nlm_construct_gramian(x, y,
224 color_pass, variance_pass,
/* Flattened index of this work-item inside its work-group. */
230 get_local_id(1)*get_local_size(0) + get_local_id(0));
/* OpenCL entry point wrapping kernel_filter_finalize().
 *
 * x and y are the raw 2D global ID, bounded by filter_area.z / .w. The
 * shared implementation receives them together with w, h, buffer, rank, the
 * filter area's pixel count (filter_area.z * filter_area.w), XtWX, XtWY,
 * buffer_params and sample.
 *
 * NOTE(review): h, filter_area, buffer_params and sample are not declared
 * in the visible lines; presumably kernel parameters - confirm. */
234 __kernel void kernel_ocl_filter_finalize(int w,
236 ccl_global float *buffer,
237 ccl_global int *rank,
238 ccl_global float *XtWX,
239 ccl_global float3 *XtWY,
243 int x = get_global_id(0);
244 int y = get_global_id(1);
245 if(x < filter_area.z && y < filter_area.w) {
/* Row-major index of this pixel within the filter area.
 * NOTE(review): the lines that consume storage_ofs are not visible in this
 * excerpt; presumably it offsets rank / XtWX / XtWY - confirm. */
246 int storage_ofs = y*filter_area.z+x;
250 kernel_filter_finalize(x, y, w, h, buffer, rank, filter_area.z*filter_area.w, XtWX, XtWY, buffer_params, sample);
/* OpenCL entry point that stores nine buffer pointers into a TilesInfo.
 *
 * buffer_1 .. buffer_9 are written, in order, to tiles->buffers[0] ..
 * tiles->buffers[8]. No other field of tiles is touched in the visible
 * lines. */
254 __kernel void kernel_ocl_filter_set_tiles(ccl_global TilesInfo* tiles,
255 ccl_global float *buffer_1,
256 ccl_global float *buffer_2,
257 ccl_global float *buffer_3,
258 ccl_global float *buffer_4,
259 ccl_global float *buffer_5,
260 ccl_global float *buffer_6,
261 ccl_global float *buffer_7,
262 ccl_global float *buffer_8,
263 ccl_global float *buffer_9)
/* Only the work-item with global ID (0, 0) performs the stores, so the
 * assignment happens once regardless of how many work-items are launched. */
265 if((get_global_id(0) == 0) && (get_global_id(1) == 0)) {
266 tiles->buffers[0] = buffer_1;
267 tiles->buffers[1] = buffer_2;
268 tiles->buffers[2] = buffer_3;
269 tiles->buffers[3] = buffer_4;
270 tiles->buffers[4] = buffer_5;
271 tiles->buffers[5] = buffer_6;
272 tiles->buffers[6] = buffer_7;
273 tiles->buffers[7] = buffer_8;
274 tiles->buffers[8] = buffer_9;