2 * Copyright 2011-2017 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* OpenCL kernel entry points */
19 #include "kernel/kernel_compat_opencl.h"
21 #include "kernel/filter/filter_kernel.h"
/* OpenCL entry point that dispatches kernel_filter_divide_shadow() once per
 * pixel of a 2D rectangle.
 *
 * Each work-item forms its pixel coordinate by adding its global ID in
 * dimensions 0 and 1 to prefilter_rect.x / prefilter_rect.y, and only calls
 * the filter when that coordinate is strictly below prefilter_rect.z /
 * prefilter_rect.w (exclusive upper bounds).
 *
 * NOTE(review): prefilter_rect is used in the body but is not declared in
 * the visible parameter list; the opening/closing braces and most of the
 * call's arguments are not visible either, and every line carries a stray
 * leading number. The text appears to have lost lines during extraction --
 * confirm against the upstream file before building.
 */
25 __kernel void kernel_ocl_filter_divide_shadow(int sample,
26 ccl_global TilesInfo *tiles,
27 ccl_global float *unfilteredA,
28 ccl_global float *unfilteredB,
29 ccl_global float *sampleVariance,
30 ccl_global float *sampleVarianceV,
31 ccl_global float *bufferVariance,
33 int buffer_pass_stride,
34 int buffer_denoising_offset,
35 char use_split_variance)
/* Pixel handled by this work-item: rectangle origin plus global ID. */
37 int x = prefilter_rect.x + get_global_id(0);
38 int y = prefilter_rect.y + get_global_id(1);
/* Work-items past the rectangle's upper bounds do nothing. */
39 if(x < prefilter_rect.z && y < prefilter_rect.w) {
40 kernel_filter_divide_shadow(sample,
50 buffer_denoising_offset,
/* OpenCL entry point that dispatches kernel_filter_get_feature() once per
 * pixel of a 2D rectangle.
 *
 * The pixel coordinate is prefilter_rect.x/.y plus the work-item's global ID
 * in dimensions 0/1; the filter is only called while the coordinate is
 * strictly below prefilter_rect.z/.w.
 *
 * NOTE(review): prefilter_rect and several other parameters/arguments are
 * not visible in this text (lines appear to be missing, and each line has a
 * stray leading number) -- confirm against the upstream file.
 */
55 __kernel void kernel_ocl_filter_get_feature(int sample,
56 ccl_global TilesInfo *tiles,
59 ccl_global float *mean,
60 ccl_global float *variance,
62 int buffer_pass_stride,
63 int buffer_denoising_offset,
64 char use_split_variance)
/* Pixel handled by this work-item: rectangle origin plus global ID. */
66 int x = prefilter_rect.x + get_global_id(0);
67 int y = prefilter_rect.y + get_global_id(1);
/* Work-items past the rectangle's upper bounds do nothing. */
68 if(x < prefilter_rect.z && y < prefilter_rect.w) {
69 kernel_filter_get_feature(sample,
76 buffer_denoising_offset,
/* OpenCL entry point that dispatches kernel_filter_detect_outliers() once
 * per pixel of a 2D rectangle.
 *
 * The work-item's global ID (dimensions 0/1) is offset by prefilter_rect.x/.y
 * to obtain (x, y); the filter runs only for coordinates strictly below
 * prefilter_rect.z/.w. The image, variance, depth and output pointers are
 * forwarded unchanged together with prefilter_rect and pass_stride.
 *
 * NOTE(review): the declarations of prefilter_rect and pass_stride, and the
 * function's braces, are not visible in this text (lines appear to be
 * missing) -- confirm against the upstream file.
 */
81 __kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
82 ccl_global float *variance,
83 ccl_global float *depth,
84 ccl_global float *output,
/* Pixel handled by this work-item: rectangle origin plus global ID. */
88 int x = prefilter_rect.x + get_global_id(0);
89 int y = prefilter_rect.y + get_global_id(1);
/* Work-items past the rectangle's upper bounds do nothing. */
90 if(x < prefilter_rect.z && y < prefilter_rect.w) {
91 kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
/* OpenCL entry point that dispatches kernel_filter_combine_halves() once per
 * pixel of a 2D rectangle.
 *
 * (x, y) is prefilter_rect.x/.y plus the work-item's global ID in dimensions
 * 0/1; the filter runs only for coordinates strictly below
 * prefilter_rect.z/.w and receives mean, variance, a, b, prefilter_rect and r
 * unchanged.
 *
 * NOTE(review): the declarations of a, b, prefilter_rect and r, and the
 * function's braces, are not visible in this text (lines appear to be
 * missing) -- confirm against the upstream file.
 */
95 __kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
96 ccl_global float *variance,
/* Pixel handled by this work-item: rectangle origin plus global ID. */
102 int x = prefilter_rect.x + get_global_id(0);
103 int y = prefilter_rect.y + get_global_id(1);
/* Work-items past the rectangle's upper bounds do nothing. */
104 if(x < prefilter_rect.z && y < prefilter_rect.w) {
105 kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r);
/* OpenCL entry point that dispatches kernel_filter_construct_transform()
 * once per pixel of the filter area.
 *
 * Unlike the prefilter kernels, (x, y) here is the raw global ID, i.e. a
 * coordinate relative to the filter area: it is bounded by filter_area.z/.w
 * and filter_area.x/.y is added only when passing the coordinate on.
 *
 * NOTE(review): filter_area, radius, pca_threshold and part of the call's
 * argument list are not visible in this text (lines appear to be missing,
 * and each line has a stray leading number) -- confirm against the upstream
 * file.
 */
109 __kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
110 ccl_global float *transform,
111 ccl_global int *rank,
/* Coordinate relative to the filter area origin. */
118 int x = get_global_id(0);
119 int y = get_global_id(1);
120 if(x < filter_area.z && y < filter_area.w) {
/* Advance both output pointers to this pixel's slot, using filter_area.z as
 * the row stride. */
121 ccl_global int *l_rank = rank + y*filter_area.z + x;
122 ccl_global float *l_transform = transform + y*filter_area.z + x;
/* The call receives the coordinate shifted by the filter area origin, the
 * pixel count of the filter area (filter_area.z*filter_area.w; presumably
 * the stride between per-pixel storage elements -- TODO confirm), and the
 * work-item's flattened index within its work-group. */
123 kernel_filter_construct_transform(buffer,
124 x + filter_area.x, y + filter_area.y,
127 radius, pca_threshold,
128 filter_area.z*filter_area.w,
129 get_local_id(1)*get_local_size(0) + get_local_id(0));
/* OpenCL entry point that dispatches kernel_filter_nlm_calc_difference()
 * once per pixel of rect.
 *
 * (x, y) is the work-item's global ID in dimensions 0/1 plus rect.x/.y; the
 * filter runs only for coordinates strictly below rect.z/.w. All remaining
 * arguments are forwarded unchanged.
 *
 * NOTE(review): the declarations of dy, rect, w, channel_offset, a and k_2,
 * and the function's braces, are not visible in this text (lines appear to
 * be missing) -- confirm against the upstream file.
 */
133 __kernel void kernel_ocl_filter_nlm_calc_difference(int dx,
135 const ccl_global float *ccl_restrict weight_image,
136 const ccl_global float *ccl_restrict variance_image,
137 ccl_global float *difference_image,
/* Pixel handled by this work-item: global ID plus rectangle origin. */
144 int x = get_global_id(0) + rect.x;
145 int y = get_global_id(1) + rect.y;
/* Work-items past the rectangle's upper bounds do nothing. */
146 if(x < rect.z && y < rect.w) {
147 kernel_filter_nlm_calc_difference(x, y, dx, dy, weight_image, variance_image, difference_image, rect, w, channel_offset, a, k_2);
/* OpenCL entry point that dispatches kernel_filter_nlm_blur() once per pixel
 * of rect.
 *
 * (x, y) is the work-item's global ID in dimensions 0/1 plus rect.x/.y; the
 * filter runs only for coordinates strictly below rect.z/.w and reads
 * difference_image / writes out_image as decided by the callee.
 *
 * NOTE(review): the declarations of rect, w and f, and the function's
 * braces, are not visible in this text (lines appear to be missing) --
 * confirm against the upstream file.
 */
151 __kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict difference_image,
152 ccl_global float *out_image,
/* Pixel handled by this work-item: global ID plus rectangle origin. */
157 int x = get_global_id(0) + rect.x;
158 int y = get_global_id(1) + rect.y;
/* Work-items past the rectangle's upper bounds do nothing. */
159 if(x < rect.z && y < rect.w) {
160 kernel_filter_nlm_blur(x, y, difference_image, out_image, rect, w, f);
/* OpenCL entry point that dispatches kernel_filter_nlm_calc_weight() once
 * per pixel of rect.
 *
 * (x, y) is the work-item's global ID in dimensions 0/1 plus rect.x/.y; the
 * filter runs only for coordinates strictly below rect.z/.w.
 *
 * NOTE(review): the declarations of rect, w and f, and the function's
 * braces, are not visible in this text (lines appear to be missing) --
 * confirm against the upstream file.
 */
164 __kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_restrict difference_image,
165 ccl_global float *out_image,
/* Pixel handled by this work-item: global ID plus rectangle origin. */
170 int x = get_global_id(0) + rect.x;
171 int y = get_global_id(1) + rect.y;
/* Work-items past the rectangle's upper bounds do nothing. */
172 if(x < rect.z && y < rect.w) {
173 kernel_filter_nlm_calc_weight(x, y, difference_image, out_image, rect, w, f);
/* OpenCL entry point that dispatches kernel_filter_nlm_update_output() once
 * per pixel of rect.
 *
 * (x, y) is the work-item's global ID in dimensions 0/1 plus rect.x/.y; the
 * filter runs only for coordinates strictly below rect.z/.w. The offset
 * (dx, dy) and all image pointers are forwarded unchanged.
 *
 * NOTE(review): the declarations of dy, rect, w and f, and the function's
 * braces, are not visible in this text (lines appear to be missing) --
 * confirm against the upstream file.
 */
177 __kernel void kernel_ocl_filter_nlm_update_output(int dx,
179 const ccl_global float *ccl_restrict difference_image,
180 const ccl_global float *ccl_restrict image,
181 ccl_global float *out_image,
182 ccl_global float *accum_image,
/* Pixel handled by this work-item: global ID plus rectangle origin. */
187 int x = get_global_id(0) + rect.x;
188 int y = get_global_id(1) + rect.y;
/* Work-items past the rectangle's upper bounds do nothing. */
189 if(x < rect.z && y < rect.w) {
190 kernel_filter_nlm_update_output(x, y, dx, dy, difference_image, image, out_image, accum_image, rect, w, f);
/* OpenCL entry point that dispatches kernel_filter_nlm_normalize() once per
 * pixel of rect.
 *
 * (x, y) is the work-item's global ID in dimensions 0/1 plus rect.x/.y; the
 * filter runs only for coordinates strictly below rect.z/.w.
 *
 * NOTE(review): the declarations of rect and w, and the function's braces,
 * are not visible in this text (lines appear to be missing) -- confirm
 * against the upstream file.
 */
194 __kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *out_image,
195 const ccl_global float *ccl_restrict accum_image,
/* Pixel handled by this work-item: global ID plus rectangle origin. */
199 int x = get_global_id(0) + rect.x;
200 int y = get_global_id(1) + rect.y;
/* Work-items past the rectangle's upper bounds do nothing. */
201 if(x < rect.z && y < rect.w) {
202 kernel_filter_nlm_normalize(x, y, out_image, accum_image, rect, w);
/* OpenCL entry point that dispatches kernel_filter_nlm_construct_gramian()
 * for the pixels where rect and filter_rect overlap.
 *
 * (x, y) is expressed relative to the origin of filter_rect: the start is
 * shifted by max(0, rect.x - filter_rect.x) (likewise for y), and the upper
 * bound is the smaller of filter_rect.z/.w and rect.z/.w translated by the
 * filter_rect origin.
 *
 * NOTE(review): the declarations of rect and filter_rect and most of the
 * call's argument list are not visible in this text (lines appear to be
 * missing, and each line has a stray leading number) -- confirm against the
 * upstream file.
 */
206 __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
208 const ccl_global float *ccl_restrict difference_image,
209 const ccl_global float *ccl_restrict buffer,
210 const ccl_global float *ccl_restrict transform,
211 ccl_global int *rank,
212 ccl_global float *XtWX,
213 ccl_global float3 *XtWY,
/* Skip the part of filter_rect that lies before rect's origin. */
221 int x = get_global_id(0) + max(0, rect.x-filter_rect.x);
222 int y = get_global_id(1) + max(0, rect.y-filter_rect.y);
/* Stay inside both filter_rect and rect. */
223 if(x < min(filter_rect.z, rect.z-filter_rect.x) && y < min(filter_rect.w, rect.w-filter_rect.y)) {
/* The visible last argument is the work-item's flattened index within its
 * work-group. */
224 kernel_filter_nlm_construct_gramian(x, y,
233 get_local_id(1)*get_local_size(0) + get_local_id(0));
/* OpenCL entry point that dispatches kernel_filter_finalize() once per pixel
 * of the filter area.
 *
 * (x, y) is the raw global ID in dimensions 0/1 and is bounded by
 * filter_area.z/.w. The callee also receives the pixel count of the filter
 * area (filter_area.z*filter_area.w).
 *
 * NOTE(review): the declarations of h, filter_area, buffer_params and
 * sample, and the statements that consume storage_ofs, are not visible in
 * this text (lines appear to be missing) -- confirm against the upstream
 * file.
 */
237 __kernel void kernel_ocl_filter_finalize(int w,
239 ccl_global float *buffer,
240 ccl_global int *rank,
241 ccl_global float *XtWX,
242 ccl_global float3 *XtWY,
/* Coordinate relative to the filter area origin. */
247 int x = get_global_id(0);
248 int y = get_global_id(1);
249 if(x < filter_area.z && y < filter_area.w) {
/* Row-major index of this pixel within the filter area; presumably used to
 * offset rank/XtWX/XtWY before the call -- TODO confirm, the use is not
 * visible here. */
250 int storage_ofs = y*filter_area.z+x;
254 kernel_filter_finalize(x, y, w, h, buffer, rank, filter_area.z*filter_area.w, XtWX, XtWY, buffer_params, sample);
/* OpenCL kernel that stores nine global buffer pointers into
 * tiles->buffers[0..8].
 *
 * The parameters are numbered from 1 while the array is indexed from 0, so
 * buffer_N lands in tiles->buffers[N-1]. Only the work-item whose global ID
 * is (0, 0) performs the writes; every other work-item does nothing.
 *
 * NOTE(review): the function's opening and closing braces are not visible
 * in this text (lines appear to be missing, and each line has a stray
 * leading number) -- confirm against the upstream file.
 */
258 __kernel void kernel_ocl_filter_set_tiles(ccl_global TilesInfo* tiles,
259 ccl_global float *buffer_1,
260 ccl_global float *buffer_2,
261 ccl_global float *buffer_3,
262 ccl_global float *buffer_4,
263 ccl_global float *buffer_5,
264 ccl_global float *buffer_6,
265 ccl_global float *buffer_7,
266 ccl_global float *buffer_8,
267 ccl_global float *buffer_9)
/* A single work-item writes the table so the stores happen exactly once. */
269 if((get_global_id(0) == 0) && (get_global_id(1) == 0)) {
270 tiles->buffers[0] = buffer_1;
271 tiles->buffers[1] = buffer_2;
272 tiles->buffers[2] = buffer_3;
273 tiles->buffers[3] = buffer_4;
274 tiles->buffers[4] = buffer_5;
275 tiles->buffers[5] = buffer_6;
276 tiles->buffers[6] = buffer_7;
277 tiles->buffers[7] = buffer_8;
278 tiles->buffers[8] = buffer_9;