2 * Copyright 2011-2017 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* OpenCL kernel entry points */
19 #include "kernel/kernel_compat_opencl.h"
21 #include "kernel/filter/filter_kernel.h"
/* OpenCL entry point that forwards one pixel to kernel_filter_divide_shadow().
 *
 * Each work-item derives its pixel (x, y) by adding its 2D global id to
 * prefilter_rect.x / prefilter_rect.y, and only calls the filter function when
 * the pixel lies below prefilter_rect.z / prefilter_rect.w.
 *
 * NOTE(review): the declaration of prefilter_rect and most arguments of the
 * inner call are not visible in this view of the file - confirm against the
 * complete source before relying on the parameter order. */
25 __kernel void kernel_ocl_filter_divide_shadow(int sample,
26 ccl_global TilesInfo *tiles,
27 ccl_global float *unfilteredA,
28 ccl_global float *unfilteredB,
29 ccl_global float *sampleVariance,
30 ccl_global float *sampleVarianceV,
31 ccl_global float *bufferVariance,
33 int buffer_pass_stride,
34 int buffer_denoising_offset)
36 int x = prefilter_rect.x + get_global_id(0);
37 int y = prefilter_rect.y + get_global_id(1);
/* .z and .w act as exclusive upper bounds; work-items past them do nothing. */
38 if(x < prefilter_rect.z && y < prefilter_rect.w) {
39 kernel_filter_divide_shadow(sample,
49 buffer_denoising_offset);
/* OpenCL entry point that forwards one pixel to kernel_filter_get_feature().
 *
 * The pixel (x, y) is the work-item's 2D global id offset by
 * prefilter_rect.x / prefilter_rect.y; the filter function is only invoked
 * while x < prefilter_rect.z and y < prefilter_rect.w.
 *
 * NOTE(review): some parameters (including prefilter_rect) and most of the
 * inner call's arguments are not visible in this view - verify against the
 * complete source. */
53 __kernel void kernel_ocl_filter_get_feature(int sample,
54 ccl_global TilesInfo *tiles,
57 ccl_global float *mean,
58 ccl_global float *variance,
60 int buffer_pass_stride,
61 int buffer_denoising_offset)
63 int x = prefilter_rect.x + get_global_id(0);
64 int y = prefilter_rect.y + get_global_id(1);
/* Work-items outside the rectangle's upper bounds are skipped. */
65 if(x < prefilter_rect.z && y < prefilter_rect.w) {
66 kernel_filter_get_feature(sample,
73 buffer_denoising_offset);
/* OpenCL entry point that forwards one pixel to kernel_filter_detect_outliers().
 *
 * (x, y) is the 2D global id offset by prefilter_rect.x / prefilter_rect.y.
 * The call passes the image, variance, depth and output buffers together with
 * prefilter_rect and pass_stride, and is skipped for work-items at or beyond
 * prefilter_rect.z / prefilter_rect.w.
 *
 * NOTE(review): the declarations of prefilter_rect and pass_stride are not
 * visible in this view; presumably they are trailing kernel parameters. */
77 __kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
78 ccl_global float *variance,
79 ccl_global float *depth,
80 ccl_global float *output,
84 int x = prefilter_rect.x + get_global_id(0);
85 int y = prefilter_rect.y + get_global_id(1);
86 if(x < prefilter_rect.z && y < prefilter_rect.w) {
87 kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
/* OpenCL entry point that forwards one pixel to kernel_filter_combine_halves().
 *
 * (x, y) is the 2D global id offset by prefilter_rect.x / prefilter_rect.y;
 * the call is made only while x < prefilter_rect.z and y < prefilter_rect.w.
 *
 * NOTE(review): a, b, prefilter_rect and r are used below but their
 * declarations are not visible in this view - presumably kernel parameters;
 * confirm against the complete source. */
91 __kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
92 ccl_global float *variance,
98 int x = prefilter_rect.x + get_global_id(0);
99 int y = prefilter_rect.y + get_global_id(1);
100 if(x < prefilter_rect.z && y < prefilter_rect.w) {
101 kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r);
/* OpenCL entry point that forwards one pixel to
 * kernel_filter_construct_transform().
 *
 * Unlike the prefilter kernels, (x, y) here is the raw 2D global id, i.e. a
 * position relative to the filter area; filter_area.x / filter_area.y are
 * added only when the coordinates are passed on to the inner function.
 *
 * NOTE(review): filter_area, radius, pca_threshold and part of the inner
 * call's arguments are not declared/visible in this view - verify against the
 * complete source. */
105 __kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
106 ccl_global float *transform,
107 ccl_global int *rank,
114 int x = get_global_id(0);
115 int y = get_global_id(1);
116 if(x < filter_area.z && y < filter_area.w) {
/* Advance both output pointers to this pixel's slot, using a row-major
 * index with filter_area.z as the row width. */
117 ccl_global int *l_rank = rank + y*filter_area.z + x;
118 ccl_global float *l_transform = transform + y*filter_area.z + x;
119 kernel_filter_construct_transform(buffer,
120 x + filter_area.x, y + filter_area.y,
123 radius, pca_threshold,
/* filter_area.z*filter_area.w is the pixel count of the area; presumably it
 * is the stride between successive per-pixel values - TODO confirm. */
124 filter_area.z*filter_area.w,
/* Linear index of this work-item inside its 2D work-group. */
125 get_local_id(1)*get_local_size(0) + get_local_id(0));
/* OpenCL entry point that forwards one pixel to
 * kernel_filter_nlm_calc_difference().
 *
 * (x, y) is the 2D global id offset by rect.x / rect.y; the inner function is
 * called with the (dx, dy) offset, the weight and variance images, the
 * difference image and the remaining scalar parameters, but only for
 * work-items with x < rect.z and y < rect.w.
 *
 * NOTE(review): dy, rect, w, channel_offset, a and k_2 are used below but not
 * declared in the lines visible here - presumably kernel parameters. */
129 __kernel void kernel_ocl_filter_nlm_calc_difference(int dx,
131 const ccl_global float *ccl_restrict weight_image,
132 const ccl_global float *ccl_restrict variance_image,
133 ccl_global float *difference_image,
140 int x = get_global_id(0) + rect.x;
141 int y = get_global_id(1) + rect.y;
142 if(x < rect.z && y < rect.w) {
143 kernel_filter_nlm_calc_difference(x, y, dx, dy, weight_image, variance_image, difference_image, rect, w, channel_offset, a, k_2);
/* OpenCL entry point that forwards one pixel to kernel_filter_nlm_blur().
 *
 * (x, y) is the 2D global id offset by rect.x / rect.y; work-items with
 * x >= rect.z or y >= rect.w do nothing.
 *
 * NOTE(review): rect, w and f are used below but their declarations are not
 * visible in this view - presumably kernel parameters. */
147 __kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict difference_image,
148 ccl_global float *out_image,
153 int x = get_global_id(0) + rect.x;
154 int y = get_global_id(1) + rect.y;
155 if(x < rect.z && y < rect.w) {
156 kernel_filter_nlm_blur(x, y, difference_image, out_image, rect, w, f);
/* OpenCL entry point that forwards one pixel to kernel_filter_nlm_calc_weight().
 *
 * (x, y) is the 2D global id offset by rect.x / rect.y; work-items with
 * x >= rect.z or y >= rect.w do nothing.
 *
 * NOTE(review): rect, w and f are used below but their declarations are not
 * visible in this view - presumably kernel parameters. */
160 __kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_restrict difference_image,
161 ccl_global float *out_image,
166 int x = get_global_id(0) + rect.x;
167 int y = get_global_id(1) + rect.y;
168 if(x < rect.z && y < rect.w) {
169 kernel_filter_nlm_calc_weight(x, y, difference_image, out_image, rect, w, f);
/* OpenCL entry point that forwards one pixel to
 * kernel_filter_nlm_update_output().
 *
 * (x, y) is the 2D global id offset by rect.x / rect.y. The inner function
 * receives the (dx, dy) offset, the difference and source images, and the
 * out_image / accum_image buffers; it is called only while x < rect.z and
 * y < rect.w.
 *
 * NOTE(review): dy, rect, w and f are used below but not declared in the
 * lines visible here - presumably kernel parameters. */
173 __kernel void kernel_ocl_filter_nlm_update_output(int dx,
175 const ccl_global float *ccl_restrict difference_image,
176 const ccl_global float *ccl_restrict image,
177 ccl_global float *out_image,
178 ccl_global float *accum_image,
183 int x = get_global_id(0) + rect.x;
184 int y = get_global_id(1) + rect.y;
185 if(x < rect.z && y < rect.w) {
186 kernel_filter_nlm_update_output(x, y, dx, dy, difference_image, image, out_image, accum_image, rect, w, f);
/* OpenCL entry point that forwards one pixel to kernel_filter_nlm_normalize().
 *
 * (x, y) is the 2D global id offset by rect.x / rect.y; work-items with
 * x >= rect.z or y >= rect.w do nothing.
 *
 * NOTE(review): rect and w are used below but their declarations are not
 * visible in this view - presumably kernel parameters. */
190 __kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *out_image,
191 const ccl_global float *ccl_restrict accum_image,
195 int x = get_global_id(0) + rect.x;
196 int y = get_global_id(1) + rect.y;
197 if(x < rect.z && y < rect.w) {
198 kernel_filter_nlm_normalize(x, y, out_image, accum_image, rect, w);
/* OpenCL entry point that forwards one pixel to
 * kernel_filter_nlm_construct_gramian().
 *
 * The processed range is bounded by both rect and filter_rect: the start
 * offset is max(0, rect - filter_rect) per axis, and the exclusive end is the
 * smaller of the filter_rect size (.z / .w) and rect's end shifted by
 * filter_rect's origin.
 *
 * NOTE(review): this suggests (x, y) is expressed relative to filter_rect's
 * origin - confirm. rect, filter_rect and nearly all arguments of the inner
 * call are not visible in this view of the file. */
202 __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
204 const ccl_global float *ccl_restrict difference_image,
205 const ccl_global float *ccl_restrict buffer,
206 const ccl_global float *ccl_restrict transform,
207 ccl_global int *rank,
208 ccl_global float *XtWX,
209 ccl_global float3 *XtWY,
217 int x = get_global_id(0) + max(0, rect.x-filter_rect.x);
218 int y = get_global_id(1) + max(0, rect.y-filter_rect.y);
219 if(x < min(filter_rect.z, rect.z-filter_rect.x) && y < min(filter_rect.w, rect.w-filter_rect.y)) {
220 kernel_filter_nlm_construct_gramian(x, y,
/* Linear index of this work-item inside its 2D work-group. */
229 get_local_id(1)*get_local_size(0) + get_local_id(0));
/* OpenCL entry point that forwards one pixel to kernel_filter_finalize().
 *
 * (x, y) is the raw 2D global id and is checked against filter_area.z /
 * filter_area.w before the inner function is called with the buffer, rank,
 * XtWX and XtWY pointers, the area's pixel count, buffer_params and sample.
 *
 * NOTE(review): h, filter_area, buffer_params and sample are used below but
 * not declared in the lines visible here - presumably kernel parameters. */
233 __kernel void kernel_ocl_filter_finalize(int w,
235 ccl_global float *buffer,
236 ccl_global int *rank,
237 ccl_global float *XtWX,
238 ccl_global float3 *XtWY,
243 int x = get_global_id(0);
244 int y = get_global_id(1);
245 if(x < filter_area.z && y < filter_area.w) {
/* Row-major index of this pixel within the filter area (row width is
 * filter_area.z). NOTE(review): its use is not visible in this view;
 * presumably it offsets rank/XtWX/XtWY before the call - confirm. */
246 int storage_ofs = y*filter_area.z+x;
250 kernel_filter_finalize(x, y, w, h, buffer, rank, filter_area.z*filter_area.w, XtWX, XtWY, buffer_params, sample);
/* OpenCL kernel that stores nine buffer pointers into a TilesInfo structure.
 *
 * buffer_1 .. buffer_9 are written to tiles->buffers[0] .. tiles->buffers[8]
 * in order (buffer_N goes to index N-1). */
254 __kernel void kernel_ocl_filter_set_tiles(ccl_global TilesInfo* tiles,
255 ccl_global float *buffer_1,
256 ccl_global float *buffer_2,
257 ccl_global float *buffer_3,
258 ccl_global float *buffer_4,
259 ccl_global float *buffer_5,
260 ccl_global float *buffer_6,
261 ccl_global float *buffer_7,
262 ccl_global float *buffer_8,
263 ccl_global float *buffer_9)
/* Only the work-item with global id (0, 0) performs the writes, so the
 * pointers are stored once regardless of how many work-items are launched. */
265 if((get_global_id(0) == 0) && (get_global_id(1) == 0)) {
266 tiles->buffers[0] = buffer_1;
267 tiles->buffers[1] = buffer_2;
268 tiles->buffers[2] = buffer_3;
269 tiles->buffers[3] = buffer_4;
270 tiles->buffers[4] = buffer_5;
271 tiles->buffers[5] = buffer_6;
272 tiles->buffers[6] = buffer_7;
273 tiles->buffers[7] = buffer_8;
274 tiles->buffers[8] = buffer_9;