2 * Copyright 2011-2017 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
/* Templated common implementation part of all CPU kernels.
 *
 * The idea is that particular .cpp files set the needed optimization flags and
 * simply include this file, without having to copy the actual implementation over.
 */
23 #include "kernel/kernel_compat_cpu.h"
25 #include "kernel/filter/filter_kernel.h"
28 # include "util/util_debug.h"
/* Assertion used by kernel stubs: builds one string literal out of the
 * stringified kernel name and architecture. Negating a string literal always
 * yields 0, so the assert fails unconditionally whenever assertions are
 * enabled, and the message is visible in the failed expression text. With
 * NDEBUG defined, assert() expands to nothing and the stub is silent.
 *
 * NOTE(review): `#arch` stringifies the argument exactly as written, without
 * macro expansion, so STUB_ASSERT(KERNEL_ARCH, ...) prints the text
 * "KERNEL_ARCH" rather than the architecture name it expands to. A two-level
 * stringify helper would be needed to print the real name - confirm whether
 * that is wanted. */
29 # define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
/* CPU entry point for the filter_divide_shadow kernel.
 *
 * The visible body contains a STUB_ASSERT for this kernel and a call that
 * forwards to kernel_filter_divide_shadow(), passing prefilter_rect through
 * load_int4() and buffer_denoising_offset unchanged.
 *
 * NOTE(review): several parameters, call arguments and the lines between the
 * stub assert and the call are not visible in this excerpt; presumably a
 * preprocessor conditional selects between the stub and the real call -
 * confirm against the full file. */
37 void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
43 float *sampleVariance,
44 float *sampleVarianceV,
45 float *bufferVariance,
47 int buffer_pass_stride,
48 int buffer_denoising_offset,
49 bool use_split_variance)
52 STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
54 kernel_filter_divide_shadow(sample, tiles,
61 load_int4(prefilter_rect),
63 buffer_denoising_offset,
/* CPU entry point for the filter_get_feature kernel.
 *
 * The visible body contains a STUB_ASSERT for this kernel and a call that
 * forwards to kernel_filter_get_feature(); `mean` and `variance` are float
 * pointers in the signature, and prefilter_rect is converted with load_int4()
 * before being passed on.
 *
 * NOTE(review): part of the parameter list and of the call's argument list is
 * not visible here; the stub assert and the real call are presumably
 * alternatives chosen at build time - confirm. */
68 void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
74 float *mean, float *variance,
76 int buffer_pass_stride,
77 int buffer_denoising_offset,
78 bool use_split_variance)
81 STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
83 kernel_filter_get_feature(sample, tiles,
87 load_int4(prefilter_rect),
89 buffer_denoising_offset,
/* CPU entry point for the filter_detect_outliers kernel.
 *
 * Forwards x, y and the image, variance, depth and output buffers to
 * kernel_filter_detect_outliers(), together with `rect` converted by
 * load_int4() and pass_stride.
 *
 * NOTE(review): the declarations of `rect` and `pass_stride` are not visible
 * in this excerpt, and the STUB_ASSERT and the call are presumably separated
 * by a preprocessor conditional - confirm. */
94 void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
95 ccl_global float *image,
96 ccl_global float *variance,
97 ccl_global float *depth,
98 ccl_global float *output,
103 STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
105 kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
/* CPU entry point for the filter_combine_halves kernel.
 *
 * Forwards x, y, mean, variance, a, b and r to kernel_filter_combine_halves(),
 * with prefilter_rect converted by load_int4().
 *
 * NOTE(review): apart from x and y the parameter declarations are not visible
 * here, and the STUB_ASSERT is presumably compiled instead of (not in
 * addition to) the call - confirm. */
109 void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
118 STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
120 kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
/* CPU entry point for the filter_construct_transform kernel.
 *
 * NOTE(review): most of the signature and of the forwarded argument list is
 * not visible in this excerpt; the STUB_ASSERT and the real call are
 * presumably alternatives chosen at build time - confirm. */
124 void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
136 STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
/* Advance `transform` by storage_ofs blocks of TRANSFORM_SIZE elements before
 * handing it to the kernel. */
139 transform += storage_ofs*TRANSFORM_SIZE;
140 kernel_filter_construct_transform(buffer,
142 load_int4(prefilter_rect),
/* CPU entry point for the filter_nlm_calc_difference kernel.
 *
 * Forwards dx, dy, weightImage, variance, differenceImage, w, channel_offset,
 * a and k_2 to kernel_filter_nlm_calc_difference(), with `rect` converted by
 * load_int4().
 *
 * NOTE(review): only `dx` and `differenceImage` are visible in the signature
 * here; the STUB_ASSERT is presumably compiled only in stub builds -
 * confirm. */
151 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
155 float *differenceImage,
163 STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
165 kernel_filter_nlm_calc_difference(dx, dy, weightImage, variance, differenceImage, load_int4(rect), w, channel_offset, a, k_2);
/* CPU entry point for the filter_nlm_blur kernel.
 *
 * Forwards differenceImage, outImage, w and f to kernel_filter_nlm_blur(),
 * with `rect` converted by load_int4().
 *
 * NOTE(review): the remaining parameter declarations are not visible here;
 * the STUB_ASSERT is presumably compiled only in stub builds - confirm. */
169 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *differenceImage,
176 STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
178 kernel_filter_nlm_blur(differenceImage, outImage, load_int4(rect), w, f);
/* CPU entry point for the filter_nlm_calc_weight kernel.
 *
 * Forwards differenceImage, outImage, w and f to
 * kernel_filter_nlm_calc_weight(), with `rect` converted by load_int4().
 *
 * NOTE(review): the remaining parameter declarations are not visible here;
 * the STUB_ASSERT is presumably compiled only in stub builds - confirm. */
182 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *differenceImage,
189 STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
191 kernel_filter_nlm_calc_weight(differenceImage, outImage, load_int4(rect), w, f);
/* CPU entry point for the filter_nlm_update_output kernel.
 *
 * Forwards dx, dy, differenceImage, image, outImage, accumImage, w and f to
 * kernel_filter_nlm_update_output(), with `rect` converted by load_int4().
 *
 * NOTE(review): only `dx` and `differenceImage` are visible in the signature
 * here; the STUB_ASSERT is presumably compiled only in stub builds -
 * confirm. */
195 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
197 float *differenceImage,
206 STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
208 kernel_filter_nlm_update_output(dx, dy, differenceImage, image, outImage, accumImage, load_int4(rect), w, f);
/* CPU entry point for the filter_nlm_construct_gramian kernel.
 *
 * Forwards dx, dy, differenceImage, buffer, color_pass, variance_pass,
 * transform, rank, XtWX, XtWY, w, h, f and pass_stride to
 * kernel_filter_nlm_construct_gramian(); both `rect` and `filter_rect` are
 * converted by load_int4() before being passed on.
 *
 * NOTE(review): most parameter declarations are not visible in this excerpt;
 * the STUB_ASSERT is presumably compiled only in stub builds - confirm. */
212 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
214 float *differenceImage,
217 float *variance_pass,
230 STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
232 kernel_filter_nlm_construct_gramian(dx, dy, differenceImage, buffer, color_pass, variance_pass, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride);
/* CPU entry point for the filter_nlm_normalize kernel.
 *
 * Forwards outImage, accumImage and w to kernel_filter_nlm_normalize(), with
 * `rect` converted by load_int4().
 *
 * NOTE(review): the remaining parameter declarations are not visible here;
 * the STUB_ASSERT is presumably compiled only in stub builds - confirm. */
236 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *outImage,
242 STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
244 kernel_filter_nlm_normalize(outImage, accumImage, load_int4(rect), w);
/* CPU entry point for the filter_finalize kernel.
 *
 * Offsets the XtWX and XtWY pointers by storage_ofs blocks, then forwards
 * x, y, w, h, buffer, rank, the offset pointers and sample to
 * kernel_filter_finalize(), with buffer_params converted by load_int4().
 *
 * NOTE(review): the signature beyond `x` is not visible in this excerpt and
 * the definition may continue past the last visible line; the STUB_ASSERT is
 * presumably compiled only in stub builds - confirm. */
248 void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
261 STUB_ASSERT(KERNEL_ARCH, filter_finalize);
/* Skip storage_ofs blocks of XTWX_SIZE / XTWY_SIZE elements respectively. */
263 XtWX += storage_ofs*XTWX_SIZE;
264 XtWY += storage_ofs*XTWY_SIZE;
/* NOTE(review): the literal 1 is the seventh argument; its meaning is defined
 * by kernel_filter_finalize(), which is not visible here. */
266 kernel_filter_finalize(x, y, w, h, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);