/*
 * Copyright 2011-2017 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Templated common implementation part of all CPU kernels.
 *
 * The idea is that a particular .cpp file sets the needed optimization flags
 * and simply includes this file, without worrying about copying the actual
 * implementation over.
 */
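
/* For illustration only: a per-architecture translation unit that includes
 * this header might look roughly like the sketch below. The file layout, the
 * WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 guard and the exact macro spellings are
 * assumptions based on the pattern described above, not taken from this file.
 *
 *   #include "util/util_optimization.h"
 *
 *   #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
 *   #  define KERNEL_STUB
 *   #else
 *   #  define __KERNEL_SSE2__
 *   #endif
 *
 *   #define KERNEL_ARCH cpu_sse2
 *   #include "kernel/kernels/cpu/filter_cpu_impl.h"
 *
 * Each such file is compiled with its own optimization flags, so the
 * implementation below is instantiated once per supported instruction set and
 * falls back to STUB_ASSERT() stubs when an instruction set is not built.
 */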

#include "kernel/kernel_compat_cpu.h"

#include "kernel/filter/filter_kernel.h"

#ifdef KERNEL_STUB
#  include "util/util_debug.h"
#  define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
#endif
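
/* Denoising filter entry points.
 *
 * Each function below is a thin wrapper that forwards its arguments to the
 * matching kernel_filter_*() implementation from kernel/filter/filter_kernel.h;
 * in a stub build for this architecture the wrapper instead reports the call
 * through STUB_ASSERT(). The filter_nlm_* entry points are the individual
 * steps of the non-local means filter: per-pixel difference, blur, weight
 * computation, output accumulation and normalization.
 */

/* Compute the two unfiltered half estimates of the shadow feature together
 * with their variance estimates (sampleVariance, sampleVarianceV,
 * bufferVariance) for prefiltering. */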
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
                                                     float *sampleVariance,
                                                     float *sampleVarianceV,
                                                     float *bufferVariance,
                                                     int buffer_pass_stride,
                                                     int buffer_denoising_offset)
  STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
  kernel_filter_divide_shadow(sample, tiles,
                              load_int4(prefilter_rect),
                              buffer_denoising_offset);
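
/* Fetch one denoising feature pass from the render buffer and write its
 * per-pixel mean and variance for prefiltering. */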
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
                                                   float *mean, float *variance,
                                                   int buffer_pass_stride,
                                                   int buffer_denoising_offset)
  STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
  kernel_filter_get_feature(sample, tiles,
                            load_int4(prefilter_rect),
                            buffer_denoising_offset);
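
/* Detect outlier pixels (e.g. fireflies) in the noisy image so they can be
 * suppressed before they distort the filter weights. */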
void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
                                                       ccl_global float *image,
                                                       ccl_global float *variance,
                                                       ccl_global float *depth,
                                                       ccl_global float *output,
  STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
  kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
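
/* Combine the two half buffers a and b into a per-pixel mean and variance
 * estimate. */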
void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
  STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
  kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
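
/* Construct the per-pixel feature space transform used by the weighted least
 * squares fit; each pixel's result is stored at storage_ofs inside the
 * transform array. */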
void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
  STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
  transform += storage_ofs*TRANSFORM_SIZE;
  kernel_filter_construct_transform(buffer,
                                    load_int4(prefilter_rect),
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
                                                           float *difference_image,
  STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
  kernel_filter_nlm_calc_difference(dx, dy, weight_image, variance, difference_image, load_int4(rect), w, channel_offset, a, k_2);
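
/* Non-local means: blur the per-pixel difference image over the patch
 * window. */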
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image,
  STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
  kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), w, f);
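
/* Non-local means: turn the blurred differences into filter weights. */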
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image,
  STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
  kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), w, f);
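
/* Non-local means: accumulate the weighted, (dx, dy)-shifted image into the
 * output and weight accumulation buffers. */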
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
                                                         float *difference_image,
  STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
  kernel_filter_nlm_update_output(dx, dy, difference_image, image, out_image, accum_image, load_int4(rect), w, f);
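
/* Accumulate the weighted normal equations (XtWX and XtWY) of the per-pixel
 * linear regression over the non-local neighborhood. */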
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
                                                             float *difference_image,
  STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
  kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride);
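
/* Non-local means: divide the accumulated output by the accumulated
 * weights. */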
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
  STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
  kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), w);
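
/* Solve the accumulated normal equations for the pixel's regression, using the
 * per-pixel entries at storage_ofs in XtWX and XtWY, and write the final
 * denoised value. */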
void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
  STUB_ASSERT(KERNEL_ARCH, filter_finalize);
  XtWX += storage_ofs*XTWX_SIZE;
  XtWY += storage_ofs*XTWY_SIZE;
  kernel_filter_finalize(x, y, w, h, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);