Cycles: Improve denoising speed on GPUs with small tile sizes
[blender.git] / intern / cycles / kernel / kernels / cpu / filter_cpu.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* Templated declarations shared by all CPU kernels. */
18
/* Prototype of the `filter_divide_shadow` kernel entry point. The final symbol
 * name is produced by KERNEL_FUNCTION_FULL_NAME, which is defined outside this
 * file. Returns nothing; any results are delivered through the pointer
 * arguments.
 *
 * Parameters: `sample` and the coordinate pair `x`/`y` are plain ints, `tiles`
 * is a TilesInfo pointer, followed by five float buffers, an int array
 * `prefilter_rect` and two int layout values (`buffer_pass_stride`,
 * `buffer_denoising_offset`).
 *
 * NOTE(review): presumably the first step of shadow prefiltering, reading the
 * tile buffers at pixel (x, y) and writing two unfiltered shadow estimates
 * plus variance estimates; which buffers are inputs vs. outputs is not visible
 * from this declaration -- confirm against the implementation. */
19 void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
20                                                      TilesInfo *tiles,
21                                                      int x,
22                                                      int y,
23                                                      float *unfilteredA,
24                                                      float *unfilteredB,
25                                                      float *sampleV,
26                                                      float *sampleVV,
27                                                      float *bufferV,
28                                                      int* prefilter_rect,
29                                                      int buffer_pass_stride,
30                                                      int buffer_denoising_offset);
31
/* Prototype of the `filter_get_feature` kernel entry point (symbol name built
 * by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns nothing; results
 * are delivered through the pointer arguments.
 *
 * Parameters: `sample`, two int offsets (`m_offset`, `v_offset`), the
 * coordinate pair `x`/`y`, a TilesInfo pointer, two float buffers (`mean`,
 * `variance`), an int array `prefilter_rect` and two int layout values.
 *
 * NOTE(review): presumably copies one feature pass for pixel (x, y) out of the
 * tile buffers, with `m_offset`/`v_offset` selecting the mean and variance
 * channels -- confirm against the implementation. */
32 void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
33                                                    TilesInfo *tiles,
34                                                    int m_offset,
35                                                    int v_offset,
36                                                    int x,
37                                                    int y,
38                                                    float *mean,
39                                                    float *variance,
40                                                    int* prefilter_rect,
41                                                    int buffer_pass_stride,
42                                                    int buffer_denoising_offset);
43
/* Prototype of the `filter_detect_outliers` kernel entry point (symbol name
 * built by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns nothing.
 *
 * Parameters: the coordinate pair `x`/`y`, four float buffers (`image`,
 * `variance`, `depth`, `output`), an int array `rect` and an int
 * `pass_stride`.
 *
 * Unlike the other prototypes in this header, the float buffers here carry
 * the `ccl_global` qualifier; that macro is defined outside this file.
 *
 * NOTE(review): presumably inspects the pixel at (x, y) for outlier values and
 * writes a corrected result to `output` -- confirm against the
 * implementation. */
44 void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
45                                                        ccl_global float *image,
46                                                        ccl_global float *variance,
47                                                        ccl_global float *depth,
48                                                        ccl_global float *output,
49                                                        int *rect,
50                                                        int pass_stride);
51
/* Prototype of the `filter_combine_halves` kernel entry point (symbol name
 * built by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns nothing.
 *
 * Parameters: the coordinate pair `x`/`y`, four float buffers (`mean`,
 * `variance`, `a`, `b`), an int array `prefilter_rect` and an int `r`.
 *
 * NOTE(review): presumably merges the two half buffers `a` and `b` into a
 * combined `mean` and a `variance` estimate at (x, y), with `r` acting as a
 * neighbourhood radius -- confirm against the implementation. */
52 void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
53                                                       float *mean,
54                                                       float *variance,
55                                                       float *a,
56                                                       float *b,
57                                                       int* prefilter_rect,
58                                                       int r);
59
/* Prototype of the `filter_construct_transform` kernel entry point (symbol
 * name built by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns
 * nothing.
 *
 * Parameters: a float `buffer`, the coordinate pair `x`/`y`, an int
 * `storage_ofs`, a float `transform` array, an int `rank` array, an int array
 * `rect`, and the scalars `pass_stride`, `radius` and `pca_threshold`.
 *
 * NOTE(review): presumably derives a per-pixel feature-space transform and its
 * rank from the neighbourhood of (x, y), storing them at `storage_ofs`;
 * `pca_threshold` suggests a PCA-based reduction -- confirm against the
 * implementation. */
60 void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
61                                                            int x,
62                                                            int y,
63                                                            int storage_ofs,
64                                                            float *transform,
65                                                            int *rank,
66                                                            int* rect,
67                                                            int pass_stride,
68                                                            int radius,
69                                                            float pca_threshold);
70
/* Prototype of the `filter_nlm_calc_difference` kernel entry point (symbol
 * name built by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns
 * nothing.
 *
 * Parameters: an int offset pair `dx`/`dy`, three float buffers
 * (`weight_image`, `variance`, `difference_image`), an int array `rect`, the
 * ints `stride` and `channel_offset`, and the float scalars `a` and `k_2`.
 *
 * NOTE(review): presumably the non-local-means step that computes per-pixel
 * differences between the image and a copy shifted by (dx, dy), normalised
 * using `variance`, `a` and `k_2` -- confirm against the implementation. */
71 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
72                                                            int dy,
73                                                            float *weight_image,
74                                                            float *variance,
75                                                            float *difference_image,
76                                                            int* rect,
77                                                            int stride,
78                                                            int channel_offset,
79                                                            float a,
80                                                            float k_2);
81
/* Prototype of the `filter_nlm_blur` kernel entry point (symbol name built by
 * KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns nothing.
 *
 * Parameters: two float buffers (`difference_image`, `out_image`), an int
 * array `rect`, and the ints `stride` and `f`.
 *
 * NOTE(review): presumably blurs `difference_image` into `out_image` over the
 * region given by `rect`, with `f` as the blur half-width -- confirm against
 * the implementation. */
82 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image,
83                                                 float *out_image,
84                                                 int* rect,
85                                                 int stride,
86                                                 int f);
87
/* Prototype of the `filter_nlm_calc_weight` kernel entry point (symbol name
 * built by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns nothing.
 *
 * Parameters: two float buffers (`difference_image`, `out_image`), an int
 * array `rect`, and the ints `stride` and `f`.
 *
 * NOTE(review): presumably converts the (blurred) differences into
 * non-local-means weights written to `out_image` -- confirm against the
 * implementation. */
88 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image,
89                                                        float *out_image,
90                                                        int* rect,
91                                                        int stride,
92                                                        int f);
93
/* Prototype of the `filter_nlm_update_output` kernel entry point (symbol name
 * built by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns nothing.
 *
 * Parameters: an int offset pair `dx`/`dy`, four float buffers
 * (`difference_image`, `image`, `out_image`, `accum_image`), an int array
 * `rect`, and the ints `stride` and `f`.
 *
 * NOTE(review): presumably accumulates the weighted contribution of the image
 * shifted by (dx, dy) into `out_image` and the matching weight sum into
 * `accum_image` -- confirm against the implementation. */
94 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
95                                                          int dy,
96                                                          float *difference_image,
97                                                          float *image,
98                                                          float *out_image,
99                                                          float *accum_image,
100                                                          int* rect,
101                                                          int stride,
102                                                          int f);
103
/* Prototype of the `filter_nlm_construct_gramian` kernel entry point (symbol
 * name built by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns
 * nothing.
 *
 * Parameters: an int offset pair `dx`/`dy`, float buffers `difference_image`,
 * `buffer` and `transform`, an int `rank` array, a float `XtWX` array, a
 * float3 `XtWY` array, two int arrays (`rect`, `filter_window`), and the ints
 * `stride`, `f` and `pass_stride`.
 *
 * NOTE(review): the names `XtWX`/`XtWY` suggest the accumulated terms of a
 * weighted least-squares system (X^T W X and X^T W y), updated for the shift
 * (dx, dy) -- confirm against the implementation. */
104 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
105                                                              int dy,
106                                                              float *difference_image,
107                                                              float *buffer,
108                                                              float *transform,
109                                                              int *rank,
110                                                              float *XtWX,
111                                                              float3 *XtWY,
112                                                              int *rect,
113                                                              int *filter_window,
114                                                              int stride,
115                                                              int f,
116                                                              int pass_stride);
117
/* Prototype of the `filter_nlm_normalize` kernel entry point (symbol name
 * built by KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns nothing.
 *
 * Parameters: two float buffers (`out_image`, `accum_image`), an int array
 * `rect` and an int `stride`.
 *
 * NOTE(review): presumably divides `out_image` by the accumulated weights in
 * `accum_image` over the region given by `rect` -- confirm against the
 * implementation. */
118 void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
119                                                      float *accum_image,
120                                                      int* rect,
121                                                      int stride);
122
/* Prototype of the `filter_finalize` kernel entry point (symbol name built by
 * KERNEL_FUNCTION_FULL_NAME, defined elsewhere). Returns nothing.
 *
 * Parameters: the coordinate pair `x`/`y`, an int `storage_ofs`, a float
 * `buffer`, an int `rank` array, a float `XtWX` array, a float3 `XtWY` array,
 * an int array `buffer_params` and an int `sample`.
 *
 * NOTE(review): presumably solves the per-pixel system held in `XtWX`/`XtWY`
 * at `storage_ofs` and writes the result for pixel (x, y) into `buffer` --
 * confirm against the implementation. */
123 void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
124                                                 int y,
125                                                 int storage_ofs,
126                                                 float *buffer,
127                                                 int *rank,
128                                                 float *XtWX,
129                                                 float3 *XtWY,
130                                                 int *buffer_params,
131                                                 int sample);
132
/* Drop the KERNEL_ARCH macro at the end of this header.
 * NOTE(review): presumably so that an including file can define it again for
 * another architecture before re-including these declarations -- confirm
 * against the includer. */
133 #undef KERNEL_ARCH