Cycles: Improve denoising speed on GPUs with small tile sizes
[blender.git] / intern / cycles / device / device_denoising.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __DEVICE_DENOISING_H__
18 #define __DEVICE_DENOISING_H__
19
20 #include "device/device.h"
21
22 #include "render/buffers.h"
23
24 #include "kernel/filter/filter_defines.h"
25
26 CCL_NAMESPACE_BEGIN
27
28 class DenoisingTask {
29 public:
30         /* Parameters of the denoising algorithm. */
31         int radius;
32         float nlm_k_2;
33         float pca_threshold;
34
35         /* Pointer and parameters of the RenderBuffers. */
36         struct RenderBuffers {
37                 int denoising_data_offset;
38                 int denoising_clean_offset;
39                 int pass_stride;
40                 int offset;
41                 int stride;
42                 device_ptr ptr;
43                 int samples;
44         } render_buffer;
45
46         TilesInfo *tiles;
47         device_vector<int> tiles_mem;
48         void tiles_from_rendertiles(RenderTile *rtiles);
49
50         int4 rect;
51         int4 filter_area;
52
53         struct DeviceFunctions {
54                 function<bool(device_ptr image_ptr,    /* Contains the values that are smoothed. */
55                               device_ptr guide_ptr,    /* Contains the values that are used to calculate weights. */
56                               device_ptr variance_ptr, /* Contains the variance of the guide image. */
57                               device_ptr out_ptr       /* The filtered output is written into this image. */
58                               )> non_local_means;
59                 function<bool(device_ptr color_ptr,
60                               device_ptr color_variance_ptr,
61                               device_ptr output_ptr
62                               )> reconstruct;
63                 function<bool()> construct_transform;
64
65                 function<bool(device_ptr a_ptr,
66                               device_ptr b_ptr,
67                               device_ptr mean_ptr,
68                               device_ptr variance_ptr,
69                               int r,
70                               int4 rect
71                               )> combine_halves;
72                 function<bool(device_ptr a_ptr,
73                               device_ptr b_ptr,
74                               device_ptr sample_variance_ptr,
75                               device_ptr sv_variance_ptr,
76                               device_ptr buffer_variance_ptr
77                               )> divide_shadow;
78                 function<bool(int mean_offset,
79                               int variance_offset,
80                               device_ptr mean_ptr,
81                               device_ptr variance_ptr
82                               )> get_feature;
83                 function<bool(device_ptr image_ptr,
84                               device_ptr variance_ptr,
85                               device_ptr depth_ptr,
86                               device_ptr output_ptr
87                               )> detect_outliers;
88                 function<bool(device_ptr*)> set_tiles;
89         } functions;
90
91         /* Stores state of the current Reconstruction operation,
92          * which is accessed by the device in order to perform the operation. */
93         struct ReconstructionState {
94                 device_ptr temporary_1_ptr; /* There two images are used as temporary storage. */
95                 device_ptr temporary_2_ptr;
96
97                 int4 filter_window;
98                 int4 buffer_params;
99
100                 int source_w;
101                 int source_h;
102         } reconstruction_state;
103
104         /* Stores state of the current NLM operation,
105          * which is accessed by the device in order to perform the operation. */
106         struct NLMState {
107                 device_ptr temporary_1_ptr; /* There three images are used as temporary storage. */
108                 device_ptr temporary_2_ptr;
109                 device_ptr temporary_3_ptr;
110
111                 int r;      /* Search radius of the filter. */
112                 int f;      /* Patch size of the filter. */
113                 float a;    /* Variance compensation factor in the MSE estimation. */
114                 float k_2;  /* Squared value of the k parameter of the filter. */
115
116                 void set_parameters(int r_, int f_, float a_, float k_2_) { r = r_; f = f_; a = a_, k_2 = k_2_; }
117         } nlm_state;
118
119         struct Storage {
120                 device_only_memory<float>  transform;
121                 device_only_memory<int>    rank;
122                 device_only_memory<float>  XtWX;
123                 device_only_memory<float3> XtWY;
124                 device_only_memory<float>  temporary_1;
125                 device_only_memory<float>  temporary_2;
126                 device_only_memory<float>  temporary_color;
127                 int w;
128                 int h;
129
130                 Storage(Device *device)
131                 : transform(device, "denoising transform"),
132                   rank(device, "denoising rank"),
133                   XtWX(device, "denoising XtWX"),
134                   XtWY(device, "denoising XtWY"),
135                   temporary_1(device, "denoising NLM temporary 1"),
136                   temporary_2(device, "denoising NLM temporary 2"),
137                   temporary_color(device, "denoising temporary color")
138                 {}
139         } storage;
140
141         DenoisingTask(Device *device);
142         ~DenoisingTask();
143
144         void init_from_devicetask(const DeviceTask &task);
145
146         bool run_denoising();
147
148         struct DenoiseBuffers {
149                 int pass_stride;
150                 int passes;
151                 int stride;
152                 int h;
153                 int width;
154                 device_only_memory<float> mem;
155
156                 DenoiseBuffers(Device *device)
157                 : mem(device, "denoising pixel buffer")
158             {}
159         } buffer;
160
161 protected:
162         Device *device;
163 };
164
165 CCL_NAMESPACE_END
166
167 #endif /* __DEVICE_DENOISING_H__ */