Fix Cycles denoising NaNs with a 1 sample renders.
[blender.git] / intern / cycles / kernel / filter / filter_prefilter.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* First step of the shadow prefiltering, performs the shadow division and stores all data
20  * in a nice and easy rectangular array that can be passed to the NLM filter.
21  *
22  * Calculates:
23  * unfiltered: Contains the two half images of the shadow feature pass
24  * sampleVariance: The sample-based variance calculated in the kernel. Note: This calculation is biased in general, and especially here since the variance of the ratio can only be approximated.
25  * sampleVarianceV: Variance of the sample variance estimation, quite noisy (since it's essentially the buffer variance of the two variance halves)
26  * bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy.
27  */
28 ccl_device void kernel_filter_divide_shadow(int sample,
29                                             ccl_global TilesInfo *tiles,
30                                             int x, int y,
31                                             ccl_global float *unfilteredA,
32                                             ccl_global float *unfilteredB,
33                                             ccl_global float *sampleVariance,
34                                             ccl_global float *sampleVarianceV,
35                                             ccl_global float *bufferVariance,
36                                             int4 rect,
37                                             int buffer_pass_stride,
38                                             int buffer_denoising_offset,
39                                             bool use_split_variance)
40 {
41         int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2);
42         int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2);
43         int tile = ytile*3+xtile;
44
45         int offset = tiles->offsets[tile];
46         int stride = tiles->strides[tile];
47         const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) tiles->buffers[tile];
48         center_buffer += (y*stride + x + offset)*buffer_pass_stride;
49         center_buffer += buffer_denoising_offset + 14;
50
51         int buffer_w = align_up(rect.z - rect.x, 4);
52         int idx = (y-rect.y)*buffer_w + (x - rect.x);
53         unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
54         unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
55
56         float varA = center_buffer[2];
57         float varB = center_buffer[5];
58         int odd_sample = (sample+1)/2;
59         int even_sample = sample/2;
60         if(use_split_variance) {
61                 varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample);
62                 varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample);
63         }
64         varA /= max(odd_sample - 1, 1);
65         varB /= max(even_sample - 1, 1);
66
67         sampleVariance[idx]  = 0.5f*(varA + varB) / sample;
68         sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample*sample);
69         bufferVariance[idx]  = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]);
70 }
71
72 /* Load a regular feature from the render buffers into the denoise buffer.
73  * Parameters:
74  * - sample: The sample amount in the buffer, used to normalize the buffer.
75  * - m_offset, v_offset: Render Buffer Pass offsets of mean and variance of the feature.
76  * - x, y: Current pixel
77  * - mean, variance: Target denoise buffers.
78  * - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive).
79  */
80 ccl_device void kernel_filter_get_feature(int sample,
81                                           ccl_global TilesInfo *tiles,
82                                           int m_offset, int v_offset,
83                                           int x, int y,
84                                           ccl_global float *mean,
85                                           ccl_global float *variance,
86                                           int4 rect, int buffer_pass_stride,
87                                           int buffer_denoising_offset,
88                                           bool use_split_variance)
89 {
90         int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2);
91         int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2);
92         int tile = ytile*3+xtile;
93         ccl_global float *center_buffer = ((ccl_global float*) tiles->buffers[tile]) + (tiles->offsets[tile] + y*tiles->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset;
94
95         int buffer_w = align_up(rect.z - rect.x, 4);
96         int idx = (y-rect.y)*buffer_w + (x - rect.x);
97
98         mean[idx] = center_buffer[m_offset] / sample;
99         if (sample > 1) {
100                 if(use_split_variance) {
101                         variance[idx] = max(0.0f, (center_buffer[v_offset] - mean[idx]*mean[idx]*sample) / (sample * (sample-1)));
102                 }
103                 else {
104                         variance[idx] = center_buffer[v_offset] / (sample * (sample-1));
105                 }
106         }
107         else {
108                 /* Can't compute variance with single sample, just set it very high. */
109                 variance[idx] = 1e10f;
110         }
111 }
112
113 ccl_device void kernel_filter_detect_outliers(int x, int y,
114                                               ccl_global float *image,
115                                               ccl_global float *variance,
116                                               ccl_global float *depth,
117                                               ccl_global float *out,
118                                               int4 rect,
119                                               int pass_stride)
120 {
121         int buffer_w = align_up(rect.z - rect.x, 4);
122
123         int n = 0;
124         float values[25];
125         for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
126                 for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
127                         int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
128                         float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
129
130                         /* Find the position of L. */
131                         int i;
132                         for(i = 0; i < n; i++) {
133                                 if(values[i] > L) break;
134                         }
135                         /* Make space for L by shifting all following values to the right. */
136                         for(int j = n; j > i; j--) {
137                                 values[j] = values[j-1];
138                         }
139                         /* Insert L. */
140                         values[i] = L;
141                         n++;
142                 }
143         }
144
145         int idx = (y-rect.y)*buffer_w + (x-rect.x);
146         float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
147
148         float ref = 2.0f*values[(int)(n*0.75f)];
149         float fac = 1.0f;
150         if(L > ref) {
151                 /* The pixel appears to be an outlier.
152                  * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel
153                  * should actually be at the reference value:
154                  * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier.
155                  * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight.
156                  */
157                 float stddev = sqrtf(average(make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride])));
158                 if(L - 3*stddev < ref) {
159                         /* The pixel is an outlier, so negate the depth value to mark it as one.
160                          * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
161                         depth[idx] = -depth[idx];
162                         fac = ref/L;
163                         variance[idx              ] *= fac*fac;
164                         variance[idx + pass_stride] *= fac*fac;
165                         variance[idx+2*pass_stride] *= fac*fac;
166                 }
167         }
168         out[idx              ] = fac*image[idx];
169         out[idx + pass_stride] = fac*image[idx + pass_stride];
170         out[idx+2*pass_stride] = fac*image[idx+2*pass_stride];
171 }
172
173 /* Combine A/B buffers.
174  * Calculates the combined mean and the buffer variance. */
175 ccl_device void kernel_filter_combine_halves(int x, int y,
176                                              ccl_global float *mean,
177                                              ccl_global float *variance,
178                                              ccl_global float *a,
179                                              ccl_global float *b,
180                                              int4 rect, int r)
181 {
182         int buffer_w = align_up(rect.z - rect.x, 4);
183         int idx = (y-rect.y)*buffer_w + (x - rect.x);
184
185         if(mean)     mean[idx] = 0.5f * (a[idx]+b[idx]);
186         if(variance) {
187                 if(r == 0) variance[idx] = 0.25f * (a[idx]-b[idx])*(a[idx]-b[idx]);
188                 else {
189                         variance[idx] = 0.0f;
190                         float values[25];
191                         int numValues = 0;
192                         for(int py = max(y-r, rect.y); py < min(y+r+1, rect.w); py++) {
193                                 for(int px = max(x-r, rect.x); px < min(x+r+1, rect.z); px++) {
194                                         int pidx = (py-rect.y)*buffer_w + (px-rect.x);
195                                         values[numValues++] = 0.25f * (a[pidx]-b[pidx])*(a[pidx]-b[pidx]);
196                                 }
197                         }
198                         /* Insertion-sort the variances (fast enough for 25 elements). */
199                         for(int i = 1; i < numValues; i++) {
200                                 float v = values[i];
201                                 int j;
202                                 for(j = i-1; j >= 0 && values[j] > v; j--)
203                                         values[j+1] = values[j];
204                                 values[j+1] = v;
205                         }
206                         variance[idx] = values[(7*numValues)/8];
207                 }
208         }
209 }
210
211 CCL_NAMESPACE_END