2 * Copyright 2011-2017 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 /* First step of the shadow prefiltering, performs the shadow division and stores all data
20 * in a nice and easy rectangular array that can be passed to the NLM filter.
23 * unfiltered: Contains the two half images of the shadow feature pass
24 * sampleVariance: The sample-based variance calculated in the kernel. Note: This calculation is biased in general, and especially here since the variance of the ratio can only be approximated.
25 * sampleVarianceV: Variance of the sample variance estimation, quite noisy (since it's essentially the buffer variance of the two variance halves)
26 * bufferVariance: The buffer-based variance of the shadow feature. Unbiased, but quite noisy.
28 ccl_device void kernel_filter_divide_shadow(int sample,
29 ccl_global TilesInfo *tiles,
31 ccl_global float *unfilteredA,
32 ccl_global float *unfilteredB,
33 ccl_global float *sampleVariance,
34 ccl_global float *sampleVarianceV,
35 ccl_global float *bufferVariance,
37 int buffer_pass_stride,
38 int buffer_denoising_offset,
39 bool use_split_variance)
41 int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2);
42 int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2);
43 int tile = ytile*3+xtile;
45 int offset = tiles->offsets[tile];
46 int stride = tiles->strides[tile];
47 ccl_global float ccl_restrict_ptr center_buffer = (ccl_global float*) tiles->buffers[tile];
48 center_buffer += (y*stride + x + offset)*buffer_pass_stride;
49 center_buffer += buffer_denoising_offset + 14;
51 int buffer_w = align_up(rect.z - rect.x, 4);
52 int idx = (y-rect.y)*buffer_w + (x - rect.x);
53 unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f);
54 unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f);
56 float varA = center_buffer[2];
57 float varB = center_buffer[5];
58 int odd_sample = (sample+1)/2;
59 int even_sample = sample/2;
60 if(use_split_variance) {
61 varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample);
62 varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample);
64 varA /= (odd_sample - 1);
65 varB /= (even_sample - 1);
67 sampleVariance[idx] = 0.5f*(varA + varB) / sample;
68 sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample*sample);
69 bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]);
72 /* Load a regular feature from the render buffers into the denoise buffer.
74 * - sample: The sample amount in the buffer, used to normalize the buffer.
75 * - m_offset, v_offset: Render Buffer Pass offsets of mean and variance of the feature.
76 * - x, y: Current pixel
77 * - mean, variance: Target denoise buffers.
78 * - rect: The prefilter area (lower pixels inclusive, upper pixels exclusive).
80 ccl_device void kernel_filter_get_feature(int sample,
81 ccl_global TilesInfo *tiles,
82 int m_offset, int v_offset,
84 ccl_global float *mean,
85 ccl_global float *variance,
86 int4 rect, int buffer_pass_stride,
87 int buffer_denoising_offset,
88 bool use_split_variance)
90 int xtile = (x < tiles->x[1])? 0: ((x < tiles->x[2])? 1: 2);
91 int ytile = (y < tiles->y[1])? 0: ((y < tiles->y[2])? 1: 2);
92 int tile = ytile*3+xtile;
93 ccl_global float *center_buffer = ((ccl_global float*) tiles->buffers[tile]) + (tiles->offsets[tile] + y*tiles->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset;
95 int buffer_w = align_up(rect.z - rect.x, 4);
96 int idx = (y-rect.y)*buffer_w + (x - rect.x);
98 mean[idx] = center_buffer[m_offset] / sample;
99 if(use_split_variance) {
100 variance[idx] = max(0.0f, (center_buffer[v_offset] - mean[idx]*mean[idx]*sample) / (sample * (sample-1)));
103 variance[idx] = center_buffer[v_offset] / (sample * (sample-1));
107 ccl_device void kernel_filter_detect_outliers(int x, int y,
108 ccl_global float *image,
109 ccl_global float *variance,
110 ccl_global float *depth,
111 ccl_global float *out,
115 int buffer_w = align_up(rect.z - rect.x, 4);
119 for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
120 for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
121 int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
122 float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
124 /* Find the position of L. */
126 for(i = 0; i < n; i++) {
127 if(values[i] > L) break;
129 /* Make space for L by shifting all following values to the right. */
130 for(int j = n; j > i; j--) {
131 values[j] = values[j-1];
139 int idx = (y-rect.y)*buffer_w + (x-rect.x);
140 float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
142 float ref = 2.0f*values[(int)(n*0.75f)];
145 /* If the pixel is an outlier, negate the depth value to mark it as one.
146 * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
147 depth[idx] = -depth[idx];
149 variance[idx ] *= fac*fac;
150 variance[idx + pass_stride] *= fac*fac;
151 variance[idx+2*pass_stride] *= fac*fac;
153 out[idx ] = fac*image[idx];
154 out[idx + pass_stride] = fac*image[idx + pass_stride];
155 out[idx+2*pass_stride] = fac*image[idx+2*pass_stride];
158 /* Combine A/B buffers.
159 * Calculates the combined mean and the buffer variance. */
160 ccl_device void kernel_filter_combine_halves(int x, int y,
161 ccl_global float *mean,
162 ccl_global float *variance,
167 int buffer_w = align_up(rect.z - rect.x, 4);
168 int idx = (y-rect.y)*buffer_w + (x - rect.x);
170 if(mean) mean[idx] = 0.5f * (a[idx]+b[idx]);
172 if(r == 0) variance[idx] = 0.25f * (a[idx]-b[idx])*(a[idx]-b[idx]);
174 variance[idx] = 0.0f;
177 for(int py = max(y-r, rect.y); py < min(y+r+1, rect.w); py++) {
178 for(int px = max(x-r, rect.x); px < min(x+r+1, rect.z); px++) {
179 int pidx = (py-rect.y)*buffer_w + (px-rect.x);
180 values[numValues++] = 0.25f * (a[pidx]-b[pidx])*(a[pidx]-b[pidx]);
183 /* Insertion-sort the variances (fast enough for 25 elements). */
184 for(int i = 1; i < numValues; i++) {
187 for(j = i-1; j >= 0 && values[j] > v; j--)
188 values[j+1] = values[j];
191 variance[idx] = values[(7*numValues)/8];