68304e14143a2ecbb05289554f0723c4171e09f2
[blender.git] / intern / cycles / kernel / filter / filter_transform_gpu.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 ccl_device void kernel_filter_construct_transform(ccl_global float ccl_restrict_ptr buffer,
20                                                   int x, int y, int4 rect,
21                                                   int pass_stride,
22                                                   ccl_global float *transform,
23                                                   ccl_global int *rank,
24                                                   int radius, float pca_threshold,
25                                                   int transform_stride, int localIdx)
26 {
27         int buffer_w = align_up(rect.z - rect.x, 4);
28
29 #ifdef __KERNEL_CUDA__
30         ccl_local float shared_features[DENOISE_FEATURES*CCL_MAX_LOCAL_SIZE];
31         ccl_local_param float *features = shared_features + localIdx*DENOISE_FEATURES;
32 #else
33         float features[DENOISE_FEATURES];
34 #endif
35
36         /* === Calculate denoising window. === */
37         int2 low  = make_int2(max(rect.x, x - radius),
38                               max(rect.y, y - radius));
39         int2 high = make_int2(min(rect.z, x + radius + 1),
40                               min(rect.w, y + radius + 1));
41         ccl_global float ccl_restrict_ptr pixel_buffer;
42         int2 pixel;
43
44
45
46
47         /* === Shift feature passes to have mean 0. === */
48         float feature_means[DENOISE_FEATURES];
49         math_vector_zero(feature_means, DENOISE_FEATURES);
50         FOR_PIXEL_WINDOW {
51                 filter_get_features(pixel, pixel_buffer, features, NULL, pass_stride);
52                 math_vector_add(feature_means, features, DENOISE_FEATURES);
53         } END_FOR_PIXEL_WINDOW
54
55         float pixel_scale = 1.0f / ((high.y - low.y) * (high.x - low.x));
56         math_vector_scale(feature_means, pixel_scale, DENOISE_FEATURES);
57
58         /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */
59         float feature_scale[DENOISE_FEATURES];
60         math_vector_zero(feature_scale, DENOISE_FEATURES);
61
62         FOR_PIXEL_WINDOW {
63                 filter_get_feature_scales(pixel, pixel_buffer, features, feature_means, pass_stride);
64                 math_vector_max(feature_scale, features, DENOISE_FEATURES);
65         } END_FOR_PIXEL_WINDOW
66
67         filter_calculate_scale(feature_scale);
68
69
70
71         /* === Generate the feature transformation. ===
72          * This transformation maps the DENOISE_FEATURES-dimentional feature space to a reduced feature (r-feature) space
73          * which generally has fewer dimensions. This mainly helps to prevent overfitting. */
74         float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
75         math_matrix_zero(feature_matrix, DENOISE_FEATURES);
76         FOR_PIXEL_WINDOW {
77                 filter_get_features(pixel, pixel_buffer, features, feature_means, pass_stride);
78                 math_vector_mul(features, feature_scale, DENOISE_FEATURES);
79                 math_matrix_add_gramian(feature_matrix, DENOISE_FEATURES, features, 1.0f);
80         } END_FOR_PIXEL_WINDOW
81
82         math_matrix_jacobi_eigendecomposition(feature_matrix, transform, DENOISE_FEATURES, transform_stride);
83         *rank = 0;
84         if(pca_threshold < 0.0f) {
85                 float threshold_energy = 0.0f;
86                 for(int i = 0; i < DENOISE_FEATURES; i++) {
87                         threshold_energy += feature_matrix[i*DENOISE_FEATURES+i];
88                 }
89                 threshold_energy *= 1.0f - (-pca_threshold);
90
91                 float reduced_energy = 0.0f;
92                 for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) {
93                         if(i >= 2 && reduced_energy >= threshold_energy)
94                                 break;
95                         float s = feature_matrix[i*DENOISE_FEATURES+i];
96                         reduced_energy += s;
97                 }
98         }
99         else {
100                 for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) {
101                         float s = feature_matrix[i*DENOISE_FEATURES+i];
102                         if(i >= 2 && sqrtf(s) < pca_threshold)
103                                 break;
104                 }
105         }
106
107         math_matrix_transpose(transform, DENOISE_FEATURES, transform_stride);
108
109         /* Bake the feature scaling into the transformation matrix. */
110         for(int i = 0; i < DENOISE_FEATURES; i++) {
111                 for(int j = 0; j < (*rank); j++) {
112                         transform[(i*DENOISE_FEATURES + j)*transform_stride] *= feature_scale[i];
113                 }
114         }
115 }
116
117 CCL_NAMESPACE_END