ed3a92f6241611c07699acf16770d193bbcf8fc6
[blender.git] / intern / cycles / kernel / filter / filter_transform_sse.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 ccl_device void kernel_filter_construct_transform(float ccl_restrict_ptr buffer,
20                                                   int x, int y, int4 rect,
21                                                   int pass_stride,
22                                                   float *transform, int *rank,
23                                                   int radius, float pca_threshold)
24 {
25         int buffer_w = align_up(rect.z - rect.x, 4);
26
27         __m128 features[DENOISE_FEATURES];
28         float ccl_restrict_ptr pixel_buffer;
29         int2 pixel;
30
31         int2 low  = make_int2(max(rect.x, x - radius),
32                               max(rect.y, y - radius));
33         int2 high = make_int2(min(rect.z, x + radius + 1),
34                               min(rect.w, y + radius + 1));
35
36         __m128 feature_means[DENOISE_FEATURES];
37         math_vector_zero_sse(feature_means, DENOISE_FEATURES);
38         FOR_PIXEL_WINDOW_SSE {
39                 filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, NULL, pass_stride);
40                 math_vector_add_sse(feature_means, DENOISE_FEATURES, features);
41         } END_FOR_PIXEL_WINDOW_SSE
42
43         __m128 pixel_scale = _mm_set1_ps(1.0f / ((high.y - low.y) * (high.x - low.x)));
44         for(int i = 0; i < DENOISE_FEATURES; i++) {
45                 feature_means[i] = _mm_mul_ps(_mm_hsum_ps(feature_means[i]), pixel_scale);
46         }
47
48         __m128 feature_scale[DENOISE_FEATURES];
49         math_vector_zero_sse(feature_scale, DENOISE_FEATURES);
50         FOR_PIXEL_WINDOW_SSE {
51                 filter_get_feature_scales_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
52                 math_vector_max_sse(feature_scale, features, DENOISE_FEATURES);
53         } END_FOR_PIXEL_WINDOW_SSE
54
55         filter_calculate_scale_sse(feature_scale);
56
57         __m128 feature_matrix_sse[DENOISE_FEATURES*DENOISE_FEATURES];
58         math_matrix_zero_sse(feature_matrix_sse, DENOISE_FEATURES);
59         FOR_PIXEL_WINDOW_SSE {
60                 filter_get_features_sse(x4, y4, active_pixels, pixel_buffer, features, feature_means, pass_stride);
61                 math_vector_mul_sse(features, DENOISE_FEATURES, feature_scale);
62                 math_matrix_add_gramian_sse(feature_matrix_sse, DENOISE_FEATURES, features, _mm_set1_ps(1.0f));
63         } END_FOR_PIXEL_WINDOW_SSE
64
65         float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES];
66         math_matrix_hsum(feature_matrix, DENOISE_FEATURES, feature_matrix_sse);
67
68         math_matrix_jacobi_eigendecomposition(feature_matrix, transform, DENOISE_FEATURES, 1);
69
70         *rank = 0;
71         if(pca_threshold < 0.0f) {
72                 float threshold_energy = 0.0f;
73                 for(int i = 0; i < DENOISE_FEATURES; i++) {
74                         threshold_energy += feature_matrix[i*DENOISE_FEATURES+i];
75                 }
76                 threshold_energy *= 1.0f - (-pca_threshold);
77
78                 float reduced_energy = 0.0f;
79                 for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) {
80                         if(i >= 2 && reduced_energy >= threshold_energy)
81                                 break;
82                         float s = feature_matrix[i*DENOISE_FEATURES+i];
83                         reduced_energy += s;
84                 }
85         }
86         else {
87                 for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) {
88                         float s = feature_matrix[i*DENOISE_FEATURES+i];
89                         if(i >= 2 && sqrtf(s) < pca_threshold)
90                                 break;
91                 }
92         }
93
94         math_matrix_transpose(transform, DENOISE_FEATURES, 1);
95
96         /* Bake the feature scaling into the transformation matrix. */
97         for(int i = 0; i < DENOISE_FEATURES; i++) {
98                 math_vector_scale(transform + i*DENOISE_FEATURES, _mm_cvtss_f32(feature_scale[i]), *rank);
99         }
100 }
101
102 CCL_NAMESPACE_END