Cycles: Cleanup, use ccl_restrict instead of ccl_restrict_ptr
[blender.git] / intern / cycles / kernel / filter / filter_reconstruction.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
20                                                        int storage_stride,
21                                                        int dx, int dy,
22                                                        int w, int h,
23                                                        int pass_stride,
24                                                        const ccl_global float *ccl_restrict buffer,
25                                                        ccl_global float *color_pass,
26                                                        ccl_global float *variance_pass,
27                                                        const ccl_global float *ccl_restrict transform,
28                                                        ccl_global int *rank,
29                                                        float weight,
30                                                        ccl_global float *XtWX,
31                                                        ccl_global float3 *XtWY,
32                                                        int localIdx)
33 {
34         int p_offset =  y    *w +  x;
35         int q_offset = (y+dy)*w + (x+dx);
36
37 #ifdef __KERNEL_CPU__
38         const int stride = 1;
39         (void)storage_stride;
40         (void)localIdx;
41         float design_row[DENOISE_FEATURES+1];
42 #elif defined(__KERNEL_CUDA__)
43         const int stride = storage_stride;
44         ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE];
45         ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1);
46 #else
47         const int stride = storage_stride;
48         float design_row[DENOISE_FEATURES+1];
49 #endif
50
51         float3 p_color = filter_get_pixel_color(color_pass + p_offset, pass_stride);
52         float3 q_color = filter_get_pixel_color(color_pass + q_offset, pass_stride);
53
54         float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
55         float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
56
57         /* If the pixel was flagged as an outlier during prefiltering, skip it.
58          * Otherwise, perform the regular confidence interval test. */
59         if(ccl_get_feature(buffer + q_offset, 0) < 0.0f ||
60            average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)) {
61                 return;
62         }
63
64         filter_get_design_row_transform(make_int2(x, y),       buffer + p_offset,
65                                         make_int2(x+dx, y+dy), buffer + q_offset,
66                                         pass_stride, *rank, design_row, transform, stride);
67
68         math_trimatrix_add_gramian_strided(XtWX, (*rank)+1, design_row, weight, stride);
69         math_vec3_add_strided(XtWY, (*rank)+1, design_row, weight * q_color, stride);
70 }
71
72 ccl_device_inline void kernel_filter_finalize(int x, int y, int w, int h,
73                                               ccl_global float *buffer,
74                                               ccl_global int *rank,
75                                               int storage_stride,
76                                               ccl_global float *XtWX,
77                                               ccl_global float3 *XtWY,
78                                               int4 buffer_params,
79                                               int sample)
80 {
81 #ifdef __KERNEL_CPU__
82         const int stride = 1;
83         (void)storage_stride;
84 #else
85         const int stride = storage_stride;
86 #endif
87
88         math_trimatrix_vec3_solve(XtWX, XtWY, (*rank)+1, stride);
89
90         float3 final_color = XtWY[0];
91
92         ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z;
93         final_color *= sample;
94         if(buffer_params.w) {
95                 final_color.x += combined_buffer[buffer_params.w+0];
96                 final_color.y += combined_buffer[buffer_params.w+1];
97                 final_color.z += combined_buffer[buffer_params.w+2];
98         }
99         combined_buffer[0] = final_color.x;
100         combined_buffer[1] = final_color.y;
101         combined_buffer[2] = final_color.z;
102 }
103
104 #undef STORAGE_TYPE
105
106 CCL_NAMESPACE_END