8ab4c724829c6e4cc35790a17ad4f97d0485fff6
[blender.git] / intern / cycles / kernel / kernel_passes.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sample, float value)
20 {
21         ccl_global float *buf = buffer;
22 #if defined(__SPLIT_KERNEL__)
23         atomic_add_and_fetch_float(buf, value);
24 #else
25         *buf = (sample == 0)? value: *buf + value;
26 #endif  /* __SPLIT_KERNEL__ */
27 }
28
29 ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sample, float3 value)
30 {
31 #if defined(__SPLIT_KERNEL__)
32         ccl_global float *buf_x = buffer + 0;
33         ccl_global float *buf_y = buffer + 1;
34         ccl_global float *buf_z = buffer + 2;
35
36         atomic_add_and_fetch_float(buf_x, value.x);
37         atomic_add_and_fetch_float(buf_y, value.y);
38         atomic_add_and_fetch_float(buf_z, value.z);
39 #else
40         ccl_global float3 *buf = (ccl_global float3*)buffer;
41         *buf = (sample == 0)? value: *buf + value;
42 #endif  /* __SPLIT_KERNEL__ */
43 }
44
45 ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sample, float4 value)
46 {
47 #if defined(__SPLIT_KERNEL__)
48         ccl_global float *buf_x = buffer + 0;
49         ccl_global float *buf_y = buffer + 1;
50         ccl_global float *buf_z = buffer + 2;
51         ccl_global float *buf_w = buffer + 3;
52
53         atomic_add_and_fetch_float(buf_x, value.x);
54         atomic_add_and_fetch_float(buf_y, value.y);
55         atomic_add_and_fetch_float(buf_z, value.z);
56         atomic_add_and_fetch_float(buf_w, value.w);
57 #else
58         ccl_global float4 *buf = (ccl_global float4*)buffer;
59         *buf = (sample == 0)? value: *buf + value;
60 #endif  /* __SPLIT_KERNEL__ */
61 }
62
63 #ifdef __DENOISING_FEATURES__
64 ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, int sample, float value)
65 {
66         kernel_write_pass_float(buffer, sample, value);
67
68         /* The online one-pass variance update that's used for the megakernel can't easily be implemented
69          * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
70 #  ifdef __SPLIT_KERNEL__
71         kernel_write_pass_float(buffer+1, sample, value*value);
72 #  else
73         if(sample == 0) {
74                 kernel_write_pass_float(buffer+1, sample, 0.0f);
75         }
76         else {
77                 float new_mean = buffer[0] * (1.0f / (sample + 1));
78                 float old_mean = (buffer[0] - value) * (1.0f / sample);
79                 kernel_write_pass_float(buffer+1, sample, (value - new_mean) * (value - old_mean));
80         }
81 #  endif
82 }
83
84 #  if defined(__SPLIT_KERNEL__)
85 #    define kernel_write_pass_float3_unaligned kernel_write_pass_float3
86 #  else
87 ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, int sample, float3 value)
88 {
89         buffer[0] = (sample == 0)? value.x: buffer[0] + value.x;
90         buffer[1] = (sample == 0)? value.y: buffer[1] + value.y;
91         buffer[2] = (sample == 0)? value.z: buffer[2] + value.z;
92 }
93 #  endif
94
95 ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, int sample, float3 value)
96 {
97         kernel_write_pass_float3_unaligned(buffer, sample, value);
98 #  ifdef __SPLIT_KERNEL__
99         kernel_write_pass_float3_unaligned(buffer+3, sample, value*value);
100 #  else
101         if(sample == 0) {
102                 kernel_write_pass_float3_unaligned(buffer+3, sample, make_float3(0.0f, 0.0f, 0.0f));
103         }
104         else {
105                 float3 sum = make_float3(buffer[0], buffer[1], buffer[2]);
106                 float3 new_mean = sum * (1.0f / (sample + 1));
107                 float3 old_mean = (sum - value) * (1.0f / sample);
108                 kernel_write_pass_float3_unaligned(buffer+3, sample, (value - new_mean) * (value - old_mean));
109         }
110 #  endif
111 }
112
113 ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer,
114         int sample, float path_total, float path_total_shaded)
115 {
116         if(kernel_data.film.pass_denoising_data == 0)
117                 return;
118
119         buffer += (sample & 1)? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
120
121         path_total = ensure_finite(path_total);
122         path_total_shaded = ensure_finite(path_total_shaded);
123
124         kernel_write_pass_float(buffer, sample/2, path_total);
125         kernel_write_pass_float(buffer+1, sample/2, path_total_shaded);
126
127         float value = path_total_shaded / max(path_total, 1e-7f);
128 #  ifdef __SPLIT_KERNEL__
129         kernel_write_pass_float(buffer+2, sample/2, value*value);
130 #  else
131         if(sample < 2) {
132                 kernel_write_pass_float(buffer+2, sample/2, 0.0f);
133         }
134         else {
135                 float old_value = (buffer[1] - path_total_shaded) / max(buffer[0] - path_total, 1e-7f);
136                 float new_value = buffer[1] / max(buffer[0], 1e-7f);
137                 kernel_write_pass_float(buffer+2, sample, (value - new_value) * (value - old_value));
138         }
139 #  endif
140 }
141 #endif /* __DENOISING_FEATURES__ */
142
143 ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
144                                                         ShaderData *sd,
145                                                         ccl_global PathState *state,
146                                                         PathRadiance *L)
147 {
148 #ifdef __DENOISING_FEATURES__
149         if(state->denoising_feature_weight == 0.0f) {
150                 return;
151         }
152
153         L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length);
154
155         float3 normal = make_float3(0.0f, 0.0f, 0.0f);
156         float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
157         float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
158
159         for(int i = 0; i < sd->num_closure; i++) {
160                 ShaderClosure *sc = &sd->closure[i];
161
162                 if(!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
163                         continue;
164
165                 /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
166                 normal += sc->N * sc->sample_weight;
167                 sum_weight += sc->sample_weight;
168                 if(!bsdf_is_specular_like(sc)) {
169                         albedo += sc->weight;
170                         sum_nonspecular_weight += sc->sample_weight;
171                 }
172         }
173
174         /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
175         if((sum_weight == 0.0f) || (sum_nonspecular_weight*4.0f > sum_weight)) {
176                 if(sum_weight != 0.0f) {
177                         normal /= sum_weight;
178                 }
179                 L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal);
180                 L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo);
181
182                 state->denoising_feature_weight = 0.0f;
183         }
184 #else
185         (void) kg;
186         (void) sd;
187         (void) state;
188         (void) L;
189 #endif  /* __DENOISING_FEATURES__ */
190 }
191
192 ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
193         ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput)
194 {
195 #ifdef __PASSES__
196         int path_flag = state->flag;
197
198         if(!(path_flag & PATH_RAY_CAMERA))
199                 return;
200
201         int flag = kernel_data.film.pass_flag;
202
203         if(!(flag & PASS_ALL))
204                 return;
205         
206         if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
207                 if(!(sd->flag & SD_TRANSPARENT) ||
208                    kernel_data.film.pass_alpha_threshold == 0.0f ||
209                    average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
210                 {
211
212                         if(sample == 0) {
213                                 if(flag & PASS_DEPTH) {
214                                         float depth = camera_distance(kg, sd->P);
215                                         kernel_write_pass_float(buffer + kernel_data.film.pass_depth, sample, depth);
216                                 }
217                                 if(flag & PASS_OBJECT_ID) {
218                                         float id = object_pass_id(kg, sd->object);
219                                         kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, sample, id);
220                                 }
221                                 if(flag & PASS_MATERIAL_ID) {
222                                         float id = shader_pass_id(kg, sd);
223                                         kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, sample, id);
224                                 }
225                         }
226
227                         if(flag & PASS_NORMAL) {
228                                 float3 normal = sd->N;
229                                 kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, sample, normal);
230                         }
231                         if(flag & PASS_UV) {
232                                 float3 uv = primitive_uv(kg, sd);
233                                 kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, sample, uv);
234                         }
235                         if(flag & PASS_MOTION) {
236                                 float4 speed = primitive_motion_vector(kg, sd);
237                                 kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, sample, speed);
238                                 kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, sample, 1.0f);
239                         }
240
241                         state->flag |= PATH_RAY_SINGLE_PASS_DONE;
242                 }
243         }
244
245         if(flag & (PASS_DIFFUSE_INDIRECT|PASS_DIFFUSE_COLOR|PASS_DIFFUSE_DIRECT))
246                 L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput;
247         if(flag & (PASS_GLOSSY_INDIRECT|PASS_GLOSSY_COLOR|PASS_GLOSSY_DIRECT))
248                 L->color_glossy += shader_bsdf_glossy(kg, sd)*throughput;
249         if(flag & (PASS_TRANSMISSION_INDIRECT|PASS_TRANSMISSION_COLOR|PASS_TRANSMISSION_DIRECT))
250                 L->color_transmission += shader_bsdf_transmission(kg, sd)*throughput;
251         if(flag & (PASS_SUBSURFACE_INDIRECT|PASS_SUBSURFACE_COLOR|PASS_SUBSURFACE_DIRECT))
252                 L->color_subsurface += shader_bsdf_subsurface(kg, sd)*throughput;
253
254         if(flag & PASS_MIST) {
255                 /* bring depth into 0..1 range */
256                 float mist_start = kernel_data.film.mist_start;
257                 float mist_inv_depth = kernel_data.film.mist_inv_depth;
258
259                 float depth = camera_distance(kg, sd->P);
260                 float mist = saturate((depth - mist_start)*mist_inv_depth);
261
262                 /* falloff */
263                 float mist_falloff = kernel_data.film.mist_falloff;
264
265                 if(mist_falloff == 1.0f)
266                         ;
267                 else if(mist_falloff == 2.0f)
268                         mist = mist*mist;
269                 else if(mist_falloff == 0.5f)
270                         mist = sqrtf(mist);
271                 else
272                         mist = powf(mist, mist_falloff);
273
274                 /* modulate by transparency */
275                 float3 alpha = shader_bsdf_alpha(kg, sd);
276                 L->mist += (1.0f - mist)*average(throughput*alpha);
277         }
278 #endif
279 }
280
281 ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, int sample)
282 {
283 #ifdef __PASSES__
284         int flag = kernel_data.film.pass_flag;
285
286         if(!kernel_data.film.use_light_pass)
287                 return;
288         
289         if(flag & PASS_DIFFUSE_INDIRECT)
290                 kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, sample, L->indirect_diffuse);
291         if(flag & PASS_GLOSSY_INDIRECT)
292                 kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, sample, L->indirect_glossy);
293         if(flag & PASS_TRANSMISSION_INDIRECT)
294                 kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, sample, L->indirect_transmission);
295         if(flag & PASS_SUBSURFACE_INDIRECT)
296                 kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, sample, L->indirect_subsurface);
297         if(flag & PASS_DIFFUSE_DIRECT)
298                 kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, sample, L->direct_diffuse);
299         if(flag & PASS_GLOSSY_DIRECT)
300                 kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, sample, L->direct_glossy);
301         if(flag & PASS_TRANSMISSION_DIRECT)
302                 kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, sample, L->direct_transmission);
303         if(flag & PASS_SUBSURFACE_DIRECT)
304                 kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, sample, L->direct_subsurface);
305
306         if(flag & PASS_EMISSION)
307                 kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, sample, L->emission);
308         if(flag & PASS_BACKGROUND)
309                 kernel_write_pass_float3(buffer + kernel_data.film.pass_background, sample, L->background);
310         if(flag & PASS_AO)
311                 kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, sample, L->ao);
312
313         if(flag & PASS_DIFFUSE_COLOR)
314                 kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, sample, L->color_diffuse);
315         if(flag & PASS_GLOSSY_COLOR)
316                 kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, sample, L->color_glossy);
317         if(flag & PASS_TRANSMISSION_COLOR)
318                 kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, sample, L->color_transmission);
319         if(flag & PASS_SUBSURFACE_COLOR)
320                 kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, sample, L->color_subsurface);
321         if(flag & PASS_SHADOW) {
322                 float4 shadow = L->shadow;
323                 shadow.w = kernel_data.film.pass_shadow_scale;
324                 kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, sample, shadow);
325         }
326         if(flag & PASS_MIST)
327                 kernel_write_pass_float(buffer + kernel_data.film.pass_mist, sample, 1.0f - L->mist);
328 #endif
329 }
330
331 ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *buffer,
332         int sample, PathRadiance *L, float alpha, bool is_shadow_catcher)
333 {
334         if(L) {
335                 float3 L_sum;
336 #ifdef __SHADOW_TRICKS__
337                 if(is_shadow_catcher) {
338                         L_sum = path_radiance_sum_shadowcatcher(kg, L, &alpha);
339                 }
340                 else
341 #endif  /* __SHADOW_TRICKS__ */
342                 {
343                         L_sum = path_radiance_clamp_and_sum(kg, L);
344                 }
345
346                 kernel_write_pass_float4(buffer, sample, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
347
348                 kernel_write_light_passes(kg, buffer, L, sample);
349
350 #ifdef __DENOISING_FEATURES__
351                 if(kernel_data.film.pass_denoising_data) {
352 #  ifdef __SHADOW_TRICKS__
353                         kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, average(L->path_total), average(L->path_total_shaded));
354 #  else
355                         kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f);
356 #  endif
357                         if(kernel_data.film.pass_denoising_clean) {
358                                 float3 noisy, clean;
359                                 path_radiance_split_denoising(kg, L, &noisy, &clean);
360                                 kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
361                                                                   sample, noisy);
362                                 kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean,
363                                                                    sample, clean);
364                         }
365                         else {
366                                 kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
367                                                                   sample, L_sum);
368                         }
369
370                         kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL,
371                                                           sample, L->denoising_normal);
372                         kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO,
373                                                           sample, L->denoising_albedo);
374                         kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH,
375                                                          sample, L->denoising_depth);
376                 }
377 #endif  /* __DENOISING_FEATURES__ */
378         }
379         else {
380                 kernel_write_pass_float4(buffer, sample, make_float4(0.0f, 0.0f, 0.0f, 0.0f));
381
382 #ifdef __DENOISING_FEATURES__
383                 if(kernel_data.film.pass_denoising_data) {
384                         kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f);
385
386                         kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
387                                                           sample, make_float3(0.0f, 0.0f, 0.0f));
388
389                         kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL,
390                                                           sample, make_float3(0.0f, 0.0f, 0.0f));
391                         kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO,
392                                                           sample, make_float3(0.0f, 0.0f, 0.0f));
393                         kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH,
394                                                          sample, 0.0f);
395
396                         if(kernel_data.film.pass_denoising_clean) {
397                                 kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean,
398                                                                    sample, make_float3(0.0f, 0.0f, 0.0f));
399                         }
400                 }
401 #endif  /* __DENOISING_FEATURES__ */
402         }
403 }
404
405 CCL_NAMESPACE_END
406