Cycles: Added Cryptomatte output.
[blender.git] / intern / cycles / kernel / kernel_passes.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
/* Pass writes go through atomic adds when building the split kernel or the CUDA kernel. */
#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__)
#  define __ATOMIC_PASS_WRITE__
#endif
20
21 #include "kernel/kernel_id_passes.h"
22
23 CCL_NAMESPACE_BEGIN
24
25 ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value)
26 {
27         ccl_global float *buf = buffer;
28 #ifdef __ATOMIC_PASS_WRITE__
29         atomic_add_and_fetch_float(buf, value);
30 #else
31         *buf += value;
32 #endif
33 }
34
35 ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value)
36 {
37 #ifdef __ATOMIC_PASS_WRITE__
38         ccl_global float *buf_x = buffer + 0;
39         ccl_global float *buf_y = buffer + 1;
40         ccl_global float *buf_z = buffer + 2;
41
42         atomic_add_and_fetch_float(buf_x, value.x);
43         atomic_add_and_fetch_float(buf_y, value.y);
44         atomic_add_and_fetch_float(buf_z, value.z);
45 #else
46         ccl_global float3 *buf = (ccl_global float3*)buffer;
47         *buf += value;
48 #endif
49 }
50
51 ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value)
52 {
53 #ifdef __ATOMIC_PASS_WRITE__
54         ccl_global float *buf_x = buffer + 0;
55         ccl_global float *buf_y = buffer + 1;
56         ccl_global float *buf_z = buffer + 2;
57         ccl_global float *buf_w = buffer + 3;
58
59         atomic_add_and_fetch_float(buf_x, value.x);
60         atomic_add_and_fetch_float(buf_y, value.y);
61         atomic_add_and_fetch_float(buf_z, value.z);
62         atomic_add_and_fetch_float(buf_w, value.w);
63 #else
64         ccl_global float4 *buf = (ccl_global float4*)buffer;
65         *buf += value;
66 #endif
67 }
68
69 #ifdef __DENOISING_FEATURES__
70 ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value)
71 {
72         kernel_write_pass_float(buffer, value);
73
74         /* The online one-pass variance update that's used for the megakernel can't easily be implemented
75          * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
76         kernel_write_pass_float(buffer+1, value*value);
77 }
78
79 #  ifdef __ATOMIC_PASS_WRITE__
80 #    define kernel_write_pass_float3_unaligned kernel_write_pass_float3
81 #  else
82 ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value)
83 {
84         buffer[0] += value.x;
85         buffer[1] += value.y;
86         buffer[2] += value.z;
87 }
88 #  endif
89
90 ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value)
91 {
92         kernel_write_pass_float3_unaligned(buffer, value);
93         kernel_write_pass_float3_unaligned(buffer+3, value*value);
94 }
95
96 ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer,
97         int sample, float path_total, float path_total_shaded)
98 {
99         if(kernel_data.film.pass_denoising_data == 0)
100                 return;
101
102         buffer += (sample & 1)? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
103
104         path_total = ensure_finite(path_total);
105         path_total_shaded = ensure_finite(path_total_shaded);
106
107         kernel_write_pass_float(buffer, path_total);
108         kernel_write_pass_float(buffer+1, path_total_shaded);
109
110         float value = path_total_shaded / max(path_total, 1e-7f);
111         kernel_write_pass_float(buffer+2, value*value);
112 }
113 #endif /* __DENOISING_FEATURES__ */
114
115 ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
116                                                         ShaderData *sd,
117                                                         ccl_addr_space PathState *state,
118                                                         PathRadiance *L)
119 {
120 #ifdef __DENOISING_FEATURES__
121         if(state->denoising_feature_weight == 0.0f) {
122                 return;
123         }
124
125         L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length);
126
127         /* Skip implicitly transparent surfaces. */
128         if(sd->flag & SD_HAS_ONLY_VOLUME) {
129                 return;
130         }
131
132         float3 normal = make_float3(0.0f, 0.0f, 0.0f);
133         float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
134         float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
135
136         for(int i = 0; i < sd->num_closure; i++) {
137                 ShaderClosure *sc = &sd->closure[i];
138
139                 if(!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
140                         continue;
141
142                 /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
143                 normal += sc->N * sc->sample_weight;
144                 sum_weight += sc->sample_weight;
145                 if(bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
146                         albedo += sc->weight;
147                         sum_nonspecular_weight += sc->sample_weight;
148                 }
149         }
150
151         /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
152         if((sum_weight == 0.0f) || (sum_nonspecular_weight*4.0f > sum_weight)) {
153                 if(sum_weight != 0.0f) {
154                         normal /= sum_weight;
155                 }
156                 L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal);
157                 L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo);
158
159                 state->denoising_feature_weight = 0.0f;
160         }
161 #else
162         (void) kg;
163         (void) sd;
164         (void) state;
165         (void) L;
166 #endif  /* __DENOISING_FEATURES__ */
167 }
168
169 #ifdef __KERNEL_DEBUG__
170 ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
171                                                  ccl_global float *buffer,
172                                                  PathRadiance *L)
173 {
174         int flag = kernel_data.film.pass_flag;
175         if(flag & PASSMASK(BVH_TRAVERSED_NODES)) {
176                 kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
177                                         L->debug_data.num_bvh_traversed_nodes);
178         }
179         if(flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) {
180                 kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
181                                         L->debug_data.num_bvh_traversed_instances);
182         }
183         if(flag & PASSMASK(BVH_INTERSECTIONS)) {
184                 kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
185                                         L->debug_data.num_bvh_intersections);
186         }
187         if(flag & PASSMASK(RAY_BOUNCES)) {
188                 kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
189                                         L->debug_data.num_ray_bounces);
190         }
191 }
192 #endif /* __KERNEL_DEBUG__ */
193
194 #ifdef __KERNEL_CPU__
195 #define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name)
196 ccl_device_inline size_t kernel_write_id_pass_cpu(float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map)
197 {
198         if(map) {
199                 (*map)[id] += matte_weight;
200                 return 0;
201         }
202 #else /* __KERNEL_CPU__ */
203 #define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight) 
204 ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, size_t depth, float id, float matte_weight)
205 {
206 #endif /* __KERNEL_CPU__ */
207         kernel_write_id_slots(buffer, depth, id, matte_weight);
208         return depth * 2;
209 }
210
211 ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
212         ShaderData *sd, ccl_addr_space PathState *state, float3 throughput)
213 {
214 #ifdef __PASSES__
215         int path_flag = state->flag;
216
217         if(!(path_flag & PATH_RAY_CAMERA))
218                 return;
219
220         int flag = kernel_data.film.pass_flag;
221         int light_flag = kernel_data.film.light_pass_flag;
222
223         if(!((flag | light_flag) & PASS_ANY))
224                 return;
225
226         if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
227                 if(!(sd->flag & SD_TRANSPARENT) ||
228                    kernel_data.film.pass_alpha_threshold == 0.0f ||
229                    average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
230                 {
231                         if(state->sample == 0) {
232                                 if(flag & PASSMASK(DEPTH)) {
233                                         float depth = camera_distance(kg, sd->P);
234                                         kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
235                                 }
236                                 if(flag & PASSMASK(OBJECT_ID)) {
237                                         float id = object_pass_id(kg, sd->object);
238                                         kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
239                                 }
240                                 if(flag & PASSMASK(MATERIAL_ID)) {
241                                         float id = shader_pass_id(kg, sd);
242                                         kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
243                                 }
244                         }
245
246                         if(flag & PASSMASK(NORMAL)) {
247                                 float3 normal = shader_bsdf_average_normal(kg, sd);
248                                 kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
249                         }
250                         if(flag & PASSMASK(UV)) {
251                                 float3 uv = primitive_uv(kg, sd);
252                                 kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv);
253                         }
254                         if(flag & PASSMASK(MOTION)) {
255                                 float4 speed = primitive_motion_vector(kg, sd);
256                                 kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
257                                 kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
258                         }
259
260                         state->flag |= PATH_RAY_SINGLE_PASS_DONE;
261                 }
262         }
263
264         if(kernel_data.film.cryptomatte_passes) {
265                 const float matte_weight = average(throughput) * (1.0f - average(shader_bsdf_transparency(kg, sd)));
266                 if(matte_weight > 0.0f) {
267                         ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
268                         if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
269                                 float id = object_cryptomatte_id(kg, sd->object);
270                                 cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object);
271                         }
272                         if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
273                                 float id = shader_cryptomatte_id(kg, sd->shader);
274                                 cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material);
275                         }
276                         if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
277                                 float id = object_cryptomatte_asset_id(kg, sd->object);
278                                 cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset);
279                         }
280                 }
281         }
282
283
284         if(light_flag & PASSMASK_COMPONENT(DIFFUSE))
285                 L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput;
286         if(light_flag & PASSMASK_COMPONENT(GLOSSY))
287                 L->color_glossy += shader_bsdf_glossy(kg, sd)*throughput;
288         if(light_flag & PASSMASK_COMPONENT(TRANSMISSION))
289                 L->color_transmission += shader_bsdf_transmission(kg, sd)*throughput;
290         if(light_flag & PASSMASK_COMPONENT(SUBSURFACE))
291                 L->color_subsurface += shader_bsdf_subsurface(kg, sd)*throughput;
292
293         if(light_flag & PASSMASK(MIST)) {
294                 /* bring depth into 0..1 range */
295                 float mist_start = kernel_data.film.mist_start;
296                 float mist_inv_depth = kernel_data.film.mist_inv_depth;
297
298                 float depth = camera_distance(kg, sd->P);
299                 float mist = saturate((depth - mist_start)*mist_inv_depth);
300
301                 /* falloff */
302                 float mist_falloff = kernel_data.film.mist_falloff;
303
304                 if(mist_falloff == 1.0f)
305                         ;
306                 else if(mist_falloff == 2.0f)
307                         mist = mist*mist;
308                 else if(mist_falloff == 0.5f)
309                         mist = sqrtf(mist);
310                 else
311                         mist = powf(mist, mist_falloff);
312
313                 /* modulate by transparency */
314                 float3 alpha = shader_bsdf_alpha(kg, sd);
315                 L->mist += (1.0f - mist)*average(throughput*alpha);
316         }
317 #endif
318 }
319
320 ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L)
321 {
322 #ifdef __PASSES__
323         int light_flag = kernel_data.film.light_pass_flag;
324
325         if(!kernel_data.film.use_light_pass)
326                 return;
327
328         if(light_flag & PASSMASK(DIFFUSE_INDIRECT))
329                 kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse);
330         if(light_flag & PASSMASK(GLOSSY_INDIRECT))
331                 kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy);
332         if(light_flag & PASSMASK(TRANSMISSION_INDIRECT))
333                 kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, L->indirect_transmission);
334         if(light_flag & PASSMASK(SUBSURFACE_INDIRECT))
335                 kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, L->indirect_subsurface);
336         if(light_flag & PASSMASK(VOLUME_INDIRECT))
337                 kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter);
338         if(light_flag & PASSMASK(DIFFUSE_DIRECT))
339                 kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse);
340         if(light_flag & PASSMASK(GLOSSY_DIRECT))
341                 kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy);
342         if(light_flag & PASSMASK(TRANSMISSION_DIRECT))
343                 kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, L->direct_transmission);
344         if(light_flag & PASSMASK(SUBSURFACE_DIRECT))
345                 kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, L->direct_subsurface);
346         if(light_flag & PASSMASK(VOLUME_DIRECT))
347                 kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter);
348
349         if(light_flag & PASSMASK(EMISSION))
350                 kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission);
351         if(light_flag & PASSMASK(BACKGROUND))
352                 kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background);
353         if(light_flag & PASSMASK(AO))
354                 kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao);
355
356         if(light_flag & PASSMASK(DIFFUSE_COLOR))
357                 kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse);
358         if(light_flag & PASSMASK(GLOSSY_COLOR))
359                 kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy);
360         if(light_flag & PASSMASK(TRANSMISSION_COLOR))
361                 kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, L->color_transmission);
362         if(light_flag & PASSMASK(SUBSURFACE_COLOR))
363                 kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface);
364         if(light_flag & PASSMASK(SHADOW)) {
365                 float4 shadow = L->shadow;
366                 shadow.w = kernel_data.film.pass_shadow_scale;
367                 kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow);
368         }
369         if(light_flag & PASSMASK(MIST))
370                 kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist);
371 #endif
372 }
373
374 ccl_device_inline void kernel_write_result(KernelGlobals *kg,
375                                            ccl_global float *buffer,
376                                            int sample,
377                                            PathRadiance *L)
378 {
379         float alpha;
380         float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
381
382         kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
383
384         kernel_write_light_passes(kg, buffer, L);
385
386 #ifdef __DENOISING_FEATURES__
387         if(kernel_data.film.pass_denoising_data) {
388 #  ifdef __SHADOW_TRICKS__
389                 kernel_write_denoising_shadow(kg,
390                                               buffer + kernel_data.film.pass_denoising_data,
391                                               sample,
392                                               average(L->path_total),
393                                               average(L->path_total_shaded));
394 #  else
395                 kernel_write_denoising_shadow(kg,
396                                               buffer + kernel_data.film.pass_denoising_data,
397                                               sample,
398                                               0.0f, 0.0f);
399 #  endif
400                 if(kernel_data.film.pass_denoising_clean) {
401                         float3 noisy, clean;
402                         path_radiance_split_denoising(kg, L, &noisy, &clean);
403                         kernel_write_pass_float3_variance(
404                                 buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
405                                 noisy);
406                         kernel_write_pass_float3_unaligned(
407                                 buffer + kernel_data.film.pass_denoising_clean,
408                                 clean);
409                 }
410                 else {
411                         kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
412                                                             ensure_finite3(L_sum));
413                 }
414
415                 kernel_write_pass_float3_variance(
416                         buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL,
417                         L->denoising_normal);
418                 kernel_write_pass_float3_variance(
419                         buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO,
420                         L->denoising_albedo);
421                 kernel_write_pass_float_variance(
422                         buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH,
423                         L->denoising_depth);
424         }
425 #endif  /* __DENOISING_FEATURES__ */
426
427
428 #ifdef __KERNEL_DEBUG__
429         kernel_write_debug_passes(kg, buffer, L);
430 #endif
431 }
432
433 CCL_NAMESPACE_END