Fix Cycles CUDA performance on CUDA 8.0.
[blender-staging.git] / intern / cycles / kernel / kernel_accumulate.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* BSDF Eval
20  *
21  * BSDF evaluation result, split per BSDF type. This is used to accumulate
22  * render passes separately. */
23
/* Initialize a BsdfEval from the contribution of a single closure.
 *
 * With light passes enabled, all per-type components are cleared and `value`
 * is stored in the component selected by `type`; a type matching none of the
 * tested categories leaves every component at zero. Without light passes only
 * the `diffuse` member is written and holds the whole value. */
ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 value, int use_light_pass)
{
#ifdef __PASSES__
        eval->use_light_pass = use_light_pass;

        if(eval->use_light_pass) {
                const float3 zero = make_float3(0.0f, 0.0f, 0.0f);

                /* start from a clean slate for every pass component */
                eval->diffuse = zero;
                eval->glossy = zero;
                eval->transmission = zero;
                eval->transparent = zero;
                eval->subsurface = zero;
                eval->scatter = zero;

                /* route the value to its component, the first matching test wins */
                if(type == CLOSURE_BSDF_TRANSPARENT_ID) {
                        eval->transparent = value;
                }
                else if(CLOSURE_IS_BSDF_DIFFUSE(type)) {
                        eval->diffuse = value;
                }
                else if(CLOSURE_IS_BSDF_GLOSSY(type)) {
                        eval->glossy = value;
                }
                else if(CLOSURE_IS_BSDF_TRANSMISSION(type)) {
                        eval->transmission = value;
                }
                else if(CLOSURE_IS_BSDF_BSSRDF(type)) {
                        eval->subsurface = value;
                }
                else if(CLOSURE_IS_PHASE(type)) {
                        eval->scatter = value;
                }

                return;
        }
#endif

        /* no light passes: everything is stored in the diffuse member */
        eval->diffuse = value;
}
56
/* Add the contribution of one more closure to an already initialized BsdfEval.
 *
 * With light passes enabled, `value` is added to the component selected by
 * `type`; otherwise it is added to the `diffuse` member. */
ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value)
{
#ifdef __PASSES__
        if(eval->use_light_pass) {
                /* transparent is skipped: this function is used for eval(),
                 * where it will be zero */
                if(CLOSURE_IS_BSDF_DIFFUSE(type)) {
                        eval->diffuse += value;
                }
                else if(CLOSURE_IS_BSDF_GLOSSY(type)) {
                        eval->glossy += value;
                }
                else if(CLOSURE_IS_BSDF_TRANSMISSION(type)) {
                        eval->transmission += value;
                }
                else if(CLOSURE_IS_BSDF_BSSRDF(type)) {
                        eval->subsurface += value;
                }
                else if(CLOSURE_IS_PHASE(type)) {
                        eval->scatter += value;
                }

                return;
        }
#endif

        /* no light passes: everything is stored in the diffuse member */
        eval->diffuse += value;
}
80
/* Test whether a BsdfEval carries no contribution at all.
 *
 * With light passes enabled every component has to be zero, otherwise only
 * the `diffuse` member is inspected. */
ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
{
#ifdef __PASSES__
        if(eval->use_light_pass) {
                /* bail out at the first component that holds a value */
                if(!is_zero(eval->diffuse))
                        return false;
                if(!is_zero(eval->glossy))
                        return false;
                if(!is_zero(eval->transmission))
                        return false;
                if(!is_zero(eval->transparent))
                        return false;
                if(!is_zero(eval->subsurface))
                        return false;
                if(!is_zero(eval->scatter))
                        return false;

                return true;
        }
#endif

        return is_zero(eval->diffuse);
}
98
/* Scale a BsdfEval component-wise by `value`.
 *
 * The `diffuse` member is always scaled; the remaining per-type components
 * are scaled only when light passes are enabled. */
ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float3 value)
{
        /* diffuse is scaled both with and without light passes */
        eval->diffuse *= value;

#ifdef __PASSES__
        if(eval->use_light_pass) {
                eval->glossy *= value;
                eval->transmission *= value;
                eval->subsurface *= value;
                eval->scatter *= value;

                /* transparent is skipped: this function is used for eval(),
                 * where it will be zero */
        }
#endif
}
117
118 /* Path Radiance
119  *
120  * We accumulate different render passes separately. After summing at the end
121  * to get the combined result, it should be identical. We define directly
122  * visible as the first non-transparent hit, while indirectly visible are the
123  * bounces after that. */
124
/* Reset a PathRadiance accumulator.
 *
 * `emission` is always cleared. All other members are cleared only when
 * `use_light_pass` is set; without light passes they are left untouched. */
ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
{
        const float3 zero = make_float3(0.0f, 0.0f, 0.0f);

#ifdef __PASSES__
        L->use_light_pass = use_light_pass;

        if(use_light_pass) {
                /* path bookkeeping */
                L->indirect = zero;
                L->direct_throughput = zero;
                L->direct_emission = zero;

                /* color passes */
                L->color_diffuse = zero;
                L->color_glossy = zero;
                L->color_transmission = zero;
                L->color_subsurface = zero;
                L->color_scatter = zero;

                /* direct lighting passes */
                L->direct_diffuse = zero;
                L->direct_glossy = zero;
                L->direct_transmission = zero;
                L->direct_subsurface = zero;
                L->direct_scatter = zero;

                /* indirect lighting passes */
                L->indirect_diffuse = zero;
                L->indirect_glossy = zero;
                L->indirect_transmission = zero;
                L->indirect_subsurface = zero;
                L->indirect_scatter = zero;

                /* per-type throughput of the path */
                L->path_diffuse = zero;
                L->path_glossy = zero;
                L->path_transmission = zero;
                L->path_subsurface = zero;
                L->path_scatter = zero;

                /* remaining passes */
                L->background = zero;
                L->ao = zero;
                L->shadow = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
                L->mist = 0.0f;
        }
#endif

        /* cleared with and without light passes */
        L->emission = zero;
}
172
/* Update the path throughput after sampling a BSDF.
 *
 * With light passes enabled, the first bounce that is not labelled transparent
 * records the per-type throughput in the `path_*` members, replaces
 * `*throughput` with their sum and stores it in `direct_throughput`. Every
 * other bounce multiplies `*throughput` by the sum of all components divided
 * by the pdf. Without light passes only the `diffuse` member is used. */
ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, ccl_addr_space float3 *throughput,
        BsdfEval *bsdf_eval, float bsdf_pdf, int bounce, int bsdf_label)
{
        float inverse_pdf = 1.0f/bsdf_pdf;

#ifdef __PASSES__
        if(L->use_light_pass) {
                const bool first_visible_hit = (bounce == 0) && !(bsdf_label & LABEL_TRANSPARENT);

                if(!first_visible_hit) {
                        /* transparent bounce before first hit, or indirectly visible through BSDF */
                        float3 combined = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->transparent +
                                          bsdf_eval->subsurface + bsdf_eval->scatter;
                        combined = combined * inverse_pdf;

                        *throughput *= combined;
                        return;
                }

                /* first on directly visible surface */
                float3 weight = *throughput*inverse_pdf;

                L->path_diffuse = bsdf_eval->diffuse*weight;
                L->path_glossy = bsdf_eval->glossy*weight;
                L->path_transmission = bsdf_eval->transmission*weight;
                L->path_subsurface = bsdf_eval->subsurface*weight;
                L->path_scatter = bsdf_eval->scatter*weight;

                /* the new throughput is the sum over all types */
                *throughput = L->path_diffuse + L->path_glossy + L->path_transmission + L->path_subsurface + L->path_scatter;
                L->direct_throughput = *throughput;
                return;
        }
#endif

        *throughput *= bsdf_eval->diffuse*inverse_pdf;
}
207
/* Accumulate emitted light weighted by the path throughput.
 *
 * With light passes enabled the destination depends on `bounce`: 0 goes to
 * `emission`, 1 to `direct_emission` and anything else to `indirect`.
 * Without light passes everything is added to `emission`. */
ccl_device_inline void path_radiance_accum_emission(PathRadiance *L, float3 throughput, float3 value, int bounce)
{
        const float3 contribution = throughput*value;

#ifdef __PASSES__
        if(L->use_light_pass) {
                switch(bounce) {
                        case 0:
                                L->emission += contribution;
                                break;
                        case 1:
                                L->direct_emission += contribution;
                                break;
                        default:
                                L->indirect += contribution;
                                break;
                }
                return;
        }
#endif

        L->emission += contribution;
}
225
/* Accumulate an ambient occlusion contribution.
 *
 * With light passes enabled, bounce 0 adds the lighting to `direct_diffuse`
 * and `alpha*throughput*ao` to the `ao` pass, while later bounces add the
 * lighting to `indirect`. Without light passes the lighting goes to
 * `emission`. */
ccl_device_inline void path_radiance_accum_ao(PathRadiance *L, float3 throughput, float3 alpha, float3 bsdf, float3 ao, int bounce)
{
        const float3 lighting = throughput*bsdf*ao;

#ifdef __PASSES__
        if(L->use_light_pass) {
                if(bounce != 0) {
                        /* indirectly visible lighting after BSDF bounce */
                        L->indirect += lighting;
                        return;
                }

                /* directly visible lighting */
                L->direct_diffuse += lighting;
                L->ao += alpha*throughput*ao;
                return;
        }
#endif

        L->emission += lighting;
}
246
/* Accumulate the contribution of a light sample.
 *
 * With light passes enabled, bounce 0 adds each BSDF component to its
 * `direct_*` member and, for lamps, adds `shadow*shadow_fac` to the xyz of the
 * shadow pass; later bounces add the summed components to `indirect`.
 * Without light passes only the `diffuse` component is added to `emission`. */
ccl_device_inline void path_radiance_accum_light(PathRadiance *L, float3 throughput, BsdfEval *bsdf_eval, float3 shadow, float shadow_fac, int bounce, bool is_lamp)
{
#ifdef __PASSES__
        if(L->use_light_pass) {
                if(bounce != 0) {
                        /* indirectly visible lighting after BSDF bounce */
                        float3 combined = bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission + bsdf_eval->subsurface + bsdf_eval->scatter;
                        L->indirect += throughput*combined*shadow;
                        return;
                }

                /* directly visible lighting */
                L->direct_diffuse += throughput*bsdf_eval->diffuse*shadow;
                L->direct_glossy += throughput*bsdf_eval->glossy*shadow;
                L->direct_transmission += throughput*bsdf_eval->transmission*shadow;
                L->direct_subsurface += throughput*bsdf_eval->subsurface*shadow;
                L->direct_scatter += throughput*bsdf_eval->scatter*shadow;

                /* only lamps feed the shadow pass, its w component is left alone */
                if(is_lamp) {
                        L->shadow.x += shadow.x*shadow_fac;
                        L->shadow.y += shadow.y*shadow_fac;
                        L->shadow.z += shadow.z*shadow_fac;
                }
                return;
        }
#endif

        L->emission += throughput*bsdf_eval->diffuse*shadow;
}
277
/* Accumulate background light weighted by the path throughput.
 *
 * With light passes enabled the destination depends on `bounce`: 0 goes to
 * `background`, 1 to `direct_emission` and anything else to `indirect`.
 * Without light passes everything is added to `emission`. */
ccl_device_inline void path_radiance_accum_background(PathRadiance *L, float3 throughput, float3 value, int bounce)
{
        const float3 contribution = throughput*value;

#ifdef __PASSES__
        if(L->use_light_pass) {
                switch(bounce) {
                        case 0:
                                L->background += contribution;
                                break;
                        case 1:
                                L->direct_emission += contribution;
                                break;
                        default:
                                L->indirect += contribution;
                                break;
                }
                return;
        }
#endif

        L->emission += contribution;
}
295
/* Distribute the accumulated `direct_emission` and `indirect` light over the
 * per-type direct and indirect passes. Does nothing without light passes. */
ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
{
#ifdef __PASSES__
        if(!L->use_light_pass)
                return;

        /* this division is a bit ugly, but it means only a single throughput
         * has to be tracked further along the path; here we recover just the
         * part of the path that is not influenced by any particular BSDF type */

        /* note: direct_emission is overwritten with the divided value */
        L->direct_emission = safe_divide_color(L->direct_emission, L->direct_throughput);

        L->direct_diffuse += L->path_diffuse*L->direct_emission;
        L->direct_glossy += L->path_glossy*L->direct_emission;
        L->direct_transmission += L->path_transmission*L->direct_emission;
        L->direct_subsurface += L->path_subsurface*L->direct_emission;
        L->direct_scatter += L->path_scatter*L->direct_emission;

        /* note: indirect is overwritten with the divided value as well */
        L->indirect = safe_divide_color(L->indirect, L->direct_throughput);

        L->indirect_diffuse += L->path_diffuse*L->indirect;
        L->indirect_glossy += L->path_glossy*L->indirect;
        L->indirect_transmission += L->path_transmission*L->indirect;
        L->indirect_subsurface += L->path_subsurface*L->indirect;
        L->indirect_scatter += L->path_scatter*L->indirect;
#endif
}
319
/* Clear the per-type path throughput together with the `direct_emission` and
 * `indirect` accumulators. Does nothing without light passes. */
ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
{
#ifdef __PASSES__
        if(!L->use_light_pass)
                return;

        const float3 zero = make_float3(0.0f, 0.0f, 0.0f);

        L->path_diffuse = zero;
        L->path_glossy = zero;
        L->path_transmission = zero;
        L->path_subsurface = zero;
        L->path_scatter = zero;

        L->direct_emission = zero;
        L->indirect = zero;
#endif
}
335
/* Copy the per-type path throughput together with the `direct_emission` and
 * `indirect` accumulators from `L_src` into `L`. Whether anything is copied
 * is decided by the light pass flag of the destination `L`. */
ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L,
                                                   const PathRadiance *L_src)
{
#ifdef __PASSES__
        if(!L->use_light_pass)
                return;

        /* per-type throughput of the path */
        L->path_diffuse = L_src->path_diffuse;
        L->path_glossy = L_src->path_glossy;
        L->path_transmission = L_src->path_transmission;
        L->path_subsurface = L_src->path_subsurface;
        L->path_scatter = L_src->path_scatter;

        /* light gathered along the path so far */
        L->direct_emission = L_src->direct_emission;
        L->indirect = L_src->indirect;
#endif
}
352
/* Compute the combined radiance of a path.
 *
 * With light passes enabled, the indirect light is first distributed over the
 * passes, then direct and indirect light are summed. The background is part
 * of the direct sum unless the background is transparent. A non-finite result
 * is replaced by zero and the direct, indirect and emission passes are
 * cleared. Otherwise, with __CLAMP_SAMPLE__, direct and indirect light are
 * scaled down to their clamp limits, passes included.
 *
 * Without light passes the result is `L->emission`, replaced by zero when it
 * is not finite. */
ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadiance *L)
{
        float3 result;

#ifdef __PASSES__
        /* Light Passes are used */
        float3 direct, indirect;
        float clamp_direct = kernel_data.integrator.sample_clamp_direct;
        float clamp_indirect = kernel_data.integrator.sample_clamp_indirect;

        if(L->use_light_pass) {
                path_radiance_sum_indirect(L);

                direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->direct_scatter + L->emission;
                indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface + L->indirect_scatter;

                if(!kernel_data.background.transparent) {
                        direct += L->background;
                }

                result = direct + indirect;
                float magnitude = fabsf(result.x) + fabsf(result.y) + fabsf(result.z);

                if(!isfinite(magnitude)) {
                        /* Reject invalid value */
                        kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!");

                        const float3 zero = make_float3(0.0f, 0.0f, 0.0f);
                        result = zero;

                        L->direct_diffuse = zero;
                        L->direct_glossy = zero;
                        L->direct_transmission = zero;
                        L->direct_subsurface = zero;
                        L->direct_scatter = zero;

                        L->indirect_diffuse = zero;
                        L->indirect_glossy = zero;
                        L->indirect_transmission = zero;
                        L->indirect_subsurface = zero;
                        L->indirect_scatter = zero;

                        L->emission = zero;
                }
#ifdef __CLAMP_SAMPLE__
                else if(magnitude > clamp_direct || magnitude > clamp_indirect) {
                        /* Clamp direct and indirect samples */

                        /* Direct */
                        float magnitude_direct = fabsf(direct.x) + fabsf(direct.y) + fabsf(direct.z);
                        if(magnitude_direct > clamp_direct) {
                                const float scale = clamp_direct/magnitude_direct;
                                direct *= scale;

                                L->direct_diffuse *= scale;
                                L->direct_glossy *= scale;
                                L->direct_transmission *= scale;
                                L->direct_subsurface *= scale;
                                L->direct_scatter *= scale;
                                L->emission *= scale;
                                L->background *= scale;
                        }

                        /* Indirect */
                        float magnitude_indirect = fabsf(indirect.x) + fabsf(indirect.y) + fabsf(indirect.z);
                        if(magnitude_indirect > clamp_indirect) {
                                const float scale = clamp_indirect/magnitude_indirect;
                                indirect *= scale;

                                L->indirect_diffuse *= scale;
                                L->indirect_glossy *= scale;
                                L->indirect_transmission *= scale;
                                L->indirect_subsurface *= scale;
                                L->indirect_scatter *= scale;
                        }

                        /* Sum again, after clamping */
                        result = direct + indirect;
                }
#endif

                return result;
        }
#endif

        /* No Light Passes */
        result = L->emission;

        /* Reject invalid value */
        float magnitude = fabsf(result.x) + fabsf(result.y) + fabsf(result.z);
        if(!isfinite(magnitude)) {
                kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!");
                result = make_float3(0.0f, 0.0f, 0.0f);
        }

        return result;
}
450
/* Add `L_sample`, weighted by 1/num_samples, to the accumulator `L`.
 *
 * `emission` is always accumulated. The per-pass members are accumulated only
 * when light passes are enabled on `L`: path_radiance_init() leaves them
 * uninitialized otherwise, so they must not be read here.
 *
 * `num_samples` is used as a divisor and is not checked against zero. */
ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample, int num_samples)
{
        float fac = 1.0f/num_samples;

#ifdef __PASSES__
        /* Same gating as every other accumulator in this file; without light
         * passes only `emission` holds defined data. */
        if(L->use_light_pass) {
                L->direct_diffuse += L_sample->direct_diffuse*fac;
                L->direct_glossy += L_sample->direct_glossy*fac;
                L->direct_transmission += L_sample->direct_transmission*fac;
                L->direct_subsurface += L_sample->direct_subsurface*fac;
                L->direct_scatter += L_sample->direct_scatter*fac;

                L->indirect_diffuse += L_sample->indirect_diffuse*fac;
                L->indirect_glossy += L_sample->indirect_glossy*fac;
                L->indirect_transmission += L_sample->indirect_transmission*fac;
                L->indirect_subsurface += L_sample->indirect_subsurface*fac;
                L->indirect_scatter += L_sample->indirect_scatter*fac;

                L->background += L_sample->background*fac;
                L->ao += L_sample->ao*fac;
                L->shadow += L_sample->shadow*fac;
                L->mist += L_sample->mist*fac;
        }
#endif
        L->emission += L_sample->emission * fac;
}
475
476 CCL_NAMESPACE_END
477