Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "kernel/closure/alloc.h"
28 #include "kernel/closure/bsdf_util.h"
29 #include "kernel/closure/bsdf.h"
30 #include "kernel/closure/emissive.h"
31
32 #include "kernel/svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
38 #ifdef __OBJECT_MOTION__
39 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
40 {
41         if(sd->object_flag & SD_OBJECT_MOTION) {
42                 sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
43                 sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
44         }
45         else {
46                 sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
47                 sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
48         }
49 }
50 #endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60         sd->lamp = LAMP_NONE;
61
62         sd->type = isect->type;
63         sd->flag = 0;
64         sd->object_flag = kernel_tex_fetch(__object_flag,
65                                                       sd->object);
66
67         /* matrices and time */
68 #ifdef __OBJECT_MOTION__
69         shader_setup_object_transforms(kg, sd, ray->time);
70 #endif
71         sd->time = ray->time;
72
73         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
74         sd->ray_length = isect->t;
75
76 #ifdef __UV__
77         sd->u = isect->u;
78         sd->v = isect->v;
79 #endif
80
81 #ifdef __HAIR__
82         if(sd->type & PRIMITIVE_ALL_CURVE) {
83                 /* curve */
84                 float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
85
86                 sd->shader = __float_as_int(curvedata.z);
87                 sd->P = curve_refine(kg, sd, isect, ray);
88         }
89         else
90 #endif
91         if(sd->type & PRIMITIVE_TRIANGLE) {
92                 /* static triangle */
93                 float3 Ng = triangle_normal(kg, sd);
94                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
95
96                 /* vectors */
97                 sd->P = triangle_refine(kg, sd, isect, ray);
98                 sd->Ng = Ng;
99                 sd->N = Ng;
100
101                 /* smooth normal */
102                 if(sd->shader & SHADER_SMOOTH_NORMAL)
103                         sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
104
105 #ifdef __DPDU__
106                 /* dPdu/dPdv */
107                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
108 #endif
109         }
110         else {
111                 /* motion triangle */
112                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
113         }
114
115         sd->I = -ray->D;
116
117         sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
118
119 #ifdef __INSTANCING__
120         if(isect->object != OBJECT_NONE) {
121                 /* instance transform */
122                 object_normal_transform_auto(kg, sd, &sd->N);
123                 object_normal_transform_auto(kg, sd, &sd->Ng);
124 #  ifdef __DPDU__
125                 object_dir_transform_auto(kg, sd, &sd->dPdu);
126                 object_dir_transform_auto(kg, sd, &sd->dPdv);
127 #  endif
128         }
129 #endif
130
131         /* backfacing test */
132         bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
133
134         if(backfacing) {
135                 sd->flag |= SD_BACKFACING;
136                 sd->Ng = -sd->Ng;
137                 sd->N = -sd->N;
138 #ifdef __DPDU__
139                 sd->dPdu = -sd->dPdu;
140                 sd->dPdv = -sd->dPdv;
141 #endif
142         }
143
144 #ifdef __RAY_DIFFERENTIALS__
145         /* differentials */
146         differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t);
147         differential_incoming(&sd->dI, ray->dD);
148         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
149 #endif
150 }
151
152 /* ShaderData setup from BSSRDF scatter */
153
154 #ifdef __SUBSURFACE__
155 #  ifndef __KERNEL_CUDA__
156 ccl_device
157 #  else
158 ccl_device_inline
159 #  endif
160 void shader_setup_from_subsurface(
161         KernelGlobals *kg,
162         ShaderData *sd,
163         const Intersection *isect,
164         const Ray *ray)
165 {
166         const bool backfacing = sd->flag & SD_BACKFACING;
167
168         /* object, matrices, time, ray_length stay the same */
169         sd->flag = 0;
170         sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
171         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
172         sd->type = isect->type;
173
174 #  ifdef __UV__
175         sd->u = isect->u;
176         sd->v = isect->v;
177 #  endif
178
179         /* fetch triangle data */
180         if(sd->type == PRIMITIVE_TRIANGLE) {
181                 float3 Ng = triangle_normal(kg, sd);
182                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
183
184                 /* static triangle */
185                 sd->P = triangle_refine_local(kg, sd, isect, ray);
186                 sd->Ng = Ng;
187                 sd->N = Ng;
188
189                 if(sd->shader & SHADER_SMOOTH_NORMAL)
190                         sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
191
192 #  ifdef __DPDU__
193                 /* dPdu/dPdv */
194                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
195 #  endif
196         }
197         else {
198                 /* motion triangle */
199                 motion_triangle_shader_setup(kg, sd, isect, ray, true);
200         }
201
202         sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
203
204 #  ifdef __INSTANCING__
205         if(isect->object != OBJECT_NONE) {
206                 /* instance transform */
207                 object_normal_transform_auto(kg, sd, &sd->N);
208                 object_normal_transform_auto(kg, sd, &sd->Ng);
209 #    ifdef __DPDU__
210                 object_dir_transform_auto(kg, sd, &sd->dPdu);
211                 object_dir_transform_auto(kg, sd, &sd->dPdv);
212 #    endif
213         }
214 #  endif
215
216         /* backfacing test */
217         if(backfacing) {
218                 sd->flag |= SD_BACKFACING;
219                 sd->Ng = -sd->Ng;
220                 sd->N = -sd->N;
221 #  ifdef __DPDU__
222                 sd->dPdu = -sd->dPdu;
223                 sd->dPdv = -sd->dPdv;
224 #  endif
225         }
226
227         /* should not get used in principle as the shading will only use a diffuse
228          * BSDF, but the shader might still access it */
229         sd->I = sd->N;
230
231 #  ifdef __RAY_DIFFERENTIALS__
232         /* differentials */
233         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
234         /* don't modify dP and dI */
235 #  endif
236 }
237 #endif
238
239 /* ShaderData setup from position sampled on mesh */
240
241 ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
242                                                 ShaderData *sd,
243                                                 const float3 P,
244                                                 const float3 Ng,
245                                                 const float3 I,
246                                                 int shader, int object, int prim,
247                                                 float u, float v, float t,
248                                                 float time,
249                                                 bool object_space,
250                                                 int lamp)
251 {
252         /* vectors */
253         sd->P = P;
254         sd->N = Ng;
255         sd->Ng = Ng;
256         sd->I = I;
257         sd->shader = shader;
258         if(prim != PRIM_NONE)
259                 sd->type = PRIMITIVE_TRIANGLE;
260         else if(lamp != LAMP_NONE)
261                 sd->type = PRIMITIVE_LAMP;
262         else
263                 sd->type = PRIMITIVE_NONE;
264
265         /* primitive */
266 #ifdef __INSTANCING__
267         sd->object = object;
268 #endif
269         sd->lamp = LAMP_NONE;
270         /* currently no access to bvh prim index for strand sd->prim*/
271         sd->prim = prim;
272 #ifdef __UV__
273         sd->u = u;
274         sd->v = v;
275 #endif
276         sd->time = time;
277         sd->ray_length = t;
278
279         sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
280         sd->object_flag = 0;
281         if(sd->object != OBJECT_NONE) {
282                 sd->object_flag |= kernel_tex_fetch(__object_flag,
283                                                     sd->object);
284
285 #ifdef __OBJECT_MOTION__
286                 shader_setup_object_transforms(kg, sd, time);
287         }
288         else if(lamp != LAMP_NONE) {
289                 sd->ob_tfm  = lamp_fetch_transform(kg, lamp, false);
290                 sd->ob_itfm = lamp_fetch_transform(kg, lamp, true);
291                 sd->lamp = lamp;
292 #endif
293         }
294
295         /* transform into world space */
296         if(object_space) {
297                 object_position_transform_auto(kg, sd, &sd->P);
298                 object_normal_transform_auto(kg, sd, &sd->Ng);
299                 sd->N = sd->Ng;
300                 object_dir_transform_auto(kg, sd, &sd->I);
301         }
302
303         if(sd->type & PRIMITIVE_TRIANGLE) {
304                 /* smooth normal */
305                 if(sd->shader & SHADER_SMOOTH_NORMAL) {
306                         sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
307
308 #ifdef __INSTANCING__
309                         if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
310                                 object_normal_transform_auto(kg, sd, &sd->N);
311                         }
312 #endif
313                 }
314
315                 /* dPdu/dPdv */
316 #ifdef __DPDU__
317                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
318
319 #  ifdef __INSTANCING__
320                 if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
321                         object_dir_transform_auto(kg, sd, &sd->dPdu);
322                         object_dir_transform_auto(kg, sd, &sd->dPdv);
323                 }
324 #  endif
325 #endif
326         }
327         else {
328 #ifdef __DPDU__
329                 sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
330                 sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
331 #endif
332         }
333
334         /* backfacing test */
335         if(sd->prim != PRIM_NONE) {
336                 bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
337
338                 if(backfacing) {
339                         sd->flag |= SD_BACKFACING;
340                         sd->Ng = -sd->Ng;
341                         sd->N = -sd->N;
342 #ifdef __DPDU__
343                         sd->dPdu = -sd->dPdu;
344                         sd->dPdv = -sd->dPdv;
345 #endif
346                 }
347         }
348
349 #ifdef __RAY_DIFFERENTIALS__
350         /* no ray differentials here yet */
351         sd->dP = differential3_zero();
352         sd->dI = differential3_zero();
353         sd->du = differential_zero();
354         sd->dv = differential_zero();
355 #endif
356 }
357
358 /* ShaderData setup for displacement */
359
360 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
361         int object, int prim, float u, float v)
362 {
363         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
364         int shader;
365
366         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
367
368         /* force smooth shading for displacement */
369         shader |= SHADER_SMOOTH_NORMAL;
370
371         shader_setup_from_sample(kg, sd,
372                                  P, Ng, I,
373                                  shader, object, prim,
374                                  u, v, 0.0f, 0.5f,
375                                  !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
376                                  LAMP_NONE);
377 }
378
379 /* ShaderData setup from ray into background */
380
381 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
382 {
383         /* vectors */
384         sd->P = ray->D;
385         sd->N = -ray->D;
386         sd->Ng = -ray->D;
387         sd->I = -ray->D;
388         sd->shader = kernel_data.background.surface_shader;
389         sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
390         sd->object_flag = 0;
391         sd->time = ray->time;
392         sd->ray_length = 0.0f;
393
394 #ifdef __INSTANCING__
395         sd->object = PRIM_NONE;
396 #endif
397         sd->lamp = LAMP_NONE;
398         sd->prim = PRIM_NONE;
399 #ifdef __UV__
400         sd->u = 0.0f;
401         sd->v = 0.0f;
402 #endif
403
404 #ifdef __DPDU__
405         /* dPdu/dPdv */
406         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
407         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
408 #endif
409
410 #ifdef __RAY_DIFFERENTIALS__
411         /* differentials */
412         sd->dP = ray->dD;
413         differential_incoming(&sd->dI, sd->dP);
414         sd->du = differential_zero();
415         sd->dv = differential_zero();
416 #endif
417 }
418
419 /* ShaderData setup from point inside volume */
420
421 #ifdef __VOLUME__
422 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
423 {
424         /* vectors */
425         sd->P = ray->P;
426         sd->N = -ray->D;
427         sd->Ng = -ray->D;
428         sd->I = -ray->D;
429         sd->shader = SHADER_NONE;
430         sd->flag = 0;
431         sd->object_flag = 0;
432         sd->time = ray->time;
433         sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
434
435 #  ifdef __INSTANCING__
436         sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
437 #  endif
438         sd->lamp = LAMP_NONE;
439         sd->prim = PRIM_NONE;
440         sd->type = PRIMITIVE_NONE;
441
442 #  ifdef __UV__
443         sd->u = 0.0f;
444         sd->v = 0.0f;
445 #  endif
446
447 #  ifdef __DPDU__
448         /* dPdu/dPdv */
449         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
450         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
451 #  endif
452
453 #  ifdef __RAY_DIFFERENTIALS__
454         /* differentials */
455         sd->dP = ray->dD;
456         differential_incoming(&sd->dI, sd->dP);
457         sd->du = differential_zero();
458         sd->dv = differential_zero();
459 #  endif
460
461         /* for NDC coordinates */
462         sd->ray_P = ray->P;
463         sd->ray_dP = ray->dP;
464 }
465 #endif  /* __VOLUME__ */
466
467 /* Merging */
468
469 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
470 ccl_device_inline void shader_merge_closures(ShaderData *sd)
471 {
472         /* merge identical closures, better when we sample a single closure at a time */
473         for(int i = 0; i < sd->num_closure; i++) {
474                 ShaderClosure *sci = &sd->closure[i];
475
476                 for(int j = i + 1; j < sd->num_closure; j++) {
477                         ShaderClosure *scj = &sd->closure[j];
478
479                         if(sci->type != scj->type)
480                                 continue;
481                         if(!bsdf_merge(sci, scj))
482                                 continue;
483
484                         sci->weight += scj->weight;
485                         sci->sample_weight += scj->sample_weight;
486
487                         int size = sd->num_closure - (j+1);
488                         if(size > 0) {
489                                 for(int k = 0; k < size; k++) {
490                                         scj[k] = scj[k+1];
491                                 }
492                         }
493
494                         sd->num_closure--;
495                         kernel_assert(sd->num_closure >= 0);
496                         j--;
497                 }
498         }
499 }
500 #endif  /* __BRANCHED_PATH__ || __VOLUME__ */
501
502 /* Defensive sampling. */
503
504 ccl_device_inline void shader_prepare_closures(ShaderData *sd,
505                                                ccl_addr_space PathState *state)
506 {
507         /* We can likely also do defensive sampling at deeper bounces, particularly
508          * for cases like a perfect mirror but possibly also others. This will need
509          * a good heuristic. */
510         if(state->bounce + state->transparent_bounce == 0 && sd->num_closure > 1) {
511                 float sum = 0.0f;
512
513                 for(int i = 0; i < sd->num_closure; i++) {
514                         ShaderClosure *sc = &sd->closure[i];
515                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
516                                 sum += sc->sample_weight;
517                         }
518                 }
519
520                 for(int i = 0; i < sd->num_closure; i++) {
521                         ShaderClosure *sc = &sd->closure[i];
522                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
523                                 sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
524                         }
525                 }
526         }
527 }
528
529
530 /* BSDF */
531
532 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
533         const ShaderClosure *skip_sc, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
534 {
535         /* this is the veach one-sample model with balance heuristic, some pdf
536          * factors drop out when using balance heuristic weighting */
537         for(int i = 0; i < sd->num_closure; i++) {
538                 const ShaderClosure *sc = &sd->closure[i];
539
540                 if(sc != skip_sc && CLOSURE_IS_BSDF(sc->type)) {
541                         float bsdf_pdf = 0.0f;
542                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
543
544                         if(bsdf_pdf != 0.0f) {
545                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight, 1.0f);
546                                 sum_pdf += bsdf_pdf*sc->sample_weight;
547                         }
548
549                         sum_sample_weight += sc->sample_weight;
550                 }
551         }
552
553         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
554 }
555
556 #ifdef __BRANCHED_PATH__
557 ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
558                                                         ShaderData *sd,
559                                                         const float3 omega_in,
560                                                         BsdfEval *result_eval,
561                                                         float light_pdf,
562                                                         bool use_mis)
563 {
564         for(int i = 0; i < sd->num_closure; i++) {
565                 const ShaderClosure *sc = &sd->closure[i];
566                 if(CLOSURE_IS_BSDF(sc->type)) {
567                         float bsdf_pdf = 0.0f;
568                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
569                         if(bsdf_pdf != 0.0f) {
570                                 float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
571                                 bsdf_eval_accum(result_eval,
572                                                 sc->type,
573                                                 eval * sc->weight,
574                                                 mis_weight);
575                         }
576                 }
577         }
578 }
579 #endif  /* __BRANCHED_PATH__ */
580
581
582 #ifndef __KERNEL_CUDA__
583 ccl_device
584 #else
585 ccl_device_inline
586 #endif
587 void shader_bsdf_eval(KernelGlobals *kg,
588                       ShaderData *sd,
589                       const float3 omega_in,
590                       BsdfEval *eval,
591                       float light_pdf,
592                       bool use_mis)
593 {
594         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
595
596 #ifdef __BRANCHED_PATH__
597         if(kernel_data.integrator.branched)
598                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
599         else
600 #endif
601         {
602                 float pdf;
603                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, NULL, eval, 0.0f, 0.0f);
604                 if(use_mis) {
605                         float weight = power_heuristic(light_pdf, pdf);
606                         bsdf_eval_mis(eval, weight);
607                 }
608         }
609 }
610
611 ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd,
612                                                         float *randu)
613 {
614         /* Note the sampling here must match shader_bssrdf_pick,
615          * since we reuse the same random number. */
616         int sampled = 0;
617
618         if(sd->num_closure > 1) {
619                 /* Pick a BSDF or based on sample weights. */
620                 float sum = 0.0f;
621
622                 for(int i = 0; i < sd->num_closure; i++) {
623                         const ShaderClosure *sc = &sd->closure[i];
624
625                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
626                                 sum += sc->sample_weight;
627                         }
628                 }
629
630                 float r = (*randu)*sum;
631                 float partial_sum = 0.0f;
632
633                 for(int i = 0; i < sd->num_closure; i++) {
634                         const ShaderClosure *sc = &sd->closure[i];
635
636                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
637                                 float next_sum = partial_sum + sc->sample_weight;
638
639                                 if(r < next_sum) {
640                                         sampled = i;
641
642                                         /* Rescale to reuse for direction sample, to better
643                                          * preserve stratifaction. */
644                                         *randu = (r - partial_sum) / sc->sample_weight;
645                                         break;
646                                 }
647
648                                 partial_sum = next_sum;
649                         }
650                 }
651         }
652
653         const ShaderClosure *sc = &sd->closure[sampled];
654         return CLOSURE_IS_BSDF(sc->type)? sc: NULL;
655 }
656
657 ccl_device_inline const ShaderClosure *shader_bssrdf_pick(ShaderData *sd,
658                                                           ccl_addr_space float3 *throughput,
659                                                           float *randu)
660 {
661         /* Note the sampling here must match shader_bsdf_pick,
662          * since we reuse the same random number. */
663         int sampled = 0;
664
665         if(sd->num_closure > 1) {
666                 /* Pick a BSDF or BSSRDF or based on sample weights. */
667                 float sum_bsdf = 0.0f;
668                 float sum_bssrdf = 0.0f;
669
670                 for(int i = 0; i < sd->num_closure; i++) {
671                         const ShaderClosure *sc = &sd->closure[i];
672
673                         if(CLOSURE_IS_BSDF(sc->type)) {
674                                 sum_bsdf += sc->sample_weight;
675                         }
676                         else if(CLOSURE_IS_BSSRDF(sc->type)) {
677                                 sum_bssrdf += sc->sample_weight;
678                         }
679                 }
680
681                 float r = (*randu)*(sum_bsdf + sum_bssrdf);
682                 float partial_sum = 0.0f;
683
684                 for(int i = 0; i < sd->num_closure; i++) {
685                         const ShaderClosure *sc = &sd->closure[i];
686
687                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
688                                 float next_sum = partial_sum + sc->sample_weight;
689
690                                 if(r < next_sum) {
691                                         if(CLOSURE_IS_BSDF(sc->type)) {
692                                                 *throughput *= (sum_bsdf + sum_bssrdf) / sum_bsdf;
693                                                 return NULL;
694                                         }
695                                         else {
696                                                 *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf;
697                                                 sampled = i;
698
699                                                 /* Rescale to reuse for direction sample, to better
700                                                  * preserve stratifaction. */
701                                                 *randu = (r - partial_sum) / sc->sample_weight;
702                                                 break;
703                                         }
704                                 }
705
706                                 partial_sum = next_sum;
707                         }
708                 }
709         }
710
711         const ShaderClosure *sc = &sd->closure[sampled];
712         return CLOSURE_IS_BSSRDF(sc->type)? sc: NULL;
713 }
714
715 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
716                                          ShaderData *sd,
717                                          float randu, float randv,
718                                          BsdfEval *bsdf_eval,
719                                          float3 *omega_in,
720                                          differential3 *domega_in,
721                                          float *pdf)
722 {
723         const ShaderClosure *sc = shader_bsdf_pick(sd, &randu);
724         if(sc == NULL) {
725                 *pdf = 0.0f;
726                 return LABEL_NONE;
727         }
728
729         /* BSSRDF should already have been handled elsewhere. */
730         kernel_assert(CLOSURE_IS_BSDF(sc->type));
731
732         int label;
733         float3 eval;
734
735         *pdf = 0.0f;
736         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
737
738         if(*pdf != 0.0f) {
739                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
740
741                 if(sd->num_closure > 1) {
742                         float sweight = sc->sample_weight;
743                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sc, bsdf_eval, *pdf*sweight, sweight);
744                 }
745         }
746
747         return label;
748 }
749
750 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
751         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
752         float3 *omega_in, differential3 *domega_in, float *pdf)
753 {
754         int label;
755         float3 eval;
756
757         *pdf = 0.0f;
758         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
759
760         if(*pdf != 0.0f)
761                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
762
763         return label;
764 }
765
766 ccl_device float shader_bsdf_average_roughness(ShaderData *sd)
767 {
768         float roughness = 0.0f;
769         float sum_weight = 0.0f;
770
771         for(int i = 0; i < sd->num_closure; i++) {
772                 ShaderClosure *sc = &sd->closure[i];
773
774                 if(CLOSURE_IS_BSDF(sc->type)) {
775                         /* sqrt once to undo the squaring from multiplying roughness on the
776                          * two axes, and once for the squared roughness convention. */
777                         float weight = fabsf(average(sc->weight));
778                         roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
779                         sum_weight += weight;
780                 }
781         }
782
783         return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
784 }
785
786 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
787 {
788         for(int i = 0; i < sd->num_closure; i++) {
789                 ShaderClosure *sc = &sd->closure[i];
790
791                 if(CLOSURE_IS_BSDF(sc->type))
792                         bsdf_blur(kg, sc, roughness);
793         }
794 }
795
796 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd)
797 {
798         if(sd->flag & SD_HAS_ONLY_VOLUME) {
799                 return make_float3(1.0f, 1.0f, 1.0f);
800         }
801         else if(sd->flag & SD_TRANSPARENT) {
802                 return sd->closure_transparent_extinction;
803         }
804         else {
805                 return make_float3(0.0f, 0.0f, 0.0f);
806         }
807 }
808
809 ccl_device void shader_bsdf_disable_transparency(KernelGlobals *kg, ShaderData *sd)
810 {
811         if(sd->flag & SD_TRANSPARENT) {
812                 for(int i = 0; i < sd->num_closure; i++) {
813                         ShaderClosure *sc = &sd->closure[i];
814
815                         if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
816                                 sc->sample_weight = 0.0f;
817                                 sc->weight = make_float3(0.0f, 0.0f, 0.0f);
818                         }
819                 }
820
821                 sd->flag &= ~SD_TRANSPARENT;
822         }
823 }
824
825 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
826 {
827         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
828
829         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
830         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
831
832         return alpha;
833 }
834
835 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
836 {
837         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
838
839         for(int i = 0; i < sd->num_closure; i++) {
840                 ShaderClosure *sc = &sd->closure[i];
841
842                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
843                         eval += sc->weight;
844         }
845
846         return eval;
847 }
848
849 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
850 {
851         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
852
853         for(int i = 0; i < sd->num_closure; i++) {
854                 ShaderClosure *sc = &sd->closure[i];
855
856                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
857                         eval += sc->weight;
858         }
859
860         return eval;
861 }
862
863 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
864 {
865         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
866
867         for(int i = 0; i < sd->num_closure; i++) {
868                 ShaderClosure *sc = &sd->closure[i];
869
870                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
871                         eval += sc->weight;
872         }
873
874         return eval;
875 }
876
877 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
878 {
879         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
880
881         for(int i = 0; i < sd->num_closure; i++) {
882                 ShaderClosure *sc = &sd->closure[i];
883
884                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
885                         eval += sc->weight;
886         }
887
888         return eval;
889 }
890
891 ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd)
892 {
893         float3 N = make_float3(0.0f, 0.0f, 0.0f);
894
895         for(int i = 0; i < sd->num_closure; i++) {
896                 ShaderClosure *sc = &sd->closure[i];
897                 if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
898                         N += sc->N*fabsf(average(sc->weight));
899         }
900
901         return (is_zero(N))? sd->N : normalize(N);
902 }
903
904 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
905 {
906         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
907         float3 N = make_float3(0.0f, 0.0f, 0.0f);
908
909         for(int i = 0; i < sd->num_closure; i++) {
910                 ShaderClosure *sc = &sd->closure[i];
911
912                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
913                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
914                         eval += sc->weight*ao_factor;
915                         N += bsdf->N*fabsf(average(sc->weight));
916                 }
917         }
918
919         *N_ = (is_zero(N))? sd->N : normalize(N);
920         return eval;
921 }
922
923 #ifdef __SUBSURFACE__
924 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
925 {
926         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
927         float3 N = make_float3(0.0f, 0.0f, 0.0f);
928         float texture_blur = 0.0f, weight_sum = 0.0f;
929
930         for(int i = 0; i < sd->num_closure; i++) {
931                 ShaderClosure *sc = &sd->closure[i];
932
933                 if(CLOSURE_IS_BSSRDF(sc->type)) {
934                         const Bssrdf *bssrdf = (const Bssrdf*)sc;
935                         float avg_weight = fabsf(average(sc->weight));
936
937                         N += bssrdf->N*avg_weight;
938                         eval += sc->weight;
939                         texture_blur += bssrdf->texture_blur*avg_weight;
940                         weight_sum += avg_weight;
941                 }
942         }
943
944         if(N_)
945                 *N_ = (is_zero(N))? sd->N: normalize(N);
946
947         if(texture_blur_)
948                 *texture_blur_ = safe_divide(texture_blur, weight_sum);
949
950         return eval;
951 }
952 #endif  /* __SUBSURFACE__ */
953
954 /* Emission */
955
956 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
957 {
958         if(sd->flag & SD_EMISSION) {
959                 return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
960         }
961         else {
962                 return make_float3(0.0f, 0.0f, 0.0f);
963         }
964 }
965
966 /* Holdout */
967
968 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
969 {
970         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
971
972         for(int i = 0; i < sd->num_closure; i++) {
973                 ShaderClosure *sc = &sd->closure[i];
974
975                 if(CLOSURE_IS_HOLDOUT(sc->type))
976                         weight += sc->weight;
977         }
978
979         return weight;
980 }
981
982 /* Surface Evaluation */
983
984 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
985         ccl_addr_space PathState *state, int path_flag)
986 {
987         /* If path is being terminated, we are tracing a shadow ray or evaluating
988          * emission, then we don't need to store closures. The emission and shadow
989          * shader data also do not have a closure array to save GPU memory. */
990         int max_closures;
991         if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) {
992                 max_closures = 0;
993         }
994         else {
995                 max_closures = kernel_data.integrator.max_closures;
996         }
997
998         sd->num_closure = 0;
999         sd->num_closure_left = max_closures;
1000
1001 #ifdef __OSL__
1002         if(kg->osl)
1003                 OSLShader::eval_surface(kg, sd, state, path_flag);
1004         else
1005 #endif
1006         {
1007 #ifdef __SVM__
1008                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
1009 #else
1010                 DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
1011                                                              sizeof(DiffuseBsdf),
1012                                                              make_float3(0.8f, 0.8f, 0.8f));
1013                 if(bsdf != NULL) {
1014                         bsdf->N = sd->N;
1015                         sd->flag |= bsdf_diffuse_setup(bsdf);
1016                 }
1017 #endif
1018         }
1019
1020         if(sd->flag & SD_BSDF_NEEDS_LCG) {
1021                 sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
1022         }
1023 }
1024
1025 /* Background Evaluation */
1026
1027 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
1028         ccl_addr_space PathState *state, int path_flag)
1029 {
1030         sd->num_closure = 0;
1031         sd->num_closure_left = 0;
1032
1033 #ifdef __SVM__
1034 #  ifdef __OSL__
1035         if(kg->osl) {
1036                 OSLShader::eval_background(kg, sd, state, path_flag);
1037         }
1038         else
1039 #  endif  /* __OSL__ */
1040         {
1041                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
1042         }
1043
1044         if(sd->flag & SD_EMISSION) {
1045                 return sd->closure_emission_background;
1046         }
1047         else {
1048                 return make_float3(0.0f, 0.0f, 0.0f);
1049         }
1050 #else  /* __SVM__ */
1051         return make_float3(0.8f, 0.8f, 0.8f);
1052 #endif  /* __SVM__ */
1053 }
1054
1055 /* Volume */
1056
1057 #ifdef __VOLUME__
1058
1059 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
1060         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
1061 {
1062         for(int i = 0; i < sd->num_closure; i++) {
1063                 if(i == skip_phase)
1064                         continue;
1065
1066                 const ShaderClosure *sc = &sd->closure[i];
1067
1068                 if(CLOSURE_IS_PHASE(sc->type)) {
1069                         float phase_pdf = 0.0f;
1070                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
1071
1072                         if(phase_pdf != 0.0f) {
1073                                 bsdf_eval_accum(result_eval, sc->type, eval, 1.0f);
1074                                 sum_pdf += phase_pdf*sc->sample_weight;
1075                         }
1076
1077                         sum_sample_weight += sc->sample_weight;
1078                 }
1079         }
1080
1081         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
1082 }
1083
1084 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
1085         const float3 omega_in, BsdfEval *eval, float *pdf)
1086 {
1087         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
1088
1089         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
1090 }
1091
1092 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
1093         float randu, float randv, BsdfEval *phase_eval,
1094         float3 *omega_in, differential3 *domega_in, float *pdf)
1095 {
1096         int sampled = 0;
1097
1098         if(sd->num_closure > 1) {
1099                 /* pick a phase closure based on sample weights */
1100                 float sum = 0.0f;
1101
1102                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
1103                         const ShaderClosure *sc = &sd->closure[sampled];
1104
1105                         if(CLOSURE_IS_PHASE(sc->type))
1106                                 sum += sc->sample_weight;
1107                 }
1108
1109                 float r = randu*sum;
1110                 float partial_sum = 0.0f;
1111
1112                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
1113                         const ShaderClosure *sc = &sd->closure[sampled];
1114
1115                         if(CLOSURE_IS_PHASE(sc->type)) {
1116                                 float next_sum = partial_sum + sc->sample_weight;
1117
1118                                 if(r <= next_sum) {
1119                                         /* Rescale to reuse for BSDF direction sample. */
1120                                         randu = (r - partial_sum) / sc->sample_weight;
1121                                         break;
1122                                 }
1123
1124                                 partial_sum = next_sum;
1125                         }
1126                 }
1127
1128                 if(sampled == sd->num_closure) {
1129                         *pdf = 0.0f;
1130                         return LABEL_NONE;
1131                 }
1132         }
1133
1134         /* todo: this isn't quite correct, we don't weight anisotropy properly
1135          * depending on color channels, even if this is perhaps not a common case */
1136         const ShaderClosure *sc = &sd->closure[sampled];
1137         int label;
1138         float3 eval;
1139
1140         *pdf = 0.0f;
1141         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1142
1143         if(*pdf != 0.0f) {
1144                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1145         }
1146
1147         return label;
1148 }
1149
1150 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
1151         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
1152         float3 *omega_in, differential3 *domega_in, float *pdf)
1153 {
1154         int label;
1155         float3 eval;
1156
1157         *pdf = 0.0f;
1158         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1159
1160         if(*pdf != 0.0f)
1161                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1162
1163         return label;
1164 }
1165
1166 /* Volume Evaluation */
1167
1168 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
1169                                           ShaderData *sd,
1170                                           ccl_addr_space PathState *state,
1171                                           ccl_addr_space VolumeStack *stack,
1172                                           int path_flag)
1173 {
1174         /* If path is being terminated, we are tracing a shadow ray or evaluating
1175          * emission, then we don't need to store closures. The emission and shadow
1176          * shader data also do not have a closure array to save GPU memory. */
1177         int max_closures;
1178         if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) {
1179                 max_closures = 0;
1180         }
1181         else {
1182                 max_closures = kernel_data.integrator.max_closures;
1183         }
1184
1185         /* reset closures once at the start, we will be accumulating the closures
1186          * for all volumes in the stack into a single array of closures */
1187         sd->num_closure = 0;
1188         sd->num_closure_left = max_closures;
1189         sd->flag = 0;
1190         sd->object_flag = 0;
1191
1192         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1193                 /* setup shaderdata from stack. it's mostly setup already in
1194                  * shader_setup_from_volume, this switching should be quick */
1195                 sd->object = stack[i].object;
1196                 sd->lamp = LAMP_NONE;
1197                 sd->shader = stack[i].shader;
1198
1199                 sd->flag &= ~SD_SHADER_FLAGS;
1200                 sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
1201                 sd->object_flag &= ~SD_OBJECT_FLAGS;
1202
1203                 if(sd->object != OBJECT_NONE) {
1204                         sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
1205
1206 #ifdef __OBJECT_MOTION__
1207                         /* todo: this is inefficient for motion blur, we should be
1208                          * caching matrices instead of recomputing them each step */
1209                         shader_setup_object_transforms(kg, sd, sd->time);
1210 #endif
1211                 }
1212
1213                 /* evaluate shader */
1214 #ifdef __SVM__
1215 #  ifdef __OSL__
1216                 if(kg->osl) {
1217                         OSLShader::eval_volume(kg, sd, state, path_flag);
1218                 }
1219                 else
1220 #  endif
1221                 {
1222                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1223                 }
1224 #endif
1225
1226                 /* merge closures to avoid exceeding number of closures limit */
1227                 if(i > 0)
1228                         shader_merge_closures(sd);
1229         }
1230 }
1231
1232 #endif  /* __VOLUME__ */
1233
1234 /* Displacement Evaluation */
1235
1236 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state)
1237 {
1238         sd->num_closure = 0;
1239         sd->num_closure_left = 0;
1240
1241         /* this will modify sd->P */
1242 #ifdef __SVM__
1243 #  ifdef __OSL__
1244         if(kg->osl)
1245                 OSLShader::eval_displacement(kg, sd, state);
1246         else
1247 #  endif
1248         {
1249                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1250         }
1251 #endif
1252 }
1253
1254 /* Transparent Shadows */
1255
1256 #ifdef __TRANSPARENT_SHADOWS__
1257 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1258 {
1259         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1260         int shader = 0;
1261
1262 #ifdef __HAIR__
1263         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1264 #endif
1265                 shader = kernel_tex_fetch(__tri_shader, prim);
1266 #ifdef __HAIR__
1267         }
1268         else {
1269                 float4 str = kernel_tex_fetch(__curves, prim);
1270                 shader = __float_as_int(str.z);
1271         }
1272 #endif
1273         int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
1274
1275         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1276 }
1277 #endif  /* __TRANSPARENT_SHADOWS__ */
1278
1279 CCL_NAMESPACE_END