Cycles: better path termination for transparency.
[blender.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "kernel/closure/alloc.h"
28 #include "kernel/closure/bsdf_util.h"
29 #include "kernel/closure/bsdf.h"
30 #include "kernel/closure/emissive.h"
31
32 #include "kernel/svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
#ifdef __OBJECT_MOTION__
/* Fill sd->ob_tfm and sd->ob_itfm with the transform of sd->object and its
 * inverse, for the given time. Reads sd->object and sd->object_flag, so both
 * must be set before calling. */
ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{
	if(!(sd->object_flag & SD_OBJECT_MOTION)) {
		/* No motion flag: both matrices are fetched as stored. */
		sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
		sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
		return;
	}

	/* Motion flag set: fetch the transform for this time and invert it. */
	sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
	sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
}
#endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60         sd->lamp = LAMP_NONE;
61
62         sd->type = isect->type;
63         sd->flag = 0;
64         sd->object_flag = kernel_tex_fetch(__object_flag,
65                                                       sd->object);
66
67         /* matrices and time */
68 #ifdef __OBJECT_MOTION__
69         shader_setup_object_transforms(kg, sd, ray->time);
70 #endif
71         sd->time = ray->time;
72
73         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
74         sd->ray_length = isect->t;
75
76 #ifdef __UV__
77         sd->u = isect->u;
78         sd->v = isect->v;
79 #endif
80
81 #ifdef __HAIR__
82         if(sd->type & PRIMITIVE_ALL_CURVE) {
83                 /* curve */
84                 float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
85
86                 sd->shader = __float_as_int(curvedata.z);
87                 sd->P = curve_refine(kg, sd, isect, ray);
88         }
89         else
90 #endif
91         if(sd->type & PRIMITIVE_TRIANGLE) {
92                 /* static triangle */
93                 float3 Ng = triangle_normal(kg, sd);
94                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
95
96                 /* vectors */
97                 sd->P = triangle_refine(kg, sd, isect, ray);
98                 sd->Ng = Ng;
99                 sd->N = Ng;
100
101                 /* smooth normal */
102                 if(sd->shader & SHADER_SMOOTH_NORMAL)
103                         sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
104
105 #ifdef __DPDU__
106                 /* dPdu/dPdv */
107                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
108 #endif
109         }
110         else {
111                 /* motion triangle */
112                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
113         }
114
115         sd->I = -ray->D;
116
117         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
118
119 #ifdef __INSTANCING__
120         if(isect->object != OBJECT_NONE) {
121                 /* instance transform */
122                 object_normal_transform_auto(kg, sd, &sd->N);
123                 object_normal_transform_auto(kg, sd, &sd->Ng);
124 #  ifdef __DPDU__
125                 object_dir_transform_auto(kg, sd, &sd->dPdu);
126                 object_dir_transform_auto(kg, sd, &sd->dPdv);
127 #  endif
128         }
129 #endif
130
131         /* backfacing test */
132         bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
133
134         if(backfacing) {
135                 sd->flag |= SD_BACKFACING;
136                 sd->Ng = -sd->Ng;
137                 sd->N = -sd->N;
138 #ifdef __DPDU__
139                 sd->dPdu = -sd->dPdu;
140                 sd->dPdv = -sd->dPdv;
141 #endif
142         }
143
144 #ifdef __RAY_DIFFERENTIALS__
145         /* differentials */
146         differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t);
147         differential_incoming(&sd->dI, ray->dD);
148         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
149 #endif
150 }
151
152 /* ShaderData setup from BSSRDF scatter */
153
#ifdef __SUBSURFACE__
/* Re-initialize an already set up ShaderData for a new hit `isect` found by
 * `ray` during BSSRDF scattering. Object, matrices, time and ray length are
 * left as they were, and the backfacing state of the previous setup is
 * carried over instead of being recomputed. */
#  ifndef __KERNEL_CUDA__
ccl_device
#  else
ccl_device_inline
#  endif
void shader_setup_from_subsurface(
	KernelGlobals *kg,
	ShaderData *sd,
	const Intersection *isect,
	const Ray *ray)
{
	/* Remember the old orientation before the flags are cleared. */
	const bool was_backfacing = sd->flag & SD_BACKFACING;

	/* object, matrices, time, ray_length stay the same */
	sd->flag = 0;
	sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
	sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
	sd->type = isect->type;

#  ifdef __UV__
	sd->u = isect->u;
	sd->v = isect->v;
#  endif

	/* fetch triangle data */
	if(sd->type != PRIMITIVE_TRIANGLE) {
		/* motion triangle */
		motion_triangle_shader_setup(kg, sd, isect, ray, true);
	}
	else {
		/* static triangle */
		float3 face_normal = triangle_normal(kg, sd);
		sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);

		sd->P = triangle_refine_local(kg, sd, isect, ray);
		sd->Ng = face_normal;
		sd->N = face_normal;

		if(sd->shader & SHADER_SMOOTH_NORMAL) {
			sd->N = triangle_smooth_normal(kg, face_normal, sd->prim, sd->u, sd->v);
		}

#  ifdef __DPDU__
		/* dPdu/dPdv */
		triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
#  endif
	}

	sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);

#  ifdef __INSTANCING__
	if(isect->object != OBJECT_NONE) {
		/* instance transform */
		object_normal_transform_auto(kg, sd, &sd->N);
		object_normal_transform_auto(kg, sd, &sd->Ng);
#    ifdef __DPDU__
		object_dir_transform_auto(kg, sd, &sd->dPdu);
		object_dir_transform_auto(kg, sd, &sd->dPdv);
#    endif
	}
#  endif

	/* restore the backfacing state recorded above */
	if(was_backfacing) {
		sd->flag |= SD_BACKFACING;
		sd->Ng = -sd->Ng;
		sd->N = -sd->N;
#  ifdef __DPDU__
		sd->dPdu = -sd->dPdu;
		sd->dPdv = -sd->dPdv;
#  endif
	}

	/* In principle unused, since shading only uses a diffuse BSDF here, but
	 * the shader might still read it. */
	sd->I = sd->N;

#  ifdef __RAY_DIFFERENTIALS__
	/* differentials: only du/dv are updated, dP and dI are left untouched */
	differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
#  endif
}
#endif
238
239 /* ShaderData setup from position sampled on mesh */
240
241 ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
242                                                 ShaderData *sd,
243                                                 const float3 P,
244                                                 const float3 Ng,
245                                                 const float3 I,
246                                                 int shader, int object, int prim,
247                                                 float u, float v, float t,
248                                                 float time,
249                                                 bool object_space,
250                                                 int lamp)
251 {
252         /* vectors */
253         sd->P = P;
254         sd->N = Ng;
255         sd->Ng = Ng;
256         sd->I = I;
257         sd->shader = shader;
258         if(prim != PRIM_NONE)
259                 sd->type = PRIMITIVE_TRIANGLE;
260         else if(lamp != LAMP_NONE)
261                 sd->type = PRIMITIVE_LAMP;
262         else
263                 sd->type = PRIMITIVE_NONE;
264
265         /* primitive */
266 #ifdef __INSTANCING__
267         sd->object = object;
268 #endif
269         sd->lamp = LAMP_NONE;
270         /* currently no access to bvh prim index for strand sd->prim*/
271         sd->prim = prim;
272 #ifdef __UV__
273         sd->u = u;
274         sd->v = v;
275 #endif
276         sd->time = time;
277         sd->ray_length = t;
278
279         sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
280         sd->object_flag = 0;
281         if(sd->object != OBJECT_NONE) {
282                 sd->object_flag |= kernel_tex_fetch(__object_flag,
283                                                     sd->object);
284
285 #ifdef __OBJECT_MOTION__
286                 shader_setup_object_transforms(kg, sd, time);
287         }
288         else if(lamp != LAMP_NONE) {
289                 sd->ob_tfm  = lamp_fetch_transform(kg, lamp, false);
290                 sd->ob_itfm = lamp_fetch_transform(kg, lamp, true);
291                 sd->lamp = lamp;
292 #endif
293         }
294
295         /* transform into world space */
296         if(object_space) {
297                 object_position_transform_auto(kg, sd, &sd->P);
298                 object_normal_transform_auto(kg, sd, &sd->Ng);
299                 sd->N = sd->Ng;
300                 object_dir_transform_auto(kg, sd, &sd->I);
301         }
302
303         if(sd->type & PRIMITIVE_TRIANGLE) {
304                 /* smooth normal */
305                 if(sd->shader & SHADER_SMOOTH_NORMAL) {
306                         sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
307
308 #ifdef __INSTANCING__
309                         if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
310                                 object_normal_transform_auto(kg, sd, &sd->N);
311                         }
312 #endif
313                 }
314
315                 /* dPdu/dPdv */
316 #ifdef __DPDU__
317                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
318
319 #  ifdef __INSTANCING__
320                 if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
321                         object_dir_transform_auto(kg, sd, &sd->dPdu);
322                         object_dir_transform_auto(kg, sd, &sd->dPdv);
323                 }
324 #  endif
325 #endif
326         }
327         else {
328 #ifdef __DPDU__
329                 sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
330                 sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
331 #endif
332         }
333
334         /* backfacing test */
335         if(sd->prim != PRIM_NONE) {
336                 bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
337
338                 if(backfacing) {
339                         sd->flag |= SD_BACKFACING;
340                         sd->Ng = -sd->Ng;
341                         sd->N = -sd->N;
342 #ifdef __DPDU__
343                         sd->dPdu = -sd->dPdu;
344                         sd->dPdv = -sd->dPdv;
345 #endif
346                 }
347         }
348
349 #ifdef __RAY_DIFFERENTIALS__
350         /* no ray differentials here yet */
351         sd->dP = differential3_zero();
352         sd->dI = differential3_zero();
353         sd->du = differential_zero();
354         sd->dv = differential_zero();
355 #endif
356 }
357
358 /* ShaderData setup for displacement */
359
360 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
361         int object, int prim, float u, float v)
362 {
363         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
364         int shader;
365
366         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
367
368         /* force smooth shading for displacement */
369         shader |= SHADER_SMOOTH_NORMAL;
370
371         shader_setup_from_sample(kg, sd,
372                                  P, Ng, I,
373                                  shader, object, prim,
374                                  u, v, 0.0f, 0.5f,
375                                  !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
376                                  LAMP_NONE);
377 }
378
379 /* ShaderData setup from ray into background */
380
381 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
382 {
383         /* vectors */
384         sd->P = ray->D;
385         sd->N = -ray->D;
386         sd->Ng = -ray->D;
387         sd->I = -ray->D;
388         sd->shader = kernel_data.background.surface_shader;
389         sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
390         sd->object_flag = 0;
391         sd->time = ray->time;
392         sd->ray_length = 0.0f;
393
394 #ifdef __INSTANCING__
395         sd->object = PRIM_NONE;
396 #endif
397         sd->lamp = LAMP_NONE;
398         sd->prim = PRIM_NONE;
399 #ifdef __UV__
400         sd->u = 0.0f;
401         sd->v = 0.0f;
402 #endif
403
404 #ifdef __DPDU__
405         /* dPdu/dPdv */
406         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
407         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
408 #endif
409
410 #ifdef __RAY_DIFFERENTIALS__
411         /* differentials */
412         sd->dP = ray->dD;
413         differential_incoming(&sd->dI, sd->dP);
414         sd->du = differential_zero();
415         sd->dv = differential_zero();
416 #endif
417 }
418
419 /* ShaderData setup from point inside volume */
420
#ifdef __VOLUME__
/* Set up ShaderData for a point inside a volume.
 *
 * The position is the ray origin; normal, geometric normal and incoming
 * direction are the reversed ray direction. No shader, object, lamp or
 * primitive is assigned here. */
ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
{
	/* vectors */
	sd->P = ray->P;
	sd->N = -ray->D;
	sd->Ng = -ray->D;
	sd->I = -ray->D;
	sd->shader = SHADER_NONE;
	sd->flag = 0;
	sd->object_flag = 0;
	sd->time = ray->time;
	sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */

#  ifdef __INSTANCING__
	/* sd->object is tested against OBJECT_NONE elsewhere in this file, so
	 * use the object sentinel rather than the primitive one.
	 * todo: fill this for texture coordinates */
	sd->object = OBJECT_NONE;
#  endif
	sd->lamp = LAMP_NONE;
	sd->prim = PRIM_NONE;
	sd->type = PRIMITIVE_NONE;

#  ifdef __UV__
	sd->u = 0.0f;
	sd->v = 0.0f;
#  endif

#  ifdef __DPDU__
	/* dPdu/dPdv */
	sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
	sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
#  endif

#  ifdef __RAY_DIFFERENTIALS__
	/* differentials */
	sd->dP = ray->dD;
	differential_incoming(&sd->dI, sd->dP);
	sd->du = differential_zero();
	sd->dv = differential_zero();
#  endif

	/* for NDC coordinates */
	sd->ray_P = ray->P;
	sd->ray_dP = ray->dP;
}
#endif  /* __VOLUME__ */
466
467 /* Merging */
468
#if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
/* Merge closures of the same type that bsdf_merge() accepts as mergeable:
 * weights and sample weights are added into the earlier closure and the later
 * one is removed from the array. Better when a single closure is sampled at
 * a time. */
ccl_device_inline void shader_merge_closures(ShaderData *sd)
{
	for(int i = 0; i < sd->num_closure; i++) {
		ShaderClosure *keep = &sd->closure[i];
		int j = i + 1;

		while(j < sd->num_closure) {
			ShaderClosure *other = &sd->closure[j];

			/* bsdf_merge() is only consulted for closures of equal type */
			if(keep->type != other->type || !bsdf_merge(keep, other)) {
				j++;
				continue;
			}

			keep->weight += other->weight;
			keep->sample_weight += other->sample_weight;

			/* close the gap left by the merged closure */
			for(int k = j + 1; k < sd->num_closure; k++) {
				sd->closure[k - 1] = sd->closure[k];
			}

			sd->num_closure--;
			kernel_assert(sd->num_closure >= 0);

			/* j is not advanced: a different closure now sits at index j */
		}
	}
}
#endif  /* __BRANCHED_PATH__ || __VOLUME__ */
501
502 /* Defensive sampling. */
503
504 ccl_device_inline void shader_prepare_closures(ShaderData *sd,
505                                                ccl_addr_space PathState *state)
506 {
507         /* We can likely also do defensive sampling at deeper bounces, particularly
508          * for cases like a perfect mirror but possibly also others. This will need
509          * a good heuristic. */
510         if(state->bounce + state->transparent_bounce == 0 && sd->num_closure > 1) {
511                 float sum = 0.0f;
512
513                 for(int i = 0; i < sd->num_closure; i++) {
514                         ShaderClosure *sc = &sd->closure[i];
515                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
516                                 sum += sc->sample_weight;
517                         }
518                 }
519
520                 for(int i = 0; i < sd->num_closure; i++) {
521                         ShaderClosure *sc = &sd->closure[i];
522                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
523                                 sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
524                         }
525                 }
526         }
527 }
528
529
530 /* BSDF */
531
532 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
533         const ShaderClosure *skip_sc, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
534 {
535         /* this is the veach one-sample model with balance heuristic, some pdf
536          * factors drop out when using balance heuristic weighting */
537         for(int i = 0; i < sd->num_closure; i++) {
538                 const ShaderClosure *sc = &sd->closure[i];
539
540                 if(sc != skip_sc && CLOSURE_IS_BSDF(sc->type)) {
541                         float bsdf_pdf = 0.0f;
542                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
543
544                         if(bsdf_pdf != 0.0f) {
545                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight, 1.0f);
546                                 sum_pdf += bsdf_pdf*sc->sample_weight;
547                         }
548
549                         sum_sample_weight += sc->sample_weight;
550                 }
551         }
552
553         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
554 }
555
#ifdef __BRANCHED_PATH__
/* Branched path variant: evaluate every BSDF closure for direction
 * `omega_in` and accumulate it into `result_eval`. Each closure gets its own
 * weight: the power heuristic of `light_pdf` against that closure's pdf when
 * `use_mis` is set, 1 otherwise. Closures with zero pdf are skipped. */
ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
                                                        ShaderData *sd,
                                                        const float3 omega_in,
                                                        BsdfEval *result_eval,
                                                        float light_pdf,
                                                        bool use_mis)
{
	for(int i = 0; i < sd->num_closure; i++) {
		const ShaderClosure *sc = &sd->closure[i];
		if(!CLOSURE_IS_BSDF(sc->type)) {
			continue;
		}

		float closure_pdf = 0.0f;
		float3 closure_eval = bsdf_eval(kg, sd, sc, omega_in, &closure_pdf);
		if(closure_pdf == 0.0f) {
			continue;
		}

		float weight = 1.0f;
		if(use_mis) {
			weight = power_heuristic(light_pdf, closure_pdf);
		}

		bsdf_eval_accum(result_eval, sc->type, closure_eval * sc->weight, weight);
	}
}
#endif  /* __BRANCHED_PATH__ */
580
581
582 #ifndef __KERNEL_CUDA__
583 ccl_device
584 #else
585 ccl_device_inline
586 #endif
587 void shader_bsdf_eval(KernelGlobals *kg,
588                       ShaderData *sd,
589                       const float3 omega_in,
590                       BsdfEval *eval,
591                       float light_pdf,
592                       bool use_mis)
593 {
594         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
595
596 #ifdef __BRANCHED_PATH__
597         if(kernel_data.integrator.branched)
598                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
599         else
600 #endif
601         {
602                 float pdf;
603                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, NULL, eval, 0.0f, 0.0f);
604                 if(use_mis) {
605                         float weight = power_heuristic(light_pdf, pdf);
606                         bsdf_eval_mis(eval, weight);
607                 }
608         }
609 }
610
611 ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd,
612                                                         float *randu)
613 {
614         /* Note the sampling here must match shader_bssrdf_pick,
615          * since we reuse the same random number. */
616         int sampled = 0;
617
618         if(sd->num_closure > 1) {
619                 /* Pick a BSDF or based on sample weights. */
620                 float sum = 0.0f;
621
622                 for(int i = 0; i < sd->num_closure; i++) {
623                         const ShaderClosure *sc = &sd->closure[i];
624
625                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
626                                 sum += sc->sample_weight;
627                         }
628                 }
629
630                 float r = (*randu)*sum;
631                 float partial_sum = 0.0f;
632
633                 for(int i = 0; i < sd->num_closure; i++) {
634                         const ShaderClosure *sc = &sd->closure[i];
635
636                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
637                                 float next_sum = partial_sum + sc->sample_weight;
638
639                                 if(r < next_sum) {
640                                         sampled = i;
641
642                                         /* Rescale to reuse for direction sample, to better
643                                          * preserve stratifaction. */
644                                         *randu = (r - partial_sum) / sc->sample_weight;
645                                         break;
646                                 }
647
648                                 partial_sum = next_sum;
649                         }
650                 }
651         }
652
653         const ShaderClosure *sc = &sd->closure[sampled];
654         return CLOSURE_IS_BSDF(sc->type)? sc: NULL;
655 }
656
657 ccl_device_inline const ShaderClosure *shader_bssrdf_pick(ShaderData *sd,
658                                                           ccl_addr_space float3 *throughput,
659                                                           float *randu)
660 {
661         /* Note the sampling here must match shader_bsdf_pick,
662          * since we reuse the same random number. */
663         int sampled = 0;
664
665         if(sd->num_closure > 1) {
666                 /* Pick a BSDF or BSSRDF or based on sample weights. */
667                 float sum_bsdf = 0.0f;
668                 float sum_bssrdf = 0.0f;
669
670                 for(int i = 0; i < sd->num_closure; i++) {
671                         const ShaderClosure *sc = &sd->closure[i];
672
673                         if(CLOSURE_IS_BSDF(sc->type)) {
674                                 sum_bsdf += sc->sample_weight;
675                         }
676                         else if(CLOSURE_IS_BSSRDF(sc->type)) {
677                                 sum_bssrdf += sc->sample_weight;
678                         }
679                 }
680
681                 float r = (*randu)*(sum_bsdf + sum_bssrdf);
682                 float partial_sum = 0.0f;
683
684                 for(int i = 0; i < sd->num_closure; i++) {
685                         const ShaderClosure *sc = &sd->closure[i];
686
687                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
688                                 float next_sum = partial_sum + sc->sample_weight;
689
690                                 if(r < next_sum) {
691                                         if(CLOSURE_IS_BSDF(sc->type)) {
692                                                 *throughput *= (sum_bsdf + sum_bssrdf) / sum_bsdf;
693                                                 return NULL;
694                                         }
695                                         else {
696                                                 *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf;
697                                                 sampled = i;
698
699                                                 /* Rescale to reuse for direction sample, to better
700                                                  * preserve stratifaction. */
701                                                 *randu = (r - partial_sum) / sc->sample_weight;
702                                                 break;
703                                         }
704                                 }
705
706                                 partial_sum = next_sum;
707                         }
708                 }
709         }
710
711         const ShaderClosure *sc = &sd->closure[sampled];
712         return CLOSURE_IS_BSSRDF(sc->type)? sc: NULL;
713 }
714
715 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
716                                          ShaderData *sd,
717                                          float randu, float randv,
718                                          BsdfEval *bsdf_eval,
719                                          float3 *omega_in,
720                                          differential3 *domega_in,
721                                          float *pdf)
722 {
723         const ShaderClosure *sc = shader_bsdf_pick(sd, &randu);
724         if(sc == NULL) {
725                 *pdf = 0.0f;
726                 return LABEL_NONE;
727         }
728
729         /* BSSRDF should already have been handled elsewhere. */
730         kernel_assert(CLOSURE_IS_BSDF(sc->type));
731
732         int label;
733         float3 eval;
734
735         *pdf = 0.0f;
736         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
737
738         if(*pdf != 0.0f) {
739                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
740
741                 if(sd->num_closure > 1) {
742                         float sweight = sc->sample_weight;
743                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sc, bsdf_eval, *pdf*sweight, sweight);
744                 }
745         }
746
747         return label;
748 }
749
750 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
751         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
752         float3 *omega_in, differential3 *domega_in, float *pdf)
753 {
754         int label;
755         float3 eval;
756
757         *pdf = 0.0f;
758         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
759
760         if(*pdf != 0.0f)
761                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
762
763         return label;
764 }
765
766 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
767 {
768         for(int i = 0; i < sd->num_closure; i++) {
769                 ShaderClosure *sc = &sd->closure[i];
770
771                 if(CLOSURE_IS_BSDF(sc->type))
772                         bsdf_blur(kg, sc, roughness);
773         }
774 }
775
776 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd)
777 {
778         if(sd->flag & SD_HAS_ONLY_VOLUME) {
779                 return make_float3(1.0f, 1.0f, 1.0f);
780         }
781         else if(sd->flag & SD_TRANSPARENT) {
782                 return sd->closure_transparent_extinction;
783         }
784         else {
785                 return make_float3(0.0f, 0.0f, 0.0f);
786         }
787 }
788
789 ccl_device void shader_bsdf_disable_transparency(KernelGlobals *kg, ShaderData *sd)
790 {
791         if(sd->flag & SD_TRANSPARENT) {
792                 for(int i = 0; i < sd->num_closure; i++) {
793                         ShaderClosure *sc = &sd->closure[i];
794
795                         if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
796                                 sc->sample_weight = 0.0f;
797                                 sc->weight = make_float3(0.0f, 0.0f, 0.0f);
798                         }
799                 }
800
801                 sd->flag &= ~SD_TRANSPARENT;
802         }
803 }
804
805 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
806 {
807         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
808
809         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
810         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
811
812         return alpha;
813 }
814
815 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
816 {
817         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
818
819         for(int i = 0; i < sd->num_closure; i++) {
820                 ShaderClosure *sc = &sd->closure[i];
821
822                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
823                         eval += sc->weight;
824         }
825
826         return eval;
827 }
828
829 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
830 {
831         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
832
833         for(int i = 0; i < sd->num_closure; i++) {
834                 ShaderClosure *sc = &sd->closure[i];
835
836                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
837                         eval += sc->weight;
838         }
839
840         return eval;
841 }
842
843 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
844 {
845         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
846
847         for(int i = 0; i < sd->num_closure; i++) {
848                 ShaderClosure *sc = &sd->closure[i];
849
850                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
851                         eval += sc->weight;
852         }
853
854         return eval;
855 }
856
857 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
858 {
859         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
860
861         for(int i = 0; i < sd->num_closure; i++) {
862                 ShaderClosure *sc = &sd->closure[i];
863
864                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
865                         eval += sc->weight;
866         }
867
868         return eval;
869 }
870
871 ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd)
872 {
873         float3 N = make_float3(0.0f, 0.0f, 0.0f);
874
875         for(int i = 0; i < sd->num_closure; i++) {
876                 ShaderClosure *sc = &sd->closure[i];
877                 if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
878                         N += sc->N*average(sc->weight);
879         }
880
881         return (is_zero(N))? sd->N : normalize(N);
882 }
883
884 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
885 {
886         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
887         float3 N = make_float3(0.0f, 0.0f, 0.0f);
888
889         for(int i = 0; i < sd->num_closure; i++) {
890                 ShaderClosure *sc = &sd->closure[i];
891
892                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
893                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
894                         eval += sc->weight*ao_factor;
895                         N += bsdf->N*average(sc->weight);
896                 }
897                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
898                         eval += sc->weight;
899                         N += sd->N*average(sc->weight);
900                 }
901         }
902
903         *N_ = (is_zero(N))? sd->N : normalize(N);
904         return eval;
905 }
906
907 #ifdef __SUBSURFACE__
908 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
909 {
910         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
911         float3 N = make_float3(0.0f, 0.0f, 0.0f);
912         float texture_blur = 0.0f, weight_sum = 0.0f;
913
914         for(int i = 0; i < sd->num_closure; i++) {
915                 ShaderClosure *sc = &sd->closure[i];
916
917                 if(CLOSURE_IS_BSSRDF(sc->type)) {
918                         const Bssrdf *bssrdf = (const Bssrdf*)sc;
919                         float avg_weight = fabsf(average(sc->weight));
920
921                         N += bssrdf->N*avg_weight;
922                         eval += sc->weight;
923                         texture_blur += bssrdf->texture_blur*avg_weight;
924                         weight_sum += avg_weight;
925                 }
926         }
927
928         if(N_)
929                 *N_ = (is_zero(N))? sd->N: normalize(N);
930
931         if(texture_blur_)
932                 *texture_blur_ = safe_divide(texture_blur, weight_sum);
933
934         return eval;
935 }
936 #endif  /* __SUBSURFACE__ */
937
938 /* Emission */
939
940 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
941 {
942         if(sd->flag & SD_EMISSION) {
943                 return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
944         }
945         else {
946                 return make_float3(0.0f, 0.0f, 0.0f);
947         }
948 }
949
950 /* Holdout */
951
952 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
953 {
954         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
955
956         for(int i = 0; i < sd->num_closure; i++) {
957                 ShaderClosure *sc = &sd->closure[i];
958
959                 if(CLOSURE_IS_HOLDOUT(sc->type))
960                         weight += sc->weight;
961         }
962
963         return weight;
964 }
965
966 /* Surface Evaluation */
967
968 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
969         ccl_addr_space PathState *state, int path_flag)
970 {
971         /* If path is being terminated, we are tracing a shadow ray or evaluating
972          * emission, then we don't need to store closures. The emission and shadow
973          * shader data also do not have a closure array to save GPU memory. */
974         int max_closures;
975         if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) {
976                 max_closures = 0;
977         }
978         else {
979                 max_closures = kernel_data.integrator.max_closures;
980         }
981
982         sd->num_closure = 0;
983         sd->num_closure_left = max_closures;
984
985 #ifdef __OSL__
986         if(kg->osl)
987                 OSLShader::eval_surface(kg, sd, state, path_flag);
988         else
989 #endif
990         {
991 #ifdef __SVM__
992                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
993 #else
994                 DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
995                                                              sizeof(DiffuseBsdf),
996                                                              make_float3(0.8f, 0.8f, 0.8f));
997                 bsdf->N = sd->N;
998                 sd->flag |= bsdf_diffuse_setup(bsdf);
999 #endif
1000         }
1001
1002         if(sd->flag & SD_BSDF_NEEDS_LCG) {
1003                 sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
1004         }
1005 }
1006
1007 /* Background Evaluation */
1008
1009 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
1010         ccl_addr_space PathState *state, int path_flag)
1011 {
1012         sd->num_closure = 0;
1013         sd->num_closure_left = 0;
1014
1015 #ifdef __SVM__
1016 #  ifdef __OSL__
1017         if(kg->osl) {
1018                 OSLShader::eval_background(kg, sd, state, path_flag);
1019         }
1020         else
1021 #  endif  /* __OSL__ */
1022         {
1023                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
1024         }
1025
1026         if(sd->flag & SD_EMISSION) {
1027                 return sd->closure_emission_background;
1028         }
1029         else {
1030                 return make_float3(0.0f, 0.0f, 0.0f);
1031         }
1032 #else  /* __SVM__ */
1033         return make_float3(0.8f, 0.8f, 0.8f);
1034 #endif  /* __SVM__ */
1035 }
1036
1037 /* Volume */
1038
1039 #ifdef __VOLUME__
1040
1041 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
1042         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
1043 {
1044         for(int i = 0; i < sd->num_closure; i++) {
1045                 if(i == skip_phase)
1046                         continue;
1047
1048                 const ShaderClosure *sc = &sd->closure[i];
1049
1050                 if(CLOSURE_IS_PHASE(sc->type)) {
1051                         float phase_pdf = 0.0f;
1052                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
1053
1054                         if(phase_pdf != 0.0f) {
1055                                 bsdf_eval_accum(result_eval, sc->type, eval, 1.0f);
1056                                 sum_pdf += phase_pdf*sc->sample_weight;
1057                         }
1058
1059                         sum_sample_weight += sc->sample_weight;
1060                 }
1061         }
1062
1063         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
1064 }
1065
1066 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
1067         const float3 omega_in, BsdfEval *eval, float *pdf)
1068 {
1069         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
1070
1071         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
1072 }
1073
1074 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
1075         float randu, float randv, BsdfEval *phase_eval,
1076         float3 *omega_in, differential3 *domega_in, float *pdf)
1077 {
1078         int sampled = 0;
1079
1080         if(sd->num_closure > 1) {
1081                 /* pick a phase closure based on sample weights */
1082                 float sum = 0.0f;
1083
1084                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
1085                         const ShaderClosure *sc = &sd->closure[sampled];
1086
1087                         if(CLOSURE_IS_PHASE(sc->type))
1088                                 sum += sc->sample_weight;
1089                 }
1090
1091                 float r = randu*sum;
1092                 float partial_sum = 0.0f;
1093
1094                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
1095                         const ShaderClosure *sc = &sd->closure[sampled];
1096
1097                         if(CLOSURE_IS_PHASE(sc->type)) {
1098                                 float next_sum = partial_sum + sc->sample_weight;
1099
1100                                 if(r <= next_sum) {
1101                                         /* Rescale to reuse for BSDF direction sample. */
1102                                         randu = (r - partial_sum) / sc->sample_weight;
1103                                         break;
1104                                 }
1105
1106                                 partial_sum = next_sum;
1107                         }
1108                 }
1109
1110                 if(sampled == sd->num_closure) {
1111                         *pdf = 0.0f;
1112                         return LABEL_NONE;
1113                 }
1114         }
1115
1116         /* todo: this isn't quite correct, we don't weight anisotropy properly
1117          * depending on color channels, even if this is perhaps not a common case */
1118         const ShaderClosure *sc = &sd->closure[sampled];
1119         int label;
1120         float3 eval;
1121
1122         *pdf = 0.0f;
1123         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1124
1125         if(*pdf != 0.0f) {
1126                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1127         }
1128
1129         return label;
1130 }
1131
1132 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
1133         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
1134         float3 *omega_in, differential3 *domega_in, float *pdf)
1135 {
1136         int label;
1137         float3 eval;
1138
1139         *pdf = 0.0f;
1140         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1141
1142         if(*pdf != 0.0f)
1143                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1144
1145         return label;
1146 }
1147
1148 /* Volume Evaluation */
1149
1150 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
1151                                           ShaderData *sd,
1152                                           ccl_addr_space PathState *state,
1153                                           ccl_addr_space VolumeStack *stack,
1154                                           int path_flag)
1155 {
1156         /* If path is being terminated, we are tracing a shadow ray or evaluating
1157          * emission, then we don't need to store closures. The emission and shadow
1158          * shader data also do not have a closure array to save GPU memory. */
1159         int max_closures;
1160         if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) {
1161                 max_closures = 0;
1162         }
1163         else {
1164                 max_closures = kernel_data.integrator.max_closures;
1165         }
1166
1167         /* reset closures once at the start, we will be accumulating the closures
1168          * for all volumes in the stack into a single array of closures */
1169         sd->num_closure = 0;
1170         sd->num_closure_left = max_closures;
1171         sd->flag = 0;
1172         sd->object_flag = 0;
1173
1174         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1175                 /* setup shaderdata from stack. it's mostly setup already in
1176                  * shader_setup_from_volume, this switching should be quick */
1177                 sd->object = stack[i].object;
1178                 sd->lamp = LAMP_NONE;
1179                 sd->shader = stack[i].shader;
1180
1181                 sd->flag &= ~SD_SHADER_FLAGS;
1182                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
1183                 sd->object_flag &= ~SD_OBJECT_FLAGS;
1184
1185                 if(sd->object != OBJECT_NONE) {
1186                         sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
1187
1188 #ifdef __OBJECT_MOTION__
1189                         /* todo: this is inefficient for motion blur, we should be
1190                          * caching matrices instead of recomputing them each step */
1191                         shader_setup_object_transforms(kg, sd, sd->time);
1192 #endif
1193                 }
1194
1195                 /* evaluate shader */
1196 #ifdef __SVM__
1197 #  ifdef __OSL__
1198                 if(kg->osl) {
1199                         OSLShader::eval_volume(kg, sd, state, path_flag);
1200                 }
1201                 else
1202 #  endif
1203                 {
1204                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1205                 }
1206 #endif
1207
1208                 /* merge closures to avoid exceeding number of closures limit */
1209                 if(i > 0)
1210                         shader_merge_closures(sd);
1211         }
1212 }
1213
1214 #endif  /* __VOLUME__ */
1215
1216 /* Displacement Evaluation */
1217
1218 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state)
1219 {
1220         sd->num_closure = 0;
1221         sd->num_closure_left = 0;
1222
1223         /* this will modify sd->P */
1224 #ifdef __SVM__
1225 #  ifdef __OSL__
1226         if(kg->osl)
1227                 OSLShader::eval_displacement(kg, sd, state);
1228         else
1229 #  endif
1230         {
1231                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1232         }
1233 #endif
1234 }
1235
1236 /* Transparent Shadows */
1237
1238 #ifdef __TRANSPARENT_SHADOWS__
1239 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1240 {
1241         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1242         int shader = 0;
1243
1244 #ifdef __HAIR__
1245         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1246 #endif
1247                 shader = kernel_tex_fetch(__tri_shader, prim);
1248 #ifdef __HAIR__
1249         }
1250         else {
1251                 float4 str = kernel_tex_fetch(__curves, prim);
1252                 shader = __float_as_int(str.z);
1253         }
1254 #endif
1255         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
1256
1257         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1258 }
1259 #endif  /* __TRANSPARENT_SHADOWS__ */
1260
1261 CCL_NAMESPACE_END