Code cleanup: deduplicate some branched and split kernel code.
[blender-staging.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "kernel/closure/alloc.h"
28 #include "kernel/closure/bsdf_util.h"
29 #include "kernel/closure/bsdf.h"
30 #include "kernel/closure/emissive.h"
31
32 #include "kernel/svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
#ifdef __OBJECT_MOTION__
/* Fill sd->ob_tfm and sd->ob_itfm for sd->object.
 *
 * Objects flagged SD_OBJECT_MOTION fetch a transform for the given time and
 * derive the inverse from it; all other objects fetch both stored matrices. */
ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{
        const bool has_motion = (sd->object_flag & SD_OBJECT_MOTION) != 0;

        if(!has_motion) {
                sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
                sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
                return;
        }

        sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
        sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
}
#endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60
61         sd->type = isect->type;
62         sd->flag = 0;
63         sd->object_flag = kernel_tex_fetch(__object_flag,
64                                                       sd->object);
65
66         /* matrices and time */
67 #ifdef __OBJECT_MOTION__
68         shader_setup_object_transforms(kg, sd, ray->time);
69 #endif
70         sd->time = ray->time;
71
72         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
73         sd->ray_length = isect->t;
74
75 #ifdef __UV__
76         sd->u = isect->u;
77         sd->v = isect->v;
78 #endif
79
80 #ifdef __HAIR__
81         if(sd->type & PRIMITIVE_ALL_CURVE) {
82                 /* curve */
83                 float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
84
85                 sd->shader = __float_as_int(curvedata.z);
86                 sd->P = curve_refine(kg, sd, isect, ray);
87         }
88         else
89 #endif
90         if(sd->type & PRIMITIVE_TRIANGLE) {
91                 /* static triangle */
92                 float3 Ng = triangle_normal(kg, sd);
93                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
94
95                 /* vectors */
96                 sd->P = triangle_refine(kg, sd, isect, ray);
97                 sd->Ng = Ng;
98                 sd->N = Ng;
99                 
100                 /* smooth normal */
101                 if(sd->shader & SHADER_SMOOTH_NORMAL)
102                         sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
103
104 #ifdef __DPDU__
105                 /* dPdu/dPdv */
106                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
107 #endif
108         }
109         else {
110                 /* motion triangle */
111                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
112         }
113
114         sd->I = -ray->D;
115
116         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
117
118 #ifdef __INSTANCING__
119         if(isect->object != OBJECT_NONE) {
120                 /* instance transform */
121                 object_normal_transform_auto(kg, sd, &sd->N);
122                 object_normal_transform_auto(kg, sd, &sd->Ng);
123 #  ifdef __DPDU__
124                 object_dir_transform_auto(kg, sd, &sd->dPdu);
125                 object_dir_transform_auto(kg, sd, &sd->dPdv);
126 #  endif
127         }
128 #endif
129
130         /* backfacing test */
131         bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
132
133         if(backfacing) {
134                 sd->flag |= SD_BACKFACING;
135                 sd->Ng = -sd->Ng;
136                 sd->N = -sd->N;
137 #ifdef __DPDU__
138                 sd->dPdu = -sd->dPdu;
139                 sd->dPdv = -sd->dPdv;
140 #endif
141         }
142
143 #ifdef __RAY_DIFFERENTIALS__
144         /* differentials */
145         differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t);
146         differential_incoming(&sd->dI, ray->dD);
147         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
148 #endif
149 }
150
151 /* ShaderData setup from BSSRDF scatter */
152
#ifdef __SUBSURFACE__
#  ifndef __KERNEL_CUDA__
ccl_device
#  else
ccl_device_inline
#  endif
/* Re-target an existing ShaderData at the hit described by isect.
 *
 * Object, matrices, time and ray_length are kept from the previous setup,
 * as is the backfacing state, which is read before the flags are cleared
 * and re-applied afterwards. dP and dI are left untouched. */
void shader_setup_from_subsurface(
        KernelGlobals *kg,
        ShaderData *sd,
        const Intersection *isect,
        const Ray *ray)
{
        /* remember the orientation of the previous shading point */
        const bool was_backfacing = (sd->flag & SD_BACKFACING) != 0;

        /* object, matrices, time, ray_length stay the same */
        sd->flag = 0;
        sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
        sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
        sd->type = isect->type;

#  ifdef __UV__
        sd->u = isect->u;
        sd->v = isect->v;
#  endif

        /* fetch triangle data */
        if(sd->type != PRIMITIVE_TRIANGLE) {
                /* motion triangle */
                motion_triangle_shader_setup(kg, sd, isect, ray, true);
        }
        else {
                /* static triangle */
                const float3 face_normal = triangle_normal(kg, sd);
                sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);

                sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
                sd->Ng = face_normal;
                sd->N = face_normal;

                if(sd->shader & SHADER_SMOOTH_NORMAL) {
                        sd->N = triangle_smooth_normal(kg, face_normal, sd->prim, sd->u, sd->v);
                }

#  ifdef __DPDU__
                /* dPdu/dPdv */
                triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
#  endif
        }

        sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);

#  ifdef __INSTANCING__
        /* instance transform */
        if(isect->object != OBJECT_NONE) {
                object_normal_transform_auto(kg, sd, &sd->N);
                object_normal_transform_auto(kg, sd, &sd->Ng);
#    ifdef __DPDU__
                object_dir_transform_auto(kg, sd, &sd->dPdu);
                object_dir_transform_auto(kg, sd, &sd->dPdv);
#    endif
        }
#  endif

        /* restore the backfacing state captured on entry */
        if(was_backfacing) {
                sd->flag |= SD_BACKFACING;
                sd->Ng = -sd->Ng;
                sd->N = -sd->N;
#  ifdef __DPDU__
                sd->dPdu = -sd->dPdu;
                sd->dPdv = -sd->dPdv;
#  endif
        }

        /* should not get used in principle as the shading will only use a diffuse
         * BSDF, but the shader might still access it */
        sd->I = sd->N;

#  ifdef __RAY_DIFFERENTIALS__
        /* only du/dv are recomputed; dP and dI are not modified */
        differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
#  endif
}
#endif
237
238 /* ShaderData setup from position sampled on mesh */
239
240 ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
241                                                 ShaderData *sd,
242                                                 const float3 P,
243                                                 const float3 Ng,
244                                                 const float3 I,
245                                                 int shader, int object, int prim,
246                                                 float u, float v, float t,
247                                                 float time,
248                                                 bool object_space,
249                                                 int lamp)
250 {
251         /* vectors */
252         sd->P = P;
253         sd->N = Ng;
254         sd->Ng = Ng;
255         sd->I = I;
256         sd->shader = shader;
257         if(prim != PRIM_NONE)
258                 sd->type = PRIMITIVE_TRIANGLE;
259         else if(lamp != LAMP_NONE)
260                 sd->type = PRIMITIVE_LAMP;
261         else
262                 sd->type = PRIMITIVE_NONE;
263
264         /* primitive */
265 #ifdef __INSTANCING__
266         sd->object = object;
267 #endif
268         /* currently no access to bvh prim index for strand sd->prim*/
269         sd->prim = prim;
270 #ifdef __UV__
271         sd->u = u;
272         sd->v = v;
273 #endif
274         sd->time = time;
275         sd->ray_length = t;
276
277         sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
278         sd->object_flag = 0;
279         if(sd->object != OBJECT_NONE) {
280                 sd->object_flag |= kernel_tex_fetch(__object_flag,
281                                                     sd->object);
282
283 #ifdef __OBJECT_MOTION__
284                 shader_setup_object_transforms(kg, sd, time);
285         }
286         else if(lamp != LAMP_NONE) {
287                 sd->ob_tfm  = lamp_fetch_transform(kg, lamp, false);
288                 sd->ob_itfm = lamp_fetch_transform(kg, lamp, true);
289 #endif
290         }
291
292         /* transform into world space */
293         if(object_space) {
294                 object_position_transform_auto(kg, sd, &sd->P);
295                 object_normal_transform_auto(kg, sd, &sd->Ng);
296                 sd->N = sd->Ng;
297                 object_dir_transform_auto(kg, sd, &sd->I);
298         }
299
300         if(sd->type & PRIMITIVE_TRIANGLE) {
301                 /* smooth normal */
302                 if(sd->shader & SHADER_SMOOTH_NORMAL) {
303                         sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
304
305 #ifdef __INSTANCING__
306                         if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
307                                 object_normal_transform_auto(kg, sd, &sd->N);
308                         }
309 #endif
310                 }
311
312                 /* dPdu/dPdv */
313 #ifdef __DPDU__
314                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
315
316 #  ifdef __INSTANCING__
317                 if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
318                         object_dir_transform_auto(kg, sd, &sd->dPdu);
319                         object_dir_transform_auto(kg, sd, &sd->dPdv);
320                 }
321 #  endif
322 #endif
323         }
324         else {
325 #ifdef __DPDU__
326                 sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
327                 sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
328 #endif
329         }
330
331         /* backfacing test */
332         if(sd->prim != PRIM_NONE) {
333                 bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
334
335                 if(backfacing) {
336                         sd->flag |= SD_BACKFACING;
337                         sd->Ng = -sd->Ng;
338                         sd->N = -sd->N;
339 #ifdef __DPDU__
340                         sd->dPdu = -sd->dPdu;
341                         sd->dPdv = -sd->dPdv;
342 #endif
343                 }
344         }
345
346 #ifdef __RAY_DIFFERENTIALS__
347         /* no ray differentials here yet */
348         sd->dP = differential3_zero();
349         sd->dI = differential3_zero();
350         sd->du = differential_zero();
351         sd->dv = differential_zero();
352 #endif
353 }
354
355 /* ShaderData setup for displacement */
356
357 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
358         int object, int prim, float u, float v)
359 {
360         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
361         int shader;
362
363         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
364
365         /* force smooth shading for displacement */
366         shader |= SHADER_SMOOTH_NORMAL;
367
368         shader_setup_from_sample(kg, sd,
369                                  P, Ng, I,
370                                  shader, object, prim,
371                                  u, v, 0.0f, 0.5f,
372                                  !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
373                                  LAMP_NONE);
374 }
375
376 /* ShaderData setup from ray into background */
377
378 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
379 {
380         /* vectors */
381         sd->P = ray->D;
382         sd->N = -ray->D;
383         sd->Ng = -ray->D;
384         sd->I = -ray->D;
385         sd->shader = kernel_data.background.surface_shader;
386         sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
387         sd->object_flag = 0;
388         sd->time = ray->time;
389         sd->ray_length = 0.0f;
390
391 #ifdef __INSTANCING__
392         sd->object = PRIM_NONE;
393 #endif
394         sd->prim = PRIM_NONE;
395 #ifdef __UV__
396         sd->u = 0.0f;
397         sd->v = 0.0f;
398 #endif
399
400 #ifdef __DPDU__
401         /* dPdu/dPdv */
402         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
403         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
404 #endif
405
406 #ifdef __RAY_DIFFERENTIALS__
407         /* differentials */
408         sd->dP = ray->dD;
409         differential_incoming(&sd->dI, sd->dP);
410         sd->du = differential_zero();
411         sd->dv = differential_zero();
412 #endif
413 }
414
415 /* ShaderData setup from point inside volume */
416
#ifdef __VOLUME__
/* Set up ShaderData for a point inside a volume.
 *
 * The position is the ray origin and all normals point back along the ray.
 * No shader, object or primitive is assigned; ray_P and ray_dP are stored
 * for NDC coordinate lookups. */
ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
{
        /* vectors */
        sd->P = ray->P;
        sd->N = -ray->D;
        sd->Ng = -ray->D;
        sd->I = -ray->D;
        sd->shader = SHADER_NONE;
        sd->flag = 0;
        sd->object_flag = 0;
        sd->time = ray->time;
        sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */

#ifdef __INSTANCING__
        /* object sentinel, as used by the other object tests in this file */
        sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */
#endif
        sd->prim = PRIM_NONE;
        sd->type = PRIMITIVE_NONE;

#ifdef __UV__
        sd->u = 0.0f;
        sd->v = 0.0f;
#endif

#ifdef __DPDU__
        /* dPdu/dPdv */
        sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
        sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
#endif

#ifdef __RAY_DIFFERENTIALS__
        /* differentials */
        sd->dP = ray->dD;
        differential_incoming(&sd->dI, sd->dP);
        sd->du = differential_zero();
        sd->dv = differential_zero();
#endif

        /* for NDC coordinates */
        sd->ray_P = ray->P;
        sd->ray_dP = ray->dP;
}
#endif
461
462 /* Merging */
463
#if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
/* Merge closures of the same type that bsdf_merge() accepts as mergeable.
 *
 * The weights of the later closure are added to the earlier one and the
 * later closure is removed by shifting the rest of the array down. Better
 * when we sample a single closure at a time. */
ccl_device_inline void shader_merge_closures(ShaderData *sd)
{
        for(int i = 0; i < sd->num_closure; i++) {
                ShaderClosure *keep = &sd->closure[i];
                int j = i + 1;

                while(j < sd->num_closure) {
                        ShaderClosure *other = &sd->closure[j];

                        /* bsdf_merge() is only consulted for matching types */
                        if(keep->type != other->type || !bsdf_merge(keep, other)) {
                                j++;
                                continue;
                        }

                        keep->weight += other->weight;
                        keep->sample_weight += other->sample_weight;

                        /* close the gap left by the merged closure */
                        for(int k = j; k < sd->num_closure - 1; k++) {
                                sd->closure[k] = sd->closure[k + 1];
                        }

                        sd->num_closure--;
                        kernel_assert(sd->num_closure >= 0);

                        /* j is not advanced: slot j now holds the next candidate */
                }
        }
}
#endif
496
497 /* BSDF */
498
499 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
500         int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
501 {
502         /* this is the veach one-sample model with balance heuristic, some pdf
503          * factors drop out when using balance heuristic weighting */
504         for(int i = 0; i < sd->num_closure; i++) {
505                 if(i == skip_bsdf)
506                         continue;
507
508                 const ShaderClosure *sc = &sd->closure[i];
509
510                 if(CLOSURE_IS_BSDF(sc->type)) {
511                         float bsdf_pdf = 0.0f;
512                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
513
514                         if(bsdf_pdf != 0.0f) {
515                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight, 1.0f);
516                                 sum_pdf += bsdf_pdf*sc->sample_weight;
517                         }
518
519                         sum_sample_weight += sc->sample_weight;
520                 }
521         }
522
523         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
524 }
525
#ifdef __BRANCHED_PATH__
/* Evaluate every BSDF closure for omega_in and accumulate into result_eval.
 *
 * With use_mis each closure is weighted by the power heuristic between
 * light_pdf and that closure's own pdf; otherwise the weight is 1. */
ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
                                                        ShaderData *sd,
                                                        const float3 omega_in,
                                                        BsdfEval *result_eval,
                                                        float light_pdf,
                                                        bool use_mis)
{
        for(int i = 0; i < sd->num_closure; i++) {
                const ShaderClosure *sc = &sd->closure[i];

                if(!(CLOSURE_IS_BSDF(sc->type))) {
                        continue;
                }

                float closure_pdf = 0.0f;
                const float3 value = bsdf_eval(kg, sd, sc, omega_in, &closure_pdf);

                if(closure_pdf == 0.0f) {
                        continue;
                }

                float mis_weight = 1.0f;
                if(use_mis) {
                        mis_weight = power_heuristic(light_pdf, closure_pdf);
                }

                bsdf_eval_accum(result_eval, sc->type, value * sc->weight, mis_weight);
        }
}
#endif
550
551
552 #ifndef __KERNEL_CUDA__
553 ccl_device
554 #else
555 ccl_device_inline
556 #endif
557 void shader_bsdf_eval(KernelGlobals *kg,
558                       ShaderData *sd,
559                       const float3 omega_in,
560                       BsdfEval *eval,
561                       float light_pdf,
562                       bool use_mis)
563 {
564         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
565
566 #ifdef __BRANCHED_PATH__
567         if(kernel_data.integrator.branched)
568                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
569         else
570 #endif
571         {
572                 float pdf;
573                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
574                 if(use_mis) {
575                         float weight = power_heuristic(light_pdf, pdf);
576                         bsdf_eval_mis(eval, weight);
577                 }
578         }
579 }
580
581 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
582                                          ShaderData *sd,
583                                          float randu, float randv,
584                                          BsdfEval *bsdf_eval,
585                                          float3 *omega_in,
586                                          differential3 *domega_in,
587                                          float *pdf)
588 {
589         int sampled = 0;
590
591         if(sd->num_closure > 1) {
592                 /* pick a BSDF closure based on sample weights */
593                 float sum = 0.0f;
594
595                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
596                         const ShaderClosure *sc = &sd->closure[sampled];
597                         
598                         if(CLOSURE_IS_BSDF(sc->type))
599                                 sum += sc->sample_weight;
600                 }
601
602                 float r = sd->randb_closure*sum;
603                 sum = 0.0f;
604
605                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
606                         const ShaderClosure *sc = &sd->closure[sampled];
607                         
608                         if(CLOSURE_IS_BSDF(sc->type)) {
609                                 sum += sc->sample_weight;
610
611                                 if(r <= sum)
612                                         break;
613                         }
614                 }
615
616                 if(sampled == sd->num_closure) {
617                         *pdf = 0.0f;
618                         return LABEL_NONE;
619                 }
620         }
621
622         const ShaderClosure *sc = &sd->closure[sampled];
623
624         int label;
625         float3 eval;
626
627         *pdf = 0.0f;
628         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
629
630         if(*pdf != 0.0f) {
631                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
632
633                 if(sd->num_closure > 1) {
634                         float sweight = sc->sample_weight;
635                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
636                 }
637         }
638
639         return label;
640 }
641
642 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
643         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
644         float3 *omega_in, differential3 *domega_in, float *pdf)
645 {
646         int label;
647         float3 eval;
648
649         *pdf = 0.0f;
650         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
651
652         if(*pdf != 0.0f)
653                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
654
655         return label;
656 }
657
658 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
659 {
660         for(int i = 0; i < sd->num_closure; i++) {
661                 ShaderClosure *sc = &sd->closure[i];
662
663                 if(CLOSURE_IS_BSDF(sc->type))
664                         bsdf_blur(kg, sc, roughness);
665         }
666 }
667
668 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd)
669 {
670         if(sd->flag & SD_HAS_ONLY_VOLUME)
671                 return make_float3(1.0f, 1.0f, 1.0f);
672
673         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
674
675         for(int i = 0; i < sd->num_closure; i++) {
676                 const ShaderClosure *sc = &sd->closure[i];
677
678                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
679                         eval += sc->weight;
680         }
681
682         return eval;
683 }
684
685 ccl_device void shader_bsdf_disable_transparency(KernelGlobals *kg, ShaderData *sd)
686 {
687         for(int i = 0; i < sd->num_closure; i++) {
688                 ShaderClosure *sc = &sd->closure[i];
689
690                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
691                         sc->sample_weight = 0.0f;
692                         sc->weight = make_float3(0.0f, 0.0f, 0.0f);
693                 }
694         }
695 }
696
697 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
698 {
699         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
700
701         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
702         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
703         
704         return alpha;
705 }
706
707 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
708 {
709         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
710
711         for(int i = 0; i < sd->num_closure; i++) {
712                 ShaderClosure *sc = &sd->closure[i];
713
714                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
715                         eval += sc->weight;
716         }
717
718         return eval;
719 }
720
721 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
722 {
723         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
724
725         for(int i = 0; i < sd->num_closure; i++) {
726                 ShaderClosure *sc = &sd->closure[i];
727
728                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
729                         eval += sc->weight;
730         }
731
732         return eval;
733 }
734
735 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
736 {
737         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
738
739         for(int i = 0; i < sd->num_closure; i++) {
740                 ShaderClosure *sc = &sd->closure[i];
741
742                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
743                         eval += sc->weight;
744         }
745
746         return eval;
747 }
748
749 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
750 {
751         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
752
753         for(int i = 0; i < sd->num_closure; i++) {
754                 ShaderClosure *sc = &sd->closure[i];
755
756                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
757                         eval += sc->weight;
758         }
759
760         return eval;
761 }
762
763 ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd)
764 {
765         float3 N = make_float3(0.0f, 0.0f, 0.0f);
766
767         for(int i = 0; i < sd->num_closure; i++) {
768                 ShaderClosure *sc = &sd->closure[i];
769                 if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
770                         N += sc->N*average(sc->weight);
771         }
772
773         return (is_zero(N))? sd->N : normalize(N);
774 }
775
776 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
777 {
778         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
779         float3 N = make_float3(0.0f, 0.0f, 0.0f);
780
781         for(int i = 0; i < sd->num_closure; i++) {
782                 ShaderClosure *sc = &sd->closure[i];
783
784                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
785                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
786                         eval += sc->weight*ao_factor;
787                         N += bsdf->N*average(sc->weight);
788                 }
789                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
790                         eval += sc->weight;
791                         N += sd->N*average(sc->weight);
792                 }
793         }
794
795         *N_ = (is_zero(N))? sd->N : normalize(N);
796         return eval;
797 }
798
#ifdef __SUBSURFACE__
/* Sum the weights of all BSSRDF closures.
 *
 * N_ and texture_blur_ are optional outputs (may be NULL). They receive the
 * normal and texture blur averaged with the absolute average closure weight;
 * the normal falls back to sd->N when the sum is zero. */
ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
{
        float3 weight_total = make_float3(0.0f, 0.0f, 0.0f);
        float3 normal_sum = make_float3(0.0f, 0.0f, 0.0f);
        float blur_sum = 0.0f;
        float scalar_weight_sum = 0.0f;

        for(int i = 0; i < sd->num_closure; i++) {
                ShaderClosure *closure = &sd->closure[i];

                if(!(CLOSURE_IS_BSSRDF(closure->type))) {
                        continue;
                }

                const Bssrdf *bssrdf = (const Bssrdf*)closure;
                const float scalar_weight = fabsf(average(closure->weight));

                normal_sum += bssrdf->N*scalar_weight;
                weight_total += closure->weight;
                blur_sum += bssrdf->texture_blur*scalar_weight;
                scalar_weight_sum += scalar_weight;
        }

        if(N_) {
                if(is_zero(normal_sum)) {
                        *N_ = sd->N;
                }
                else {
                        *N_ = normalize(normal_sum);
                }
        }

        if(texture_blur_) {
                *texture_blur_ = safe_divide(blur_sum, scalar_weight_sum);
        }

        return weight_total;
}
#endif
829
830 /* Emission */
831
832 ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
833 {
834         return emissive_simple_eval(sd->Ng, sd->I);
835 }
836
837 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
838 {
839         float3 eval;
840         eval = make_float3(0.0f, 0.0f, 0.0f);
841
842         for(int i = 0; i < sd->num_closure; i++) {
843                 ShaderClosure *sc = &sd->closure[i];
844
845                 if(CLOSURE_IS_EMISSION(sc->type))
846                         eval += emissive_eval(kg, sd, sc)*sc->weight;
847         }
848
849         return eval;
850 }
851
852 /* Holdout */
853
854 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
855 {
856         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
857
858         for(int i = 0; i < sd->num_closure; i++) {
859                 ShaderClosure *sc = &sd->closure[i];
860
861                 if(CLOSURE_IS_HOLDOUT(sc->type))
862                         weight += sc->weight;
863         }
864
865         return weight;
866 }
867
868 /* Surface Evaluation */
869
870 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
871         ccl_addr_space PathState *state, float randb, int path_flag)
872 {
873         sd->num_closure = 0;
874         sd->num_closure_extra = 0;
875         sd->randb_closure = randb;
876
877 #ifdef __OSL__
878         if(kg->osl)
879                 OSLShader::eval_surface(kg, sd, state, path_flag);
880         else
881 #endif
882         {
883 #ifdef __SVM__
884                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
885 #else
886                 DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
887                                                              sizeof(DiffuseBsdf),
888                                                              make_float3(0.8f, 0.8f, 0.8f));
889                 bsdf->N = sd->N;
890                 sd->flag |= bsdf_diffuse_setup(bsdf);
891 #endif
892         }
893
894         if(sd->flag & SD_BSDF_NEEDS_LCG) {
895                 sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953);
896         }
897 }
898
899 /* Background Evaluation */
900
901 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
902         ccl_addr_space PathState *state, int path_flag)
903 {
904         sd->num_closure = 0;
905         sd->num_closure_extra = 0;
906         sd->randb_closure = 0.0f;
907
908 #ifdef __SVM__
909 #ifdef __OSL__
910         if(kg->osl) {
911                 OSLShader::eval_background(kg, sd, state, path_flag);
912         }
913         else
914 #endif
915         {
916                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
917         }
918
919         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
920
921         for(int i = 0; i < sd->num_closure; i++) {
922                 const ShaderClosure *sc = &sd->closure[i];
923
924                 if(CLOSURE_IS_BACKGROUND(sc->type))
925                         eval += sc->weight;
926         }
927
928         return eval;
929 #else
930         return make_float3(0.8f, 0.8f, 0.8f);
931 #endif
932 }
933
934 /* Volume */
935
936 #ifdef __VOLUME__
937
938 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
939         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
940 {
941         for(int i = 0; i < sd->num_closure; i++) {
942                 if(i == skip_phase)
943                         continue;
944
945                 const ShaderClosure *sc = &sd->closure[i];
946
947                 if(CLOSURE_IS_PHASE(sc->type)) {
948                         float phase_pdf = 0.0f;
949                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
950
951                         if(phase_pdf != 0.0f) {
952                                 bsdf_eval_accum(result_eval, sc->type, eval, 1.0f);
953                                 sum_pdf += phase_pdf*sc->sample_weight;
954                         }
955
956                         sum_sample_weight += sc->sample_weight;
957                 }
958         }
959
960         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
961 }
962
963 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
964         const float3 omega_in, BsdfEval *eval, float *pdf)
965 {
966         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
967
968         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
969 }
970
971 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
972         float randu, float randv, BsdfEval *phase_eval,
973         float3 *omega_in, differential3 *domega_in, float *pdf)
974 {
975         int sampled = 0;
976
977         if(sd->num_closure > 1) {
978                 /* pick a phase closure based on sample weights */
979                 float sum = 0.0f;
980
981                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
982                         const ShaderClosure *sc = &sd->closure[sampled];
983                         
984                         if(CLOSURE_IS_PHASE(sc->type))
985                                 sum += sc->sample_weight;
986                 }
987
988                 float r = sd->randb_closure*sum;
989                 sum = 0.0f;
990
991                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
992                         const ShaderClosure *sc = &sd->closure[sampled];
993                         
994                         if(CLOSURE_IS_PHASE(sc->type)) {
995                                 sum += sc->sample_weight;
996
997                                 if(r <= sum)
998                                         break;
999                         }
1000                 }
1001
1002                 if(sampled == sd->num_closure) {
1003                         *pdf = 0.0f;
1004                         return LABEL_NONE;
1005                 }
1006         }
1007
1008         /* todo: this isn't quite correct, we don't weight anisotropy properly
1009          * depending on color channels, even if this is perhaps not a common case */
1010         const ShaderClosure *sc = &sd->closure[sampled];
1011         int label;
1012         float3 eval;
1013
1014         *pdf = 0.0f;
1015         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1016
1017         if(*pdf != 0.0f) {
1018                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1019         }
1020
1021         return label;
1022 }
1023
1024 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
1025         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
1026         float3 *omega_in, differential3 *domega_in, float *pdf)
1027 {
1028         int label;
1029         float3 eval;
1030
1031         *pdf = 0.0f;
1032         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1033
1034         if(*pdf != 0.0f)
1035                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1036
1037         return label;
1038 }
1039
1040 /* Volume Evaluation */
1041
1042 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
1043                                           ShaderData *sd,
1044                                           ccl_addr_space PathState *state,
1045                                           ccl_addr_space VolumeStack *stack,
1046                                           int path_flag)
1047 {
1048         /* reset closures once at the start, we will be accumulating the closures
1049          * for all volumes in the stack into a single array of closures */
1050         sd->num_closure = 0;
1051         sd->num_closure_extra = 0;
1052         sd->flag = 0;
1053         sd->object_flag = 0;
1054
1055         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1056                 /* setup shaderdata from stack. it's mostly setup already in
1057                  * shader_setup_from_volume, this switching should be quick */
1058                 sd->object = stack[i].object;
1059                 sd->shader = stack[i].shader;
1060
1061                 sd->flag &= ~SD_SHADER_FLAGS;
1062                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
1063                 sd->object_flag &= ~SD_OBJECT_FLAGS;
1064
1065                 if(sd->object != OBJECT_NONE) {
1066                         sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
1067
1068 #ifdef __OBJECT_MOTION__
1069                         /* todo: this is inefficient for motion blur, we should be
1070                          * caching matrices instead of recomputing them each step */
1071                         shader_setup_object_transforms(kg, sd, sd->time);
1072 #endif
1073                 }
1074
1075                 /* evaluate shader */
1076 #ifdef __SVM__
1077 #  ifdef __OSL__
1078                 if(kg->osl) {
1079                         OSLShader::eval_volume(kg, sd, state, path_flag);
1080                 }
1081                 else
1082 #  endif
1083                 {
1084                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1085                 }
1086 #endif
1087
1088                 /* merge closures to avoid exceeding number of closures limit */
1089                 if(i > 0)
1090                         shader_merge_closures(sd);
1091         }
1092 }
1093
1094 #endif
1095
1096 /* Displacement Evaluation */
1097
1098 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state)
1099 {
1100         sd->num_closure = 0;
1101         sd->num_closure_extra = 0;
1102         sd->randb_closure = 0.0f;
1103
1104         /* this will modify sd->P */
1105 #ifdef __SVM__
1106 #  ifdef __OSL__
1107         if(kg->osl)
1108                 OSLShader::eval_displacement(kg, sd);
1109         else
1110 #  endif
1111         {
1112                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1113         }
1114 #endif
1115 }
1116
1117 /* Transparent Shadows */
1118
1119 #ifdef __TRANSPARENT_SHADOWS__
1120 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1121 {
1122         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1123         int shader = 0;
1124
1125 #ifdef __HAIR__
1126         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1127 #endif
1128                 shader = kernel_tex_fetch(__tri_shader, prim);
1129 #ifdef __HAIR__
1130         }
1131         else {
1132                 float4 str = kernel_tex_fetch(__curves, prim);
1133                 shader = __float_as_int(str.z);
1134         }
1135 #endif
1136         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
1137
1138         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1139 }
1140 #endif
1141
1142 CCL_NAMESPACE_END
1143