59c1331a63ca64f2f7fde502484258dad1c0b6a0
[blender.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "closure/alloc.h"
28 #include "closure/bsdf_util.h"
29 #include "closure/bsdf.h"
30 #include "closure/emissive.h"
31
32 #include "svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
38 #ifdef __OBJECT_MOTION__
39 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
40 {
41         if(ccl_fetch(sd, object_flag) & SD_OBJECT_MOTION) {
42                 ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
43                 ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
44         }
45         else {
46                 ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
47                 ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
48         }
49 }
50 #endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60
61         ccl_fetch(sd, type) = isect->type;
62         ccl_fetch(sd, flag) = 0;
63         ccl_fetch(sd, object_flag) = kernel_tex_fetch(__object_flag,
64                                                       ccl_fetch(sd, object));
65
66         /* matrices and time */
67 #ifdef __OBJECT_MOTION__
68         shader_setup_object_transforms(kg, sd, ray->time);
69         ccl_fetch(sd, time) = ray->time;
70 #endif
71
72         ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim);
73         ccl_fetch(sd, ray_length) = isect->t;
74
75 #ifdef __UV__
76         ccl_fetch(sd, u) = isect->u;
77         ccl_fetch(sd, v) = isect->v;
78 #endif
79
80 #ifdef __HAIR__
81         if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
82                 /* curve */
83                 float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
84
85                 ccl_fetch(sd, shader) = __float_as_int(curvedata.z);
86                 ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray);
87         }
88         else
89 #endif
90         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
91                 /* static triangle */
92                 float3 Ng = triangle_normal(kg, sd);
93                 ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
94
95                 /* vectors */
96                 ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray);
97                 ccl_fetch(sd, Ng) = Ng;
98                 ccl_fetch(sd, N) = Ng;
99                 
100                 /* smooth normal */
101                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL)
102                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
103
104 #ifdef __DPDU__
105                 /* dPdu/dPdv */
106                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
107 #endif
108         }
109         else {
110                 /* motion triangle */
111                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
112         }
113
114         ccl_fetch(sd, I) = -ray->D;
115
116         ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
117
118 #ifdef __INSTANCING__
119         if(isect->object != OBJECT_NONE) {
120                 /* instance transform */
121                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
122                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
123 #  ifdef __DPDU__
124                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
125                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
126 #  endif
127         }
128 #endif
129
130         /* backfacing test */
131         bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
132
133         if(backfacing) {
134                 ccl_fetch(sd, flag) |= SD_BACKFACING;
135                 ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
136                 ccl_fetch(sd, N) = -ccl_fetch(sd, N);
137 #ifdef __DPDU__
138                 ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
139                 ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
140 #endif
141         }
142
143 #ifdef __RAY_DIFFERENTIALS__
144         /* differentials */
145         differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t);
146         differential_incoming(&ccl_fetch(sd, dI), ray->dD);
147         differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng));
148 #endif
149 }
150
151 /* ShaderData setup from BSSRDF scatter */
152
153 #ifdef __SUBSURFACE__
154 #  ifndef __KERNEL_CUDA__
155 ccl_device
156 #  else
157 ccl_device_inline
158 #  endif
159 void shader_setup_from_subsurface(
160         KernelGlobals *kg,
161         ShaderData *sd,
162         const Intersection *isect,
163         const Ray *ray)
164 {
165         const bool backfacing = sd->flag & SD_BACKFACING;
166
167         /* object, matrices, time, ray_length stay the same */
168         sd->flag = 0;
169         sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
170         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
171         sd->type = isect->type;
172
173 #  ifdef __UV__
174         sd->u = isect->u;
175         sd->v = isect->v;
176 #  endif
177
178         /* fetch triangle data */
179         if(sd->type == PRIMITIVE_TRIANGLE) {
180                 float3 Ng = triangle_normal(kg, sd);
181                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
182
183                 /* static triangle */
184                 sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
185                 sd->Ng = Ng;
186                 sd->N = Ng;
187
188                 if(sd->shader & SHADER_SMOOTH_NORMAL)
189                         sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
190
191 #  ifdef __DPDU__
192                 /* dPdu/dPdv */
193                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
194 #  endif
195         }
196         else {
197                 /* motion triangle */
198                 motion_triangle_shader_setup(kg, sd, isect, ray, true);
199         }
200
201         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
202
203 #  ifdef __INSTANCING__
204         if(isect->object != OBJECT_NONE) {
205                 /* instance transform */
206                 object_normal_transform(kg, sd, &sd->N);
207                 object_normal_transform(kg, sd, &sd->Ng);
208 #    ifdef __DPDU__
209                 object_dir_transform(kg, sd, &sd->dPdu);
210                 object_dir_transform(kg, sd, &sd->dPdv);
211 #    endif
212         }
213 #  endif
214
215         /* backfacing test */
216         if(backfacing) {
217                 sd->flag |= SD_BACKFACING;
218                 sd->Ng = -sd->Ng;
219                 sd->N = -sd->N;
220 #  ifdef __DPDU__
221                 sd->dPdu = -sd->dPdu;
222                 sd->dPdv = -sd->dPdv;
223 #  endif
224         }
225
226         /* should not get used in principle as the shading will only use a diffuse
227          * BSDF, but the shader might still access it */
228         sd->I = sd->N;
229
230 #  ifdef __RAY_DIFFERENTIALS__
231         /* differentials */
232         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
233         /* don't modify dP and dI */
234 #  endif
235 }
236 #endif
237
238 /* ShaderData setup from position sampled on mesh */
239
240 ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
241                                                 ShaderData *sd,
242                                                 const float3 P,
243                                                 const float3 Ng,
244                                                 const float3 I,
245                                                 int shader, int object, int prim,
246                                                 float u, float v, float t,
247                                                 float time,
248                                                 bool object_space,
249                                                 int lamp)
250 {
251         /* vectors */
252         ccl_fetch(sd, P) = P;
253         ccl_fetch(sd, N) = Ng;
254         ccl_fetch(sd, Ng) = Ng;
255         ccl_fetch(sd, I) = I;
256         ccl_fetch(sd, shader) = shader;
257         if(prim != PRIM_NONE)
258                 ccl_fetch(sd, type) = PRIMITIVE_TRIANGLE;
259         else if(lamp != LAMP_NONE)
260                 ccl_fetch(sd, type) = PRIMITIVE_LAMP;
261         else
262                 ccl_fetch(sd, type) = PRIMITIVE_NONE;
263
264         /* primitive */
265 #ifdef __INSTANCING__
266         ccl_fetch(sd, object) = object;
267 #endif
268         /* currently no access to bvh prim index for strand sd->prim*/
269         ccl_fetch(sd, prim) = prim;
270 #ifdef __UV__
271         ccl_fetch(sd, u) = u;
272         ccl_fetch(sd, v) = v;
273 #endif
274         ccl_fetch(sd, ray_length) = t;
275
276         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
277         ccl_fetch(sd, object_flag) = 0;
278         if(ccl_fetch(sd, object) != OBJECT_NONE) {
279                 ccl_fetch(sd, object_flag) |= kernel_tex_fetch(__object_flag,
280                                                                ccl_fetch(sd, object));
281
282 #ifdef __OBJECT_MOTION__
283                 shader_setup_object_transforms(kg, sd, time);
284                 ccl_fetch(sd, time) = time;
285         }
286         else if(lamp != LAMP_NONE) {
287                 ccl_fetch(sd, ob_tfm)  = lamp_fetch_transform(kg, lamp, false);
288                 ccl_fetch(sd, ob_itfm) = lamp_fetch_transform(kg, lamp, true);
289 #endif
290         }
291
292         /* transform into world space */
293         if(object_space) {
294                 object_position_transform_auto(kg, sd, &ccl_fetch(sd, P));
295                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
296                 ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
297                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, I));
298         }
299
300         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
301                 /* smooth normal */
302                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
303                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
304
305 #ifdef __INSTANCING__
306                         if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) {
307                                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
308                         }
309 #endif
310                 }
311
312                 /* dPdu/dPdv */
313 #ifdef __DPDU__
314                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
315
316 #  ifdef __INSTANCING__
317                 if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) {
318                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
319                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
320                 }
321 #  endif
322 #endif
323         }
324         else {
325 #ifdef __DPDU__
326                 ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
327                 ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
328 #endif
329         }
330
331         /* backfacing test */
332         if(ccl_fetch(sd, prim) != PRIM_NONE) {
333                 bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
334
335                 if(backfacing) {
336                         ccl_fetch(sd, flag) |= SD_BACKFACING;
337                         ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
338                         ccl_fetch(sd, N) = -ccl_fetch(sd, N);
339 #ifdef __DPDU__
340                         ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
341                         ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
342 #endif
343                 }
344         }
345
346 #ifdef __RAY_DIFFERENTIALS__
347         /* no ray differentials here yet */
348         ccl_fetch(sd, dP) = differential3_zero();
349         ccl_fetch(sd, dI) = differential3_zero();
350         ccl_fetch(sd, du) = differential_zero();
351         ccl_fetch(sd, dv) = differential_zero();
352 #endif
353 }
354
355 /* ShaderData setup for displacement */
356
357 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
358         int object, int prim, float u, float v)
359 {
360         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
361         int shader;
362
363         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
364
365         /* force smooth shading for displacement */
366         shader |= SHADER_SMOOTH_NORMAL;
367
368         shader_setup_from_sample(kg, sd,
369                                  P, Ng, I,
370                                  shader, object, prim,
371                                  u, v, 0.0f, 0.5f,
372                                  !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
373                                  LAMP_NONE);
374 }
375
376 /* ShaderData setup from ray into background */
377
378 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
379 {
380         /* vectors */
381         ccl_fetch(sd, P) = ray->D;
382         ccl_fetch(sd, N) = -ray->D;
383         ccl_fetch(sd, Ng) = -ray->D;
384         ccl_fetch(sd, I) = -ray->D;
385         ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
386         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
387         ccl_fetch(sd, object_flag) = 0;
388 #ifdef __OBJECT_MOTION__
389         ccl_fetch(sd, time) = ray->time;
390 #endif
391         ccl_fetch(sd, ray_length) = 0.0f;
392
393 #ifdef __INSTANCING__
394         ccl_fetch(sd, object) = PRIM_NONE;
395 #endif
396         ccl_fetch(sd, prim) = PRIM_NONE;
397 #ifdef __UV__
398         ccl_fetch(sd, u) = 0.0f;
399         ccl_fetch(sd, v) = 0.0f;
400 #endif
401
402 #ifdef __DPDU__
403         /* dPdu/dPdv */
404         ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
405         ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
406 #endif
407
408 #ifdef __RAY_DIFFERENTIALS__
409         /* differentials */
410         ccl_fetch(sd, dP) = ray->dD;
411         differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP));
412         ccl_fetch(sd, du) = differential_zero();
413         ccl_fetch(sd, dv) = differential_zero();
414 #endif
415 }
416
417 /* ShaderData setup from point inside volume */
418
419 #ifdef __VOLUME__
420 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
421 {
422         /* vectors */
423         sd->P = ray->P;
424         sd->N = -ray->D;  
425         sd->Ng = -ray->D;
426         sd->I = -ray->D;
427         sd->shader = SHADER_NONE;
428         sd->flag = 0;
429         sd->object_flag = 0;
430 #ifdef __OBJECT_MOTION__
431         sd->time = ray->time;
432 #endif
433         sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
434
435 #ifdef __INSTANCING__
436         sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
437 #endif
438         sd->prim = PRIM_NONE;
439         sd->type = PRIMITIVE_NONE;
440
441 #ifdef __UV__
442         sd->u = 0.0f;
443         sd->v = 0.0f;
444 #endif
445
446 #ifdef __DPDU__
447         /* dPdu/dPdv */
448         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
449         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
450 #endif
451
452 #ifdef __RAY_DIFFERENTIALS__
453         /* differentials */
454         sd->dP = ray->dD;
455         differential_incoming(&sd->dI, sd->dP);
456         sd->du = differential_zero();
457         sd->dv = differential_zero();
458 #endif
459
460         /* for NDC coordinates */
461         sd->ray_P = ray->P;
462         sd->ray_dP = ray->dP;
463 }
464 #endif
465
466 /* Merging */
467
468 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
469 ccl_device_inline void shader_merge_closures(ShaderData *sd)
470 {
471         /* merge identical closures, better when we sample a single closure at a time */
472         for(int i = 0; i < sd->num_closure; i++) {
473                 ShaderClosure *sci = &sd->closure[i];
474
475                 for(int j = i + 1; j < sd->num_closure; j++) {
476                         ShaderClosure *scj = &sd->closure[j];
477
478                         if(sci->type != scj->type)
479                                 continue;
480                         if(!bsdf_merge(sci, scj))
481                                 continue;
482
483                         sci->weight += scj->weight;
484                         sci->sample_weight += scj->sample_weight;
485
486                         int size = sd->num_closure - (j+1);
487                         if(size > 0) {
488                                 for(int k = 0; k < size; k++) {
489                                         scj[k] = scj[k+1];
490                                 }
491                         }
492
493                         sd->num_closure--;
494                         kernel_assert(sd->num_closure >= 0);
495                         j--;
496                 }
497         }
498 }
499 #endif
500
501 /* BSDF */
502
503 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
504         int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
505 {
506         /* this is the veach one-sample model with balance heuristic, some pdf
507          * factors drop out when using balance heuristic weighting */
508         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
509                 if(i == skip_bsdf)
510                         continue;
511
512                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
513
514                 if(CLOSURE_IS_BSDF(sc->type)) {
515                         float bsdf_pdf = 0.0f;
516                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
517
518                         if(bsdf_pdf != 0.0f) {
519                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight);
520                                 sum_pdf += bsdf_pdf*sc->sample_weight;
521                         }
522
523                         sum_sample_weight += sc->sample_weight;
524                 }
525         }
526
527         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
528 }
529
530 #ifdef __BRANCHED_PATH__
531 ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
532                                                         ShaderData *sd,
533                                                         const float3 omega_in,
534                                                         BsdfEval *result_eval,
535                                                         float light_pdf,
536                                                         bool use_mis)
537 {
538         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
539                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
540                 if(CLOSURE_IS_BSDF(sc->type)) {
541                         float bsdf_pdf = 0.0f;
542                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
543                         if(bsdf_pdf != 0.0f) {
544                                 float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
545                                 bsdf_eval_accum(result_eval,
546                                                 sc->type,
547                                                 eval * sc->weight * mis_weight);
548                         }
549                 }
550         }
551 }
552 #endif
553
554
555 #ifndef __KERNEL_CUDA__
556 ccl_device
557 #else
558 ccl_device_inline
559 #endif
560 void shader_bsdf_eval(KernelGlobals *kg,
561                       ShaderData *sd,
562                       const float3 omega_in,
563                       BsdfEval *eval,
564                       float light_pdf,
565                       bool use_mis)
566 {
567         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
568
569 #ifdef __BRANCHED_PATH__
570         if(kernel_data.integrator.branched)
571                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
572         else
573 #endif
574         {
575                 float pdf;
576                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
577                 if(use_mis) {
578                         float weight = power_heuristic(light_pdf, pdf);
579                         bsdf_eval_mul(eval, weight);
580                 }
581         }
582 }
583
584 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
585                                          ShaderData *sd,
586                                          float randu, float randv,
587                                          BsdfEval *bsdf_eval,
588                                          float3 *omega_in,
589                                          differential3 *domega_in,
590                                          float *pdf)
591 {
592         int sampled = 0;
593
594         if(ccl_fetch(sd, num_closure) > 1) {
595                 /* pick a BSDF closure based on sample weights */
596                 float sum = 0.0f;
597
598                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
599                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
600                         
601                         if(CLOSURE_IS_BSDF(sc->type))
602                                 sum += sc->sample_weight;
603                 }
604
605                 float r = ccl_fetch(sd, randb_closure)*sum;
606                 sum = 0.0f;
607
608                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
609                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
610                         
611                         if(CLOSURE_IS_BSDF(sc->type)) {
612                                 sum += sc->sample_weight;
613
614                                 if(r <= sum)
615                                         break;
616                         }
617                 }
618
619                 if(sampled == ccl_fetch(sd, num_closure)) {
620                         *pdf = 0.0f;
621                         return LABEL_NONE;
622                 }
623         }
624
625         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
626
627         int label;
628         float3 eval;
629
630         *pdf = 0.0f;
631         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
632
633         if(*pdf != 0.0f) {
634                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
635
636                 if(ccl_fetch(sd, num_closure) > 1) {
637                         float sweight = sc->sample_weight;
638                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
639                 }
640         }
641
642         return label;
643 }
644
645 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
646         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
647         float3 *omega_in, differential3 *domega_in, float *pdf)
648 {
649         int label;
650         float3 eval;
651
652         *pdf = 0.0f;
653         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
654
655         if(*pdf != 0.0f)
656                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
657
658         return label;
659 }
660
661 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
662 {
663         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
664                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
665
666                 if(CLOSURE_IS_BSDF(sc->type))
667                         bsdf_blur(kg, sc, roughness);
668         }
669 }
670
671 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
672 {
673         if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)
674                 return make_float3(1.0f, 1.0f, 1.0f);
675
676         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
677
678         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
679                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
680
681                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
682                         eval += sc->weight;
683         }
684
685         return eval;
686 }
687
688 ccl_device void shader_bsdf_disable_transparency(KernelGlobals *kg, ShaderData *sd)
689 {
690         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
691                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
692
693                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
694                         sc->sample_weight = 0.0f;
695                         sc->weight = make_float3(0.0f, 0.0f, 0.0f);
696                 }
697         }
698 }
699
700 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
701 {
702         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
703
704         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
705         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
706         
707         return alpha;
708 }
709
710 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
711 {
712         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
713
714         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
715                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
716
717                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
718                         eval += sc->weight;
719         }
720
721         return eval;
722 }
723
724 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
725 {
726         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
727
728         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
729                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
730
731                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
732                         eval += sc->weight;
733         }
734
735         return eval;
736 }
737
738 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
739 {
740         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
741
742         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
743                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
744
745                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
746                         eval += sc->weight;
747         }
748
749         return eval;
750 }
751
752 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
753 {
754         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
755
756         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
757                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
758
759                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
760                         eval += sc->weight;
761         }
762
763         return eval;
764 }
765
766 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
767 {
768         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
769         float3 N = make_float3(0.0f, 0.0f, 0.0f);
770
771         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
772                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
773
774                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
775                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
776                         eval += sc->weight*ao_factor;
777                         N += bsdf->N*average(sc->weight);
778                 }
779                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
780                         eval += sc->weight;
781                         N += ccl_fetch(sd, N)*average(sc->weight);
782                 }
783         }
784
785         if(is_zero(N))
786                 N = ccl_fetch(sd, N);
787         else
788                 N = normalize(N);
789
790         *N_ = N;
791         return eval;
792 }
793
794 #ifdef __SUBSURFACE__
795 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
796 {
797         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
798         float3 N = make_float3(0.0f, 0.0f, 0.0f);
799         float texture_blur = 0.0f, weight_sum = 0.0f;
800
801         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
802                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
803
804                 if(CLOSURE_IS_BSSRDF(sc->type)) {
805                         const Bssrdf *bssrdf = (const Bssrdf*)sc;
806                         float avg_weight = fabsf(average(sc->weight));
807
808                         N += bssrdf->N*avg_weight;
809                         eval += sc->weight;
810                         texture_blur += bssrdf->texture_blur*avg_weight;
811                         weight_sum += avg_weight;
812                 }
813         }
814
815         if(N_)
816                 *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N);
817
818         if(texture_blur_)
819                 *texture_blur_ = texture_blur/weight_sum;
820         
821         return eval;
822 }
823 #endif
824
825 /* Emission */
826
827 ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
828 {
829         return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I));
830 }
831
832 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
833 {
834         float3 eval;
835         eval = make_float3(0.0f, 0.0f, 0.0f);
836
837         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
838                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
839
840                 if(CLOSURE_IS_EMISSION(sc->type))
841                         eval += emissive_eval(kg, sd, sc)*sc->weight;
842         }
843
844         return eval;
845 }
846
847 /* Holdout */
848
849 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
850 {
851         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
852
853         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
854                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
855
856                 if(CLOSURE_IS_HOLDOUT(sc->type))
857                         weight += sc->weight;
858         }
859
860         return weight;
861 }
862
863 /* Surface Evaluation */
864
865 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
866         ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
867 {
868         ccl_fetch(sd, num_closure) = 0;
869         ccl_fetch(sd, num_closure_extra) = 0;
870         ccl_fetch(sd, randb_closure) = randb;
871
872 #ifdef __OSL__
873         if(kg->osl)
874                 OSLShader::eval_surface(kg, sd, state, path_flag, ctx);
875         else
876 #endif
877         {
878 #ifdef __SVM__
879                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
880 #else
881                 DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
882                                                              sizeof(DiffuseBsdf),
883                                                              make_float3(0.8f, 0.8f, 0.8f));
884                 bsdf->N = ccl_fetch(sd, N);
885                 ccl_fetch(sd, flag) |= bsdf_diffuse_setup(bsdf);
886 #endif
887         }
888
889         if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
890                 ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953);
891         }
892 }
893
894 /* Background Evaluation */
895
896 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
897         ccl_addr_space PathState *state, int path_flag, ShaderContext ctx)
898 {
899         ccl_fetch(sd, num_closure) = 0;
900         ccl_fetch(sd, num_closure_extra) = 0;
901         ccl_fetch(sd, randb_closure) = 0.0f;
902
903 #ifdef __SVM__
904 #ifdef __OSL__
905         if(kg->osl) {
906                 OSLShader::eval_background(kg, sd, state, path_flag, ctx);
907         }
908         else
909 #endif
910         {
911                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
912         }
913
914         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
915
916         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
917                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
918
919                 if(CLOSURE_IS_BACKGROUND(sc->type))
920                         eval += sc->weight;
921         }
922
923         return eval;
924 #else
925         return make_float3(0.8f, 0.8f, 0.8f);
926 #endif
927 }
928
929 /* Volume */
930
931 #ifdef __VOLUME__
932
933 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
934         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
935 {
936         for(int i = 0; i < sd->num_closure; i++) {
937                 if(i == skip_phase)
938                         continue;
939
940                 const ShaderClosure *sc = &sd->closure[i];
941
942                 if(CLOSURE_IS_PHASE(sc->type)) {
943                         float phase_pdf = 0.0f;
944                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
945
946                         if(phase_pdf != 0.0f) {
947                                 bsdf_eval_accum(result_eval, sc->type, eval);
948                                 sum_pdf += phase_pdf*sc->sample_weight;
949                         }
950
951                         sum_sample_weight += sc->sample_weight;
952                 }
953         }
954
955         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
956 }
957
958 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
959         const float3 omega_in, BsdfEval *eval, float *pdf)
960 {
961         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
962
963         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
964 }
965
966 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
967         float randu, float randv, BsdfEval *phase_eval,
968         float3 *omega_in, differential3 *domega_in, float *pdf)
969 {
970         int sampled = 0;
971
972         if(sd->num_closure > 1) {
973                 /* pick a phase closure based on sample weights */
974                 float sum = 0.0f;
975
976                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
977                         const ShaderClosure *sc = &sd->closure[sampled];
978                         
979                         if(CLOSURE_IS_PHASE(sc->type))
980                                 sum += sc->sample_weight;
981                 }
982
983                 float r = sd->randb_closure*sum;
984                 sum = 0.0f;
985
986                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
987                         const ShaderClosure *sc = &sd->closure[sampled];
988                         
989                         if(CLOSURE_IS_PHASE(sc->type)) {
990                                 sum += sc->sample_weight;
991
992                                 if(r <= sum)
993                                         break;
994                         }
995                 }
996
997                 if(sampled == sd->num_closure) {
998                         *pdf = 0.0f;
999                         return LABEL_NONE;
1000                 }
1001         }
1002
1003         /* todo: this isn't quite correct, we don't weight anisotropy properly
1004          * depending on color channels, even if this is perhaps not a common case */
1005         const ShaderClosure *sc = &sd->closure[sampled];
1006         int label;
1007         float3 eval;
1008
1009         *pdf = 0.0f;
1010         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1011
1012         if(*pdf != 0.0f) {
1013                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1014         }
1015
1016         return label;
1017 }
1018
1019 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
1020         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
1021         float3 *omega_in, differential3 *domega_in, float *pdf)
1022 {
1023         int label;
1024         float3 eval;
1025
1026         *pdf = 0.0f;
1027         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1028
1029         if(*pdf != 0.0f)
1030                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1031
1032         return label;
1033 }
1034
1035 /* Volume Evaluation */
1036
1037 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
1038                                           ShaderData *sd,
1039                                           PathState *state,
1040                                           VolumeStack *stack,
1041                                           int path_flag,
1042                                           ShaderContext ctx)
1043 {
1044         /* reset closures once at the start, we will be accumulating the closures
1045          * for all volumes in the stack into a single array of closures */
1046         sd->num_closure = 0;
1047         sd->num_closure_extra = 0;
1048         sd->flag = 0;
1049         sd->object_flag = 0;
1050
1051         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1052                 /* setup shaderdata from stack. it's mostly setup already in
1053                  * shader_setup_from_volume, this switching should be quick */
1054                 sd->object = stack[i].object;
1055                 sd->shader = stack[i].shader;
1056
1057                 sd->flag &= ~SD_SHADER_FLAGS;
1058                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
1059                 sd->object_flag &= ~SD_OBJECT_FLAGS;
1060
1061                 if(sd->object != OBJECT_NONE) {
1062                         sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
1063
1064 #ifdef __OBJECT_MOTION__
1065                         /* todo: this is inefficient for motion blur, we should be
1066                          * caching matrices instead of recomputing them each step */
1067                         shader_setup_object_transforms(kg, sd, sd->time);
1068 #endif
1069                 }
1070
1071                 /* evaluate shader */
1072 #ifdef __SVM__
1073 #  ifdef __OSL__
1074                 if(kg->osl) {
1075                         OSLShader::eval_volume(kg, sd, state, path_flag, ctx);
1076                 }
1077                 else
1078 #  endif
1079                 {
1080                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1081                 }
1082 #endif
1083
1084                 /* merge closures to avoid exceeding number of closures limit */
1085                 if(i > 0)
1086                         shader_merge_closures(sd);
1087         }
1088 }
1089
1090 #endif
1091
1092 /* Displacement Evaluation */
1093
1094 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx)
1095 {
1096         ccl_fetch(sd, num_closure) = 0;
1097         ccl_fetch(sd, num_closure_extra) = 0;
1098         ccl_fetch(sd, randb_closure) = 0.0f;
1099
1100         /* this will modify sd->P */
1101 #ifdef __SVM__
1102 #  ifdef __OSL__
1103         if(kg->osl)
1104                 OSLShader::eval_displacement(kg, sd, ctx);
1105         else
1106 #  endif
1107         {
1108                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1109         }
1110 #endif
1111 }
1112
1113 /* Transparent Shadows */
1114
1115 #ifdef __TRANSPARENT_SHADOWS__
1116 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1117 {
1118         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1119         int shader = 0;
1120
1121 #ifdef __HAIR__
1122         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1123 #endif
1124                 shader = kernel_tex_fetch(__tri_shader, prim);
1125 #ifdef __HAIR__
1126         }
1127         else {
1128                 float4 str = kernel_tex_fetch(__curves, prim);
1129                 shader = __float_as_int(str.z);
1130         }
1131 #endif
1132         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
1133
1134         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1135 }
1136 #endif
1137
1138 CCL_NAMESPACE_END
1139