3e098c922dcd6a269a7c3f593bca0f8f6cb61126
[blender-staging.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "closure/alloc.h"
28 #include "closure/bsdf_util.h"
29 #include "closure/bsdf.h"
30 #include "closure/emissive.h"
31
32 #include "svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
38 #ifdef __OBJECT_MOTION__
39 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
40 {
41         if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) {
42                 ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
43                 ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
44         }
45         else {
46                 ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
47                 ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
48         }
49 }
50 #endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60
61         ccl_fetch(sd, type) = isect->type;
62         ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
63
64         /* matrices and time */
65 #ifdef __OBJECT_MOTION__
66         shader_setup_object_transforms(kg, sd, ray->time);
67         ccl_fetch(sd, time) = ray->time;
68 #endif
69
70         ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim);
71         ccl_fetch(sd, ray_length) = isect->t;
72
73 #ifdef __UV__
74         ccl_fetch(sd, u) = isect->u;
75         ccl_fetch(sd, v) = isect->v;
76 #endif
77
78 #ifdef __HAIR__
79         if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
80                 /* curve */
81                 float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
82
83                 ccl_fetch(sd, shader) = __float_as_int(curvedata.z);
84                 ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray);
85         }
86         else
87 #endif
88         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
89                 /* static triangle */
90                 float3 Ng = triangle_normal(kg, sd);
91                 ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
92
93                 /* vectors */
94                 ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray);
95                 ccl_fetch(sd, Ng) = Ng;
96                 ccl_fetch(sd, N) = Ng;
97                 
98                 /* smooth normal */
99                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL)
100                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
101
102 #ifdef __DPDU__
103                 /* dPdu/dPdv */
104                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
105 #endif
106         }
107         else {
108                 /* motion triangle */
109                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
110         }
111
112         ccl_fetch(sd, I) = -ray->D;
113
114         ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
115
116 #ifdef __INSTANCING__
117         if(isect->object != OBJECT_NONE) {
118                 /* instance transform */
119                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
120                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
121 #  ifdef __DPDU__
122                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
123                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
124 #  endif
125         }
126 #endif
127
128         /* backfacing test */
129         bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
130
131         if(backfacing) {
132                 ccl_fetch(sd, flag) |= SD_BACKFACING;
133                 ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
134                 ccl_fetch(sd, N) = -ccl_fetch(sd, N);
135 #ifdef __DPDU__
136                 ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
137                 ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
138 #endif
139         }
140
141 #ifdef __RAY_DIFFERENTIALS__
142         /* differentials */
143         differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t);
144         differential_incoming(&ccl_fetch(sd, dI), ray->dD);
145         differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng));
146 #endif
147 }
148
149 /* ShaderData setup from BSSRDF scatter */
150
151 #ifdef __SUBSURFACE__
152 #  ifndef __KERNEL_CUDA__
153 ccl_device
154 #  else
155 ccl_device_inline
156 #  endif
157 void shader_setup_from_subsurface(
158         KernelGlobals *kg,
159         ShaderData *sd,
160         const Intersection *isect,
161         const Ray *ray)
162 {
163         bool backfacing = sd->flag & SD_BACKFACING;
164
165         /* object, matrices, time, ray_length stay the same */
166         sd->flag = kernel_tex_fetch(__object_flag, sd->object);
167         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
168         sd->type = isect->type;
169
170 #  ifdef __UV__
171         sd->u = isect->u;
172         sd->v = isect->v;
173 #  endif
174
175         /* fetch triangle data */
176         if(sd->type == PRIMITIVE_TRIANGLE) {
177                 float3 Ng = triangle_normal(kg, sd);
178                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
179
180                 /* static triangle */
181                 sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
182                 sd->Ng = Ng;
183                 sd->N = Ng;
184
185                 if(sd->shader & SHADER_SMOOTH_NORMAL)
186                         sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
187
188 #  ifdef __DPDU__
189                 /* dPdu/dPdv */
190                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
191 #  endif
192         }
193         else {
194                 /* motion triangle */
195                 motion_triangle_shader_setup(kg, sd, isect, ray, true);
196         }
197
198         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
199
200 #  ifdef __INSTANCING__
201         if(isect->object != OBJECT_NONE) {
202                 /* instance transform */
203                 object_normal_transform(kg, sd, &sd->N);
204                 object_normal_transform(kg, sd, &sd->Ng);
205 #    ifdef __DPDU__
206                 object_dir_transform(kg, sd, &sd->dPdu);
207                 object_dir_transform(kg, sd, &sd->dPdv);
208 #    endif
209         }
210 #  endif
211
212         /* backfacing test */
213         if(backfacing) {
214                 sd->flag |= SD_BACKFACING;
215                 sd->Ng = -sd->Ng;
216                 sd->N = -sd->N;
217 #  ifdef __DPDU__
218                 sd->dPdu = -sd->dPdu;
219                 sd->dPdv = -sd->dPdv;
220 #  endif
221         }
222
223         /* should not get used in principle as the shading will only use a diffuse
224          * BSDF, but the shader might still access it */
225         sd->I = sd->N;
226
227 #  ifdef __RAY_DIFFERENTIALS__
228         /* differentials */
229         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
230         /* don't modify dP and dI */
231 #  endif
232 }
233 #endif
234
235 /* ShaderData setup from position sampled on mesh */
236
237 ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
238                                                 ShaderData *sd,
239                                                 const float3 P,
240                                                 const float3 Ng,
241                                                 const float3 I,
242                                                 int shader, int object, int prim,
243                                                 float u, float v, float t,
244                                                 float time,
245                                                 bool object_space)
246 {
247         /* vectors */
248         ccl_fetch(sd, P) = P;
249         ccl_fetch(sd, N) = Ng;
250         ccl_fetch(sd, Ng) = Ng;
251         ccl_fetch(sd, I) = I;
252         ccl_fetch(sd, shader) = shader;
253         ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
254
255         /* primitive */
256 #ifdef __INSTANCING__
257         ccl_fetch(sd, object) = object;
258 #endif
259         /* currently no access to bvh prim index for strand sd->prim*/
260         ccl_fetch(sd, prim) = prim;
261 #ifdef __UV__
262         ccl_fetch(sd, u) = u;
263         ccl_fetch(sd, v) = v;
264 #endif
265         ccl_fetch(sd, ray_length) = t;
266
267         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
268         if(ccl_fetch(sd, object) != OBJECT_NONE) {
269                 ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
270
271 #ifdef __OBJECT_MOTION__
272                 shader_setup_object_transforms(kg, sd, time);
273         }
274
275         ccl_fetch(sd, time) = time;
276 #else
277         }
278 #endif
279
280         /* transform into world space */
281         if(object_space) {
282                 object_position_transform_auto(kg, sd, &ccl_fetch(sd, P));
283                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
284                 ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
285                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, I));
286         }
287
288         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
289                 /* smooth normal */
290                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
291                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
292
293 #ifdef __INSTANCING__
294                         if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) {
295                                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
296                         }
297 #endif
298                 }
299
300                 /* dPdu/dPdv */
301 #ifdef __DPDU__
302                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
303
304 #  ifdef __INSTANCING__
305                 if(!(ccl_fetch(sd, flag) & SD_TRANSFORM_APPLIED)) {
306                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
307                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
308                 }
309 #  endif
310 #endif
311         }
312         else {
313 #ifdef __DPDU__
314                 ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
315                 ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
316 #endif
317         }
318
319         /* backfacing test */
320         if(ccl_fetch(sd, prim) != PRIM_NONE) {
321                 bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
322
323                 if(backfacing) {
324                         ccl_fetch(sd, flag) |= SD_BACKFACING;
325                         ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
326                         ccl_fetch(sd, N) = -ccl_fetch(sd, N);
327 #ifdef __DPDU__
328                         ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
329                         ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
330 #endif
331                 }
332         }
333
334 #ifdef __RAY_DIFFERENTIALS__
335         /* no ray differentials here yet */
336         ccl_fetch(sd, dP) = differential3_zero();
337         ccl_fetch(sd, dI) = differential3_zero();
338         ccl_fetch(sd, du) = differential_zero();
339         ccl_fetch(sd, dv) = differential_zero();
340 #endif
341 }
342
343 /* ShaderData setup for displacement */
344
345 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
346         int object, int prim, float u, float v)
347 {
348         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
349         int shader;
350
351         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
352
353         /* force smooth shading for displacement */
354         shader |= SHADER_SMOOTH_NORMAL;
355
356         shader_setup_from_sample(kg, sd,
357                                  P, Ng, I,
358                                  shader, object, prim,
359                                  u, v, 0.0f, 0.5f,
360                                  !(kernel_tex_fetch(__object_flag, object) & SD_TRANSFORM_APPLIED));
361 }
362
363 /* ShaderData setup from ray into background */
364
365 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
366 {
367         /* vectors */
368         ccl_fetch(sd, P) = ray->D;
369         ccl_fetch(sd, N) = -ray->D;
370         ccl_fetch(sd, Ng) = -ray->D;
371         ccl_fetch(sd, I) = -ray->D;
372         ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
373         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
374 #ifdef __OBJECT_MOTION__
375         ccl_fetch(sd, time) = ray->time;
376 #endif
377         ccl_fetch(sd, ray_length) = 0.0f;
378
379 #ifdef __INSTANCING__
380         ccl_fetch(sd, object) = PRIM_NONE;
381 #endif
382         ccl_fetch(sd, prim) = PRIM_NONE;
383 #ifdef __UV__
384         ccl_fetch(sd, u) = 0.0f;
385         ccl_fetch(sd, v) = 0.0f;
386 #endif
387
388 #ifdef __DPDU__
389         /* dPdu/dPdv */
390         ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
391         ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
392 #endif
393
394 #ifdef __RAY_DIFFERENTIALS__
395         /* differentials */
396         ccl_fetch(sd, dP) = ray->dD;
397         differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP));
398         ccl_fetch(sd, du) = differential_zero();
399         ccl_fetch(sd, dv) = differential_zero();
400 #endif
401 }
402
403 /* ShaderData setup from point inside volume */
404
405 #ifdef __VOLUME__
406 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
407 {
408         /* vectors */
409         sd->P = ray->P;
410         sd->N = -ray->D;  
411         sd->Ng = -ray->D;
412         sd->I = -ray->D;
413         sd->shader = SHADER_NONE;
414         sd->flag = 0;
415 #ifdef __OBJECT_MOTION__
416         sd->time = ray->time;
417 #endif
418         sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
419
420 #ifdef __INSTANCING__
421         sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
422 #endif
423         sd->prim = PRIM_NONE;
424         sd->type = PRIMITIVE_NONE;
425
426 #ifdef __UV__
427         sd->u = 0.0f;
428         sd->v = 0.0f;
429 #endif
430
431 #ifdef __DPDU__
432         /* dPdu/dPdv */
433         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
434         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
435 #endif
436
437 #ifdef __RAY_DIFFERENTIALS__
438         /* differentials */
439         sd->dP = ray->dD;
440         differential_incoming(&sd->dI, sd->dP);
441         sd->du = differential_zero();
442         sd->dv = differential_zero();
443 #endif
444
445         /* for NDC coordinates */
446         sd->ray_P = ray->P;
447         sd->ray_dP = ray->dP;
448 }
449 #endif
450
451 /* Merging */
452
453 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
454 ccl_device_inline void shader_merge_closures(ShaderData *sd)
455 {
456         /* merge identical closures, better when we sample a single closure at a time */
457         for(int i = 0; i < sd->num_closure; i++) {
458                 ShaderClosure *sci = &sd->closure[i];
459
460                 for(int j = i + 1; j < sd->num_closure; j++) {
461                         ShaderClosure *scj = &sd->closure[j];
462
463                         if(sci->type != scj->type)
464                                 continue;
465                         if(!bsdf_merge(sci, scj))
466                                 continue;
467
468                         sci->weight += scj->weight;
469                         sci->sample_weight += scj->sample_weight;
470
471                         int size = sd->num_closure - (j+1);
472                         if(size > 0) {
473                                 for(int k = 0; k < size; k++) {
474                                         scj[k] = scj[k+1];
475                                 }
476                         }
477
478                         sd->num_closure--;
479                         kernel_assert(sd->num_closure >= 0);
480                         j--;
481                 }
482         }
483 }
484 #endif
485
486 /* BSDF */
487
488 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
489         int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
490 {
491         /* this is the veach one-sample model with balance heuristic, some pdf
492          * factors drop out when using balance heuristic weighting */
493         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
494                 if(i == skip_bsdf)
495                         continue;
496
497                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
498
499                 if(CLOSURE_IS_BSDF(sc->type)) {
500                         float bsdf_pdf = 0.0f;
501                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
502
503                         if(bsdf_pdf != 0.0f) {
504                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight);
505                                 sum_pdf += bsdf_pdf*sc->sample_weight;
506                         }
507
508                         sum_sample_weight += sc->sample_weight;
509                 }
510         }
511
512         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
513 }
514
515 #ifdef __BRANCHED_PATH__
516 ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
517                                                         ShaderData *sd,
518                                                         const float3 omega_in,
519                                                         BsdfEval *result_eval,
520                                                         float light_pdf,
521                                                         bool use_mis)
522 {
523         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
524                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
525                 if(CLOSURE_IS_BSDF(sc->type)) {
526                         float bsdf_pdf = 0.0f;
527                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
528                         if(bsdf_pdf != 0.0f) {
529                                 float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
530                                 bsdf_eval_accum(result_eval,
531                                                 sc->type,
532                                                 eval * sc->weight * mis_weight);
533                         }
534                 }
535         }
536 }
537 #endif
538
539
540 #ifndef __KERNEL_CUDA__
541 ccl_device
542 #else
543 ccl_device_inline
544 #endif
545 void shader_bsdf_eval(KernelGlobals *kg,
546                       ShaderData *sd,
547                       const float3 omega_in,
548                       BsdfEval *eval,
549                       float light_pdf,
550                       bool use_mis)
551 {
552         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
553
554 #ifdef __BRANCHED_PATH__
555         if(kernel_data.integrator.branched)
556                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
557         else
558 #endif
559         {
560                 float pdf;
561                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
562                 if(use_mis) {
563                         float weight = power_heuristic(light_pdf, pdf);
564                         bsdf_eval_mul(eval, make_float3(weight, weight, weight));
565                 }
566         }
567 }
568
569 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
570                                          ShaderData *sd,
571                                          float randu, float randv,
572                                          BsdfEval *bsdf_eval,
573                                          float3 *omega_in,
574                                          differential3 *domega_in,
575                                          float *pdf)
576 {
577         int sampled = 0;
578
579         if(ccl_fetch(sd, num_closure) > 1) {
580                 /* pick a BSDF closure based on sample weights */
581                 float sum = 0.0f;
582
583                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
584                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
585                         
586                         if(CLOSURE_IS_BSDF(sc->type))
587                                 sum += sc->sample_weight;
588                 }
589
590                 float r = ccl_fetch(sd, randb_closure)*sum;
591                 sum = 0.0f;
592
593                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
594                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
595                         
596                         if(CLOSURE_IS_BSDF(sc->type)) {
597                                 sum += sc->sample_weight;
598
599                                 if(r <= sum)
600                                         break;
601                         }
602                 }
603
604                 if(sampled == ccl_fetch(sd, num_closure)) {
605                         *pdf = 0.0f;
606                         return LABEL_NONE;
607                 }
608         }
609
610         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
611
612         int label;
613         float3 eval;
614
615         *pdf = 0.0f;
616         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
617
618         if(*pdf != 0.0f) {
619                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
620
621                 if(ccl_fetch(sd, num_closure) > 1) {
622                         float sweight = sc->sample_weight;
623                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
624                 }
625         }
626
627         return label;
628 }
629
630 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
631         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
632         float3 *omega_in, differential3 *domega_in, float *pdf)
633 {
634         int label;
635         float3 eval;
636
637         *pdf = 0.0f;
638         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
639
640         if(*pdf != 0.0f)
641                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
642
643         return label;
644 }
645
646 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
647 {
648         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
649                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
650
651                 if(CLOSURE_IS_BSDF(sc->type))
652                         bsdf_blur(kg, sc, roughness);
653         }
654 }
655
656 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
657 {
658         if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)
659                 return make_float3(1.0f, 1.0f, 1.0f);
660
661         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
662
663         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
664                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
665
666                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
667                         eval += sc->weight;
668         }
669
670         return eval;
671 }
672
673 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
674 {
675         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
676
677         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
678         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
679         
680         return alpha;
681 }
682
683 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
684 {
685         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
686
687         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
688                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
689
690                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
691                         eval += sc->weight;
692         }
693
694         return eval;
695 }
696
697 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
698 {
699         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
700
701         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
702                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
703
704                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
705                         eval += sc->weight;
706         }
707
708         return eval;
709 }
710
711 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
712 {
713         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
714
715         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
716                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
717
718                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
719                         eval += sc->weight;
720         }
721
722         return eval;
723 }
724
725 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
726 {
727         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
728
729         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
730                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
731
732                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
733                         eval += sc->weight;
734         }
735
736         return eval;
737 }
738
739 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
740 {
741         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
742         float3 N = make_float3(0.0f, 0.0f, 0.0f);
743
744         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
745                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
746
747                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
748                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
749                         eval += sc->weight*ao_factor;
750                         N += bsdf->N*average(sc->weight);
751                 }
752                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
753                         eval += sc->weight;
754                         N += ccl_fetch(sd, N)*average(sc->weight);
755                 }
756         }
757
758         if(is_zero(N))
759                 N = ccl_fetch(sd, N);
760         else
761                 N = normalize(N);
762
763         *N_ = N;
764         return eval;
765 }
766
767 #ifdef __SUBSURFACE__
768 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
769 {
770         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
771         float3 N = make_float3(0.0f, 0.0f, 0.0f);
772         float texture_blur = 0.0f, weight_sum = 0.0f;
773
774         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
775                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
776
777                 if(CLOSURE_IS_BSSRDF(sc->type)) {
778                         const Bssrdf *bssrdf = (const Bssrdf*)sc;
779                         float avg_weight = fabsf(average(sc->weight));
780
781                         N += bssrdf->N*avg_weight;
782                         eval += sc->weight;
783                         texture_blur += bssrdf->texture_blur*avg_weight;
784                         weight_sum += avg_weight;
785                 }
786         }
787
788         if(N_)
789                 *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N);
790
791         if(texture_blur_)
792                 *texture_blur_ = texture_blur/weight_sum;
793         
794         return eval;
795 }
796 #endif
797
798 /* Emission */
799
800 ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
801 {
802         return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I));
803 }
804
805 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
806 {
807         float3 eval;
808         eval = make_float3(0.0f, 0.0f, 0.0f);
809
810         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
811                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
812
813                 if(CLOSURE_IS_EMISSION(sc->type))
814                         eval += emissive_eval(kg, sd, sc)*sc->weight;
815         }
816
817         return eval;
818 }
819
820 /* Holdout */
821
822 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
823 {
824         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
825
826         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
827                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
828
829                 if(CLOSURE_IS_HOLDOUT(sc->type))
830                         weight += sc->weight;
831         }
832
833         return weight;
834 }
835
836 /* Surface Evaluation */
837
838 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
839         ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
840 {
841         ccl_fetch(sd, num_closure) = 0;
842         ccl_fetch(sd, num_closure_extra) = 0;
843         ccl_fetch(sd, randb_closure) = randb;
844
845 #ifdef __OSL__
846         if(kg->osl)
847                 OSLShader::eval_surface(kg, sd, state, path_flag, ctx);
848         else
849 #endif
850         {
851 #ifdef __SVM__
852                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
853 #else
854                 DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
855                                                              sizeof(DiffuseBsdf),
856                                                              make_float3(0.8f, 0.8f, 0.8f));
857                 bsdf->N = ccl_fetch(sd, N);
858                 ccl_fetch(sd, flag) |= bsdf_diffuse_setup(bsdf);
859 #endif
860         }
861
862         if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
863                 ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953);
864         }
865 }
866
867 /* Background Evaluation */
868
869 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
870         ccl_addr_space PathState *state, int path_flag, ShaderContext ctx)
871 {
872         ccl_fetch(sd, num_closure) = 0;
873         ccl_fetch(sd, num_closure_extra) = 0;
874         ccl_fetch(sd, randb_closure) = 0.0f;
875
876 #ifdef __SVM__
877 #ifdef __OSL__
878         if(kg->osl) {
879                 OSLShader::eval_background(kg, sd, state, path_flag, ctx);
880         }
881         else
882 #endif
883         {
884                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
885         }
886
887         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
888
889         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
890                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
891
892                 if(CLOSURE_IS_BACKGROUND(sc->type))
893                         eval += sc->weight;
894         }
895
896         return eval;
897 #else
898         return make_float3(0.8f, 0.8f, 0.8f);
899 #endif
900 }
901
902 /* Volume */
903
904 #ifdef __VOLUME__
905
906 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
907         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
908 {
909         for(int i = 0; i < sd->num_closure; i++) {
910                 if(i == skip_phase)
911                         continue;
912
913                 const ShaderClosure *sc = &sd->closure[i];
914
915                 if(CLOSURE_IS_PHASE(sc->type)) {
916                         float phase_pdf = 0.0f;
917                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
918
919                         if(phase_pdf != 0.0f) {
920                                 bsdf_eval_accum(result_eval, sc->type, eval);
921                                 sum_pdf += phase_pdf*sc->sample_weight;
922                         }
923
924                         sum_sample_weight += sc->sample_weight;
925                 }
926         }
927
928         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
929 }
930
931 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
932         const float3 omega_in, BsdfEval *eval, float *pdf)
933 {
934         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
935
936         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
937 }
938
939 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
940         float randu, float randv, BsdfEval *phase_eval,
941         float3 *omega_in, differential3 *domega_in, float *pdf)
942 {
943         int sampled = 0;
944
945         if(sd->num_closure > 1) {
946                 /* pick a phase closure based on sample weights */
947                 float sum = 0.0f;
948
949                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
950                         const ShaderClosure *sc = &sd->closure[sampled];
951                         
952                         if(CLOSURE_IS_PHASE(sc->type))
953                                 sum += sc->sample_weight;
954                 }
955
956                 float r = sd->randb_closure*sum;
957                 sum = 0.0f;
958
959                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
960                         const ShaderClosure *sc = &sd->closure[sampled];
961                         
962                         if(CLOSURE_IS_PHASE(sc->type)) {
963                                 sum += sc->sample_weight;
964
965                                 if(r <= sum)
966                                         break;
967                         }
968                 }
969
970                 if(sampled == sd->num_closure) {
971                         *pdf = 0.0f;
972                         return LABEL_NONE;
973                 }
974         }
975
976         /* todo: this isn't quite correct, we don't weight anisotropy properly
977          * depending on color channels, even if this is perhaps not a common case */
978         const ShaderClosure *sc = &sd->closure[sampled];
979         int label;
980         float3 eval;
981
982         *pdf = 0.0f;
983         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
984
985         if(*pdf != 0.0f) {
986                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
987         }
988
989         return label;
990 }
991
992 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
993         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
994         float3 *omega_in, differential3 *domega_in, float *pdf)
995 {
996         int label;
997         float3 eval;
998
999         *pdf = 0.0f;
1000         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1001
1002         if(*pdf != 0.0f)
1003                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1004
1005         return label;
1006 }
1007
1008 /* Volume Evaluation */
1009
1010 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
1011                                           ShaderData *sd,
1012                                           PathState *state,
1013                                           VolumeStack *stack,
1014                                           int path_flag,
1015                                           ShaderContext ctx)
1016 {
1017         /* reset closures once at the start, we will be accumulating the closures
1018          * for all volumes in the stack into a single array of closures */
1019         sd->num_closure = 0;
1020         sd->num_closure_extra = 0;
1021         sd->flag = 0;
1022
1023         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1024                 /* setup shaderdata from stack. it's mostly setup already in
1025                  * shader_setup_from_volume, this switching should be quick */
1026                 sd->object = stack[i].object;
1027                 sd->shader = stack[i].shader;
1028
1029                 sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
1030                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
1031
1032                 if(sd->object != OBJECT_NONE) {
1033                         sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
1034
1035 #ifdef __OBJECT_MOTION__
1036                         /* todo: this is inefficient for motion blur, we should be
1037                          * caching matrices instead of recomputing them each step */
1038                         shader_setup_object_transforms(kg, sd, sd->time);
1039 #endif
1040                 }
1041
1042                 /* evaluate shader */
1043 #ifdef __SVM__
1044 #  ifdef __OSL__
1045                 if(kg->osl) {
1046                         OSLShader::eval_volume(kg, sd, state, path_flag, ctx);
1047                 }
1048                 else
1049 #  endif
1050                 {
1051                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1052                 }
1053 #endif
1054
1055                 /* merge closures to avoid exceeding number of closures limit */
1056                 if(i > 0)
1057                         shader_merge_closures(sd);
1058         }
1059 }
1060
1061 #endif
1062
1063 /* Displacement Evaluation */
1064
1065 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx)
1066 {
1067         ccl_fetch(sd, num_closure) = 0;
1068         ccl_fetch(sd, num_closure_extra) = 0;
1069         ccl_fetch(sd, randb_closure) = 0.0f;
1070
1071         /* this will modify sd->P */
1072 #ifdef __SVM__
1073 #  ifdef __OSL__
1074         if(kg->osl)
1075                 OSLShader::eval_displacement(kg, sd, ctx);
1076         else
1077 #  endif
1078         {
1079                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1080         }
1081 #endif
1082 }
1083
1084 /* Transparent Shadows */
1085
1086 #ifdef __TRANSPARENT_SHADOWS__
1087 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1088 {
1089         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1090         int shader = 0;
1091
1092 #ifdef __HAIR__
1093         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1094 #endif
1095                 shader = kernel_tex_fetch(__tri_shader, prim);
1096 #ifdef __HAIR__
1097         }
1098         else {
1099                 float4 str = kernel_tex_fetch(__curves, prim);
1100                 shader = __float_as_int(str.z);
1101         }
1102 #endif
1103         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
1104
1105         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1106 }
1107 #endif
1108
1109 CCL_NAMESPACE_END
1110