3a4770f82f1409d69c9fe0b499afd968a496b316
[blender.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "closure/bsdf_util.h"
28 #include "closure/bsdf.h"
29 #include "closure/emissive.h"
30
31 #include "svm/svm.h"
32
33 CCL_NAMESPACE_BEGIN
34
35 /* ShaderData setup from incoming ray */
36
37 #ifdef __OBJECT_MOTION__
38 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
39 {
40         if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) {
41                 ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
42                 ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
43         }
44         else {
45                 ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
46                 ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
47         }
48 }
49 #endif
50
51 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
52                                                ShaderData *sd,
53                                                const Intersection *isect,
54                                                const Ray *ray)
55 {
56 #ifdef __INSTANCING__
57         ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
58 #endif
59
60         ccl_fetch(sd, type) = isect->type;
61         ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
62
63         /* matrices and time */
64 #ifdef __OBJECT_MOTION__
65         shader_setup_object_transforms(kg, sd, ray->time);
66         ccl_fetch(sd, time) = ray->time;
67 #endif
68
69         ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim);
70         ccl_fetch(sd, ray_length) = isect->t;
71
72 #ifdef __UV__
73         ccl_fetch(sd, u) = isect->u;
74         ccl_fetch(sd, v) = isect->v;
75 #endif
76
77 #ifdef __HAIR__
78         if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
79                 /* curve */
80                 float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
81
82                 ccl_fetch(sd, shader) = __float_as_int(curvedata.z);
83                 ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray);
84         }
85         else
86 #endif
87         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
88                 /* static triangle */
89                 float3 Ng = triangle_normal(kg, sd);
90                 ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
91
92                 /* vectors */
93                 ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray);
94                 ccl_fetch(sd, Ng) = Ng;
95                 ccl_fetch(sd, N) = Ng;
96                 
97                 /* smooth normal */
98                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL)
99                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
100
101 #ifdef __DPDU__
102                 /* dPdu/dPdv */
103                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
104 #endif
105         }
106         else {
107                 /* motion triangle */
108                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
109         }
110
111         ccl_fetch(sd, I) = -ray->D;
112
113         ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
114
115 #ifdef __INSTANCING__
116         if(isect->object != OBJECT_NONE) {
117                 /* instance transform */
118                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
119                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
120 #  ifdef __DPDU__
121                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
122                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
123 #  endif
124         }
125 #endif
126
127         /* backfacing test */
128         bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
129
130         if(backfacing) {
131                 ccl_fetch(sd, flag) |= SD_BACKFACING;
132                 ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
133                 ccl_fetch(sd, N) = -ccl_fetch(sd, N);
134 #ifdef __DPDU__
135                 ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
136                 ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
137 #endif
138         }
139
140 #ifdef __RAY_DIFFERENTIALS__
141         /* differentials */
142         differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t);
143         differential_incoming(&ccl_fetch(sd, dI), ray->dD);
144         differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng));
145 #endif
146 }
147
148 /* ShaderData setup from BSSRDF scatter */
149
150 #ifdef __SUBSURFACE__
151 ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd,
152         const Intersection *isect, const Ray *ray)
153 {
154         bool backfacing = sd->flag & SD_BACKFACING;
155
156         /* object, matrices, time, ray_length stay the same */
157         sd->flag = kernel_tex_fetch(__object_flag, sd->object);
158         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
159         sd->type = isect->type;
160
161 #  ifdef __UV__
162         sd->u = isect->u;
163         sd->v = isect->v;
164 #  endif
165
166         /* fetch triangle data */
167         if(sd->type == PRIMITIVE_TRIANGLE) {
168                 float3 Ng = triangle_normal(kg, sd);
169                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
170
171                 /* static triangle */
172                 sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
173                 sd->Ng = Ng;
174                 sd->N = Ng;
175
176                 if(sd->shader & SHADER_SMOOTH_NORMAL)
177                         sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
178
179 #  ifdef __DPDU__
180                 /* dPdu/dPdv */
181                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
182 #  endif
183         }
184         else {
185                 /* motion triangle */
186                 motion_triangle_shader_setup(kg, sd, isect, ray, true);
187         }
188
189         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
190
191 #  ifdef __INSTANCING__
192         if(isect->object != OBJECT_NONE) {
193                 /* instance transform */
194                 object_normal_transform(kg, sd, &sd->N);
195                 object_normal_transform(kg, sd, &sd->Ng);
196 #    ifdef __DPDU__
197                 object_dir_transform(kg, sd, &sd->dPdu);
198                 object_dir_transform(kg, sd, &sd->dPdv);
199 #    endif
200         }
201 #  endif
202
203         /* backfacing test */
204         if(backfacing) {
205                 sd->flag |= SD_BACKFACING;
206                 sd->Ng = -sd->Ng;
207                 sd->N = -sd->N;
208 #  ifdef __DPDU__
209                 sd->dPdu = -sd->dPdu;
210                 sd->dPdv = -sd->dPdv;
211 #  endif
212         }
213
214         /* should not get used in principle as the shading will only use a diffuse
215          * BSDF, but the shader might still access it */
216         sd->I = sd->N;
217
218 #  ifdef __RAY_DIFFERENTIALS__
219         /* differentials */
220         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
221         /* don't modify dP and dI */
222 #  endif
223 }
224 #endif
225
226 /* ShaderData setup from position sampled on mesh */
227
228 ccl_device void shader_setup_from_sample(KernelGlobals *kg,
229                                          ShaderData *sd,
230                                          const float3 P,
231                                          const float3 Ng,
232                                          const float3 I,
233                                          int shader, int object, int prim,
234                                          float u, float v, float t,
235                                          float time)
236 {
237         /* vectors */
238         ccl_fetch(sd, P) = P;
239         ccl_fetch(sd, N) = Ng;
240         ccl_fetch(sd, Ng) = Ng;
241         ccl_fetch(sd, I) = I;
242         ccl_fetch(sd, shader) = shader;
243         ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
244
245         /* primitive */
246 #ifdef __INSTANCING__
247         ccl_fetch(sd, object) = object;
248 #endif
249         /* currently no access to bvh prim index for strand sd->prim*/
250         ccl_fetch(sd, prim) = prim;
251 #ifdef __UV__
252         ccl_fetch(sd, u) = u;
253         ccl_fetch(sd, v) = v;
254 #endif
255         ccl_fetch(sd, ray_length) = t;
256
257         /* detect instancing, for non-instanced the object index is -object-1 */
258 #ifdef __INSTANCING__
259         bool instanced = false;
260
261         if(ccl_fetch(sd, prim) != PRIM_NONE) {
262                 if(ccl_fetch(sd, object) >= 0)
263                         instanced = true;
264                 else
265 #endif
266                         ccl_fetch(sd, object) = ~ccl_fetch(sd, object);
267 #ifdef __INSTANCING__
268         }
269 #endif
270
271         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
272         if(ccl_fetch(sd, object) != OBJECT_NONE) {
273                 ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
274
275 #ifdef __OBJECT_MOTION__
276                 shader_setup_object_transforms(kg, sd, time);
277         }
278
279         ccl_fetch(sd, time) = time;
280 #else
281         }
282 #endif
283
284         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
285                 /* smooth normal */
286                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
287                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
288
289 #ifdef __INSTANCING__
290                         if(instanced)
291                                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
292 #endif
293                 }
294
295                 /* dPdu/dPdv */
296 #ifdef __DPDU__
297                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
298
299 #  ifdef __INSTANCING__
300                 if(instanced) {
301                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
302                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
303                 }
304 #  endif
305 #endif
306         }
307         else {
308 #ifdef __DPDU__
309                 ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
310                 ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
311 #endif
312         }
313
314         /* backfacing test */
315         if(ccl_fetch(sd, prim) != PRIM_NONE) {
316                 bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
317
318                 if(backfacing) {
319                         ccl_fetch(sd, flag) |= SD_BACKFACING;
320                         ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
321                         ccl_fetch(sd, N) = -ccl_fetch(sd, N);
322 #ifdef __DPDU__
323                         ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
324                         ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
325 #endif
326                 }
327         }
328
329 #ifdef __RAY_DIFFERENTIALS__
330         /* no ray differentials here yet */
331         ccl_fetch(sd, dP) = differential3_zero();
332         ccl_fetch(sd, dI) = differential3_zero();
333         ccl_fetch(sd, du) = differential_zero();
334         ccl_fetch(sd, dv) = differential_zero();
335 #endif
336 }
337
338 /* ShaderData setup for displacement */
339
340 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
341         int object, int prim, float u, float v)
342 {
343         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
344         int shader;
345
346         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
347
348         /* force smooth shading for displacement */
349         shader |= SHADER_SMOOTH_NORMAL;
350
351         /* watch out: no instance transform currently */
352
353         shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID);
354 }
355
356 /* ShaderData setup from ray into background */
357
358 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
359 {
360         /* vectors */
361         ccl_fetch(sd, P) = ray->D;
362         ccl_fetch(sd, N) = -ray->D;
363         ccl_fetch(sd, Ng) = -ray->D;
364         ccl_fetch(sd, I) = -ray->D;
365         ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
366         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
367 #ifdef __OBJECT_MOTION__
368         ccl_fetch(sd, time) = ray->time;
369 #endif
370         ccl_fetch(sd, ray_length) = 0.0f;
371
372 #ifdef __INSTANCING__
373         ccl_fetch(sd, object) = PRIM_NONE;
374 #endif
375         ccl_fetch(sd, prim) = PRIM_NONE;
376 #ifdef __UV__
377         ccl_fetch(sd, u) = 0.0f;
378         ccl_fetch(sd, v) = 0.0f;
379 #endif
380
381 #ifdef __DPDU__
382         /* dPdu/dPdv */
383         ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
384         ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
385 #endif
386
387 #ifdef __RAY_DIFFERENTIALS__
388         /* differentials */
389         ccl_fetch(sd, dP) = ray->dD;
390         differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP));
391         ccl_fetch(sd, du) = differential_zero();
392         ccl_fetch(sd, dv) = differential_zero();
393 #endif
394 }
395
396 /* ShaderData setup from point inside volume */
397
398 #ifdef __VOLUME__
399 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
400 {
401         /* vectors */
402         sd->P = ray->P;
403         sd->N = -ray->D;  
404         sd->Ng = -ray->D;
405         sd->I = -ray->D;
406         sd->shader = SHADER_NONE;
407         sd->flag = 0;
408 #ifdef __OBJECT_MOTION__
409         sd->time = ray->time;
410 #endif
411         sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
412
413 #ifdef __INSTANCING__
414         sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
415 #endif
416         sd->prim = PRIM_NONE;
417         sd->type = PRIMITIVE_NONE;
418
419 #ifdef __UV__
420         sd->u = 0.0f;
421         sd->v = 0.0f;
422 #endif
423
424 #ifdef __DPDU__
425         /* dPdu/dPdv */
426         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
427         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
428 #endif
429
430 #ifdef __RAY_DIFFERENTIALS__
431         /* differentials */
432         sd->dP = ray->dD;
433         differential_incoming(&sd->dI, sd->dP);
434         sd->du = differential_zero();
435         sd->dv = differential_zero();
436 #endif
437
438         /* for NDC coordinates */
439         sd->ray_P = ray->P;
440         sd->ray_dP = ray->dP;
441 }
442 #endif
443
444 /* Merging */
445
446 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
447 ccl_device void shader_merge_closures(ShaderData *sd)
448 {
449         /* merge identical closures, better when we sample a single closure at a time */
450         for(int i = 0; i < sd->num_closure; i++) {
451                 ShaderClosure *sci = &sd->closure[i];
452
453                 for(int j = i + 1; j < sd->num_closure; j++) {
454                         ShaderClosure *scj = &sd->closure[j];
455
456 #ifdef __OSL__
457                         if(sci->prim || scj->prim)
458                                 continue;
459 #endif
460
461                         if(!(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1 && sci->data2 == scj->data2))
462                                 continue;
463
464                         if(CLOSURE_IS_BSDF_OR_BSSRDF(sci->type)) {
465                                 if(sci->N != scj->N)
466                                         continue;
467                                 else if(CLOSURE_IS_BSDF_ANISOTROPIC(sci->type) && sci->T != scj->T)
468                                         continue;
469                         }
470
471                         if((sd->flag & SD_BSDF_HAS_CUSTOM) && !(sci->custom1 == scj->custom1 && sci->custom2 == scj->custom2 && sci->custom3 == scj->custom3))
472                                 continue;
473
474                         sci->weight += scj->weight;
475                         sci->sample_weight += scj->sample_weight;
476
477                         int size = sd->num_closure - (j+1);
478                         if(size > 0) {
479                                 for(int k = 0; k < size; k++) {
480                                         scj[k] = scj[k+1];
481                                 }
482                         }
483
484                         sd->num_closure--;
485                         kernel_assert(sd->num_closure >= 0);
486                         j--;
487                 }
488         }
489 }
490 #endif
491
492 /* BSDF */
493
494 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
495         int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
496 {
497         /* this is the veach one-sample model with balance heuristic, some pdf
498          * factors drop out when using balance heuristic weighting */
499         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
500                 if(i == skip_bsdf)
501                         continue;
502
503                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
504
505                 if(CLOSURE_IS_BSDF(sc->type)) {
506                         float bsdf_pdf = 0.0f;
507                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
508
509                         if(bsdf_pdf != 0.0f) {
510                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight);
511                                 sum_pdf += bsdf_pdf*sc->sample_weight;
512                         }
513
514                         sum_sample_weight += sc->sample_weight;
515                 }
516         }
517
518         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
519 }
520
521 #ifdef __BRANCHED_PATH__
522 ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
523                                                         ShaderData *sd,
524                                                         const float3 omega_in,
525                                                         BsdfEval *result_eval,
526                                                         float light_pdf,
527                                                         bool use_mis)
528 {
529         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
530                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
531                 if(CLOSURE_IS_BSDF(sc->type)) {
532                         float bsdf_pdf = 0.0f;
533                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
534                         if(bsdf_pdf != 0.0f) {
535                                 float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
536                                 bsdf_eval_accum(result_eval,
537                                                 sc->type,
538                                                 eval * sc->weight * mis_weight);
539                         }
540                 }
541         }
542 }
543 #endif
544
545 ccl_device void shader_bsdf_eval(KernelGlobals *kg,
546                                  ShaderData *sd,
547                                  const float3 omega_in,
548                                  BsdfEval *eval,
549                                  float light_pdf,
550                                  bool use_mis)
551 {
552         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
553
554 #ifdef __BRANCHED_PATH__
555         if(kernel_data.integrator.branched)
556                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
557         else
558 #endif
559         {
560                 float pdf;
561                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
562                 if(use_mis) {
563                         float weight = power_heuristic(light_pdf, pdf);
564                         bsdf_eval_mul(eval, make_float3(weight, weight, weight));
565                 }
566         }
567 }
568
569 ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd,
570         float randu, float randv, BsdfEval *bsdf_eval,
571         float3 *omega_in, differential3 *domega_in, float *pdf)
572 {
573         int sampled = 0;
574
575         if(ccl_fetch(sd, num_closure) > 1) {
576                 /* pick a BSDF closure based on sample weights */
577                 float sum = 0.0f;
578
579                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
580                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
581                         
582                         if(CLOSURE_IS_BSDF(sc->type))
583                                 sum += sc->sample_weight;
584                 }
585
586                 float r = ccl_fetch(sd, randb_closure)*sum;
587                 sum = 0.0f;
588
589                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
590                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
591                         
592                         if(CLOSURE_IS_BSDF(sc->type)) {
593                                 sum += sc->sample_weight;
594
595                                 if(r <= sum)
596                                         break;
597                         }
598                 }
599
600                 if(sampled == ccl_fetch(sd, num_closure)) {
601                         *pdf = 0.0f;
602                         return LABEL_NONE;
603                 }
604         }
605
606         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
607
608         int label;
609         float3 eval;
610
611         *pdf = 0.0f;
612         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
613
614         if(*pdf != 0.0f) {
615                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
616
617                 if(ccl_fetch(sd, num_closure) > 1) {
618                         float sweight = sc->sample_weight;
619                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
620                 }
621         }
622
623         return label;
624 }
625
626 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
627         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
628         float3 *omega_in, differential3 *domega_in, float *pdf)
629 {
630         int label;
631         float3 eval;
632
633         *pdf = 0.0f;
634         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
635
636         if(*pdf != 0.0f)
637                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
638
639         return label;
640 }
641
642 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
643 {
644         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
645                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
646
647                 if(CLOSURE_IS_BSDF(sc->type))
648                         bsdf_blur(kg, sc, roughness);
649         }
650 }
651
652 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
653 {
654         if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)
655                 return make_float3(1.0f, 1.0f, 1.0f);
656
657         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
658
659         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
660                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
661
662                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
663                         eval += sc->weight;
664         }
665
666         return eval;
667 }
668
669 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
670 {
671         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
672
673         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
674         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
675         
676         return alpha;
677 }
678
679 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
680 {
681         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
682
683         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
684                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
685
686                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
687                         eval += sc->weight;
688         }
689
690         return eval;
691 }
692
693 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
694 {
695         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
696
697         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
698                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
699
700                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
701                         eval += sc->weight;
702         }
703
704         return eval;
705 }
706
707 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
708 {
709         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
710
711         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
712                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
713
714                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
715                         eval += sc->weight;
716         }
717
718         return eval;
719 }
720
721 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
722 {
723         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
724
725         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
726                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
727
728                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
729                         eval += sc->weight;
730         }
731
732         return eval;
733 }
734
735 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
736 {
737         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
738         float3 N = make_float3(0.0f, 0.0f, 0.0f);
739
740         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
741                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
742
743                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
744                         eval += sc->weight*ao_factor;
745                         N += sc->N*average(sc->weight);
746                 }
747                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
748                         eval += sc->weight;
749                         N += ccl_fetch(sd, N)*average(sc->weight);
750                 }
751         }
752
753         if(is_zero(N))
754                 N = ccl_fetch(sd, N);
755         else
756                 N = normalize(N);
757
758         *N_ = N;
759         return eval;
760 }
761
762 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
763 {
764         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
765         float3 N = make_float3(0.0f, 0.0f, 0.0f);
766         float texture_blur = 0.0f, weight_sum = 0.0f;
767
768         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
769                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
770
771                 if(CLOSURE_IS_BSSRDF(sc->type)) {
772                         float avg_weight = fabsf(average(sc->weight));
773
774                         N += sc->N*avg_weight;
775                         eval += sc->weight;
776                         texture_blur += sc->data1*avg_weight;
777                         weight_sum += avg_weight;
778                 }
779         }
780
781         if(N_)
782                 *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N);
783
784         if(texture_blur_)
785                 *texture_blur_ = texture_blur/weight_sum;
786         
787         return eval;
788 }
789
790 /* Emission */
791
792 ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
793 {
794         return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I));
795 }
796
797 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
798 {
799         float3 eval;
800         eval = make_float3(0.0f, 0.0f, 0.0f);
801
802         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
803                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
804
805                 if(CLOSURE_IS_EMISSION(sc->type))
806                         eval += emissive_eval(kg, sd, sc)*sc->weight;
807         }
808
809         return eval;
810 }
811
812 /* Holdout */
813
814 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
815 {
816         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
817
818         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
819                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
820
821                 if(CLOSURE_IS_HOLDOUT(sc->type))
822                         weight += sc->weight;
823         }
824
825         return weight;
826 }
827
828 /* Surface Evaluation */
829
830 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, RNG *rng,
831         ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
832 {
833         ccl_fetch(sd, num_closure) = 0;
834         ccl_fetch(sd, randb_closure) = randb;
835
836 #ifdef __OSL__
837         if(kg->osl)
838                 OSLShader::eval_surface(kg, sd, state, path_flag, ctx);
839         else
840 #endif
841         {
842 #ifdef __SVM__
843                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
844 #else
845                 ccl_fetch_array(sd, closure, 0)->weight = make_float3(0.8f, 0.8f, 0.8f);
846                 ccl_fetch_array(sd, closure, 0)->N = ccl_fetch(sd, N);
847                 ccl_fetch_array(sd, closure, 0)->data0 = 0.0f;
848                 ccl_fetch_array(sd, closure, 0)->data1 = 0.0f;
849                 ccl_fetch(sd, flag) |= bsdf_diffuse_setup(ccl_fetch_array(sd, closure, 0));
850 #endif
851         }
852
853         if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
854                 ccl_fetch(sd, lcg_state) = lcg_state_init(rng, state, 0xb4bc3953);
855         }
856 }
857
858 /* Background Evaluation */
859
860 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
861         ccl_addr_space PathState *state, int path_flag, ShaderContext ctx)
862 {
863         ccl_fetch(sd, num_closure) = 0;
864         ccl_fetch(sd, randb_closure) = 0.0f;
865
866 #ifdef __OSL__
867         if(kg->osl) {
868                 return OSLShader::eval_background(kg, sd, state, path_flag, ctx);
869         }
870         else
871 #endif
872
873         {
874 #ifdef __SVM__
875                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
876
877                 float3 eval = make_float3(0.0f, 0.0f, 0.0f);
878
879                 for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
880                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
881
882                         if(CLOSURE_IS_BACKGROUND(sc->type))
883                                 eval += sc->weight;
884                 }
885
886                 return eval;
887 #else
888                 return make_float3(0.8f, 0.8f, 0.8f);
889 #endif
890         }
891 }
892
893 /* Volume */
894
895 #ifdef __VOLUME__
896
897 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
898         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
899 {
900         for(int i = 0; i < sd->num_closure; i++) {
901                 if(i == skip_phase)
902                         continue;
903
904                 const ShaderClosure *sc = &sd->closure[i];
905
906                 if(CLOSURE_IS_PHASE(sc->type)) {
907                         float phase_pdf = 0.0f;
908                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
909
910                         if(phase_pdf != 0.0f) {
911                                 bsdf_eval_accum(result_eval, sc->type, eval);
912                                 sum_pdf += phase_pdf*sc->sample_weight;
913                         }
914
915                         sum_sample_weight += sc->sample_weight;
916                 }
917         }
918
919         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
920 }
921
922 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
923         const float3 omega_in, BsdfEval *eval, float *pdf)
924 {
925         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
926
927         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
928 }
929
930 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
931         float randu, float randv, BsdfEval *phase_eval,
932         float3 *omega_in, differential3 *domega_in, float *pdf)
933 {
934         int sampled = 0;
935
936         if(sd->num_closure > 1) {
937                 /* pick a phase closure based on sample weights */
938                 float sum = 0.0f;
939
940                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
941                         const ShaderClosure *sc = &sd->closure[sampled];
942                         
943                         if(CLOSURE_IS_PHASE(sc->type))
944                                 sum += sc->sample_weight;
945                 }
946
947                 float r = sd->randb_closure*sum;
948                 sum = 0.0f;
949
950                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
951                         const ShaderClosure *sc = &sd->closure[sampled];
952                         
953                         if(CLOSURE_IS_PHASE(sc->type)) {
954                                 sum += sc->sample_weight;
955
956                                 if(r <= sum)
957                                         break;
958                         }
959                 }
960
961                 if(sampled == sd->num_closure) {
962                         *pdf = 0.0f;
963                         return LABEL_NONE;
964                 }
965         }
966
967         /* todo: this isn't quite correct, we don't weight anisotropy properly
968          * depending on color channels, even if this is perhaps not a common case */
969         const ShaderClosure *sc = &sd->closure[sampled];
970         int label;
971         float3 eval;
972
973         *pdf = 0.0f;
974         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
975
976         if(*pdf != 0.0f) {
977                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
978         }
979
980         return label;
981 }
982
983 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
984         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
985         float3 *omega_in, differential3 *domega_in, float *pdf)
986 {
987         int label;
988         float3 eval;
989
990         *pdf = 0.0f;
991         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
992
993         if(*pdf != 0.0f)
994                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
995
996         return label;
997 }
998
999 /* Volume Evaluation */
1000
1001 ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
1002         PathState *state, VolumeStack *stack, int path_flag, ShaderContext ctx)
1003 {
1004         /* reset closures once at the start, we will be accumulating the closures
1005          * for all volumes in the stack into a single array of closures */
1006         sd->num_closure = 0;
1007         sd->flag = 0;
1008
1009         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1010                 /* setup shaderdata from stack. it's mostly setup already in
1011                  * shader_setup_from_volume, this switching should be quick */
1012                 sd->object = stack[i].object;
1013                 sd->shader = stack[i].shader;
1014
1015                 sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
1016                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
1017
1018                 if(sd->object != OBJECT_NONE) {
1019                         sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
1020
1021 #ifdef __OBJECT_MOTION__
1022                         /* todo: this is inefficient for motion blur, we should be
1023                          * caching matrices instead of recomputing them each step */
1024                         shader_setup_object_transforms(kg, sd, sd->time);
1025 #endif
1026                 }
1027
1028                 /* evaluate shader */
1029 #ifdef __SVM__
1030 #  ifdef __OSL__
1031                 if(kg->osl) {
1032                         OSLShader::eval_volume(kg, sd, state, path_flag, ctx);
1033                 }
1034                 else
1035 #  endif
1036                 {
1037                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1038                 }
1039 #endif
1040
1041                 /* merge closures to avoid exceeding number of closures limit */
1042                 if(i > 0)
1043                         shader_merge_closures(sd);
1044         }
1045 }
1046
1047 #endif
1048
1049 /* Displacement Evaluation */
1050
1051 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx)
1052 {
1053         ccl_fetch(sd, num_closure) = 0;
1054         ccl_fetch(sd, randb_closure) = 0.0f;
1055
1056         /* this will modify sd->P */
1057 #ifdef __SVM__
1058 #  ifdef __OSL__
1059         if(kg->osl)
1060                 OSLShader::eval_displacement(kg, sd, ctx);
1061         else
1062 #  endif
1063         {
1064                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1065         }
1066 #endif
1067 }
1068
1069 /* Transparent Shadows */
1070
1071 #ifdef __TRANSPARENT_SHADOWS__
1072 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1073 {
1074         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1075         int shader = 0;
1076
1077 #ifdef __HAIR__
1078         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1079 #endif
1080                 shader = kernel_tex_fetch(__tri_shader, prim);
1081 #ifdef __HAIR__
1082         }
1083         else {
1084                 float4 str = kernel_tex_fetch(__curves, prim);
1085                 shader = __float_as_int(str.z);
1086         }
1087 #endif
1088         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
1089
1090         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1091 }
1092 #endif
1093
1094 CCL_NAMESPACE_END
1095