Cycles: Tweaks to support CUDA 8 toolkit
[blender.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "closure/alloc.h"
28 #include "closure/bsdf_util.h"
29 #include "closure/bsdf.h"
30 #include "closure/emissive.h"
31
32 #include "svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
38 #ifdef __OBJECT_MOTION__
39 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
40 {
41         if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) {
42                 ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
43                 ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
44         }
45         else {
46                 ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
47                 ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
48         }
49 }
50 #endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60
61         ccl_fetch(sd, type) = isect->type;
62         ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
63
64         /* matrices and time */
65 #ifdef __OBJECT_MOTION__
66         shader_setup_object_transforms(kg, sd, ray->time);
67         ccl_fetch(sd, time) = ray->time;
68 #endif
69
70         ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim);
71         ccl_fetch(sd, ray_length) = isect->t;
72
73 #ifdef __UV__
74         ccl_fetch(sd, u) = isect->u;
75         ccl_fetch(sd, v) = isect->v;
76 #endif
77
78 #ifdef __HAIR__
79         if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
80                 /* curve */
81                 float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
82
83                 ccl_fetch(sd, shader) = __float_as_int(curvedata.z);
84                 ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray);
85         }
86         else
87 #endif
88         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
89                 /* static triangle */
90                 float3 Ng = triangle_normal(kg, sd);
91                 ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
92
93                 /* vectors */
94                 ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray);
95                 ccl_fetch(sd, Ng) = Ng;
96                 ccl_fetch(sd, N) = Ng;
97                 
98                 /* smooth normal */
99                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL)
100                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
101
102 #ifdef __DPDU__
103                 /* dPdu/dPdv */
104                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
105 #endif
106         }
107         else {
108                 /* motion triangle */
109                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
110         }
111
112         ccl_fetch(sd, I) = -ray->D;
113
114         ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
115
116 #ifdef __INSTANCING__
117         if(isect->object != OBJECT_NONE) {
118                 /* instance transform */
119                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
120                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
121 #  ifdef __DPDU__
122                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
123                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
124 #  endif
125         }
126 #endif
127
128         /* backfacing test */
129         bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
130
131         if(backfacing) {
132                 ccl_fetch(sd, flag) |= SD_BACKFACING;
133                 ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
134                 ccl_fetch(sd, N) = -ccl_fetch(sd, N);
135 #ifdef __DPDU__
136                 ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
137                 ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
138 #endif
139         }
140
141 #ifdef __RAY_DIFFERENTIALS__
142         /* differentials */
143         differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t);
144         differential_incoming(&ccl_fetch(sd, dI), ray->dD);
145         differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng));
146 #endif
147 }
148
149 /* ShaderData setup from BSSRDF scatter */
150
151 #ifdef __SUBSURFACE__
152 ccl_device void shader_setup_from_subsurface(
153         KernelGlobals *kg,
154         ShaderData *sd,
155         const Intersection *isect,
156         const Ray *ray)
157 {
158         bool backfacing = sd->flag & SD_BACKFACING;
159
160         /* object, matrices, time, ray_length stay the same */
161         sd->flag = kernel_tex_fetch(__object_flag, sd->object);
162         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
163         sd->type = isect->type;
164
165 #  ifdef __UV__
166         sd->u = isect->u;
167         sd->v = isect->v;
168 #  endif
169
170         /* fetch triangle data */
171         if(sd->type == PRIMITIVE_TRIANGLE) {
172                 float3 Ng = triangle_normal(kg, sd);
173                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
174
175                 /* static triangle */
176                 sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
177                 sd->Ng = Ng;
178                 sd->N = Ng;
179
180                 if(sd->shader & SHADER_SMOOTH_NORMAL)
181                         sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
182
183 #  ifdef __DPDU__
184                 /* dPdu/dPdv */
185                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
186 #  endif
187         }
188         else {
189                 /* motion triangle */
190                 motion_triangle_shader_setup(kg, sd, isect, ray, true);
191         }
192
193         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
194
195 #  ifdef __INSTANCING__
196         if(isect->object != OBJECT_NONE) {
197                 /* instance transform */
198                 object_normal_transform(kg, sd, &sd->N);
199                 object_normal_transform(kg, sd, &sd->Ng);
200 #    ifdef __DPDU__
201                 object_dir_transform(kg, sd, &sd->dPdu);
202                 object_dir_transform(kg, sd, &sd->dPdv);
203 #    endif
204         }
205 #  endif
206
207         /* backfacing test */
208         if(backfacing) {
209                 sd->flag |= SD_BACKFACING;
210                 sd->Ng = -sd->Ng;
211                 sd->N = -sd->N;
212 #  ifdef __DPDU__
213                 sd->dPdu = -sd->dPdu;
214                 sd->dPdv = -sd->dPdv;
215 #  endif
216         }
217
218         /* should not get used in principle as the shading will only use a diffuse
219          * BSDF, but the shader might still access it */
220         sd->I = sd->N;
221
222 #  ifdef __RAY_DIFFERENTIALS__
223         /* differentials */
224         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
225         /* don't modify dP and dI */
226 #  endif
227 }
228 #endif
229
230 /* ShaderData setup from position sampled on mesh */
231
232 ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
233                                                 ShaderData *sd,
234                                                 const float3 P,
235                                                 const float3 Ng,
236                                                 const float3 I,
237                                                 int shader, int object, int prim,
238                                                 float u, float v, float t,
239                                                 float time)
240 {
241         /* vectors */
242         ccl_fetch(sd, P) = P;
243         ccl_fetch(sd, N) = Ng;
244         ccl_fetch(sd, Ng) = Ng;
245         ccl_fetch(sd, I) = I;
246         ccl_fetch(sd, shader) = shader;
247         ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
248
249         /* primitive */
250 #ifdef __INSTANCING__
251         ccl_fetch(sd, object) = object;
252 #endif
253         /* currently no access to bvh prim index for strand sd->prim*/
254         ccl_fetch(sd, prim) = prim;
255 #ifdef __UV__
256         ccl_fetch(sd, u) = u;
257         ccl_fetch(sd, v) = v;
258 #endif
259         ccl_fetch(sd, ray_length) = t;
260
261         /* detect instancing, for non-instanced the object index is -object-1 */
262 #ifdef __INSTANCING__
263         bool instanced = false;
264
265         if(ccl_fetch(sd, prim) != PRIM_NONE) {
266                 if(ccl_fetch(sd, object) >= 0)
267                         instanced = true;
268                 else
269 #endif
270                         ccl_fetch(sd, object) = ~ccl_fetch(sd, object);
271 #ifdef __INSTANCING__
272         }
273 #endif
274
275         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
276         if(ccl_fetch(sd, object) != OBJECT_NONE) {
277                 ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
278
279 #ifdef __OBJECT_MOTION__
280                 shader_setup_object_transforms(kg, sd, time);
281         }
282
283         ccl_fetch(sd, time) = time;
284 #else
285         }
286 #endif
287
288         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
289                 /* smooth normal */
290                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
291                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
292
293 #ifdef __INSTANCING__
294                         if(instanced)
295                                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
296 #endif
297                 }
298
299                 /* dPdu/dPdv */
300 #ifdef __DPDU__
301                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
302
303 #  ifdef __INSTANCING__
304                 if(instanced) {
305                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
306                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
307                 }
308 #  endif
309 #endif
310         }
311         else {
312 #ifdef __DPDU__
313                 ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
314                 ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
315 #endif
316         }
317
318         /* backfacing test */
319         if(ccl_fetch(sd, prim) != PRIM_NONE) {
320                 bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
321
322                 if(backfacing) {
323                         ccl_fetch(sd, flag) |= SD_BACKFACING;
324                         ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
325                         ccl_fetch(sd, N) = -ccl_fetch(sd, N);
326 #ifdef __DPDU__
327                         ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
328                         ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
329 #endif
330                 }
331         }
332
333 #ifdef __RAY_DIFFERENTIALS__
334         /* no ray differentials here yet */
335         ccl_fetch(sd, dP) = differential3_zero();
336         ccl_fetch(sd, dI) = differential3_zero();
337         ccl_fetch(sd, du) = differential_zero();
338         ccl_fetch(sd, dv) = differential_zero();
339 #endif
340 }
341
342 /* ShaderData setup for displacement */
343
344 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
345         int object, int prim, float u, float v)
346 {
347         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
348         int shader;
349
350         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
351
352         /* force smooth shading for displacement */
353         shader |= SHADER_SMOOTH_NORMAL;
354
355         /* watch out: no instance transform currently */
356
357         shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID);
358 }
359
360 /* ShaderData setup from ray into background */
361
362 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
363 {
364         /* vectors */
365         ccl_fetch(sd, P) = ray->D;
366         ccl_fetch(sd, N) = -ray->D;
367         ccl_fetch(sd, Ng) = -ray->D;
368         ccl_fetch(sd, I) = -ray->D;
369         ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
370         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
371 #ifdef __OBJECT_MOTION__
372         ccl_fetch(sd, time) = ray->time;
373 #endif
374         ccl_fetch(sd, ray_length) = 0.0f;
375
376 #ifdef __INSTANCING__
377         ccl_fetch(sd, object) = PRIM_NONE;
378 #endif
379         ccl_fetch(sd, prim) = PRIM_NONE;
380 #ifdef __UV__
381         ccl_fetch(sd, u) = 0.0f;
382         ccl_fetch(sd, v) = 0.0f;
383 #endif
384
385 #ifdef __DPDU__
386         /* dPdu/dPdv */
387         ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
388         ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
389 #endif
390
391 #ifdef __RAY_DIFFERENTIALS__
392         /* differentials */
393         ccl_fetch(sd, dP) = ray->dD;
394         differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP));
395         ccl_fetch(sd, du) = differential_zero();
396         ccl_fetch(sd, dv) = differential_zero();
397 #endif
398 }
399
400 /* ShaderData setup from point inside volume */
401
402 #ifdef __VOLUME__
403 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
404 {
405         /* vectors */
406         sd->P = ray->P;
407         sd->N = -ray->D;  
408         sd->Ng = -ray->D;
409         sd->I = -ray->D;
410         sd->shader = SHADER_NONE;
411         sd->flag = 0;
412 #ifdef __OBJECT_MOTION__
413         sd->time = ray->time;
414 #endif
415         sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
416
417 #ifdef __INSTANCING__
418         sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
419 #endif
420         sd->prim = PRIM_NONE;
421         sd->type = PRIMITIVE_NONE;
422
423 #ifdef __UV__
424         sd->u = 0.0f;
425         sd->v = 0.0f;
426 #endif
427
428 #ifdef __DPDU__
429         /* dPdu/dPdv */
430         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
431         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
432 #endif
433
434 #ifdef __RAY_DIFFERENTIALS__
435         /* differentials */
436         sd->dP = ray->dD;
437         differential_incoming(&sd->dI, sd->dP);
438         sd->du = differential_zero();
439         sd->dv = differential_zero();
440 #endif
441
442         /* for NDC coordinates */
443         sd->ray_P = ray->P;
444         sd->ray_dP = ray->dP;
445 }
446 #endif
447
448 /* Merging */
449
450 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
451 ccl_device_inline void shader_merge_closures(ShaderData *sd)
452 {
453         /* merge identical closures, better when we sample a single closure at a time */
454         for(int i = 0; i < sd->num_closure; i++) {
455                 ShaderClosure *sci = &sd->closure[i];
456
457                 for(int j = i + 1; j < sd->num_closure; j++) {
458                         ShaderClosure *scj = &sd->closure[j];
459
460                         if(sci->type != scj->type)
461                                 continue;
462                         if(!bsdf_merge(sci, scj))
463                                 continue;
464
465                         sci->weight += scj->weight;
466                         sci->sample_weight += scj->sample_weight;
467
468                         int size = sd->num_closure - (j+1);
469                         if(size > 0) {
470                                 for(int k = 0; k < size; k++) {
471                                         scj[k] = scj[k+1];
472                                 }
473                         }
474
475                         sd->num_closure--;
476                         kernel_assert(sd->num_closure >= 0);
477                         j--;
478                 }
479         }
480 }
481 #endif
482
483 /* BSDF */
484
485 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
486         int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
487 {
488         /* this is the veach one-sample model with balance heuristic, some pdf
489          * factors drop out when using balance heuristic weighting */
490         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
491                 if(i == skip_bsdf)
492                         continue;
493
494                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
495
496                 if(CLOSURE_IS_BSDF(sc->type)) {
497                         float bsdf_pdf = 0.0f;
498                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
499
500                         if(bsdf_pdf != 0.0f) {
501                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight);
502                                 sum_pdf += bsdf_pdf*sc->sample_weight;
503                         }
504
505                         sum_sample_weight += sc->sample_weight;
506                 }
507         }
508
509         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
510 }
511
512 #ifdef __BRANCHED_PATH__
513 ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
514                                                         ShaderData *sd,
515                                                         const float3 omega_in,
516                                                         BsdfEval *result_eval,
517                                                         float light_pdf,
518                                                         bool use_mis)
519 {
520         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
521                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
522                 if(CLOSURE_IS_BSDF(sc->type)) {
523                         float bsdf_pdf = 0.0f;
524                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
525                         if(bsdf_pdf != 0.0f) {
526                                 float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
527                                 bsdf_eval_accum(result_eval,
528                                                 sc->type,
529                                                 eval * sc->weight * mis_weight);
530                         }
531                 }
532         }
533 }
534 #endif
535
536 ccl_device void shader_bsdf_eval(KernelGlobals *kg,
537                                  ShaderData *sd,
538                                  const float3 omega_in,
539                                  BsdfEval *eval,
540                                  float light_pdf,
541                                  bool use_mis)
542 {
543         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
544
545 #ifdef __BRANCHED_PATH__
546         if(kernel_data.integrator.branched)
547                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
548         else
549 #endif
550         {
551                 float pdf;
552                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
553                 if(use_mis) {
554                         float weight = power_heuristic(light_pdf, pdf);
555                         bsdf_eval_mul(eval, make_float3(weight, weight, weight));
556                 }
557         }
558 }
559
560 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
561                                          ShaderData *sd,
562                                          float randu, float randv,
563                                          BsdfEval *bsdf_eval,
564                                          float3 *omega_in,
565                                          differential3 *domega_in,
566                                          float *pdf)
567 {
568         int sampled = 0;
569
570         if(ccl_fetch(sd, num_closure) > 1) {
571                 /* pick a BSDF closure based on sample weights */
572                 float sum = 0.0f;
573
574                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
575                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
576                         
577                         if(CLOSURE_IS_BSDF(sc->type))
578                                 sum += sc->sample_weight;
579                 }
580
581                 float r = ccl_fetch(sd, randb_closure)*sum;
582                 sum = 0.0f;
583
584                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
585                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
586                         
587                         if(CLOSURE_IS_BSDF(sc->type)) {
588                                 sum += sc->sample_weight;
589
590                                 if(r <= sum)
591                                         break;
592                         }
593                 }
594
595                 if(sampled == ccl_fetch(sd, num_closure)) {
596                         *pdf = 0.0f;
597                         return LABEL_NONE;
598                 }
599         }
600
601         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
602
603         int label;
604         float3 eval;
605
606         *pdf = 0.0f;
607         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
608
609         if(*pdf != 0.0f) {
610                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
611
612                 if(ccl_fetch(sd, num_closure) > 1) {
613                         float sweight = sc->sample_weight;
614                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
615                 }
616         }
617
618         return label;
619 }
620
621 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
622         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
623         float3 *omega_in, differential3 *domega_in, float *pdf)
624 {
625         int label;
626         float3 eval;
627
628         *pdf = 0.0f;
629         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
630
631         if(*pdf != 0.0f)
632                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
633
634         return label;
635 }
636
637 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
638 {
639         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
640                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
641
642                 if(CLOSURE_IS_BSDF(sc->type))
643                         bsdf_blur(kg, sc, roughness);
644         }
645 }
646
647 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
648 {
649         if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)
650                 return make_float3(1.0f, 1.0f, 1.0f);
651
652         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
653
654         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
655                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
656
657                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
658                         eval += sc->weight;
659         }
660
661         return eval;
662 }
663
664 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
665 {
666         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
667
668         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
669         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
670         
671         return alpha;
672 }
673
674 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
675 {
676         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
677
678         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
679                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
680
681                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
682                         eval += sc->weight;
683         }
684
685         return eval;
686 }
687
688 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
689 {
690         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
691
692         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
693                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
694
695                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
696                         eval += sc->weight;
697         }
698
699         return eval;
700 }
701
702 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
703 {
704         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
705
706         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
707                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
708
709                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
710                         eval += sc->weight;
711         }
712
713         return eval;
714 }
715
716 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
717 {
718         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
719
720         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
721                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
722
723                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
724                         eval += sc->weight;
725         }
726
727         return eval;
728 }
729
730 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
731 {
732         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
733         float3 N = make_float3(0.0f, 0.0f, 0.0f);
734
735         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
736                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
737
738                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
739                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
740                         eval += sc->weight*ao_factor;
741                         N += bsdf->N*average(sc->weight);
742                 }
743                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
744                         eval += sc->weight;
745                         N += ccl_fetch(sd, N)*average(sc->weight);
746                 }
747         }
748
749         if(is_zero(N))
750                 N = ccl_fetch(sd, N);
751         else
752                 N = normalize(N);
753
754         *N_ = N;
755         return eval;
756 }
757
758 #ifdef __SUBSURFACE__
759 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
760 {
761         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
762         float3 N = make_float3(0.0f, 0.0f, 0.0f);
763         float texture_blur = 0.0f, weight_sum = 0.0f;
764
765         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
766                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
767
768                 if(CLOSURE_IS_BSSRDF(sc->type)) {
769                         const Bssrdf *bssrdf = (const Bssrdf*)sc;
770                         float avg_weight = fabsf(average(sc->weight));
771
772                         N += bssrdf->N*avg_weight;
773                         eval += sc->weight;
774                         texture_blur += bssrdf->texture_blur*avg_weight;
775                         weight_sum += avg_weight;
776                 }
777         }
778
779         if(N_)
780                 *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N);
781
782         if(texture_blur_)
783                 *texture_blur_ = texture_blur/weight_sum;
784         
785         return eval;
786 }
787 #endif
788
789 /* Emission */
790
791 ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
792 {
793         return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I));
794 }
795
796 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
797 {
798         float3 eval;
799         eval = make_float3(0.0f, 0.0f, 0.0f);
800
801         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
802                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
803
804                 if(CLOSURE_IS_EMISSION(sc->type))
805                         eval += emissive_eval(kg, sd, sc)*sc->weight;
806         }
807
808         return eval;
809 }
810
811 /* Holdout */
812
813 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
814 {
815         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
816
817         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
818                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
819
820                 if(CLOSURE_IS_HOLDOUT(sc->type))
821                         weight += sc->weight;
822         }
823
824         return weight;
825 }
826
827 /* Surface Evaluation */
828
829 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
830         ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
831 {
832         ccl_fetch(sd, num_closure) = 0;
833         ccl_fetch(sd, num_closure_extra) = 0;
834         ccl_fetch(sd, randb_closure) = randb;
835
836 #ifdef __OSL__
837         if(kg->osl)
838                 OSLShader::eval_surface(kg, sd, state, path_flag, ctx);
839         else
840 #endif
841         {
842 #ifdef __SVM__
843                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
844 #else
845                 ccl_fetch_array(sd, closure, 0)->weight = make_float3(0.8f, 0.8f, 0.8f);
846                 ccl_fetch_array(sd, closure, 0)->N = ccl_fetch(sd, N);
847                 ccl_fetch_array(sd, closure, 0)->data0 = 0.0f;
848                 ccl_fetch_array(sd, closure, 0)->data1 = 0.0f;
849                 ccl_fetch(sd, flag) |= bsdf_diffuse_setup(ccl_fetch_array(sd, closure, 0));
850 #endif
851         }
852
853         if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
854                 ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953);
855         }
856 }
857
858 /* Background Evaluation */
859
860 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
861         ccl_addr_space PathState *state, int path_flag, ShaderContext ctx)
862 {
863         ccl_fetch(sd, num_closure) = 0;
864         ccl_fetch(sd, num_closure_extra) = 0;
865         ccl_fetch(sd, randb_closure) = 0.0f;
866
867 #ifdef __SVM__
868 #ifdef __OSL__
869         if(kg->osl) {
870                 OSLShader::eval_background(kg, sd, state, path_flag, ctx);
871         }
872         else
873 #endif
874         {
875                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
876         }
877
878         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
879
880         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
881                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
882
883                 if(CLOSURE_IS_BACKGROUND(sc->type))
884                         eval += sc->weight;
885         }
886
887         return eval;
888 #else
889         return make_float3(0.8f, 0.8f, 0.8f);
890 #endif
891 }
892
893 /* Volume */
894
895 #ifdef __VOLUME__
896
897 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
898         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
899 {
900         for(int i = 0; i < sd->num_closure; i++) {
901                 if(i == skip_phase)
902                         continue;
903
904                 const ShaderClosure *sc = &sd->closure[i];
905
906                 if(CLOSURE_IS_PHASE(sc->type)) {
907                         float phase_pdf = 0.0f;
908                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
909
910                         if(phase_pdf != 0.0f) {
911                                 bsdf_eval_accum(result_eval, sc->type, eval);
912                                 sum_pdf += phase_pdf*sc->sample_weight;
913                         }
914
915                         sum_sample_weight += sc->sample_weight;
916                 }
917         }
918
919         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
920 }
921
922 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
923         const float3 omega_in, BsdfEval *eval, float *pdf)
924 {
925         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
926
927         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
928 }
929
930 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
931         float randu, float randv, BsdfEval *phase_eval,
932         float3 *omega_in, differential3 *domega_in, float *pdf)
933 {
934         int sampled = 0;
935
936         if(sd->num_closure > 1) {
937                 /* pick a phase closure based on sample weights */
938                 float sum = 0.0f;
939
940                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
941                         const ShaderClosure *sc = &sd->closure[sampled];
942                         
943                         if(CLOSURE_IS_PHASE(sc->type))
944                                 sum += sc->sample_weight;
945                 }
946
947                 float r = sd->randb_closure*sum;
948                 sum = 0.0f;
949
950                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
951                         const ShaderClosure *sc = &sd->closure[sampled];
952                         
953                         if(CLOSURE_IS_PHASE(sc->type)) {
954                                 sum += sc->sample_weight;
955
956                                 if(r <= sum)
957                                         break;
958                         }
959                 }
960
961                 if(sampled == sd->num_closure) {
962                         *pdf = 0.0f;
963                         return LABEL_NONE;
964                 }
965         }
966
967         /* todo: this isn't quite correct, we don't weight anisotropy properly
968          * depending on color channels, even if this is perhaps not a common case */
969         const ShaderClosure *sc = &sd->closure[sampled];
970         int label;
971         float3 eval;
972
973         *pdf = 0.0f;
974         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
975
976         if(*pdf != 0.0f) {
977                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
978         }
979
980         return label;
981 }
982
983 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
984         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
985         float3 *omega_in, differential3 *domega_in, float *pdf)
986 {
987         int label;
988         float3 eval;
989
990         *pdf = 0.0f;
991         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
992
993         if(*pdf != 0.0f)
994                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
995
996         return label;
997 }
998
999 /* Volume Evaluation */
1000
1001 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
1002                                           ShaderData *sd,
1003                                           PathState *state,
1004                                           VolumeStack *stack,
1005                                           int path_flag,
1006                                           ShaderContext ctx)
1007 {
1008         /* reset closures once at the start, we will be accumulating the closures
1009          * for all volumes in the stack into a single array of closures */
1010         sd->num_closure = 0;
1011         sd->num_closure_extra = 0;
1012         sd->flag = 0;
1013
1014         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1015                 /* setup shaderdata from stack. it's mostly setup already in
1016                  * shader_setup_from_volume, this switching should be quick */
1017                 sd->object = stack[i].object;
1018                 sd->shader = stack[i].shader;
1019
1020                 sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
1021                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
1022
1023                 if(sd->object != OBJECT_NONE) {
1024                         sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
1025
1026 #ifdef __OBJECT_MOTION__
1027                         /* todo: this is inefficient for motion blur, we should be
1028                          * caching matrices instead of recomputing them each step */
1029                         shader_setup_object_transforms(kg, sd, sd->time);
1030 #endif
1031                 }
1032
1033                 /* evaluate shader */
1034 #ifdef __SVM__
1035 #  ifdef __OSL__
1036                 if(kg->osl) {
1037                         OSLShader::eval_volume(kg, sd, state, path_flag, ctx);
1038                 }
1039                 else
1040 #  endif
1041                 {
1042                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1043                 }
1044 #endif
1045
1046                 /* merge closures to avoid exceeding number of closures limit */
1047                 if(i > 0)
1048                         shader_merge_closures(sd);
1049         }
1050 }
1051
1052 #endif
1053
1054 /* Displacement Evaluation */
1055
1056 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx)
1057 {
1058         ccl_fetch(sd, num_closure) = 0;
1059         ccl_fetch(sd, num_closure_extra) = 0;
1060         ccl_fetch(sd, randb_closure) = 0.0f;
1061
1062         /* this will modify sd->P */
1063 #ifdef __SVM__
1064 #  ifdef __OSL__
1065         if(kg->osl)
1066                 OSLShader::eval_displacement(kg, sd, ctx);
1067         else
1068 #  endif
1069         {
1070                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1071         }
1072 #endif
1073 }
1074
1075 /* Transparent Shadows */
1076
1077 #ifdef __TRANSPARENT_SHADOWS__
1078 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1079 {
1080         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1081         int shader = 0;
1082
1083 #ifdef __HAIR__
1084         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1085 #endif
1086                 shader = kernel_tex_fetch(__tri_shader, prim);
1087 #ifdef __HAIR__
1088         }
1089         else {
1090                 float4 str = kernel_tex_fetch(__curves, prim);
1091                 shader = __float_as_int(str.z);
1092         }
1093 #endif
1094         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
1095
1096         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1097 }
1098 #endif
1099
1100 CCL_NAMESPACE_END
1101