b7641c37d935d443f25528a4d2dfb430e8fa3d24
[blender.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "closure/alloc.h"
28 #include "closure/bsdf_util.h"
29 #include "closure/bsdf.h"
30 #include "closure/emissive.h"
31
32 #include "svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
38 #ifdef __OBJECT_MOTION__
39 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
40 {
41         if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) {
42                 ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
43                 ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
44         }
45         else {
46                 ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
47                 ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
48         }
49 }
50 #endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60
61         ccl_fetch(sd, type) = isect->type;
62         ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
63
64         /* matrices and time */
65 #ifdef __OBJECT_MOTION__
66         shader_setup_object_transforms(kg, sd, ray->time);
67         ccl_fetch(sd, time) = ray->time;
68 #endif
69
70         ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim);
71         ccl_fetch(sd, ray_length) = isect->t;
72
73 #ifdef __UV__
74         ccl_fetch(sd, u) = isect->u;
75         ccl_fetch(sd, v) = isect->v;
76 #endif
77
78 #ifdef __HAIR__
79         if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
80                 /* curve */
81                 float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
82
83                 ccl_fetch(sd, shader) = __float_as_int(curvedata.z);
84                 ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray);
85         }
86         else
87 #endif
88         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
89                 /* static triangle */
90                 float3 Ng = triangle_normal(kg, sd);
91                 ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
92
93                 /* vectors */
94                 ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray);
95                 ccl_fetch(sd, Ng) = Ng;
96                 ccl_fetch(sd, N) = Ng;
97                 
98                 /* smooth normal */
99                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL)
100                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
101
102 #ifdef __DPDU__
103                 /* dPdu/dPdv */
104                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
105 #endif
106         }
107         else {
108                 /* motion triangle */
109                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
110         }
111
112         ccl_fetch(sd, I) = -ray->D;
113
114         ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
115
116 #ifdef __INSTANCING__
117         if(isect->object != OBJECT_NONE) {
118                 /* instance transform */
119                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
120                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
121 #  ifdef __DPDU__
122                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
123                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
124 #  endif
125         }
126 #endif
127
128         /* backfacing test */
129         bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
130
131         if(backfacing) {
132                 ccl_fetch(sd, flag) |= SD_BACKFACING;
133                 ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
134                 ccl_fetch(sd, N) = -ccl_fetch(sd, N);
135 #ifdef __DPDU__
136                 ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
137                 ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
138 #endif
139         }
140
141 #ifdef __RAY_DIFFERENTIALS__
142         /* differentials */
143         differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t);
144         differential_incoming(&ccl_fetch(sd, dI), ray->dD);
145         differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng));
146 #endif
147 }
148
149 /* ShaderData setup from BSSRDF scatter */
150
151 #ifdef __SUBSURFACE__
152 ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd,
153         const Intersection *isect, const Ray *ray)
154 {
155         bool backfacing = sd->flag & SD_BACKFACING;
156
157         /* object, matrices, time, ray_length stay the same */
158         sd->flag = kernel_tex_fetch(__object_flag, sd->object);
159         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
160         sd->type = isect->type;
161
162 #  ifdef __UV__
163         sd->u = isect->u;
164         sd->v = isect->v;
165 #  endif
166
167         /* fetch triangle data */
168         if(sd->type == PRIMITIVE_TRIANGLE) {
169                 float3 Ng = triangle_normal(kg, sd);
170                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
171
172                 /* static triangle */
173                 sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
174                 sd->Ng = Ng;
175                 sd->N = Ng;
176
177                 if(sd->shader & SHADER_SMOOTH_NORMAL)
178                         sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
179
180 #  ifdef __DPDU__
181                 /* dPdu/dPdv */
182                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
183 #  endif
184         }
185         else {
186                 /* motion triangle */
187                 motion_triangle_shader_setup(kg, sd, isect, ray, true);
188         }
189
190         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
191
192 #  ifdef __INSTANCING__
193         if(isect->object != OBJECT_NONE) {
194                 /* instance transform */
195                 object_normal_transform(kg, sd, &sd->N);
196                 object_normal_transform(kg, sd, &sd->Ng);
197 #    ifdef __DPDU__
198                 object_dir_transform(kg, sd, &sd->dPdu);
199                 object_dir_transform(kg, sd, &sd->dPdv);
200 #    endif
201         }
202 #  endif
203
204         /* backfacing test */
205         if(backfacing) {
206                 sd->flag |= SD_BACKFACING;
207                 sd->Ng = -sd->Ng;
208                 sd->N = -sd->N;
209 #  ifdef __DPDU__
210                 sd->dPdu = -sd->dPdu;
211                 sd->dPdv = -sd->dPdv;
212 #  endif
213         }
214
215         /* should not get used in principle as the shading will only use a diffuse
216          * BSDF, but the shader might still access it */
217         sd->I = sd->N;
218
219 #  ifdef __RAY_DIFFERENTIALS__
220         /* differentials */
221         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
222         /* don't modify dP and dI */
223 #  endif
224 }
225 #endif
226
227 /* ShaderData setup from position sampled on mesh */
228
229 ccl_device void shader_setup_from_sample(KernelGlobals *kg,
230                                          ShaderData *sd,
231                                          const float3 P,
232                                          const float3 Ng,
233                                          const float3 I,
234                                          int shader, int object, int prim,
235                                          float u, float v, float t,
236                                          float time)
237 {
238         /* vectors */
239         ccl_fetch(sd, P) = P;
240         ccl_fetch(sd, N) = Ng;
241         ccl_fetch(sd, Ng) = Ng;
242         ccl_fetch(sd, I) = I;
243         ccl_fetch(sd, shader) = shader;
244         ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
245
246         /* primitive */
247 #ifdef __INSTANCING__
248         ccl_fetch(sd, object) = object;
249 #endif
250         /* currently no access to bvh prim index for strand sd->prim*/
251         ccl_fetch(sd, prim) = prim;
252 #ifdef __UV__
253         ccl_fetch(sd, u) = u;
254         ccl_fetch(sd, v) = v;
255 #endif
256         ccl_fetch(sd, ray_length) = t;
257
258         /* detect instancing, for non-instanced the object index is -object-1 */
259 #ifdef __INSTANCING__
260         bool instanced = false;
261
262         if(ccl_fetch(sd, prim) != PRIM_NONE) {
263                 if(ccl_fetch(sd, object) >= 0)
264                         instanced = true;
265                 else
266 #endif
267                         ccl_fetch(sd, object) = ~ccl_fetch(sd, object);
268 #ifdef __INSTANCING__
269         }
270 #endif
271
272         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
273         if(ccl_fetch(sd, object) != OBJECT_NONE) {
274                 ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
275
276 #ifdef __OBJECT_MOTION__
277                 shader_setup_object_transforms(kg, sd, time);
278         }
279
280         ccl_fetch(sd, time) = time;
281 #else
282         }
283 #endif
284
285         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
286                 /* smooth normal */
287                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
288                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
289
290 #ifdef __INSTANCING__
291                         if(instanced)
292                                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
293 #endif
294                 }
295
296                 /* dPdu/dPdv */
297 #ifdef __DPDU__
298                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
299
300 #  ifdef __INSTANCING__
301                 if(instanced) {
302                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
303                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
304                 }
305 #  endif
306 #endif
307         }
308         else {
309 #ifdef __DPDU__
310                 ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
311                 ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
312 #endif
313         }
314
315         /* backfacing test */
316         if(ccl_fetch(sd, prim) != PRIM_NONE) {
317                 bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
318
319                 if(backfacing) {
320                         ccl_fetch(sd, flag) |= SD_BACKFACING;
321                         ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
322                         ccl_fetch(sd, N) = -ccl_fetch(sd, N);
323 #ifdef __DPDU__
324                         ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
325                         ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
326 #endif
327                 }
328         }
329
330 #ifdef __RAY_DIFFERENTIALS__
331         /* no ray differentials here yet */
332         ccl_fetch(sd, dP) = differential3_zero();
333         ccl_fetch(sd, dI) = differential3_zero();
334         ccl_fetch(sd, du) = differential_zero();
335         ccl_fetch(sd, dv) = differential_zero();
336 #endif
337 }
338
339 /* ShaderData setup for displacement */
340
341 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
342         int object, int prim, float u, float v)
343 {
344         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
345         int shader;
346
347         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
348
349         /* force smooth shading for displacement */
350         shader |= SHADER_SMOOTH_NORMAL;
351
352         /* watch out: no instance transform currently */
353
354         shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID);
355 }
356
357 /* ShaderData setup from ray into background */
358
359 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
360 {
361         /* vectors */
362         ccl_fetch(sd, P) = ray->D;
363         ccl_fetch(sd, N) = -ray->D;
364         ccl_fetch(sd, Ng) = -ray->D;
365         ccl_fetch(sd, I) = -ray->D;
366         ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
367         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
368 #ifdef __OBJECT_MOTION__
369         ccl_fetch(sd, time) = ray->time;
370 #endif
371         ccl_fetch(sd, ray_length) = 0.0f;
372
373 #ifdef __INSTANCING__
374         ccl_fetch(sd, object) = PRIM_NONE;
375 #endif
376         ccl_fetch(sd, prim) = PRIM_NONE;
377 #ifdef __UV__
378         ccl_fetch(sd, u) = 0.0f;
379         ccl_fetch(sd, v) = 0.0f;
380 #endif
381
382 #ifdef __DPDU__
383         /* dPdu/dPdv */
384         ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
385         ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
386 #endif
387
388 #ifdef __RAY_DIFFERENTIALS__
389         /* differentials */
390         ccl_fetch(sd, dP) = ray->dD;
391         differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP));
392         ccl_fetch(sd, du) = differential_zero();
393         ccl_fetch(sd, dv) = differential_zero();
394 #endif
395 }
396
397 /* ShaderData setup from point inside volume */
398
399 #ifdef __VOLUME__
400 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
401 {
402         /* vectors */
403         sd->P = ray->P;
404         sd->N = -ray->D;  
405         sd->Ng = -ray->D;
406         sd->I = -ray->D;
407         sd->shader = SHADER_NONE;
408         sd->flag = 0;
409 #ifdef __OBJECT_MOTION__
410         sd->time = ray->time;
411 #endif
412         sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
413
414 #ifdef __INSTANCING__
415         sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
416 #endif
417         sd->prim = PRIM_NONE;
418         sd->type = PRIMITIVE_NONE;
419
420 #ifdef __UV__
421         sd->u = 0.0f;
422         sd->v = 0.0f;
423 #endif
424
425 #ifdef __DPDU__
426         /* dPdu/dPdv */
427         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
428         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
429 #endif
430
431 #ifdef __RAY_DIFFERENTIALS__
432         /* differentials */
433         sd->dP = ray->dD;
434         differential_incoming(&sd->dI, sd->dP);
435         sd->du = differential_zero();
436         sd->dv = differential_zero();
437 #endif
438
439         /* for NDC coordinates */
440         sd->ray_P = ray->P;
441         sd->ray_dP = ray->dP;
442 }
443 #endif
444
445 /* Merging */
446
447 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
448 ccl_device void shader_merge_closures(ShaderData *sd)
449 {
450         /* merge identical closures, better when we sample a single closure at a time */
451         for(int i = 0; i < sd->num_closure; i++) {
452                 ShaderClosure *sci = &sd->closure[i];
453
454                 for(int j = i + 1; j < sd->num_closure; j++) {
455                         ShaderClosure *scj = &sd->closure[j];
456
457                         if(sci->type != scj->type)
458                                 continue;
459                         if(!bsdf_merge(sci, scj))
460                                 continue;
461
462                         sci->weight += scj->weight;
463                         sci->sample_weight += scj->sample_weight;
464
465                         int size = sd->num_closure - (j+1);
466                         if(size > 0) {
467                                 for(int k = 0; k < size; k++) {
468                                         scj[k] = scj[k+1];
469                                 }
470                         }
471
472                         sd->num_closure--;
473                         kernel_assert(sd->num_closure >= 0);
474                         j--;
475                 }
476         }
477 }
478 #endif
479
480 /* BSDF */
481
482 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
483         int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
484 {
485         /* this is the veach one-sample model with balance heuristic, some pdf
486          * factors drop out when using balance heuristic weighting */
487         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
488                 if(i == skip_bsdf)
489                         continue;
490
491                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
492
493                 if(CLOSURE_IS_BSDF(sc->type)) {
494                         float bsdf_pdf = 0.0f;
495                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
496
497                         if(bsdf_pdf != 0.0f) {
498                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight);
499                                 sum_pdf += bsdf_pdf*sc->sample_weight;
500                         }
501
502                         sum_sample_weight += sc->sample_weight;
503                 }
504         }
505
506         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
507 }
508
509 #ifdef __BRANCHED_PATH__
510 ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
511                                                         ShaderData *sd,
512                                                         const float3 omega_in,
513                                                         BsdfEval *result_eval,
514                                                         float light_pdf,
515                                                         bool use_mis)
516 {
517         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
518                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
519                 if(CLOSURE_IS_BSDF(sc->type)) {
520                         float bsdf_pdf = 0.0f;
521                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
522                         if(bsdf_pdf != 0.0f) {
523                                 float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
524                                 bsdf_eval_accum(result_eval,
525                                                 sc->type,
526                                                 eval * sc->weight * mis_weight);
527                         }
528                 }
529         }
530 }
531 #endif
532
533 ccl_device void shader_bsdf_eval(KernelGlobals *kg,
534                                  ShaderData *sd,
535                                  const float3 omega_in,
536                                  BsdfEval *eval,
537                                  float light_pdf,
538                                  bool use_mis)
539 {
540         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
541
542 #ifdef __BRANCHED_PATH__
543         if(kernel_data.integrator.branched)
544                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
545         else
546 #endif
547         {
548                 float pdf;
549                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
550                 if(use_mis) {
551                         float weight = power_heuristic(light_pdf, pdf);
552                         bsdf_eval_mul(eval, make_float3(weight, weight, weight));
553                 }
554         }
555 }
556
557 ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd,
558         float randu, float randv, BsdfEval *bsdf_eval,
559         float3 *omega_in, differential3 *domega_in, float *pdf)
560 {
561         int sampled = 0;
562
563         if(ccl_fetch(sd, num_closure) > 1) {
564                 /* pick a BSDF closure based on sample weights */
565                 float sum = 0.0f;
566
567                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
568                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
569                         
570                         if(CLOSURE_IS_BSDF(sc->type))
571                                 sum += sc->sample_weight;
572                 }
573
574                 float r = ccl_fetch(sd, randb_closure)*sum;
575                 sum = 0.0f;
576
577                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
578                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
579                         
580                         if(CLOSURE_IS_BSDF(sc->type)) {
581                                 sum += sc->sample_weight;
582
583                                 if(r <= sum)
584                                         break;
585                         }
586                 }
587
588                 if(sampled == ccl_fetch(sd, num_closure)) {
589                         *pdf = 0.0f;
590                         return LABEL_NONE;
591                 }
592         }
593
594         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
595
596         int label;
597         float3 eval;
598
599         *pdf = 0.0f;
600         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
601
602         if(*pdf != 0.0f) {
603                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
604
605                 if(ccl_fetch(sd, num_closure) > 1) {
606                         float sweight = sc->sample_weight;
607                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
608                 }
609         }
610
611         return label;
612 }
613
614 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
615         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
616         float3 *omega_in, differential3 *domega_in, float *pdf)
617 {
618         int label;
619         float3 eval;
620
621         *pdf = 0.0f;
622         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
623
624         if(*pdf != 0.0f)
625                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
626
627         return label;
628 }
629
630 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
631 {
632         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
633                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
634
635                 if(CLOSURE_IS_BSDF(sc->type))
636                         bsdf_blur(kg, sc, roughness);
637         }
638 }
639
640 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
641 {
642         if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)
643                 return make_float3(1.0f, 1.0f, 1.0f);
644
645         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
646
647         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
648                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
649
650                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
651                         eval += sc->weight;
652         }
653
654         return eval;
655 }
656
657 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
658 {
659         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
660
661         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
662         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
663         
664         return alpha;
665 }
666
667 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
668 {
669         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
670
671         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
672                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
673
674                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
675                         eval += sc->weight;
676         }
677
678         return eval;
679 }
680
681 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
682 {
683         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
684
685         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
686                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
687
688                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
689                         eval += sc->weight;
690         }
691
692         return eval;
693 }
694
695 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
696 {
697         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
698
699         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
700                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
701
702                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
703                         eval += sc->weight;
704         }
705
706         return eval;
707 }
708
709 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
710 {
711         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
712
713         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
714                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
715
716                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
717                         eval += sc->weight;
718         }
719
720         return eval;
721 }
722
723 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
724 {
725         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
726         float3 N = make_float3(0.0f, 0.0f, 0.0f);
727
728         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
729                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
730
731                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
732                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
733                         eval += sc->weight*ao_factor;
734                         N += bsdf->N*average(sc->weight);
735                 }
736                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
737                         eval += sc->weight;
738                         N += ccl_fetch(sd, N)*average(sc->weight);
739                 }
740         }
741
742         if(is_zero(N))
743                 N = ccl_fetch(sd, N);
744         else
745                 N = normalize(N);
746
747         *N_ = N;
748         return eval;
749 }
750
751 #ifdef __SUBSURFACE__
752 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
753 {
754         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
755         float3 N = make_float3(0.0f, 0.0f, 0.0f);
756         float texture_blur = 0.0f, weight_sum = 0.0f;
757
758         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
759                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
760
761                 if(CLOSURE_IS_BSSRDF(sc->type)) {
762                         const Bssrdf *bssrdf = (const Bssrdf*)sc;
763                         float avg_weight = fabsf(average(sc->weight));
764
765                         N += bssrdf->N*avg_weight;
766                         eval += sc->weight;
767                         texture_blur += bssrdf->texture_blur*avg_weight;
768                         weight_sum += avg_weight;
769                 }
770         }
771
772         if(N_)
773                 *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N);
774
775         if(texture_blur_)
776                 *texture_blur_ = texture_blur/weight_sum;
777         
778         return eval;
779 }
780 #endif
781
782 /* Emission */
783
784 ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
785 {
786         return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I));
787 }
788
789 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
790 {
791         float3 eval;
792         eval = make_float3(0.0f, 0.0f, 0.0f);
793
794         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
795                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
796
797                 if(CLOSURE_IS_EMISSION(sc->type))
798                         eval += emissive_eval(kg, sd, sc)*sc->weight;
799         }
800
801         return eval;
802 }
803
804 /* Holdout */
805
806 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
807 {
808         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
809
810         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
811                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
812
813                 if(CLOSURE_IS_HOLDOUT(sc->type))
814                         weight += sc->weight;
815         }
816
817         return weight;
818 }
819
820 /* Surface Evaluation */
821
822 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
823         ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
824 {
825         ccl_fetch(sd, num_closure) = 0;
826         ccl_fetch(sd, num_closure_extra) = 0;
827         ccl_fetch(sd, randb_closure) = randb;
828
829 #ifdef __OSL__
830         if(kg->osl)
831                 OSLShader::eval_surface(kg, sd, state, path_flag, ctx);
832         else
833 #endif
834         {
835 #ifdef __SVM__
836                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
837 #else
838                 ccl_fetch_array(sd, closure, 0)->weight = make_float3(0.8f, 0.8f, 0.8f);
839                 ccl_fetch_array(sd, closure, 0)->N = ccl_fetch(sd, N);
840                 ccl_fetch_array(sd, closure, 0)->data0 = 0.0f;
841                 ccl_fetch_array(sd, closure, 0)->data1 = 0.0f;
842                 ccl_fetch(sd, flag) |= bsdf_diffuse_setup(ccl_fetch_array(sd, closure, 0));
843 #endif
844         }
845
846         if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
847                 ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953);
848         }
849 }
850
851 /* Background Evaluation */
852
853 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
854         ccl_addr_space PathState *state, int path_flag, ShaderContext ctx)
855 {
856         ccl_fetch(sd, num_closure) = 0;
857         ccl_fetch(sd, num_closure_extra) = 0;
858         ccl_fetch(sd, randb_closure) = 0.0f;
859
860 #ifdef __SVM__
861 #ifdef __OSL__
862         if(kg->osl) {
863                 OSLShader::eval_background(kg, sd, state, path_flag, ctx);
864         }
865         else
866 #endif
867         {
868                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
869         }
870
871         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
872
873         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
874                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
875
876                 if(CLOSURE_IS_BACKGROUND(sc->type))
877                         eval += sc->weight;
878         }
879
880         return eval;
881 #else
882         return make_float3(0.8f, 0.8f, 0.8f);
883 #endif
884 }
885
886 /* Volume */
887
888 #ifdef __VOLUME__
889
890 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
891         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
892 {
893         for(int i = 0; i < sd->num_closure; i++) {
894                 if(i == skip_phase)
895                         continue;
896
897                 const ShaderClosure *sc = &sd->closure[i];
898
899                 if(CLOSURE_IS_PHASE(sc->type)) {
900                         float phase_pdf = 0.0f;
901                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
902
903                         if(phase_pdf != 0.0f) {
904                                 bsdf_eval_accum(result_eval, sc->type, eval);
905                                 sum_pdf += phase_pdf*sc->sample_weight;
906                         }
907
908                         sum_sample_weight += sc->sample_weight;
909                 }
910         }
911
912         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
913 }
914
915 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
916         const float3 omega_in, BsdfEval *eval, float *pdf)
917 {
918         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
919
920         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
921 }
922
923 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
924         float randu, float randv, BsdfEval *phase_eval,
925         float3 *omega_in, differential3 *domega_in, float *pdf)
926 {
927         int sampled = 0;
928
929         if(sd->num_closure > 1) {
930                 /* pick a phase closure based on sample weights */
931                 float sum = 0.0f;
932
933                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
934                         const ShaderClosure *sc = &sd->closure[sampled];
935                         
936                         if(CLOSURE_IS_PHASE(sc->type))
937                                 sum += sc->sample_weight;
938                 }
939
940                 float r = sd->randb_closure*sum;
941                 sum = 0.0f;
942
943                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
944                         const ShaderClosure *sc = &sd->closure[sampled];
945                         
946                         if(CLOSURE_IS_PHASE(sc->type)) {
947                                 sum += sc->sample_weight;
948
949                                 if(r <= sum)
950                                         break;
951                         }
952                 }
953
954                 if(sampled == sd->num_closure) {
955                         *pdf = 0.0f;
956                         return LABEL_NONE;
957                 }
958         }
959
960         /* todo: this isn't quite correct, we don't weight anisotropy properly
961          * depending on color channels, even if this is perhaps not a common case */
962         const ShaderClosure *sc = &sd->closure[sampled];
963         int label;
964         float3 eval;
965
966         *pdf = 0.0f;
967         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
968
969         if(*pdf != 0.0f) {
970                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
971         }
972
973         return label;
974 }
975
976 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
977         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
978         float3 *omega_in, differential3 *domega_in, float *pdf)
979 {
980         int label;
981         float3 eval;
982
983         *pdf = 0.0f;
984         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
985
986         if(*pdf != 0.0f)
987                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
988
989         return label;
990 }
991
992 /* Volume Evaluation */
993
994 ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
995         PathState *state, VolumeStack *stack, int path_flag, ShaderContext ctx)
996 {
997         /* reset closures once at the start, we will be accumulating the closures
998          * for all volumes in the stack into a single array of closures */
999         sd->num_closure = 0;
1000         sd->num_closure_extra = 0;
1001         sd->flag = 0;
1002
1003         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1004                 /* setup shaderdata from stack. it's mostly setup already in
1005                  * shader_setup_from_volume, this switching should be quick */
1006                 sd->object = stack[i].object;
1007                 sd->shader = stack[i].shader;
1008
1009                 sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
1010                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
1011
1012                 if(sd->object != OBJECT_NONE) {
1013                         sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
1014
1015 #ifdef __OBJECT_MOTION__
1016                         /* todo: this is inefficient for motion blur, we should be
1017                          * caching matrices instead of recomputing them each step */
1018                         shader_setup_object_transforms(kg, sd, sd->time);
1019 #endif
1020                 }
1021
1022                 /* evaluate shader */
1023 #ifdef __SVM__
1024 #  ifdef __OSL__
1025                 if(kg->osl) {
1026                         OSLShader::eval_volume(kg, sd, state, path_flag, ctx);
1027                 }
1028                 else
1029 #  endif
1030                 {
1031                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1032                 }
1033 #endif
1034
1035                 /* merge closures to avoid exceeding number of closures limit */
1036                 if(i > 0)
1037                         shader_merge_closures(sd);
1038         }
1039 }
1040
1041 #endif
1042
1043 /* Displacement Evaluation */
1044
1045 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx)
1046 {
1047         ccl_fetch(sd, num_closure) = 0;
1048         ccl_fetch(sd, num_closure_extra) = 0;
1049         ccl_fetch(sd, randb_closure) = 0.0f;
1050
1051         /* this will modify sd->P */
1052 #ifdef __SVM__
1053 #  ifdef __OSL__
1054         if(kg->osl)
1055                 OSLShader::eval_displacement(kg, sd, ctx);
1056         else
1057 #  endif
1058         {
1059                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1060         }
1061 #endif
1062 }
1063
1064 /* Transparent Shadows */
1065
1066 #ifdef __TRANSPARENT_SHADOWS__
1067 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1068 {
1069         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1070         int shader = 0;
1071
1072 #ifdef __HAIR__
1073         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1074 #endif
1075                 shader = kernel_tex_fetch(__tri_shader, prim);
1076 #ifdef __HAIR__
1077         }
1078         else {
1079                 float4 str = kernel_tex_fetch(__curves, prim);
1080                 shader = __float_as_int(str.z);
1081         }
1082 #endif
1083         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
1084
1085         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1086 }
1087 #endif
1088
1089 CCL_NAMESPACE_END
1090