Cycles: Make object flag names more obvious that they are object and not shader
[blender.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "closure/alloc.h"
28 #include "closure/bsdf_util.h"
29 #include "closure/bsdf.h"
30 #include "closure/emissive.h"
31
32 #include "svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
38 #ifdef __OBJECT_MOTION__
39 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
40 {
41         if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) {
42                 ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
43                 ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
44         }
45         else {
46                 ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
47                 ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
48         }
49 }
50 #endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60
61         ccl_fetch(sd, type) = isect->type;
62         ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
63
64         /* matrices and time */
65 #ifdef __OBJECT_MOTION__
66         shader_setup_object_transforms(kg, sd, ray->time);
67         ccl_fetch(sd, time) = ray->time;
68 #endif
69
70         ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim);
71         ccl_fetch(sd, ray_length) = isect->t;
72
73 #ifdef __UV__
74         ccl_fetch(sd, u) = isect->u;
75         ccl_fetch(sd, v) = isect->v;
76 #endif
77
78 #ifdef __HAIR__
79         if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
80                 /* curve */
81                 float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
82
83                 ccl_fetch(sd, shader) = __float_as_int(curvedata.z);
84                 ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray);
85         }
86         else
87 #endif
88         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
89                 /* static triangle */
90                 float3 Ng = triangle_normal(kg, sd);
91                 ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
92
93                 /* vectors */
94                 ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray);
95                 ccl_fetch(sd, Ng) = Ng;
96                 ccl_fetch(sd, N) = Ng;
97                 
98                 /* smooth normal */
99                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL)
100                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
101
102 #ifdef __DPDU__
103                 /* dPdu/dPdv */
104                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
105 #endif
106         }
107         else {
108                 /* motion triangle */
109                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
110         }
111
112         ccl_fetch(sd, I) = -ray->D;
113
114         ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
115
116 #ifdef __INSTANCING__
117         if(isect->object != OBJECT_NONE) {
118                 /* instance transform */
119                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
120                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
121 #  ifdef __DPDU__
122                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
123                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
124 #  endif
125         }
126 #endif
127
128         /* backfacing test */
129         bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
130
131         if(backfacing) {
132                 ccl_fetch(sd, flag) |= SD_BACKFACING;
133                 ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
134                 ccl_fetch(sd, N) = -ccl_fetch(sd, N);
135 #ifdef __DPDU__
136                 ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
137                 ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
138 #endif
139         }
140
141 #ifdef __RAY_DIFFERENTIALS__
142         /* differentials */
143         differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t);
144         differential_incoming(&ccl_fetch(sd, dI), ray->dD);
145         differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng));
146 #endif
147 }
148
149 /* ShaderData setup from BSSRDF scatter */
150
151 #ifdef __SUBSURFACE__
152 #  ifndef __KERNEL_CUDA__
153 ccl_device
154 #  else
155 ccl_device_inline
156 #  endif
157 void shader_setup_from_subsurface(
158         KernelGlobals *kg,
159         ShaderData *sd,
160         const Intersection *isect,
161         const Ray *ray)
162 {
163         bool backfacing = sd->flag & SD_BACKFACING;
164
165         /* object, matrices, time, ray_length stay the same */
166         sd->flag = kernel_tex_fetch(__object_flag, sd->object);
167         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
168         sd->type = isect->type;
169
170 #  ifdef __UV__
171         sd->u = isect->u;
172         sd->v = isect->v;
173 #  endif
174
175         /* fetch triangle data */
176         if(sd->type == PRIMITIVE_TRIANGLE) {
177                 float3 Ng = triangle_normal(kg, sd);
178                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
179
180                 /* static triangle */
181                 sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
182                 sd->Ng = Ng;
183                 sd->N = Ng;
184
185                 if(sd->shader & SHADER_SMOOTH_NORMAL)
186                         sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
187
188 #  ifdef __DPDU__
189                 /* dPdu/dPdv */
190                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
191 #  endif
192         }
193         else {
194                 /* motion triangle */
195                 motion_triangle_shader_setup(kg, sd, isect, ray, true);
196         }
197
198         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
199
200 #  ifdef __INSTANCING__
201         if(isect->object != OBJECT_NONE) {
202                 /* instance transform */
203                 object_normal_transform(kg, sd, &sd->N);
204                 object_normal_transform(kg, sd, &sd->Ng);
205 #    ifdef __DPDU__
206                 object_dir_transform(kg, sd, &sd->dPdu);
207                 object_dir_transform(kg, sd, &sd->dPdv);
208 #    endif
209         }
210 #  endif
211
212         /* backfacing test */
213         if(backfacing) {
214                 sd->flag |= SD_BACKFACING;
215                 sd->Ng = -sd->Ng;
216                 sd->N = -sd->N;
217 #  ifdef __DPDU__
218                 sd->dPdu = -sd->dPdu;
219                 sd->dPdv = -sd->dPdv;
220 #  endif
221         }
222
223         /* should not get used in principle as the shading will only use a diffuse
224          * BSDF, but the shader might still access it */
225         sd->I = sd->N;
226
227 #  ifdef __RAY_DIFFERENTIALS__
228         /* differentials */
229         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
230         /* don't modify dP and dI */
231 #  endif
232 }
233 #endif
234
235 /* ShaderData setup from position sampled on mesh */
236
237 ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
238                                                 ShaderData *sd,
239                                                 const float3 P,
240                                                 const float3 Ng,
241                                                 const float3 I,
242                                                 int shader, int object, int prim,
243                                                 float u, float v, float t,
244                                                 float time,
245                                                 bool object_space,
246                                                 int lamp)
247 {
248         /* vectors */
249         ccl_fetch(sd, P) = P;
250         ccl_fetch(sd, N) = Ng;
251         ccl_fetch(sd, Ng) = Ng;
252         ccl_fetch(sd, I) = I;
253         ccl_fetch(sd, shader) = shader;
254         if(prim != PRIM_NONE)
255                 ccl_fetch(sd, type) = PRIMITIVE_TRIANGLE;
256         else if(lamp != LAMP_NONE)
257                 ccl_fetch(sd, type) = PRIMITIVE_LAMP;
258         else
259                 ccl_fetch(sd, type) = PRIMITIVE_NONE;
260
261         /* primitive */
262 #ifdef __INSTANCING__
263         ccl_fetch(sd, object) = object;
264 #endif
265         /* currently no access to bvh prim index for strand sd->prim*/
266         ccl_fetch(sd, prim) = prim;
267 #ifdef __UV__
268         ccl_fetch(sd, u) = u;
269         ccl_fetch(sd, v) = v;
270 #endif
271         ccl_fetch(sd, ray_length) = t;
272
273         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
274         if(ccl_fetch(sd, object) != OBJECT_NONE) {
275                 ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
276
277 #ifdef __OBJECT_MOTION__
278                 shader_setup_object_transforms(kg, sd, time);
279                 ccl_fetch(sd, time) = time;
280         }
281         else if(lamp != LAMP_NONE) {
282                 ccl_fetch(sd, ob_tfm)  = lamp_fetch_transform(kg, lamp, false);
283                 ccl_fetch(sd, ob_itfm) = lamp_fetch_transform(kg, lamp, true);
284 #endif
285         }
286
287         /* transform into world space */
288         if(object_space) {
289                 object_position_transform_auto(kg, sd, &ccl_fetch(sd, P));
290                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
291                 ccl_fetch(sd, N) = ccl_fetch(sd, Ng);
292                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, I));
293         }
294
295         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
296                 /* smooth normal */
297                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
298                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
299
300 #ifdef __INSTANCING__
301                         if(!(ccl_fetch(sd, flag) & SD_OBJECT_TRANSFORM_APPLIED)) {
302                                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
303                         }
304 #endif
305                 }
306
307                 /* dPdu/dPdv */
308 #ifdef __DPDU__
309                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
310
311 #  ifdef __INSTANCING__
312                 if(!(ccl_fetch(sd, flag) & SD_OBJECT_TRANSFORM_APPLIED)) {
313                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
314                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
315                 }
316 #  endif
317 #endif
318         }
319         else {
320 #ifdef __DPDU__
321                 ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
322                 ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
323 #endif
324         }
325
326         /* backfacing test */
327         if(ccl_fetch(sd, prim) != PRIM_NONE) {
328                 bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
329
330                 if(backfacing) {
331                         ccl_fetch(sd, flag) |= SD_BACKFACING;
332                         ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
333                         ccl_fetch(sd, N) = -ccl_fetch(sd, N);
334 #ifdef __DPDU__
335                         ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
336                         ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
337 #endif
338                 }
339         }
340
341 #ifdef __RAY_DIFFERENTIALS__
342         /* no ray differentials here yet */
343         ccl_fetch(sd, dP) = differential3_zero();
344         ccl_fetch(sd, dI) = differential3_zero();
345         ccl_fetch(sd, du) = differential_zero();
346         ccl_fetch(sd, dv) = differential_zero();
347 #endif
348 }
349
350 /* ShaderData setup for displacement */
351
352 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
353         int object, int prim, float u, float v)
354 {
355         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
356         int shader;
357
358         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
359
360         /* force smooth shading for displacement */
361         shader |= SHADER_SMOOTH_NORMAL;
362
363         shader_setup_from_sample(kg, sd,
364                                  P, Ng, I,
365                                  shader, object, prim,
366                                  u, v, 0.0f, 0.5f,
367                                  !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED),
368                                  LAMP_NONE);
369 }
370
371 /* ShaderData setup from ray into background */
372
373 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
374 {
375         /* vectors */
376         ccl_fetch(sd, P) = ray->D;
377         ccl_fetch(sd, N) = -ray->D;
378         ccl_fetch(sd, Ng) = -ray->D;
379         ccl_fetch(sd, I) = -ray->D;
380         ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
381         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
382 #ifdef __OBJECT_MOTION__
383         ccl_fetch(sd, time) = ray->time;
384 #endif
385         ccl_fetch(sd, ray_length) = 0.0f;
386
387 #ifdef __INSTANCING__
388         ccl_fetch(sd, object) = PRIM_NONE;
389 #endif
390         ccl_fetch(sd, prim) = PRIM_NONE;
391 #ifdef __UV__
392         ccl_fetch(sd, u) = 0.0f;
393         ccl_fetch(sd, v) = 0.0f;
394 #endif
395
396 #ifdef __DPDU__
397         /* dPdu/dPdv */
398         ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
399         ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
400 #endif
401
402 #ifdef __RAY_DIFFERENTIALS__
403         /* differentials */
404         ccl_fetch(sd, dP) = ray->dD;
405         differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP));
406         ccl_fetch(sd, du) = differential_zero();
407         ccl_fetch(sd, dv) = differential_zero();
408 #endif
409 }
410
411 /* ShaderData setup from point inside volume */
412
413 #ifdef __VOLUME__
414 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
415 {
416         /* vectors */
417         sd->P = ray->P;
418         sd->N = -ray->D;  
419         sd->Ng = -ray->D;
420         sd->I = -ray->D;
421         sd->shader = SHADER_NONE;
422         sd->flag = 0;
423 #ifdef __OBJECT_MOTION__
424         sd->time = ray->time;
425 #endif
426         sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
427
428 #ifdef __INSTANCING__
429         sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
430 #endif
431         sd->prim = PRIM_NONE;
432         sd->type = PRIMITIVE_NONE;
433
434 #ifdef __UV__
435         sd->u = 0.0f;
436         sd->v = 0.0f;
437 #endif
438
439 #ifdef __DPDU__
440         /* dPdu/dPdv */
441         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
442         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
443 #endif
444
445 #ifdef __RAY_DIFFERENTIALS__
446         /* differentials */
447         sd->dP = ray->dD;
448         differential_incoming(&sd->dI, sd->dP);
449         sd->du = differential_zero();
450         sd->dv = differential_zero();
451 #endif
452
453         /* for NDC coordinates */
454         sd->ray_P = ray->P;
455         sd->ray_dP = ray->dP;
456 }
457 #endif
458
459 /* Merging */
460
461 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
462 ccl_device_inline void shader_merge_closures(ShaderData *sd)
463 {
464         /* merge identical closures, better when we sample a single closure at a time */
465         for(int i = 0; i < sd->num_closure; i++) {
466                 ShaderClosure *sci = &sd->closure[i];
467
468                 for(int j = i + 1; j < sd->num_closure; j++) {
469                         ShaderClosure *scj = &sd->closure[j];
470
471                         if(sci->type != scj->type)
472                                 continue;
473                         if(!bsdf_merge(sci, scj))
474                                 continue;
475
476                         sci->weight += scj->weight;
477                         sci->sample_weight += scj->sample_weight;
478
479                         int size = sd->num_closure - (j+1);
480                         if(size > 0) {
481                                 for(int k = 0; k < size; k++) {
482                                         scj[k] = scj[k+1];
483                                 }
484                         }
485
486                         sd->num_closure--;
487                         kernel_assert(sd->num_closure >= 0);
488                         j--;
489                 }
490         }
491 }
492 #endif
493
494 /* BSDF */
495
496 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
497         int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
498 {
499         /* this is the veach one-sample model with balance heuristic, some pdf
500          * factors drop out when using balance heuristic weighting */
501         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
502                 if(i == skip_bsdf)
503                         continue;
504
505                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
506
507                 if(CLOSURE_IS_BSDF(sc->type)) {
508                         float bsdf_pdf = 0.0f;
509                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
510
511                         if(bsdf_pdf != 0.0f) {
512                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight);
513                                 sum_pdf += bsdf_pdf*sc->sample_weight;
514                         }
515
516                         sum_sample_weight += sc->sample_weight;
517                 }
518         }
519
520         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
521 }
522
523 #ifdef __BRANCHED_PATH__
524 ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
525                                                         ShaderData *sd,
526                                                         const float3 omega_in,
527                                                         BsdfEval *result_eval,
528                                                         float light_pdf,
529                                                         bool use_mis)
530 {
531         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
532                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
533                 if(CLOSURE_IS_BSDF(sc->type)) {
534                         float bsdf_pdf = 0.0f;
535                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
536                         if(bsdf_pdf != 0.0f) {
537                                 float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
538                                 bsdf_eval_accum(result_eval,
539                                                 sc->type,
540                                                 eval * sc->weight * mis_weight);
541                         }
542                 }
543         }
544 }
545 #endif
546
547
548 #ifndef __KERNEL_CUDA__
549 ccl_device
550 #else
551 ccl_device_inline
552 #endif
553 void shader_bsdf_eval(KernelGlobals *kg,
554                       ShaderData *sd,
555                       const float3 omega_in,
556                       BsdfEval *eval,
557                       float light_pdf,
558                       bool use_mis)
559 {
560         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
561
562 #ifdef __BRANCHED_PATH__
563         if(kernel_data.integrator.branched)
564                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
565         else
566 #endif
567         {
568                 float pdf;
569                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
570                 if(use_mis) {
571                         float weight = power_heuristic(light_pdf, pdf);
572                         bsdf_eval_mul(eval, weight);
573                 }
574         }
575 }
576
577 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
578                                          ShaderData *sd,
579                                          float randu, float randv,
580                                          BsdfEval *bsdf_eval,
581                                          float3 *omega_in,
582                                          differential3 *domega_in,
583                                          float *pdf)
584 {
585         int sampled = 0;
586
587         if(ccl_fetch(sd, num_closure) > 1) {
588                 /* pick a BSDF closure based on sample weights */
589                 float sum = 0.0f;
590
591                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
592                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
593                         
594                         if(CLOSURE_IS_BSDF(sc->type))
595                                 sum += sc->sample_weight;
596                 }
597
598                 float r = ccl_fetch(sd, randb_closure)*sum;
599                 sum = 0.0f;
600
601                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
602                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
603                         
604                         if(CLOSURE_IS_BSDF(sc->type)) {
605                                 sum += sc->sample_weight;
606
607                                 if(r <= sum)
608                                         break;
609                         }
610                 }
611
612                 if(sampled == ccl_fetch(sd, num_closure)) {
613                         *pdf = 0.0f;
614                         return LABEL_NONE;
615                 }
616         }
617
618         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
619
620         int label;
621         float3 eval;
622
623         *pdf = 0.0f;
624         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
625
626         if(*pdf != 0.0f) {
627                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
628
629                 if(ccl_fetch(sd, num_closure) > 1) {
630                         float sweight = sc->sample_weight;
631                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
632                 }
633         }
634
635         return label;
636 }
637
638 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
639         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
640         float3 *omega_in, differential3 *domega_in, float *pdf)
641 {
642         int label;
643         float3 eval;
644
645         *pdf = 0.0f;
646         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
647
648         if(*pdf != 0.0f)
649                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
650
651         return label;
652 }
653
654 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
655 {
656         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
657                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
658
659                 if(CLOSURE_IS_BSDF(sc->type))
660                         bsdf_blur(kg, sc, roughness);
661         }
662 }
663
664 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
665 {
666         if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)
667                 return make_float3(1.0f, 1.0f, 1.0f);
668
669         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
670
671         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
672                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
673
674                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
675                         eval += sc->weight;
676         }
677
678         return eval;
679 }
680
681 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
682 {
683         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
684
685         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
686         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
687         
688         return alpha;
689 }
690
691 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
692 {
693         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
694
695         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
696                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
697
698                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
699                         eval += sc->weight;
700         }
701
702         return eval;
703 }
704
705 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
706 {
707         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
708
709         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
710                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
711
712                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
713                         eval += sc->weight;
714         }
715
716         return eval;
717 }
718
719 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
720 {
721         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
722
723         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
724                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
725
726                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
727                         eval += sc->weight;
728         }
729
730         return eval;
731 }
732
733 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
734 {
735         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
736
737         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
738                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
739
740                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
741                         eval += sc->weight;
742         }
743
744         return eval;
745 }
746
747 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
748 {
749         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
750         float3 N = make_float3(0.0f, 0.0f, 0.0f);
751
752         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
753                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
754
755                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
756                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
757                         eval += sc->weight*ao_factor;
758                         N += bsdf->N*average(sc->weight);
759                 }
760                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
761                         eval += sc->weight;
762                         N += ccl_fetch(sd, N)*average(sc->weight);
763                 }
764         }
765
766         if(is_zero(N))
767                 N = ccl_fetch(sd, N);
768         else
769                 N = normalize(N);
770
771         *N_ = N;
772         return eval;
773 }
774
775 #ifdef __SUBSURFACE__
776 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
777 {
778         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
779         float3 N = make_float3(0.0f, 0.0f, 0.0f);
780         float texture_blur = 0.0f, weight_sum = 0.0f;
781
782         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
783                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
784
785                 if(CLOSURE_IS_BSSRDF(sc->type)) {
786                         const Bssrdf *bssrdf = (const Bssrdf*)sc;
787                         float avg_weight = fabsf(average(sc->weight));
788
789                         N += bssrdf->N*avg_weight;
790                         eval += sc->weight;
791                         texture_blur += bssrdf->texture_blur*avg_weight;
792                         weight_sum += avg_weight;
793                 }
794         }
795
796         if(N_)
797                 *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N);
798
799         if(texture_blur_)
800                 *texture_blur_ = texture_blur/weight_sum;
801         
802         return eval;
803 }
804 #endif
805
806 /* Emission */
807
808 ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
809 {
810         return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I));
811 }
812
813 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
814 {
815         float3 eval;
816         eval = make_float3(0.0f, 0.0f, 0.0f);
817
818         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
819                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
820
821                 if(CLOSURE_IS_EMISSION(sc->type))
822                         eval += emissive_eval(kg, sd, sc)*sc->weight;
823         }
824
825         return eval;
826 }
827
828 /* Holdout */
829
830 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
831 {
832         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
833
834         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
835                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
836
837                 if(CLOSURE_IS_HOLDOUT(sc->type))
838                         weight += sc->weight;
839         }
840
841         return weight;
842 }
843
844 /* Surface Evaluation */
845
846 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
847         ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
848 {
849         ccl_fetch(sd, num_closure) = 0;
850         ccl_fetch(sd, num_closure_extra) = 0;
851         ccl_fetch(sd, randb_closure) = randb;
852
853 #ifdef __OSL__
854         if(kg->osl)
855                 OSLShader::eval_surface(kg, sd, state, path_flag, ctx);
856         else
857 #endif
858         {
859 #ifdef __SVM__
860                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
861 #else
862                 DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd,
863                                                              sizeof(DiffuseBsdf),
864                                                              make_float3(0.8f, 0.8f, 0.8f));
865                 bsdf->N = ccl_fetch(sd, N);
866                 ccl_fetch(sd, flag) |= bsdf_diffuse_setup(bsdf);
867 #endif
868         }
869
870         if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
871                 ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953);
872         }
873 }
874
875 /* Background Evaluation */
876
877 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
878         ccl_addr_space PathState *state, int path_flag, ShaderContext ctx)
879 {
880         ccl_fetch(sd, num_closure) = 0;
881         ccl_fetch(sd, num_closure_extra) = 0;
882         ccl_fetch(sd, randb_closure) = 0.0f;
883
884 #ifdef __SVM__
885 #ifdef __OSL__
886         if(kg->osl) {
887                 OSLShader::eval_background(kg, sd, state, path_flag, ctx);
888         }
889         else
890 #endif
891         {
892                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
893         }
894
895         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
896
897         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
898                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
899
900                 if(CLOSURE_IS_BACKGROUND(sc->type))
901                         eval += sc->weight;
902         }
903
904         return eval;
905 #else
906         return make_float3(0.8f, 0.8f, 0.8f);
907 #endif
908 }
909
910 /* Volume */
911
912 #ifdef __VOLUME__
913
914 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
915         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
916 {
917         for(int i = 0; i < sd->num_closure; i++) {
918                 if(i == skip_phase)
919                         continue;
920
921                 const ShaderClosure *sc = &sd->closure[i];
922
923                 if(CLOSURE_IS_PHASE(sc->type)) {
924                         float phase_pdf = 0.0f;
925                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
926
927                         if(phase_pdf != 0.0f) {
928                                 bsdf_eval_accum(result_eval, sc->type, eval);
929                                 sum_pdf += phase_pdf*sc->sample_weight;
930                         }
931
932                         sum_sample_weight += sc->sample_weight;
933                 }
934         }
935
936         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
937 }
938
939 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
940         const float3 omega_in, BsdfEval *eval, float *pdf)
941 {
942         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
943
944         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
945 }
946
947 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
948         float randu, float randv, BsdfEval *phase_eval,
949         float3 *omega_in, differential3 *domega_in, float *pdf)
950 {
951         int sampled = 0;
952
953         if(sd->num_closure > 1) {
954                 /* pick a phase closure based on sample weights */
955                 float sum = 0.0f;
956
957                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
958                         const ShaderClosure *sc = &sd->closure[sampled];
959                         
960                         if(CLOSURE_IS_PHASE(sc->type))
961                                 sum += sc->sample_weight;
962                 }
963
964                 float r = sd->randb_closure*sum;
965                 sum = 0.0f;
966
967                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
968                         const ShaderClosure *sc = &sd->closure[sampled];
969                         
970                         if(CLOSURE_IS_PHASE(sc->type)) {
971                                 sum += sc->sample_weight;
972
973                                 if(r <= sum)
974                                         break;
975                         }
976                 }
977
978                 if(sampled == sd->num_closure) {
979                         *pdf = 0.0f;
980                         return LABEL_NONE;
981                 }
982         }
983
984         /* todo: this isn't quite correct, we don't weight anisotropy properly
985          * depending on color channels, even if this is perhaps not a common case */
986         const ShaderClosure *sc = &sd->closure[sampled];
987         int label;
988         float3 eval;
989
990         *pdf = 0.0f;
991         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
992
993         if(*pdf != 0.0f) {
994                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
995         }
996
997         return label;
998 }
999
1000 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
1001         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
1002         float3 *omega_in, differential3 *domega_in, float *pdf)
1003 {
1004         int label;
1005         float3 eval;
1006
1007         *pdf = 0.0f;
1008         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1009
1010         if(*pdf != 0.0f)
1011                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1012
1013         return label;
1014 }
1015
1016 /* Volume Evaluation */
1017
1018 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
1019                                           ShaderData *sd,
1020                                           PathState *state,
1021                                           VolumeStack *stack,
1022                                           int path_flag,
1023                                           ShaderContext ctx)
1024 {
1025         /* reset closures once at the start, we will be accumulating the closures
1026          * for all volumes in the stack into a single array of closures */
1027         sd->num_closure = 0;
1028         sd->num_closure_extra = 0;
1029         sd->flag = 0;
1030
1031         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1032                 /* setup shaderdata from stack. it's mostly setup already in
1033                  * shader_setup_from_volume, this switching should be quick */
1034                 sd->object = stack[i].object;
1035                 sd->shader = stack[i].shader;
1036
1037                 sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
1038                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
1039
1040                 if(sd->object != OBJECT_NONE) {
1041                         sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
1042
1043 #ifdef __OBJECT_MOTION__
1044                         /* todo: this is inefficient for motion blur, we should be
1045                          * caching matrices instead of recomputing them each step */
1046                         shader_setup_object_transforms(kg, sd, sd->time);
1047 #endif
1048                 }
1049
1050                 /* evaluate shader */
1051 #ifdef __SVM__
1052 #  ifdef __OSL__
1053                 if(kg->osl) {
1054                         OSLShader::eval_volume(kg, sd, state, path_flag, ctx);
1055                 }
1056                 else
1057 #  endif
1058                 {
1059                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1060                 }
1061 #endif
1062
1063                 /* merge closures to avoid exceeding number of closures limit */
1064                 if(i > 0)
1065                         shader_merge_closures(sd);
1066         }
1067 }
1068
1069 #endif
1070
1071 /* Displacement Evaluation */
1072
1073 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx)
1074 {
1075         ccl_fetch(sd, num_closure) = 0;
1076         ccl_fetch(sd, num_closure_extra) = 0;
1077         ccl_fetch(sd, randb_closure) = 0.0f;
1078
1079         /* this will modify sd->P */
1080 #ifdef __SVM__
1081 #  ifdef __OSL__
1082         if(kg->osl)
1083                 OSLShader::eval_displacement(kg, sd, ctx);
1084         else
1085 #  endif
1086         {
1087                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1088         }
1089 #endif
1090 }
1091
1092 /* Transparent Shadows */
1093
1094 #ifdef __TRANSPARENT_SHADOWS__
1095 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1096 {
1097         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1098         int shader = 0;
1099
1100 #ifdef __HAIR__
1101         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1102 #endif
1103                 shader = kernel_tex_fetch(__tri_shader, prim);
1104 #ifdef __HAIR__
1105         }
1106         else {
1107                 float4 str = kernel_tex_fetch(__curves, prim);
1108                 shader = __float_as_int(str.z);
1109         }
1110 #endif
1111         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
1112
1113         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1114 }
1115 #endif
1116
1117 CCL_NAMESPACE_END
1118