Cycles: Some more inline policy tweaks for CUDA 8
[blender.git] / intern / cycles / kernel / kernel_shader.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ShaderData, used in four steps:
19  *
20  * Setup from incoming ray, sampled position and background.
21  * Execute for surface, volume or displacement.
22  * Evaluate one or more closures.
23  * Release.
24  *
25  */
26
27 #include "closure/alloc.h"
28 #include "closure/bsdf_util.h"
29 #include "closure/bsdf.h"
30 #include "closure/emissive.h"
31
32 #include "svm/svm.h"
33
34 CCL_NAMESPACE_BEGIN
35
36 /* ShaderData setup from incoming ray */
37
38 #ifdef __OBJECT_MOTION__
39 ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
40 {
41         if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) {
42                 ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time);
43                 ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm));
44         }
45         else {
46                 ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM);
47                 ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM);
48         }
49 }
50 #endif
51
52 ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
53                                                ShaderData *sd,
54                                                const Intersection *isect,
55                                                const Ray *ray)
56 {
57 #ifdef __INSTANCING__
58         ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
59 #endif
60
61         ccl_fetch(sd, type) = isect->type;
62         ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
63
64         /* matrices and time */
65 #ifdef __OBJECT_MOTION__
66         shader_setup_object_transforms(kg, sd, ray->time);
67         ccl_fetch(sd, time) = ray->time;
68 #endif
69
70         ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim);
71         ccl_fetch(sd, ray_length) = isect->t;
72
73 #ifdef __UV__
74         ccl_fetch(sd, u) = isect->u;
75         ccl_fetch(sd, v) = isect->v;
76 #endif
77
78 #ifdef __HAIR__
79         if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
80                 /* curve */
81                 float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
82
83                 ccl_fetch(sd, shader) = __float_as_int(curvedata.z);
84                 ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray);
85         }
86         else
87 #endif
88         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
89                 /* static triangle */
90                 float3 Ng = triangle_normal(kg, sd);
91                 ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim));
92
93                 /* vectors */
94                 ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray);
95                 ccl_fetch(sd, Ng) = Ng;
96                 ccl_fetch(sd, N) = Ng;
97                 
98                 /* smooth normal */
99                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL)
100                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
101
102 #ifdef __DPDU__
103                 /* dPdu/dPdv */
104                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
105 #endif
106         }
107         else {
108                 /* motion triangle */
109                 motion_triangle_shader_setup(kg, sd, isect, ray, false);
110         }
111
112         ccl_fetch(sd, I) = -ray->D;
113
114         ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
115
116 #ifdef __INSTANCING__
117         if(isect->object != OBJECT_NONE) {
118                 /* instance transform */
119                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
120                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng));
121 #  ifdef __DPDU__
122                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
123                 object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
124 #  endif
125         }
126 #endif
127
128         /* backfacing test */
129         bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
130
131         if(backfacing) {
132                 ccl_fetch(sd, flag) |= SD_BACKFACING;
133                 ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
134                 ccl_fetch(sd, N) = -ccl_fetch(sd, N);
135 #ifdef __DPDU__
136                 ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
137                 ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
138 #endif
139         }
140
141 #ifdef __RAY_DIFFERENTIALS__
142         /* differentials */
143         differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t);
144         differential_incoming(&ccl_fetch(sd, dI), ray->dD);
145         differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng));
146 #endif
147 }
148
149 /* ShaderData setup from BSSRDF scatter */
150
151 #ifdef __SUBSURFACE__
152 #  ifndef __KERNEL_CUDS__
153 ccl_device
154 #  else
155 ccl_device_inline
156 #  endif
157 void shader_setup_from_subsurface(
158         KernelGlobals *kg,
159         ShaderData *sd,
160         const Intersection *isect,
161         const Ray *ray)
162 {
163         bool backfacing = sd->flag & SD_BACKFACING;
164
165         /* object, matrices, time, ray_length stay the same */
166         sd->flag = kernel_tex_fetch(__object_flag, sd->object);
167         sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
168         sd->type = isect->type;
169
170 #  ifdef __UV__
171         sd->u = isect->u;
172         sd->v = isect->v;
173 #  endif
174
175         /* fetch triangle data */
176         if(sd->type == PRIMITIVE_TRIANGLE) {
177                 float3 Ng = triangle_normal(kg, sd);
178                 sd->shader = kernel_tex_fetch(__tri_shader, sd->prim);
179
180                 /* static triangle */
181                 sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
182                 sd->Ng = Ng;
183                 sd->N = Ng;
184
185                 if(sd->shader & SHADER_SMOOTH_NORMAL)
186                         sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
187
188 #  ifdef __DPDU__
189                 /* dPdu/dPdv */
190                 triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
191 #  endif
192         }
193         else {
194                 /* motion triangle */
195                 motion_triangle_shader_setup(kg, sd, isect, ray, true);
196         }
197
198         sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
199
200 #  ifdef __INSTANCING__
201         if(isect->object != OBJECT_NONE) {
202                 /* instance transform */
203                 object_normal_transform(kg, sd, &sd->N);
204                 object_normal_transform(kg, sd, &sd->Ng);
205 #    ifdef __DPDU__
206                 object_dir_transform(kg, sd, &sd->dPdu);
207                 object_dir_transform(kg, sd, &sd->dPdv);
208 #    endif
209         }
210 #  endif
211
212         /* backfacing test */
213         if(backfacing) {
214                 sd->flag |= SD_BACKFACING;
215                 sd->Ng = -sd->Ng;
216                 sd->N = -sd->N;
217 #  ifdef __DPDU__
218                 sd->dPdu = -sd->dPdu;
219                 sd->dPdv = -sd->dPdv;
220 #  endif
221         }
222
223         /* should not get used in principle as the shading will only use a diffuse
224          * BSDF, but the shader might still access it */
225         sd->I = sd->N;
226
227 #  ifdef __RAY_DIFFERENTIALS__
228         /* differentials */
229         differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
230         /* don't modify dP and dI */
231 #  endif
232 }
233 #endif
234
235 /* ShaderData setup from position sampled on mesh */
236
237 ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
238                                                 ShaderData *sd,
239                                                 const float3 P,
240                                                 const float3 Ng,
241                                                 const float3 I,
242                                                 int shader, int object, int prim,
243                                                 float u, float v, float t,
244                                                 float time)
245 {
246         /* vectors */
247         ccl_fetch(sd, P) = P;
248         ccl_fetch(sd, N) = Ng;
249         ccl_fetch(sd, Ng) = Ng;
250         ccl_fetch(sd, I) = I;
251         ccl_fetch(sd, shader) = shader;
252         ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
253
254         /* primitive */
255 #ifdef __INSTANCING__
256         ccl_fetch(sd, object) = object;
257 #endif
258         /* currently no access to bvh prim index for strand sd->prim*/
259         ccl_fetch(sd, prim) = prim;
260 #ifdef __UV__
261         ccl_fetch(sd, u) = u;
262         ccl_fetch(sd, v) = v;
263 #endif
264         ccl_fetch(sd, ray_length) = t;
265
266         /* detect instancing, for non-instanced the object index is -object-1 */
267 #ifdef __INSTANCING__
268         bool instanced = false;
269
270         if(ccl_fetch(sd, prim) != PRIM_NONE) {
271                 if(ccl_fetch(sd, object) >= 0)
272                         instanced = true;
273                 else
274 #endif
275                         ccl_fetch(sd, object) = ~ccl_fetch(sd, object);
276 #ifdef __INSTANCING__
277         }
278 #endif
279
280         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
281         if(ccl_fetch(sd, object) != OBJECT_NONE) {
282                 ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
283
284 #ifdef __OBJECT_MOTION__
285                 shader_setup_object_transforms(kg, sd, time);
286         }
287
288         ccl_fetch(sd, time) = time;
289 #else
290         }
291 #endif
292
293         if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) {
294                 /* smooth normal */
295                 if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
296                         ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v));
297
298 #ifdef __INSTANCING__
299                         if(instanced)
300                                 object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N));
301 #endif
302                 }
303
304                 /* dPdu/dPdv */
305 #ifdef __DPDU__
306                 triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv));
307
308 #  ifdef __INSTANCING__
309                 if(instanced) {
310                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu));
311                         object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv));
312                 }
313 #  endif
314 #endif
315         }
316         else {
317 #ifdef __DPDU__
318                 ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
319                 ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
320 #endif
321         }
322
323         /* backfacing test */
324         if(ccl_fetch(sd, prim) != PRIM_NONE) {
325                 bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f);
326
327                 if(backfacing) {
328                         ccl_fetch(sd, flag) |= SD_BACKFACING;
329                         ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng);
330                         ccl_fetch(sd, N) = -ccl_fetch(sd, N);
331 #ifdef __DPDU__
332                         ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu);
333                         ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv);
334 #endif
335                 }
336         }
337
338 #ifdef __RAY_DIFFERENTIALS__
339         /* no ray differentials here yet */
340         ccl_fetch(sd, dP) = differential3_zero();
341         ccl_fetch(sd, dI) = differential3_zero();
342         ccl_fetch(sd, du) = differential_zero();
343         ccl_fetch(sd, dv) = differential_zero();
344 #endif
345 }
346
347 /* ShaderData setup for displacement */
348
349 ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
350         int object, int prim, float u, float v)
351 {
352         float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
353         int shader;
354
355         triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
356
357         /* force smooth shading for displacement */
358         shader |= SHADER_SMOOTH_NORMAL;
359
360         /* watch out: no instance transform currently */
361
362         shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID);
363 }
364
365 /* ShaderData setup from ray into background */
366
367 ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
368 {
369         /* vectors */
370         ccl_fetch(sd, P) = ray->D;
371         ccl_fetch(sd, N) = -ray->D;
372         ccl_fetch(sd, Ng) = -ray->D;
373         ccl_fetch(sd, I) = -ray->D;
374         ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
375         ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
376 #ifdef __OBJECT_MOTION__
377         ccl_fetch(sd, time) = ray->time;
378 #endif
379         ccl_fetch(sd, ray_length) = 0.0f;
380
381 #ifdef __INSTANCING__
382         ccl_fetch(sd, object) = PRIM_NONE;
383 #endif
384         ccl_fetch(sd, prim) = PRIM_NONE;
385 #ifdef __UV__
386         ccl_fetch(sd, u) = 0.0f;
387         ccl_fetch(sd, v) = 0.0f;
388 #endif
389
390 #ifdef __DPDU__
391         /* dPdu/dPdv */
392         ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f);
393         ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f);
394 #endif
395
396 #ifdef __RAY_DIFFERENTIALS__
397         /* differentials */
398         ccl_fetch(sd, dP) = ray->dD;
399         differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP));
400         ccl_fetch(sd, du) = differential_zero();
401         ccl_fetch(sd, dv) = differential_zero();
402 #endif
403 }
404
405 /* ShaderData setup from point inside volume */
406
407 #ifdef __VOLUME__
408 ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
409 {
410         /* vectors */
411         sd->P = ray->P;
412         sd->N = -ray->D;  
413         sd->Ng = -ray->D;
414         sd->I = -ray->D;
415         sd->shader = SHADER_NONE;
416         sd->flag = 0;
417 #ifdef __OBJECT_MOTION__
418         sd->time = ray->time;
419 #endif
420         sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
421
422 #ifdef __INSTANCING__
423         sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
424 #endif
425         sd->prim = PRIM_NONE;
426         sd->type = PRIMITIVE_NONE;
427
428 #ifdef __UV__
429         sd->u = 0.0f;
430         sd->v = 0.0f;
431 #endif
432
433 #ifdef __DPDU__
434         /* dPdu/dPdv */
435         sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
436         sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
437 #endif
438
439 #ifdef __RAY_DIFFERENTIALS__
440         /* differentials */
441         sd->dP = ray->dD;
442         differential_incoming(&sd->dI, sd->dP);
443         sd->du = differential_zero();
444         sd->dv = differential_zero();
445 #endif
446
447         /* for NDC coordinates */
448         sd->ray_P = ray->P;
449         sd->ray_dP = ray->dP;
450 }
451 #endif
452
453 /* Merging */
454
455 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
456 ccl_device_inline void shader_merge_closures(ShaderData *sd)
457 {
458         /* merge identical closures, better when we sample a single closure at a time */
459         for(int i = 0; i < sd->num_closure; i++) {
460                 ShaderClosure *sci = &sd->closure[i];
461
462                 for(int j = i + 1; j < sd->num_closure; j++) {
463                         ShaderClosure *scj = &sd->closure[j];
464
465                         if(sci->type != scj->type)
466                                 continue;
467                         if(!bsdf_merge(sci, scj))
468                                 continue;
469
470                         sci->weight += scj->weight;
471                         sci->sample_weight += scj->sample_weight;
472
473                         int size = sd->num_closure - (j+1);
474                         if(size > 0) {
475                                 for(int k = 0; k < size; k++) {
476                                         scj[k] = scj[k+1];
477                                 }
478                         }
479
480                         sd->num_closure--;
481                         kernel_assert(sd->num_closure >= 0);
482                         j--;
483                 }
484         }
485 }
486 #endif
487
488 /* BSDF */
489
490 ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf,
491         int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
492 {
493         /* this is the veach one-sample model with balance heuristic, some pdf
494          * factors drop out when using balance heuristic weighting */
495         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
496                 if(i == skip_bsdf)
497                         continue;
498
499                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
500
501                 if(CLOSURE_IS_BSDF(sc->type)) {
502                         float bsdf_pdf = 0.0f;
503                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
504
505                         if(bsdf_pdf != 0.0f) {
506                                 bsdf_eval_accum(result_eval, sc->type, eval*sc->weight);
507                                 sum_pdf += bsdf_pdf*sc->sample_weight;
508                         }
509
510                         sum_sample_weight += sc->sample_weight;
511                 }
512         }
513
514         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
515 }
516
517 #ifdef __BRANCHED_PATH__
518 ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
519                                                         ShaderData *sd,
520                                                         const float3 omega_in,
521                                                         BsdfEval *result_eval,
522                                                         float light_pdf,
523                                                         bool use_mis)
524 {
525         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
526                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
527                 if(CLOSURE_IS_BSDF(sc->type)) {
528                         float bsdf_pdf = 0.0f;
529                         float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
530                         if(bsdf_pdf != 0.0f) {
531                                 float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f;
532                                 bsdf_eval_accum(result_eval,
533                                                 sc->type,
534                                                 eval * sc->weight * mis_weight);
535                         }
536                 }
537         }
538 }
539 #endif
540
541
542 #ifndef __KERNEL_CUDS__
543 ccl_device
544 #else
545 ccl_device_inline
546 #endif
547 void shader_bsdf_eval(KernelGlobals *kg,
548                       ShaderData *sd,
549                       const float3 omega_in,
550                       BsdfEval *eval,
551                       float light_pdf,
552                       bool use_mis)
553 {
554         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
555
556 #ifdef __BRANCHED_PATH__
557         if(kernel_data.integrator.branched)
558                 _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis);
559         else
560 #endif
561         {
562                 float pdf;
563                 _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f);
564                 if(use_mis) {
565                         float weight = power_heuristic(light_pdf, pdf);
566                         bsdf_eval_mul(eval, make_float3(weight, weight, weight));
567                 }
568         }
569 }
570
571 ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
572                                          ShaderData *sd,
573                                          float randu, float randv,
574                                          BsdfEval *bsdf_eval,
575                                          float3 *omega_in,
576                                          differential3 *domega_in,
577                                          float *pdf)
578 {
579         int sampled = 0;
580
581         if(ccl_fetch(sd, num_closure) > 1) {
582                 /* pick a BSDF closure based on sample weights */
583                 float sum = 0.0f;
584
585                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
586                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
587                         
588                         if(CLOSURE_IS_BSDF(sc->type))
589                                 sum += sc->sample_weight;
590                 }
591
592                 float r = ccl_fetch(sd, randb_closure)*sum;
593                 sum = 0.0f;
594
595                 for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) {
596                         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
597                         
598                         if(CLOSURE_IS_BSDF(sc->type)) {
599                                 sum += sc->sample_weight;
600
601                                 if(r <= sum)
602                                         break;
603                         }
604                 }
605
606                 if(sampled == ccl_fetch(sd, num_closure)) {
607                         *pdf = 0.0f;
608                         return LABEL_NONE;
609                 }
610         }
611
612         const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled);
613
614         int label;
615         float3 eval;
616
617         *pdf = 0.0f;
618         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
619
620         if(*pdf != 0.0f) {
621                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
622
623                 if(ccl_fetch(sd, num_closure) > 1) {
624                         float sweight = sc->sample_weight;
625                         _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
626                 }
627         }
628
629         return label;
630 }
631
632 ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
633         const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
634         float3 *omega_in, differential3 *domega_in, float *pdf)
635 {
636         int label;
637         float3 eval;
638
639         *pdf = 0.0f;
640         label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
641
642         if(*pdf != 0.0f)
643                 bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass);
644
645         return label;
646 }
647
648 ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
649 {
650         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
651                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
652
653                 if(CLOSURE_IS_BSDF(sc->type))
654                         bsdf_blur(kg, sc, roughness);
655         }
656 }
657
658 ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
659 {
660         if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)
661                 return make_float3(1.0f, 1.0f, 1.0f);
662
663         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
664
665         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
666                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
667
668                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
669                         eval += sc->weight;
670         }
671
672         return eval;
673 }
674
675 ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
676 {
677         float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd);
678
679         alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f));
680         alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f));
681         
682         return alpha;
683 }
684
685 ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
686 {
687         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
688
689         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
690                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
691
692                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
693                         eval += sc->weight;
694         }
695
696         return eval;
697 }
698
699 ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
700 {
701         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
702
703         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
704                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
705
706                 if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
707                         eval += sc->weight;
708         }
709
710         return eval;
711 }
712
713 ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
714 {
715         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
716
717         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
718                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
719
720                 if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
721                         eval += sc->weight;
722         }
723
724         return eval;
725 }
726
727 ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
728 {
729         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
730
731         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
732                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
733
734                 if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type))
735                         eval += sc->weight;
736         }
737
738         return eval;
739 }
740
741 ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
742 {
743         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
744         float3 N = make_float3(0.0f, 0.0f, 0.0f);
745
746         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
747                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
748
749                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
750                         const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc;
751                         eval += sc->weight*ao_factor;
752                         N += bsdf->N*average(sc->weight);
753                 }
754                 else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
755                         eval += sc->weight;
756                         N += ccl_fetch(sd, N)*average(sc->weight);
757                 }
758         }
759
760         if(is_zero(N))
761                 N = ccl_fetch(sd, N);
762         else
763                 N = normalize(N);
764
765         *N_ = N;
766         return eval;
767 }
768
769 #ifdef __SUBSURFACE__
770 ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
771 {
772         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
773         float3 N = make_float3(0.0f, 0.0f, 0.0f);
774         float texture_blur = 0.0f, weight_sum = 0.0f;
775
776         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
777                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
778
779                 if(CLOSURE_IS_BSSRDF(sc->type)) {
780                         const Bssrdf *bssrdf = (const Bssrdf*)sc;
781                         float avg_weight = fabsf(average(sc->weight));
782
783                         N += bssrdf->N*avg_weight;
784                         eval += sc->weight;
785                         texture_blur += bssrdf->texture_blur*avg_weight;
786                         weight_sum += avg_weight;
787                 }
788         }
789
790         if(N_)
791                 *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N);
792
793         if(texture_blur_)
794                 *texture_blur_ = texture_blur/weight_sum;
795         
796         return eval;
797 }
798 #endif
799
800 /* Emission */
801
802 ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
803 {
804         return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I));
805 }
806
807 ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
808 {
809         float3 eval;
810         eval = make_float3(0.0f, 0.0f, 0.0f);
811
812         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
813                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
814
815                 if(CLOSURE_IS_EMISSION(sc->type))
816                         eval += emissive_eval(kg, sd, sc)*sc->weight;
817         }
818
819         return eval;
820 }
821
822 /* Holdout */
823
824 ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
825 {
826         float3 weight = make_float3(0.0f, 0.0f, 0.0f);
827
828         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
829                 ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
830
831                 if(CLOSURE_IS_HOLDOUT(sc->type))
832                         weight += sc->weight;
833         }
834
835         return weight;
836 }
837
838 /* Surface Evaluation */
839
840 ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
841         ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
842 {
843         ccl_fetch(sd, num_closure) = 0;
844         ccl_fetch(sd, num_closure_extra) = 0;
845         ccl_fetch(sd, randb_closure) = randb;
846
847 #ifdef __OSL__
848         if(kg->osl)
849                 OSLShader::eval_surface(kg, sd, state, path_flag, ctx);
850         else
851 #endif
852         {
853 #ifdef __SVM__
854                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
855 #else
856                 ccl_fetch_array(sd, closure, 0)->weight = make_float3(0.8f, 0.8f, 0.8f);
857                 ccl_fetch_array(sd, closure, 0)->N = ccl_fetch(sd, N);
858                 ccl_fetch_array(sd, closure, 0)->data0 = 0.0f;
859                 ccl_fetch_array(sd, closure, 0)->data1 = 0.0f;
860                 ccl_fetch(sd, flag) |= bsdf_diffuse_setup(ccl_fetch_array(sd, closure, 0));
861 #endif
862         }
863
864         if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
865                 ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953);
866         }
867 }
868
869 /* Background Evaluation */
870
871 ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd,
872         ccl_addr_space PathState *state, int path_flag, ShaderContext ctx)
873 {
874         ccl_fetch(sd, num_closure) = 0;
875         ccl_fetch(sd, num_closure_extra) = 0;
876         ccl_fetch(sd, randb_closure) = 0.0f;
877
878 #ifdef __SVM__
879 #ifdef __OSL__
880         if(kg->osl) {
881                 OSLShader::eval_background(kg, sd, state, path_flag, ctx);
882         }
883         else
884 #endif
885         {
886                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag);
887         }
888
889         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
890
891         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
892                 const ShaderClosure *sc = ccl_fetch_array(sd, closure, i);
893
894                 if(CLOSURE_IS_BACKGROUND(sc->type))
895                         eval += sc->weight;
896         }
897
898         return eval;
899 #else
900         return make_float3(0.8f, 0.8f, 0.8f);
901 #endif
902 }
903
904 /* Volume */
905
906 #ifdef __VOLUME__
907
908 ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf,
909         int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
910 {
911         for(int i = 0; i < sd->num_closure; i++) {
912                 if(i == skip_phase)
913                         continue;
914
915                 const ShaderClosure *sc = &sd->closure[i];
916
917                 if(CLOSURE_IS_PHASE(sc->type)) {
918                         float phase_pdf = 0.0f;
919                         float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
920
921                         if(phase_pdf != 0.0f) {
922                                 bsdf_eval_accum(result_eval, sc->type, eval);
923                                 sum_pdf += phase_pdf*sc->sample_weight;
924                         }
925
926                         sum_sample_weight += sc->sample_weight;
927                 }
928         }
929
930         *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
931 }
932
933 ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
934         const float3 omega_in, BsdfEval *eval, float *pdf)
935 {
936         bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
937
938         _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
939 }
940
941 ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd,
942         float randu, float randv, BsdfEval *phase_eval,
943         float3 *omega_in, differential3 *domega_in, float *pdf)
944 {
945         int sampled = 0;
946
947         if(sd->num_closure > 1) {
948                 /* pick a phase closure based on sample weights */
949                 float sum = 0.0f;
950
951                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
952                         const ShaderClosure *sc = &sd->closure[sampled];
953                         
954                         if(CLOSURE_IS_PHASE(sc->type))
955                                 sum += sc->sample_weight;
956                 }
957
958                 float r = sd->randb_closure*sum;
959                 sum = 0.0f;
960
961                 for(sampled = 0; sampled < sd->num_closure; sampled++) {
962                         const ShaderClosure *sc = &sd->closure[sampled];
963                         
964                         if(CLOSURE_IS_PHASE(sc->type)) {
965                                 sum += sc->sample_weight;
966
967                                 if(r <= sum)
968                                         break;
969                         }
970                 }
971
972                 if(sampled == sd->num_closure) {
973                         *pdf = 0.0f;
974                         return LABEL_NONE;
975                 }
976         }
977
978         /* todo: this isn't quite correct, we don't weight anisotropy properly
979          * depending on color channels, even if this is perhaps not a common case */
980         const ShaderClosure *sc = &sd->closure[sampled];
981         int label;
982         float3 eval;
983
984         *pdf = 0.0f;
985         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
986
987         if(*pdf != 0.0f) {
988                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
989         }
990
991         return label;
992 }
993
994 ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd,
995         const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
996         float3 *omega_in, differential3 *domega_in, float *pdf)
997 {
998         int label;
999         float3 eval;
1000
1001         *pdf = 0.0f;
1002         label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
1003
1004         if(*pdf != 0.0f)
1005                 bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass);
1006
1007         return label;
1008 }
1009
1010 /* Volume Evaluation */
1011
1012 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
1013                                           ShaderData *sd,
1014                                           PathState *state,
1015                                           VolumeStack *stack,
1016                                           int path_flag,
1017                                           ShaderContext ctx)
1018 {
1019         /* reset closures once at the start, we will be accumulating the closures
1020          * for all volumes in the stack into a single array of closures */
1021         sd->num_closure = 0;
1022         sd->num_closure_extra = 0;
1023         sd->flag = 0;
1024
1025         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1026                 /* setup shaderdata from stack. it's mostly setup already in
1027                  * shader_setup_from_volume, this switching should be quick */
1028                 sd->object = stack[i].object;
1029                 sd->shader = stack[i].shader;
1030
1031                 sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
1032                 sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
1033
1034                 if(sd->object != OBJECT_NONE) {
1035                         sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
1036
1037 #ifdef __OBJECT_MOTION__
1038                         /* todo: this is inefficient for motion blur, we should be
1039                          * caching matrices instead of recomputing them each step */
1040                         shader_setup_object_transforms(kg, sd, sd->time);
1041 #endif
1042                 }
1043
1044                 /* evaluate shader */
1045 #ifdef __SVM__
1046 #  ifdef __OSL__
1047                 if(kg->osl) {
1048                         OSLShader::eval_volume(kg, sd, state, path_flag, ctx);
1049                 }
1050                 else
1051 #  endif
1052                 {
1053                         svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag);
1054                 }
1055 #endif
1056
1057                 /* merge closures to avoid exceeding number of closures limit */
1058                 if(i > 0)
1059                         shader_merge_closures(sd);
1060         }
1061 }
1062
1063 #endif
1064
1065 /* Displacement Evaluation */
1066
1067 ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx)
1068 {
1069         ccl_fetch(sd, num_closure) = 0;
1070         ccl_fetch(sd, num_closure_extra) = 0;
1071         ccl_fetch(sd, randb_closure) = 0.0f;
1072
1073         /* this will modify sd->P */
1074 #ifdef __SVM__
1075 #  ifdef __OSL__
1076         if(kg->osl)
1077                 OSLShader::eval_displacement(kg, sd, ctx);
1078         else
1079 #  endif
1080         {
1081                 svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0);
1082         }
1083 #endif
1084 }
1085
1086 /* Transparent Shadows */
1087
1088 #ifdef __TRANSPARENT_SHADOWS__
1089 ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect)
1090 {
1091         int prim = kernel_tex_fetch(__prim_index, isect->prim);
1092         int shader = 0;
1093
1094 #ifdef __HAIR__
1095         if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
1096 #endif
1097                 shader = kernel_tex_fetch(__tri_shader, prim);
1098 #ifdef __HAIR__
1099         }
1100         else {
1101                 float4 str = kernel_tex_fetch(__curves, prim);
1102                 shader = __float_as_int(str.z);
1103         }
1104 #endif
1105         int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
1106
1107         return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
1108 }
1109 #endif
1110
1111 CCL_NAMESPACE_END
1112