Cycles: Subsurface Scattering
[blender.git] / intern / cycles / kernel / kernel_subsurface.h
1 /*
2  * Copyright 2013, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 CCL_NAMESPACE_BEGIN
20
21 #include "closure/bssrdf.h"
22
23 /* NEW BSSRDF: See "BSSRDF Importance Sampling", SIGGRAPH 2013 */
24
25 /* TODO:
26  * - test using power heuristic for combing bssrdfs
27  * - try to reduce one sample model variance
28  * - possible shade all hits for progressive integrator
29  * - cubic and gaussian scale difference tweak
30  */
31
32 #define BSSRDF_MULTI_EVAL
33
34 __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, ShaderData *sd, float *probability)
35 {
36         /* sum sample weights of bssrdf and bsdf */
37         float bsdf_sum = 0.0f;
38         float bssrdf_sum = 0.0f;
39
40         for(int i = 0; i < sd->num_closure; i++) {
41                 ShaderClosure *sc = &sd->closure[i];
42                 
43                 if(CLOSURE_IS_BSDF(sc->type))
44                         bsdf_sum += sc->sample_weight;
45                 else if(CLOSURE_IS_BSSRDF(sc->type))
46                         bssrdf_sum += sc->sample_weight;
47         }
48
49         /* use bsdf or bssrdf? */
50         float r = sd->randb_closure*(bsdf_sum + bssrdf_sum);
51
52         if(r < bsdf_sum) {
53                 /* use bsdf, and adjust randb so we can reuse it for picking a bsdf */
54                 sd->randb_closure = r/bsdf_sum;
55                 *probability = (bsdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bsdf_sum: 1.0f;
56                 return NULL;
57         }
58
59         /* use bssrdf */
60         r -= bsdf_sum;
61
62         float sum = 0.0f;
63
64         for(int i = 0; i < sd->num_closure; i++) {
65                 ShaderClosure *sc = &sd->closure[i];
66                 
67                 if(CLOSURE_IS_BSSRDF(sc->type)) {
68                         sum += sc->sample_weight;
69
70                         if(r <= sum) {
71                                 sd->randb_closure = (r - (sum - sc->sample_weight))/sc->sample_weight;
72
73 #ifdef BSSRDF_MULTI_EVAL
74                                 *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bssrdf_sum: 1.0f;
75 #else
76                                 *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/sc->sample_weight: 1.0f;
77 #endif
78                                 return sc;
79                         }
80                 }
81         }
82
83         /* should never happen */
84         sd->randb_closure = 0.0f;
85         *probability = 1.0f;
86         return NULL;
87 }
88
89 __device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all)
90 {
91 #ifdef BSSRDF_MULTI_EVAL
92         /* this is the veach one-sample model with balance heuristic, some pdf
93          * factors drop out when using balance heuristic weighting */
94         float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
95         float pdf_sum = 0.0f;
96         float sample_weight_sum = 0.0f;
97         int num_bssrdf = 0;
98
99         for(int i = 0; i < sd->num_closure; i++) {
100                 sc = &sd->closure[i];
101                 
102                 if(CLOSURE_IS_BSSRDF(sc->type)) {
103                         float sample_weight = (all)? 1.0f: sc->sample_weight;
104                         sample_weight_sum += sample_weight;
105                 }
106         }
107
108         float sample_weight_inv = 1.0f/sample_weight_sum;
109
110         //printf("num closures %d\n", sd->num_closure);
111
112         for(int i = 0; i < sd->num_closure; i++) {
113                 sc = &sd->closure[i];
114                 
115                 if(CLOSURE_IS_BSSRDF(sc->type)) {
116                         /* in case of non-progressive integrate we sample all bssrdf's once,
117                          * for progressive we pick one, so adjust pdf for that */
118                         float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv;
119
120                         /* compute pdf */
121                         float pdf = bssrdf_pdf(sc, r);
122                         float disk_pdf = bssrdf_pdf(sc, disk_r);
123
124                         /* TODO power heuristic is not working correct here */
125                         eval_sum += sc->weight*pdf; //*sample_weight*disk_pdf;
126                         pdf_sum += sample_weight*disk_pdf; //*sample_weight*disk_pdf;
127
128                         num_bssrdf++;
129                 }
130         }
131
132         return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
133 #else
134         float pdf = bssrdf_pdf(pick_sc, r);
135         float disk_pdf = bssrdf_pdf(pick_sc, disk_r);
136
137         return pick_sc->weight * pdf / disk_pdf;
138 #endif
139 }
140
141 /* replace closures with a single diffuse bsdf closure after scatter step */
142 __device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight, bool hit, float3 N)
143 {
144         sd->flag &= ~SD_CLOSURE_FLAGS;
145         sd->randb_closure = 0.0f;
146
147         if(hit) {
148                 ShaderClosure *sc = &sd->closure[0];
149                 sd->num_closure = 1;
150
151                 sc->weight = weight;
152                 sc->sample_weight = 1.0f;
153                 sc->data0 = 0.0f;
154                 sc->data1 = 0.0f;
155                 sc->N = N;
156                 sd->flag |= bsdf_diffuse_setup(sc);
157
158                 /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
159                  * can recognize it as not being a regular diffuse closure */
160                 sc->type = CLOSURE_BSDF_BSSRDF_ID;
161         }
162         else
163                 sd->num_closure = 0;
164 }
165
166 /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
167 __device float3 subsurface_color_pow(float3 color, float exponent)
168 {
169         color = max(color, make_float3(0.0f, 0.0f, 0.0f));
170
171         if(exponent == 1.0f) {
172                 /* nothing to do */
173         }
174         else if(exponent == 0.5f) {
175                 color.x = sqrtf(color.x);
176                 color.y = sqrtf(color.y);
177                 color.z = sqrtf(color.z);
178         }
179         else {
180                 color.x = powf(color.x, exponent);
181                 color.y = powf(color.y, exponent);
182                 color.z = powf(color.z, exponent);
183         }
184
185         return color;
186 }
187
188 __device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd, ShaderData *in_sd, int state_flag, float3 *eval, float3 *N)
189 {
190         /* average color and texture blur at outgoing point */
191         float texture_blur;
192         float3 out_color = shader_bssrdf_sum(out_sd, NULL, &texture_blur);
193
194         /* do we have bump mapping? */
195         bool bump = (out_sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
196
197         if(bump || texture_blur > 0.0f) {
198                 /* average color and normal at incoming point */
199                 shader_eval_surface(kg, in_sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
200                 float3 in_color = shader_bssrdf_sum(in_sd, (bump)? N: NULL, NULL);
201
202                 /* we simply divide out the average color and multiply with the average
203                  * of the other one. we could try to do this per closure but it's quite
204                  * tricky to match closures between shader evaluations, their number and
205                  * order may change, this is simpler */
206                 if(texture_blur > 0.0f) {
207                         out_color = subsurface_color_pow(out_color, texture_blur);
208                         in_color = subsurface_color_pow(in_color, texture_blur);
209
210                         *eval *= safe_divide_color(in_color, out_color);
211                 }
212         }
213 }
214
215 /* subsurface scattering step, from a point on the surface to other nearby points on the same object */
216 __device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd, ShaderData bssrdf_sd[BSSRDF_MAX_HITS],
217         int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
218 {
219         /* pick random axis in local frame and point on disk */
220         float3 disk_N, disk_T, disk_B;
221         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
222         
223         disk_N = sd->Ng;
224         make_orthonormals(disk_N, &disk_T, &disk_B);
225
226         if(disk_u < 0.5f) {
227                 pick_pdf_N = 0.5f;
228                 pick_pdf_T = 0.25f;
229                 pick_pdf_B = 0.25f;
230                 disk_u *= 2.0f;
231         }
232         else if(disk_u < 0.75f) {
233                 float3 tmp = disk_N;
234                 disk_N = disk_T;
235                 disk_T = tmp;
236                 pick_pdf_N = 0.25f;
237                 pick_pdf_T = 0.5f;
238                 pick_pdf_B = 0.25f;
239                 disk_u = (disk_u - 0.5f)*4.0f;
240         }
241         else {
242                 float3 tmp = disk_N;
243                 disk_N = disk_B;
244                 disk_B = tmp;
245                 pick_pdf_N = 0.25f;
246                 pick_pdf_T = 0.25f;
247                 pick_pdf_B = 0.5f;
248                 disk_u = (disk_u - 0.75f)*4.0f;
249         }
250
251         /* sample point on disk */
252     float phi = M_2PI_F * disk_u;
253     float disk_r = disk_v;
254         float disk_height;
255
256         bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
257
258         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
259
260         /* create ray */
261         Ray ray;
262         ray.P = sd->P + disk_N*disk_height + disk_P;
263         ray.D = -disk_N;
264         ray.t = 2.0f*disk_height;
265         ray.dP = sd->dP;
266         ray.dD = differential3_zero();
267         ray.time = sd->time;
268
269         /* intersect with the same object. if multiple intersections are found it
270          * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
271         Intersection isect[BSSRDF_MAX_HITS];
272         uint num_hits = scene_intersect_subsurface(kg, &ray, isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
273
274         /* evaluate bssrdf */
275         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
276         int num_eval_hits = min(num_hits, BSSRDF_MAX_HITS);
277
278         for(int hit = 0; hit < num_eval_hits; hit++) {
279                 ShaderData *bsd = &bssrdf_sd[hit];
280
281                 /* setup new shading point */
282                 *bsd = *sd;
283                 shader_setup_from_subsurface(kg, bsd, &isect[hit], &ray);
284
285                 /* probability densities for local frame axes */
286                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, bsd->Ng));
287                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, bsd->Ng));
288                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, bsd->Ng));
289                 
290                 /* multiple importance sample between 3 axes, power heuristic
291                  * found to be slightly better than balance heuristic */
292                 float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
293
294                 /* real distance to sampled point */
295                 float r = len(bsd->P - sd->P);
296
297                 /* evaluate */
298                 float w = mis_weight / pdf_N;
299                 if(num_hits > BSSRDF_MAX_HITS)
300                         w *= num_hits/(float)BSSRDF_MAX_HITS;
301                 eval = subsurface_scatter_eval(bsd, sc, disk_r, r, all) * w;
302
303                 /* optionally blur colors and bump mapping */
304                 float3 N = bsd->N;
305                 subsurface_color_bump_blur(kg, sd, bsd, state_flag, &eval, &N);
306
307                 /* setup diffuse bsdf */
308                 subsurface_scatter_setup_diffuse_bsdf(bsd, eval, true, N);
309         }
310
311         return num_eval_hits;
312 }
313
314 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
315 __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
316         int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
317 {
318         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
319         uint num_hits = 0;
320
321         /* pick random axis in local frame and point on disk */
322         float3 disk_N, disk_T, disk_B;
323         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
324         
325         disk_N = sd->Ng;
326         make_orthonormals(disk_N, &disk_T, &disk_B);
327
328         if(disk_u < 0.5f) {
329                 pick_pdf_N = 0.5f;
330                 pick_pdf_T = 0.25f;
331                 pick_pdf_B = 0.25f;
332                 disk_u *= 2.0f;
333         }
334         else if(disk_u < 0.75f) {
335                 float3 tmp = disk_N;
336                 disk_N = disk_T;
337                 disk_T = tmp;
338                 pick_pdf_N = 0.25f;
339                 pick_pdf_T = 0.5f;
340                 pick_pdf_B = 0.25f;
341                 disk_u = (disk_u - 0.5f)*4.0f;
342         }
343         else {
344                 float3 tmp = disk_N;
345                 disk_N = disk_B;
346                 disk_B = tmp;
347                 pick_pdf_N = 0.25f;
348                 pick_pdf_T = 0.25f;
349                 pick_pdf_B = 0.5f;
350                 disk_u = (disk_u - 0.75f)*4.0f;
351         }
352
353         /* sample point on disk */
354         float phi = M_2PI_F * disk_u;
355         float disk_r = disk_v;
356         float disk_height;
357
358         bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
359
360         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
361
362         /* create ray */
363         Ray ray;
364         ray.P = sd->P + disk_N*disk_height + disk_P;
365         ray.D = -disk_N;
366         ray.t = 2.0f*disk_height;
367         ray.dP = sd->dP;
368         ray.dD = differential3_zero();
369         ray.time = sd->time;
370
371         /* intersect with the same object. if multiple intersections are
372          * found it will randomly pick one of them */
373         Intersection isect;
374         num_hits = scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1);
375
376         /* evaluate bssrdf */
377         if(num_hits > 0) {
378                 float3 origP = sd->P;
379
380                 /* setup new shading point */
381                 shader_setup_from_subsurface(kg, sd, &isect, &ray);
382
383                 /* probability densities for local frame axes */
384                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
385                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
386                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
387                 
388                 /* multiple importance sample between 3 axes, power heuristic
389                  * found to be slightly better than balance heuristic */
390                 float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
391
392                 /* real distance to sampled point */
393                 float r = len(sd->P - origP);
394
395                 /* evaluate */
396                 float w = (mis_weight * num_hits) / pdf_N;
397                 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
398         }
399
400         /* optionally blur colors and bump mapping */
401         float3 N = sd->N;
402         subsurface_color_bump_blur(kg, sd, sd, state_flag, &eval, &N);
403
404         /* setup diffuse bsdf */
405         subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N);
406 }
407
408
409 /* OLD BSSRDF */
410
411 __device float old_bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u)
412 {
413         int table_offset = kernel_data.bssrdf.table_offset;
414         float r = lookup_table_read_2D(kg, u, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
415
416         return r*radius;
417 }
418
419 #ifdef BSSRDF_MULTI_EVAL
420 __device float old_bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r)
421 {
422         if(r >= radius)
423                 return 0.0f;
424
425         /* todo: when we use the real BSSRDF this will need to be divided by the maximum
426          * radius instead of the average radius */
427         float t = r/radius;
428
429         int table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET;
430         float pdf = lookup_table_read_2D(kg, t, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
431
432         pdf /= radius;
433
434         return pdf;
435 }
436 #endif
437
438 #ifdef BSSRDF_MULTI_EVAL
439 __device float3 old_subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all)
440 {
441         /* compute pdf */
442         float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
443         float pdf_sum = 0.0f;
444         float sample_weight_sum = 0.0f;
445         int num_bssrdf = 0;
446
447         for(int i = 0; i < sd->num_closure; i++) {
448                 ShaderClosure *sc = &sd->closure[i];
449                 
450                 if(CLOSURE_IS_BSSRDF(sc->type)) {
451                         float sample_weight = (all)? 1.0f: sc->sample_weight;
452
453                         /* compute pdf */
454                         float pdf = 1.0f;
455                         for(int i = 0; i < num_r; i++)
456                                 pdf *= old_bssrdf_pdf(kg, sc->data0, refl, r[i]);
457
458                         eval_sum += sc->weight*pdf;
459                         pdf_sum += sample_weight*pdf;
460
461                         sample_weight_sum += sample_weight;
462                         num_bssrdf++;
463                 }
464         }
465
466         float inv_pdf_sum;
467         
468         if(pdf_sum > 0.0f) {
469                 /* in case of non-progressive integrate we sample all bssrdf's once,
470                  * for progressive we pick one, so adjust pdf for that */
471                 if(all)
472                         inv_pdf_sum = 1.0f/pdf_sum;
473                 else
474                         inv_pdf_sum = sample_weight_sum/pdf_sum;
475         }
476         else
477                 inv_pdf_sum = 0.0f;
478
479         float3 weight = eval_sum * inv_pdf_sum;
480
481         return weight;
482 }
483 #endif
484
485 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
486 __device void old_subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all)
487 {
488         float radius = sc->data0;
489         float refl = max(average(sc->weight)*3.0f, 0.0f);
490         float r = 0.0f;
491         bool hit = false;
492         float3 weight = make_float3(1.0f, 1.0f, 1.0f);
493 #ifdef BSSRDF_MULTI_EVAL
494         float r_attempts[BSSRDF_MAX_ATTEMPTS];
495 #endif
496         int num_attempts;
497
498         /* attempt to find a hit a given number of times before giving up */
499         for(num_attempts = 0; num_attempts < kernel_data.bssrdf.num_attempts; num_attempts++) {
500                 /* random numbers for sampling */
501                 float u1 = lcg_step_float(lcg_state);
502                 float u2 = lcg_step_float(lcg_state);
503                 float u3 = lcg_step_float(lcg_state);
504                 float u4 = lcg_step_float(lcg_state);
505                 float u5 = lcg_step_float(lcg_state);
506
507                 r = old_bssrdf_sample_distance(kg, radius, refl, u5);
508 #ifdef BSSRDF_MULTI_EVAL
509                 r_attempts[num_attempts] = r;
510 #endif
511
512                 float3 p1 = sd->P + sample_uniform_sphere(u1, u2)*r;
513                 float3 p2 = sd->P + sample_uniform_sphere(u3, u4)*r;
514
515                 /* create ray */
516                 Ray ray;
517                 ray.P = p1;
518                 ray.D = normalize_len(p2 - p1, &ray.t);
519                 ray.dP = sd->dP;
520                 ray.dD = differential3_zero();
521                 ray.time = sd->time;
522
523                 /* intersect with the same object. if multiple intersections are
524                  * found it will randomly pick one of them */
525                 Intersection isect;
526                 if(scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1) == 0)
527                         continue;
528
529                 /* setup new shading point */
530                 shader_setup_from_subsurface(kg, sd, &isect, &ray);
531
532                 hit = true;
533                 num_attempts++;
534                 break;
535         }
536
537         /* evaluate subsurface scattering closures */
538 #ifdef BSSRDF_MULTI_EVAL
539         weight *= old_subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all);
540 #else
541         weight *= sc->weight;
542 #endif
543
544         if(!hit)
545                 weight = make_float3(0.0f, 0.0f, 0.0f);
546
547         /* optionally blur colors and bump mapping */
548         float3 N = sd->N;
549         subsurface_color_bump_blur(kg, sd, sd, state_flag, &weight, &N);
550
551         /* replace closures with a single diffuse BSDF */
552         subsurface_scatter_setup_diffuse_bsdf(sd, weight, hit, N);
553 }
554
555 __device bool old_subsurface_scatter_use(ShaderData *sd)
556 {
557         for(int i = 0; i < sd->num_closure; i++) {
558                 ShaderClosure *sc = &sd->closure[i];
559                 
560                 if(sc->type == CLOSURE_BSSRDF_COMPATIBLE_ID)
561                         return true;
562         }
563
564         return false;
565 }
566
567 CCL_NAMESPACE_END
568