6c8b7cca4ce4e37547376af7a1b851ef5d618df8
[blender.git] / intern / cycles / kernel / kernel_subsurface.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* BSSRDF using disk based importance sampling.
20  *
21  * BSSRDF Importance Sampling, SIGGRAPH 2013
22  * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
23  *
24  */
25
26 /* TODO:
27  * - test using power heuristic for combing bssrdfs
28  * - try to reduce one sample model variance
29  */
30
31 #define BSSRDF_MULTI_EVAL
32
33 ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, ShaderData *sd, float *probability)
34 {
35         /* sum sample weights of bssrdf and bsdf */
36         float bsdf_sum = 0.0f;
37         float bssrdf_sum = 0.0f;
38
39         for(int i = 0; i < sd->num_closure; i++) {
40                 ShaderClosure *sc = &sd->closure[i];
41                 
42                 if(CLOSURE_IS_BSDF(sc->type))
43                         bsdf_sum += sc->sample_weight;
44                 else if(CLOSURE_IS_BSSRDF(sc->type))
45                         bssrdf_sum += sc->sample_weight;
46         }
47
48         /* use bsdf or bssrdf? */
49         float r = sd->randb_closure*(bsdf_sum + bssrdf_sum);
50
51         if(r < bsdf_sum) {
52                 /* use bsdf, and adjust randb so we can reuse it for picking a bsdf */
53                 sd->randb_closure = r/bsdf_sum;
54                 *probability = (bsdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bsdf_sum: 1.0f;
55                 return NULL;
56         }
57
58         /* use bssrdf */
59         r -= bsdf_sum;
60
61         float sum = 0.0f;
62
63         for(int i = 0; i < sd->num_closure; i++) {
64                 ShaderClosure *sc = &sd->closure[i];
65                 
66                 if(CLOSURE_IS_BSSRDF(sc->type)) {
67                         sum += sc->sample_weight;
68
69                         if(r <= sum) {
70                                 sd->randb_closure = (r - (sum - sc->sample_weight))/sc->sample_weight;
71
72 #ifdef BSSRDF_MULTI_EVAL
73                                 *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bssrdf_sum: 1.0f;
74 #else
75                                 *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/sc->sample_weight: 1.0f;
76 #endif
77                                 return sc;
78                         }
79                 }
80         }
81
82         /* should never happen */
83         sd->randb_closure = 0.0f;
84         *probability = 1.0f;
85         return NULL;
86 }
87
88 ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
89                                                  ShaderClosure *sc,
90                                                  float disk_r,
91                                                  float r,
92                                                  bool all)
93 {
94 #ifdef BSSRDF_MULTI_EVAL
95         /* this is the veach one-sample model with balance heuristic, some pdf
96          * factors drop out when using balance heuristic weighting */
97         float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
98         float pdf_sum = 0.0f;
99         float sample_weight_sum = 0.0f;
100         int num_bssrdf = 0;
101
102         for(int i = 0; i < sd->num_closure; i++) {
103                 sc = &sd->closure[i];
104                 
105                 if(CLOSURE_IS_BSSRDF(sc->type)) {
106                         float sample_weight = (all)? 1.0f: sc->sample_weight;
107                         sample_weight_sum += sample_weight;
108                 }
109         }
110
111         float sample_weight_inv = 1.0f/sample_weight_sum;
112
113         for(int i = 0; i < sd->num_closure; i++) {
114                 sc = &sd->closure[i];
115                 
116                 if(CLOSURE_IS_BSSRDF(sc->type)) {
117                         /* in case of branched path integrate we sample all bssrdf's once,
118                          * for path trace we pick one, so adjust pdf for that */
119                         float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv;
120
121                         /* compute pdf */
122                         float pdf = bssrdf_pdf(sc, r);
123                         float disk_pdf = bssrdf_pdf(sc, disk_r);
124
125                         /* TODO power heuristic is not working correct here */
126                         eval_sum += sc->weight*pdf; //*sample_weight*disk_pdf;
127                         pdf_sum += sample_weight*disk_pdf; //*sample_weight*disk_pdf;
128
129                         num_bssrdf++;
130                 }
131         }
132
133         return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
134 #else
135         float pdf = bssrdf_pdf(pick_sc, r);
136         float disk_pdf = bssrdf_pdf(pick_sc, disk_r);
137
138         return pick_sc->weight * pdf / disk_pdf;
139 #endif
140 }
141
142 /* replace closures with a single diffuse bsdf closure after scatter step */
143 ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, ShaderClosure *sc, float3 weight, bool hit, float3 N)
144 {
145         sd->flag &= ~SD_CLOSURE_FLAGS;
146         sd->randb_closure = 0.0f;
147         sd->num_closure = 0;
148         sd->num_closure_extra = 0;
149
150         if(hit) {
151                 Bssrdf *bssrdf = (Bssrdf *)sc;
152                 if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) {
153                         PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
154
155                         if(bsdf) {
156                                 bsdf->N = N;
157                                 bsdf->roughness = bssrdf->roughness;
158                                 sd->flag |= bsdf_principled_diffuse_setup(bsdf);
159
160                                 /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
161                                 * can recognize it as not being a regular Disney principled diffuse closure */
162                                 bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
163                         }
164                 }
165                 else if(CLOSURE_IS_BSSRDF(bssrdf->type)) {
166                         DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
167
168                         if(bsdf) {
169                                 bsdf->N = N;
170                                 sd->flag |= bsdf_diffuse_setup(bsdf);
171
172                                 /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
173                                 * can recognize it as not being a regular diffuse closure */
174                                 bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
175                         }
176                 }
177         }
178 }
179
180 /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
181 ccl_device float3 subsurface_color_pow(float3 color, float exponent)
182 {
183         color = max(color, make_float3(0.0f, 0.0f, 0.0f));
184
185         if(exponent == 1.0f) {
186                 /* nothing to do */
187         }
188         else if(exponent == 0.5f) {
189                 color.x = sqrtf(color.x);
190                 color.y = sqrtf(color.y);
191                 color.z = sqrtf(color.z);
192         }
193         else {
194                 color.x = powf(color.x, exponent);
195                 color.y = powf(color.y, exponent);
196                 color.z = powf(color.z, exponent);
197         }
198
199         return color;
200 }
201
202 ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
203                                            ShaderData *sd,
204                                            ccl_addr_space PathState *state,
205                                            int state_flag,
206                                            float3 *eval,
207                                            float3 *N)
208 {
209         /* average color and texture blur at outgoing point */
210         float texture_blur;
211         float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
212
213         /* do we have bump mapping? */
214         bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
215
216         if(bump || texture_blur > 0.0f) {
217                 /* average color and normal at incoming point */
218                 shader_eval_surface(kg, sd, NULL, state, 0.0f, state_flag, SHADER_CONTEXT_SSS);
219                 float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
220
221                 /* we simply divide out the average color and multiply with the average
222                  * of the other one. we could try to do this per closure but it's quite
223                  * tricky to match closures between shader evaluations, their number and
224                  * order may change, this is simpler */
225                 if(texture_blur > 0.0f) {
226                         out_color = subsurface_color_pow(out_color, texture_blur);
227                         in_color = subsurface_color_pow(in_color, texture_blur);
228
229                         *eval *= safe_divide_color(in_color, out_color);
230                 }
231         }
232 }
233
234 /* Subsurface scattering step, from a point on the surface to other
235  * nearby points on the same object.
236  */
237 ccl_device_inline int subsurface_scatter_multi_intersect(
238         KernelGlobals *kg,
239         SubsurfaceIntersection *ss_isect,
240         ShaderData *sd,
241         ShaderClosure *sc,
242         RNG *lcg_state,
243         float disk_u,
244         float disk_v,
245         bool all)
246 {
247         /* pick random axis in local frame and point on disk */
248         float3 disk_N, disk_T, disk_B;
249         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
250
251         disk_N = sd->Ng;
252         make_orthonormals(disk_N, &disk_T, &disk_B);
253
254         /* reusing variable for picking the closure gives a bit nicer stratification
255          * for path tracer, for branched we do all closures so it doesn't help */
256         float axisu = (all)? disk_u: sd->randb_closure;
257
258         if(axisu < 0.5f) {
259                 pick_pdf_N = 0.5f;
260                 pick_pdf_T = 0.25f;
261                 pick_pdf_B = 0.25f;
262                 if(all)
263                         disk_u *= 2.0f;
264         }
265         else if(axisu < 0.75f) {
266                 float3 tmp = disk_N;
267                 disk_N = disk_T;
268                 disk_T = tmp;
269                 pick_pdf_N = 0.25f;
270                 pick_pdf_T = 0.5f;
271                 pick_pdf_B = 0.25f;
272                 if(all)
273                         disk_u = (disk_u - 0.5f)*4.0f;
274         }
275         else {
276                 float3 tmp = disk_N;
277                 disk_N = disk_B;
278                 disk_B = tmp;
279                 pick_pdf_N = 0.25f;
280                 pick_pdf_T = 0.25f;
281                 pick_pdf_B = 0.5f;
282                 if(all)
283                         disk_u = (disk_u - 0.75f)*4.0f;
284         }
285
286         /* sample point on disk */
287         float phi = M_2PI_F * disk_u;
288         float disk_r = disk_v;
289         float disk_height;
290
291         bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
292
293         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
294
295         /* create ray */
296 #ifdef __SPLIT_KERNEL__
297         Ray ray_object = ss_isect->ray;
298         Ray *ray = &ray_object;
299 #else
300         Ray *ray = &ss_isect->ray;
301 #endif
302         ray->P = sd->P + disk_N*disk_height + disk_P;
303         ray->D = -disk_N;
304         ray->t = 2.0f*disk_height;
305         ray->dP = sd->dP;
306         ray->dD = differential3_zero();
307         ray->time = sd->time;
308
309         /* intersect with the same object. if multiple intersections are found it
310          * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
311         scene_intersect_subsurface(kg,
312                                    *ray,
313                                    ss_isect,
314                                    sd->object,
315                                    lcg_state,
316                                    BSSRDF_MAX_HITS);
317         int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
318
319         for(int hit = 0; hit < num_eval_hits; hit++) {
320                 /* Quickly retrieve P and Ng without setting up ShaderData. */
321                 float3 hit_P;
322                 if(sd->type & PRIMITIVE_TRIANGLE) {
323                         hit_P = triangle_refine_subsurface(kg,
324                                                            sd,
325                                                            &ss_isect->hits[hit],
326                                                            ray);
327                 }
328 #ifdef __OBJECT_MOTION__
329                 else  if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
330                         float3 verts[3];
331                         motion_triangle_vertices(
332                                 kg,
333                                 sd->object,
334                                 kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
335                                 sd->time,
336                                 verts);
337                         hit_P = motion_triangle_refine_subsurface(kg,
338                                                                   sd,
339                                                                   &ss_isect->hits[hit],
340                                                                   ray,
341                                                                   verts);
342                 }
343 #endif  /* __OBJECT_MOTION__ */
344                 else {
345                         ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
346                         continue;
347                 }
348
349                 float3 hit_Ng = ss_isect->Ng[hit];
350                 if(ss_isect->hits[hit].object != OBJECT_NONE) {
351                         object_normal_transform(kg, sd, &hit_Ng);
352                 }
353
354                 /* probability densities for local frame axes */
355                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
356                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
357                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
358
359                 /* multiple importance sample between 3 axes, power heuristic
360                  * found to be slightly better than balance heuristic */
361                 float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
362
363                 /* real distance to sampled point */
364                 float r = len(hit_P - sd->P);
365
366                 /* evaluate */
367                 float w = mis_weight / pdf_N;
368                 if(ss_isect->num_hits > BSSRDF_MAX_HITS)
369                         w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
370                 float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
371
372                 ss_isect->weight[hit] = eval;
373         }
374
375 #ifdef __SPLIT_KERNEL__
376         ss_isect->ray = *ray;
377 #endif
378
379         return num_eval_hits;
380 }
381
382 ccl_device_noinline void subsurface_scatter_multi_setup(
383         KernelGlobals *kg,
384         SubsurfaceIntersection* ss_isect,
385         int hit,
386         ShaderData *sd,
387         ccl_addr_space PathState *state,
388         int state_flag,
389         ShaderClosure *sc,
390         bool all)
391 {
392 #ifdef __SPLIT_KERNEL__
393         Ray ray_object = ss_isect->ray;
394         Ray *ray = &ray_object;
395 #else
396         Ray *ray = &ss_isect->ray;
397 #endif
398         /* Setup new shading point. */
399         shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
400
401         /* Optionally blur colors and bump mapping. */
402         float3 weight = ss_isect->weight[hit];
403         float3 N = sd->N;
404         subsurface_color_bump_blur(kg, sd, state, state_flag, &weight, &N);
405
406         /* Setup diffuse BSDF. */
407         subsurface_scatter_setup_diffuse_bsdf(sd, sc, weight, true, N);
408 }
409
410 #ifndef __SPLIT_KERNEL__
411 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
412 ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathState *state,
413         int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
414 {
415         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
416
417         /* pick random axis in local frame and point on disk */
418         float3 disk_N, disk_T, disk_B;
419         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
420
421         disk_N = sd->Ng;
422         make_orthonormals(disk_N, &disk_T, &disk_B);
423
424         if(sd->randb_closure < 0.5f) {
425                 pick_pdf_N = 0.5f;
426                 pick_pdf_T = 0.25f;
427                 pick_pdf_B = 0.25f;
428         }
429         else if(sd->randb_closure < 0.75f) {
430                 float3 tmp = disk_N;
431                 disk_N = disk_T;
432                 disk_T = tmp;
433                 pick_pdf_N = 0.25f;
434                 pick_pdf_T = 0.5f;
435                 pick_pdf_B = 0.25f;
436         }
437         else {
438                 float3 tmp = disk_N;
439                 disk_N = disk_B;
440                 disk_B = tmp;
441                 pick_pdf_N = 0.25f;
442                 pick_pdf_T = 0.25f;
443                 pick_pdf_B = 0.5f;
444         }
445
446         /* sample point on disk */
447         float phi = M_2PI_F * disk_u;
448         float disk_r = disk_v;
449         float disk_height;
450
451         bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
452
453         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
454
455         /* create ray */
456         Ray ray;
457         ray.P = sd->P + disk_N*disk_height + disk_P;
458         ray.D = -disk_N;
459         ray.t = 2.0f*disk_height;
460         ray.dP = sd->dP;
461         ray.dD = differential3_zero();
462         ray.time = sd->time;
463
464         /* intersect with the same object. if multiple intersections are
465          * found it will randomly pick one of them */
466         SubsurfaceIntersection ss_isect;
467         scene_intersect_subsurface(kg, ray, &ss_isect, sd->object, lcg_state, 1);
468
469         /* evaluate bssrdf */
470         if(ss_isect.num_hits > 0) {
471                 float3 origP = sd->P;
472
473                 /* setup new shading point */
474                 shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
475
476                 /* probability densities for local frame axes */
477                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
478                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
479                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
480
481                 /* multiple importance sample between 3 axes, power heuristic
482                  * found to be slightly better than balance heuristic */
483                 float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
484
485                 /* real distance to sampled point */
486                 float r = len(sd->P - origP);
487
488                 /* evaluate */
489                 float w = (mis_weight * ss_isect.num_hits) / pdf_N;
490                 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
491         }
492
493         /* optionally blur colors and bump mapping */
494         float3 N = sd->N;
495         subsurface_color_bump_blur(kg, sd, state, state_flag, &eval, &N);
496
497         /* setup diffuse bsdf */
498         subsurface_scatter_setup_diffuse_bsdf(sd, sc, eval, (ss_isect.num_hits > 0), N);
499 }
500 #endif /* ! __SPLIT_KERNEL__ */
501
502 CCL_NAMESPACE_END
503