2 * Copyright 2011-2013 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef __KERNEL_BSSRDF_H__
18 #define __KERNEL_BSSRDF_H__
22 typedef ccl_addr_space struct Bssrdf {
33 /* Planar Truncated Gaussian
35 * Note how this is different from the typical gaussian, this one integrates
36 * to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky
37 * that integrating x*exp(-x) gives a nice closed form solution. */
39 /* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */
40 #define GAUSS_TRUNCATE 12.46f
42 ccl_device float bssrdf_gaussian_eval(const float radius, float r)
44 /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
45 * = 1 - exp(-Rm*Rm/(2*v)) */
46 const float v = radius*radius*(0.25f*0.25f);
47 const float Rm = sqrtf(v*GAUSS_TRUNCATE);
52 return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v);
55 ccl_device float bssrdf_gaussian_pdf(const float radius, float r)
57 /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
58 const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
60 return bssrdf_gaussian_eval(radius, r) * (1.0f/(area_truncated));
63 ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h)
65 /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
66 * r = sqrt(-2*v*logf(xi)) */
67 const float v = radius*radius*(0.25f*0.25f);
68 const float Rm = sqrtf(v*GAUSS_TRUNCATE);
70 /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
71 const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
74 const float r_squared = -2.0f*v*logf(1.0f - xi*area_truncated);
75 *r = sqrtf(r_squared);
77 /* h^2 + r^2 = Rm^2 */
78 *h = safe_sqrtf(Rm*Rm - r_squared);
81 /* Planar Cubic BSSRDF falloff
83 * This is basically (Rm - x)^3, with some factors to normalize it. For sampling
84 * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
85 * far as I can tell has no closed form solution. So we get an iterative solution
86 * instead with newton-raphson. */
88 ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r)
90 if(sharpness == 0.0f) {
91 const float Rm = radius;
96 /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
97 const float Rm5 = (Rm*Rm) * (Rm*Rm) * Rm;
98 const float f = Rm - r;
99 const float num = f*f*f;
101 return (10.0f * num) / (Rm5 * M_PI_F);
105 float Rm = radius*(1.0f + sharpness);
110 /* custom variation with extra sharpness, to match the previous code */
111 const float y = 1.0f/(1.0f + sharpness);
112 float Rmy, ry, ryinv;
114 if(sharpness == 1.0f) {
117 ryinv = (ry > 0.0f)? 1.0f/ry: 0.0f;
122 ryinv = (r > 0.0f)? powf(r, y - 1.0f): 0.0f;
125 const float Rmy5 = (Rmy*Rmy) * (Rmy*Rmy) * Rmy;
126 const float f = Rmy - ry;
127 const float num = f*(f*f)*(y*ryinv);
129 return (10.0f * num) / (Rmy5 * M_PI_F);
133 ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r)
135 return bssrdf_cubic_eval(radius, sharpness, r);
138 /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
139 ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi)
141 /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
142 * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
143 * should not be too bad */
144 const float tolerance = 1e-6f;
145 const int max_iteration_count = 10;
149 for(i = 0; i < max_iteration_count; i++) {
152 float nx = (1.0f - x);
154 float f = 10.0f*x2 - 20.0f*x3 + 15.0f*x2*x2 - 4.0f*x2*x3 - xi;
155 float f_ = 20.0f*(x*nx)*(nx*nx);
157 if(fabsf(f) < tolerance || f_ == 0.0f)
160 x = saturate(x - f/f_);
166 ccl_device void bssrdf_cubic_sample(const float radius, const float sharpness, float xi, float *r, float *h)
169 float r_ = bssrdf_cubic_quintic_root_find(xi);
171 if(sharpness != 0.0f) {
172 r_ = powf(r_, 1.0f + sharpness);
173 Rm *= (1.0f + sharpness);
179 /* h^2 + r^2 = Rm^2 */
180 *h = safe_sqrtf(Rm*Rm - r_*r_);
183 /* Approximate Reflectance Profiles
184 * http://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf
187 /* This is a bit arbitrary, just need big enough radius so it matches
188 * the mean free length, but still not too big so sampling is still
189 * effective. Might need some further tweaks.
191 #define BURLEY_TRUNCATE 16.0f
192 #define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
194 ccl_device_inline float bssrdf_burley_fitting(float A)
196 /* Diffuse surface transmission, equation (6). */
197 return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
200 /* Scale mean free path length so it gives similar looking result
201 * to Cubic and Gaussian models.
203 ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
205 return 0.25f * M_1_PI_F * r;
208 ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
210 /* Mean free path length. */
211 const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
212 /* Surface albedo. */
213 const float3 A = bssrdf->albedo;
214 const float3 s = make_float3(bssrdf_burley_fitting(A.x),
215 bssrdf_burley_fitting(A.y),
216 bssrdf_burley_fitting(A.z));
218 bssrdf->radius = l / s;
221 ccl_device float bssrdf_burley_eval(const float d, float r)
223 const float Rm = BURLEY_TRUNCATE * d;
228 /* Burley refletance profile, equation (3).
231 * - Surface albedo is already included into sc->weight, no need to
232 * multiply by this term here.
233 * - This is normalized diffuse model, so the equation is mutliplied
234 * by 2*pi, which also matches cdf().
236 float exp_r_3_d = expf(-r / (3.0f * d));
237 float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
238 return (exp_r_d + exp_r_3_d) / (4.0f*d);
241 ccl_device float bssrdf_burley_pdf(const float d, float r)
243 return bssrdf_burley_eval(d, r) * (1.0f/BURLEY_TRUNCATE_CDF);
246 /* Find the radius for desired CDF value.
247 * Returns scaled radius, meaning the result is to be scaled up by d.
248 * Since there's no closed form solution we do Newton-Raphson method to find it.
250 ccl_device_forceinline float bssrdf_burley_root_find(float xi)
252 const float tolerance = 1e-6f;
253 const int max_iteration_count = 10;
254 /* Do initial guess based on manual curve fitting, this allows us to reduce
255 * number of iterations to maximum 4 across the [0..1] range. We keep maximum
256 * number of iteration higher just to be sure we didn't miss root in some
261 r = expf(xi * xi * 2.4f) - 1.0f;
264 /* TODO(sergey): Some nicer curve fit is possible here. */
267 /* Solve against scaled radius. */
268 for(int i = 0; i < max_iteration_count; i++) {
269 float exp_r_3 = expf(-r / 3.0f);
270 float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
271 float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
272 float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
274 if(fabsf(f) < tolerance || f_ == 0.0f) {
286 ccl_device void bssrdf_burley_sample(const float d,
291 const float Rm = BURLEY_TRUNCATE * d;
292 const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
296 /* h^2 + r^2 = Rm^2 */
297 *h = safe_sqrtf(Rm*Rm - r_*r_);
300 /* None BSSRDF falloff
302 * Samples distributed over disk with no falloff, for reference. */
304 ccl_device float bssrdf_none_eval(const float radius, float r)
306 const float Rm = radius;
307 return (r < Rm)? 1.0f: 0.0f;
310 ccl_device float bssrdf_none_pdf(const float radius, float r)
312 /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
313 const float Rm = radius;
314 const float area = (M_PI_F*Rm*Rm);
316 return bssrdf_none_eval(radius, r) / area;
319 ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h)
321 /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
323 const float Rm = radius;
324 const float r_ = sqrtf(xi)*Rm;
328 /* h^2 + r^2 = Rm^2 */
329 *h = safe_sqrtf(Rm*Rm - r_*r_);
334 ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
336 Bssrdf *bssrdf = (Bssrdf*)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
342 float sample_weight = fabsf(average(weight));
343 bssrdf->sample_weight = sample_weight;
344 return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
347 ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type)
350 int bssrdf_channels = 3;
351 float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
353 /* Verify if the radii are large enough to sample without precision issues. */
354 if(bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
355 diffuse_weight.x = bssrdf->weight.x;
356 bssrdf->weight.x = 0.0f;
357 bssrdf->radius.x = 0.0f;
360 if(bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
361 diffuse_weight.y = bssrdf->weight.y;
362 bssrdf->weight.y = 0.0f;
363 bssrdf->radius.y = 0.0f;
366 if(bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
367 diffuse_weight.z = bssrdf->weight.z;
368 bssrdf->weight.z = 0.0f;
369 bssrdf->radius.z = 0.0f;
373 if(bssrdf_channels < 3) {
374 /* Add diffuse BSDF if any radius too small. */
375 #ifdef __PRINCIPLED__
376 if(type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
377 type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
379 float roughness = bssrdf->roughness;
380 float3 N = bssrdf->N;
382 PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
385 bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
387 bsdf->roughness = roughness;
388 flag |= bsdf_principled_diffuse_setup(bsdf);
392 #endif /* __PRINCIPLED__ */
394 DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
397 bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
399 flag |= bsdf_diffuse_setup(bsdf);
404 /* Setup BSSRDF if radius is large enough. */
405 if(bssrdf_channels > 0) {
407 bssrdf->channels = bssrdf_channels;
408 bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
409 bssrdf->texture_blur = saturate(bssrdf->texture_blur);
410 bssrdf->sharpness = saturate(bssrdf->sharpness);
412 if(type == CLOSURE_BSSRDF_BURLEY_ID ||
413 type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
414 type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
415 type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
417 bssrdf_burley_setup(bssrdf);
424 bssrdf->sample_weight = 0.0f;
430 ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h)
432 const Bssrdf *bssrdf = (const Bssrdf*)sc;
435 /* Sample color channel and reuse random number. Only a subset of channels
436 * may be used if their radius was too small to handle as BSSRDF. */
437 xi *= bssrdf->channels;
440 radius = (bssrdf->radius.x > 0.0f)? bssrdf->radius.x:
441 (bssrdf->radius.y > 0.0f)? bssrdf->radius.y:
446 radius = (bssrdf->radius.x > 0.0f)? bssrdf->radius.y:
451 radius = bssrdf->radius.z;
455 if(bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
456 bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
458 else if(bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID){
459 bssrdf_gaussian_sample(radius, xi, r, h);
461 else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
462 bssrdf_burley_sample(radius, xi, r, h);
466 ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
471 else if(bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
472 return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
474 else if(bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
475 return bssrdf_gaussian_pdf(radius, r);
477 else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
478 return bssrdf_burley_pdf(radius, r);
482 ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r)
484 const Bssrdf *bssrdf = (const Bssrdf*)sc;
487 bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
488 bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
489 bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
492 ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
494 const Bssrdf *bssrdf = (const Bssrdf*)sc;
495 float3 pdf = bssrdf_eval(sc, r);
497 return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
502 #endif /* __KERNEL_BSSRDF_H__ */