Cycles: Subsurface Scattering
author Brecht Van Lommel <brechtvanlommel@pandora.be>
Sun, 18 Aug 2013 14:15:57 +0000 (14:15 +0000)
committer Brecht Van Lommel <brechtvanlommel@pandora.be>
Sun, 18 Aug 2013 14:15:57 +0000 (14:15 +0000)
New features:

* Bump mapping now works with SSS
* Texture Blur factor for SSS, see the documentation for details:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Shaders#Subsurface_Scattering

Work in progress for feedback:

Initial implementation of the "BSSRDF Importance Sampling" paper, which uses
a different importance sampling method. It gives better quality results in
many ways, with the availability of both Cubic and Gaussian falloff functions,
but also tends to be more noisy when using the progressive integrator and does
not give great results with some geometry. It works quite well for the
non-progressive integrator and is often less noisy there.

This code may still change a lot, so unless you're testing it, it may be best to
stick to the Compatible falloff function.

Skin test render and file that takes advantage of the gaussian falloff:
http://www.pasteall.org/pic/show.php?id=57661
http://www.pasteall.org/pic/show.php?id=57662
http://www.pasteall.org/blend/23501

36 files changed:
intern/cycles/blender/blender_object.cpp
intern/cycles/blender/blender_shader.cpp
intern/cycles/kernel/closure/bsdf_microfacet.h
intern/cycles/kernel/closure/bssrdf.h
intern/cycles/kernel/kernel_bvh.h
intern/cycles/kernel/kernel_bvh_subsurface.h [new file with mode: 0644]
intern/cycles/kernel/kernel_bvh_traversal.h
intern/cycles/kernel/kernel_montecarlo.h
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_random.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/kernel/kernel_subsurface.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/osl/osl_bssrdf.cpp
intern/cycles/kernel/osl/osl_bssrdf.h
intern/cycles/kernel/osl/osl_closures.cpp
intern/cycles/kernel/osl/osl_closures.h
intern/cycles/kernel/osl/osl_shader.cpp
intern/cycles/kernel/shaders/node_subsurface_scattering.osl
intern/cycles/kernel/shaders/stdosl.h
intern/cycles/kernel/svm/svm_closure.h
intern/cycles/kernel/svm/svm_types.h
intern/cycles/render/bssrdf.cpp
intern/cycles/render/graph.h
intern/cycles/render/nodes.cpp
intern/cycles/render/nodes.h
intern/cycles/render/osl.cpp
intern/cycles/render/shader.cpp
intern/cycles/render/shader.h
intern/cycles/render/svm.cpp
intern/cycles/util/util_math.h
source/blender/editors/space_node/drawnode.c
source/blender/makesdna/DNA_node_types.h
source/blender/makesrna/intern/rna_nodetree.c
source/blender/nodes/NOD_static_types.h
source/blender/nodes/shader/nodes/node_shader_subsurface_scattering.c

index 3410c73fbde5a02e25a8805e641850b5ad44e771..08ad2bab22e3809656dc72b6aa81a17498683056 100644 (file)
@@ -298,7 +298,7 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P
                        object->random_id = hash_int_2d(object->random_id, 0);
 
                /* visibility flags for both parent */
-               object->visibility = object_ray_visibility(b_ob) & PATH_RAY_ALL;
+               object->visibility = object_ray_visibility(b_ob) & PATH_RAY_ALL_VISIBILITY;
                if(b_parent.ptr.data != b_ob.ptr.data) {
                        object->visibility &= object_ray_visibility(b_parent);
                        object->random_id ^= hash_int(hash_string(b_parent.name().c_str()));
@@ -306,7 +306,7 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P
 
                /* make holdout objects on excluded layer invisible for non-camera rays */
                if(use_holdout && (layer_flag & render_layer.exclude_layer))
-                       object->visibility &= ~(PATH_RAY_ALL - PATH_RAY_CAMERA);
+                       object->visibility &= ~(PATH_RAY_ALL_VISIBILITY - PATH_RAY_CAMERA);
 
                /* camera flag is not actually used, instead is tested
                 * against render layer flags */
index 469ba15d291fa7a7746f892495faf18935f08f95..b5ea46e096aa746f357350f2947fe2752de485d3 100644 (file)
@@ -317,7 +317,23 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen
                node = new DiffuseBsdfNode();
        }
        else if (b_node.is_a(&RNA_ShaderNodeSubsurfaceScattering)) {
-               node = new SubsurfaceScatteringNode();
+               BL::ShaderNodeSubsurfaceScattering b_subsurface_node(b_node);
+
+               SubsurfaceScatteringNode *subsurface = new SubsurfaceScatteringNode();
+
+               switch(b_subsurface_node.falloff()) {
+               case BL::ShaderNodeSubsurfaceScattering::falloff_COMPATIBLE:
+                       subsurface->closure = CLOSURE_BSSRDF_COMPATIBLE_ID;
+                       break;
+               case BL::ShaderNodeSubsurfaceScattering::falloff_CUBIC:
+                       subsurface->closure = CLOSURE_BSSRDF_CUBIC_ID;
+                       break;
+               case BL::ShaderNodeSubsurfaceScattering::falloff_GAUSSIAN:
+                       subsurface->closure = CLOSURE_BSSRDF_GAUSSIAN_ID;
+                       break;
+               }
+
+               node = subsurface;
        }
        else if (b_node.is_a(&RNA_ShaderNodeBsdfGlossy)) {
                BL::ShaderNodeBsdfGlossy b_glossy_node(b_node);
index 915b9eafbc1f25cd1eb32d69d0491b04ce265cdc..b159f585831000e129acb68005ddb7e336d2eda3 100644 (file)
@@ -37,11 +37,6 @@ CCL_NAMESPACE_BEGIN
 
 /* GGX */
 
-__device_inline float safe_sqrtf(float f)
-{
-       return sqrtf(max(f, 0.0f));
-}
-
 __device int bsdf_microfacet_ggx_setup(ShaderClosure *sc)
 {
        sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* m_ag */
index 486de4ca65f617c1e4c0fed13c3ca5ac10b98387..23b932a91c6a858e94064d52adc577f8f70e31f8 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
-__device int bssrdf_setup(ShaderClosure *sc)
+__device int bssrdf_setup(ShaderClosure *sc, ClosureType type)
 {
        if(sc->data0 < BSSRDF_MIN_RADIUS) {
                /* revert to diffuse BSDF if radius too small */
                sc->data0 = 0.0f;
                sc->data1 = 0.0f;
-               return bsdf_diffuse_setup(sc);
+               int flag = bsdf_diffuse_setup(sc);
+               sc->type = CLOSURE_BSDF_BSSRDF_ID;
+               return flag;
        }
        else {
-               /* IOR param */
-               sc->data1 = max(sc->data1, 1.0f);
-               sc->type = CLOSURE_BSSRDF_ID;
+               sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* texture blur */
+               sc->type = type;
 
                return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF;
        }
 }
 
-/* Simple Cubic BSSRDF falloff */
+/* Planar Truncated Gaussian
+ *
+ * Note how this is different from the typical gaussian: this one integrates
+ * to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky
+ * that integrating x*exp(-x*x) gives a nice closed form solution. */
+
+/* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */
+#define GAUSS_TRUNCATE 12.46f
 
-__device float bssrdf_cubic(float ld, float r)
+__device float bssrdf_gaussian_eval(ShaderClosure *sc, float r)
 {
-       if(ld == 0.0f)
-               return (r == 0.0f)? 1.0f: 0.0f;
+       /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
+        * = 1 - exp(-Rm*Rm/(2*v)) */
+       const float v = sc->data0;
+       const float Rm = sqrtf(v*GAUSS_TRUNCATE);
+
+       if(r >= Rm)
+               return 0.0f;
 
-       return powf(ld - min(r, ld), 3.0f) * 4.0f/powf(ld, 4.0f);
+       return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v);
 }
 
-/* Original BSSRDF fallof function */
-
-typedef struct BSSRDFParams {
-       float eta;              /* index of refraction */
-       float sigma_t_; /* reduced extinction coefficient */
-       float sigma_tr; /* effective extinction coefficient */
-       float Fdr;              /* diffuse fresnel reflectance */
-       float D;                /* diffusion constant */
-       float A;
-       float alpha_;   /* reduced albedo */
-       float zr;               /* distance of virtual lightsource above surface */
-       float zv;               /* distance of virtual lightsource below surface */
-       float ld;               /* mean free path */
-       float ro;               /* diffuse reflectance */
-} BSSRDFParams;
-
-__device float bssrdf_reduced_albedo_Rd(float alpha_, float A, float ro)
+__device float bssrdf_gaussian_pdf(ShaderClosure *sc, float r)
 {
-       float sq;
+       /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
+       const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
+
+       return bssrdf_gaussian_eval(sc, r) * (1.0f/(area_truncated));
+}
+
+__device void bssrdf_gaussian_sample(ShaderClosure *sc, float xi, float *r, float *h)
+{
+       /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
+        * r = sqrt(-2*v*logf(xi)) */
+
+       const float v = sc->data0;
+       const float Rm = sqrtf(v*GAUSS_TRUNCATE);
+
+       /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
+       const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
+
+       /* r(xi) */
+       const float r_squared = -2.0f*v*logf(1.0f - xi*area_truncated);
+       *r = sqrtf(r_squared);
+
+        /* h^2 + r^2 = Rm^2 */
+        *h = sqrtf(Rm*Rm - r_squared);
+}
+
+/* Planar Cubic BSSRDF falloff
+ *
+ * This is basically (Rm - x)^3, with some factors to normalize it. For sampling
+ * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
+ * far as I can tell has no closed form solution. So we get an iterative solution
+ * instead with newton-raphson. */
+
+__device float bssrdf_cubic_eval(ShaderClosure *sc, float r)
+{
+       const float Rm = sc->data0;
+
+       if(r >= Rm)
+               return 0.0f;
+       
+       /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
+       const float Rm5 = (Rm*Rm) * (Rm*Rm) * Rm;
+       const float f = Rm - min(r, Rm);
+       const float f3 = f*f*f;
 
-       sq = sqrtf(3.0f*(1.0f - alpha_));
-       return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro;
+       return (f3 * 10.0f) / (Rm5 * M_PI_F);
 }
 
-__device float bssrdf_compute_reduced_albedo(float A, float ro)
+__device float bssrdf_cubic_pdf(ShaderClosure *sc, float r)
 {
-       const float tolerance = 1e-8f;
-       const int max_iteration_count = 20;
-       float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1;
+       return bssrdf_cubic_eval(sc, r);
+}
+
+/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
+__device float bssrdf_cubic_quintic_root_find(float xi)
+{
+       /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
+        * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
+        * should not be too bad */
+       const float tolerance = 1e-6f;
+       const int max_iteration_count = 10;
+       float x = 0.25f;
        int i;
 
-       /* use secant method to compute reduced albedo using Rd function inverse
-        * with a given reflectance */
-       fxn = bssrdf_reduced_albedo_Rd(xn, A, ro);
-       fxn_1 = bssrdf_reduced_albedo_Rd(xn_1, A, ro);
+       for (i = 0; i < max_iteration_count; i++) {
+               float x2 = x*x;
+               float x3 = x2*x;
+               float nx = (1.0f - x);
 
-       for (i= 0; i < max_iteration_count; i++) {
-               fsub = (fxn - fxn_1);
-               if (fabsf(fsub) < tolerance)
-                       break;
-               d = ((xn - xn_1)/fsub)*fxn;
-               if (fabsf(d) < tolerance)
-                       break;
+               float f = 10.0f*x2 - 20.0f*x3 + 15.0f*x2*x2 - 4.0f*x2*x3 - xi;
+               float f_ = 20.0f*(x*nx)*(nx*nx);
 
-               xn_1 = xn;
-               fxn_1 = fxn;
-               xn = xn - d;
+               if(fabsf(f) < tolerance || f_ == 0.0f)
+                       break;
 
-               if (xn > 1.0f) xn = 1.0f;
-               if (xn_1 > 1.0f) xn_1 = 1.0f;
-               
-               fxn = bssrdf_reduced_albedo_Rd(xn, A, ro);
+               x = clamp(x - f/f_, 0.0f, 1.0f);
        }
 
-       /* avoid division by zero later */
-       if (xn <= 0.0f)
-               xn = 0.00001f;
-
-       return xn;
+       return x;
 }
 
-__device void bssrdf_setup_params(BSSRDFParams *ss, float refl, float radius, float ior)
+__device void bssrdf_cubic_sample(ShaderClosure *sc, float xi, float *r, float *h)
 {
-       ss->eta = ior;
-       ss->Fdr = -1.440f/ior*ior + 0.710f/ior + 0.668f + 0.0636f*ior;
-       ss->A = (1.0f + ss->Fdr)/(1.0f - ss->Fdr);
-       ss->ld = radius;
-       ss->ro = min(refl, 0.999f);
+       const float Rm = sc->data0;
+       const float r_ = bssrdf_cubic_quintic_root_find(xi) * Rm;
 
-       ss->alpha_ = bssrdf_compute_reduced_albedo(ss->A, ss->ro);
+       *r = r_;
 
-       ss->sigma_tr = 1.0f/ss->ld;
-       ss->sigma_t_ = ss->sigma_tr/sqrtf(3.0f*(1.0f - ss->alpha_));
+       /* h^2 + r^2 = Rm^2 */
+       *h = sqrtf(Rm*Rm - r_*r_);
+}
 
-       ss->D = 1.0f/(3.0f*ss->sigma_t_);
+/* None BSSRDF falloff
+ * 
+ * Samples distributed over disk with no falloff, for reference. */
 
-       ss->zr = 1.0f/ss->sigma_t_;
-       ss->zv = ss->zr + 4.0f*ss->A*ss->D;
+__device float bssrdf_none_eval(ShaderClosure *sc, float r)
+{
+       const float Rm = sc->data0;
+       return (r < Rm)? 1.0f: 0.0f;
 }
 
-/* exponential falloff function */
+__device float bssrdf_none_pdf(ShaderClosure *sc, float r)
+{
+       /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
+       const float Rm = sc->data0;
+       const float area = (M_PI_F*Rm*Rm);
+
+       return bssrdf_none_eval(sc, r) / area;
+}
 
-__device float bssrdf_original(const BSSRDFParams *ss, float r)
+__device void bssrdf_none_sample(ShaderClosure *sc, float xi, float *r, float *h)
 {
-       if(ss->ld == 0.0f)
-               return (r == 0.0f)? 1.0f: 0.0f;
+       /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
+        * r = sqrt(xi)*Rm */
+       const float Rm = sc->data0;
+       const float r_ = sqrtf(xi)*Rm;
+
+       *r = r_;
 
-       float rr = r*r;
-       float sr, sv, Rdr, Rdv;
+       /* h^2 + r^2 = Rm^2 */
+       *h = sqrtf(Rm*Rm - r_*r_);
+}
 
-       sr = sqrtf(rr + ss->zr*ss->zr);
-       sv = sqrtf(rr + ss->zv*ss->zv);
+/* Generic */
 
-       Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr);
-       Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv);
+__device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h)
+{
+       if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
+               bssrdf_cubic_sample(sc, xi, r, h);
+       else
+               bssrdf_gaussian_sample(sc, xi, r, h);
+}
 
-       return ss->alpha_*(1.0f/M_4PI_F)*(Rdr + Rdv);
+__device float bssrdf_pdf(ShaderClosure *sc, float r)
+{
+       if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
+               return bssrdf_cubic_pdf(sc, r);
+       else
+               return bssrdf_gaussian_pdf(sc, r);
 }
 
 CCL_NAMESPACE_END
index f0f1fcd4c0a6ad8bf4fee3b2ce865fd84ebc7345..4cc92254b016ec3fdf6b6c132c495ae985ff7b4c 100644 (file)
@@ -488,7 +488,7 @@ __device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersectio
 
                        /*stochastic fade from minimum width*/
                        if(lcg_state && coverage != 1.0f) {
-                               if(lcg_step(lcg_state) > coverage)
+                               if(lcg_step_float(lcg_state) > coverage)
                                        return hit;
                        }
 
@@ -640,7 +640,7 @@ __device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect,
                float adjradius = or1 + z * (or2 - or1) / l;
                adjradius = adjradius / (r1 + z * gd);
                if(lcg_state && adjradius != 1.0f) {
-                       if(lcg_step(lcg_state) > adjradius)
+                       if(lcg_step_float(lcg_state) > adjradius)
                                return false;
                }
                /* --- */
@@ -690,8 +690,8 @@ __device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect,
  * only want to intersect with primitives in the same object, and if case of
  * multiple hits we pick a single random primitive as the intersection point. */
 
-__device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect,
-       float3 P, float3 idir, int object, int triAddr, float tmax, int *num_hits, float subsurface_random)
+__device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
+       float3 P, float3 idir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
 {
        /* compute and check intersection t-value */
        float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
@@ -718,20 +718,30 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters
                        if(v >= 0.0f && u + v <= 1.0f) {
                                (*num_hits)++;
 
-                               if(subsurface_random * (*num_hits) <= 1.0f) {
-                                       /* record intersection */
-                                       isect->prim = triAddr;
-                                       isect->object = object;
-                                       isect->u = u;
-                                       isect->v = v;
-                                       isect->t = t;
-                                       return true;
+                               int hit;
+
+                               if(*num_hits <= max_hits) {
+                                       hit = *num_hits - 1;
                                }
+                               else {
+                                       /* reservoir sampling: if we are at the maximum number of
+                                        * hits, randomly replace element or skip it */
+                                       hit = lcg_step_uint(lcg_state) % *num_hits;
+
+                                       if(hit >= max_hits)
+                                               return;
+                               }
+
+                               /* record intersection */
+                               Intersection *isect = &isect_array[hit];
+                               isect->prim = triAddr;
+                               isect->object = object;
+                               isect->u = u;
+                               isect->v = v;
+                               isect->t = t;
                        }
                }
        }
-
-       return false;
 }
 #endif
 
@@ -741,7 +751,6 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters
 #define BVH_MOTION                             2
 #define BVH_HAIR                               4
 #define BVH_HAIR_MINIMUM_WIDTH 8
-#define BVH_SUBSURFACE                 16
 
 #define BVH_FUNCTION_NAME bvh_intersect
 #define BVH_FUNCTION_FEATURES 0
@@ -773,32 +782,31 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters
 
 #if defined(__SUBSURFACE__)
 #define BVH_FUNCTION_NAME bvh_intersect_subsurface
-#define BVH_FUNCTION_FEATURES BVH_SUBSURFACE
-#include "kernel_bvh_traversal.h"
+#include "kernel_bvh_subsurface.h"
 #endif
 
 #if defined(__SUBSURFACE__) && defined(__INSTANCING__)
 #define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE
-#include "kernel_bvh_traversal.h"
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "kernel_bvh_subsurface.h"
 #endif
 
 #if defined(__SUBSURFACE__) && defined(__HAIR__)
 #define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-#include "kernel_bvh_traversal.h"
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
+#include "kernel_bvh_subsurface.h"
 #endif
 
 #if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
 #define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_MOTION
-#include "kernel_bvh_traversal.h"
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "kernel_bvh_subsurface.h"
 #endif
 
 #if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
 #define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-#include "kernel_bvh_traversal.h"
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
+#include "kernel_bvh_subsurface.h"
 #endif
 
 
@@ -844,38 +852,38 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui
 }
 
 #ifdef __SUBSURFACE__
-__device_inline int scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, float subsurface_random)
+__device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
 {
 #ifdef __OBJECT_MOTION__
        if(kernel_data.bvh.have_motion) {
 #ifdef __HAIR__
                if(kernel_data.bvh.have_curves)
-                       return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, subsurface_random);
+                       return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
 #endif /* __HAIR__ */
 
-               return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, subsurface_random);
+               return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
        }
 #endif /* __OBJECT_MOTION__ */
 
 #ifdef __HAIR__ 
        if(kernel_data.bvh.have_curves)
-               return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, subsurface_random);
+               return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
 #endif /* __HAIR__ */
 
 #ifdef __KERNEL_CPU__
 
 #ifdef __INSTANCING__
        if(kernel_data.bvh.have_instancing)
-               return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, subsurface_random);
+               return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
 #endif /* __INSTANCING__ */
 
-       return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, subsurface_random);
+       return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
 #else /* __KERNEL_CPU__ */
 
 #ifdef __INSTANCING__
-       return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, subsurface_random);
+       return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
 #else
-       return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, subsurface_random);
+       return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
 #endif /* __INSTANCING__ */
 
 #endif /* __KERNEL_CPU__ */
@@ -980,6 +988,51 @@ __device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, co
 #endif
 }
 
+/* same as above, except that isect->t is assumed to be in object space for instancing */
+__device_inline float3 bvh_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
+{
+       float3 P = ray->P;
+       float3 D = ray->D;
+       float t = isect->t;
+
+#ifdef __INTERSECTION_REFINE__
+       if(isect->object != ~0) {
+#ifdef __OBJECT_MOTION__
+               Transform tfm = sd->ob_itfm;
+#else
+               Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#endif
+
+               P = transform_point(&tfm, P);
+               D = transform_direction(&tfm, D);
+               D = normalize(D);
+       }
+
+       P = P + D*t;
+
+       float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
+       float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
+       float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
+       float rt = Oz * invDz;
+
+       P = P + D*rt;
+
+       if(isect->object != ~0) {
+#ifdef __OBJECT_MOTION__
+               Transform tfm = sd->ob_tfm;
+#else
+               Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#endif
+
+               P = transform_point(&tfm, P);
+       }
+
+       return P;
+#else
+       return P + D*t;
+#endif
+}
+
 #ifdef __HAIR__
 
 __device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
diff --git a/intern/cycles/kernel/kernel_bvh_subsurface.h b/intern/cycles/kernel/kernel_bvh_subsurface.h
new file mode 100644 (file)
index 0000000..ac30bea
--- /dev/null
@@ -0,0 +1,308 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2013, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function for subsurface scattering, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ * BVH_FUNCTION_NAME and BVH_FUNCTION_FEATURES must be defined before this code; features are:
+ * BVH_INSTANCING: object instancing
+ * BVH_MOTION: motion blur rendering
+ * Only triangles whose object equals subsurface_object are tested. Returns num_hits, the counter
+ * passed to bvh_triangle_intersect_subsurface (presumably the number of recorded hits - confirm there). */
+
+#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
+
+__device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersection *isect_array,
+       int subsurface_object, uint *lcg_state, int max_hits)
+{
+       /* todo:
+        * - test if pushing distance on the stack helps (for non shadow rays)
+        * - separate version for shadow rays
+        * - likely and unlikely for if() statements
+        * - SSE for hair
+        * - test restrict attribute for pointers
+        */
+       
+       /* traversal stack in CUDA thread-local memory */
+       int traversalStack[BVH_STACK_SIZE];
+       traversalStack[0] = ENTRYPOINT_SENTINEL; /* popping this entry ends the whole traversal */
+
+       /* traversal variables in registers */
+       int stackPtr = 0;
+       int nodeAddr = kernel_data.bvh.root;
+
+       /* ray parameters in registers */
+       const float tmax = ray->t; /* const: the same limit is used for every box test in this function */
+       float3 P = ray->P;
+       float3 idir = bvh_inverse_direction(ray->D);
+       int object = ~0; /* ~0 while at the top level; set to subsurface_object between instance push and pop */
+
+       const uint visibility = ~0; /* all bits set, so the node mask test below only rejects masks that are zero */
+       uint num_hits = 0;
+
+#if FEATURE(BVH_MOTION)
+       Transform ob_tfm;
+#endif
+
+#if defined(__KERNEL_SSE2__)
+       const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+       const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+       
+       const __m128i pn = _mm_set_epi32(0x80000000, 0x80000000, 0x00000000, 0x00000000); /* sign-bit mask: XOR with it negates the upper two lanes */
+       __m128 Psplat[3], idirsplat[3];
+
+       Psplat[0] = _mm_set_ps1(P.x);
+       Psplat[1] = _mm_set_ps1(P.y);
+       Psplat[2] = _mm_set_ps1(P.z);
+
+       idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
+       idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
+       idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
+
+       __m128 tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f); /* lower lanes: 0.0f, upper lanes: -tmax */
+
+       shuffle_swap_t shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
+       shuffle_swap_t shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
+       shuffle_swap_t shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
+#endif
+
+       /* traversal loop */
+       do {
+               do
+               {
+                       /* traverse internal nodes */
+                       while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL)
+                       {
+                               bool traverseChild0, traverseChild1;
+                               int nodeAddrChild1;
+
+#if !defined(__KERNEL_SSE2__)
+                               /* Intersect two child bounding boxes, non-SSE version */
+                               float t = tmax;
+
+                               /* fetch node data */
+                               float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
+                               float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
+                               float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
+                               float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
+
+                               /* intersect ray against child nodes */
+                               NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
+                               NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
+                               NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
+                               NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
+                               NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
+                               NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
+                               NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+                               NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+                               NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
+                               NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
+                               NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
+                               NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
+                               NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
+                               NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
+                               NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+                               NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+                               /* decide which nodes to traverse next */
+#ifdef __VISIBILITY_FLAG__
+                               /* this visibility test gives a 5% performance hit, how to solve? */
+                               traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
+                               traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
+#else
+                               traverseChild0 = (c0max >= c0min);
+                               traverseChild1 = (c1max >= c1min);
+#endif
+
+#else // __KERNEL_SSE2__
+                               /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+
+                               /* fetch node data */
+                               __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+                               float4 cnodes = ((float4*)bvh_nodes)[3];
+
+                               /* intersect ray against child nodes */
+                               const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflex), Psplat[0]), idirsplat[0]);
+                               const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shuffley), Psplat[1]), idirsplat[1]);
+                               const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflez), Psplat[2]), idirsplat[2]);
+
+                               const __m128 tminmax = _mm_xor_ps(_mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat)), _mm_castsi128_ps(pn));
+                               const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle_swap(tminmax, shuf_swap));
+
+                               /* decide which nodes to traverse next */
+#ifdef __VISIBILITY_FLAG__
+                               /* this visibility test gives a 5% performance hit, how to solve? */
+                               traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
+                               traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
+#else
+                               traverseChild0 = (_mm_movemask_ps(lrhit) & 1);
+                               traverseChild1 = (_mm_movemask_ps(lrhit) & 2);
+#endif
+#endif // __KERNEL_SSE2__
+
+                               nodeAddr = __float_as_int(cnodes.x); /* child 0 address; replaced below unless child 0 is the one to visit */
+                               nodeAddrChild1 = __float_as_int(cnodes.y);
+
+                               if(traverseChild0 && traverseChild1) {
+                                       /* both children were intersected, push the farther one */
+#if !defined(__KERNEL_SSE2__)
+                                       bool closestChild1 = (c1min < c0min);
+#else
+                                       union { __m128 m128; float v[4]; } uminmax;
+                                       uminmax.m128 = tminmax;
+                                       bool closestChild1 = uminmax.v[1] < uminmax.v[0];
+#endif
+
+                                       if(closestChild1) {
+                                               int tmp = nodeAddr;
+                                               nodeAddr = nodeAddrChild1;
+                                               nodeAddrChild1 = tmp;
+                                       }
+
+                                       ++stackPtr; /* NOTE(review): no bounds check against BVH_STACK_SIZE is visible here */
+                                       traversalStack[stackPtr] = nodeAddrChild1;
+                               }
+                               else {
+                                       /* one child was intersected */
+                                       if(traverseChild1) {
+                                               nodeAddr = nodeAddrChild1;
+                                       }
+                                       else if(!traverseChild0) {
+                                               /* neither child was intersected */
+                                               nodeAddr = traversalStack[stackPtr];
+                                               --stackPtr;
+                                       }
+                               }
+                       }
+
+                       /* if node is leaf, fetch triangle list */
+                       if(nodeAddr < 0) {
+                               float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1));
+                               int primAddr = __float_as_int(leaf.x);
+
+#if FEATURE(BVH_INSTANCING)
+                               if(primAddr >= 0) {
+#endif
+                                       int primAddr2 = __float_as_int(leaf.y); /* primitives primAddr..primAddr2-1 belong to this leaf */
+
+                                       /* pop */
+                                       nodeAddr = traversalStack[stackPtr];
+                                       --stackPtr;
+
+                                       /* primitive intersection */
+                                       while(primAddr < primAddr2) {
+                                               /* only primitives from the same object */
+                                               uint tri_object = (object == ~0)? kernel_tex_fetch(__prim_object, primAddr): object; /* top level: per-primitive lookup; inside an instance: that instance's object */
+
+                                               if(tri_object == subsurface_object) {
+
+                                                       /* intersect ray against primitive */
+                                                       bvh_triangle_intersect_subsurface(kg, isect_array, P, idir, object, primAddr, tmax, &num_hits, lcg_state, max_hits);
+                                               }
+
+                                               primAddr++;
+                                       }
+                               }
+#if FEATURE(BVH_INSTANCING)
+                               else {
+                                       /* instance push */
+                                       if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { /* negative primAddr marks an instance leaf; other objects are skipped entirely */
+                                               object = subsurface_object;
+
+                                               float t_ignore = FLT_MAX; /* scratch distance for the push helper; not read afterwards in this function */
+#if FEATURE(BVH_MOTION)
+                                               bvh_instance_motion_push(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax);
+#else
+                                               bvh_instance_push(kg, object, ray, &P, &idir, &t_ignore, tmax);
+#endif
+
+#if defined(__KERNEL_SSE2__)
+                                               Psplat[0] = _mm_set_ps1(P.x);
+                                               Psplat[1] = _mm_set_ps1(P.y);
+                                               Psplat[2] = _mm_set_ps1(P.z);
+
+                                               idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
+                                               idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
+                                               idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
+
+                                               tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f); /* NOTE(review): tmax is const, so the same value is reused after the push - confirm no rescale is needed */
+
+                                               shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
+                                               shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
+                                               shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
+#endif
+
+                                               ++stackPtr;
+                                               traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; /* popping this ends the inner loop so the instance pop below can run */
+
+                                               nodeAddr = kernel_tex_fetch(__object_node, object);
+                                       }
+                                       else {
+                                               /* pop */
+                                               nodeAddr = traversalStack[stackPtr];
+                                               --stackPtr;
+                                       }
+                               }
+                       }
+#endif
+               } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if FEATURE(BVH_INSTANCING)
+               if(stackPtr >= 0) { /* a sentinel was popped but entries remain: it was the one pushed for an instance */
+                       kernel_assert(object != ~0);
+
+                       /* instance pop */
+                       float t_ignore = FLT_MAX;
+#if FEATURE(BVH_MOTION)
+                       bvh_instance_motion_pop(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax);
+#else
+                       bvh_instance_pop(kg, object, ray, &P, &idir, &t_ignore, tmax);
+#endif
+
+#if defined(__KERNEL_SSE2__)
+                       Psplat[0] = _mm_set_ps1(P.x);
+                       Psplat[1] = _mm_set_ps1(P.y);
+                       Psplat[2] = _mm_set_ps1(P.z);
+
+                       idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
+                       idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
+                       idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
+
+                       tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f);
+
+                       shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
+                       shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
+                       shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
+#endif
+
+                       object = ~0; /* back at the top level */
+                       nodeAddr = traversalStack[stackPtr];
+                       --stackPtr;
+               }
+#endif
+       } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+       return num_hits;
+}
+
+#undef FEATURE
+#undef BVH_FUNCTION_NAME
+#undef BVH_FUNCTION_FEATURES
+
index cfca405e7a53f3fe0059217fc5f060262a898cab..a9264f318eb66c6a7330023c228a5ed69fe29b86 100644 (file)
@@ -24,7 +24,6 @@
  * BVH_INSTANCING: object instancing
  * BVH_HAIR: hair curve rendering
  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
- * BVH_SUBSURFACE: subsurface same object, random triangle intersection
  * BVH_MOTION: motion blur rendering
  *
  */
 #define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
 
 __device bool BVH_FUNCTION_NAME
-(KernelGlobals *kg, const Ray *ray, Intersection *isect
-#if FEATURE(BVH_SUBSURFACE)
-, int subsurface_object, float subsurface_random
-#else
-, const uint visibility
-#endif
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) && !FEATURE(BVH_SUBSURFACE)
+(KernelGlobals *kg, const Ray *ray, Intersection *isect, const uint visibility
+#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 , uint *lcg_state, float difl, float extmax
 #endif
 )
@@ -65,11 +59,6 @@ __device bool BVH_FUNCTION_NAME
        float3 idir = bvh_inverse_direction(ray->D);
        int object = ~0;
 
-#if FEATURE(BVH_SUBSURFACE)
-       const uint visibility = ~0;
-       int num_hits = 0;
-#endif
-
 #if FEATURE(BVH_MOTION)
        Transform ob_tfm;
 #endif
@@ -141,7 +130,7 @@ __device bool BVH_FUNCTION_NAME
                                NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
                                NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
 
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) && !FEATURE(BVH_SUBSURFACE)
+#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
                                if(difl != 0.0f) {
                                        float hdiff = 1.0f + difl;
                                        float ldiff = 1.0f - difl;
@@ -245,59 +234,37 @@ __device bool BVH_FUNCTION_NAME
                                        while(primAddr < primAddr2) {
                                                bool hit;
 
-#if FEATURE(BVH_SUBSURFACE)
-                                               /* only primitives from the same object */
-                                               uint tri_object = (object == ~0)? kernel_tex_fetch(__prim_object, primAddr): object;
-
-                                               if(tri_object == subsurface_object) {
-#endif
-
-                                                       /* intersect ray against primitive */
+                                               /* intersect ray against primitive */
 #if FEATURE(BVH_HAIR)
-                                                       uint segment = kernel_tex_fetch(__prim_segment, primAddr);
-#if !FEATURE(BVH_SUBSURFACE)
-                                                       if(segment != ~0) {
+                                               uint segment = kernel_tex_fetch(__prim_segment, primAddr);
+                                               if(segment != ~0) {
 
-                                                               if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) 
+                                                       if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) 
 #if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-                                                                       hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
-                                                               else
-                                                                       hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
+                                                               hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
+                                                       else
+                                                               hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
 #else
-                                                                       hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
-                                                               else
-                                                                       hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
-#endif
-                                                       }
+                                                               hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
                                                        else
+                                                               hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
 #endif
-#endif
-#if FEATURE(BVH_SUBSURFACE)
-#if FEATURE(BVH_HAIR)
-                                                       if(segment == ~0)
-#endif
-                                                       {
-                                                               hit = bvh_triangle_intersect_subsurface(kg, isect, P, idir, object, primAddr, tmax, &num_hits, subsurface_random);
-                                                               (void)hit;
-                                                       }
-
                                                }
-#else
-                                                               hit = bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
+                                               else
+#endif
+                                                       hit = bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
 
-                                                       /* shadow ray early termination */
+                                               /* shadow ray early termination */
 #if defined(__KERNEL_SSE2__) && !FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-                                                       if(hit) {
-                                                               if(visibility == PATH_RAY_SHADOW_OPAQUE)
-                                                                       return true;
-
-                                                               tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
-                                                       }
-#else
-                                                       if(hit && visibility == PATH_RAY_SHADOW_OPAQUE)
+                                               if(hit) {
+                                                       if(visibility == PATH_RAY_SHADOW_OPAQUE)
                                                                return true;
-#endif
 
+                                                       tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+                                               }
+#else
+                                               if(hit && visibility == PATH_RAY_SHADOW_OPAQUE)
+                                                       return true;
 #endif
 
                                                primAddr++;
@@ -306,47 +273,34 @@ __device bool BVH_FUNCTION_NAME
 #if FEATURE(BVH_INSTANCING)
                                else {
                                        /* instance push */
-#if FEATURE(BVH_SUBSURFACE)
-                                       if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) {
-                                               object = subsurface_object;
-#else
-                                               object = kernel_tex_fetch(__prim_object, -primAddr-1);
-#endif
+                                       object = kernel_tex_fetch(__prim_object, -primAddr-1);
 
 #if FEATURE(BVH_MOTION)
-                                               bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax);
+                                       bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax);
 #else
-                                               bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax);
+                                       bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax);
 #endif
 
 #if defined(__KERNEL_SSE2__) && !FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-                                               Psplat[0] = _mm_set_ps1(P.x);
-                                               Psplat[1] = _mm_set_ps1(P.y);
-                                               Psplat[2] = _mm_set_ps1(P.z);
+                                       Psplat[0] = _mm_set_ps1(P.x);
+                                       Psplat[1] = _mm_set_ps1(P.y);
+                                       Psplat[2] = _mm_set_ps1(P.z);
 
-                                               idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
-                                               idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
-                                               idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
+                                       idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
+                                       idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
+                                       idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
 
-                                               tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+                                       tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
 
-                                               shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
-                                               shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
-                                               shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
+                                       shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
+                                       shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
+                                       shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
 #endif
 
-                                               ++stackPtr;
-                                               traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+                                       ++stackPtr;
+                                       traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
 
-                                               nodeAddr = kernel_tex_fetch(__object_node, object);
-#if FEATURE(BVH_SUBSURFACE)
-                                       }
-                                       else {
-                                               /* pop */
-                                               nodeAddr = traversalStack[stackPtr];
-                                               --stackPtr;
-                                       }
-#endif
+                                       nodeAddr = kernel_tex_fetch(__object_node, object);
                                }
                        }
 #endif
@@ -386,11 +340,7 @@ __device bool BVH_FUNCTION_NAME
 #endif
        } while(nodeAddr != ENTRYPOINT_SENTINEL);
 
-#if FEATURE(BVH_SUBSURFACE)
-       return (num_hits != 0);
-#else
        return (isect->prim != ~0);
-#endif
 }
 
 #undef FEATURE
index 7d5e4cd9df523f58f9662c75858987caf9c7d14c..592c45867ac06e004af6210baff57c95714dbf39 100644 (file)
@@ -108,11 +108,26 @@ __device float3 sample_uniform_sphere(float u1, float u2)
        return make_float3(x, y, z);
 }
 
+__device float balance_heuristic(float a, float b) /* two-term balance heuristic: weight of a against b */
+{
+       return (a)/(a + b); /* a's fraction of the sum; NOTE(review): nothing here guards a + b == 0 */
+}
+
+__device float balance_heuristic_3(float a, float b, float c) /* three-term balance heuristic: weight of a against b and c */
+{
+       return (a)/(a + b + c); /* a's fraction of the sum; NOTE(review): nothing here guards a + b + c == 0 */
+}
+
 __device float power_heuristic(float a, float b)
 {
        return (a*a)/(a*a + b*b);
 }
 
+__device float power_heuristic_3(float a, float b, float c) /* three-term power heuristic (exponent 2): weight of a against b and c */
+{
+       return (a*a)/(a*a + b*b + c*c); /* a's squared fraction; NOTE(review): all three inputs zero gives 0/0, not guarded here */
+}
+
 __device float2 concentric_sample_disk(float u1, float u2)
 {
        float r, theta;
index bf06f8dd5f6e49484e29c7f338bc79aeed05c333..d613943e85db3d109077c48afa64aade003f45a9 100644 (file)
@@ -100,11 +100,11 @@ __device_inline void path_state_next(KernelGlobals *kg, PathState *state, int la
 
        /* diffuse/glossy/singular */
        if(label & LABEL_DIFFUSE) {
-               state->flag |= PATH_RAY_DIFFUSE;
+               state->flag |= PATH_RAY_DIFFUSE|PATH_RAY_DIFFUSE_ANCESTOR;
                state->flag &= ~(PATH_RAY_GLOSSY|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP);
        }
        else if(label & LABEL_GLOSSY) {
-               state->flag |= PATH_RAY_GLOSSY;
+               state->flag |= PATH_RAY_GLOSSY|PATH_RAY_GLOSSY_ANCESTOR;
                state->flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP);
        }
        else {
@@ -117,7 +117,7 @@ __device_inline void path_state_next(KernelGlobals *kg, PathState *state, int la
 
 __device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *state)
 {
-       uint flag = state->flag;
+       uint flag = state->flag & PATH_RAY_ALL_VISIBILITY;
 
        /* for visibility, diffuse/glossy are for reflection only */
        if(flag & PATH_RAY_TRANSMIT)
@@ -404,7 +404,15 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                        /* do bssrdf scatter step if we picked a bssrdf closure */
                        if(sc) {
                                uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
-                               subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
+
+                               if(old_subsurface_scatter_use(&sd)) {
+                                       old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
+                               }
+                               else {
+                                       float bssrdf_u, bssrdf_v;
+                                       path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+                                       subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
+                               }
                        }
                }
 #endif
@@ -646,7 +654,15 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                        /* do bssrdf scatter step if we picked a bssrdf closure */
                        if(sc) {
                                uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
-                               subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
+
+                               if(old_subsurface_scatter_use(&sd)) {
+                                       old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
+                               }
+                               else {
+                                       float bssrdf_u, bssrdf_v;
+                                       path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+                                       subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
+                               }
                        }
                }
 #endif
@@ -1090,17 +1106,32 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
                                uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
                                int num_samples = kernel_data.integrator.subsurface_samples;
                                float num_samples_inv = 1.0f/num_samples;
+                               RNG bssrdf_rng = cmj_hash(*rng, i);
 
                                /* do subsurface scatter step with copy of shader data, this will
                                 * replace the BSSRDF with a diffuse BSDF closure */
                                for(int j = 0; j < num_samples; j++) {
-                                       ShaderData bssrdf_sd = sd;
-                                       subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true);
-
-                                       /* compute lighting with the BSDF closure */
-                                       kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j,
-                                               &bssrdf_sd, throughput, num_samples_inv,
-                                               ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
+                                       if(old_subsurface_scatter_use(&sd)) {
+                                               ShaderData bssrdf_sd = sd;
+                                               old_subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true);
+
+                                               /* compute lighting with the BSDF closure */
+                                               kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j,
+                                                       &bssrdf_sd, throughput, num_samples_inv,
+                                                       ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
+                                       }
+                                       else {
+                                               ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+                                               float bssrdf_u, bssrdf_v;
+                                               path_rng_2D(kg, &bssrdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+                                               int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+                                               /* compute lighting with the BSDF closure */
+                                               for(int hit = 0; hit < num_hits; hit++)
+                                                       kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j,
+                                                               &bssrdf_sd[hit], throughput, num_samples_inv,
+                                                               ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
+                                       }
                                }
                        }
                }
index c86ac34a057015b577bc9504af6efbaa95c9cb19..be848d9bb169aa5a6aa2a54cabb3529acdceeba5 100644 (file)
@@ -235,7 +235,14 @@ __device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng)
 
 #endif
 
-__device float lcg_step(uint *rng)
+/* advance the linear congruential generator state in place and return the
+ * new state as an unsigned integer */
+__device uint lcg_step_uint(uint *rng)
+{
+       /* unsigned arithmetic wraps around, which gives the implicit mod 2^32 */
+       const uint next_state = 1103515245*(*rng) + 12345;
+
+       *rng = next_state;
+       return next_state;
+}
+
+__device float lcg_step_float(uint *rng)
 {
        /* implicit mod 2^32 */
        *rng = (1103515245*(*rng) + 12345);
@@ -245,7 +252,7 @@ __device float lcg_step(uint *rng)
 __device uint lcg_init(uint seed)
 {
        uint rng = seed;
-       lcg_step(&rng);
+       lcg_step_uint(&rng); /* advance once, so the returned state is not the raw seed */
        return rng;
 }
 
index 5dd12f98b9ce8a5be95af7fc554514e85fa996fc..2c86cc5e2278f1ae0b21f6597a38a557a5c50d16 100644 (file)
@@ -184,52 +184,32 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
        sd->flag = kernel_tex_fetch(__object_flag, sd->object);
        sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
 
-#ifdef __HAIR__
-       if(kernel_tex_fetch(__prim_segment, isect->prim) != ~0) {
-               /* Strand Shader setting*/
-               float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-
-               sd->shader = __float_as_int(curvedata.z);
-               sd->segment = isect->segment;
-
-               float tcorr = isect->t;
-               if(kernel_data.curve.curveflags & CURVE_KN_POSTINTERSECTCORRECTION)
-                       tcorr = (isect->u < 0)? tcorr + sqrtf(isect->v) : tcorr - sqrtf(isect->v);
-
-               sd->P = bvh_curve_refine(kg, sd, isect, ray, tcorr);
-       }
-       else {
-#endif
-               /* fetch triangle data */
-               float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
-               float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
-               sd->shader = __float_as_int(Ns.w);
+       /* fetch triangle data */
+       float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
+       float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
+       sd->shader = __float_as_int(Ns.w);
 
 #ifdef __HAIR__
-               sd->segment = ~0;
+       sd->segment = ~0;
 #endif
 
 #ifdef __UV__
-               sd->u = isect->u;
-               sd->v = isect->v;
+       sd->u = isect->u;
+       sd->v = isect->v;
 #endif
 
-               /* vectors */
-               sd->P = bvh_triangle_refine(kg, sd, isect, ray);
-               sd->Ng = Ng;
-               sd->N = Ng;
-               
-               /* smooth normal */
-               if(sd->shader & SHADER_SMOOTH_NORMAL)
-                       sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
+       /* vectors */
+       sd->P = bvh_triangle_refine_subsurface(kg, sd, isect, ray);
+       sd->Ng = Ng;
+       sd->N = Ng;
+       
+       /* smooth normal */
+       if(sd->shader & SHADER_SMOOTH_NORMAL)
+               sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
 
 #ifdef __DPDU__
-               /* dPdu/dPdv */
-               triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim);
-#endif
-
-#ifdef __HAIR__
-       }
+       /* dPdu/dPdv */
+       triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim);
 #endif
 
        sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
@@ -468,6 +448,8 @@ __device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData
 __device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, float *pdf,
        int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
 {
+       /* this is the veach one-sample model with balance heuristic, some pdf
+        * factors drop out when using balance heuristic weighting */
        for(int i = 0; i< sd->num_closure; i++) {
                if(i == skip_bsdf)
                        continue;
@@ -706,34 +688,34 @@ __device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
 #endif
 }
 
-__device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N)
+__device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
 {
 #ifdef __MULTI_CLOSURE__
        float3 eval = make_float3(0.0f, 0.0f, 0.0f);
-
-       *N = make_float3(0.0f, 0.0f, 0.0f);
+       float3 N = make_float3(0.0f, 0.0f, 0.0f);
 
        for(int i = 0; i< sd->num_closure; i++) {
                ShaderClosure *sc = &sd->closure[i];
 
                if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
                        eval += sc->weight*ao_factor;
-                       *N += sc->N*average(sc->weight);
+                       N += sc->N*average(sc->weight);
                }
                else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
                        eval += sc->weight;
-                       *N += sd->N*average(sc->weight);
+                       N += sd->N*average(sc->weight);
                }
        }
 
-       if(is_zero(*N))
-               *N = sd->N;
+       if(is_zero(N))
+               N = sd->N;
        else
-               *N = normalize(*N);
+               N = normalize(N);
 
+       *N_ = N;
        return eval;
 #else
-       *N = sd->N;
+       *N_ = sd->N;
 
        if(CLOSURE_IS_BSDF_DIFFUSE(sd->closure.type))
                return sd->closure.weight*ao_factor;
@@ -744,6 +726,49 @@ __device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_facto
 #endif
 }
 
+/* sum the weights of all bssrdf closures in sd
+ *
+ * N_: if not NULL, receives the closure normals averaged with the absolute
+ *     average closure weight and normalized, or sd->N if that sum is zero
+ * texture_blur_: if not NULL, receives the closure data1 values averaged
+ *     with the same weights, or 0 if there is no bssrdf weight at all
+ *
+ * returns the summed closure weight, zero if sd has no bssrdf closure */
+__device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
+{
+#ifdef __MULTI_CLOSURE__
+       float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+       float3 N = make_float3(0.0f, 0.0f, 0.0f);
+       float texture_blur = 0.0f, weight_sum = 0.0f;
+
+       for(int i = 0; i< sd->num_closure; i++) {
+               ShaderClosure *sc = &sd->closure[i];
+
+               if(CLOSURE_IS_BSSRDF(sc->type)) {
+                       /* absolute value, so closures with a negative weight
+                        * still contribute positively to the averages */
+                       float avg_weight = fabsf(average(sc->weight));
+
+                       N += sc->N*avg_weight;
+                       eval += sc->weight;
+                       texture_blur += sc->data1*avg_weight;
+                       weight_sum += avg_weight;
+               }
+       }
+
+       if(N_)
+               *N_ = (is_zero(N))? sd->N: normalize(N);
+
+       /* weight_sum is zero when there are no bssrdf closures or all of them
+        * have zero weight, avoid 0/0 giving a NaN blur factor */
+       if(texture_blur_)
+               *texture_blur_ = (weight_sum > 0.0f)? texture_blur/weight_sum: 0.0f;
+
+       return eval;
+#else
+       if(CLOSURE_IS_BSSRDF(sd->closure.type)) {
+               if(N_) *N_ = sd->closure.N;
+               if(texture_blur_) *texture_blur_ = sd->closure.data1;
+
+               return sd->closure.weight;
+       }
+       else {
+               if(N_) *N_ = sd->N;
+               if(texture_blur_) *texture_blur_ = 0.0f;
+
+               return make_float3(0.0f, 0.0f, 0.0f);
+       }
+#endif
+}
+
 /* Emission */
 
 __device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
index 4fae961512e95d9c4fc4f8a1648eece78c46ae37..8f5bcdf06e268378b4e921ab3edc5098cc21bca5 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
-#define BSSRDF_MULTI_EVAL
-#define BSSRDF_SKIP_NO_HIT
-
-__device float bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u)
-{
-       int table_offset = kernel_data.bssrdf.table_offset;
-       float r = lookup_table_read_2D(kg, u, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
-
-       return r*radius;
-}
+#include "closure/bssrdf.h"
 
-#ifdef BSSRDF_MULTI_EVAL
-__device float bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r)
-{
-       if(r >= radius)
-               return 0.0f;
+/* NEW BSSRDF: See "BSSRDF Importance Sampling", SIGGRAPH 2013 */
 
-       /* todo: when we use the real BSSRDF this will need to be divided by the maximum
-        * radius instead of the average radius */
-       float t = r/radius;
-
-       int table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET;
-       float pdf = lookup_table_read_2D(kg, t, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
-
-       pdf /= radius;
+/* TODO:
+ * - test using power heuristic for combining bssrdfs
+ * - try to reduce one sample model variance
+ * - possibly shade all hits for progressive integrator
+ * - cubic and gaussian scale difference tweak
+ */
 
-       return pdf;
-}
-#endif
+#define BSSRDF_MULTI_EVAL
 
 __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, ShaderData *sd, float *probability)
 {
@@ -75,7 +58,6 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade
 
        /* use bssrdf */
        r -= bsdf_sum;
-       sd->randb_closure = 0.0f; /* not needed anymore */
 
        float sum = 0.0f;
 
@@ -86,6 +68,8 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade
                        sum += sc->sample_weight;
 
                        if(r <= sum) {
+                               sd->randb_closure = (r - (sum - sc->sample_weight))/sc->sample_weight;
+
 #ifdef BSSRDF_MULTI_EVAL
                                *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bssrdf_sum: 1.0f;
 #else
@@ -97,12 +81,362 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade
        }
 
        /* should never happen */
+       sd->randb_closure = 0.0f;
        *probability = 1.0f;
        return NULL;
 }
 
+/* evaluate the bssrdf closures of sd for a point at real distance r, that was
+ * generated by sampling radius disk_r on the disk
+ *
+ * with BSSRDF_MULTI_EVAL all bssrdf closures in sd are combined and sc is not
+ * read, otherwise only sc is evaluated. all is true when every bssrdf is
+ * sampled once (non-progressive integrator) and false when a single one was
+ * picked (progressive integrator).
+ *
+ * returns closure weight times pdf at r, divided by the pdf of sampling
+ * disk_r. with BSSRDF_MULTI_EVAL zero is returned when that sampling pdf is
+ * not positive */
+__device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all)
+{
 #ifdef BSSRDF_MULTI_EVAL
-__device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all)
+       /* this is the veach one-sample model with balance heuristic, some pdf
+        * factors drop out when using balance heuristic weighting */
+       float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
+       float pdf_sum = 0.0f;
+       float sample_weight_sum = 0.0f;
+
+       /* total sample weight of the bssrdf closures, used to normalize the
+        * probability of having picked each of them */
+       for(int i = 0; i < sd->num_closure; i++) {
+               ShaderClosure *bssrdf_sc = &sd->closure[i];
+
+               if(CLOSURE_IS_BSSRDF(bssrdf_sc->type))
+                       sample_weight_sum += (all)? 1.0f: bssrdf_sc->sample_weight;
+       }
+
+       float sample_weight_inv = 1.0f/sample_weight_sum;
+
+       for(int i = 0; i < sd->num_closure; i++) {
+               ShaderClosure *bssrdf_sc = &sd->closure[i];
+
+               if(CLOSURE_IS_BSSRDF(bssrdf_sc->type)) {
+                       /* in case of non-progressive integrate we sample all bssrdf's once,
+                        * for progressive we pick one, so adjust pdf for that */
+                       float sample_weight = (all)? 1.0f: bssrdf_sc->sample_weight * sample_weight_inv;
+
+                       /* compute pdf */
+                       float pdf = bssrdf_pdf(bssrdf_sc, r);
+                       float disk_pdf = bssrdf_pdf(bssrdf_sc, disk_r);
+
+                       /* TODO power heuristic is not working correctly here */
+                       eval_sum += bssrdf_sc->weight*pdf; //*sample_weight*disk_pdf;
+                       pdf_sum += sample_weight*disk_pdf; //*sample_weight*disk_pdf;
+               }
+       }
+
+       return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
+#else
+       /* single closure evaluation, sc is the closure that was picked */
+       float pdf = bssrdf_pdf(sc, r);
+       float disk_pdf = bssrdf_pdf(sc, disk_r);
+
+       return sc->weight * pdf / disk_pdf;
+#endif
+}
+
+/* after the scatter step, drop all closures of sd and, if a point was hit,
+ * install one diffuse bsdf closure with the given weight and normal */
+__device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight, bool hit, float3 N)
+{
+       sd->randb_closure = 0.0f;
+       sd->flag &= ~SD_CLOSURE_FLAGS;
+
+       /* nothing was hit, leave the shading point without closures */
+       if(!hit) {
+               sd->num_closure = 0;
+               return;
+       }
+
+       ShaderClosure *diffuse_sc = &sd->closure[0];
+
+       diffuse_sc->weight = weight;
+       diffuse_sc->sample_weight = 1.0f;
+       diffuse_sc->data0 = 0.0f;
+       diffuse_sc->data1 = 0.0f;
+       diffuse_sc->N = N;
+
+       sd->num_closure = 1;
+       sd->flag |= bsdf_diffuse_setup(diffuse_sc);
+
+       /* the special ID replaces CLOSURE_BSDF_DIFFUSE_ID, so that render passes
+        * can tell this closure apart from a regular diffuse closure */
+       diffuse_sc->type = CLOSURE_BSDF_BSSRDF_ID;
+}
+
+/* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
+
+/* clamp the color channels to be non-negative and raise each of them to the
+ * given exponent */
+__device float3 subsurface_color_pow(float3 color, float exponent)
+{
+       float3 result = max(color, make_float3(0.0f, 0.0f, 0.0f));
+
+       /* exponent one leaves the clamped color as it is */
+       if(exponent == 1.0f)
+               return result;
+
+       if(exponent == 0.5f) {
+               /* square root instead of the general power function */
+               result.x = sqrtf(result.x);
+               result.y = sqrtf(result.y);
+               result.z = sqrtf(result.z);
+               return result;
+       }
+
+       result.x = powf(result.x, exponent);
+       result.y = powf(result.y, exponent);
+       result.z = powf(result.z, exponent);
+
+       return result;
+}
+
+/* blur the color between the outgoing and incoming point and fetch the bump
+ * mapped normal at the incoming point, when the shader asks for either
+ *
+ * eval is scaled in place when the texture blur is positive, and N is
+ * overwritten when the outgoing shader data has SD_HAS_BSSRDF_BUMP set. if
+ * neither applies, in_sd is not evaluated and nothing is modified */
+__device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd, ShaderData *in_sd, int state_flag, float3 *eval, float3 *N)
+{
+       /* summed color and averaged texture blur at the outgoing point, read
+        * before in_sd is evaluated below */
+       float blur;
+       float3 out_sum = shader_bssrdf_sum(out_sd, NULL, &blur);
+
+       const bool has_bump = (out_sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
+       const bool has_blur = (blur > 0.0f);
+
+       /* skip the shader evaluation if it would not be used */
+       if(!has_bump && !has_blur)
+               return;
+
+       /* summed color and averaged normal at the incoming point */
+       shader_eval_surface(kg, in_sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
+       float3 in_sum = shader_bssrdf_sum(in_sd, (has_bump)? N: NULL, NULL);
+
+       if(!has_blur)
+               return;
+
+       /* divide out the color of one point and multiply with the color of the
+        * other. doing this per closure is tricky, as the number and order of
+        * closures may differ between shader evaluations, this is simpler */
+       out_sum = subsurface_color_pow(out_sum, blur);
+       in_sum = subsurface_color_pow(in_sum, blur);
+
+       *eval *= safe_divide_color(in_sum, out_sum);
+}
+
+/* subsurface scattering step, from a point on the surface to other nearby points on the same object
+ *
+ * fills bssrdf_sd with one shading point per intersection used, each set up
+ * with a single diffuse closure, and returns the number of entries filled
+ * (at most BSSRDF_MAX_HITS). disk_u and disk_v are the random numbers used to
+ * pick the projection axis and the point on the disk. */
+__device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd, ShaderData bssrdf_sd[BSSRDF_MAX_HITS],
+       int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
+{
+       /* pick random axis in local frame and point on disk */
+       float3 disk_N, disk_T, disk_B;
+       float pick_pdf_N, pick_pdf_T, pick_pdf_B;
+       
+       disk_N = sd->Ng;
+       make_orthonormals(disk_N, &disk_T, &disk_B);
+
+       if(disk_u < 0.5f) {
+               pick_pdf_N = 0.5f;
+               pick_pdf_T = 0.25f;
+               pick_pdf_B = 0.25f;
+               disk_u *= 2.0f; /* stretch the used interval back to 0..1, for reuse as the disk angle */
+       }
+       else if(disk_u < 0.75f) {
+               float3 tmp = disk_N; /* swap so that disk_N is the axis the ray is cast along */
+               disk_N = disk_T;
+               disk_T = tmp;
+               pick_pdf_N = 0.25f;
+               pick_pdf_T = 0.5f;
+               pick_pdf_B = 0.25f;
+               disk_u = (disk_u - 0.5f)*4.0f;
+       }
+       else {
+               float3 tmp = disk_N;
+               disk_N = disk_B;
+               disk_B = tmp;
+               pick_pdf_N = 0.25f;
+               pick_pdf_T = 0.25f;
+               pick_pdf_B = 0.5f;
+               disk_u = (disk_u - 0.75f)*4.0f;
+       }
+
+       /* sample point on disk */
+    float phi = M_2PI_F * disk_u;
+    float disk_r = disk_v; /* random number in, replaced by the sampled radius in bssrdf_sample */
+       float disk_height;
+
+       bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
+
+       float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
+
+       /* create ray */
+       Ray ray;
+       ray.P = sd->P + disk_N*disk_height + disk_P;
+       ray.D = -disk_N;
+       ray.t = 2.0f*disk_height; /* for unit length disk_N the ray spans disk_height on either side of the disk plane */
+       ray.dP = sd->dP;
+       ray.dD = differential3_zero();
+       ray.time = sd->time;
+
+       /* intersect with the same object. if multiple intersections are found it
+        * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
+       Intersection isect[BSSRDF_MAX_HITS];
+       uint num_hits = scene_intersect_subsurface(kg, &ray, isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
+
+       /* evaluate bssrdf */
+       float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+       int num_eval_hits = min(num_hits, BSSRDF_MAX_HITS);
+
+       for(int hit = 0; hit < num_eval_hits; hit++) {
+               ShaderData *bsd = &bssrdf_sd[hit];
+
+               /* setup new shading point */
+               *bsd = *sd;
+               shader_setup_from_subsurface(kg, bsd, &isect[hit], &ray);
+
+               /* probability densities for local frame axes */
+               float pdf_N = pick_pdf_N * fabsf(dot(disk_N, bsd->Ng));
+               float pdf_T = pick_pdf_T * fabsf(dot(disk_T, bsd->Ng));
+               float pdf_B = pick_pdf_B * fabsf(dot(disk_B, bsd->Ng));
+               
+               /* multiple importance sample between 3 axes, power heuristic
+                * found to be slightly better than balance heuristic */
+               float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
+
+               /* real distance to sampled point */
+               float r = len(bsd->P - sd->P);
+
+               /* evaluate */
+               float w = mis_weight / pdf_N; /* NOTE(review): pdf_N is zero when bsd->Ng is perpendicular to disk_N, confirm this stays finite */
+               if(num_hits > BSSRDF_MAX_HITS)
+                       w *= num_hits/(float)BSSRDF_MAX_HITS; /* compensate for the hits left out of the random subset */
+               eval = subsurface_scatter_eval(bsd, sc, disk_r, r, all) * w;
+
+               /* optionally blur colors and bump mapping */
+               float3 N = bsd->N;
+               subsurface_color_bump_blur(kg, sd, bsd, state_flag, &eval, &N);
+
+               /* setup diffuse bsdf */
+               subsurface_scatter_setup_diffuse_bsdf(bsd, eval, true, N);
+       }
+
+       return num_eval_hits;
+}
+
+/* subsurface scattering step, from a point on the surface to another nearby point on the same object
+ *
+ * sd is modified in place: on a hit it is moved to the new point, and in both
+ * cases its closures are replaced, by a single diffuse closure on a hit and
+ * by no closures otherwise. disk_u and disk_v are the random numbers used to
+ * pick the projection axis and the point on the disk. */
+__device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
+       int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
+{
+       float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+       uint num_hits = 0;
+
+       /* pick random axis in local frame and point on disk */
+       float3 disk_N, disk_T, disk_B;
+       float pick_pdf_N, pick_pdf_T, pick_pdf_B;
+       
+       disk_N = sd->Ng;
+       make_orthonormals(disk_N, &disk_T, &disk_B);
+
+       if(disk_u < 0.5f) {
+               pick_pdf_N = 0.5f;
+               pick_pdf_T = 0.25f;
+               pick_pdf_B = 0.25f;
+               disk_u *= 2.0f; /* stretch the used interval back to 0..1, for reuse as the disk angle */
+       }
+       else if(disk_u < 0.75f) {
+               float3 tmp = disk_N; /* swap so that disk_N is the axis the ray is cast along */
+               disk_N = disk_T;
+               disk_T = tmp;
+               pick_pdf_N = 0.25f;
+               pick_pdf_T = 0.5f;
+               pick_pdf_B = 0.25f;
+               disk_u = (disk_u - 0.5f)*4.0f;
+       }
+       else {
+               float3 tmp = disk_N;
+               disk_N = disk_B;
+               disk_B = tmp;
+               pick_pdf_N = 0.25f;
+               pick_pdf_T = 0.25f;
+               pick_pdf_B = 0.5f;
+               disk_u = (disk_u - 0.75f)*4.0f;
+       }
+
+       /* sample point on disk */
+       float phi = M_2PI_F * disk_u;
+       float disk_r = disk_v; /* random number in, replaced by the sampled radius in bssrdf_sample */
+       float disk_height;
+
+       bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
+
+       float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
+
+       /* create ray */
+       Ray ray;
+       ray.P = sd->P + disk_N*disk_height + disk_P;
+       ray.D = -disk_N;
+       ray.t = 2.0f*disk_height; /* for unit length disk_N the ray spans disk_height on either side of the disk plane */
+       ray.dP = sd->dP;
+       ray.dD = differential3_zero();
+       ray.time = sd->time;
+
+       /* intersect with the same object. if multiple intersections are
+        * found it will randomly pick one of them */
+       Intersection isect;
+       num_hits = scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1);
+
+       /* evaluate bssrdf */
+       if(num_hits > 0) {
+               float3 origP = sd->P; /* keep the outgoing position, sd->P is overwritten below */
+
+               /* setup new shading point */
+               shader_setup_from_subsurface(kg, sd, &isect, &ray);
+
+               /* probability densities for local frame axes */
+               float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
+               float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
+               float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
+               
+               /* multiple importance sample between 3 axes, power heuristic
+                * found to be slightly better than balance heuristic */
+               float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
+
+               /* real distance to sampled point */
+               float r = len(sd->P - origP);
+
+               /* evaluate, num_hits compensates for a single hit being picked
+                * NOTE(review): pdf_N is zero when sd->Ng is perpendicular to disk_N, confirm this stays finite */
+               float w = (mis_weight * num_hits) / pdf_N;
+               eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
+       }
+
+       /* optionally blur colors and bump mapping */
+       float3 N = sd->N;
+       subsurface_color_bump_blur(kg, sd, sd, state_flag, &eval, &N); /* NOTE(review): also runs when num_hits is 0, confirm the shader evaluation is wanted then */
+
+       /* setup diffuse bsdf */
+       subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N);
+}
+
+
+/* OLD BSSRDF */
+
+/* sample a distance for the old bssrdf: read the 2D lookup table at (u, refl)
+ * and scale the value found there by radius */
+__device float old_bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u)
+{
+       const float table_r = lookup_table_read_2D(kg, u, refl, kernel_data.bssrdf.table_offset,
+               BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
+
+       return radius*table_r;
+}
+
+#ifdef BSSRDF_MULTI_EVAL
+/* pdf of the old bssrdf at distance r: zero at or beyond radius, otherwise
+ * the pdf table value at (r/radius, refl) divided by radius */
+__device float old_bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r)
+{
+       /* outside of the radius there is no contribution */
+       if(r >= radius)
+               return 0.0f;
+
+       /* todo: when we use the real BSSRDF this will need to be divided by the maximum
+        * radius instead of the average radius */
+       const int pdf_table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET;
+       const float table_pdf = lookup_table_read_2D(kg, r/radius, refl, pdf_table_offset,
+               BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
+
+       return table_pdf/radius;
+}
+#endif
+
+#ifdef BSSRDF_MULTI_EVAL
+__device float3 old_subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all)
 {
        /* compute pdf */
        float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
@@ -119,7 +453,7 @@ __device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd,
                        /* compute pdf */
                        float pdf = 1.0f;
                        for(int i = 0; i < num_r; i++)
-                               pdf *= bssrdf_pdf(kg, sc->data0, refl, r[i]);
+                               pdf *= old_bssrdf_pdf(kg, sc->data0, refl, r[i]);
 
                        eval_sum += sc->weight*pdf;
                        pdf_sum += sample_weight*pdf;
@@ -148,31 +482,8 @@ __device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd,
 }
 #endif
 
-/* replace closures with a single diffuse bsdf closure after scatter step */
-__device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight)
-{
-       ShaderClosure *sc = &sd->closure[0];
-       sd->num_closure = 1;
-
-       sc->weight = weight;
-       sc->sample_weight = 1.0f;
-       sc->data0 = 0.0f;
-       sc->data1 = 0.0f;
-       sc->N = sd->N;
-       sd->flag &= ~SD_CLOSURE_FLAGS;
-       sd->flag |= bsdf_diffuse_setup(sc);
-       sd->randb_closure = 0.0f;
-
-       /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
-        * can recognize it as not being a regular diffuse closure */
-       sc->type = CLOSURE_BSDF_BSSRDF_ID;
-
-       /* todo: evaluate shading to get blurred textures and bump mapping */
-       /* shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS); */
-}
-
 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-__device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all)
+__device void old_subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all)
 {
        float radius = sc->data0;
        float refl = max(average(sc->weight)*3.0f, 0.0f);
@@ -187,14 +498,13 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta
        /* attempt to find a hit a given number of times before giving up */
        for(num_attempts = 0; num_attempts < kernel_data.bssrdf.num_attempts; num_attempts++) {
                /* random numbers for sampling */
-               float u1 = lcg_step(lcg_state);
-               float u2 = lcg_step(lcg_state);
-               float u3 = lcg_step(lcg_state);
-               float u4 = lcg_step(lcg_state);
-               float u5 = lcg_step(lcg_state);
-               float u6 = lcg_step(lcg_state);
-
-               r = bssrdf_sample_distance(kg, radius, refl, u5);
+               float u1 = lcg_step_float(lcg_state);
+               float u2 = lcg_step_float(lcg_state);
+               float u3 = lcg_step_float(lcg_state);
+               float u4 = lcg_step_float(lcg_state);
+               float u5 = lcg_step_float(lcg_state);
+
+               r = old_bssrdf_sample_distance(kg, radius, refl, u5);
 #ifdef BSSRDF_MULTI_EVAL
                r_attempts[num_attempts] = r;
 #endif
@@ -213,7 +523,7 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta
                /* intersect with the same object. if multiple intersections are
                 * found it will randomly pick one of them */
                Intersection isect;
-               if(!scene_intersect_subsurface(kg, &ray, &isect, sd->object, u6))
+               if(scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1) == 0)
                        continue;
 
                /* setup new shading point */
@@ -226,18 +536,32 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta
 
        /* evaluate subsurface scattering closures */
 #ifdef BSSRDF_MULTI_EVAL
-       weight *= subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all);
+       weight *= old_subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all);
 #else
        weight *= sc->weight;
 #endif
 
-#ifdef BSSRDF_SKIP_NO_HIT
        if(!hit)
                weight = make_float3(0.0f, 0.0f, 0.0f);
-#endif
+
+       /* optionally blur colors and bump mapping */
+       float3 N = sd->N;
+       subsurface_color_bump_blur(kg, sd, sd, state_flag, &weight, &N);
 
        /* replace closures with a single diffuse BSDF */
-       subsurface_scatter_setup_diffuse_bsdf(sd, weight);
+       subsurface_scatter_setup_diffuse_bsdf(sd, weight, hit, N);
+}
+
+/* test if any closure of sd has the compatible bssrdf type, in which case
+ * the old scatter step is to be used */
+__device bool old_subsurface_scatter_use(ShaderData *sd)
+{
+       bool use_old = false;
+
+       /* stop at the first compatible closure found */
+       for(int i = 0; i < sd->num_closure && !use_old; i++)
+               use_old = (sd->closure[i].type == CLOSURE_BSSRDF_COMPATIBLE_ID);
+
+       return use_old;
 }
 
 CCL_NAMESPACE_END
index 3008698313e7cc800117346bfd038bcee8631707..3421ba440075406f28a67501836bb2978036881c 100644 (file)
@@ -43,6 +43,7 @@ CCL_NAMESPACE_BEGIN
 #define BSSRDF_LOOKUP_TABLE_SIZE       (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE*2)
 #define BSSRDF_MIN_RADIUS                      1e-8f
 #define BSSRDF_MAX_ATTEMPTS                    8
+#define BSSRDF_MAX_HITS                                4
 
 #define BB_DRAPPER                             800.0f
 #define BB_MAX_TABLE_RANGE             12000.0f
@@ -214,12 +215,13 @@ enum PathRayFlag {
        PATH_RAY_SHADOW_TRANSPARENT = 256,
        PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
 
-       PATH_RAY_MIS_SKIP = 512,
+       PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/
 
-       PATH_RAY_ALL = (1|2|4|8|16|32|64|128|256|512),
+       PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512),
 
-       /* visibility flag to define curve segments*/
-       PATH_RAY_CURVE = 1024,
+       PATH_RAY_MIS_SKIP = 1024,
+       PATH_RAY_DIFFUSE_ANCESTOR = 2048,
+       PATH_RAY_GLOSSY_ANCESTOR = 4096,
 
        /* this gives collisions with localview bits
         * see: blender_util.h, grr - Campbell */
@@ -507,11 +509,12 @@ enum ShaderDataFlag {
        SD_HAS_TRANSPARENT_SHADOW = 1024,       /* has transparent shadow */
        SD_HAS_VOLUME = 2048,                           /* has volume shader */
        SD_HOMOGENEOUS_VOLUME = 4096,           /* has homogeneous volume */
+       SD_HAS_BSSRDF_BUMP = 8192,                      /* bssrdf normal uses bump */
 
        /* object flags */
-       SD_HOLDOUT_MASK = 8192,                         /* holdout for camera rays */
-       SD_OBJECT_MOTION = 16384,                       /* has object motion blur */
-       SD_TRANSFORM_APPLIED = 32768            /* vertices have transform applied */
+       SD_HOLDOUT_MASK = 16384,                        /* holdout for camera rays */
+       SD_OBJECT_MOTION = 32768,                       /* has object motion blur */
+       SD_TRANSFORM_APPLIED = 65536            /* vertices have transform applied */
 };
 
 struct KernelGlobals;
index ba9b13126ac5ebec6c4a8f3593b11df674efa83a..7405b0be567cb0982186b3d2e72171f37b4fd7df 100644 (file)
@@ -47,18 +47,59 @@ CCL_NAMESPACE_BEGIN
 
 using namespace OSL;
 
-class BSSRDFClosure : public CBSSRDFClosure {
+/* Cubic */
+
+class CubicBSSRDFClosure : public CBSSRDFClosure {
 public:
        size_t memsize() const { return sizeof(*this); }
        const char *name() const { return "bssrdf_cubic"; }
 
        void setup()
        {
+               sc.type = CLOSURE_BSSRDF_COMPATIBLE_ID;
                sc.prim = NULL;
                sc.data0 = fabsf(average(radius));
-               sc.data1 = 1.3f;
+               sc.data1 = 0.0f; // XXX texture blur
+       }
+
+       bool mergeable(const ClosurePrimitive *other) const
+       {
+               return false;
+       }
 
-               m_shaderdata_flag = bssrdf_setup(&sc);
+       void print_on(std::ostream &out) const
+       {
+               out << name() << " ((" << sc.N[0] << ", " << sc.N[1] << ", " << sc.N[2] << "))";
+       }
+};
+
+ClosureParam *closure_bssrdf_cubic_params() /* parameter table for CubicBSSRDFClosure; returns a pointer to function-local static storage */
+{
+       static ClosureParam params[] = {
+               CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N), /* first argument: normal, stored in sc.N */
+               CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius), /* second argument: float3 radius member of the closure */
+               //CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1),
+           CLOSURE_STRING_KEYPARAM("label"), /* "label" keyword parameter */
+           CLOSURE_FINISH_PARAM(CubicBSSRDFClosure) /* terminates the table */
+       };
+       return params;
+}
+
+CLOSURE_PREPARE(closure_bssrdf_cubic_prepare, CubicBSSRDFClosure)
+
+/* Gaussian */
+
+class GaussianBSSRDFClosure : public CBSSRDFClosure {
+public:
+       size_t memsize() const { return sizeof(*this); }
+       const char *name() const { return "bssrdf_gaussian"; }
+
+       void setup()
+       {
+               sc.type = CLOSURE_BSSRDF_GAUSSIAN_ID;
+               sc.prim = NULL;
+               sc.data0 = fabsf(average(radius));
+               sc.data1 = 0.0f; // XXX texture blurring!
        }
 
        bool mergeable(const ClosurePrimitive *other) const
@@ -72,19 +113,19 @@ public:
        }
 };
 
-ClosureParam *closure_bssrdf_params()
+ClosureParam *closure_bssrdf_gaussian_params()
 {
        static ClosureParam params[] = {
-               CLOSURE_FLOAT3_PARAM(BSSRDFClosure, sc.N),
-               CLOSURE_FLOAT3_PARAM(BSSRDFClosure, radius),
-               //CLOSURE_FLOAT_PARAM(BSSRDFClosure, sc.data1),
+               CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N),
+               CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius),
+               //CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1),
            CLOSURE_STRING_KEYPARAM("label"),
-           CLOSURE_FINISH_PARAM(BSSRDFClosure)
+           CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure)
        };
        return params;
 }
 
-CLOSURE_PREPARE(closure_bssrdf_prepare, BSSRDFClosure)
+CLOSURE_PREPARE(closure_bssrdf_gaussian_prepare, GaussianBSSRDFClosure)
 
 CCL_NAMESPACE_END
 
index 54df055405e553ed9b6f4c76bcece59d6dd33ae8..ee9fc7c4ac5c22eeda4948cc806c343c35ae9a26 100644 (file)
@@ -48,15 +48,10 @@ public:
        ShaderClosure sc;
        float3 radius;
 
-       CBSSRDFClosure() : OSL::ClosurePrimitive(BSSRDF),
-         m_shaderdata_flag(0) { }
+       CBSSRDFClosure() : OSL::ClosurePrimitive(BSSRDF) { }
        ~CBSSRDFClosure() { }
 
        int scattering() const { return LABEL_DIFFUSE; }
-       int shaderdata_flag() const { return m_shaderdata_flag; }
-
-protected:
-       int m_shaderdata_flag;
 };
 
 CCL_NAMESPACE_END
index b1549e95920e8f281b6f39534c8d7c8759cf6b7d..c03e50d431328084ad87923e6833fd439919864d 100644 (file)
@@ -218,7 +218,9 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
        register_closure(ss, "phong_ramp", id++,
                closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare);
        register_closure(ss, "bssrdf_cubic", id++,
-               closure_bssrdf_params(), closure_bssrdf_prepare);
+               closure_bssrdf_cubic_params(), closure_bssrdf_cubic_prepare);
+       register_closure(ss, "bssrdf_gaussian", id++,
+               closure_bssrdf_gaussian_params(), closure_bssrdf_gaussian_prepare);
 }
 
 CCL_NAMESPACE_END
index ca5f441aa2d0d15c9da468bbe619d40dad09ac2d..e3a7e89059784dec18fbbe8fa2f071df57a37d1e 100644 (file)
@@ -50,7 +50,8 @@ OSL::ClosureParam *closure_bsdf_diffuse_ramp_params();
 OSL::ClosureParam *closure_bsdf_phong_ramp_params();
 OSL::ClosureParam *closure_westin_backscatter_params();
 OSL::ClosureParam *closure_westin_sheen_params();
-OSL::ClosureParam *closure_bssrdf_params();
+OSL::ClosureParam *closure_bssrdf_cubic_params();
+OSL::ClosureParam *closure_bssrdf_gaussian_params();
 
 void closure_emission_prepare(OSL::RendererServices *, int id, void *data);
 void closure_background_prepare(OSL::RendererServices *, int id, void *data);
@@ -60,7 +61,8 @@ void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *da
 void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data);
 void closure_westin_backscatter_prepare(OSL::RendererServices *, int id, void *data);
 void closure_westin_sheen_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data);
+void closure_bssrdf_cubic_prepare(OSL::RendererServices *, int id, void *data);
+void closure_bssrdf_gaussian_prepare(OSL::RendererServices *, int id, void *data);
 
 enum {
        AmbientOcclusion = 100
@@ -89,7 +91,8 @@ public:
        ShaderClosure sc;
 
        CBSDFClosure(int scattering) : OSL::ClosurePrimitive(BSDF),
-         m_scattering_label(scattering), m_shaderdata_flag(0) { }
+         m_scattering_label(scattering), m_shaderdata_flag(0)
+       { memset(&sc, 0, sizeof(sc)); }
        ~CBSDFClosure() { }
 
        int scattering() const { return m_scattering_label; }
index dedda1dc10ea6bc9c0bf9e941cdecdc5a30799f8..23be0acb4d32604d5a4ab5bf82cd39d3e49df19b 100644 (file)
  */
 
 #include "kernel_compat_cpu.h"
+#include "kernel_montecarlo.h"
 #include "kernel_types.h"
 #include "kernel_globals.h"
 #include "kernel_object.h"
 
+#include "closure/bsdf_diffuse.h"
+#include "closure/bssrdf.h"
+
 #include "osl_bssrdf.h"
 #include "osl_closures.h"
 #include "osl_globals.h"
@@ -136,7 +140,7 @@ static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd,
 
 /* Surface */
 
-static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
+static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
                                          const OSL::ClosureColor *closure, float3 weight = make_float3(1.0f, 1.0f, 1.0f))
 {
        /* OSL gives us a closure tree, we flatten it into arrays per
@@ -156,8 +160,11 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
                                        int scattering = bsdf->scattering();
 
                                        /* no caustics option */
-                                       if (no_glossy && scattering == LABEL_GLOSSY)
-                                               return;
+                                       if(scattering == LABEL_GLOSSY && (path_flag & PATH_RAY_DIFFUSE)) {
+                                               KernelGlobals *kg = sd->osl_globals;
+                                               if(kernel_data.integrator.no_caustics)
+                                                       return;
+                                       }
 
                                        /* sample weight */
                                        float sample_weight = fabsf(average(weight));
@@ -230,26 +237,32 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
                                                sc.data1 = bssrdf->sc.data1;
                                                sc.prim = NULL;
 
+                                               /* disable in case of diffuse ancestor, can't see it well then and
+                                                * adds considerable noise due to probabilities of continuing path
+                                                * getting lower and lower */
+                                               if(sc.type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR))
+                                                       bssrdf->radius = make_float3(0.0f, 0.0f, 0.0f);
+
                                                /* create one closure for each color channel */
                                                if(fabsf(weight.x) > 0.0f) {
                                                        sc.weight = make_float3(weight.x, 0.0f, 0.0f);
                                                        sc.data0 = bssrdf->radius.x;
+                                                       sd->flag |= bssrdf_setup(&sc, sc.type);
                                                        sd->closure[sd->num_closure++] = sc;
-                                                       sd->flag |= bssrdf->shaderdata_flag();
                                                }
 
                                                if(fabsf(weight.y) > 0.0f) {
                                                        sc.weight = make_float3(0.0f, weight.y, 0.0f);
                                                        sc.data0 = bssrdf->radius.y;
+                                                       sd->flag |= bssrdf_setup(&sc, sc.type);
                                                        sd->closure[sd->num_closure++] = sc;
-                                                       sd->flag |= bssrdf->shaderdata_flag();
                                                }
 
                                                if(fabsf(weight.z) > 0.0f) {
                                                        sc.weight = make_float3(0.0f, 0.0f, weight.z);
                                                        sc.data0 = bssrdf->radius.z;
+                                                       sd->flag |= bssrdf_setup(&sc, sc.type);
                                                        sd->closure[sd->num_closure++] = sc;
-                                                       sd->flag |= bssrdf->shaderdata_flag();
                                                }
                                        }
                                        break;
@@ -264,12 +277,12 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
        }
        else if (closure->type == OSL::ClosureColor::MUL) {
                OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
-               flatten_surface_closure_tree(sd, no_glossy, mul->closure, TO_FLOAT3(mul->weight) * weight);
+               flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
        }
        else if (closure->type == OSL::ClosureColor::ADD) {
                OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
-               flatten_surface_closure_tree(sd, no_glossy, add->closureA, weight);
-               flatten_surface_closure_tree(sd, no_glossy, add->closureB, weight);
+               flatten_surface_closure_tree(sd, path_flag, add->closureA, weight);
+               flatten_surface_closure_tree(sd, path_flag, add->closureB, weight);
        }
 }
 
@@ -292,10 +305,8 @@ void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, float randb, int
        sd->num_closure = 0;
        sd->randb_closure = randb;
 
-       if (globals->Ci) {
-               bool no_glossy = (path_flag & PATH_RAY_DIFFUSE) && kernel_data.integrator.no_caustics;
-               flatten_surface_closure_tree(sd, no_glossy, globals->Ci);
-       }
+       if (globals->Ci)
+               flatten_surface_closure_tree(sd, path_flag, globals->Ci);
 }
 
 /* Background */
index 5c25c44ec8ffed6a1470ce116f1fa8b1c9d622b7..eb21a5f69bd07e0712e16edb6b27bee4c0d93d8c 100644 (file)
@@ -22,12 +22,14 @@ shader node_subsurface_scattering(
        color Color = 0.8,
        float Scale = 1.0,
        vector Radius = vector(0.1, 0.1, 0.1),
-       float IOR = 1.3,
+       float TextureBlur = 0.0, // XXX use
+       string Falloff = "Cubic",
        normal Normal = N,
        output closure color BSSRDF = 0)
 {
-       float eta = max(IOR, 1.0 + 1e-5);
-
-       BSSRDF = Color * bssrdf_cubic(N, Scale * Radius);
+       if(Falloff == "Cubic")
+               BSSRDF = Color * bssrdf_cubic(N, Scale * Radius);
+       else if(Falloff == "Gaussian")
+               BSSRDF = Color * bssrdf_gaussian(N, Scale * Radius);
 }
 
index 24c3e187783a4c4cbfdc27109a04e9286e17e7e0..7d1c2443ee77fac58b73becea7b3ffc769498a11 100644 (file)
@@ -463,7 +463,10 @@ closure color emission() BUILTIN;
 closure color background() BUILTIN;
 closure color holdout() BUILTIN;
 closure color ambient_occlusion() BUILTIN;
+
+// BSSRDF
 closure color bssrdf_cubic(normal N, vector radius) BUILTIN;
+closure color bssrdf_gaussian(normal N, vector radius) BUILTIN;
 
 // Backwards compatibility
 
index 847195134e89ed559d86117398cd753691e4a122..bd4a2d781eb6b6bd7058a4a8257ef7635651f94a 100644 (file)
@@ -340,28 +340,36 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
                        break;
                }
 #ifdef __SUBSURFACE__
-               case CLOSURE_BSSRDF_ID: {
+               case CLOSURE_BSSRDF_COMPATIBLE_ID:
+               case CLOSURE_BSSRDF_CUBIC_ID:
+               case CLOSURE_BSSRDF_GAUSSIAN_ID: {
                        ShaderClosure *sc = &sd->closure[sd->num_closure];
                        float3 weight = sc->weight * mix_weight;
                        float sample_weight = fabsf(average(weight));
+                       
+                       /* disable in case of diffuse ancestor, can't see it well then and
+                        * adds considerable noise due to probabilities of continuing path
+                        * getting lower and lower */
+                       if(type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR))
+                               param1 = 0.0f;
 
                        if(sample_weight > 1e-5f && sd->num_closure+2 < MAX_CLOSURE) {
                                /* radius * scale */
                                float3 radius = stack_load_float3(stack, data_node.w)*param1;
-                               /* index of refraction */
-                               float eta = fmaxf(param2, 1.0f + 1e-5f);
+                               /* texture color blur */
+                               float texture_blur = param2;
 
                                /* create one closure per color channel */
                                if(fabsf(weight.x) > 0.0f) {
                                        sc->weight = make_float3(weight.x, 0.0f, 0.0f);
                                        sc->sample_weight = sample_weight;
                                        sc->data0 = radius.x;
-                                       sc->data1 = eta;
+                                       sc->data1 = texture_blur;
 #ifdef __OSL__
                                        sc->prim = NULL;
 #endif
                                        sc->N = N;
-                                       sd->flag |= bssrdf_setup(sc);
+                                       sd->flag |= bssrdf_setup(sc, (ClosureType)type);
 
                                        sd->num_closure++;
                                        sc++;
@@ -371,12 +379,12 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
                                        sc->weight = make_float3(0.0f, weight.y, 0.0f);
                                        sc->sample_weight = sample_weight;
                                        sc->data0 = radius.y;
-                                       sc->data1 = eta;
+                                       sc->data1 = texture_blur;
 #ifdef __OSL__
                                        sc->prim = NULL;
 #endif
                                        sc->N = N;
-                                       sd->flag |= bssrdf_setup(sc);
+                                       sd->flag |= bssrdf_setup(sc, (ClosureType)type);
 
                                        sd->num_closure++;
                                        sc++;
@@ -386,12 +394,12 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
                                        sc->weight = make_float3(0.0f, 0.0f, weight.z);
                                        sc->sample_weight = sample_weight;
                                        sc->data0 = radius.z;
-                                       sc->data1 = eta;
+                                       sc->data1 = texture_blur;
 #ifdef __OSL__
                                        sc->prim = NULL;
 #endif
                                        sc->N = N;
-                                       sd->flag |= bssrdf_setup(sc);
+                                       sd->flag |= bssrdf_setup(sc, (ClosureType)type);
 
                                        sd->num_closure++;
                                        sc++;
index 939decf80a9fe90e9a464549475cf0db116c3e06..37ed5ead49f54dde27dc4bbc7acf3d68b4da4732 100644 (file)
@@ -369,8 +369,12 @@ typedef enum ClosureType {
        CLOSURE_BSDF_BSSRDF_ID,
        CLOSURE_BSDF_TRANSPARENT_ID,
 
+       /* BSSRDF */
+       CLOSURE_BSSRDF_COMPATIBLE_ID,
+       CLOSURE_BSSRDF_CUBIC_ID,
+       CLOSURE_BSSRDF_GAUSSIAN_ID,
+
        /* Other */
-       CLOSURE_BSSRDF_ID,
        CLOSURE_EMISSION_ID,
        CLOSURE_DEBUG_ID,
        CLOSURE_BACKGROUND_ID,
@@ -391,7 +395,7 @@ typedef enum ClosureType {
 #define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_GLOSSY_TOON_ID)
 #define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID)
 #define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID)
-#define CLOSURE_IS_BSSRDF(type) (type == CLOSURE_BSSRDF_ID)
+#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_COMPATIBLE_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID)
 #define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_ISOTROPIC_ID)
 #define CLOSURE_IS_EMISSION(type) (type == CLOSURE_EMISSION_ID)
 #define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID)
index 8ec3c6a1384f85e4e3a99d843661328a1d43396e..bba4e6e9df31e4bdf987407f6b46281da054afbb 100644 (file)
 #include "kernel_types.h"
 #include "kernel_montecarlo.h"
 
-#include "closure/bsdf_diffuse.h"
-#include "closure/bssrdf.h"
-
 CCL_NAMESPACE_BEGIN
 
+static float bssrdf_cubic(float ld, float r) /* cubic falloff value at radius r; the profile reaches zero at r == ld */
+{
+       if(ld == 0.0f) /* special-cased so the division by powf(ld, 4.0f) below is never done with ld == 0 */
+               return (r == 0.0f)? 1.0f: 0.0f; /* 1 only at r == 0, otherwise 0 */
+
+       return powf(ld - min(r, ld), 3.0f) * 4.0f/powf(ld, 4.0f); /* 4*(ld - r)^3 / ld^4, with r clamped to ld so the result is 0 for r >= ld */
+}
+
 /* Cumulative density function utilities */
 
 static float cdf_lookup_inverse(const vector<float>& table, float2 range, float x)
@@ -61,25 +66,19 @@ static void cdf_invert(vector<float>& to, float2 to_range, const vector<float>&
 
 /* BSSRDF */
 
-static float bssrdf_lookup_table_max_radius(const BSSRDFParams *ss)
-{
-       /* todo: adjust when we use the real BSSRDF */
-       return ss->ld;
-}
-
-static void bssrdf_lookup_table_create(const BSSRDFParams *ss, vector<float>& sample_table, vector<float>& pdf_table)
+static void bssrdf_lookup_table_create(float ld, vector<float>& sample_table, vector<float>& pdf_table)
 {
        const int size = BSSRDF_RADIUS_TABLE_SIZE;
        vector<float> cdf(size);
        vector<float> pdf(size);
        float step = 1.0f/(float)(size - 1);
-       float max_radius = bssrdf_lookup_table_max_radius(ss);
+       float max_radius = ld;
        float pdf_sum = 0.0f;
 
        /* compute the probability density function */
        for(int i = 0; i < pdf.size(); i++) {
                float x = (i*step)*max_radius;
-               pdf[i] = bssrdf_cubic(ss->ld, x);
+               pdf[i] = bssrdf_cubic(ld, x);
                pdf_sum += pdf[i];
        }
 
@@ -124,13 +123,9 @@ void bssrdf_table_build(vector<float>& table)
 
        /* create a 2D lookup table, for reflection x sample radius */
        for(int i = 0; i < BSSRDF_REFL_TABLE_SIZE; i++) {
-               float refl = (float)i/(float)(BSSRDF_REFL_TABLE_SIZE-1);
-               float ior = 1.3f;
                float radius = 1.0f;
 
-               BSSRDFParams ss;
-               bssrdf_setup_params(&ss, refl, radius, ior);
-               bssrdf_lookup_table_create(&ss, sample_table, pdf_table);
+               bssrdf_lookup_table_create(radius, sample_table, pdf_table);
 
                memcpy(&table[i*BSSRDF_RADIUS_TABLE_SIZE], &sample_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float));
                memcpy(&table[BSSRDF_PDF_TABLE_OFFSET + i*BSSRDF_RADIUS_TABLE_SIZE], &pdf_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float));
index da8ed987346f93283e411e215ec5a77b33d9aa44..df361cde2b412096e16f4c7db89f8b48ecf68537 100644 (file)
@@ -76,7 +76,8 @@ enum ShaderNodeSpecialType {
        SHADER_SPECIAL_TYPE_NONE,
        SHADER_SPECIAL_TYPE_PROXY,
        SHADER_SPECIAL_TYPE_MIX_CLOSURE,
-       SHADER_SPECIAL_TYPE_AUTOCONVERT
+       SHADER_SPECIAL_TYPE_AUTOCONVERT,
+       SHADER_SPECIAL_TYPE_GEOMETRY
 };
 
 /* Enum
@@ -190,6 +191,7 @@ public:
        virtual bool has_surface_transparent() { return false; }
        virtual bool has_surface_bssrdf() { return false; }
        virtual bool has_converter_blackbody() { return false; }
+       virtual bool has_bssrdf_bump() { return false; }
 
        vector<ShaderInput*> inputs;
        vector<ShaderOutput*> outputs;
index db402c5fc9f1d11240cce1d331b36b289bb10565..70fa30fe03b5460622c6b5144907db692472d18a 100644 (file)
@@ -1276,16 +1276,18 @@ void ProxyNode::compile(OSLCompiler& compiler)
 BsdfNode::BsdfNode(bool scattering_)
 : ShaderNode("bsdf"), scattering(scattering_)
 {
-       closure = ccl::CLOSURE_BSSRDF_ID;
-
        add_input("Color", SHADER_SOCKET_COLOR, make_float3(0.8f, 0.8f, 0.8f));
        add_input("Normal", SHADER_SOCKET_NORMAL, ShaderInput::NORMAL);
        add_input("SurfaceMixWeight", SHADER_SOCKET_FLOAT, 0.0f, ShaderInput::USE_SVM);
 
-       if(scattering)
+       if(scattering) {
+               closure = CLOSURE_BSSRDF_CUBIC_ID;
                add_output("BSSRDF", SHADER_SOCKET_CLOSURE);
-       else
+       }
+       else {
+               closure = CLOSURE_BSDF_DIFFUSE_ID;
                add_output("BSDF", SHADER_SOCKET_CLOSURE);
+       }
 }
 
 void BsdfNode::compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3)
@@ -1600,27 +1602,47 @@ void TransparentBsdfNode::compile(OSLCompiler& compiler)
 
 /* Subsurface Scattering Closure */
 
+static ShaderEnum subsurface_falloff_init() /* builds the falloff name -> BSSRDF closure type table */
+{
+       ShaderEnum enm;
+
+       enm.insert("Cubic", CLOSURE_BSSRDF_CUBIC_ID);
+       enm.insert("Gaussian", CLOSURE_BSSRDF_GAUSSIAN_ID); /* NOTE(review): CLOSURE_BSSRDF_COMPATIBLE_ID gets no entry here; confirm that is intended */
+
+       return enm; /* returned by value */
+}
+
+ShaderEnum SubsurfaceScatteringNode::falloff_enum = subsurface_falloff_init();
+
 SubsurfaceScatteringNode::SubsurfaceScatteringNode()
 : BsdfNode(true)
 {
        name = "subsurface_scattering";
-       closure = CLOSURE_BSSRDF_ID;
+       closure = CLOSURE_BSSRDF_CUBIC_ID;
 
        add_input("Scale", SHADER_SOCKET_FLOAT, 0.01f);
        add_input("Radius", SHADER_SOCKET_VECTOR, make_float3(0.1f, 0.1f, 0.1f));
-       add_input("IOR", SHADER_SOCKET_FLOAT, 1.3f);
+       add_input("Texture Blur", SHADER_SOCKET_FLOAT, 1.0f);
 }
 
 void SubsurfaceScatteringNode::compile(SVMCompiler& compiler)
 {
-       BsdfNode::compile(compiler, input("Scale"), input("IOR"), input("Radius"));
+       BsdfNode::compile(compiler, input("Scale"), input("Texture Blur"), input("Radius"));
 }
 
 void SubsurfaceScatteringNode::compile(OSLCompiler& compiler)
 {
+       compiler.parameter("Falloff", falloff_enum[closure]);
        compiler.add(this, "node_subsurface_scattering");
 }
 
+bool SubsurfaceScatteringNode::has_bssrdf_bump() /* true when the "Normal" input is linked to a node that is not tagged as a geometry node */
+{
+       /* detect if anything is plugged into the normal input besides the default */
+       ShaderInput *normal_in = input("Normal");
+       return (normal_in->link && normal_in->link->parent->special_type != SHADER_SPECIAL_TYPE_GEOMETRY); /* unlinked socket -> false; link from a SHADER_SPECIAL_TYPE_GEOMETRY node -> false */
+}
+
 /* Emissive Closure */
 
 EmissionNode::EmissionNode()
@@ -1835,6 +1857,8 @@ void IsotropicVolumeNode::compile(OSLCompiler& compiler)
 GeometryNode::GeometryNode()
 : ShaderNode("geometry")
 {
+       special_type = SHADER_SPECIAL_TYPE_GEOMETRY;
+
        add_input("NormalIn", SHADER_SOCKET_NORMAL, ShaderInput::NORMAL, ShaderInput::USE_OSL);
        add_output("Position", SHADER_SOCKET_POINT);
        add_output("Normal", SHADER_SOCKET_NORMAL);
index 46b426ea20b0a95a5cf3d7559b738242f5a7a983..ce7942eaae569ac9daaadc8575690a305b2b1b4c 100644 (file)
@@ -271,6 +271,9 @@ class SubsurfaceScatteringNode : public BsdfNode {
 public:
        SHADER_NODE_CLASS(SubsurfaceScatteringNode)
        bool has_surface_bssrdf() { return true; }
+       bool has_bssrdf_bump();
+
+       static ShaderEnum falloff_enum;
 };
 
 class EmissionNode : public ShaderNode {
index 3f269f44abe4f19278c1b8c0d320aae932485b0e..291827f6f41bfe606c759bdf518eecb408bde778 100644 (file)
@@ -201,11 +201,16 @@ void OSLShaderManager::shading_system_init()
                        "reflection",   /* PATH_RAY_REFLECT */
                        "refraction",   /* PATH_RAY_TRANSMIT */
                        "diffuse",              /* PATH_RAY_DIFFUSE */
-                       "gloss_sharedy",                /* PATH_RAY_GLOSSY */
+                       "glossy",               /* PATH_RAY_GLOSSY */
                        "singular",             /* PATH_RAY_SINGULAR */
                        "transparent",  /* PATH_RAY_TRANSPARENT */
                        "shadow",               /* PATH_RAY_SHADOW_OPAQUE */
                        "shadow",               /* PATH_RAY_SHADOW_TRANSPARENT */
+
+                       "__unused__",
+                       "__unused__",
+                       "diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */
+                       "glossy_ancestor",  /* PATH_RAY_GLOSSY_ANCESTOR */
                };
 
                const int nraytypes = sizeof(raytypes)/sizeof(raytypes[0]);
@@ -543,8 +548,10 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
                        current_shader->has_surface_emission = true;
                if(info->has_surface_transparent)
                        current_shader->has_surface_transparent = true;
-               if(info->has_surface_bssrdf)
+               if(info->has_surface_bssrdf) {
                        current_shader->has_surface_bssrdf = true;
+                       current_shader->has_bssrdf_bump = true; /* can't detect yet */
+               }
        }
 }
 
@@ -705,8 +712,11 @@ void OSLCompiler::generate_nodes(const set<ShaderNode*>& nodes)
                                                current_shader->has_surface_emission = true;
                                        if(node->has_surface_transparent())
                                                current_shader->has_surface_transparent = true;
-                                       if(node->has_surface_bssrdf())
+                                       if(node->has_surface_bssrdf()) {
                                                current_shader->has_surface_bssrdf = true;
+                                               if(node->has_bssrdf_bump())
+                                                       current_shader->has_bssrdf_bump = true;
+                                       }
                                }
                                else
                                        nodes_done = false;
@@ -773,6 +783,7 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader)
                shader->has_surface_emission = false;
                shader->has_surface_transparent = false;
                shader->has_surface_bssrdf = false;
+               shader->has_bssrdf_bump = false;
                shader->has_volume = false;
                shader->has_displacement = false;
 
index 5b326e0a017a84c66ab4511ba793478056d0517e..75b3b193e7647ef70c31df39e7dd15c4bfc7b6a4 100644 (file)
@@ -55,6 +55,7 @@ Shader::Shader()
        has_converter_blackbody = false;
        has_volume = false;
        has_displacement = false;
+       has_bssrdf_bump = false;
 
        used = false;
 
@@ -236,11 +237,19 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
                        flag |= SD_HOMOGENEOUS_VOLUME;
                if(shader->has_surface_bssrdf)
                        has_surface_bssrdf = true;
+               if(shader->has_bssrdf_bump)
+                       flag |= SD_HAS_BSSRDF_BUMP;
                if(shader->has_converter_blackbody)
                        has_converter_blackbody = true;
 
+               /* regular shader */
                shader_flag[i++] = flag;
                shader_flag[i++] = shader->pass_id;
+
+               /* shader with bump mapping */
+               if(shader->graph_bump)
+                       flag |= SD_HAS_BSSRDF_BUMP;
+
                shader_flag[i++] = flag;
                shader_flag[i++] = shader->pass_id;
        }
index d7eac603fa65cbf0c1696ae772d8db793d46a1c9..146b94c9ef5ab704b9a4b65ecc04b043306ba8ad 100644 (file)
@@ -78,6 +78,7 @@ public:
        bool has_displacement;
        bool has_surface_bssrdf;
        bool has_converter_blackbody;
+       bool has_bssrdf_bump;
 
        /* requested mesh attributes */
        AttributeRequestSet attributes;
index 4e6171554654fe56685865bddf3b793ec92fdd37..9580823d1414188b19b63c03e0c3167676cd47d2 100644 (file)
@@ -495,8 +495,11 @@ void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
                        current_shader->has_surface_emission = true;
                if(node->has_surface_transparent())
                        current_shader->has_surface_transparent = true;
-               if(node->has_surface_bssrdf())
+               if(node->has_surface_bssrdf()) {
                        current_shader->has_surface_bssrdf = true;
+                       if(node->has_bssrdf_bump())
+                               current_shader->has_bssrdf_bump = true;
+               }
 
                /* end node is added outside of this */
        }
@@ -557,8 +560,11 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don
                        current_shader->has_surface_emission = true;
                if(node->has_surface_transparent())
                        current_shader->has_surface_transparent = true;
-               if(node->has_surface_bssrdf())
+               if(node->has_surface_bssrdf()) {
                        current_shader->has_surface_bssrdf = true;
+                       if(node->has_bssrdf_bump())
+                               current_shader->has_bssrdf_bump = true;
+               }
        }
 
        done.insert(node);
@@ -676,6 +682,7 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in
        shader->has_surface_emission = false;
        shader->has_surface_transparent = false;
        shader->has_surface_bssrdf = false;
+       shader->has_bssrdf_bump = false;
        shader->has_converter_blackbody = false;
        shader->has_volume = false;
        shader->has_displacement = false;
index cde547cd77ce8f73b8ef841882669af048b0f4c0..32ce821624d676b121e1eb178b64f87c08b38664 100644 (file)
@@ -1078,6 +1078,19 @@ __device_inline float triangle_area(const float3 v1, const float3 v2, const floa
 
 __device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
 {
+#if 0
+       if(fabsf(N.y) >= 0.999f) {
+               *a = make_float3(1, 0, 0);
+               *b = make_float3(0, 0, 1);
+               return;
+       }
+       if(fabsf(N.z) >= 0.999f) {
+               *a = make_float3(1, 0, 0);
+               *b = make_float3(0, 1, 0);
+               return;
+       }
+#endif
+
        if(N.x != N.y || N.x != N.z)
                *a = make_float3(N.z-N.y, N.x-N.z, N.y-N.x);  //(1,1,1)x N
        else
@@ -1161,6 +1174,11 @@ __device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle)
 
 /* NaN-safe math ops */
 
+__device_inline float safe_sqrtf(float f) /* square root of f, with negative f treated as zero */
+{
+       return sqrtf(max(f, 0.0f)); /* clamp to 0 first so sqrtf is never handed a negative value */
+}
+
 __device float safe_asinf(float a)
 {
        if(a <= -1.0f)
index 5cc22b25f72e7613fbd1fcb84f16614a7241fc4a..a011657ce51f952e99a284de592a5f21081f6416 100644 (file)
@@ -907,6 +907,11 @@ static void node_shader_buts_glossy(uiLayout *layout, bContext *UNUSED(C), Point
        uiItemR(layout, ptr, "distribution", 0, "", ICON_NONE);
 }
 
+static void node_shader_buts_subsurface(uiLayout *layout, bContext *UNUSED(C), PointerRNA *ptr)
+{ /* button callback for the subsurface scattering node; the context argument is unused */
+       uiItemR(layout, ptr, "falloff", 0, "", ICON_NONE); /* adds the "falloff" property of ptr to layout, passing an empty string and ICON_NONE */
+}
+
 static void node_shader_buts_toon(uiLayout *layout, bContext *UNUSED(C), PointerRNA *ptr)
 {
        uiItemR(layout, ptr, "component", 0, "", ICON_NONE);
@@ -1039,6 +1044,9 @@ static void node_shader_set_butfunc(bNodeType *ntype)
                case SH_NODE_BSDF_REFRACTION:
                        ntype->uifunc = node_shader_buts_glossy;
                        break;
+               case SH_NODE_SUBSURFACE_SCATTERING:
+                       ntype->uifunc = node_shader_buts_subsurface;
+                       break;
                case SH_NODE_BSDF_TOON:
                        ntype->uifunc = node_shader_buts_toon;
                        break;
index fda696f2e28b6763cea1aa3e4753ee682d95a21d..6984d0299e8bb03a44dcb12e147db45c683c935c 100644 (file)
@@ -965,6 +965,11 @@ typedef struct NodeShaderNormalMap {
 #define SHD_NORMAL_MAP_BLENDER_OBJECT  3
 #define SHD_NORMAL_MAP_BLENDER_WORLD   4
 
+/* subsurface */
+#define SHD_SUBSURFACE_COMPATIBLE              0
+#define SHD_SUBSURFACE_CUBIC                   1
+#define SHD_SUBSURFACE_GAUSSIAN                        2
+
 /* blur node */
 #define CMP_NODE_BLUR_ASPECT_NONE              0
 #define CMP_NODE_BLUR_ASPECT_Y                 1
index b4576509098d4d8f7d626d8d1004a69befb8c023..7be333247a616c5962c6a998b7b9ccddbd0f4b8b 100644 (file)
@@ -3640,6 +3640,25 @@ static void def_sh_tangent(StructRNA *srna)
        RNA_def_struct_sdna_from(srna, "bNode", NULL);
 }
 
+
+static void def_sh_subsurface(StructRNA *srna) /* defines the "falloff" enum property on srna */
+{
+       static EnumPropertyItem prop_subsurface_falloff_items[] = { /* selectable falloff functions, one per SHD_SUBSURFACE_* value */
+               {SHD_SUBSURFACE_COMPATIBLE, "COMPATIBLE", 0, "Compatible", "Subsurface scattering falloff compatible with previous versions"},
+               {SHD_SUBSURFACE_CUBIC, "CUBIC", 0, "Cubic", "Simple cubic falloff function"},
+               {SHD_SUBSURFACE_GAUSSIAN, "GAUSSIAN", 0, "Gaussian", "Normal distribution, multiple can be combined to fit more complex profiles"},
+               {0, NULL, 0, NULL, NULL} /* all-zero/NULL entry closing the list */
+       };
+
+       PropertyRNA *prop;
+       
+       prop = RNA_def_property(srna, "falloff", PROP_ENUM, PROP_NONE);
+       RNA_def_property_enum_sdna(prop, NULL, "custom1"); /* NOTE(review): presumably stores the enum value in the node's custom1 field - confirm */
+       RNA_def_property_enum_items(prop, prop_subsurface_falloff_items);
+       RNA_def_property_ui_text(prop, "Falloff", "Function to determine how much light nearby points contribute based on their distance to the shading point");
+       RNA_def_property_update(prop, NC_NODE | NA_EDITED, "rna_Node_update"); /* NOTE(review): presumably runs rna_Node_update and sends NC_NODE | NA_EDITED when the value changes - confirm */
+}
+
 static void def_sh_script(StructRNA *srna)
 {
        PropertyRNA *prop;
index eeec40c911f719f5e67f42336fba57d9a0ac43f8..6e2c30f98cf1da82e3e7e62ed89f236c9039a1a0 100644 (file)
@@ -86,7 +86,7 @@ DefNode( ShaderNode,     SH_NODE_BSDF_TRANSLUCENT,   0,                      "BS
 DefNode( ShaderNode,     SH_NODE_BSDF_TRANSPARENT,   0,                      "BSDF_TRANSPARENT",   BsdfTransparent,  "Transparent BSDF",  ""       )
 DefNode( ShaderNode,     SH_NODE_BSDF_VELVET,        0,                      "BSDF_VELVET",        BsdfVelvet,       "Velvet BSDF",       ""       )
 DefNode( ShaderNode,     SH_NODE_BSDF_TOON,          def_toon,               "BSDF_TOON",          BsdfToon,         "Toon BSDF",         ""       )
-DefNode( ShaderNode,     SH_NODE_SUBSURFACE_SCATTERING, 0,                   "SUBSURFACE_SCATTERING",SubsurfaceScattering,"Subsurface Scattering","")
+DefNode( ShaderNode,     SH_NODE_SUBSURFACE_SCATTERING, def_sh_subsurface,   "SUBSURFACE_SCATTERING",SubsurfaceScattering,"Subsurface Scattering","")
 DefNode( ShaderNode,     SH_NODE_VOLUME_TRANSPARENT, 0,                      "VOLUME_TRANSPARENT", VolumeTransparent,"Transparent Volume",""       )
 DefNode( ShaderNode,     SH_NODE_VOLUME_ISOTROPIC,   0,                      "VOLUME_ISOTROPIC",   VolumeIsotropic,  "Isotropic Volume",  ""       )
 DefNode( ShaderNode,     SH_NODE_EMISSION,           0,                      "EMISSION",           Emission,         "Emission",          ""       )
index 3bdc3813fd79934b4077d444c84fb22c28cf81cc..8ad529d192e5e6201df18ce1d184f03392fc9eef 100644 (file)
 /* **************** OUTPUT ******************** */
 
 static bNodeSocketTemplate sh_node_subsurface_scattering_in[] = {
-       {       SOCK_RGBA, 1, N_("Color"),              0.8f, 0.8f, 0.8f, 1.0f, 0.0f, 1.0f},
-       {       SOCK_FLOAT, 1, N_("Scale"),             1.0, 0.0f, 0.0f, 0.0f, 0.0f, 1000.0f},
-       {       SOCK_VECTOR, 1, N_("Radius"),   1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f},
-       //{     SOCK_FLOAT, 1, N_("IOR"),               1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 1000.0f},
-       {       SOCK_VECTOR, 1, N_("Normal"),   0.0f, 0.0f, 0.0f, 1.0f, -1.0f, 1.0f, PROP_NONE, SOCK_HIDE_VALUE},
+       {       SOCK_RGBA, 1, N_("Color"),                      0.8f, 0.8f, 0.8f, 1.0f, 0.0f, 1.0f},
+       {       SOCK_FLOAT, 1, N_("Scale"),                     1.0, 0.0f, 0.0f, 0.0f, 0.0f, 1000.0f},
+       {       SOCK_VECTOR, 1, N_("Radius"),           1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f},
+       {       SOCK_FLOAT, 1, N_("Texture Blur"),      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f},
+       {       SOCK_VECTOR, 1, N_("Normal"),           0.0f, 0.0f, 0.0f, 1.0f, -1.0f, 1.0f, PROP_NONE, SOCK_HIDE_VALUE},
        {       -1, 0, ""       }
 };