Fix T54105: random walk SSS missing in branched indirect paths.
author: Brecht Van Lommel <brechtvanlommel@gmail.com>
Mon, 19 Feb 2018 23:15:14 +0000 (00:15 +0100)
committer: Brecht Van Lommel <brechtvanlommel@gmail.com>
Wed, 21 Feb 2018 16:56:26 +0000 (17:56 +0100)
Unify the path and branched path indirect SSS code. No performance impact was
found on CUDA; for the AMD split kernel the extra code was already there.

intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_subsurface.h
intern/cycles/kernel/split/kernel_indirect_subsurface.h
intern/cycles/kernel/split/kernel_split_common.h
intern/cycles/kernel/split/kernel_subsurface_scatter.h

index dbbb80ca37ff7192452b86bd401176532dc8adc3..4728a25a3bc6a8bb5c044a1a34a39a050f8db478 100644 (file)
@@ -400,6 +400,13 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                      PathState *state,
                                      PathRadiance *L)
 {
+#ifdef __SUBSURFACE__
+       SubsurfaceIndirectRays ss_indirect;
+       kernel_path_subsurface_init_indirect(&ss_indirect);
+
+       for(;;) {
+#endif  /* __SUBSURFACE__ */
+
        /* path iteration */
        for(;;) {
                /* Find intersection with objects in scene. */
@@ -485,29 +492,21 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                }
 #endif  /* __AO__ */
 
+
 #ifdef __SUBSURFACE__
                /* bssrdf scatter to a different location on the same object, replacing
                 * the closures with a diffuse BSDF */
                if(sd->flag & SD_BSSRDF) {
-                       float bssrdf_u, bssrdf_v;
-                       path_state_rng_2D(kg,
-                                         state,
-                                         PRNG_BSDF_U,
-                                         &bssrdf_u, &bssrdf_v);
-
-                       const ShaderClosure *sc = shader_bssrdf_pick(sd, &throughput, &bssrdf_u);
-
-                       /* do bssrdf scatter step if we picked a bssrdf closure */
-                       if(sc) {
-                               uint lcg_state = lcg_state_init(state, 0x68bc21eb);
-
-                               subsurface_scatter_step(kg,
-                                                       sd,
-                                                       state,
-                                                       sc,
-                                                       &lcg_state,
-                                                       bssrdf_u, bssrdf_v,
-                                                       false);
+                       if(kernel_path_subsurface_scatter(kg,
+                                                         sd,
+                                                         emission_sd,
+                                                         L,
+                                                         state,
+                                                         ray,
+                                                         &throughput,
+                                                         &ss_indirect))
+                       {
+                               break;
                        }
                }
 #endif  /* __SUBSURFACE__ */
@@ -530,6 +529,24 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                if(!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray))
                        break;
        }
+
+#ifdef __SUBSURFACE__
+               /* Trace indirect subsurface rays by restarting the loop. this uses less
+                * stack memory than invoking kernel_path_indirect.
+                */
+               if(ss_indirect.num_rays) {
+                       kernel_path_subsurface_setup_indirect(kg,
+                                                             &ss_indirect,
+                                                             state,
+                                                             ray,
+                                                             L,
+                                                             &throughput);
+               }
+               else {
+                       break;
+               }
+       }
+#endif  /* __SUBSURFACE__ */
 }
 
 #endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
index 80dda31c61e17c7931a5c71013291e4a7637c54f..134f362f5e6ca350ba523316ca66c5efedc7dad1 100644 (file)
@@ -69,44 +69,42 @@ ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
 }
 
 /* replace closures with a single diffuse bsdf closure after scatter step */
-ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, bool hit, float3 N)
+ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, float3 N)
 {
        sd->flag &= ~SD_CLOSURE_FLAGS;
        sd->num_closure = 0;
        sd->num_closure_left = kernel_data.integrator.max_closures;
 
-       if(hit) {
-               Bssrdf *bssrdf = (Bssrdf *)sc;
+       Bssrdf *bssrdf = (Bssrdf *)sc;
 #ifdef __PRINCIPLED__
-               if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
-                  bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
-               {
-                       PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
-
-                       if(bsdf) {
-                               bsdf->N = N;
-                               bsdf->roughness = bssrdf->roughness;
-                               sd->flag |= bsdf_principled_diffuse_setup(bsdf);
-
-                               /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
-                                * can recognize it as not being a regular Disney principled diffuse closure */
-                               bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
-                       }
+       if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
+          bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
+       {
+               PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
+
+               if(bsdf) {
+                       bsdf->N = N;
+                       bsdf->roughness = bssrdf->roughness;
+                       sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+
+                       /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
+                        * can recognize it as not being a regular Disney principled diffuse closure */
+                       bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
                }
-               else if(CLOSURE_IS_BSDF_BSSRDF(bssrdf->type) ||
-                       CLOSURE_IS_BSSRDF(bssrdf->type))
+       }
+       else if(CLOSURE_IS_BSDF_BSSRDF(bssrdf->type) ||
+                       CLOSURE_IS_BSSRDF(bssrdf->type))
 #endif  /* __PRINCIPLED__ */
-               {
-                       DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+       {
+               DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
 
-                       if(bsdf) {
-                               bsdf->N = N;
-                               sd->flag |= bsdf_diffuse_setup(bsdf);
+               if(bsdf) {
+                       bsdf->N = N;
+                       sd->flag |= bsdf_diffuse_setup(bsdf);
 
-                               /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
-                                * can recognize it as not being a regular diffuse closure */
-                               bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
-                       }
+                       /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
+                        * can recognize it as not being a regular diffuse closure */
+                       bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
                }
        }
 }
@@ -334,104 +332,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
        subsurface_color_bump_blur(kg, sd, state, &weight, &N);
 
        /* Setup diffuse BSDF. */
-       subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, true, N);
-}
-
-/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state,
-       const ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
-{
-       float3 eval = make_float3(0.0f, 0.0f, 0.0f);
-
-       /* pick random axis in local frame and point on disk */
-       float3 disk_N, disk_T, disk_B;
-       float pick_pdf_N, pick_pdf_T, pick_pdf_B;
-
-       disk_N = sd->Ng;
-       make_orthonormals(disk_N, &disk_T, &disk_B);
-
-       if(disk_v < 0.5f) {
-               pick_pdf_N = 0.5f;
-               pick_pdf_T = 0.25f;
-               pick_pdf_B = 0.25f;
-               disk_v *= 2.0f;
-       }
-       else if(disk_v < 0.75f) {
-               float3 tmp = disk_N;
-               disk_N = disk_T;
-               disk_T = tmp;
-               pick_pdf_N = 0.25f;
-               pick_pdf_T = 0.5f;
-               pick_pdf_B = 0.25f;
-               disk_v = (disk_v - 0.5f)*4.0f;
-       }
-       else {
-               float3 tmp = disk_N;
-               disk_N = disk_B;
-               disk_B = tmp;
-               pick_pdf_N = 0.25f;
-               pick_pdf_T = 0.25f;
-               pick_pdf_B = 0.5f;
-               disk_v = (disk_v - 0.75f)*4.0f;
-       }
-
-       /* sample point on disk */
-       float phi = M_2PI_F * disk_v;
-       float disk_height, disk_r;
-
-       bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
-
-       float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
-
-       /* create ray */
-       Ray ray;
-       ray.P = sd->P + disk_N*disk_height + disk_P;
-       ray.D = -disk_N;
-       ray.t = 2.0f*disk_height;
-       ray.dP = sd->dP;
-       ray.dD = differential3_zero();
-       ray.time = sd->time;
-
-       /* intersect with the same object. if multiple intersections are
-        * found it will randomly pick one of them */
-       LocalIntersection ss_isect;
-       scene_intersect_local(kg, ray, &ss_isect, sd->object, lcg_state, 1);
-
-       /* evaluate bssrdf */
-       if(ss_isect.num_hits > 0) {
-               float3 origP = sd->P;
-
-               /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
-#if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
-               kernel_split_params.dummy_sd_flag = sd->flag;
-#endif
-               /* setup new shading point */
-               shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
-
-               /* Probability densities for local frame axes. */
-               float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
-               float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
-               float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
-
-               /* Multiple importance sample between 3 axes, power heuristic
-                * found to be slightly better than balance heuristic. pdf_N
-                * in the MIS weight and denominator cancelled out. */
-               float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
-               w *= ss_isect.num_hits;
-
-               /* Real distance to sampled point. */
-               float r = len(sd->P - origP);
-
-               /* Evaluate profiles. */
-               eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
-       }
-
-       /* optionally blur colors and bump mapping */
-       float3 N = sd->N;
-       subsurface_color_bump_blur(kg, sd, state, &eval, &N);
-
-       /* setup diffuse bsdf */
-       subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, eval, (ss_isect.num_hits > 0), N);
+       subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, N);
 }
 
 /* Random walk subsurface scattering.
index e9fe5552e8cde90fdbd09178c4d5983dda2e8803..b65f3d1a9402aa5fe83f82c3736c583e5cafc485 100644 (file)
@@ -49,28 +49,22 @@ ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
        ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
        ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
 
-#ifdef __BRANCHED_PATH__
-       if(!kernel_data.integrator.branched) {
-#endif
-               if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
-                       ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+       if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+               ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
 
-                       /* Trace indirect subsurface rays by restarting the loop. this uses less
-                        * stack memory than invoking kernel_path_indirect.
-                        */
-                       if(ss_indirect->num_rays) {
-                               kernel_path_subsurface_setup_indirect(kg,
-                                                                         ss_indirect,
-                                                                         state,
-                                                                         ray,
-                                                                         L,
-                                                                         throughput);
-                               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-                       }
+               /* Trace indirect subsurface rays by restarting the loop. this uses less
+                * stack memory than invoking kernel_path_indirect.
+                */
+               if(ss_indirect->num_rays) {
+                       kernel_path_subsurface_setup_indirect(kg,
+                                                                                                 ss_indirect,
+                                                                                                 state,
+                                                                                                 ray,
+                                                                                                 L,
+                                                                                                 throughput);
+                       ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
                }
-#ifdef __BRANCHED_PATH__
        }
-#endif
 
 #endif  /* __SUBSURFACE__ */
 
index 21886ee62ee2e0aebaf9c800617bd1237c2bcbfd..b52e7bddc8294e72c9129dc55c48b257b5d29c49 100644 (file)
@@ -59,7 +59,12 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
        ccl_global char *ray_state = kernel_split_state.ray_state;
 
 #ifdef __BRANCHED_PATH__
-       if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
+       ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+
+       if(ss_indirect->num_rays) {
+               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+       }
+       else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
                int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
 
                PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
index e50d63ea3bc6d5900463ebc8cd502b5f3e94d804..af0303d860890d71557cfb95b5fba5c075f8a726 100644 (file)
@@ -228,7 +228,9 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
                if(sd->flag & SD_BSSRDF) {
 
 #ifdef __BRANCHED_PATH__
-                       if(!kernel_data.integrator.branched) {
+                       if(!kernel_data.integrator.branched ||
+                          IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
+                       {
 #endif
                                if(kernel_path_subsurface_scatter(kg,
                                                                  sd,
@@ -243,27 +245,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
                                }
 #ifdef __BRANCHED_PATH__
                        }
-                       else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
-                               float bssrdf_u, bssrdf_v;
-                               path_state_rng_2D(kg,
-                                                 state,
-                                                 PRNG_BSDF_U,
-                                                 &bssrdf_u, &bssrdf_v);
-
-                               const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u);
-
-                               /* do bssrdf scatter step if we picked a bssrdf closure */
-                               if(sc) {
-                                       uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb);
-                                       subsurface_scatter_step(kg,
-                                                               sd,
-                                                               state,
-                                                               sc,
-                                                               &lcg_state,
-                                                               bssrdf_u, bssrdf_v,
-                                                               false);
-                               }
-                       }
                        else {
                                kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);