Code refactor: tweaks in SSS code to prepare for coming changes.
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 8 Feb 2018 11:45:12 +0000 (12:45 +0100)
committer Brecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 8 Feb 2018 15:56:11 +0000 (16:56 +0100)
This also fixes a subtle bug in the split kernel branched path SSS: the
volume stack update can't be shared between multiple hit points.

intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_path_branched.h
intern/cycles/kernel/kernel_path_subsurface.h
intern/cycles/kernel/kernel_subsurface.h
intern/cycles/kernel/split/kernel_split_data_types.h
intern/cycles/kernel/split/kernel_subsurface_scatter.h

index 1e98bca66ad5ae6a48ee6c82c3d977d5020d9732..afca4575331c93c2cd7f67d9db5c77c05c3cc2fa 100644 (file)
@@ -504,7 +504,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                subsurface_scatter_step(kg,
                                                        sd,
                                                        state,
-                                                       state->flag,
                                                        sc,
                                                        &lcg_state,
                                                        bssrdf_u, bssrdf_v,
index 70d0292c5a5c25c9b39d8f637993e5a7be031ae1..5f917d509ec90805f46672c7c58529406c224e89 100644 (file)
@@ -340,9 +340,13 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                /* do subsurface scatter step with copy of shader data, this will
                 * replace the BSSRDF with a diffuse BSDF closure */
                for(int j = 0; j < num_samples; j++) {
+                       PathState hit_state = *state;
+                       path_state_branch(&hit_state, j, num_samples);
+                       hit_state.rng_hash = bssrdf_rng_hash;
+
                        LocalIntersection ss_isect;
                        float bssrdf_u, bssrdf_v;
-                       path_branched_rng_2D(kg, bssrdf_rng_hash, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+                       path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
                        int num_hits = subsurface_scatter_multi_intersect(kg,
                                                                          &ss_isect,
                                                                          sd,
@@ -350,6 +354,9 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                                                          &lcg_state,
                                                                          bssrdf_u, bssrdf_v,
                                                                          true);
+
+                       hit_state.rng_offset += PRNG_BOUNCE_NUM;
+
 #ifdef __VOLUME__
                        Ray volume_ray = *ray;
                        bool need_update_volume_stack =
@@ -364,15 +371,8 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                                               &ss_isect,
                                                               hit,
                                                               &bssrdf_sd,
-                                                              state,
-                                                              state->flag,
-                                                              sc,
-                                                              true);
-
-                               PathState hit_state = *state;
-
-                               path_state_branch(&hit_state, j, num_samples);
-                               hit_state.rng_offset += PRNG_BOUNCE_NUM;
+                                                              &hit_state,
+                                                              sc);
 
 #ifdef __VOLUME__
                                if(need_update_volume_stack) {
@@ -381,6 +381,10 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                        volume_ray.D = normalize_len(P - volume_ray.P,
                                                                     &volume_ray.t);
 
+                                       for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+                                               hit_state.volume_stack[k] = state->volume_stack[k];
+                                       }
+
                                        kernel_volume_stack_update_for_subsurface(
                                            kg,
                                            emission_sd,
@@ -393,7 +397,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                                /* direct light */
                                if(kernel_data.integrator.use_direct_light) {
                                        int all = (kernel_data.integrator.sample_all_lights_direct) ||
-                                                 (state->flag & PATH_RAY_SHADOW_CATCHER);
+                                                 (hit_state.flag & PATH_RAY_SHADOW_CATCHER);
                                        kernel_branched_path_surface_connect_light(
                                                kg,
                                                &bssrdf_sd,
index c29b41e42222e8311f6461772824a0a0b197d4ce..a48bde6443e7796377aa4550c7342018f94adf3a 100644 (file)
@@ -71,9 +71,7 @@ bool kernel_path_subsurface_scatter(
                                                       hit,
                                                       sd,
                                                       state,
-                                                      state->flag,
-                                                      sc,
-                                                      false);
+                                                      sc);
 
                        kernel_path_surface_connect_light(kg, sd, emission_sd, *throughput, state, L);
 
index 582a20704d34d4e893c122a248d12716a5dbf347..f4759b26191e25b3c5b396439be9fded9528d769 100644 (file)
@@ -135,7 +135,6 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent)
 ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
                                            ShaderData *sd,
                                            ccl_addr_space PathState *state,
-                                           int state_flag,
                                            float3 *eval,
                                            float3 *N)
 {
@@ -148,7 +147,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
 
        if(bump || texture_blur > 0.0f) {
                /* average color and normal at incoming point */
-               shader_eval_surface(kg, sd, state, state_flag, kernel_data.integrator.max_closures);
+               shader_eval_surface(kg, sd, state, state->flag, kernel_data.integrator.max_closures);
                float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
 
                /* we simply divide out the average color and multiply with the average
@@ -311,9 +310,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
         int hit,
         ShaderData *sd,
         ccl_addr_space PathState *state,
-        int state_flag,
-        const ShaderClosure *sc,
-        bool all)
+        const ShaderClosure *sc)
 {
 #ifdef __SPLIT_KERNEL__
        Ray ray_object = ss_isect->ray;
@@ -333,7 +330,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
        /* Optionally blur colors and bump mapping. */
        float3 weight = ss_isect->weight[hit];
        float3 N = sd->N;
-       subsurface_color_bump_blur(kg, sd, state, state_flag, &weight, &N);
+       subsurface_color_bump_blur(kg, sd, state, &weight, &N);
 
        /* Setup diffuse BSDF. */
        subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, true, N);
@@ -341,7 +338,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
 
 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
 ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state,
-       int state_flag, const ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
+       const ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
 {
        float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
@@ -430,7 +427,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_a
 
        /* optionally blur colors and bump mapping */
        float3 N = sd->N;
-       subsurface_color_bump_blur(kg, sd, state, state_flag, &eval, &N);
+       subsurface_color_bump_blur(kg, sd, state, &eval, &N);
 
        /* setup diffuse bsdf */
        subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, eval, (ss_isect.num_hits > 0), N);
index 5f40fdc9240558d954504ffc0d8d5fa5f895a4d8..56194d9f857fd094446ba2bc6743038022a1a072 100644 (file)
@@ -67,10 +67,6 @@ typedef ccl_global struct SplitBranchedState {
 
        uint lcg_state;
        LocalIntersection ss_isect;
-
-#  ifdef __VOLUME__
-       VolumeStack volume_stack[VOLUME_STACK_SIZE];
-#  endif  /* __VOLUME__ */
 #endif  /*__SUBSURFACE__ */
 
        int shared_sample_count; /* number of branched samples shared with other threads */
index 993e8d4d477182ce5d088ce2064607aa271a8b72..f902d0009187aed4c9345acb25eaaebc53be876d 100644 (file)
@@ -61,11 +61,16 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
                /* do subsurface scatter step with copy of shader data, this will
                 * replace the BSSRDF with a diffuse BSDF closure */
                for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
+                       ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+                       *hit_state = branched_state->path_state;
+                       hit_state->rng_hash = bssrdf_rng_hash;
+                       path_state_branch(hit_state, j, num_samples);
+
                        ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
                        float bssrdf_u, bssrdf_v;
                        path_branched_rng_2D(kg,
                                             bssrdf_rng_hash,
-                                            &branched_state->path_state,
+                                            hit_state,
                                             j,
                                             num_samples,
                                             PRNG_BSDF_U,
@@ -89,6 +94,8 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
                                *ss_isect = ss_isect_private;
                        }
 
+                       hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
 #ifdef __VOLUME__
                        Ray volume_ray = branched_state->ray;
                        bool need_update_volume_stack =
@@ -107,39 +114,24 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
                                                               &ss_isect_private,
                                                               hit,
                                                               bssrdf_sd,
-                                                              &branched_state->path_state,
-                                                              branched_state->path_state.flag,
-                                                              sc,
-                                                              true);
+                                                              hit_state,
+                                                              sc);
                                *ss_isect = ss_isect_private;
 
-                               ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
-                               *hit_state = branched_state->path_state;
-
-                               path_state_branch(hit_state, j, num_samples);
-                               hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
 #ifdef __VOLUME__
                                if(need_update_volume_stack) {
                                        /* Setup ray from previous surface point to the new one. */
                                        float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
                                        volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
 
-                                       /* this next part is expensive as it does scene intersection so only do once */
-                                       if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
-                                               for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
-                                                       branched_state->volume_stack[k] = hit_state->volume_stack[k];
-                                               }
-
-                                               kernel_volume_stack_update_for_subsurface(kg,
-                                                                                         emission_sd,
-                                                                                         &volume_ray,
-                                                                                         branched_state->volume_stack);
-                                       }
-
                                        for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
-                                               hit_state->volume_stack[k] = branched_state->volume_stack[k];
+                                               hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
                                        }
+
+                                       kernel_volume_stack_update_for_subsurface(kg,
+                                                                                 emission_sd,
+                                                                                 &volume_ray,
+                                                                                 hit_state->volume_stack);
                                }
 #endif  /* __VOLUME__ */
 
@@ -148,7 +140,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
                                        /* direct light */
                                        if(kernel_data.integrator.use_direct_light) {
                                                int all = (kernel_data.integrator.sample_all_lights_direct) ||
-                                                             (branched_state->path_state.flag & PATH_RAY_SHADOW_CATCHER);
+                                                             (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
                                                kernel_branched_path_surface_connect_light(kg,
                                                                                           bssrdf_sd,
                                                                                           emission_sd,
@@ -265,7 +257,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
                                        subsurface_scatter_step(kg,
                                                                sd,
                                                                state,
-                                                               state->flag,
                                                                sc,
                                                                &lcg_state,
                                                                bssrdf_u, bssrdf_v,