ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / kernel_volume.h
index 44c8f79..e024003 100644 (file)
@@ -19,9 +19,9 @@ CCL_NAMESPACE_BEGIN
 /* Events for probalistic scattering */
 
 typedef enum VolumeIntegrateResult {
-       VOLUME_PATH_SCATTERED = 0,
-       VOLUME_PATH_ATTENUATED = 1,
-       VOLUME_PATH_MISSED = 2
+  VOLUME_PATH_SCATTERED = 0,
+  VOLUME_PATH_ATTENUATED = 1,
+  VOLUME_PATH_MISSED = 2
 } VolumeIntegrateResult;
 
 /* Volume shader properties
@@ -30,9 +30,9 @@ typedef enum VolumeIntegrateResult {
  * sigma_t = sigma_a + sigma_s */
 
 typedef struct VolumeShaderCoefficients {
-       float3 sigma_t;
-       float3 sigma_s;
-       float3 emission;
+  float3 sigma_t;
+  float3 sigma_s;
+  float3 emission;
 } VolumeShaderCoefficients;
 
 #ifdef __VOLUME__
@@ -44,16 +44,16 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
                                                        float3 P,
                                                        float3 *extinction)
 {
-       sd->P = P;
-       shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
-
-       if(sd->flag & SD_EXTINCTION) {
-               *extinction = sd->closure_transparent_extinction;
-               return true;
-       }
-       else {
-               return false;
-       }
+  sd->P = P;
+  shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
+
+  if (sd->flag & SD_EXTINCTION) {
+    *extinction = sd->closure_transparent_extinction;
+    return true;
+  }
+  else {
+    return false;
+  }
 }
 
 /* evaluate shader to get absorption, scattering and emission at P */
@@ -63,97 +63,97 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
                                             float3 P,
                                             VolumeShaderCoefficients *coeff)
 {
-       sd->P = P;
-       shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
+  sd->P = P;
+  shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
 
-       if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
-               return false;
+  if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION)))
+    return false;
 
-       coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
-       coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction:
-                                                    make_float3(0.0f, 0.0f, 0.0f);
-       coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background:
-                                                   make_float3(0.0f, 0.0f, 0.0f);
+  coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
+  coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction :
+                                                make_float3(0.0f, 0.0f, 0.0f);
+  coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background :
+                                               make_float3(0.0f, 0.0f, 0.0f);
 
-       if(sd->flag & SD_SCATTER) {
-               for(int i = 0; i < sd->num_closure; i++) {
-                       const ShaderClosure *sc = &sd->closure[i];
+  if (sd->flag & SD_SCATTER) {
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
 
-                       if(CLOSURE_IS_VOLUME(sc->type))
-                               coeff->sigma_s += sc->weight;
-               }
-       }
+      if (CLOSURE_IS_VOLUME(sc->type))
+        coeff->sigma_s += sc->weight;
+    }
+  }
 
-       return true;
+  return true;
 }
 
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
 {
-       return exp3(-sigma * t);
+  return exp3(-sigma * t);
 }
 
 ccl_device float kernel_volume_channel_get(float3 value, int channel)
 {
-       return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
+  return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z);
 }
 
 #ifdef __VOLUME__
 
 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
 {
-       for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-               int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
-
-               if(shader_flag & SD_HETEROGENEOUS_VOLUME) {
-                       return true;
-               }
-               else if(shader_flag & SD_NEED_ATTRIBUTES) {
-                       /* We want to render world or objects without any volume grids
-                        * as homogenous, but can only verify this at runtime since other
-                        * heterogenous volume objects may be using the same shader. */
-                       int object = stack[i].object;
-                       if(object != OBJECT_NONE) {
-                               int object_flag = kernel_tex_fetch(__object_flag, object);
-                               if(object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
-                                       return true;
-                               }
-                       }
-               }
-       }
-
-       return false;
+  for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+    int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
+
+    if (shader_flag & SD_HETEROGENEOUS_VOLUME) {
+      return true;
+    }
+    else if (shader_flag & SD_NEED_ATTRIBUTES) {
+      /* We want to render world or objects without any volume grids
+       * as homogenous, but can only verify this at runtime since other
+       * heterogenous volume objects may be using the same shader. */
+      int object = stack[i].object;
+      if (object != OBJECT_NONE) {
+        int object_flag = kernel_tex_fetch(__object_flag, object);
+        if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
+          return true;
+        }
+      }
+    }
+  }
+
+  return false;
 }
 
 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
 {
-       if(kernel_data.integrator.num_all_lights == 0)
-               return 0;
+  if (kernel_data.integrator.num_all_lights == 0)
+    return 0;
 
-       int method = -1;
+  int method = -1;
 
-       for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-               int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
+  for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+    int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
 
-               if(shader_flag & SD_VOLUME_MIS) {
-                       return SD_VOLUME_MIS;
-               }
-               else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
-                       if(method == 0)
-                               return SD_VOLUME_MIS;
+    if (shader_flag & SD_VOLUME_MIS) {
+      return SD_VOLUME_MIS;
+    }
+    else if (shader_flag & SD_VOLUME_EQUIANGULAR) {
+      if (method == 0)
+        return SD_VOLUME_MIS;
 
-                       method = SD_VOLUME_EQUIANGULAR;
-               }
-               else {
-                       if(method == SD_VOLUME_EQUIANGULAR)
-                               return SD_VOLUME_MIS;
+      method = SD_VOLUME_EQUIANGULAR;
+    }
+    else {
+      if (method == SD_VOLUME_EQUIANGULAR)
+        return SD_VOLUME_MIS;
 
-                       method = 0;
-               }
-       }
+      method = 0;
+    }
+  }
 
-       return method;
+  return method;
 }
 
 ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
@@ -162,16 +162,16 @@ ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
                                                float *step_size,
                                                float *step_offset)
 {
-       const int max_steps = kernel_data.integrator.volume_max_steps;
-       float step = min(kernel_data.integrator.volume_step_size, t);
+  const int max_steps = kernel_data.integrator.volume_max_steps;
+  float step = min(kernel_data.integrator.volume_step_size, t);
 
-       /* compute exact steps in advance for malloc */
-       if(t > max_steps * step) {
-               step = t / (float)max_steps;
-       }
+  /* compute exact steps in advance for malloc */
+  if (t > max_steps * step) {
+    step = t / (float)max_steps;
+  }
 
-       *step_size = step;
-       *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
+  *step_size = step;
+  *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
 }
 
 /* Volume Shadows
@@ -187,10 +187,10 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
                                                  ShaderData *sd,
                                                  float3 *throughput)
 {
-       float3 sigma_t;
+  float3 sigma_t;
 
-       if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
-               *throughput *= volume_color_transmittance(sigma_t, ray->t);
+  if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
+    *throughput *= volume_color_transmittance(sigma_t, ray->t);
 }
 
 /* heterogeneous volume: integrate stepping through the volume until we
@@ -201,57 +201,57 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
                                                    ShaderData *sd,
                                                    float3 *throughput)
 {
-       float3 tp = *throughput;
-       const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
-       /* prepare for stepping */
-       int max_steps = kernel_data.integrator.volume_max_steps;
-       float step_offset, step_size;
-       kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
-       /* compute extinction at the start */
-       float t = 0.0f;
-
-       float3 sum = make_float3(0.0f, 0.0f, 0.0f);
-
-       for(int i = 0; i < max_steps; i++) {
-               /* advance to new position */
-               float new_t = min(ray->t, (i+1) * step_size);
-
-               /* use random position inside this segment to sample shader, adjust
-                * for last step that is shorter than other steps. */
-               if(new_t == ray->t) {
-                       step_offset *= (new_t - t) / step_size;
-               }
-
-               float3 new_P = ray->P + ray->D * (t + step_offset);
-               float3 sigma_t;
-
-               /* compute attenuation over segment */
-               if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
-                       /* Compute expf() only for every Nth step, to save some calculations
-                        * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
-
-                       sum += (-sigma_t * (new_t - t));
-                       if((i & 0x07) == 0) { /* ToDo: Other interval? */
-                               tp = *throughput * exp3(sum);
-
-                               /* stop if nearly all light is blocked */
-                               if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
-                                       break;
-                       }
-               }
-
-               /* stop if at the end of the volume */
-               t = new_t;
-               if(t == ray->t) {
-                       /* Update throughput in case we haven't done it above */
-                       tp = *throughput * exp3(sum);
-                       break;
-               }
-       }
-
-       *throughput = tp;
+  float3 tp = *throughput;
+  const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+  /* prepare for stepping */
+  int max_steps = kernel_data.integrator.volume_max_steps;
+  float step_offset, step_size;
+  kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+  /* compute extinction at the start */
+  float t = 0.0f;
+
+  float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+
+  for (int i = 0; i < max_steps; i++) {
+    /* advance to new position */
+    float new_t = min(ray->t, (i + 1) * step_size);
+
+    /* use random position inside this segment to sample shader, adjust
+     * for last step that is shorter than other steps. */
+    if (new_t == ray->t) {
+      step_offset *= (new_t - t) / step_size;
+    }
+
+    float3 new_P = ray->P + ray->D * (t + step_offset);
+    float3 sigma_t;
+
+    /* compute attenuation over segment */
+    if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
+      /* Compute expf() only for every Nth step, to save some calculations
+       * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
+
+      sum += (-sigma_t * (new_t - t));
+      if ((i & 0x07) == 0) { /* ToDo: Other interval? */
+        tp = *throughput * exp3(sum);
+
+        /* stop if nearly all light is blocked */
+        if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
+          break;
+      }
+    }
+
+    /* stop if at the end of the volume */
+    t = new_t;
+    if (t == ray->t) {
+      /* Update throughput in case we haven't done it above */
+      tp = *throughput * exp3(sum);
+      break;
+    }
+  }
+
+  *throughput = tp;
 }
 
 /* get the volume attenuation over line segment defined by ray, with the
@@ -262,422 +262,433 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
                                               Ray *ray,
                                               float3 *throughput)
 {
-       shader_setup_from_volume(kg, shadow_sd, ray);
+  shader_setup_from_volume(kg, shadow_sd, ray);
 
-       if(volume_stack_is_heterogeneous(kg, state->volume_stack))
-               kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
-       else
-               kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
+  if (volume_stack_is_heterogeneous(kg, state->volume_stack))
+    kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
+  else
+    kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
 }
 
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 /* Equi-angular sampling as in:
  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
 
 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
 {
-       float t = ray->t;
-
-       float delta = dot((light_P - ray->P) , ray->D);
-       float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
-       if(UNLIKELY(D == 0.0f)) {
-               *pdf = 0.0f;
-               return 0.0f;
-       }
-       float theta_a = -atan2f(delta, D);
-       float theta_b = atan2f(t - delta, D);
-       float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
-       if(UNLIKELY(theta_b == theta_a)) {
-               *pdf = 0.0f;
-               return 0.0f;
-       }
-       *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
-
-       return min(t, delta + t_); /* min is only for float precision errors */
+  float t = ray->t;
+
+  float delta = dot((light_P - ray->P), ray->D);
+  float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+  if (UNLIKELY(D == 0.0f)) {
+    *pdf = 0.0f;
+    return 0.0f;
+  }
+  float theta_a = -atan2f(delta, D);
+  float theta_b = atan2f(t - delta, D);
+  float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
+  if (UNLIKELY(theta_b == theta_a)) {
+    *pdf = 0.0f;
+    return 0.0f;
+  }
+  *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+
+  return min(t, delta + t_); /* min is only for float precision errors */
 }
 
 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
 {
-       float delta = dot((light_P - ray->P) , ray->D);
-       float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
-       if(UNLIKELY(D == 0.0f)) {
-               return 0.0f;
-       }
+  float delta = dot((light_P - ray->P), ray->D);
+  float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+  if (UNLIKELY(D == 0.0f)) {
+    return 0.0f;
+  }
 
-       float t = ray->t;
-       float t_ = sample_t - delta;
+  float t = ray->t;
+  float t_ = sample_t - delta;
 
-       float theta_a = -atan2f(delta, D);
-       float theta_b = atan2f(t - delta, D);
-       if(UNLIKELY(theta_b == theta_a)) {
-               return 0.0f;
-       }
+  float theta_a = -atan2f(delta, D);
+  float theta_b = atan2f(t - delta, D);
+  if (UNLIKELY(theta_b == theta_a)) {
+    return 0.0f;
+  }
 
-       float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+  float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
 
-       return pdf;
+  return pdf;
 }
 
 /* Distance sampling */
 
-ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
+ccl_device float kernel_volume_distance_sample(
+    float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
 {
-       /* xi is [0, 1[ so log(0) should never happen, division by zero is
-        * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
-       float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
-       float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
-       float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
+  /* xi is [0, 1[ so log(0) should never happen, division by zero is
+   * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
+  float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+  float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+  float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
 
-       float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
+  float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t);
 
-       *transmittance = volume_color_transmittance(sigma_t, sample_t);
-       *pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+  *transmittance = volume_color_transmittance(sigma_t, sample_t);
+  *pdf = safe_divide_color(sigma_t * *transmittance,
+                           make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
 
-       /* todo: optimization: when taken together with hit/miss decision,
-        * the full_transmittance cancels out drops out and xi does not
-        * need to be remapped */
+  /* todo: optimization: when taken together with hit/miss decision,
+   * the full_transmittance cancels out drops out and xi does not
+   * need to be remapped */
 
-       return sample_t;
+  return sample_t;
 }
 
 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
 {
-       float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
-       float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
+  float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+  float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
 
-       return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+  return safe_divide_color(sigma_t * transmittance,
+                           make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
 }
 
 /* Emission */
 
-ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
+ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff,
+                                                   int closure_flag,
+                                                   float3 transmittance,
+                                                   float t)
 {
-       /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
-        * this goes to E * t as sigma_t goes to zero
-        *
-        * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
-       float3 emission = coeff->emission;
-
-       if(closure_flag & SD_EXTINCTION) {
-               float3 sigma_t = coeff->sigma_t;
-
-               emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
-               emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
-               emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
-       }
-       else
-               emission *= t;
-
-       return emission;
+  /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
+   * this goes to E * t as sigma_t goes to zero
+   *
+   * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
+  float3 emission = coeff->emission;
+
+  if (closure_flag & SD_EXTINCTION) {
+    float3 sigma_t = coeff->sigma_t;
+
+    emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t;
+    emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t;
+    emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t;
+  }
+  else
+    emission *= t;
+
+  return emission;
 }
 
 /* Volume Path */
 
-ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
+ccl_device int kernel_volume_sample_channel(float3 albedo,
+                                            float3 throughput,
+                                            float rand,
+                                            float3 *pdf)
 {
-       /* Sample color channel proportional to throughput and single scattering
-        * albedo, to significantly reduce noise with many bounce, following:
-        *
-        * "Practical and Controllable Subsurface Scattering for Production Path
-        *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
-       float3 weights = fabs(throughput * albedo);
-       float sum_weights = weights.x + weights.y + weights.z;
-       float3 weights_pdf;
-
-       if(sum_weights > 0.0f) {
-               weights_pdf = weights/sum_weights;
-       }
-       else {
-               weights_pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f);
-       }
-
-       *pdf = weights_pdf;
-
-       /* OpenCL does not support -> on float3, so don't use pdf->x. */
-       if(rand < weights_pdf.x) {
-               return 0;
-       }
-       else if(rand < weights_pdf.x + weights_pdf.y) {
-               return 1;
-       }
-       else {
-               return 2;
-       }
+  /* Sample color channel proportional to throughput and single scattering
+   * albedo, to significantly reduce noise with many bounce, following:
+   *
+   * "Practical and Controllable Subsurface Scattering for Production Path
+   *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
+  float3 weights = fabs(throughput * albedo);
+  float sum_weights = weights.x + weights.y + weights.z;
+  float3 weights_pdf;
+
+  if (sum_weights > 0.0f) {
+    weights_pdf = weights / sum_weights;
+  }
+  else {
+    weights_pdf = make_float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f);
+  }
+
+  *pdf = weights_pdf;
+
+  /* OpenCL does not support -> on float3, so don't use pdf->x. */
+  if (rand < weights_pdf.x) {
+    return 0;
+  }
+  else if (rand < weights_pdf.x + weights_pdf.y) {
+    return 1;
+  }
+  else {
+    return 2;
+  }
 }
 
 #ifdef __VOLUME__
 
 /* homogeneous volume: assume shader evaluation at the start gives
  * the volume shading coefficient for the entire line segment */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
-    KernelGlobals *kg,
-    ccl_addr_space PathState *state,
-    Ray *ray,
-    ShaderData *sd,
-    PathRadiance *L,
-    ccl_addr_space float3 *throughput,
-    bool probalistic_scatter)
+ccl_device VolumeIntegrateResult
+kernel_volume_integrate_homogeneous(KernelGlobals *kg,
+                                    ccl_addr_space PathState *state,
+                                    Ray *ray,
+                                    ShaderData *sd,
+                                    PathRadiance *L,
+                                    ccl_addr_space float3 *throughput,
+                                    bool probalistic_scatter)
 {
-       VolumeShaderCoefficients coeff;
-
-       if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
-               return VOLUME_PATH_MISSED;
-
-       int closure_flag = sd->flag;
-       float t = ray->t;
-       float3 new_tp;
-
-#ifdef __VOLUME_SCATTER__
-       /* randomly scatter, and if we do t is shortened */
-       if(closure_flag & SD_SCATTER) {
-               /* Sample channel, use MIS with balance heuristic. */
-               float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-               float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
-               float3 channel_pdf;
-               int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
-
-               /* decide if we will hit or miss */
-               bool scatter = true;
-               float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
-               if(probalistic_scatter) {
-                       float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
-                       float sample_transmittance = expf(-sample_sigma_t * t);
-
-                       if(1.0f - xi >= sample_transmittance) {
-                               scatter = true;
-
-                               /* rescale random number so we can reuse it */
-                               xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
-
-                       }
-                       else
-                               scatter = false;
-               }
-
-               if(scatter) {
-                       /* scattering */
-                       float3 pdf;
-                       float3 transmittance;
-                       float sample_t;
-
-                       /* distance sampling */
-                       sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
-
-                       /* modify pdf for hit/miss decision */
-                       if(probalistic_scatter)
-                               pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
-
-                       new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
-                       t = sample_t;
-               }
-               else {
-                       /* no scattering */
-                       float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
-                       float pdf = dot(channel_pdf, transmittance);
-                       new_tp = *throughput * transmittance / pdf;
-               }
-       }
-       else
-#endif
-       if(closure_flag & SD_EXTINCTION) {
-               /* absorption only, no sampling needed */
-               float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
-               new_tp = *throughput * transmittance;
-       }
-       else {
-               new_tp = *throughput;
-       }
-
-       /* integrate emission attenuated by extinction */
-       if(L && (closure_flag & SD_EMISSION)) {
-               float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
-               float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
-               path_radiance_accum_emission(L, state, *throughput, emission);
-       }
-
-       /* modify throughput */
-       if(closure_flag & SD_EXTINCTION) {
-               *throughput = new_tp;
-
-               /* prepare to scatter to new direction */
-               if(t < ray->t) {
-                       /* adjust throughput and move to new location */
-                       sd->P = ray->P + t*ray->D;
-
-                       return VOLUME_PATH_SCATTERED;
-               }
-       }
-
-       return VOLUME_PATH_ATTENUATED;
+  VolumeShaderCoefficients coeff;
+
+  if (!volume_shader_sample(kg, sd, state, ray->P, &coeff))
+    return VOLUME_PATH_MISSED;
+
+  int closure_flag = sd->flag;
+  float t = ray->t;
+  float3 new_tp;
+
+#  ifdef __VOLUME_SCATTER__
+  /* randomly scatter, and if we do t is shortened */
+  if (closure_flag & SD_SCATTER) {
+    /* Sample channel, use MIS with balance heuristic. */
+    float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+    float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+    float3 channel_pdf;
+    int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
+
+    /* decide if we will hit or miss */
+    bool scatter = true;
+    float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+
+    if (probalistic_scatter) {
+      float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
+      float sample_transmittance = expf(-sample_sigma_t * t);
+
+      if (1.0f - xi >= sample_transmittance) {
+        scatter = true;
+
+        /* rescale random number so we can reuse it */
+        xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
+      }
+      else
+        scatter = false;
+    }
+
+    if (scatter) {
+      /* scattering */
+      float3 pdf;
+      float3 transmittance;
+      float sample_t;
+
+      /* distance sampling */
+      sample_t = kernel_volume_distance_sample(
+          ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
+
+      /* modify pdf for hit/miss decision */
+      if (probalistic_scatter)
+        pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
+
+      new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
+      t = sample_t;
+    }
+    else {
+      /* no scattering */
+      float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
+      float pdf = dot(channel_pdf, transmittance);
+      new_tp = *throughput * transmittance / pdf;
+    }
+  }
+  else
+#  endif
+      if (closure_flag & SD_EXTINCTION) {
+    /* absorption only, no sampling needed */
+    float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
+    new_tp = *throughput * transmittance;
+  }
+  else {
+    new_tp = *throughput;
+  }
+
+  /* integrate emission attenuated by extinction */
+  if (L && (closure_flag & SD_EMISSION)) {
+    float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
+    float3 emission = kernel_volume_emission_integrate(
+        &coeff, closure_flag, transmittance, ray->t);
+    path_radiance_accum_emission(L, state, *throughput, emission);
+  }
+
+  /* modify throughput */
+  if (closure_flag & SD_EXTINCTION) {
+    *throughput = new_tp;
+
+    /* prepare to scatter to new direction */
+    if (t < ray->t) {
+      /* adjust throughput and move to new location */
+      sd->P = ray->P + t * ray->D;
+
+      return VOLUME_PATH_SCATTERED;
+    }
+  }
+
+  return VOLUME_PATH_ATTENUATED;
 }
 
 /* heterogeneous volume distance sampling: integrate stepping through the
  * volume until we reach the end, get absorbed entirely, or run out of
  * iterations. this does probabilistically scatter or get transmitted through
  * for path tracing where we don't want to branch. */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
-    KernelGlobals *kg,
-    ccl_addr_space PathState *state,
-    Ray *ray,
-    ShaderData *sd,
-    PathRadiance *L,
-    ccl_addr_space float3 *throughput)
+ccl_device VolumeIntegrateResult
+kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
+                                               ccl_addr_space PathState *state,
+                                               Ray *ray,
+                                               ShaderData *sd,
+                                               PathRadiance *L,
+                                               ccl_addr_space float3 *throughput)
 {
-       float3 tp = *throughput;
-       const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
-       /* prepare for stepping */
-       int max_steps = kernel_data.integrator.volume_max_steps;
-       float step_offset, step_size;
-       kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
-       /* compute coefficients at the start */
-       float t = 0.0f;
-       float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
-
-       /* pick random color channel, we use the Veach one-sample
-        * model with balance heuristic for the channels */
-       float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-       float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-       bool has_scatter = false;
-
-       for(int i = 0; i < max_steps; i++) {
-               /* advance to new position */
-               float new_t = min(ray->t, (i+1) * step_size);
-               float dt = new_t - t;
-
-               /* use random position inside this segment to sample shader,
-               * for last shorter step we remap it to fit within the segment. */
-               if(new_t == ray->t) {
-                       step_offset *= (new_t - t) / step_size;
-               }
-
-               float3 new_P = ray->P + ray->D * (t + step_offset);
-               VolumeShaderCoefficients coeff;
-
-               /* compute segment */
-               if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
-                       int closure_flag = sd->flag;
-                       float3 new_tp;
-                       float3 transmittance;
-                       bool scatter = false;
-
-                       /* distance sampling */
-#ifdef __VOLUME_SCATTER__
-                       if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
-                               has_scatter = true;
-
-                               /* Sample channel, use MIS with balance heuristic. */
-                               float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
-                               float3 channel_pdf;
-                               int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
-
-                               /* compute transmittance over full step */
-                               transmittance = volume_color_transmittance(coeff.sigma_t, dt);
-
-                               /* decide if we will scatter or continue */
-                               float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
-
-                               if(1.0f - xi >= sample_transmittance) {
-                                       /* compute sampling distance */
-                                       float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
-                                       float new_dt = -logf(1.0f - xi)/sample_sigma_t;
-                                       new_t = t + new_dt;
-
-                                       /* transmittance and pdf */
-                                       float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
-                                       float3 pdf = coeff.sigma_t * new_transmittance;
-
-                                       /* throughput */
-                                       new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
-                                       scatter = true;
-                               }
-                               else {
-                                       /* throughput */
-                                       float pdf = dot(channel_pdf, transmittance);
-                                       new_tp = tp * transmittance / pdf;
-
-                                       /* remap xi so we can reuse it and keep thing stratified */
-                                       xi = 1.0f - (1.0f - xi)/sample_transmittance;
-                               }
-                       }
-                       else
-#endif
-                       if(closure_flag & SD_EXTINCTION) {
-                               /* absorption only, no sampling needed */
-                               transmittance = volume_color_transmittance(coeff.sigma_t, dt);
-                               new_tp = tp * transmittance;
-                       }
-                       else {
-                               new_tp = tp;
-                       }
-
-                       /* integrate emission attenuated by absorption */
-                       if(L && (closure_flag & SD_EMISSION)) {
-                               float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
-                               path_radiance_accum_emission(L, state, tp, emission);
-                       }
-
-                       /* modify throughput */
-                       if(closure_flag & SD_EXTINCTION) {
-                               tp = new_tp;
-
-                               /* stop if nearly all light blocked */
-                               if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
-                                       tp = make_float3(0.0f, 0.0f, 0.0f);
-                                       break;
-                               }
-                       }
-
-                       /* prepare to scatter to new direction */
-                       if(scatter) {
-                               /* adjust throughput and move to new location */
-                               sd->P = ray->P + new_t*ray->D;
-                               *throughput = tp;
-
-                               return VOLUME_PATH_SCATTERED;
-                       }
-                       else {
-                               /* accumulate transmittance */
-                               accum_transmittance *= transmittance;
-                       }
-               }
-
-               /* stop if at the end of the volume */
-               t = new_t;
-               if(t == ray->t)
-                       break;
-       }
-
-       *throughput = tp;
-
-       return VOLUME_PATH_ATTENUATED;
+  float3 tp = *throughput;
+  const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+  /* prepare for stepping */
+  int max_steps = kernel_data.integrator.volume_max_steps;
+  float step_offset, step_size;
+  kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+  /* compute coefficients at the start */
+  float t = 0.0f;
+  float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
+
+  /* pick random color channel, we use the Veach one-sample
+   * model with balance heuristic for the channels */
+  float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+  float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+  bool has_scatter = false;
+
+  for (int i = 0; i < max_steps; i++) {
+    /* advance to new position */
+    float new_t = min(ray->t, (i + 1) * step_size);
+    float dt = new_t - t;
+
+    /* use random position inside this segment to sample shader,
+    * for last shorter step we remap it to fit within the segment. */
+    if (new_t == ray->t) {
+      step_offset *= (new_t - t) / step_size;
+    }
+
+    float3 new_P = ray->P + ray->D * (t + step_offset);
+    VolumeShaderCoefficients coeff;
+
+    /* compute segment */
+    if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
+      int closure_flag = sd->flag;
+      float3 new_tp;
+      float3 transmittance;
+      bool scatter = false;
+
+      /* distance sampling */
+#  ifdef __VOLUME_SCATTER__
+      if ((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
+        has_scatter = true;
+
+        /* Sample channel, use MIS with balance heuristic. */
+        float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+        float3 channel_pdf;
+        int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
+
+        /* compute transmittance over full step */
+        transmittance = volume_color_transmittance(coeff.sigma_t, dt);
+
+        /* decide if we will scatter or continue */
+        float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
+
+        if (1.0f - xi >= sample_transmittance) {
+          /* compute sampling distance */
+          float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
+          float new_dt = -logf(1.0f - xi) / sample_sigma_t;
+          new_t = t + new_dt;
+
+          /* transmittance and pdf */
+          float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+          float3 pdf = coeff.sigma_t * new_transmittance;
+
+          /* throughput */
+          new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
+          scatter = true;
+        }
+        else {
+          /* throughput */
+          float pdf = dot(channel_pdf, transmittance);
+          new_tp = tp * transmittance / pdf;
+
+          /* remap xi so we can reuse it and keep thing stratified */
+          xi = 1.0f - (1.0f - xi) / sample_transmittance;
+        }
+      }
+      else
+#  endif
+          if (closure_flag & SD_EXTINCTION) {
+        /* absorption only, no sampling needed */
+        transmittance = volume_color_transmittance(coeff.sigma_t, dt);
+        new_tp = tp * transmittance;
+      }
+      else {
+        new_tp = tp;
+      }
+
+      /* integrate emission attenuated by absorption */
+      if (L && (closure_flag & SD_EMISSION)) {
+        float3 emission = kernel_volume_emission_integrate(
+            &coeff, closure_flag, transmittance, dt);
+        path_radiance_accum_emission(L, state, tp, emission);
+      }
+
+      /* modify throughput */
+      if (closure_flag & SD_EXTINCTION) {
+        tp = new_tp;
+
+        /* stop if nearly all light blocked */
+        if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
+          tp = make_float3(0.0f, 0.0f, 0.0f);
+          break;
+        }
+      }
+
+      /* prepare to scatter to new direction */
+      if (scatter) {
+        /* adjust throughput and move to new location */
+        sd->P = ray->P + new_t * ray->D;
+        *throughput = tp;
+
+        return VOLUME_PATH_SCATTERED;
+      }
+      else {
+        /* accumulate transmittance */
+        accum_transmittance *= transmittance;
+      }
+    }
+
+    /* stop if at the end of the volume */
+    t = new_t;
+    if (t == ray->t)
+      break;
+  }
+
+  *throughput = tp;
+
+  return VOLUME_PATH_ATTENUATED;
 }
 
 /* get the volume attenuation and emission over line segment defined by
  * ray, with the assumption that there are no surfaces blocking light
  * between the endpoints. distance sampling is used to decide if we will
  * scatter or not. */
-ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
-    KernelGlobals *kg,
-    ccl_addr_space PathState *state,
-    ShaderData *sd,
-    Ray *ray,
-    PathRadiance *L,
-    ccl_addr_space float3 *throughput,
-    bool heterogeneous)
+ccl_device_noinline VolumeIntegrateResult
+kernel_volume_integrate(KernelGlobals *kg,
+                        ccl_addr_space PathState *state,
+                        ShaderData *sd,
+                        Ray *ray,
+                        PathRadiance *L,
+                        ccl_addr_space float3 *throughput,
+                        bool heterogeneous)
 {
-       shader_setup_from_volume(kg, sd, ray);
+  shader_setup_from_volume(kg, sd, ray);
 
-       if(heterogeneous)
-               return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
-       else
-               return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
+  if (heterogeneous)
+    return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
+  else
+    return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
 }
 
-#ifndef __SPLIT_KERNEL__
+#  ifndef __SPLIT_KERNEL__
 /* Decoupled Volume Sampling
  *
  * VolumeSegment is list of coefficients and transmittance stored at all steps
@@ -689,26 +700,26 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
  * no support for malloc/free and too much stack usage with a fix size array. */
 
 typedef struct VolumeStep {
-       float3 sigma_s;                         /* scatter coefficient */
-       float3 sigma_t;                         /* extinction coefficient */
-       float3 accum_transmittance;     /* accumulated transmittance including this step */
-       float3 cdf_distance;            /* cumulative density function for distance sampling */
-       float t;                                        /* distance at end of this step */
-       float shade_t;                          /* jittered distance where shading was done in step */
-       int closure_flag;                       /* shader evaluation closure flags */
+  float3 sigma_s;             /* scatter coefficient */
+  float3 sigma_t;             /* extinction coefficient */
+  float3 accum_transmittance; /* accumulated transmittance including this step */
+  float3 cdf_distance;        /* cumulative density function for distance sampling */
+  float t;                    /* distance at end of this step */
+  float shade_t;              /* jittered distance where shading was done in step */
+  int closure_flag;           /* shader evaluation closure flags */
 } VolumeStep;
 
 typedef struct VolumeSegment {
-       VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
-       VolumeStep *steps;                      /* recorded steps */
-       int numsteps;                           /* number of steps */
-       int closure_flag;                       /* accumulated closure flags from all steps */
+  VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */
+  VolumeStep *steps;     /* recorded steps */
+  int numsteps;          /* number of steps */
+  int closure_flag;      /* accumulated closure flags from all steps */
 
-       float3 accum_emission;          /* accumulated emission at end of segment */
-       float3 accum_transmittance;     /* accumulated transmittance at end of segment */
-       float3 accum_albedo;        /* accumulated average albedo over segment */
+  float3 accum_emission;      /* accumulated emission at end of segment */
+  float3 accum_transmittance; /* accumulated transmittance at end of segment */
+  float3 accum_albedo;        /* accumulated average albedo over segment */
 
-       int sampling_method;            /* volume sampling method */
+  int sampling_method; /* volume sampling method */
 } VolumeSegment;
 
 /* record volume steps to the end of the volume.
@@ -717,400 +728,412 @@ typedef struct VolumeSegment {
  * but the entire segment is needed to do always scattering, rather than probabilistically
  * hitting or missing the volume. if we don't know the transmittance at the end of the
  * volume we can't generate stratified distance samples up to that transmittance */
-#ifdef __VOLUME_DECOUPLED__
-ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
-       Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
+#    ifdef __VOLUME_DECOUPLED__
+ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
+                                               PathState *state,
+                                               Ray *ray,
+                                               ShaderData *sd,
+                                               VolumeSegment *segment,
+                                               bool heterogeneous)
 {
-       const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
-
-       /* prepare for volume stepping */
-       int max_steps;
-       float step_size, step_offset;
-
-       if(heterogeneous) {
-               max_steps = kernel_data.integrator.volume_max_steps;
-               kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
-
-#ifdef __KERNEL_CPU__
-               /* NOTE: For the branched path tracing it's possible to have direct
-                * and indirect light integration both having volume segments allocated.
-                * We detect this using index in the pre-allocated memory. Currently we
-                * only support two segments allocated at a time, if more needed some
-                * modifications to the KernelGlobals will be needed.
-                *
-                * This gives us restrictions that decoupled record should only happen
-                * in the stack manner, meaning if there's subsequent call of decoupled
-                * record it'll need to free memory before it's caller frees memory.
-                */
-               const int index = kg->decoupled_volume_steps_index;
-               assert(index < sizeof(kg->decoupled_volume_steps) /
-                              sizeof(*kg->decoupled_volume_steps));
-               if(kg->decoupled_volume_steps[index] == NULL) {
-                       kg->decoupled_volume_steps[index] =
-                               (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
-               }
-               segment->steps = kg->decoupled_volume_steps[index];
-               ++kg->decoupled_volume_steps_index;
-#else
-               segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
-#endif
-       }
-       else {
-               max_steps = 1;
-               step_size = ray->t;
-               step_offset = 0.0f;
-               segment->steps = &segment->stack_step;
-       }
-
-       /* init accumulation variables */
-       float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
-       float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
-       float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
-       float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
-       float t = 0.0f;
-
-       segment->numsteps = 0;
-       segment->closure_flag = 0;
-       bool is_last_step_empty = false;
-
-       VolumeStep *step = segment->steps;
-
-       for(int i = 0; i < max_steps; i++, step++) {
-               /* advance to new position */
-               float new_t = min(ray->t, (i+1) * step_size);
-               float dt = new_t - t;
-
-               /* use random position inside this segment to sample shader,
-               * for last shorter step we remap it to fit within the segment. */
-               if(new_t == ray->t) {
-                       step_offset *= (new_t - t) / step_size;
-               }
-
-               float3 new_P = ray->P + ray->D * (t + step_offset);
-               VolumeShaderCoefficients coeff;
-
-               /* compute segment */
-               if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
-                       int closure_flag = sd->flag;
-                       float3 sigma_t = coeff.sigma_t;
-
-                       /* compute average albedo for channel sampling */
-                       if(closure_flag & SD_SCATTER) {
-                               accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
-                       }
-
-                       /* compute accumulated transmittance */
-                       float3 transmittance = volume_color_transmittance(sigma_t, dt);
-
-                       /* compute emission attenuated by absorption */
-                       if(closure_flag & SD_EMISSION) {
-                               float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
-                               accum_emission += accum_transmittance * emission;
-                       }
-
-                       accum_transmittance *= transmittance;
-
-                       /* compute pdf for distance sampling */
-                       float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
-                       cdf_distance = cdf_distance + pdf_distance;
-
-                       /* write step data */
-                       step->sigma_t = sigma_t;
-                       step->sigma_s = coeff.sigma_s;
-                       step->closure_flag = closure_flag;
-
-                       segment->closure_flag |= closure_flag;
-
-                       is_last_step_empty = false;
-                       segment->numsteps++;
-               }
-               else {
-                       if(is_last_step_empty) {
-                               /* consecutive empty step, merge */
-                               step--;
-                       }
-                       else {
-                               /* store empty step */
-                               step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
-                               step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
-                               step->closure_flag = 0;
-
-                               segment->numsteps++;
-                               is_last_step_empty = true;
-                       }
-               }
-
-               step->accum_transmittance = accum_transmittance;
-               step->cdf_distance = cdf_distance;
-               step->t = new_t;
-               step->shade_t = t + step_offset;
-
-               /* stop if at the end of the volume */
-               t = new_t;
-               if(t == ray->t)
-                       break;
-
-               /* stop if nearly all light blocked */
-               if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
-                       break;
-       }
-
-       /* store total emission and transmittance */
-       segment->accum_emission = accum_emission;
-       segment->accum_transmittance = accum_transmittance;
-       segment->accum_albedo = accum_albedo;
-
-       /* normalize cumulative density function for distance sampling */
-       VolumeStep *last_step = segment->steps + segment->numsteps - 1;
-
-       if(!is_zero(last_step->cdf_distance)) {
-               VolumeStep *step = &segment->steps[0];
-               int numsteps = segment->numsteps;
-               float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
-
-               for(int i = 0; i < numsteps; i++, step++)
-                       step->cdf_distance *= inv_cdf_distance_sum;
-       }
+  const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
+
+  /* prepare for volume stepping */
+  int max_steps;
+  float step_size, step_offset;
+
+  if (heterogeneous) {
+    max_steps = kernel_data.integrator.volume_max_steps;
+    kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
+
+#      ifdef __KERNEL_CPU__
+    /* NOTE: For the branched path tracing it's possible to have direct
+     * and indirect light integration both having volume segments allocated.
+     * We detect this using index in the pre-allocated memory. Currently we
+     * only support two segments allocated at a time, if more needed some
+     * modifications to the KernelGlobals will be needed.
+     *
+     * This gives us restrictions that decoupled record should only happen
+     * in the stack manner, meaning if there's subsequent call of decoupled
+     * record it'll need to free memory before it's caller frees memory.
+     */
+    const int index = kg->decoupled_volume_steps_index;
+    assert(index < sizeof(kg->decoupled_volume_steps) / sizeof(*kg->decoupled_volume_steps));
+    if (kg->decoupled_volume_steps[index] == NULL) {
+      kg->decoupled_volume_steps[index] = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
+    }
+    segment->steps = kg->decoupled_volume_steps[index];
+    ++kg->decoupled_volume_steps_index;
+#      else
+    segment->steps = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
+#      endif
+  }
+  else {
+    max_steps = 1;
+    step_size = ray->t;
+    step_offset = 0.0f;
+    segment->steps = &segment->stack_step;
+  }
+
+  /* init accumulation variables */
+  float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
+  float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
+  float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
+  float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+  float t = 0.0f;
+
+  segment->numsteps = 0;
+  segment->closure_flag = 0;
+  bool is_last_step_empty = false;
+
+  VolumeStep *step = segment->steps;
+
+  for (int i = 0; i < max_steps; i++, step++) {
+    /* advance to new position */
+    float new_t = min(ray->t, (i + 1) * step_size);
+    float dt = new_t - t;
+
+    /* use random position inside this segment to sample shader,
+    * for last shorter step we remap it to fit within the segment. */
+    if (new_t == ray->t) {
+      step_offset *= (new_t - t) / step_size;
+    }
+
+    float3 new_P = ray->P + ray->D * (t + step_offset);
+    VolumeShaderCoefficients coeff;
+
+    /* compute segment */
+    if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
+      int closure_flag = sd->flag;
+      float3 sigma_t = coeff.sigma_t;
+
+      /* compute average albedo for channel sampling */
+      if (closure_flag & SD_SCATTER) {
+        accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
+      }
+
+      /* compute accumulated transmittance */
+      float3 transmittance = volume_color_transmittance(sigma_t, dt);
+
+      /* compute emission attenuated by absorption */
+      if (closure_flag & SD_EMISSION) {
+        float3 emission = kernel_volume_emission_integrate(
+            &coeff, closure_flag, transmittance, dt);
+        accum_emission += accum_transmittance * emission;
+      }
+
+      accum_transmittance *= transmittance;
+
+      /* compute pdf for distance sampling */
+      float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
+      cdf_distance = cdf_distance + pdf_distance;
+
+      /* write step data */
+      step->sigma_t = sigma_t;
+      step->sigma_s = coeff.sigma_s;
+      step->closure_flag = closure_flag;
+
+      segment->closure_flag |= closure_flag;
+
+      is_last_step_empty = false;
+      segment->numsteps++;
+    }
+    else {
+      if (is_last_step_empty) {
+        /* consecutive empty step, merge */
+        step--;
+      }
+      else {
+        /* store empty step */
+        step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
+        step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
+        step->closure_flag = 0;
+
+        segment->numsteps++;
+        is_last_step_empty = true;
+      }
+    }
+
+    step->accum_transmittance = accum_transmittance;
+    step->cdf_distance = cdf_distance;
+    step->t = new_t;
+    step->shade_t = t + step_offset;
+
+    /* stop if at the end of the volume */
+    t = new_t;
+    if (t == ray->t)
+      break;
+
+    /* stop if nearly all light blocked */
+    if (accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps &&
+        accum_transmittance.z < tp_eps)
+      break;
+  }
+
+  /* store total emission and transmittance */
+  segment->accum_emission = accum_emission;
+  segment->accum_transmittance = accum_transmittance;
+  segment->accum_albedo = accum_albedo;
+
+  /* normalize cumulative density function for distance sampling */
+  VolumeStep *last_step = segment->steps + segment->numsteps - 1;
+
+  if (!is_zero(last_step->cdf_distance)) {
+    VolumeStep *step = &segment->steps[0];
+    int numsteps = segment->numsteps;
+    float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
+
+    for (int i = 0; i < numsteps; i++, step++)
+      step->cdf_distance *= inv_cdf_distance_sum;
+  }
 }
 
 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
 {
-       if(segment->steps != &segment->stack_step) {
-#ifdef __KERNEL_CPU__
-               /* NOTE: We only allow free last allocated segment.
-                * No random order of alloc/free is supported.
-                */
-               assert(kg->decoupled_volume_steps_index > 0);
-               assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
-               --kg->decoupled_volume_steps_index;
-#else
-               free(segment->steps);
-#endif
-       }
+  if (segment->steps != &segment->stack_step) {
+#      ifdef __KERNEL_CPU__
+    /* NOTE: We only allow free last allocated segment.
+     * No random order of alloc/free is supported.
+     */
+    assert(kg->decoupled_volume_steps_index > 0);
+    assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
+    --kg->decoupled_volume_steps_index;
+#      else
+    free(segment->steps);
+#      endif
+  }
 }
-#endif  /* __VOLUME_DECOUPLED__ */
+#    endif /* __VOLUME_DECOUPLED__ */
 
 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
  * marching.
  *
  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
-ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
-       KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
-       float3 *throughput, float rphase, float rscatter,
-       const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
+ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(KernelGlobals *kg,
+                                                                 PathState *state,
+                                                                 Ray *ray,
+                                                                 ShaderData *sd,
+                                                                 float3 *throughput,
+                                                                 float rphase,
+                                                                 float rscatter,
+                                                                 const VolumeSegment *segment,
+                                                                 const float3 *light_P,
+                                                                 bool probalistic_scatter)
 {
-       kernel_assert(segment->closure_flag & SD_SCATTER);
-
-       /* Sample color channel, use MIS with balance heuristic. */
-       float3 channel_pdf;
-       int channel = kernel_volume_sample_channel(segment->accum_albedo,
-                                                  *throughput,
-                                                  rphase,
-                                                  &channel_pdf);
-
-       float xi = rscatter;
-
-       /* probabilistic scattering decision based on transmittance */
-       if(probalistic_scatter) {
-               float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
-
-               if(1.0f - xi >= sample_transmittance) {
-                       /* rescale random number so we can reuse it */
-                       xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
-               }
-               else {
-                       *throughput /= sample_transmittance;
-                       return VOLUME_PATH_MISSED;
-               }
-       }
-
-       VolumeStep *step;
-       float3 transmittance;
-       float pdf, sample_t;
-       float mis_weight = 1.0f;
-       bool distance_sample = true;
-       bool use_mis = false;
-
-       if(segment->sampling_method && light_P) {
-               if(segment->sampling_method == SD_VOLUME_MIS) {
-                       /* multiple importance sample: randomly pick between
-                        * equiangular and distance sampling strategy */
-                       if(xi < 0.5f) {
-                               xi *= 2.0f;
-                       }
-                       else {
-                               xi = (xi - 0.5f)*2.0f;
-                               distance_sample = false;
-                       }
-
-                       use_mis = true;
-               }
-               else {
-                       /* only equiangular sampling */
-                       distance_sample = false;
-               }
-       }
-
-       /* distance sampling */
-       if(distance_sample) {
-               /* find step in cdf */
-               step = segment->steps;
-
-               float prev_t = 0.0f;
-               float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
-
-               if(segment->numsteps > 1) {
-                       float prev_cdf = 0.0f;
-                       float step_cdf = 1.0f;
-                       float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
-
-                       for(int i = 0; ; i++, step++) {
-                               /* todo: optimize using binary search */
-                               step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
-
-                               if(xi < step_cdf || i == segment->numsteps-1)
-                                       break;
-
-                               prev_cdf = step_cdf;
-                               prev_t = step->t;
-                               prev_cdf_distance = step->cdf_distance;
-                       }
-
-                       /* remap xi so we can reuse it */
-                       xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
-
-                       /* pdf for picking step */
-                       step_pdf_distance = step->cdf_distance - prev_cdf_distance;
-               }
-
-               /* determine range in which we will sample */
-               float step_t = step->t - prev_t;
-
-               /* sample distance and compute transmittance */
-               float3 distance_pdf;
-               sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
-
-               /* modify pdf for hit/miss decision */
-               if(probalistic_scatter)
-                       distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
-
-               pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
-
-               /* multiple importance sampling */
-               if(use_mis) {
-                       float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
-                       mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
-               }
-       }
-       /* equi-angular sampling */
-       else {
-               /* sample distance */
-               sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
-
-               /* find step in which sampled distance is located */
-               step = segment->steps;
-
-               float prev_t = 0.0f;
-               float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
-
-               if(segment->numsteps > 1) {
-                       float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
-
-                       int numsteps = segment->numsteps;
-                       int high = numsteps - 1;
-                       int low = 0;
-                       int mid;
-
-                       while(low < high) {
-                               mid = (low + high) >> 1;
-
-                               if(sample_t < step[mid].t)
-                                       high = mid;
-                               else if(sample_t >= step[mid + 1].t)
-                                       low = mid + 1;
-                               else {
-                                       /* found our interval in step[mid] .. step[mid+1] */
-                                       prev_t = step[mid].t;
-                                       prev_cdf_distance = step[mid].cdf_distance;
-                                       step += mid+1;
-                                       break;
-                               }
-                       }
-
-                       if(low >= numsteps - 1) {
-                               prev_t = step[numsteps - 1].t;
-                               prev_cdf_distance = step[numsteps-1].cdf_distance;
-                               step += numsteps - 1;
-                       }
-
-                       /* pdf for picking step with distance sampling */
-                       step_pdf_distance = step->cdf_distance - prev_cdf_distance;
-               }
-
-               /* determine range in which we will sample */
-               float step_t = step->t - prev_t;
-               float step_sample_t = sample_t - prev_t;
-
-               /* compute transmittance */
-               transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
-
-               /* multiple importance sampling */
-               if(use_mis) {
-                       float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
-                       float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
-                       mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
-               }
-       }
-       if(sample_t < 0.0f || pdf == 0.0f) {
-               return VOLUME_PATH_MISSED;
-       }
-
-       /* compute transmittance up to this step */
-       if(step != segment->steps)
-               transmittance *= (step-1)->accum_transmittance;
-
-       /* modify throughput */
-       *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
-
-       /* evaluate shader to create closures at shading point */
-       if(segment->numsteps > 1) {
-               sd->P = ray->P + step->shade_t*ray->D;
-
-               VolumeShaderCoefficients coeff;
-               volume_shader_sample(kg, sd, state, sd->P, &coeff);
-       }
-
-       /* move to new position */
-       sd->P = ray->P + sample_t*ray->D;
-
-       return VOLUME_PATH_SCATTERED;
+  kernel_assert(segment->closure_flag & SD_SCATTER);
+
+  /* Sample color channel, use MIS with balance heuristic. */
+  float3 channel_pdf;
+  int channel = kernel_volume_sample_channel(
+      segment->accum_albedo, *throughput, rphase, &channel_pdf);
+
+  float xi = rscatter;
+
+  /* probabilistic scattering decision based on transmittance */
+  if (probalistic_scatter) {
+    float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
+
+    if (1.0f - xi >= sample_transmittance) {
+      /* rescale random number so we can reuse it */
+      xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
+    }
+    else {
+      *throughput /= sample_transmittance;
+      return VOLUME_PATH_MISSED;
+    }
+  }
+
+  VolumeStep *step;
+  float3 transmittance;
+  float pdf, sample_t;
+  float mis_weight = 1.0f;
+  bool distance_sample = true;
+  bool use_mis = false;
+
+  if (segment->sampling_method && light_P) {
+    if (segment->sampling_method == SD_VOLUME_MIS) {
+      /* multiple importance sample: randomly pick between
+       * equiangular and distance sampling strategy */
+      if (xi < 0.5f) {
+        xi *= 2.0f;
+      }
+      else {
+        xi = (xi - 0.5f) * 2.0f;
+        distance_sample = false;
+      }
+
+      use_mis = true;
+    }
+    else {
+      /* only equiangular sampling */
+      distance_sample = false;
+    }
+  }
+
+  /* distance sampling */
+  if (distance_sample) {
+    /* find step in cdf */
+    step = segment->steps;
+
+    float prev_t = 0.0f;
+    float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
+
+    if (segment->numsteps > 1) {
+      float prev_cdf = 0.0f;
+      float step_cdf = 1.0f;
+      float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+      for (int i = 0;; i++, step++) {
+        /* todo: optimize using binary search */
+        step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
+
+        if (xi < step_cdf || i == segment->numsteps - 1)
+          break;
+
+        prev_cdf = step_cdf;
+        prev_t = step->t;
+        prev_cdf_distance = step->cdf_distance;
+      }
+
+      /* remap xi so we can reuse it */
+      xi = (xi - prev_cdf) / (step_cdf - prev_cdf);
+
+      /* pdf for picking step */
+      step_pdf_distance = step->cdf_distance - prev_cdf_distance;
+    }
+
+    /* determine range in which we will sample */
+    float step_t = step->t - prev_t;
+
+    /* sample distance and compute transmittance */
+    float3 distance_pdf;
+    sample_t = prev_t + kernel_volume_distance_sample(
+                            step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
+
+    /* modify pdf for hit/miss decision */
+    if (probalistic_scatter)
+      distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
+
+    pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
+
+    /* multiple importance sampling */
+    if (use_mis) {
+      float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
+      mis_weight = 2.0f * power_heuristic(pdf, equi_pdf);
+    }
+  }
+  /* equi-angular sampling */
+  else {
+    /* sample distance */
+    sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
+
+    /* find step in which sampled distance is located */
+    step = segment->steps;
+
+    float prev_t = 0.0f;
+    float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
+
+    if (segment->numsteps > 1) {
+      float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+      int numsteps = segment->numsteps;
+      int high = numsteps - 1;
+      int low = 0;
+      int mid;
+
+      while (low < high) {
+        mid = (low + high) >> 1;
+
+        if (sample_t < step[mid].t)
+          high = mid;
+        else if (sample_t >= step[mid + 1].t)
+          low = mid + 1;
+        else {
+          /* found our interval in step[mid] .. step[mid+1] */
+          prev_t = step[mid].t;
+          prev_cdf_distance = step[mid].cdf_distance;
+          step += mid + 1;
+          break;
+        }
+      }
+
+      if (low >= numsteps - 1) {
+        prev_t = step[numsteps - 1].t;
+        prev_cdf_distance = step[numsteps - 1].cdf_distance;
+        step += numsteps - 1;
+      }
+
+      /* pdf for picking step with distance sampling */
+      step_pdf_distance = step->cdf_distance - prev_cdf_distance;
+    }
+
+    /* determine range in which we will sample */
+    float step_t = step->t - prev_t;
+    float step_sample_t = sample_t - prev_t;
+
+    /* compute transmittance */
+    transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
+
+    /* multiple importance sampling */
+    if (use_mis) {
+      float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
+      float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
+      mis_weight = 2.0f * power_heuristic(pdf, distance_pdf);
+    }
+  }
+  if (sample_t < 0.0f || pdf == 0.0f) {
+    return VOLUME_PATH_MISSED;
+  }
+
+  /* compute transmittance up to this step */
+  if (step != segment->steps)
+    transmittance *= (step - 1)->accum_transmittance;
+
+  /* modify throughput */
+  *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
+
+  /* evaluate shader to create closures at shading point */
+  if (segment->numsteps > 1) {
+    sd->P = ray->P + step->shade_t * ray->D;
+
+    VolumeShaderCoefficients coeff;
+    volume_shader_sample(kg, sd, state, sd->P, &coeff);
+  }
+
+  /* move to new position */
+  sd->P = ray->P + sample_t * ray->D;
+
+  return VOLUME_PATH_SCATTERED;
 }
-#endif  /* __SPLIT_KERNEL */
+#  endif /* __SPLIT_KERNEL */
 
 /* decide if we need to use decoupled or not */
-ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
+ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg,
+                                            bool heterogeneous,
+                                            bool direct,
+                                            int sampling_method)
 {
-       /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
-        * which also means equiangular and multiple importance sampling is not
-        * support for that case */
-       if(!kernel_data.integrator.volume_decoupled)
-               return false;
-
-#ifdef __KERNEL_GPU__
-       if(heterogeneous)
-               return false;
-#endif
-
-       /* equiangular and multiple importance sampling only implemented for decoupled */
-       if(sampling_method != 0)
-               return true;
-
-       /* for all light sampling use decoupled, reusing shader evaluations is
-        * typically faster in that case */
-       if(direct)
-               return kernel_data.integrator.sample_all_lights_direct;
-       else
-               return kernel_data.integrator.sample_all_lights_indirect;
+  /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
+   * which also means equiangular and multiple importance sampling is not
+   * support for that case */
+  if (!kernel_data.integrator.volume_decoupled)
+    return false;
+
+#  ifdef __KERNEL_GPU__
+  if (heterogeneous)
+    return false;
+#  endif
+
+  /* equiangular and multiple importance sampling only implemented for decoupled */
+  if (sampling_method != 0)
+    return true;
+
+  /* for all light sampling use decoupled, reusing shader evaluations is
+   * typically faster in that case */
+  if (direct)
+    return kernel_data.integrator.sample_all_lights_direct;
+  else
+    return kernel_data.integrator.sample_all_lights_indirect;
 }
 
 /* Volume Stack
@@ -1124,242 +1147,231 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
                                          ccl_addr_space const Ray *ray,
                                          ccl_addr_space VolumeStack *stack)
 {
-       /* NULL ray happens in the baker, does it need proper initialization of
-        * camera in volume?
-        */
-       if(!kernel_data.cam.is_inside_volume || ray == NULL) {
-               /* Camera is guaranteed to be in the air, only take background volume
-                * into account in this case.
-                */
-               if(kernel_data.background.volume_shader != SHADER_NONE) {
-                       stack[0].shader = kernel_data.background.volume_shader;
-                       stack[0].object = PRIM_NONE;
-                       stack[1].shader = SHADER_NONE;
-               }
-               else {
-                       stack[0].shader = SHADER_NONE;
-               }
-               return;
-       }
-
-       kernel_assert(state->flag & PATH_RAY_CAMERA);
-
-       Ray volume_ray = *ray;
-       volume_ray.t = FLT_MAX;
-
-       const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
-       int stack_index = 0, enclosed_index = 0;
-
-#ifdef __VOLUME_RECORD_ALL__
-       Intersection hits[2*VOLUME_STACK_SIZE + 1];
-       uint num_hits = scene_intersect_volume_all(kg,
-                                                  &volume_ray,
-                                                  hits,
-                                                  2*VOLUME_STACK_SIZE,
-                                                  visibility);
-       if(num_hits > 0) {
-               int enclosed_volumes[VOLUME_STACK_SIZE];
-               Intersection *isect = hits;
-
-               qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-
-               for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
-                       shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
-                       if(stack_sd->flag & SD_BACKFACING) {
-                               bool need_add = true;
-                               for(int i = 0; i < enclosed_index && need_add; ++i) {
-                                       /* If ray exited the volume and never entered to that volume
-                                        * it means that camera is inside such a volume.
-                                        */
-                                       if(enclosed_volumes[i] == stack_sd->object) {
-                                               need_add = false;
-                                       }
-                               }
-                               for(int i = 0; i < stack_index && need_add; ++i) {
-                                       /* Don't add intersections twice. */
-                                       if(stack[i].object == stack_sd->object) {
-                                               need_add = false;
-                                               break;
-                                       }
-                               }
-                               if(need_add && stack_index < VOLUME_STACK_SIZE - 1) {
-                                       stack[stack_index].object = stack_sd->object;
-                                       stack[stack_index].shader = stack_sd->shader;
-                                       ++stack_index;
-                               }
-                       }
-                       else {
-                               /* If ray from camera enters the volume, this volume shouldn't
-                                * be added to the stack on exit.
-                                */
-                               enclosed_volumes[enclosed_index++] = stack_sd->object;
-                       }
-               }
-       }
-#else
-       int enclosed_volumes[VOLUME_STACK_SIZE];
-       int step = 0;
-
-       while(stack_index < VOLUME_STACK_SIZE - 1 &&
-             enclosed_index < VOLUME_STACK_SIZE - 1 &&
-             step < 2 * VOLUME_STACK_SIZE)
-       {
-               Intersection isect;
-               if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
-                       break;
-               }
-
-               shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
-               if(stack_sd->flag & SD_BACKFACING) {
-                       /* If ray exited the volume and never entered to that volume
-                        * it means that camera is inside such a volume.
-                        */
-                       bool need_add = true;
-                       for(int i = 0; i < enclosed_index && need_add; ++i) {
-                               /* If ray exited the volume and never entered to that volume
-                                * it means that camera is inside such a volume.
-                                */
-                               if(enclosed_volumes[i] == stack_sd->object) {
-                                       need_add = false;
-                               }
-                       }
-                       for(int i = 0; i < stack_index && need_add; ++i) {
-                               /* Don't add intersections twice. */
-                               if(stack[i].object == stack_sd->object) {
-                                       need_add = false;
-                                       break;
-                               }
-                       }
-                       if(need_add) {
-                               stack[stack_index].object = stack_sd->object;
-                               stack[stack_index].shader = stack_sd->shader;
-                               ++stack_index;
-                       }
-               }
-               else {
-                       /* If ray from camera enters the volume, this volume shouldn't
-                        * be added to the stack on exit.
-                        */
-                       enclosed_volumes[enclosed_index++] = stack_sd->object;
-               }
-
-               /* Move ray forward. */
-               volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
-               ++step;
-       }
-#endif
-       /* stack_index of 0 means quick checks outside of the kernel gave false
-        * positive, nothing to worry about, just we've wasted quite a few of
-        * ticks just to come into conclusion that camera is in the air.
-        *
-        * In this case we're doing the same above -- check whether background has
-        * volume.
-        */
-       if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
-               stack[0].shader = kernel_data.background.volume_shader;
-               stack[0].object = PRIM_NONE;
-               stack[1].shader = SHADER_NONE;
-       }
-       else {
-               stack[stack_index].shader = SHADER_NONE;
-       }
+  /* NULL ray happens in the baker, does it need proper initialization of
+   * camera in volume?
+   */
+  if (!kernel_data.cam.is_inside_volume || ray == NULL) {
+    /* Camera is guaranteed to be in the air, only take background volume
+     * into account in this case.
+     */
+    if (kernel_data.background.volume_shader != SHADER_NONE) {
+      stack[0].shader = kernel_data.background.volume_shader;
+      stack[0].object = PRIM_NONE;
+      stack[1].shader = SHADER_NONE;
+    }
+    else {
+      stack[0].shader = SHADER_NONE;
+    }
+    return;
+  }
+
+  kernel_assert(state->flag & PATH_RAY_CAMERA);
+
+  Ray volume_ray = *ray;
+  volume_ray.t = FLT_MAX;
+
+  const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
+  int stack_index = 0, enclosed_index = 0;
+
+#  ifdef __VOLUME_RECORD_ALL__
+  Intersection hits[2 * VOLUME_STACK_SIZE + 1];
+  uint num_hits = scene_intersect_volume_all(
+      kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility);
+  if (num_hits > 0) {
+    int enclosed_volumes[VOLUME_STACK_SIZE];
+    Intersection *isect = hits;
+
+    qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+    for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
+      shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+      if (stack_sd->flag & SD_BACKFACING) {
+        bool need_add = true;
+        for (int i = 0; i < enclosed_index && need_add; ++i) {
+          /* If ray exited the volume and never entered to that volume
+           * it means that camera is inside such a volume.
+           */
+          if (enclosed_volumes[i] == stack_sd->object) {
+            need_add = false;
+          }
+        }
+        for (int i = 0; i < stack_index && need_add; ++i) {
+          /* Don't add intersections twice. */
+          if (stack[i].object == stack_sd->object) {
+            need_add = false;
+            break;
+          }
+        }
+        if (need_add && stack_index < VOLUME_STACK_SIZE - 1) {
+          stack[stack_index].object = stack_sd->object;
+          stack[stack_index].shader = stack_sd->shader;
+          ++stack_index;
+        }
+      }
+      else {
+        /* If ray from camera enters the volume, this volume shouldn't
+         * be added to the stack on exit.
+         */
+        enclosed_volumes[enclosed_index++] = stack_sd->object;
+      }
+    }
+  }
+#  else
+  int enclosed_volumes[VOLUME_STACK_SIZE];
+  int step = 0;
+
+  while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 &&
+         step < 2 * VOLUME_STACK_SIZE) {
+    Intersection isect;
+    if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
+      break;
+    }
+
+    shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+    if (stack_sd->flag & SD_BACKFACING) {
+      /* If ray exited the volume and never entered to that volume
+       * it means that camera is inside such a volume.
+       */
+      bool need_add = true;
+      for (int i = 0; i < enclosed_index && need_add; ++i) {
+        /* If ray exited the volume and never entered to that volume
+         * it means that camera is inside such a volume.
+         */
+        if (enclosed_volumes[i] == stack_sd->object) {
+          need_add = false;
+        }
+      }
+      for (int i = 0; i < stack_index && need_add; ++i) {
+        /* Don't add intersections twice. */
+        if (stack[i].object == stack_sd->object) {
+          need_add = false;
+          break;
+        }
+      }
+      if (need_add) {
+        stack[stack_index].object = stack_sd->object;
+        stack[stack_index].shader = stack_sd->shader;
+        ++stack_index;
+      }
+    }
+    else {
+      /* If ray from camera enters the volume, this volume shouldn't
+       * be added to the stack on exit.
+       */
+      enclosed_volumes[enclosed_index++] = stack_sd->object;
+    }
+
+    /* Move ray forward. */
+    volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+    ++step;
+  }
+#  endif
+  /* stack_index of 0 means quick checks outside of the kernel gave false
+   * positive, nothing to worry about, just we've wasted quite a few of
+   * ticks just to come into conclusion that camera is in the air.
+   *
+   * In this case we're doing the same above -- check whether background has
+   * volume.
+   */
+  if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
+    stack[0].shader = kernel_data.background.volume_shader;
+    stack[0].object = PRIM_NONE;
+    stack[1].shader = SHADER_NONE;
+  }
+  else {
+    stack[stack_index].shader = SHADER_NONE;
+  }
 }
 
-ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
+ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg,
+                                               ShaderData *sd,
+                                               ccl_addr_space VolumeStack *stack)
 {
-       /* todo: we should have some way for objects to indicate if they want the
-        * world shader to work inside them. excluding it by default is problematic
-        * because non-volume objects can't be assumed to be closed manifolds */
-
-       if(!(sd->flag & SD_HAS_VOLUME))
-               return;
-
-       if(sd->flag & SD_BACKFACING) {
-               /* exit volume object: remove from stack */
-               for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-                       if(stack[i].object == sd->object) {
-                               /* shift back next stack entries */
-                               do {
-                                       stack[i] = stack[i+1];
-                                       i++;
-                               }
-                               while(stack[i].shader != SHADER_NONE);
-
-                               return;
-                       }
-               }
-       }
-       else {
-               /* enter volume object: add to stack */
-               int i;
-
-               for(i = 0; stack[i].shader != SHADER_NONE; i++) {
-                       /* already in the stack? then we have nothing to do */
-                       if(stack[i].object == sd->object)
-                               return;
-               }
-
-               /* if we exceed the stack limit, ignore */
-               if(i >= VOLUME_STACK_SIZE-1)
-                       return;
-
-               /* add to the end of the stack */
-               stack[i].shader = sd->shader;
-               stack[i].object = sd->object;
-               stack[i+1].shader = SHADER_NONE;
-       }
+  /* todo: we should have some way for objects to indicate if they want the
+   * world shader to work inside them. excluding it by default is problematic
+   * because non-volume objects can't be assumed to be closed manifolds */
+
+  if (!(sd->flag & SD_HAS_VOLUME))
+    return;
+
+  if (sd->flag & SD_BACKFACING) {
+    /* exit volume object: remove from stack */
+    for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
+      if (stack[i].object == sd->object) {
+        /* shift back next stack entries */
+        do {
+          stack[i] = stack[i + 1];
+          i++;
+        } while (stack[i].shader != SHADER_NONE);
+
+        return;
+      }
+    }
+  }
+  else {
+    /* enter volume object: add to stack */
+    int i;
+
+    for (i = 0; stack[i].shader != SHADER_NONE; i++) {
+      /* already in the stack? then we have nothing to do */
+      if (stack[i].object == sd->object)
+        return;
+    }
+
+    /* if we exceed the stack limit, ignore */
+    if (i >= VOLUME_STACK_SIZE - 1)
+      return;
+
+    /* add to the end of the stack */
+    stack[i].shader = sd->shader;
+    stack[i].object = sd->object;
+    stack[i + 1].shader = SHADER_NONE;
+  }
 }
 
-#ifdef __SUBSURFACE__
+#  ifdef __SUBSURFACE__
 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
                                                           ShaderData *stack_sd,
                                                           Ray *ray,
                                                           ccl_addr_space VolumeStack *stack)
 {
-       kernel_assert(kernel_data.integrator.use_volumes);
-
-       Ray volume_ray = *ray;
-
-#  ifdef __VOLUME_RECORD_ALL__
-       Intersection hits[2*VOLUME_STACK_SIZE + 1];
-       uint num_hits = scene_intersect_volume_all(kg,
-                                                  &volume_ray,
-                                                  hits,
-                                                  2*VOLUME_STACK_SIZE,
-                                                  PATH_RAY_ALL_VISIBILITY);
-       if(num_hits > 0) {
-               Intersection *isect = hits;
-
-               qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-
-               for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
-                       shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
-                       kernel_volume_stack_enter_exit(kg, stack_sd, stack);
-               }
-       }
-#  else
-       Intersection isect;
-       int step = 0;
-       float3 Pend = ray->P + ray->D*ray->t;
-       while(step < 2 * VOLUME_STACK_SIZE &&
-             scene_intersect_volume(kg,
-                                    &volume_ray,
-                                    &isect,
-                                    PATH_RAY_ALL_VISIBILITY))
-       {
-               shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
-               kernel_volume_stack_enter_exit(kg, stack_sd, stack);
-
-               /* Move ray forward. */
-               volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
-               if(volume_ray.t != FLT_MAX) {
-                       volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
-               }
-               ++step;
-       }
-#  endif
+  kernel_assert(kernel_data.integrator.use_volumes);
+
+  Ray volume_ray = *ray;
+
+#    ifdef __VOLUME_RECORD_ALL__
+  Intersection hits[2 * VOLUME_STACK_SIZE + 1];
+  uint num_hits = scene_intersect_volume_all(
+      kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY);
+  if (num_hits > 0) {
+    Intersection *isect = hits;
+
+    qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+    for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
+      shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
+      kernel_volume_stack_enter_exit(kg, stack_sd, stack);
+    }
+  }
+#    else
+  Intersection isect;
+  int step = 0;
+  float3 Pend = ray->P + ray->D * ray->t;
+  while (step < 2 * VOLUME_STACK_SIZE &&
+         scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
+    shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
+    kernel_volume_stack_enter_exit(kg, stack_sd, stack);
+
+    /* Move ray forward. */
+    volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+    if (volume_ray.t != FLT_MAX) {
+      volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
+    }
+    ++step;
+  }
+#    endif
 }
-#endif
+#  endif
 
 /* Clean stack after the last bounce.
  *
@@ -1378,15 +1390,15 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
                                                  ccl_addr_space VolumeStack *volume_stack)
 {
-       if(kernel_data.background.volume_shader != SHADER_NONE) {
-               /* Keep the world's volume in stack. */
-               volume_stack[1].shader = SHADER_NONE;
-       }
-       else {
-               volume_stack[0].shader = SHADER_NONE;
-       }
+  if (kernel_data.background.volume_shader != SHADER_NONE) {
+    /* Keep the world's volume in stack. */
+    volume_stack[1].shader = SHADER_NONE;
+  }
+  else {
+    volume_stack[0].shader = SHADER_NONE;
+  }
 }
 
-#endif  /* __VOLUME__ */
+#endif /* __VOLUME__ */
 
 CCL_NAMESPACE_END