0e313b8e88c61bf3fae41ef89a498af744f7c321
[blender.git] / intern / cycles / kernel / kernel_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22         VOLUME_PATH_SCATTERED = 0,
23         VOLUME_PATH_ATTENUATED = 1,
24         VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33         float3 sigma_a;
34         float3 sigma_s;
35         float3 emission;
36 } VolumeShaderCoefficients;
37
38 /* evaluate shader to get extinction coefficient at P */
39 ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, float3 *extinction)
40 {
41         sd->P = P;
42         shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
43
44         if(!(sd->flag & (SD_ABSORPTION|SD_SCATTER)))
45                 return false;
46
47         float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
48
49         for(int i = 0; i < sd->num_closure; i++) {
50                 const ShaderClosure *sc = &sd->closure[i];
51
52                 if(CLOSURE_IS_VOLUME(sc->type))
53                         sigma_t += sc->weight;
54         }
55
56         *extinction = sigma_t;
57         return true;
58 }
59
60 /* evaluate shader to get absorption, scattering and emission at P */
61 ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, VolumeShaderCoefficients *coeff)
62 {
63         sd->P = P;
64         shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, SHADER_CONTEXT_VOLUME);
65
66         if(!(sd->flag & (SD_ABSORPTION|SD_SCATTER|SD_EMISSION)))
67                 return false;
68         
69         coeff->sigma_a = make_float3(0.0f, 0.0f, 0.0f);
70         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
71         coeff->emission = make_float3(0.0f, 0.0f, 0.0f);
72
73         for(int i = 0; i < sd->num_closure; i++) {
74                 const ShaderClosure *sc = &sd->closure[i];
75
76                 if(sc->type == CLOSURE_VOLUME_ABSORPTION_ID)
77                         coeff->sigma_a += sc->weight;
78                 else if(sc->type == CLOSURE_EMISSION_ID)
79                         coeff->emission += sc->weight;
80                 else if(CLOSURE_IS_VOLUME(sc->type))
81                         coeff->sigma_s += sc->weight;
82         }
83
84         /* when at the max number of bounces, treat scattering as absorption */
85         if(sd->flag & SD_SCATTER) {
86                 if(state->volume_bounce >= kernel_data.integrator.max_volume_bounce) {
87                         coeff->sigma_a += coeff->sigma_s;
88                         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
89                         sd->flag &= ~SD_SCATTER;
90                         sd->flag |= SD_ABSORPTION;
91                 }
92         }
93
94         return true;
95 }
96
97 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
98 {
99         return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
100 }
101
102 ccl_device float kernel_volume_channel_get(float3 value, int channel)
103 {
104         return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
105 }
106
107 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
108 {
109         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
110                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
111
112                 if(shader_flag & SD_HETEROGENEOUS_VOLUME)
113                         return true;
114         }
115
116         return false;
117 }
118
119 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
120 {
121         if(kernel_data.integrator.num_all_lights == 0)
122                 return 0;
123
124         int method = -1;
125
126         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
127                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
128
129                 if(shader_flag & SD_VOLUME_MIS) {
130                         return SD_VOLUME_MIS;
131                 }
132                 else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
133                         if(method == 0)
134                                 return SD_VOLUME_MIS;
135
136                         method = SD_VOLUME_EQUIANGULAR;
137                 }
138                 else {
139                         if(method == SD_VOLUME_EQUIANGULAR)
140                                 return SD_VOLUME_MIS;
141
142                         method = 0;
143                 }
144         }
145
146         return method;
147 }
148
149 /* Volume Shadows
150  *
151  * These functions are used to attenuate shadow rays to lights. Both absorption
152  * and scattering will block light, represented by the extinction coefficient. */
153
154 /* homogeneous volume: assume shader evaluation at the starts gives
155  * the extinction coefficient for the entire line segment */
156 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
157 {
158         float3 sigma_t;
159
160         if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
161                 *throughput *= volume_color_transmittance(sigma_t, ray->t);
162 }
163
164 /* heterogeneous volume: integrate stepping through the volume until we
165  * reach the end, get absorbed entirely, or run out of iterations */
166 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
167 {
168         float3 tp = *throughput;
169         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
170
171         /* prepare for stepping */
172         int max_steps = kernel_data.integrator.volume_max_steps;
173         float step = kernel_data.integrator.volume_step_size;
174         float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step;
175
176         /* compute extinction at the start */
177         float t = 0.0f;
178
179         float3 sum = make_float3(0.0f, 0.0f, 0.0f);
180
181         for(int i = 0; i < max_steps; i++) {
182                 /* advance to new position */
183                 float new_t = min(ray->t, (i+1) * step);
184                 float dt = new_t - t;
185
186                 /* use random position inside this segment to sample shader */
187                 if(new_t == ray->t)
188                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
189
190                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
191                 float3 sigma_t;
192
193                 /* compute attenuation over segment */
194                 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
195                         /* Compute expf() only for every Nth step, to save some calculations
196                          * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
197
198                         sum += (-sigma_t * (new_t - t));
199                         if((i & 0x07) == 0) { /* ToDo: Other interval? */
200                                 tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
201
202                                 /* stop if nearly all light is blocked */
203                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
204                                         break;
205                         }
206                 }
207
208                 /* stop if at the end of the volume */
209                 t = new_t;
210                 if(t == ray->t) {
211                         /* Update throughput in case we haven't done it above */
212                         tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
213                         break;
214                 }
215         }
216
217         *throughput = tp;
218 }
219
220 /* get the volume attenuation over line segment defined by ray, with the
221  * assumption that there are no surfaces blocking light between the endpoints */
222 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
223 {
224         shader_setup_from_volume(kg, shadow_sd, ray);
225
226         if(volume_stack_is_heterogeneous(kg, state->volume_stack))
227                 kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
228         else
229                 kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
230 }
231
232 /* Equi-angular sampling as in:
233  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
234
235 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
236 {
237         float t = ray->t;
238
239         float delta = dot((light_P - ray->P) , ray->D);
240         float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
241         float theta_a = -atan2f(delta, D);
242         float theta_b = atan2f(t - delta, D);
243         float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
244
245         *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
246
247         return min(t, delta + t_); /* min is only for float precision errors */
248 }
249
250 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
251 {
252         float delta = dot((light_P - ray->P) , ray->D);
253         float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
254
255         float t = ray->t;
256         float t_ = sample_t - delta;
257
258         float theta_a = -atan2f(delta, D);
259         float theta_b = atan2f(t - delta, D);
260
261         float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
262
263         return pdf;
264 }
265
266 /* Distance sampling */
267
268 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
269 {
270         /* xi is [0, 1[ so log(0) should never happen, division by zero is
271          * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
272         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
273         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
274         float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
275
276         float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
277
278         *transmittance = volume_color_transmittance(sigma_t, sample_t);
279         *pdf = (sigma_t * *transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
280
281         /* todo: optimization: when taken together with hit/miss decision,
282          * the full_transmittance cancels out drops out and xi does not
283          * need to be remapped */
284
285         return sample_t;
286 }
287
288 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
289 {
290         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
291         float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
292
293         return (sigma_t * transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
294 }
295
296 /* Emission */
297
298 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
299 {
300         /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
301          * this goes to E * t as sigma_t goes to zero
302          *
303          * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
304         float3 emission = coeff->emission;
305
306         if(closure_flag & SD_ABSORPTION) {
307                 float3 sigma_t = coeff->sigma_a + coeff->sigma_s;
308
309                 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
310                 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
311                 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
312         }
313         else
314                 emission *= t;
315         
316         return emission;
317 }
318
319 /* Volume Path */
320
321 /* homogeneous volume: assume shader evaluation at the start gives
322  * the volume shading coefficient for the entire line segment */
323 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
324         PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
325         RNG *rng, bool probalistic_scatter)
326 {
327         VolumeShaderCoefficients coeff;
328
329         if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
330                 return VOLUME_PATH_MISSED;
331
332         int closure_flag = sd->flag;
333         float t = ray->t;
334         float3 new_tp;
335
336 #ifdef __VOLUME_SCATTER__
337         /* randomly scatter, and if we do t is shortened */
338         if(closure_flag & SD_SCATTER) {
339                 /* extinction coefficient */
340                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
341
342                 /* pick random color channel, we use the Veach one-sample
343                  * model with balance heuristic for the channels */
344                 float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
345                 int channel = (int)(rphase*3.0f);
346                 sd->randb_closure = rphase*3.0f - channel;
347
348                 /* decide if we will hit or miss */
349                 bool scatter = true;
350                 float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
351
352                 if(probalistic_scatter) {
353                         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
354                         float sample_transmittance = expf(-sample_sigma_t * t);
355
356                         if(1.0f - xi >= sample_transmittance) {
357                                 scatter = true;
358
359                                 /* rescale random number so we can reuse it */
360                                 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
361
362                         }
363                         else
364                                 scatter = false;
365                 }
366
367                 if(scatter) {
368                         /* scattering */
369                         float3 pdf;
370                         float3 transmittance;
371                         float sample_t;
372
373                         /* distance sampling */
374                         sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
375
376                         /* modify pdf for hit/miss decision */
377                         if(probalistic_scatter)
378                                 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
379
380                         new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
381                         t = sample_t;
382                 }
383                 else {
384                         /* no scattering */
385                         float3 transmittance = volume_color_transmittance(sigma_t, t);
386                         float pdf = average(transmittance);
387                         new_tp = *throughput * transmittance / pdf;
388                 }
389         }
390         else 
391 #endif
392         if(closure_flag & SD_ABSORPTION) {
393                 /* absorption only, no sampling needed */
394                 float3 transmittance = volume_color_transmittance(coeff.sigma_a, t);
395                 new_tp = *throughput * transmittance;
396         }
397
398         /* integrate emission attenuated by extinction */
399         if(L && (closure_flag & SD_EMISSION)) {
400                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
401                 float3 transmittance = volume_color_transmittance(sigma_t, ray->t);
402                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
403                 path_radiance_accum_emission(L, *throughput, emission, state->bounce);
404         }
405
406         /* modify throughput */
407         if(closure_flag & (SD_ABSORPTION|SD_SCATTER)) {
408                 *throughput = new_tp;
409
410                 /* prepare to scatter to new direction */
411                 if(t < ray->t) {
412                         /* adjust throughput and move to new location */
413                         sd->P = ray->P + t*ray->D;
414
415                         return VOLUME_PATH_SCATTERED;
416                 }
417         }
418
419         return VOLUME_PATH_ATTENUATED;
420 }
421
422 /* heterogeneous volume distance sampling: integrate stepping through the
423  * volume until we reach the end, get absorbed entirely, or run out of
424  * iterations. this does probabilistically scatter or get transmitted through
425  * for path tracing where we don't want to branch. */
426 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
427         PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
428 {
429         float3 tp = *throughput;
430         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
431
432         /* prepare for stepping */
433         int max_steps = kernel_data.integrator.volume_max_steps;
434         float step_size = kernel_data.integrator.volume_step_size;
435         float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
436
437         /* compute coefficients at the start */
438         float t = 0.0f;
439         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
440
441         /* pick random color channel, we use the Veach one-sample
442          * model with balance heuristic for the channels */
443         float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
444         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
445         int channel = (int)(rphase*3.0f);
446         sd->randb_closure = rphase*3.0f - channel;
447         bool has_scatter = false;
448
449         for(int i = 0; i < max_steps; i++) {
450                 /* advance to new position */
451                 float new_t = min(ray->t, (i+1) * step_size);
452                 float dt = new_t - t;
453
454                 /* use random position inside this segment to sample shader */
455                 if(new_t == ray->t)
456                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
457
458                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
459                 VolumeShaderCoefficients coeff;
460
461                 /* compute segment */
462                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
463                         int closure_flag = sd->flag;
464                         float3 new_tp;
465                         float3 transmittance;
466                         bool scatter = false;
467
468                         /* distance sampling */
469 #ifdef __VOLUME_SCATTER__
470                         if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) {
471                                 has_scatter = true;
472
473                                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
474                                 float3 sigma_s = coeff.sigma_s;
475
476                                 /* compute transmittance over full step */
477                                 transmittance = volume_color_transmittance(sigma_t, dt);
478
479                                 /* decide if we will scatter or continue */
480                                 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
481
482                                 if(1.0f - xi >= sample_transmittance) {
483                                         /* compute sampling distance */
484                                         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
485                                         float new_dt = -logf(1.0f - xi)/sample_sigma_t;
486                                         new_t = t + new_dt;
487
488                                         /* transmittance and pdf */
489                                         float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt);
490                                         float3 pdf = sigma_t * new_transmittance;
491
492                                         /* throughput */
493                                         new_tp = tp * sigma_s * new_transmittance / average(pdf);
494                                         scatter = true;
495                                 }
496                                 else {
497                                         /* throughput */
498                                         float pdf = average(transmittance);
499                                         new_tp = tp * transmittance / pdf;
500
501                                         /* remap xi so we can reuse it and keep thing stratified */
502                                         xi = 1.0f - (1.0f - xi)/sample_transmittance;
503                                 }
504                         }
505                         else 
506 #endif
507                         if(closure_flag & SD_ABSORPTION) {
508                                 /* absorption only, no sampling needed */
509                                 float3 sigma_a = coeff.sigma_a;
510
511                                 transmittance = volume_color_transmittance(sigma_a, dt);
512                                 new_tp = tp * transmittance;
513                         }
514
515                         /* integrate emission attenuated by absorption */
516                         if(L && (closure_flag & SD_EMISSION)) {
517                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
518                                 path_radiance_accum_emission(L, tp, emission, state->bounce);
519                         }
520
521                         /* modify throughput */
522                         if(closure_flag & (SD_ABSORPTION|SD_SCATTER)) {
523                                 tp = new_tp;
524
525                                 /* stop if nearly all light blocked */
526                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
527                                         tp = make_float3(0.0f, 0.0f, 0.0f);
528                                         break;
529                                 }
530                         }
531
532                         /* prepare to scatter to new direction */
533                         if(scatter) {
534                                 /* adjust throughput and move to new location */
535                                 sd->P = ray->P + new_t*ray->D;
536                                 *throughput = tp;
537
538                                 return VOLUME_PATH_SCATTERED;
539                         }
540                         else {
541                                 /* accumulate transmittance */
542                                 accum_transmittance *= transmittance;
543                         }
544                 }
545
546                 /* stop if at the end of the volume */
547                 t = new_t;
548                 if(t == ray->t)
549                         break;
550         }
551
552         *throughput = tp;
553
554         return VOLUME_PATH_ATTENUATED;
555 }
556
557 /* get the volume attenuation and emission over line segment defined by
558  * ray, with the assumption that there are no surfaces blocking light
559  * between the endpoints. distance sampling is used to decide if we will
560  * scatter or not. */
561 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
562         PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
563 {
564         /* workaround to fix correlation bug in T38710, can find better solution
565          * in random number generator later, for now this is done here to not impact
566          * performance of rendering without volumes */
567         RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
568
569         shader_setup_from_volume(kg, sd, ray);
570
571         if(heterogeneous)
572                 return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
573         else
574                 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
575 }
576
577 /* Decoupled Volume Sampling
578  *
579  * VolumeSegment is list of coefficients and transmittance stored at all steps
580  * through a volume. This can then later be used for decoupled sampling as in:
581  * "Importance Sampling Techniques for Path Tracing in Participating Media"
582  *
583  * On the GPU this is only supported (but currently not enabled)
584  * for homogeneous volumes (1 step), due to
585  * no support for malloc/free and too much stack usage with a fix size array. */
586
587 typedef struct VolumeStep {
588         float3 sigma_s;                         /* scatter coefficient */
589         float3 sigma_t;                         /* extinction coefficient */
590         float3 accum_transmittance;     /* accumulated transmittance including this step */
591         float3 cdf_distance;            /* cumulative density function for distance sampling */
592         float t;                                        /* distance at end of this step */
593         float shade_t;                          /* jittered distance where shading was done in step */
594         int closure_flag;                       /* shader evaluation closure flags */
595 } VolumeStep;
596
597 typedef struct VolumeSegment {
598         VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
599         VolumeStep *steps;                      /* recorded steps */
600         int numsteps;                           /* number of steps */
601         int closure_flag;                       /* accumulated closure flags from all steps */
602
603         float3 accum_emission;          /* accumulated emission at end of segment */
604         float3 accum_transmittance;     /* accumulated transmittance at end of segment */
605
606         int sampling_method;            /* volume sampling method */
607 } VolumeSegment;
608
609 /* record volume steps to the end of the volume.
610  *
611  * it would be nice if we could only record up to the point that we need to scatter,
612  * but the entire segment is needed to do always scattering, rather than probabilistically
613  * hitting or missing the volume. if we don't know the transmittance at the end of the
614  * volume we can't generate stratified distance samples up to that transmittance */
615 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
616         Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
617 {
618         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
619
620         /* prepare for volume stepping */
621         int max_steps;
622         float step_size, random_jitter_offset;
623
624         if(heterogeneous) {
625                 const int global_max_steps = kernel_data.integrator.volume_max_steps;
626                 step_size = kernel_data.integrator.volume_step_size;
627                 /* compute exact steps in advance for malloc */
628                 max_steps = max((int)ceilf(ray->t/step_size), 1);
629                 if(max_steps > global_max_steps) {
630                         max_steps = global_max_steps;
631                         step_size = ray->t / (float)max_steps;
632                 }
633 #ifdef __KERNEL_CPU__
634                 /* NOTE: For the branched path tracing it's possible to have direct
635                  * and indirect light integration both having volume segments allocated.
636                  * We detect this using index in the pre-allocated memory. Currently we
637                  * only support two segments allocated at a time, if more needed some
638                  * modifications to the KernelGlobals will be needed.
639                  *
640                  * This gives us restrictions that decoupled record should only happen
641                  * in the stack manner, meaning if there's subsequent call of decoupled
642                  * record it'll need to free memory before it's caller frees memory.
643                  */
644                 const int index = kg->decoupled_volume_steps_index;
645                 assert(index < sizeof(kg->decoupled_volume_steps) /
646                                sizeof(*kg->decoupled_volume_steps));
647                 if(kg->decoupled_volume_steps[index] == NULL) {
648                         kg->decoupled_volume_steps[index] =
649                                 (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
650                 }
651                 segment->steps = kg->decoupled_volume_steps[index];
652                 ++kg->decoupled_volume_steps_index;
653 #else
654                 segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
655 #endif
656                 random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
657         }
658         else {
659                 max_steps = 1;
660                 step_size = ray->t;
661                 random_jitter_offset = 0.0f;
662                 segment->steps = &segment->stack_step;
663         }
664         
665         /* init accumulation variables */
666         float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
667         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
668         float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
669         float t = 0.0f;
670
671         segment->numsteps = 0;
672         segment->closure_flag = 0;
673         bool is_last_step_empty = false;
674
675         VolumeStep *step = segment->steps;
676
677         for(int i = 0; i < max_steps; i++, step++) {
678                 /* advance to new position */
679                 float new_t = min(ray->t, (i+1) * step_size);
680                 float dt = new_t - t;
681
682                 /* use random position inside this segment to sample shader */
683                 if(heterogeneous && new_t == ray->t)
684                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
685
686                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
687                 VolumeShaderCoefficients coeff;
688
689                 /* compute segment */
690                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
691                         int closure_flag = sd->flag;
692                         float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
693
694                         /* compute accumulated transmittance */
695                         float3 transmittance = volume_color_transmittance(sigma_t, dt);
696
697                         /* compute emission attenuated by absorption */
698                         if(closure_flag & SD_EMISSION) {
699                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
700                                 accum_emission += accum_transmittance * emission;
701                         }
702
703                         accum_transmittance *= transmittance;
704
705                         /* compute pdf for distance sampling */
706                         float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
707                         cdf_distance = cdf_distance + pdf_distance;
708
709                         /* write step data */
710                         step->sigma_t = sigma_t;
711                         step->sigma_s = coeff.sigma_s;
712                         step->closure_flag = closure_flag;
713
714                         segment->closure_flag |= closure_flag;
715
716                         is_last_step_empty = false;
717                         segment->numsteps++;
718                 }
719                 else {
720                         if(is_last_step_empty) {
721                                 /* consecutive empty step, merge */
722                                 step--;
723                         }
724                         else {
725                                 /* store empty step */
726                                 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
727                                 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
728                                 step->closure_flag = 0;
729
730                                 segment->numsteps++;
731                                 is_last_step_empty = true;
732                         }
733                 }
734
735                 step->accum_transmittance = accum_transmittance;
736                 step->cdf_distance = cdf_distance;
737                 step->t = new_t;
738                 step->shade_t = t + random_jitter_offset;
739
740                 /* stop if at the end of the volume */
741                 t = new_t;
742                 if(t == ray->t)
743                         break;
744
745                 /* stop if nearly all light blocked */
746                 if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
747                         break;
748         }
749
750         /* store total emission and transmittance */
751         segment->accum_emission = accum_emission;
752         segment->accum_transmittance = accum_transmittance;
753
754         /* normalize cumulative density function for distance sampling */
755         VolumeStep *last_step = segment->steps + segment->numsteps - 1;
756
757         if(!is_zero(last_step->cdf_distance)) {
758                 VolumeStep *step = &segment->steps[0];
759                 int numsteps = segment->numsteps;
760                 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
761
762                 for(int i = 0; i < numsteps; i++, step++)
763                         step->cdf_distance *= inv_cdf_distance_sum;
764         }
765 }
766
767 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
768 {
769         if(segment->steps != &segment->stack_step) {
770 #ifdef __KERNEL_CPU__
771                 /* NOTE: We only allow free last allocated segment.
772                  * No random order of alloc/free is supported.
773                  */
774                 assert(kg->decoupled_volume_steps_index > 0);
775                 assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
776                 --kg->decoupled_volume_steps_index;
777 #else
778                 free(segment->steps);
779 #endif
780         }
781 }
782
783 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
784  * marching.
785  *
786  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
787 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
788         KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
789         float3 *throughput, float rphase, float rscatter,
790         const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
791 {
792         kernel_assert(segment->closure_flag & SD_SCATTER);
793
794         /* pick random color channel, we use the Veach one-sample
795          * model with balance heuristic for the channels */
796         int channel = (int)(rphase*3.0f);
797         sd->randb_closure = rphase*3.0f - channel;
798         float xi = rscatter;
799
800         /* probabilistic scattering decision based on transmittance */
801         if(probalistic_scatter) {
802                 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
803
804                 if(1.0f - xi >= sample_transmittance) {
805                         /* rescale random number so we can reuse it */
806                         xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
807                 }
808                 else {
809                         *throughput /= sample_transmittance;
810                         return VOLUME_PATH_MISSED;
811                 }
812         }
813
814         VolumeStep *step;
815         float3 transmittance;
816         float pdf, sample_t;
817         float mis_weight = 1.0f;
818         bool distance_sample = true;
819         bool use_mis = false;
820
821         if(segment->sampling_method && light_P) {
822                 if(segment->sampling_method == SD_VOLUME_MIS) {
823                         /* multiple importance sample: randomly pick between
824                          * equiangular and distance sampling strategy */
825                         if(xi < 0.5f) {
826                                 xi *= 2.0f;
827                         }
828                         else {
829                                 xi = (xi - 0.5f)*2.0f;
830                                 distance_sample = false;
831                         }
832
833                         use_mis = true;
834                 }
835                 else {
836                         /* only equiangular sampling */
837                         distance_sample = false;
838                 }
839         }
840
841         /* distance sampling */
842         if(distance_sample) {
843                 /* find step in cdf */
844                 step = segment->steps;
845
846                 float prev_t = 0.0f;
847                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
848
849                 if(segment->numsteps > 1) {
850                         float prev_cdf = 0.0f;
851                         float step_cdf = 1.0f;
852                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
853
854                         for(int i = 0; ; i++, step++) {
855                                 /* todo: optimize using binary search */
856                                 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
857
858                                 if(xi < step_cdf || i == segment->numsteps-1)
859                                         break;
860
861                                 prev_cdf = step_cdf;
862                                 prev_t = step->t;
863                                 prev_cdf_distance = step->cdf_distance;
864                         }
865
866                         /* remap xi so we can reuse it */
867                         xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
868
869                         /* pdf for picking step */
870                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
871                 }
872
873                 /* determine range in which we will sample */
874                 float step_t = step->t - prev_t;
875
876                 /* sample distance and compute transmittance */
877                 float3 distance_pdf;
878                 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
879
880                 /* modify pdf for hit/miss decision */
881                 if(probalistic_scatter)
882                         distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
883
884                 pdf = average(distance_pdf * step_pdf_distance);
885
886                 /* multiple importance sampling */
887                 if(use_mis) {
888                         float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
889                         mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
890                 }
891         }
892         /* equi-angular sampling */
893         else {
894                 /* sample distance */
895                 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
896
897                 /* find step in which sampled distance is located */
898                 step = segment->steps;
899
900                 float prev_t = 0.0f;
901                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
902
903                 if(segment->numsteps > 1) {
904                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
905
906                         int numsteps = segment->numsteps;
907                         int high = numsteps - 1;
908                         int low = 0;
909                         int mid;
910
911                         while(low < high) {
912                                 mid = (low + high) >> 1;
913
914                                 if(sample_t < step[mid].t)
915                                         high = mid;
916                                 else if(sample_t >= step[mid + 1].t)
917                                         low = mid + 1;
918                                 else {
919                                         /* found our interval in step[mid] .. step[mid+1] */
920                                         prev_t = step[mid].t;
921                                         prev_cdf_distance = step[mid].cdf_distance;
922                                         step += mid+1;
923                                         break;
924                                 }
925                         }
926
927                         if(low >= numsteps - 1) {
928                                 prev_t = step[numsteps - 1].t;
929                                 prev_cdf_distance = step[numsteps-1].cdf_distance;
930                                 step += numsteps - 1;
931                         }
932
933                         /* pdf for picking step with distance sampling */
934                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
935                 }
936
937                 /* determine range in which we will sample */
938                 float step_t = step->t - prev_t;
939                 float step_sample_t = sample_t - prev_t;
940
941                 /* compute transmittance */
942                 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
943
944                 /* multiple importance sampling */
945                 if(use_mis) {
946                         float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
947                         float distance_pdf = average(distance_pdf3 * step_pdf_distance);
948                         mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
949                 }
950         }
951
952         /* compute transmittance up to this step */
953         if(step != segment->steps)
954                 transmittance *= (step-1)->accum_transmittance;
955
956         /* modify throughput */
957         *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
958
959         /* evaluate shader to create closures at shading point */
960         if(segment->numsteps > 1) {
961                 sd->P = ray->P + step->shade_t*ray->D;
962
963                 VolumeShaderCoefficients coeff;
964                 volume_shader_sample(kg, sd, state, sd->P, &coeff);
965         }
966
967         /* move to new position */
968         sd->P = ray->P + sample_t*ray->D;
969
970         return VOLUME_PATH_SCATTERED;
971 }
972
973 /* decide if we need to use decoupled or not */
974 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
975 {
976         /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
977          * which also means equiangular and multiple importance sampling is not
978          * support for that case */
979 #ifdef __KERNEL_GPU__
980         if(heterogeneous)
981                 return false;
982 #endif
983
984         /* equiangular and multiple importance sampling only implemented for decoupled */
985         if(sampling_method != 0)
986                 return true;
987
988         /* for all light sampling use decoupled, reusing shader evaluations is
989          * typically faster in that case */
990         if(direct)
991                 return kernel_data.integrator.sample_all_lights_direct;
992         else
993                 return kernel_data.integrator.sample_all_lights_indirect;
994 }
995
996 /* Volume Stack
997  *
998  * This is an array of object/shared ID's that the current segment of the path
999  * is inside of. */
1000
1001 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
1002                                          ShaderData *stack_sd,
1003                                          Ray *ray,
1004                                          VolumeStack *stack)
1005 {
1006         /* NULL ray happens in the baker, does it need proper initialization of
1007          * camera in volume?
1008          */
1009         if(!kernel_data.cam.is_inside_volume || ray == NULL) {
1010                 /* Camera is guaranteed to be in the air, only take background volume
1011                  * into account in this case.
1012                  */
1013                 if(kernel_data.background.volume_shader != SHADER_NONE) {
1014                         stack[0].shader = kernel_data.background.volume_shader;
1015                         stack[0].object = PRIM_NONE;
1016                         stack[1].shader = SHADER_NONE;
1017                 }
1018                 else {
1019                         stack[0].shader = SHADER_NONE;
1020                 }
1021                 return;
1022         }
1023
1024         Ray volume_ray = *ray;
1025         volume_ray.t = FLT_MAX;
1026
1027         int stack_index = 0, enclosed_index = 0;
1028
1029 #ifdef __VOLUME_RECORD_ALL__
1030         Intersection hits[2*VOLUME_STACK_SIZE];
1031         uint num_hits = scene_intersect_volume_all(kg,
1032                                                    &volume_ray,
1033                                                    hits,
1034                                                    2*VOLUME_STACK_SIZE,
1035                                                    PATH_RAY_ALL_VISIBILITY);
1036         if(num_hits > 0) {
1037                 int enclosed_volumes[VOLUME_STACK_SIZE];
1038                 Intersection *isect = hits;
1039
1040                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1041
1042                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1043                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1044                         if(stack_sd->flag & SD_BACKFACING) {
1045                                 bool need_add = true;
1046                                 for(int i = 0; i < enclosed_index && need_add; ++i) {
1047                                         /* If ray exited the volume and never entered to that volume
1048                                          * it means that camera is inside such a volume.
1049                                          */
1050                                         if(enclosed_volumes[i] == stack_sd->object) {
1051                                                 need_add = false;
1052                                         }
1053                                 }
1054                                 for(int i = 0; i < stack_index && need_add; ++i) {
1055                                         /* Don't add intersections twice. */
1056                                         if(stack[i].object == stack_sd->object) {
1057                                                 need_add = false;
1058                                                 break;
1059                                         }
1060                                 }
1061                                 if(need_add) {
1062                                         stack[stack_index].object = stack_sd->object;
1063                                         stack[stack_index].shader = stack_sd->shader;
1064                                         ++stack_index;
1065                                 }
1066                         }
1067                         else {
1068                                 /* If ray from camera enters the volume, this volume shouldn't
1069                                  * be added to the stack on exit.
1070                                  */
1071                                 enclosed_volumes[enclosed_index++] = stack_sd->object;
1072                         }
1073                 }
1074         }
1075 #else
1076         int enclosed_volumes[VOLUME_STACK_SIZE];
1077         int step = 0;
1078
1079         while(stack_index < VOLUME_STACK_SIZE - 1 &&
1080               enclosed_index < VOLUME_STACK_SIZE - 1 &&
1081               step < 2 * VOLUME_STACK_SIZE)
1082         {
1083                 Intersection isect;
1084                 if(!scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
1085                         break;
1086                 }
1087
1088                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1089                 if(stack_sd->flag & SD_BACKFACING) {
1090                         /* If ray exited the volume and never entered to that volume
1091                          * it means that camera is inside such a volume.
1092                          */
1093                         bool need_add = true;
1094                         for(int i = 0; i < enclosed_index && need_add; ++i) {
1095                                 /* If ray exited the volume and never entered to that volume
1096                                  * it means that camera is inside such a volume.
1097                                  */
1098                                 if(enclosed_volumes[i] == stack_sd->object) {
1099                                         need_add = false;
1100                                 }
1101                         }
1102                         for(int i = 0; i < stack_index && need_add; ++i) {
1103                                 /* Don't add intersections twice. */
1104                                 if(stack[i].object == stack_sd->object) {
1105                                         need_add = false;
1106                                         break;
1107                                 }
1108                         }
1109                         if(need_add) {
1110                                 stack[stack_index].object = stack_sd->object;
1111                                 stack[stack_index].shader = stack_sd->shader;
1112                                 ++stack_index;
1113                         }
1114                 }
1115                 else {
1116                         /* If ray from camera enters the volume, this volume shouldn't
1117                          * be added to the stack on exit.
1118                          */
1119                         enclosed_volumes[enclosed_index++] = stack_sd->object;
1120                 }
1121
1122                 /* Move ray forward. */
1123                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1124                 ++step;
1125         }
1126 #endif
1127         /* stack_index of 0 means quick checks outside of the kernel gave false
1128          * positive, nothing to worry about, just we've wasted quite a few of
1129          * ticks just to come into conclusion that camera is in the air.
1130          *
1131          * In this case we're doing the same above -- check whether background has
1132          * volume.
1133          */
1134         if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1135                 stack[0].shader = kernel_data.background.volume_shader;
1136                 stack[0].object = PRIM_NONE;
1137                 stack[1].shader = SHADER_NONE;
1138         }
1139         else {
1140                 stack[stack_index].shader = SHADER_NONE;
1141         }
1142 }
1143
1144 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack)
1145 {
1146         /* todo: we should have some way for objects to indicate if they want the
1147          * world shader to work inside them. excluding it by default is problematic
1148          * because non-volume objects can't be assumed to be closed manifolds */
1149
1150         if(!(sd->flag & SD_HAS_VOLUME))
1151                 return;
1152         
1153         if(sd->flag & SD_BACKFACING) {
1154                 /* exit volume object: remove from stack */
1155                 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1156                         if(stack[i].object == sd->object) {
1157                                 /* shift back next stack entries */
1158                                 do {
1159                                         stack[i] = stack[i+1];
1160                                         i++;
1161                                 }
1162                                 while(stack[i].shader != SHADER_NONE);
1163
1164                                 return;
1165                         }
1166                 }
1167         }
1168         else {
1169                 /* enter volume object: add to stack */
1170                 int i;
1171
1172                 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
1173                         /* already in the stack? then we have nothing to do */
1174                         if(stack[i].object == sd->object)
1175                                 return;
1176                 }
1177
1178                 /* if we exceed the stack limit, ignore */
1179                 if(i >= VOLUME_STACK_SIZE-1)
1180                         return;
1181
1182                 /* add to the end of the stack */
1183                 stack[i].shader = sd->shader;
1184                 stack[i].object = sd->object;
1185                 stack[i+1].shader = SHADER_NONE;
1186         }
1187 }
1188
1189 #ifdef __SUBSURFACE__
1190 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1191                                                           ShaderData *stack_sd,
1192                                                           Ray *ray,
1193                                                           VolumeStack *stack)
1194 {
1195         kernel_assert(kernel_data.integrator.use_volumes);
1196
1197         Ray volume_ray = *ray;
1198
1199 #  ifdef __VOLUME_RECORD_ALL__
1200         Intersection hits[2*VOLUME_STACK_SIZE];
1201         uint num_hits = scene_intersect_volume_all(kg,
1202                                                    &volume_ray,
1203                                                    hits,
1204                                                    2*VOLUME_STACK_SIZE,
1205                                                    PATH_RAY_ALL_VISIBILITY);
1206         if(num_hits > 0) {
1207                 Intersection *isect = hits;
1208
1209                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1210
1211                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1212                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1213                         kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1214                 }
1215         }
1216 #  else
1217         Intersection isect;
1218         int step = 0;
1219         float3 Pend = ray->P + ray->D*ray->t;
1220         while(step < 2 * VOLUME_STACK_SIZE &&
1221               scene_intersect_volume(kg,
1222                                      &volume_ray,
1223                                      &isect,
1224                                      PATH_RAY_ALL_VISIBILITY))
1225         {
1226                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1227                 kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1228
1229                 /* Move ray forward. */
1230                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1231                 if(volume_ray.t != FLT_MAX) {
1232                         volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
1233                 }
1234                 ++step;
1235         }
1236 #  endif
1237 }
1238 #endif
1239
1240 CCL_NAMESPACE_END