fb3c5437275167942729524b34cf3cdaefe53186
[blender.git] / intern / cycles / kernel / kernel_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22         VOLUME_PATH_SCATTERED = 0,
23         VOLUME_PATH_ATTENUATED = 1,
24         VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33         float3 sigma_t;
34         float3 sigma_s;
35         float3 emission;
36 } VolumeShaderCoefficients;
37
38 /* evaluate shader to get extinction coefficient at P */
39 ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
40                                                        ShaderData *sd,
41                                                        ccl_addr_space PathState *state,
42                                                        float3 P,
43                                                        float3 *extinction)
44 {
45         sd->P = P;
46         shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, 0);
47
48         if(sd->flag & SD_EXTINCTION) {
49                 *extinction = sd->closure_transparent_extinction;
50                 return true;
51         }
52         else {
53                 return false;
54         }
55 }
56
57 /* evaluate shader to get absorption, scattering and emission at P */
58 ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
59                                             ShaderData *sd,
60                                             ccl_addr_space PathState *state,
61                                             float3 P,
62                                             VolumeShaderCoefficients *coeff)
63 {
64         sd->P = P;
65         shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, MAX_CLOSURE);
66
67         if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
68                 return false;
69         
70         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
71         coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction:
72                                                      make_float3(0.0f, 0.0f, 0.0f);
73         coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background:
74                                                     make_float3(0.0f, 0.0f, 0.0f);
75
76         if(sd->flag & SD_SCATTER) {
77                 if(state->volume_bounce < kernel_data.integrator.max_volume_bounce) {
78                         for(int i = 0; i < sd->num_closure; i++) {
79                                 const ShaderClosure *sc = &sd->closure[i];
80
81                                 if(CLOSURE_IS_VOLUME(sc->type))
82                                         coeff->sigma_s += sc->weight;
83                         }
84                 }
85                 else {
86                         /* When at the max number of bounces, clear scattering. */
87                         sd->flag &= ~SD_SCATTER;
88                 }
89         }
90
91         return true;
92 }
93
94 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
95 {
96         return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
97 }
98
99 ccl_device float kernel_volume_channel_get(float3 value, int channel)
100 {
101         return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
102 }
103
104 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
105 {
106         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
107                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
108
109                 if(shader_flag & SD_HETEROGENEOUS_VOLUME)
110                         return true;
111         }
112
113         return false;
114 }
115
116 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
117 {
118         if(kernel_data.integrator.num_all_lights == 0)
119                 return 0;
120
121         int method = -1;
122
123         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
124                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
125
126                 if(shader_flag & SD_VOLUME_MIS) {
127                         return SD_VOLUME_MIS;
128                 }
129                 else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
130                         if(method == 0)
131                                 return SD_VOLUME_MIS;
132
133                         method = SD_VOLUME_EQUIANGULAR;
134                 }
135                 else {
136                         if(method == SD_VOLUME_EQUIANGULAR)
137                                 return SD_VOLUME_MIS;
138
139                         method = 0;
140                 }
141         }
142
143         return method;
144 }
145
146 /* Volume Shadows
147  *
148  * These functions are used to attenuate shadow rays to lights. Both absorption
149  * and scattering will block light, represented by the extinction coefficient. */
150
151 /* homogeneous volume: assume shader evaluation at the starts gives
152  * the extinction coefficient for the entire line segment */
153 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
154                                                  ccl_addr_space PathState *state,
155                                                  Ray *ray,
156                                                  ShaderData *sd,
157                                                  float3 *throughput)
158 {
159         float3 sigma_t;
160
161         if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
162                 *throughput *= volume_color_transmittance(sigma_t, ray->t);
163 }
164
165 /* heterogeneous volume: integrate stepping through the volume until we
166  * reach the end, get absorbed entirely, or run out of iterations */
167 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
168                                                    ccl_addr_space PathState *state,
169                                                    Ray *ray,
170                                                    ShaderData *sd,
171                                                    float3 *throughput)
172 {
173         float3 tp = *throughput;
174         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
175
176         /* prepare for stepping */
177         int max_steps = kernel_data.integrator.volume_max_steps;
178         float step = kernel_data.integrator.volume_step_size;
179         float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step;
180
181         /* compute extinction at the start */
182         float t = 0.0f;
183
184         float3 sum = make_float3(0.0f, 0.0f, 0.0f);
185
186         for(int i = 0; i < max_steps; i++) {
187                 /* advance to new position */
188                 float new_t = min(ray->t, (i+1) * step);
189                 float dt = new_t - t;
190
191                 /* use random position inside this segment to sample shader */
192                 if(new_t == ray->t)
193                         random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
194
195                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
196                 float3 sigma_t;
197
198                 /* compute attenuation over segment */
199                 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
200                         /* Compute expf() only for every Nth step, to save some calculations
201                          * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
202
203                         sum += (-sigma_t * (new_t - t));
204                         if((i & 0x07) == 0) { /* ToDo: Other interval? */
205                                 tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
206
207                                 /* stop if nearly all light is blocked */
208                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
209                                         break;
210                         }
211                 }
212
213                 /* stop if at the end of the volume */
214                 t = new_t;
215                 if(t == ray->t) {
216                         /* Update throughput in case we haven't done it above */
217                         tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
218                         break;
219                 }
220         }
221
222         *throughput = tp;
223 }
224
225 /* get the volume attenuation over line segment defined by ray, with the
226  * assumption that there are no surfaces blocking light between the endpoints */
227 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
228                                               ShaderData *shadow_sd,
229                                               ccl_addr_space PathState *state,
230                                               Ray *ray,
231                                               float3 *throughput)
232 {
233         shader_setup_from_volume(kg, shadow_sd, ray);
234
235         if(volume_stack_is_heterogeneous(kg, state->volume_stack))
236                 kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
237         else
238                 kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
239 }
240
241 /* Equi-angular sampling as in:
242  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
243
244 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
245 {
246         float t = ray->t;
247
248         float delta = dot((light_P - ray->P) , ray->D);
249         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
250         if(UNLIKELY(D == 0.0f)) {
251                 *pdf = 0.0f;
252                 return 0.0f;
253         }
254         float theta_a = -atan2f(delta, D);
255         float theta_b = atan2f(t - delta, D);
256         float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
257         if(UNLIKELY(theta_b == theta_a)) {
258                 *pdf = 0.0f;
259                 return 0.0f;
260         }
261         *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
262
263         return min(t, delta + t_); /* min is only for float precision errors */
264 }
265
266 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
267 {
268         float delta = dot((light_P - ray->P) , ray->D);
269         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
270         if(UNLIKELY(D == 0.0f)) {
271                 return 0.0f;
272         }
273
274         float t = ray->t;
275         float t_ = sample_t - delta;
276
277         float theta_a = -atan2f(delta, D);
278         float theta_b = atan2f(t - delta, D);
279         if(UNLIKELY(theta_b == theta_a)) {
280                 return 0.0f;
281         }
282
283         float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
284
285         return pdf;
286 }
287
288 /* Distance sampling */
289
290 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
291 {
292         /* xi is [0, 1[ so log(0) should never happen, division by zero is
293          * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
294         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
295         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
296         float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
297
298         float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
299
300         *transmittance = volume_color_transmittance(sigma_t, sample_t);
301         *pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
302
303         /* todo: optimization: when taken together with hit/miss decision,
304          * the full_transmittance cancels out drops out and xi does not
305          * need to be remapped */
306
307         return sample_t;
308 }
309
310 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
311 {
312         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
313         float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
314
315         return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
316 }
317
318 /* Emission */
319
320 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
321 {
322         /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
323          * this goes to E * t as sigma_t goes to zero
324          *
325          * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
326         float3 emission = coeff->emission;
327
328         if(closure_flag & SD_EXTINCTION) {
329                 float3 sigma_t = coeff->sigma_t;
330
331                 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
332                 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
333                 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
334         }
335         else
336                 emission *= t;
337         
338         return emission;
339 }
340
341 /* Volume Path */
342
343 /* homogeneous volume: assume shader evaluation at the start gives
344  * the volume shading coefficient for the entire line segment */
345 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
346     KernelGlobals *kg,
347     ccl_addr_space PathState *state,
348     Ray *ray,
349     ShaderData *sd,
350     PathRadiance *L,
351     ccl_addr_space float3 *throughput,
352     bool probalistic_scatter)
353 {
354         VolumeShaderCoefficients coeff;
355
356         if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
357                 return VOLUME_PATH_MISSED;
358
359         int closure_flag = sd->flag;
360         float t = ray->t;
361         float3 new_tp;
362
363 #ifdef __VOLUME_SCATTER__
364         /* randomly scatter, and if we do t is shortened */
365         if(closure_flag & SD_SCATTER) {
366                 /* extinction coefficient */
367                 float3 sigma_t = coeff.sigma_t;
368
369                 /* pick random color channel, we use the Veach one-sample
370                  * model with balance heuristic for the channels */
371                 float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
372                 int channel = (int)(rphase*3.0f);
373
374                 /* decide if we will hit or miss */
375                 bool scatter = true;
376                 float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
377
378                 if(probalistic_scatter) {
379                         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
380                         float sample_transmittance = expf(-sample_sigma_t * t);
381
382                         if(1.0f - xi >= sample_transmittance) {
383                                 scatter = true;
384
385                                 /* rescale random number so we can reuse it */
386                                 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
387
388                         }
389                         else
390                                 scatter = false;
391                 }
392
393                 if(scatter) {
394                         /* scattering */
395                         float3 pdf;
396                         float3 transmittance;
397                         float sample_t;
398
399                         /* distance sampling */
400                         sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
401
402                         /* modify pdf for hit/miss decision */
403                         if(probalistic_scatter)
404                                 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
405
406                         new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
407                         t = sample_t;
408                 }
409                 else {
410                         /* no scattering */
411                         float3 transmittance = volume_color_transmittance(sigma_t, t);
412                         float pdf = average(transmittance);
413                         new_tp = *throughput * transmittance / pdf;
414                 }
415         }
416         else 
417 #endif
418         if(closure_flag & SD_EXTINCTION) {
419                 /* absorption only, no sampling needed */
420                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
421                 new_tp = *throughput * transmittance;
422         }
423
424         /* integrate emission attenuated by extinction */
425         if(L && (closure_flag & SD_EMISSION)) {
426                 float3 sigma_t = coeff.sigma_t;
427                 float3 transmittance = volume_color_transmittance(sigma_t, ray->t);
428                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
429                 path_radiance_accum_emission(L, state, *throughput, emission);
430         }
431
432         /* modify throughput */
433         if(closure_flag & SD_EXTINCTION) {
434                 *throughput = new_tp;
435
436                 /* prepare to scatter to new direction */
437                 if(t < ray->t) {
438                         /* adjust throughput and move to new location */
439                         sd->P = ray->P + t*ray->D;
440
441                         return VOLUME_PATH_SCATTERED;
442                 }
443         }
444
445         return VOLUME_PATH_ATTENUATED;
446 }
447
448 /* heterogeneous volume distance sampling: integrate stepping through the
449  * volume until we reach the end, get absorbed entirely, or run out of
450  * iterations. this does probabilistically scatter or get transmitted through
451  * for path tracing where we don't want to branch. */
452 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
453     KernelGlobals *kg,
454     ccl_addr_space PathState *state,
455     Ray *ray,
456     ShaderData *sd,
457     PathRadiance *L,
458     ccl_addr_space float3 *throughput)
459 {
460         float3 tp = *throughput;
461         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
462
463         /* prepare for stepping */
464         int max_steps = kernel_data.integrator.volume_max_steps;
465         float step_size = kernel_data.integrator.volume_step_size;
466         float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step_size;
467
468         /* compute coefficients at the start */
469         float t = 0.0f;
470         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
471
472         /* pick random color channel, we use the Veach one-sample
473          * model with balance heuristic for the channels */
474         float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
475         float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
476         int channel = (int)(rphase*3.0f);
477         bool has_scatter = false;
478
479         for(int i = 0; i < max_steps; i++) {
480                 /* advance to new position */
481                 float new_t = min(ray->t, (i+1) * step_size);
482                 float dt = new_t - t;
483
484                 /* use random position inside this segment to sample shader */
485                 if(new_t == ray->t)
486                         random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
487
488                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
489                 VolumeShaderCoefficients coeff;
490
491                 /* compute segment */
492                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
493                         int closure_flag = sd->flag;
494                         float3 new_tp;
495                         float3 transmittance;
496                         bool scatter = false;
497
498                         /* distance sampling */
499 #ifdef __VOLUME_SCATTER__
500                         if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
501                                 has_scatter = true;
502
503                                 float3 sigma_t = coeff.sigma_t;
504                                 float3 sigma_s = coeff.sigma_s;
505
506                                 /* compute transmittance over full step */
507                                 transmittance = volume_color_transmittance(sigma_t, dt);
508
509                                 /* decide if we will scatter or continue */
510                                 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
511
512                                 if(1.0f - xi >= sample_transmittance) {
513                                         /* compute sampling distance */
514                                         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
515                                         float new_dt = -logf(1.0f - xi)/sample_sigma_t;
516                                         new_t = t + new_dt;
517
518                                         /* transmittance and pdf */
519                                         float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt);
520                                         float3 pdf = sigma_t * new_transmittance;
521
522                                         /* throughput */
523                                         new_tp = tp * sigma_s * new_transmittance / average(pdf);
524                                         scatter = true;
525                                 }
526                                 else {
527                                         /* throughput */
528                                         float pdf = average(transmittance);
529                                         new_tp = tp * transmittance / pdf;
530
531                                         /* remap xi so we can reuse it and keep thing stratified */
532                                         xi = 1.0f - (1.0f - xi)/sample_transmittance;
533                                 }
534                         }
535                         else 
536 #endif
537                         if(closure_flag & SD_EXTINCTION) {
538                                 /* absorption only, no sampling needed */
539                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
540                                 new_tp = tp * transmittance;
541                         }
542
543                         /* integrate emission attenuated by absorption */
544                         if(L && (closure_flag & SD_EMISSION)) {
545                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
546                                 path_radiance_accum_emission(L, state, tp, emission);
547                         }
548
549                         /* modify throughput */
550                         if(closure_flag & SD_EXTINCTION) {
551                                 tp = new_tp;
552
553                                 /* stop if nearly all light blocked */
554                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
555                                         tp = make_float3(0.0f, 0.0f, 0.0f);
556                                         break;
557                                 }
558                         }
559
560                         /* prepare to scatter to new direction */
561                         if(scatter) {
562                                 /* adjust throughput and move to new location */
563                                 sd->P = ray->P + new_t*ray->D;
564                                 *throughput = tp;
565
566                                 return VOLUME_PATH_SCATTERED;
567                         }
568                         else {
569                                 /* accumulate transmittance */
570                                 accum_transmittance *= transmittance;
571                         }
572                 }
573
574                 /* stop if at the end of the volume */
575                 t = new_t;
576                 if(t == ray->t)
577                         break;
578         }
579
580         *throughput = tp;
581
582         return VOLUME_PATH_ATTENUATED;
583 }
584
585 /* get the volume attenuation and emission over line segment defined by
586  * ray, with the assumption that there are no surfaces blocking light
587  * between the endpoints. distance sampling is used to decide if we will
588  * scatter or not. */
589 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
590     KernelGlobals *kg,
591     ccl_addr_space PathState *state,
592     ShaderData *sd,
593     Ray *ray,
594     PathRadiance *L,
595     ccl_addr_space float3 *throughput,
596     bool heterogeneous)
597 {
598         shader_setup_from_volume(kg, sd, ray);
599
600         if(heterogeneous)
601                 return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
602         else
603                 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
604 }
605
606 #ifndef __SPLIT_KERNEL__
607 /* Decoupled Volume Sampling
608  *
609  * VolumeSegment is list of coefficients and transmittance stored at all steps
610  * through a volume. This can then later be used for decoupled sampling as in:
611  * "Importance Sampling Techniques for Path Tracing in Participating Media"
612  *
613  * On the GPU this is only supported (but currently not enabled)
614  * for homogeneous volumes (1 step), due to
615  * no support for malloc/free and too much stack usage with a fix size array. */
616
617 typedef struct VolumeStep {
618         float3 sigma_s;                         /* scatter coefficient */
619         float3 sigma_t;                         /* extinction coefficient */
620         float3 accum_transmittance;     /* accumulated transmittance including this step */
621         float3 cdf_distance;            /* cumulative density function for distance sampling */
622         float t;                                        /* distance at end of this step */
623         float shade_t;                          /* jittered distance where shading was done in step */
624         int closure_flag;                       /* shader evaluation closure flags */
625 } VolumeStep;
626
627 typedef struct VolumeSegment {
628         VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
629         VolumeStep *steps;                      /* recorded steps */
630         int numsteps;                           /* number of steps */
631         int closure_flag;                       /* accumulated closure flags from all steps */
632
633         float3 accum_emission;          /* accumulated emission at end of segment */
634         float3 accum_transmittance;     /* accumulated transmittance at end of segment */
635
636         int sampling_method;            /* volume sampling method */
637 } VolumeSegment;
638
639 /* record volume steps to the end of the volume.
640  *
641  * it would be nice if we could only record up to the point that we need to scatter,
642  * but the entire segment is needed to do always scattering, rather than probabilistically
643  * hitting or missing the volume. if we don't know the transmittance at the end of the
644  * volume we can't generate stratified distance samples up to that transmittance */
645 #ifdef __VOLUME_DECOUPLED__
646 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
647         Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
648 {
649         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
650
651         /* prepare for volume stepping */
652         int max_steps;
653         float step_size, random_jitter_offset;
654
655         if(heterogeneous) {
656                 const int global_max_steps = kernel_data.integrator.volume_max_steps;
657                 step_size = kernel_data.integrator.volume_step_size;
658                 /* compute exact steps in advance for malloc */
659                 if(ray->t > global_max_steps*step_size) {
660                         max_steps = global_max_steps;
661                         step_size = ray->t / (float)max_steps;
662                 }
663                 else {
664                         max_steps = max((int)ceilf(ray->t/step_size), 1);
665                 }
666 #ifdef __KERNEL_CPU__
667                 /* NOTE: For the branched path tracing it's possible to have direct
668                  * and indirect light integration both having volume segments allocated.
669                  * We detect this using index in the pre-allocated memory. Currently we
670                  * only support two segments allocated at a time, if more needed some
671                  * modifications to the KernelGlobals will be needed.
672                  *
673                  * This gives us restrictions that decoupled record should only happen
674                  * in the stack manner, meaning if there's subsequent call of decoupled
675                  * record it'll need to free memory before it's caller frees memory.
676                  */
677                 const int index = kg->decoupled_volume_steps_index;
678                 assert(index < sizeof(kg->decoupled_volume_steps) /
679                                sizeof(*kg->decoupled_volume_steps));
680                 if(kg->decoupled_volume_steps[index] == NULL) {
681                         kg->decoupled_volume_steps[index] =
682                                 (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
683                 }
684                 segment->steps = kg->decoupled_volume_steps[index];
685                 ++kg->decoupled_volume_steps_index;
686 #else
687                 segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
688 #endif
689                 random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
690         }
691         else {
692                 max_steps = 1;
693                 step_size = ray->t;
694                 random_jitter_offset = 0.0f;
695                 segment->steps = &segment->stack_step;
696         }
697         
698         /* init accumulation variables */
699         float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
700         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
701         float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
702         float t = 0.0f;
703
704         segment->numsteps = 0;
705         segment->closure_flag = 0;
706         bool is_last_step_empty = false;
707
708         VolumeStep *step = segment->steps;
709
710         for(int i = 0; i < max_steps; i++, step++) {
711                 /* advance to new position */
712                 float new_t = min(ray->t, (i+1) * step_size);
713                 float dt = new_t - t;
714
715                 /* use random position inside this segment to sample shader */
716                 if(heterogeneous && new_t == ray->t)
717                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
718
719                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
720                 VolumeShaderCoefficients coeff;
721
722                 /* compute segment */
723                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
724                         int closure_flag = sd->flag;
725                         float3 sigma_t = coeff.sigma_t;
726
727                         /* compute accumulated transmittance */
728                         float3 transmittance = volume_color_transmittance(sigma_t, dt);
729
730                         /* compute emission attenuated by absorption */
731                         if(closure_flag & SD_EMISSION) {
732                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
733                                 accum_emission += accum_transmittance * emission;
734                         }
735
736                         accum_transmittance *= transmittance;
737
738                         /* compute pdf for distance sampling */
739                         float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
740                         cdf_distance = cdf_distance + pdf_distance;
741
742                         /* write step data */
743                         step->sigma_t = sigma_t;
744                         step->sigma_s = coeff.sigma_s;
745                         step->closure_flag = closure_flag;
746
747                         segment->closure_flag |= closure_flag;
748
749                         is_last_step_empty = false;
750                         segment->numsteps++;
751                 }
752                 else {
753                         if(is_last_step_empty) {
754                                 /* consecutive empty step, merge */
755                                 step--;
756                         }
757                         else {
758                                 /* store empty step */
759                                 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
760                                 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
761                                 step->closure_flag = 0;
762
763                                 segment->numsteps++;
764                                 is_last_step_empty = true;
765                         }
766                 }
767
768                 step->accum_transmittance = accum_transmittance;
769                 step->cdf_distance = cdf_distance;
770                 step->t = new_t;
771                 step->shade_t = t + random_jitter_offset;
772
773                 /* stop if at the end of the volume */
774                 t = new_t;
775                 if(t == ray->t)
776                         break;
777
778                 /* stop if nearly all light blocked */
779                 if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
780                         break;
781         }
782
783         /* store total emission and transmittance */
784         segment->accum_emission = accum_emission;
785         segment->accum_transmittance = accum_transmittance;
786
787         /* normalize cumulative density function for distance sampling */
788         VolumeStep *last_step = segment->steps + segment->numsteps - 1;
789
790         if(!is_zero(last_step->cdf_distance)) {
791                 VolumeStep *step = &segment->steps[0];
792                 int numsteps = segment->numsteps;
793                 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
794
795                 for(int i = 0; i < numsteps; i++, step++)
796                         step->cdf_distance *= inv_cdf_distance_sum;
797         }
798 }
799
800 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
801 {
802         if(segment->steps != &segment->stack_step) {
803 #ifdef __KERNEL_CPU__
804                 /* NOTE: We only allow free last allocated segment.
805                  * No random order of alloc/free is supported.
806                  */
807                 assert(kg->decoupled_volume_steps_index > 0);
808                 assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
809                 --kg->decoupled_volume_steps_index;
810 #else
811                 free(segment->steps);
812 #endif
813         }
814 }
815 #endif  /* __VOLUME_DECOUPLED__ */
816
817 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
818  * marching.
819  *
820  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
821 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
822         KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
823         float3 *throughput, float rphase, float rscatter,
824         const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
825 {
826         kernel_assert(segment->closure_flag & SD_SCATTER);
827
828         /* pick random color channel, we use the Veach one-sample
829          * model with balance heuristic for the channels */
830         int channel = (int)(rphase*3.0f);
831         float xi = rscatter;
832
833         /* probabilistic scattering decision based on transmittance */
834         if(probalistic_scatter) {
835                 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
836
837                 if(1.0f - xi >= sample_transmittance) {
838                         /* rescale random number so we can reuse it */
839                         xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
840                 }
841                 else {
842                         *throughput /= sample_transmittance;
843                         return VOLUME_PATH_MISSED;
844                 }
845         }
846
847         VolumeStep *step;
848         float3 transmittance;
849         float pdf, sample_t;
850         float mis_weight = 1.0f;
851         bool distance_sample = true;
852         bool use_mis = false;
853
854         if(segment->sampling_method && light_P) {
855                 if(segment->sampling_method == SD_VOLUME_MIS) {
856                         /* multiple importance sample: randomly pick between
857                          * equiangular and distance sampling strategy */
858                         if(xi < 0.5f) {
859                                 xi *= 2.0f;
860                         }
861                         else {
862                                 xi = (xi - 0.5f)*2.0f;
863                                 distance_sample = false;
864                         }
865
866                         use_mis = true;
867                 }
868                 else {
869                         /* only equiangular sampling */
870                         distance_sample = false;
871                 }
872         }
873
874         /* distance sampling */
875         if(distance_sample) {
876                 /* find step in cdf */
877                 step = segment->steps;
878
879                 float prev_t = 0.0f;
880                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
881
882                 if(segment->numsteps > 1) {
883                         float prev_cdf = 0.0f;
884                         float step_cdf = 1.0f;
885                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
886
887                         for(int i = 0; ; i++, step++) {
888                                 /* todo: optimize using binary search */
889                                 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
890
891                                 if(xi < step_cdf || i == segment->numsteps-1)
892                                         break;
893
894                                 prev_cdf = step_cdf;
895                                 prev_t = step->t;
896                                 prev_cdf_distance = step->cdf_distance;
897                         }
898
899                         /* remap xi so we can reuse it */
900                         xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
901
902                         /* pdf for picking step */
903                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
904                 }
905
906                 /* determine range in which we will sample */
907                 float step_t = step->t - prev_t;
908
909                 /* sample distance and compute transmittance */
910                 float3 distance_pdf;
911                 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
912
913                 /* modify pdf for hit/miss decision */
914                 if(probalistic_scatter)
915                         distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
916
917                 pdf = average(distance_pdf * step_pdf_distance);
918
919                 /* multiple importance sampling */
920                 if(use_mis) {
921                         float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
922                         mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
923                 }
924         }
925         /* equi-angular sampling */
926         else {
927                 /* sample distance */
928                 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
929
930                 /* find step in which sampled distance is located */
931                 step = segment->steps;
932
933                 float prev_t = 0.0f;
934                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
935
936                 if(segment->numsteps > 1) {
937                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
938
939                         int numsteps = segment->numsteps;
940                         int high = numsteps - 1;
941                         int low = 0;
942                         int mid;
943
944                         while(low < high) {
945                                 mid = (low + high) >> 1;
946
947                                 if(sample_t < step[mid].t)
948                                         high = mid;
949                                 else if(sample_t >= step[mid + 1].t)
950                                         low = mid + 1;
951                                 else {
952                                         /* found our interval in step[mid] .. step[mid+1] */
953                                         prev_t = step[mid].t;
954                                         prev_cdf_distance = step[mid].cdf_distance;
955                                         step += mid+1;
956                                         break;
957                                 }
958                         }
959
960                         if(low >= numsteps - 1) {
961                                 prev_t = step[numsteps - 1].t;
962                                 prev_cdf_distance = step[numsteps-1].cdf_distance;
963                                 step += numsteps - 1;
964                         }
965
966                         /* pdf for picking step with distance sampling */
967                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
968                 }
969
970                 /* determine range in which we will sample */
971                 float step_t = step->t - prev_t;
972                 float step_sample_t = sample_t - prev_t;
973
974                 /* compute transmittance */
975                 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
976
977                 /* multiple importance sampling */
978                 if(use_mis) {
979                         float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
980                         float distance_pdf = average(distance_pdf3 * step_pdf_distance);
981                         mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
982                 }
983         }
984         if(sample_t < 0.0f || pdf == 0.0f) {
985                 return VOLUME_PATH_MISSED;
986         }
987
988         /* compute transmittance up to this step */
989         if(step != segment->steps)
990                 transmittance *= (step-1)->accum_transmittance;
991
992         /* modify throughput */
993         *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
994
995         /* evaluate shader to create closures at shading point */
996         if(segment->numsteps > 1) {
997                 sd->P = ray->P + step->shade_t*ray->D;
998
999                 VolumeShaderCoefficients coeff;
1000                 volume_shader_sample(kg, sd, state, sd->P, &coeff);
1001         }
1002
1003         /* move to new position */
1004         sd->P = ray->P + sample_t*ray->D;
1005
1006         return VOLUME_PATH_SCATTERED;
1007 }
1008 #endif /* __SPLIT_KERNEL */
1009
1010 /* decide if we need to use decoupled or not */
1011 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
1012 {
1013         /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
1014          * which also means equiangular and multiple importance sampling is not
1015          * support for that case */
1016         if(!kernel_data.integrator.volume_decoupled)
1017                 return false;
1018
1019 #ifdef __KERNEL_GPU__
1020         if(heterogeneous)
1021                 return false;
1022 #endif
1023
1024         /* equiangular and multiple importance sampling only implemented for decoupled */
1025         if(sampling_method != 0)
1026                 return true;
1027
1028         /* for all light sampling use decoupled, reusing shader evaluations is
1029          * typically faster in that case */
1030         if(direct)
1031                 return kernel_data.integrator.sample_all_lights_direct;
1032         else
1033                 return kernel_data.integrator.sample_all_lights_indirect;
1034 }
1035
1036 /* Volume Stack
1037  *
1038  * This is an array of object/shared ID's that the current segment of the path
1039  * is inside of. */
1040
1041 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
1042                                          ShaderData *stack_sd,
1043                                          ccl_addr_space const PathState *state,
1044                                          ccl_addr_space const Ray *ray,
1045                                          ccl_addr_space VolumeStack *stack)
1046 {
1047         /* NULL ray happens in the baker, does it need proper initialization of
1048          * camera in volume?
1049          */
1050         if(!kernel_data.cam.is_inside_volume || ray == NULL) {
1051                 /* Camera is guaranteed to be in the air, only take background volume
1052                  * into account in this case.
1053                  */
1054                 if(kernel_data.background.volume_shader != SHADER_NONE) {
1055                         stack[0].shader = kernel_data.background.volume_shader;
1056                         stack[0].object = PRIM_NONE;
1057                         stack[1].shader = SHADER_NONE;
1058                 }
1059                 else {
1060                         stack[0].shader = SHADER_NONE;
1061                 }
1062                 return;
1063         }
1064
1065         kernel_assert(state->flag & PATH_RAY_CAMERA);
1066
1067         Ray volume_ray = *ray;
1068         volume_ray.t = FLT_MAX;
1069
1070         const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
1071         int stack_index = 0, enclosed_index = 0;
1072
1073 #ifdef __VOLUME_RECORD_ALL__
1074         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1075         uint num_hits = scene_intersect_volume_all(kg,
1076                                                    &volume_ray,
1077                                                    hits,
1078                                                    2*VOLUME_STACK_SIZE,
1079                                                    visibility);
1080         if(num_hits > 0) {
1081                 int enclosed_volumes[VOLUME_STACK_SIZE];
1082                 Intersection *isect = hits;
1083
1084                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1085
1086                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1087                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1088                         if(stack_sd->flag & SD_BACKFACING) {
1089                                 bool need_add = true;
1090                                 for(int i = 0; i < enclosed_index && need_add; ++i) {
1091                                         /* If ray exited the volume and never entered to that volume
1092                                          * it means that camera is inside such a volume.
1093                                          */
1094                                         if(enclosed_volumes[i] == stack_sd->object) {
1095                                                 need_add = false;
1096                                         }
1097                                 }
1098                                 for(int i = 0; i < stack_index && need_add; ++i) {
1099                                         /* Don't add intersections twice. */
1100                                         if(stack[i].object == stack_sd->object) {
1101                                                 need_add = false;
1102                                                 break;
1103                                         }
1104                                 }
1105                                 if(need_add) {
1106                                         stack[stack_index].object = stack_sd->object;
1107                                         stack[stack_index].shader = stack_sd->shader;
1108                                         ++stack_index;
1109                                 }
1110                         }
1111                         else {
1112                                 /* If ray from camera enters the volume, this volume shouldn't
1113                                  * be added to the stack on exit.
1114                                  */
1115                                 enclosed_volumes[enclosed_index++] = stack_sd->object;
1116                         }
1117                 }
1118         }
1119 #else
1120         int enclosed_volumes[VOLUME_STACK_SIZE];
1121         int step = 0;
1122
1123         while(stack_index < VOLUME_STACK_SIZE - 1 &&
1124               enclosed_index < VOLUME_STACK_SIZE - 1 &&
1125               step < 2 * VOLUME_STACK_SIZE)
1126         {
1127                 Intersection isect;
1128                 if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
1129                         break;
1130                 }
1131
1132                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1133                 if(stack_sd->flag & SD_BACKFACING) {
1134                         /* If ray exited the volume and never entered to that volume
1135                          * it means that camera is inside such a volume.
1136                          */
1137                         bool need_add = true;
1138                         for(int i = 0; i < enclosed_index && need_add; ++i) {
1139                                 /* If ray exited the volume and never entered to that volume
1140                                  * it means that camera is inside such a volume.
1141                                  */
1142                                 if(enclosed_volumes[i] == stack_sd->object) {
1143                                         need_add = false;
1144                                 }
1145                         }
1146                         for(int i = 0; i < stack_index && need_add; ++i) {
1147                                 /* Don't add intersections twice. */
1148                                 if(stack[i].object == stack_sd->object) {
1149                                         need_add = false;
1150                                         break;
1151                                 }
1152                         }
1153                         if(need_add) {
1154                                 stack[stack_index].object = stack_sd->object;
1155                                 stack[stack_index].shader = stack_sd->shader;
1156                                 ++stack_index;
1157                         }
1158                 }
1159                 else {
1160                         /* If ray from camera enters the volume, this volume shouldn't
1161                          * be added to the stack on exit.
1162                          */
1163                         enclosed_volumes[enclosed_index++] = stack_sd->object;
1164                 }
1165
1166                 /* Move ray forward. */
1167                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1168                 ++step;
1169         }
1170 #endif
1171         /* stack_index of 0 means quick checks outside of the kernel gave false
1172          * positive, nothing to worry about, just we've wasted quite a few of
1173          * ticks just to come into conclusion that camera is in the air.
1174          *
1175          * In this case we're doing the same above -- check whether background has
1176          * volume.
1177          */
1178         if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1179                 stack[0].shader = kernel_data.background.volume_shader;
1180                 stack[0].object = PRIM_NONE;
1181                 stack[1].shader = SHADER_NONE;
1182         }
1183         else {
1184                 stack[stack_index].shader = SHADER_NONE;
1185         }
1186 }
1187
1188 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
1189 {
1190         /* todo: we should have some way for objects to indicate if they want the
1191          * world shader to work inside them. excluding it by default is problematic
1192          * because non-volume objects can't be assumed to be closed manifolds */
1193
1194         if(!(sd->flag & SD_HAS_VOLUME))
1195                 return;
1196         
1197         if(sd->flag & SD_BACKFACING) {
1198                 /* exit volume object: remove from stack */
1199                 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1200                         if(stack[i].object == sd->object) {
1201                                 /* shift back next stack entries */
1202                                 do {
1203                                         stack[i] = stack[i+1];
1204                                         i++;
1205                                 }
1206                                 while(stack[i].shader != SHADER_NONE);
1207
1208                                 return;
1209                         }
1210                 }
1211         }
1212         else {
1213                 /* enter volume object: add to stack */
1214                 int i;
1215
1216                 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
1217                         /* already in the stack? then we have nothing to do */
1218                         if(stack[i].object == sd->object)
1219                                 return;
1220                 }
1221
1222                 /* if we exceed the stack limit, ignore */
1223                 if(i >= VOLUME_STACK_SIZE-1)
1224                         return;
1225
1226                 /* add to the end of the stack */
1227                 stack[i].shader = sd->shader;
1228                 stack[i].object = sd->object;
1229                 stack[i+1].shader = SHADER_NONE;
1230         }
1231 }
1232
1233 #ifdef __SUBSURFACE__
1234 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1235                                                           ShaderData *stack_sd,
1236                                                           Ray *ray,
1237                                                           ccl_addr_space VolumeStack *stack)
1238 {
1239         kernel_assert(kernel_data.integrator.use_volumes);
1240
1241         Ray volume_ray = *ray;
1242
1243 #  ifdef __VOLUME_RECORD_ALL__
1244         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1245         uint num_hits = scene_intersect_volume_all(kg,
1246                                                    &volume_ray,
1247                                                    hits,
1248                                                    2*VOLUME_STACK_SIZE,
1249                                                    PATH_RAY_ALL_VISIBILITY);
1250         if(num_hits > 0) {
1251                 Intersection *isect = hits;
1252
1253                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1254
1255                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1256                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1257                         kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1258                 }
1259         }
1260 #  else
1261         Intersection isect;
1262         int step = 0;
1263         float3 Pend = ray->P + ray->D*ray->t;
1264         while(step < 2 * VOLUME_STACK_SIZE &&
1265               scene_intersect_volume(kg,
1266                                      &volume_ray,
1267                                      &isect,
1268                                      PATH_RAY_ALL_VISIBILITY))
1269         {
1270                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1271                 kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1272
1273                 /* Move ray forward. */
1274                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1275                 if(volume_ray.t != FLT_MAX) {
1276                         volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
1277                 }
1278                 ++step;
1279         }
1280 #  endif
1281 }
1282 #endif
1283
1284 /* Clean stack after the last bounce.
1285  *
1286  * It is expected that all volumes are closed manifolds, so at the time when ray
1287  * hits nothing (for example, it is a last bounce which goes to environment) the
1288  * only expected volume in the stack is the world's one. All the rest volume
1289  * entries should have been exited already.
1290  *
1291  * This isn't always true because of ray intersection precision issues, which
1292  * could lead us to an infinite non-world volume in the stack, causing render
1293  * artifacts.
1294  *
1295  * Use this function after the last bounce to get rid of all volumes apart from
1296  * the world's one after the last bounce to avoid render artifacts.
1297  */
1298 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
1299                                                  ccl_addr_space VolumeStack *volume_stack)
1300 {
1301         if(kernel_data.background.volume_shader != SHADER_NONE) {
1302                 /* Keep the world's volume in stack. */
1303                 volume_stack[1].shader = SHADER_NONE;
1304         }
1305         else {
1306                 volume_stack[0].shader = SHADER_NONE;
1307         }
1308 }
1309
1310 CCL_NAMESPACE_END