Cleanup: fix compiler warnings.
[blender.git] / intern / cycles / kernel / kernel_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22         VOLUME_PATH_SCATTERED = 0,
23         VOLUME_PATH_ATTENUATED = 1,
24         VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33         float3 sigma_t;
34         float3 sigma_s;
35         float3 emission;
36 } VolumeShaderCoefficients;
37
38 #ifdef __VOLUME__
39
40 /* evaluate shader to get extinction coefficient at P */
41 ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
42                                                        ShaderData *sd,
43                                                        ccl_addr_space PathState *state,
44                                                        float3 P,
45                                                        float3 *extinction)
46 {
47         sd->P = P;
48         shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
49
50         if(sd->flag & SD_EXTINCTION) {
51                 *extinction = sd->closure_transparent_extinction;
52                 return true;
53         }
54         else {
55                 return false;
56         }
57 }
58
59 /* evaluate shader to get absorption, scattering and emission at P */
60 ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
61                                             ShaderData *sd,
62                                             ccl_addr_space PathState *state,
63                                             float3 P,
64                                             VolumeShaderCoefficients *coeff)
65 {
66         sd->P = P;
67         shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
68
69         if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
70                 return false;
71
72         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
73         coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction:
74                                                      make_float3(0.0f, 0.0f, 0.0f);
75         coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background:
76                                                     make_float3(0.0f, 0.0f, 0.0f);
77
78         if(sd->flag & SD_SCATTER) {
79                 for(int i = 0; i < sd->num_closure; i++) {
80                         const ShaderClosure *sc = &sd->closure[i];
81
82                         if(CLOSURE_IS_VOLUME(sc->type))
83                                 coeff->sigma_s += sc->weight;
84                 }
85         }
86
87         return true;
88 }
89
90 #endif  /* __VOLUME__ */
91
92 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
93 {
94         return exp3(-sigma * t);
95 }
96
97 ccl_device float kernel_volume_channel_get(float3 value, int channel)
98 {
99         return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
100 }
101
102 #ifdef __VOLUME__
103
104 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
105 {
106         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
107                 int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
108
109                 if(shader_flag & SD_HETEROGENEOUS_VOLUME) {
110                         return true;
111                 }
112                 else if(shader_flag & SD_NEED_ATTRIBUTES) {
113                         /* We want to render world or objects without any volume grids
114                          * as homogenous, but can only verify this at runtime since other
115                          * heterogenous volume objects may be using the same shader. */
116                         int object = stack[i].object;
117                         if(object != OBJECT_NONE) {
118                                 int object_flag = kernel_tex_fetch(__object_flag, object);
119                                 if(object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
120                                         return true;
121                                 }
122                         }
123                 }
124         }
125
126         return false;
127 }
128
129 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
130 {
131         if(kernel_data.integrator.num_all_lights == 0)
132                 return 0;
133
134         int method = -1;
135
136         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
137                 int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
138
139                 if(shader_flag & SD_VOLUME_MIS) {
140                         return SD_VOLUME_MIS;
141                 }
142                 else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
143                         if(method == 0)
144                                 return SD_VOLUME_MIS;
145
146                         method = SD_VOLUME_EQUIANGULAR;
147                 }
148                 else {
149                         if(method == SD_VOLUME_EQUIANGULAR)
150                                 return SD_VOLUME_MIS;
151
152                         method = 0;
153                 }
154         }
155
156         return method;
157 }
158
159 ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
160                                                ccl_addr_space PathState *state,
161                                                float t,
162                                                float *step_size,
163                                                float *step_offset)
164 {
165         const int max_steps = kernel_data.integrator.volume_max_steps;
166         float step = min(kernel_data.integrator.volume_step_size, t);
167
168         /* compute exact steps in advance for malloc */
169         if(t > max_steps * step) {
170                 step = t / (float)max_steps;
171         }
172
173         *step_size = step;
174         *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
175 }
176
177 /* Volume Shadows
178  *
179  * These functions are used to attenuate shadow rays to lights. Both absorption
180  * and scattering will block light, represented by the extinction coefficient. */
181
182 /* homogeneous volume: assume shader evaluation at the starts gives
183  * the extinction coefficient for the entire line segment */
184 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
185                                                  ccl_addr_space PathState *state,
186                                                  Ray *ray,
187                                                  ShaderData *sd,
188                                                  float3 *throughput)
189 {
190         float3 sigma_t;
191
192         if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
193                 *throughput *= volume_color_transmittance(sigma_t, ray->t);
194 }
195
196 /* heterogeneous volume: integrate stepping through the volume until we
197  * reach the end, get absorbed entirely, or run out of iterations */
198 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
199                                                    ccl_addr_space PathState *state,
200                                                    Ray *ray,
201                                                    ShaderData *sd,
202                                                    float3 *throughput)
203 {
204         float3 tp = *throughput;
205         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
206
207         /* prepare for stepping */
208         int max_steps = kernel_data.integrator.volume_max_steps;
209         float step_offset, step_size;
210         kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
211
212         /* compute extinction at the start */
213         float t = 0.0f;
214
215         float3 sum = make_float3(0.0f, 0.0f, 0.0f);
216
217         for(int i = 0; i < max_steps; i++) {
218                 /* advance to new position */
219                 float new_t = min(ray->t, (i+1) * step_size);
220
221                 /* use random position inside this segment to sample shader, adjust
222                  * for last step that is shorter than other steps. */
223                 if(new_t == ray->t) {
224                         step_offset *= (new_t - t) / step_size;
225                 }
226
227                 float3 new_P = ray->P + ray->D * (t + step_offset);
228                 float3 sigma_t;
229
230                 /* compute attenuation over segment */
231                 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
232                         /* Compute expf() only for every Nth step, to save some calculations
233                          * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
234
235                         sum += (-sigma_t * (new_t - t));
236                         if((i & 0x07) == 0) { /* ToDo: Other interval? */
237                                 tp = *throughput * exp3(sum);
238
239                                 /* stop if nearly all light is blocked */
240                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
241                                         break;
242                         }
243                 }
244
245                 /* stop if at the end of the volume */
246                 t = new_t;
247                 if(t == ray->t) {
248                         /* Update throughput in case we haven't done it above */
249                         tp = *throughput * exp3(sum);
250                         break;
251                 }
252         }
253
254         *throughput = tp;
255 }
256
257 /* get the volume attenuation over line segment defined by ray, with the
258  * assumption that there are no surfaces blocking light between the endpoints */
259 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
260                                               ShaderData *shadow_sd,
261                                               ccl_addr_space PathState *state,
262                                               Ray *ray,
263                                               float3 *throughput)
264 {
265         shader_setup_from_volume(kg, shadow_sd, ray);
266
267         if(volume_stack_is_heterogeneous(kg, state->volume_stack))
268                 kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
269         else
270                 kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
271 }
272
273 #endif  /* __VOLUME__ */
274
275 /* Equi-angular sampling as in:
276  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
277
278 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
279 {
280         float t = ray->t;
281
282         float delta = dot((light_P - ray->P) , ray->D);
283         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
284         if(UNLIKELY(D == 0.0f)) {
285                 *pdf = 0.0f;
286                 return 0.0f;
287         }
288         float theta_a = -atan2f(delta, D);
289         float theta_b = atan2f(t - delta, D);
290         float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
291         if(UNLIKELY(theta_b == theta_a)) {
292                 *pdf = 0.0f;
293                 return 0.0f;
294         }
295         *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
296
297         return min(t, delta + t_); /* min is only for float precision errors */
298 }
299
300 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
301 {
302         float delta = dot((light_P - ray->P) , ray->D);
303         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
304         if(UNLIKELY(D == 0.0f)) {
305                 return 0.0f;
306         }
307
308         float t = ray->t;
309         float t_ = sample_t - delta;
310
311         float theta_a = -atan2f(delta, D);
312         float theta_b = atan2f(t - delta, D);
313         if(UNLIKELY(theta_b == theta_a)) {
314                 return 0.0f;
315         }
316
317         float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
318
319         return pdf;
320 }
321
322 /* Distance sampling */
323
324 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
325 {
326         /* xi is [0, 1[ so log(0) should never happen, division by zero is
327          * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
328         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
329         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
330         float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
331
332         float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
333
334         *transmittance = volume_color_transmittance(sigma_t, sample_t);
335         *pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
336
337         /* todo: optimization: when taken together with hit/miss decision,
338          * the full_transmittance cancels out drops out and xi does not
339          * need to be remapped */
340
341         return sample_t;
342 }
343
344 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
345 {
346         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
347         float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
348
349         return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
350 }
351
352 /* Emission */
353
354 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
355 {
356         /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
357          * this goes to E * t as sigma_t goes to zero
358          *
359          * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
360         float3 emission = coeff->emission;
361
362         if(closure_flag & SD_EXTINCTION) {
363                 float3 sigma_t = coeff->sigma_t;
364
365                 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
366                 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
367                 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
368         }
369         else
370                 emission *= t;
371
372         return emission;
373 }
374
375 /* Volume Path */
376
377 ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
378 {
379         /* Sample color channel proportional to throughput and single scattering
380          * albedo, to significantly reduce noise with many bounce, following:
381          *
382          * "Practical and Controllable Subsurface Scattering for Production Path
383          *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
384         float3 weights = fabs(throughput * albedo);
385         float sum_weights = weights.x + weights.y + weights.z;
386         float3 weights_pdf;
387
388         if(sum_weights > 0.0f) {
389                 weights_pdf = weights/sum_weights;
390         }
391         else {
392                 weights_pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f);
393         }
394
395         *pdf = weights_pdf;
396
397         /* OpenCL does not support -> on float3, so don't use pdf->x. */
398         if(rand < weights_pdf.x) {
399                 return 0;
400         }
401         else if(rand < weights_pdf.x + weights_pdf.y) {
402                 return 1;
403         }
404         else {
405                 return 2;
406         }
407 }
408
409 #ifdef __VOLUME__
410
411 /* homogeneous volume: assume shader evaluation at the start gives
412  * the volume shading coefficient for the entire line segment */
413 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
414     KernelGlobals *kg,
415     ccl_addr_space PathState *state,
416     Ray *ray,
417     ShaderData *sd,
418     PathRadiance *L,
419     ccl_addr_space float3 *throughput,
420     bool probalistic_scatter)
421 {
422         VolumeShaderCoefficients coeff;
423
424         if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
425                 return VOLUME_PATH_MISSED;
426
427         int closure_flag = sd->flag;
428         float t = ray->t;
429         float3 new_tp;
430
431 #ifdef __VOLUME_SCATTER__
432         /* randomly scatter, and if we do t is shortened */
433         if(closure_flag & SD_SCATTER) {
434                 /* Sample channel, use MIS with balance heuristic. */
435                 float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
436                 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
437                 float3 channel_pdf;
438                 int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
439
440                 /* decide if we will hit or miss */
441                 bool scatter = true;
442                 float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
443
444                 if(probalistic_scatter) {
445                         float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
446                         float sample_transmittance = expf(-sample_sigma_t * t);
447
448                         if(1.0f - xi >= sample_transmittance) {
449                                 scatter = true;
450
451                                 /* rescale random number so we can reuse it */
452                                 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
453
454                         }
455                         else
456                                 scatter = false;
457                 }
458
459                 if(scatter) {
460                         /* scattering */
461                         float3 pdf;
462                         float3 transmittance;
463                         float sample_t;
464
465                         /* distance sampling */
466                         sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
467
468                         /* modify pdf for hit/miss decision */
469                         if(probalistic_scatter)
470                                 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
471
472                         new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
473                         t = sample_t;
474                 }
475                 else {
476                         /* no scattering */
477                         float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
478                         float pdf = dot(channel_pdf, transmittance);
479                         new_tp = *throughput * transmittance / pdf;
480                 }
481         }
482         else
483 #endif
484         if(closure_flag & SD_EXTINCTION) {
485                 /* absorption only, no sampling needed */
486                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
487                 new_tp = *throughput * transmittance;
488         }
489         else {
490                 new_tp = *throughput;
491         }
492
493         /* integrate emission attenuated by extinction */
494         if(L && (closure_flag & SD_EMISSION)) {
495                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
496                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
497                 path_radiance_accum_emission(L, state, *throughput, emission);
498         }
499
500         /* modify throughput */
501         if(closure_flag & SD_EXTINCTION) {
502                 *throughput = new_tp;
503
504                 /* prepare to scatter to new direction */
505                 if(t < ray->t) {
506                         /* adjust throughput and move to new location */
507                         sd->P = ray->P + t*ray->D;
508
509                         return VOLUME_PATH_SCATTERED;
510                 }
511         }
512
513         return VOLUME_PATH_ATTENUATED;
514 }
515
516 /* heterogeneous volume distance sampling: integrate stepping through the
517  * volume until we reach the end, get absorbed entirely, or run out of
518  * iterations. this does probabilistically scatter or get transmitted through
519  * for path tracing where we don't want to branch. */
520 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
521     KernelGlobals *kg,
522     ccl_addr_space PathState *state,
523     Ray *ray,
524     ShaderData *sd,
525     PathRadiance *L,
526     ccl_addr_space float3 *throughput)
527 {
528         float3 tp = *throughput;
529         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
530
531         /* prepare for stepping */
532         int max_steps = kernel_data.integrator.volume_max_steps;
533         float step_offset, step_size;
534         kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
535
536         /* compute coefficients at the start */
537         float t = 0.0f;
538         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
539
540         /* pick random color channel, we use the Veach one-sample
541          * model with balance heuristic for the channels */
542         float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
543         float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
544         bool has_scatter = false;
545
546         for(int i = 0; i < max_steps; i++) {
547                 /* advance to new position */
548                 float new_t = min(ray->t, (i+1) * step_size);
549                 float dt = new_t - t;
550
551                 /* use random position inside this segment to sample shader,
552                 * for last shorter step we remap it to fit within the segment. */
553                 if(new_t == ray->t) {
554                         step_offset *= (new_t - t) / step_size;
555                 }
556
557                 float3 new_P = ray->P + ray->D * (t + step_offset);
558                 VolumeShaderCoefficients coeff;
559
560                 /* compute segment */
561                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
562                         int closure_flag = sd->flag;
563                         float3 new_tp;
564                         float3 transmittance;
565                         bool scatter = false;
566
567                         /* distance sampling */
568 #ifdef __VOLUME_SCATTER__
569                         if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
570                                 has_scatter = true;
571
572                                 /* Sample channel, use MIS with balance heuristic. */
573                                 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
574                                 float3 channel_pdf;
575                                 int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
576
577                                 /* compute transmittance over full step */
578                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
579
580                                 /* decide if we will scatter or continue */
581                                 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
582
583                                 if(1.0f - xi >= sample_transmittance) {
584                                         /* compute sampling distance */
585                                         float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
586                                         float new_dt = -logf(1.0f - xi)/sample_sigma_t;
587                                         new_t = t + new_dt;
588
589                                         /* transmittance and pdf */
590                                         float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
591                                         float3 pdf = coeff.sigma_t * new_transmittance;
592
593                                         /* throughput */
594                                         new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
595                                         scatter = true;
596                                 }
597                                 else {
598                                         /* throughput */
599                                         float pdf = dot(channel_pdf, transmittance);
600                                         new_tp = tp * transmittance / pdf;
601
602                                         /* remap xi so we can reuse it and keep thing stratified */
603                                         xi = 1.0f - (1.0f - xi)/sample_transmittance;
604                                 }
605                         }
606                         else
607 #endif
608                         if(closure_flag & SD_EXTINCTION) {
609                                 /* absorption only, no sampling needed */
610                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
611                                 new_tp = tp * transmittance;
612                         }
613                         else {
614                                 new_tp = tp;
615                         }
616
617                         /* integrate emission attenuated by absorption */
618                         if(L && (closure_flag & SD_EMISSION)) {
619                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
620                                 path_radiance_accum_emission(L, state, tp, emission);
621                         }
622
623                         /* modify throughput */
624                         if(closure_flag & SD_EXTINCTION) {
625                                 tp = new_tp;
626
627                                 /* stop if nearly all light blocked */
628                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
629                                         tp = make_float3(0.0f, 0.0f, 0.0f);
630                                         break;
631                                 }
632                         }
633
634                         /* prepare to scatter to new direction */
635                         if(scatter) {
636                                 /* adjust throughput and move to new location */
637                                 sd->P = ray->P + new_t*ray->D;
638                                 *throughput = tp;
639
640                                 return VOLUME_PATH_SCATTERED;
641                         }
642                         else {
643                                 /* accumulate transmittance */
644                                 accum_transmittance *= transmittance;
645                         }
646                 }
647
648                 /* stop if at the end of the volume */
649                 t = new_t;
650                 if(t == ray->t)
651                         break;
652         }
653
654         *throughput = tp;
655
656         return VOLUME_PATH_ATTENUATED;
657 }
658
659 /* get the volume attenuation and emission over line segment defined by
660  * ray, with the assumption that there are no surfaces blocking light
661  * between the endpoints. distance sampling is used to decide if we will
662  * scatter or not. */
663 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
664     KernelGlobals *kg,
665     ccl_addr_space PathState *state,
666     ShaderData *sd,
667     Ray *ray,
668     PathRadiance *L,
669     ccl_addr_space float3 *throughput,
670     bool heterogeneous)
671 {
672         shader_setup_from_volume(kg, sd, ray);
673
674         if(heterogeneous)
675                 return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
676         else
677                 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
678 }
679
680 #ifndef __SPLIT_KERNEL__
681 /* Decoupled Volume Sampling
682  *
683  * VolumeSegment is list of coefficients and transmittance stored at all steps
684  * through a volume. This can then later be used for decoupled sampling as in:
685  * "Importance Sampling Techniques for Path Tracing in Participating Media"
686  *
687  * On the GPU this is only supported (but currently not enabled)
688  * for homogeneous volumes (1 step), due to
689  * no support for malloc/free and too much stack usage with a fix size array. */
690
691 typedef struct VolumeStep {
692         float3 sigma_s;                         /* scatter coefficient */
693         float3 sigma_t;                         /* extinction coefficient */
694         float3 accum_transmittance;     /* accumulated transmittance including this step */
695         float3 cdf_distance;            /* cumulative density function for distance sampling */
696         float t;                                        /* distance at end of this step */
697         float shade_t;                          /* jittered distance where shading was done in step */
698         int closure_flag;                       /* shader evaluation closure flags */
699 } VolumeStep;
700
701 typedef struct VolumeSegment {
702         VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
703         VolumeStep *steps;                      /* recorded steps */
704         int numsteps;                           /* number of steps */
705         int closure_flag;                       /* accumulated closure flags from all steps */
706
707         float3 accum_emission;          /* accumulated emission at end of segment */
708         float3 accum_transmittance;     /* accumulated transmittance at end of segment */
709         float3 accum_albedo;        /* accumulated average albedo over segment */
710
711         int sampling_method;            /* volume sampling method */
712 } VolumeSegment;
713
714 /* record volume steps to the end of the volume.
715  *
716  * it would be nice if we could only record up to the point that we need to scatter,
717  * but the entire segment is needed to do always scattering, rather than probabilistically
718  * hitting or missing the volume. if we don't know the transmittance at the end of the
719  * volume we can't generate stratified distance samples up to that transmittance */
720 #ifdef __VOLUME_DECOUPLED__
721 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
722         Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
723 {
724         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
725
726         /* prepare for volume stepping */
727         int max_steps;
728         float step_size, step_offset;
729
730         if(heterogeneous) {
731                 max_steps = kernel_data.integrator.volume_max_steps;
732                 kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset);
733
734 #ifdef __KERNEL_CPU__
735                 /* NOTE: For the branched path tracing it's possible to have direct
736                  * and indirect light integration both having volume segments allocated.
737                  * We detect this using index in the pre-allocated memory. Currently we
738                  * only support two segments allocated at a time, if more needed some
739                  * modifications to the KernelGlobals will be needed.
740                  *
741                  * This gives us restrictions that decoupled record should only happen
742                  * in the stack manner, meaning if there's subsequent call of decoupled
743                  * record it'll need to free memory before it's caller frees memory.
744                  */
745                 const int index = kg->decoupled_volume_steps_index;
746                 assert(index < sizeof(kg->decoupled_volume_steps) /
747                                sizeof(*kg->decoupled_volume_steps));
748                 if(kg->decoupled_volume_steps[index] == NULL) {
749                         kg->decoupled_volume_steps[index] =
750                                 (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
751                 }
752                 segment->steps = kg->decoupled_volume_steps[index];
753                 ++kg->decoupled_volume_steps_index;
754 #else
755                 segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
756 #endif
757         }
758         else {
759                 max_steps = 1;
760                 step_size = ray->t;
761                 step_offset = 0.0f;
762                 segment->steps = &segment->stack_step;
763         }
764
765         /* init accumulation variables */
766         float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
767         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
768         float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
769         float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
770         float t = 0.0f;
771
772         segment->numsteps = 0;
773         segment->closure_flag = 0;
774         bool is_last_step_empty = false;
775
776         VolumeStep *step = segment->steps;
777
778         for(int i = 0; i < max_steps; i++, step++) {
779                 /* advance to new position */
780                 float new_t = min(ray->t, (i+1) * step_size);
781                 float dt = new_t - t;
782
783                 /* use random position inside this segment to sample shader,
784                 * for last shorter step we remap it to fit within the segment. */
785                 if(new_t == ray->t) {
786                         step_offset *= (new_t - t) / step_size;
787                 }
788
789                 float3 new_P = ray->P + ray->D * (t + step_offset);
790                 VolumeShaderCoefficients coeff;
791
792                 /* compute segment */
793                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
794                         int closure_flag = sd->flag;
795                         float3 sigma_t = coeff.sigma_t;
796
797                         /* compute average albedo for channel sampling */
798                         if(closure_flag & SD_SCATTER) {
799                                 accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
800                         }
801
802                         /* compute accumulated transmittance */
803                         float3 transmittance = volume_color_transmittance(sigma_t, dt);
804
805                         /* compute emission attenuated by absorption */
806                         if(closure_flag & SD_EMISSION) {
807                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
808                                 accum_emission += accum_transmittance * emission;
809                         }
810
811                         accum_transmittance *= transmittance;
812
813                         /* compute pdf for distance sampling */
814                         float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
815                         cdf_distance = cdf_distance + pdf_distance;
816
817                         /* write step data */
818                         step->sigma_t = sigma_t;
819                         step->sigma_s = coeff.sigma_s;
820                         step->closure_flag = closure_flag;
821
822                         segment->closure_flag |= closure_flag;
823
824                         is_last_step_empty = false;
825                         segment->numsteps++;
826                 }
827                 else {
828                         if(is_last_step_empty) {
829                                 /* consecutive empty step, merge */
830                                 step--;
831                         }
832                         else {
833                                 /* store empty step */
834                                 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
835                                 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
836                                 step->closure_flag = 0;
837
838                                 segment->numsteps++;
839                                 is_last_step_empty = true;
840                         }
841                 }
842
843                 step->accum_transmittance = accum_transmittance;
844                 step->cdf_distance = cdf_distance;
845                 step->t = new_t;
846                 step->shade_t = t + step_offset;
847
848                 /* stop if at the end of the volume */
849                 t = new_t;
850                 if(t == ray->t)
851                         break;
852
853                 /* stop if nearly all light blocked */
854                 if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
855                         break;
856         }
857
858         /* store total emission and transmittance */
859         segment->accum_emission = accum_emission;
860         segment->accum_transmittance = accum_transmittance;
861         segment->accum_albedo = accum_albedo;
862
863         /* normalize cumulative density function for distance sampling */
864         VolumeStep *last_step = segment->steps + segment->numsteps - 1;
865
866         if(!is_zero(last_step->cdf_distance)) {
867                 VolumeStep *step = &segment->steps[0];
868                 int numsteps = segment->numsteps;
869                 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
870
871                 for(int i = 0; i < numsteps; i++, step++)
872                         step->cdf_distance *= inv_cdf_distance_sum;
873         }
874 }
875
876 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
877 {
878         if(segment->steps != &segment->stack_step) {
879 #ifdef __KERNEL_CPU__
880                 /* NOTE: We only allow free last allocated segment.
881                  * No random order of alloc/free is supported.
882                  */
883                 assert(kg->decoupled_volume_steps_index > 0);
884                 assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
885                 --kg->decoupled_volume_steps_index;
886 #else
887                 free(segment->steps);
888 #endif
889         }
890 }
891 #endif  /* __VOLUME_DECOUPLED__ */
892
893 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
894  * marching.
895  *
896  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
897 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
898         KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
899         float3 *throughput, float rphase, float rscatter,
900         const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
901 {
902         kernel_assert(segment->closure_flag & SD_SCATTER);
903
904         /* Sample color channel, use MIS with balance heuristic. */
905         float3 channel_pdf;
906         int channel = kernel_volume_sample_channel(segment->accum_albedo,
907                                                    *throughput,
908                                                    rphase,
909                                                    &channel_pdf);
910
911         float xi = rscatter;
912
913         /* probabilistic scattering decision based on transmittance */
914         if(probalistic_scatter) {
915                 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
916
917                 if(1.0f - xi >= sample_transmittance) {
918                         /* rescale random number so we can reuse it */
919                         xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
920                 }
921                 else {
922                         *throughput /= sample_transmittance;
923                         return VOLUME_PATH_MISSED;
924                 }
925         }
926
927         VolumeStep *step;
928         float3 transmittance;
929         float pdf, sample_t;
930         float mis_weight = 1.0f;
931         bool distance_sample = true;
932         bool use_mis = false;
933
934         if(segment->sampling_method && light_P) {
935                 if(segment->sampling_method == SD_VOLUME_MIS) {
936                         /* multiple importance sample: randomly pick between
937                          * equiangular and distance sampling strategy */
938                         if(xi < 0.5f) {
939                                 xi *= 2.0f;
940                         }
941                         else {
942                                 xi = (xi - 0.5f)*2.0f;
943                                 distance_sample = false;
944                         }
945
946                         use_mis = true;
947                 }
948                 else {
949                         /* only equiangular sampling */
950                         distance_sample = false;
951                 }
952         }
953
954         /* distance sampling */
955         if(distance_sample) {
956                 /* find step in cdf */
957                 step = segment->steps;
958
959                 float prev_t = 0.0f;
960                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
961
962                 if(segment->numsteps > 1) {
963                         float prev_cdf = 0.0f;
964                         float step_cdf = 1.0f;
965                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
966
967                         for(int i = 0; ; i++, step++) {
968                                 /* todo: optimize using binary search */
969                                 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
970
971                                 if(xi < step_cdf || i == segment->numsteps-1)
972                                         break;
973
974                                 prev_cdf = step_cdf;
975                                 prev_t = step->t;
976                                 prev_cdf_distance = step->cdf_distance;
977                         }
978
979                         /* remap xi so we can reuse it */
980                         xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
981
982                         /* pdf for picking step */
983                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
984                 }
985
986                 /* determine range in which we will sample */
987                 float step_t = step->t - prev_t;
988
989                 /* sample distance and compute transmittance */
990                 float3 distance_pdf;
991                 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
992
993                 /* modify pdf for hit/miss decision */
994                 if(probalistic_scatter)
995                         distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
996
997                 pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
998
999                 /* multiple importance sampling */
1000                 if(use_mis) {
1001                         float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
1002                         mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
1003                 }
1004         }
1005         /* equi-angular sampling */
1006         else {
1007                 /* sample distance */
1008                 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
1009
1010                 /* find step in which sampled distance is located */
1011                 step = segment->steps;
1012
1013                 float prev_t = 0.0f;
1014                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
1015
1016                 if(segment->numsteps > 1) {
1017                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
1018
1019                         int numsteps = segment->numsteps;
1020                         int high = numsteps - 1;
1021                         int low = 0;
1022                         int mid;
1023
1024                         while(low < high) {
1025                                 mid = (low + high) >> 1;
1026
1027                                 if(sample_t < step[mid].t)
1028                                         high = mid;
1029                                 else if(sample_t >= step[mid + 1].t)
1030                                         low = mid + 1;
1031                                 else {
1032                                         /* found our interval in step[mid] .. step[mid+1] */
1033                                         prev_t = step[mid].t;
1034                                         prev_cdf_distance = step[mid].cdf_distance;
1035                                         step += mid+1;
1036                                         break;
1037                                 }
1038                         }
1039
1040                         if(low >= numsteps - 1) {
1041                                 prev_t = step[numsteps - 1].t;
1042                                 prev_cdf_distance = step[numsteps-1].cdf_distance;
1043                                 step += numsteps - 1;
1044                         }
1045
1046                         /* pdf for picking step with distance sampling */
1047                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
1048                 }
1049
1050                 /* determine range in which we will sample */
1051                 float step_t = step->t - prev_t;
1052                 float step_sample_t = sample_t - prev_t;
1053
1054                 /* compute transmittance */
1055                 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
1056
1057                 /* multiple importance sampling */
1058                 if(use_mis) {
1059                         float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
1060                         float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
1061                         mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
1062                 }
1063         }
1064         if(sample_t < 0.0f || pdf == 0.0f) {
1065                 return VOLUME_PATH_MISSED;
1066         }
1067
1068         /* compute transmittance up to this step */
1069         if(step != segment->steps)
1070                 transmittance *= (step-1)->accum_transmittance;
1071
1072         /* modify throughput */
1073         *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
1074
1075         /* evaluate shader to create closures at shading point */
1076         if(segment->numsteps > 1) {
1077                 sd->P = ray->P + step->shade_t*ray->D;
1078
1079                 VolumeShaderCoefficients coeff;
1080                 volume_shader_sample(kg, sd, state, sd->P, &coeff);
1081         }
1082
1083         /* move to new position */
1084         sd->P = ray->P + sample_t*ray->D;
1085
1086         return VOLUME_PATH_SCATTERED;
1087 }
1088 #endif  /* __SPLIT_KERNEL */
1089
1090 /* decide if we need to use decoupled or not */
1091 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
1092 {
1093         /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
1094          * which also means equiangular and multiple importance sampling is not
1095          * support for that case */
1096         if(!kernel_data.integrator.volume_decoupled)
1097                 return false;
1098
1099 #ifdef __KERNEL_GPU__
1100         if(heterogeneous)
1101                 return false;
1102 #endif
1103
1104         /* equiangular and multiple importance sampling only implemented for decoupled */
1105         if(sampling_method != 0)
1106                 return true;
1107
1108         /* for all light sampling use decoupled, reusing shader evaluations is
1109          * typically faster in that case */
1110         if(direct)
1111                 return kernel_data.integrator.sample_all_lights_direct;
1112         else
1113                 return kernel_data.integrator.sample_all_lights_indirect;
1114 }
1115
1116 /* Volume Stack
1117  *
1118  * This is an array of object/shared ID's that the current segment of the path
1119  * is inside of. */
1120
1121 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
1122                                          ShaderData *stack_sd,
1123                                          ccl_addr_space const PathState *state,
1124                                          ccl_addr_space const Ray *ray,
1125                                          ccl_addr_space VolumeStack *stack)
1126 {
1127         /* NULL ray happens in the baker, does it need proper initialization of
1128          * camera in volume?
1129          */
1130         if(!kernel_data.cam.is_inside_volume || ray == NULL) {
1131                 /* Camera is guaranteed to be in the air, only take background volume
1132                  * into account in this case.
1133                  */
1134                 if(kernel_data.background.volume_shader != SHADER_NONE) {
1135                         stack[0].shader = kernel_data.background.volume_shader;
1136                         stack[0].object = PRIM_NONE;
1137                         stack[1].shader = SHADER_NONE;
1138                 }
1139                 else {
1140                         stack[0].shader = SHADER_NONE;
1141                 }
1142                 return;
1143         }
1144
1145         kernel_assert(state->flag & PATH_RAY_CAMERA);
1146
1147         Ray volume_ray = *ray;
1148         volume_ray.t = FLT_MAX;
1149
1150         const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
1151         int stack_index = 0, enclosed_index = 0;
1152
1153 #ifdef __VOLUME_RECORD_ALL__
1154         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1155         uint num_hits = scene_intersect_volume_all(kg,
1156                                                    &volume_ray,
1157                                                    hits,
1158                                                    2*VOLUME_STACK_SIZE,
1159                                                    visibility);
1160         if(num_hits > 0) {
1161                 int enclosed_volumes[VOLUME_STACK_SIZE];
1162                 Intersection *isect = hits;
1163
1164                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1165
1166                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1167                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1168                         if(stack_sd->flag & SD_BACKFACING) {
1169                                 bool need_add = true;
1170                                 for(int i = 0; i < enclosed_index && need_add; ++i) {
1171                                         /* If ray exited the volume and never entered to that volume
1172                                          * it means that camera is inside such a volume.
1173                                          */
1174                                         if(enclosed_volumes[i] == stack_sd->object) {
1175                                                 need_add = false;
1176                                         }
1177                                 }
1178                                 for(int i = 0; i < stack_index && need_add; ++i) {
1179                                         /* Don't add intersections twice. */
1180                                         if(stack[i].object == stack_sd->object) {
1181                                                 need_add = false;
1182                                                 break;
1183                                         }
1184                                 }
1185                                 if(need_add && stack_index < VOLUME_STACK_SIZE - 1) {
1186                                         stack[stack_index].object = stack_sd->object;
1187                                         stack[stack_index].shader = stack_sd->shader;
1188                                         ++stack_index;
1189                                 }
1190                         }
1191                         else {
1192                                 /* If ray from camera enters the volume, this volume shouldn't
1193                                  * be added to the stack on exit.
1194                                  */
1195                                 enclosed_volumes[enclosed_index++] = stack_sd->object;
1196                         }
1197                 }
1198         }
1199 #else
1200         int enclosed_volumes[VOLUME_STACK_SIZE];
1201         int step = 0;
1202
1203         while(stack_index < VOLUME_STACK_SIZE - 1 &&
1204               enclosed_index < VOLUME_STACK_SIZE - 1 &&
1205               step < 2 * VOLUME_STACK_SIZE)
1206         {
1207                 Intersection isect;
1208                 if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
1209                         break;
1210                 }
1211
1212                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1213                 if(stack_sd->flag & SD_BACKFACING) {
1214                         /* If ray exited the volume and never entered to that volume
1215                          * it means that camera is inside such a volume.
1216                          */
1217                         bool need_add = true;
1218                         for(int i = 0; i < enclosed_index && need_add; ++i) {
1219                                 /* If ray exited the volume and never entered to that volume
1220                                  * it means that camera is inside such a volume.
1221                                  */
1222                                 if(enclosed_volumes[i] == stack_sd->object) {
1223                                         need_add = false;
1224                                 }
1225                         }
1226                         for(int i = 0; i < stack_index && need_add; ++i) {
1227                                 /* Don't add intersections twice. */
1228                                 if(stack[i].object == stack_sd->object) {
1229                                         need_add = false;
1230                                         break;
1231                                 }
1232                         }
1233                         if(need_add) {
1234                                 stack[stack_index].object = stack_sd->object;
1235                                 stack[stack_index].shader = stack_sd->shader;
1236                                 ++stack_index;
1237                         }
1238                 }
1239                 else {
1240                         /* If ray from camera enters the volume, this volume shouldn't
1241                          * be added to the stack on exit.
1242                          */
1243                         enclosed_volumes[enclosed_index++] = stack_sd->object;
1244                 }
1245
1246                 /* Move ray forward. */
1247                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1248                 ++step;
1249         }
1250 #endif
1251         /* stack_index of 0 means quick checks outside of the kernel gave false
1252          * positive, nothing to worry about, just we've wasted quite a few of
1253          * ticks just to come into conclusion that camera is in the air.
1254          *
1255          * In this case we're doing the same above -- check whether background has
1256          * volume.
1257          */
1258         if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1259                 stack[0].shader = kernel_data.background.volume_shader;
1260                 stack[0].object = PRIM_NONE;
1261                 stack[1].shader = SHADER_NONE;
1262         }
1263         else {
1264                 stack[stack_index].shader = SHADER_NONE;
1265         }
1266 }
1267
1268 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
1269 {
1270         /* todo: we should have some way for objects to indicate if they want the
1271          * world shader to work inside them. excluding it by default is problematic
1272          * because non-volume objects can't be assumed to be closed manifolds */
1273
1274         if(!(sd->flag & SD_HAS_VOLUME))
1275                 return;
1276
1277         if(sd->flag & SD_BACKFACING) {
1278                 /* exit volume object: remove from stack */
1279                 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1280                         if(stack[i].object == sd->object) {
1281                                 /* shift back next stack entries */
1282                                 do {
1283                                         stack[i] = stack[i+1];
1284                                         i++;
1285                                 }
1286                                 while(stack[i].shader != SHADER_NONE);
1287
1288                                 return;
1289                         }
1290                 }
1291         }
1292         else {
1293                 /* enter volume object: add to stack */
1294                 int i;
1295
1296                 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
1297                         /* already in the stack? then we have nothing to do */
1298                         if(stack[i].object == sd->object)
1299                                 return;
1300                 }
1301
1302                 /* if we exceed the stack limit, ignore */
1303                 if(i >= VOLUME_STACK_SIZE-1)
1304                         return;
1305
1306                 /* add to the end of the stack */
1307                 stack[i].shader = sd->shader;
1308                 stack[i].object = sd->object;
1309                 stack[i+1].shader = SHADER_NONE;
1310         }
1311 }
1312
1313 #ifdef __SUBSURFACE__
1314 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1315                                                           ShaderData *stack_sd,
1316                                                           Ray *ray,
1317                                                           ccl_addr_space VolumeStack *stack)
1318 {
1319         kernel_assert(kernel_data.integrator.use_volumes);
1320
1321         Ray volume_ray = *ray;
1322
1323 #  ifdef __VOLUME_RECORD_ALL__
1324         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1325         uint num_hits = scene_intersect_volume_all(kg,
1326                                                    &volume_ray,
1327                                                    hits,
1328                                                    2*VOLUME_STACK_SIZE,
1329                                                    PATH_RAY_ALL_VISIBILITY);
1330         if(num_hits > 0) {
1331                 Intersection *isect = hits;
1332
1333                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1334
1335                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1336                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1337                         kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1338                 }
1339         }
1340 #  else
1341         Intersection isect;
1342         int step = 0;
1343         float3 Pend = ray->P + ray->D*ray->t;
1344         while(step < 2 * VOLUME_STACK_SIZE &&
1345               scene_intersect_volume(kg,
1346                                      &volume_ray,
1347                                      &isect,
1348                                      PATH_RAY_ALL_VISIBILITY))
1349         {
1350                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1351                 kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1352
1353                 /* Move ray forward. */
1354                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1355                 if(volume_ray.t != FLT_MAX) {
1356                         volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
1357                 }
1358                 ++step;
1359         }
1360 #  endif
1361 }
1362 #endif
1363
1364 /* Clean stack after the last bounce.
1365  *
1366  * It is expected that all volumes are closed manifolds, so at the time when ray
1367  * hits nothing (for example, it is a last bounce which goes to environment) the
1368  * only expected volume in the stack is the world's one. All the rest volume
1369  * entries should have been exited already.
1370  *
1371  * This isn't always true because of ray intersection precision issues, which
1372  * could lead us to an infinite non-world volume in the stack, causing render
1373  * artifacts.
1374  *
1375  * Use this function after the last bounce to get rid of all volumes apart from
1376  * the world's one after the last bounce to avoid render artifacts.
1377  */
1378 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
1379                                                  ccl_addr_space VolumeStack *volume_stack)
1380 {
1381         if(kernel_data.background.volume_shader != SHADER_NONE) {
1382                 /* Keep the world's volume in stack. */
1383                 volume_stack[1].shader = SHADER_NONE;
1384         }
1385         else {
1386                 volume_stack[0].shader = SHADER_NONE;
1387         }
1388 }
1389
1390 #endif  /* __VOLUME__ */
1391
1392 CCL_NAMESPACE_END