2af4c9a5e7a6b3a9920993fdfe947a91fa4120c0
[blender.git] / intern / cycles / kernel / kernel_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22         VOLUME_PATH_SCATTERED = 0,
23         VOLUME_PATH_ATTENUATED = 1,
24         VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33         float3 sigma_t;
34         float3 sigma_s;
35         float3 emission;
36 } VolumeShaderCoefficients;
37
38 /* evaluate shader to get extinction coefficient at P */
39 ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
40                                                        ShaderData *sd,
41                                                        ccl_addr_space PathState *state,
42                                                        float3 P,
43                                                        float3 *extinction)
44 {
45         sd->P = P;
46         shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, 0);
47
48         if(sd->flag & SD_EXTINCTION) {
49                 *extinction = sd->closure_transparent_extinction;
50                 return true;
51         }
52         else {
53                 return false;
54         }
55 }
56
57 /* evaluate shader to get absorption, scattering and emission at P */
58 ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
59                                             ShaderData *sd,
60                                             ccl_addr_space PathState *state,
61                                             float3 P,
62                                             VolumeShaderCoefficients *coeff)
63 {
64         sd->P = P;
65         shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, kernel_data.integrator.max_closures);
66
67         if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
68                 return false;
69         
70         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
71         coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction:
72                                                      make_float3(0.0f, 0.0f, 0.0f);
73         coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background:
74                                                     make_float3(0.0f, 0.0f, 0.0f);
75
76         if(sd->flag & SD_SCATTER) {
77                 if(state->bounce < kernel_data.integrator.max_bounce &&
78                    state->volume_bounce < kernel_data.integrator.max_volume_bounce) {
79                         for(int i = 0; i < sd->num_closure; i++) {
80                                 const ShaderClosure *sc = &sd->closure[i];
81
82                                 if(CLOSURE_IS_VOLUME(sc->type))
83                                         coeff->sigma_s += sc->weight;
84                         }
85                 }
86                 else {
87                         /* When at the max number of bounces, clear scattering. */
88                         sd->flag &= ~SD_SCATTER;
89                 }
90         }
91
92         return true;
93 }
94
95 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
96 {
97         return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
98 }
99
100 ccl_device float kernel_volume_channel_get(float3 value, int channel)
101 {
102         return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
103 }
104
105 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
106 {
107         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
108                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
109
110                 if(shader_flag & SD_HETEROGENEOUS_VOLUME)
111                         return true;
112         }
113
114         return false;
115 }
116
117 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
118 {
119         if(kernel_data.integrator.num_all_lights == 0)
120                 return 0;
121
122         int method = -1;
123
124         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
125                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
126
127                 if(shader_flag & SD_VOLUME_MIS) {
128                         return SD_VOLUME_MIS;
129                 }
130                 else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
131                         if(method == 0)
132                                 return SD_VOLUME_MIS;
133
134                         method = SD_VOLUME_EQUIANGULAR;
135                 }
136                 else {
137                         if(method == SD_VOLUME_EQUIANGULAR)
138                                 return SD_VOLUME_MIS;
139
140                         method = 0;
141                 }
142         }
143
144         return method;
145 }
146
147 /* Volume Shadows
148  *
149  * These functions are used to attenuate shadow rays to lights. Both absorption
150  * and scattering will block light, represented by the extinction coefficient. */
151
152 /* homogeneous volume: assume shader evaluation at the starts gives
153  * the extinction coefficient for the entire line segment */
154 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
155                                                  ccl_addr_space PathState *state,
156                                                  Ray *ray,
157                                                  ShaderData *sd,
158                                                  float3 *throughput)
159 {
160         float3 sigma_t;
161
162         if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
163                 *throughput *= volume_color_transmittance(sigma_t, ray->t);
164 }
165
166 /* heterogeneous volume: integrate stepping through the volume until we
167  * reach the end, get absorbed entirely, or run out of iterations */
168 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
169                                                    ccl_addr_space PathState *state,
170                                                    Ray *ray,
171                                                    ShaderData *sd,
172                                                    float3 *throughput)
173 {
174         float3 tp = *throughput;
175         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
176
177         /* prepare for stepping */
178         int max_steps = kernel_data.integrator.volume_max_steps;
179         float step = kernel_data.integrator.volume_step_size;
180         float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step;
181
182         /* compute extinction at the start */
183         float t = 0.0f;
184
185         float3 sum = make_float3(0.0f, 0.0f, 0.0f);
186
187         for(int i = 0; i < max_steps; i++) {
188                 /* advance to new position */
189                 float new_t = min(ray->t, (i+1) * step);
190                 float dt = new_t - t;
191
192                 /* use random position inside this segment to sample shader */
193                 if(new_t == ray->t)
194                         random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
195
196                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
197                 float3 sigma_t;
198
199                 /* compute attenuation over segment */
200                 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
201                         /* Compute expf() only for every Nth step, to save some calculations
202                          * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
203
204                         sum += (-sigma_t * (new_t - t));
205                         if((i & 0x07) == 0) { /* ToDo: Other interval? */
206                                 tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
207
208                                 /* stop if nearly all light is blocked */
209                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
210                                         break;
211                         }
212                 }
213
214                 /* stop if at the end of the volume */
215                 t = new_t;
216                 if(t == ray->t) {
217                         /* Update throughput in case we haven't done it above */
218                         tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
219                         break;
220                 }
221         }
222
223         *throughput = tp;
224 }
225
226 /* get the volume attenuation over line segment defined by ray, with the
227  * assumption that there are no surfaces blocking light between the endpoints */
228 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
229                                               ShaderData *shadow_sd,
230                                               ccl_addr_space PathState *state,
231                                               Ray *ray,
232                                               float3 *throughput)
233 {
234         shader_setup_from_volume(kg, shadow_sd, ray);
235
236         if(volume_stack_is_heterogeneous(kg, state->volume_stack))
237                 kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
238         else
239                 kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
240 }
241
242 /* Equi-angular sampling as in:
243  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
244
245 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
246 {
247         float t = ray->t;
248
249         float delta = dot((light_P - ray->P) , ray->D);
250         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
251         if(UNLIKELY(D == 0.0f)) {
252                 *pdf = 0.0f;
253                 return 0.0f;
254         }
255         float theta_a = -atan2f(delta, D);
256         float theta_b = atan2f(t - delta, D);
257         float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
258         if(UNLIKELY(theta_b == theta_a)) {
259                 *pdf = 0.0f;
260                 return 0.0f;
261         }
262         *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
263
264         return min(t, delta + t_); /* min is only for float precision errors */
265 }
266
267 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
268 {
269         float delta = dot((light_P - ray->P) , ray->D);
270         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
271         if(UNLIKELY(D == 0.0f)) {
272                 return 0.0f;
273         }
274
275         float t = ray->t;
276         float t_ = sample_t - delta;
277
278         float theta_a = -atan2f(delta, D);
279         float theta_b = atan2f(t - delta, D);
280         if(UNLIKELY(theta_b == theta_a)) {
281                 return 0.0f;
282         }
283
284         float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
285
286         return pdf;
287 }
288
289 /* Distance sampling */
290
291 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
292 {
293         /* xi is [0, 1[ so log(0) should never happen, division by zero is
294          * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
295         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
296         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
297         float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
298
299         float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
300
301         *transmittance = volume_color_transmittance(sigma_t, sample_t);
302         *pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
303
304         /* todo: optimization: when taken together with hit/miss decision,
305          * the full_transmittance cancels out drops out and xi does not
306          * need to be remapped */
307
308         return sample_t;
309 }
310
311 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
312 {
313         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
314         float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
315
316         return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
317 }
318
319 /* Emission */
320
321 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
322 {
323         /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
324          * this goes to E * t as sigma_t goes to zero
325          *
326          * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
327         float3 emission = coeff->emission;
328
329         if(closure_flag & SD_EXTINCTION) {
330                 float3 sigma_t = coeff->sigma_t;
331
332                 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
333                 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
334                 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
335         }
336         else
337                 emission *= t;
338         
339         return emission;
340 }
341
342 /* Volume Path */
343
344 ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
345 {
346         /* Sample color channel proportional to throughput and single scattering
347          * albedo, to significantly reduce noise with many bounce, following:
348          *
349          * "Practical and Controllable Subsurface Scattering for Production Path
350          *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
351         float3 weights = fabs(throughput * albedo);
352         float sum_weights = weights.x + weights.y + weights.z;
353
354         if(sum_weights > 0.0f) {
355                 *pdf = weights/sum_weights;
356         }
357         else {
358                 *pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f);
359         }
360
361         if(rand < pdf->x) {
362                 return 0;
363         }
364         else if(rand < pdf->x + pdf->y) {
365                 return 1;
366         }
367         else {
368                 return 2;
369         }
370 }
371
372 /* homogeneous volume: assume shader evaluation at the start gives
373  * the volume shading coefficient for the entire line segment */
374 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
375     KernelGlobals *kg,
376     ccl_addr_space PathState *state,
377     Ray *ray,
378     ShaderData *sd,
379     PathRadiance *L,
380     ccl_addr_space float3 *throughput,
381     bool probalistic_scatter)
382 {
383         VolumeShaderCoefficients coeff;
384
385         if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
386                 return VOLUME_PATH_MISSED;
387
388         int closure_flag = sd->flag;
389         float t = ray->t;
390         float3 new_tp;
391
392 #ifdef __VOLUME_SCATTER__
393         /* randomly scatter, and if we do t is shortened */
394         if(closure_flag & SD_SCATTER) {
395                 /* Sample channel, use MIS with balance heuristic. */
396                 float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
397                 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
398                 float3 channel_pdf;
399                 int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
400
401                 /* decide if we will hit or miss */
402                 bool scatter = true;
403                 float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
404
405                 if(probalistic_scatter) {
406                         float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
407                         float sample_transmittance = expf(-sample_sigma_t * t);
408
409                         if(1.0f - xi >= sample_transmittance) {
410                                 scatter = true;
411
412                                 /* rescale random number so we can reuse it */
413                                 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
414
415                         }
416                         else
417                                 scatter = false;
418                 }
419
420                 if(scatter) {
421                         /* scattering */
422                         float3 pdf;
423                         float3 transmittance;
424                         float sample_t;
425
426                         /* distance sampling */
427                         sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
428
429                         /* modify pdf for hit/miss decision */
430                         if(probalistic_scatter)
431                                 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
432
433                         new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
434                         t = sample_t;
435                 }
436                 else {
437                         /* no scattering */
438                         float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
439                         float pdf = dot(channel_pdf, transmittance);
440                         new_tp = *throughput * transmittance / pdf;
441                 }
442         }
443         else 
444 #endif
445         if(closure_flag & SD_EXTINCTION) {
446                 /* absorption only, no sampling needed */
447                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
448                 new_tp = *throughput * transmittance;
449         }
450
451         /* integrate emission attenuated by extinction */
452         if(L && (closure_flag & SD_EMISSION)) {
453                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
454                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
455                 path_radiance_accum_emission(L, state, *throughput, emission);
456         }
457
458         /* modify throughput */
459         if(closure_flag & SD_EXTINCTION) {
460                 *throughput = new_tp;
461
462                 /* prepare to scatter to new direction */
463                 if(t < ray->t) {
464                         /* adjust throughput and move to new location */
465                         sd->P = ray->P + t*ray->D;
466
467                         return VOLUME_PATH_SCATTERED;
468                 }
469         }
470
471         return VOLUME_PATH_ATTENUATED;
472 }
473
474 /* heterogeneous volume distance sampling: integrate stepping through the
475  * volume until we reach the end, get absorbed entirely, or run out of
476  * iterations. this does probabilistically scatter or get transmitted through
477  * for path tracing where we don't want to branch. */
478 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
479     KernelGlobals *kg,
480     ccl_addr_space PathState *state,
481     Ray *ray,
482     ShaderData *sd,
483     PathRadiance *L,
484     ccl_addr_space float3 *throughput)
485 {
486         float3 tp = *throughput;
487         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
488
489         /* prepare for stepping */
490         int max_steps = kernel_data.integrator.volume_max_steps;
491         float step_size = kernel_data.integrator.volume_step_size;
492         float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step_size;
493
494         /* compute coefficients at the start */
495         float t = 0.0f;
496         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
497
498         /* pick random color channel, we use the Veach one-sample
499          * model with balance heuristic for the channels */
500         float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
501         float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
502         bool has_scatter = false;
503
504         for(int i = 0; i < max_steps; i++) {
505                 /* advance to new position */
506                 float new_t = min(ray->t, (i+1) * step_size);
507                 float dt = new_t - t;
508
509                 /* use random position inside this segment to sample shader */
510                 if(new_t == ray->t)
511                         random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
512
513                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
514                 VolumeShaderCoefficients coeff;
515
516                 /* compute segment */
517                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
518                         int closure_flag = sd->flag;
519                         float3 new_tp;
520                         float3 transmittance;
521                         bool scatter = false;
522
523                         /* distance sampling */
524 #ifdef __VOLUME_SCATTER__
525                         if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
526                                 has_scatter = true;
527
528                                 /* Sample channel, use MIS with balance heuristic. */
529                                 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
530                                 float3 channel_pdf;
531                                 int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
532
533                                 /* compute transmittance over full step */
534                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
535
536                                 /* decide if we will scatter or continue */
537                                 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
538
539                                 if(1.0f - xi >= sample_transmittance) {
540                                         /* compute sampling distance */
541                                         float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
542                                         float new_dt = -logf(1.0f - xi)/sample_sigma_t;
543                                         new_t = t + new_dt;
544
545                                         /* transmittance and pdf */
546                                         float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
547                                         float3 pdf = coeff.sigma_t * new_transmittance;
548
549                                         /* throughput */
550                                         new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
551                                         scatter = true;
552                                 }
553                                 else {
554                                         /* throughput */
555                                         float pdf = dot(channel_pdf, transmittance);
556                                         new_tp = tp * transmittance / pdf;
557
558                                         /* remap xi so we can reuse it and keep thing stratified */
559                                         xi = 1.0f - (1.0f - xi)/sample_transmittance;
560                                 }
561                         }
562                         else 
563 #endif
564                         if(closure_flag & SD_EXTINCTION) {
565                                 /* absorption only, no sampling needed */
566                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
567                                 new_tp = tp * transmittance;
568                         }
569
570                         /* integrate emission attenuated by absorption */
571                         if(L && (closure_flag & SD_EMISSION)) {
572                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
573                                 path_radiance_accum_emission(L, state, tp, emission);
574                         }
575
576                         /* modify throughput */
577                         if(closure_flag & SD_EXTINCTION) {
578                                 tp = new_tp;
579
580                                 /* stop if nearly all light blocked */
581                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
582                                         tp = make_float3(0.0f, 0.0f, 0.0f);
583                                         break;
584                                 }
585                         }
586
587                         /* prepare to scatter to new direction */
588                         if(scatter) {
589                                 /* adjust throughput and move to new location */
590                                 sd->P = ray->P + new_t*ray->D;
591                                 *throughput = tp;
592
593                                 return VOLUME_PATH_SCATTERED;
594                         }
595                         else {
596                                 /* accumulate transmittance */
597                                 accum_transmittance *= transmittance;
598                         }
599                 }
600
601                 /* stop if at the end of the volume */
602                 t = new_t;
603                 if(t == ray->t)
604                         break;
605         }
606
607         *throughput = tp;
608
609         return VOLUME_PATH_ATTENUATED;
610 }
611
612 /* get the volume attenuation and emission over line segment defined by
613  * ray, with the assumption that there are no surfaces blocking light
614  * between the endpoints. distance sampling is used to decide if we will
615  * scatter or not. */
616 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
617     KernelGlobals *kg,
618     ccl_addr_space PathState *state,
619     ShaderData *sd,
620     Ray *ray,
621     PathRadiance *L,
622     ccl_addr_space float3 *throughput,
623     bool heterogeneous)
624 {
625         shader_setup_from_volume(kg, sd, ray);
626
627         if(heterogeneous)
628                 return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
629         else
630                 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
631 }
632
633 #ifndef __SPLIT_KERNEL__
634 /* Decoupled Volume Sampling
635  *
636  * VolumeSegment is list of coefficients and transmittance stored at all steps
637  * through a volume. This can then later be used for decoupled sampling as in:
638  * "Importance Sampling Techniques for Path Tracing in Participating Media"
639  *
640  * On the GPU this is only supported (but currently not enabled)
641  * for homogeneous volumes (1 step), due to
642  * no support for malloc/free and too much stack usage with a fix size array. */
643
644 typedef struct VolumeStep {
645         float3 sigma_s;                         /* scatter coefficient */
646         float3 sigma_t;                         /* extinction coefficient */
647         float3 accum_transmittance;     /* accumulated transmittance including this step */
648         float3 cdf_distance;            /* cumulative density function for distance sampling */
649         float t;                                        /* distance at end of this step */
650         float shade_t;                          /* jittered distance where shading was done in step */
651         int closure_flag;                       /* shader evaluation closure flags */
652 } VolumeStep;
653
654 typedef struct VolumeSegment {
655         VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
656         VolumeStep *steps;                      /* recorded steps */
657         int numsteps;                           /* number of steps */
658         int closure_flag;                       /* accumulated closure flags from all steps */
659
660         float3 accum_emission;          /* accumulated emission at end of segment */
661         float3 accum_transmittance;     /* accumulated transmittance at end of segment */
662         float3 accum_albedo;        /* accumulated average albedo over segment */
663
664         int sampling_method;            /* volume sampling method */
665 } VolumeSegment;
666
667 /* record volume steps to the end of the volume.
668  *
669  * it would be nice if we could only record up to the point that we need to scatter,
670  * but the entire segment is needed to do always scattering, rather than probabilistically
671  * hitting or missing the volume. if we don't know the transmittance at the end of the
672  * volume we can't generate stratified distance samples up to that transmittance */
673 #ifdef __VOLUME_DECOUPLED__
674 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
675         Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
676 {
677         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
678
679         /* prepare for volume stepping */
680         int max_steps;
681         float step_size, random_jitter_offset;
682
683         if(heterogeneous) {
684                 const int global_max_steps = kernel_data.integrator.volume_max_steps;
685                 step_size = kernel_data.integrator.volume_step_size;
686                 /* compute exact steps in advance for malloc */
687                 if(ray->t > global_max_steps*step_size) {
688                         max_steps = global_max_steps;
689                         step_size = ray->t / (float)max_steps;
690                 }
691                 else {
692                         max_steps = max((int)ceilf(ray->t/step_size), 1);
693                 }
694 #ifdef __KERNEL_CPU__
695                 /* NOTE: For the branched path tracing it's possible to have direct
696                  * and indirect light integration both having volume segments allocated.
697                  * We detect this using index in the pre-allocated memory. Currently we
698                  * only support two segments allocated at a time, if more needed some
699                  * modifications to the KernelGlobals will be needed.
700                  *
701                  * This gives us restrictions that decoupled record should only happen
702                  * in the stack manner, meaning if there's subsequent call of decoupled
703                  * record it'll need to free memory before it's caller frees memory.
704                  */
705                 const int index = kg->decoupled_volume_steps_index;
706                 assert(index < sizeof(kg->decoupled_volume_steps) /
707                                sizeof(*kg->decoupled_volume_steps));
708                 if(kg->decoupled_volume_steps[index] == NULL) {
709                         kg->decoupled_volume_steps[index] =
710                                 (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
711                 }
712                 segment->steps = kg->decoupled_volume_steps[index];
713                 ++kg->decoupled_volume_steps_index;
714 #else
715                 segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
716 #endif
717                 random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
718         }
719         else {
720                 max_steps = 1;
721                 step_size = ray->t;
722                 random_jitter_offset = 0.0f;
723                 segment->steps = &segment->stack_step;
724         }
725         
726         /* init accumulation variables */
727         float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
728         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
729         float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
730         float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
731         float t = 0.0f;
732
733         segment->numsteps = 0;
734         segment->closure_flag = 0;
735         bool is_last_step_empty = false;
736
737         VolumeStep *step = segment->steps;
738
739         for(int i = 0; i < max_steps; i++, step++) {
740                 /* advance to new position */
741                 float new_t = min(ray->t, (i+1) * step_size);
742                 float dt = new_t - t;
743
744                 /* use random position inside this segment to sample shader */
745                 if(heterogeneous && new_t == ray->t)
746                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
747
748                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
749                 VolumeShaderCoefficients coeff;
750
751                 /* compute segment */
752                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
753                         int closure_flag = sd->flag;
754                         float3 sigma_t = coeff.sigma_t;
755
756                         /* compute average albedo for channel sampling */
757                         if(closure_flag & SD_SCATTER) {
758                                 accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
759                         }
760
761                         /* compute accumulated transmittance */
762                         float3 transmittance = volume_color_transmittance(sigma_t, dt);
763
764                         /* compute emission attenuated by absorption */
765                         if(closure_flag & SD_EMISSION) {
766                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
767                                 accum_emission += accum_transmittance * emission;
768                         }
769
770                         accum_transmittance *= transmittance;
771
772                         /* compute pdf for distance sampling */
773                         float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
774                         cdf_distance = cdf_distance + pdf_distance;
775
776                         /* write step data */
777                         step->sigma_t = sigma_t;
778                         step->sigma_s = coeff.sigma_s;
779                         step->closure_flag = closure_flag;
780
781                         segment->closure_flag |= closure_flag;
782
783                         is_last_step_empty = false;
784                         segment->numsteps++;
785                 }
786                 else {
787                         if(is_last_step_empty) {
788                                 /* consecutive empty step, merge */
789                                 step--;
790                         }
791                         else {
792                                 /* store empty step */
793                                 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
794                                 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
795                                 step->closure_flag = 0;
796
797                                 segment->numsteps++;
798                                 is_last_step_empty = true;
799                         }
800                 }
801
802                 step->accum_transmittance = accum_transmittance;
803                 step->cdf_distance = cdf_distance;
804                 step->t = new_t;
805                 step->shade_t = t + random_jitter_offset;
806
807                 /* stop if at the end of the volume */
808                 t = new_t;
809                 if(t == ray->t)
810                         break;
811
812                 /* stop if nearly all light blocked */
813                 if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
814                         break;
815         }
816
817         /* store total emission and transmittance */
818         segment->accum_emission = accum_emission;
819         segment->accum_transmittance = accum_transmittance;
820         segment->accum_albedo = accum_albedo;
821
822         /* normalize cumulative density function for distance sampling */
823         VolumeStep *last_step = segment->steps + segment->numsteps - 1;
824
825         if(!is_zero(last_step->cdf_distance)) {
826                 VolumeStep *step = &segment->steps[0];
827                 int numsteps = segment->numsteps;
828                 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
829
830                 for(int i = 0; i < numsteps; i++, step++)
831                         step->cdf_distance *= inv_cdf_distance_sum;
832         }
833 }
834
835 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
836 {
837         if(segment->steps != &segment->stack_step) {
838 #ifdef __KERNEL_CPU__
839                 /* NOTE: We only allow free last allocated segment.
840                  * No random order of alloc/free is supported.
841                  */
842                 assert(kg->decoupled_volume_steps_index > 0);
843                 assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
844                 --kg->decoupled_volume_steps_index;
845 #else
846                 free(segment->steps);
847 #endif
848         }
849 }
850 #endif  /* __VOLUME_DECOUPLED__ */
851
852 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
853  * marching.
854  *
855  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
856 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
857         KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
858         float3 *throughput, float rphase, float rscatter,
859         const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
860 {
861         kernel_assert(segment->closure_flag & SD_SCATTER);
862
863         /* Sample color channel, use MIS with balance heuristic. */
864         float3 channel_pdf;
865         int channel = kernel_volume_sample_channel(segment->accum_albedo,
866                                                    *throughput,
867                                                    rphase,
868                                                    &channel_pdf);
869
870         float xi = rscatter;
871
872         /* probabilistic scattering decision based on transmittance */
873         if(probalistic_scatter) {
874                 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
875
876                 if(1.0f - xi >= sample_transmittance) {
877                         /* rescale random number so we can reuse it */
878                         xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
879                 }
880                 else {
881                         *throughput /= sample_transmittance;
882                         return VOLUME_PATH_MISSED;
883                 }
884         }
885
886         VolumeStep *step;
887         float3 transmittance;
888         float pdf, sample_t;
889         float mis_weight = 1.0f;
890         bool distance_sample = true;
891         bool use_mis = false;
892
893         if(segment->sampling_method && light_P) {
894                 if(segment->sampling_method == SD_VOLUME_MIS) {
895                         /* multiple importance sample: randomly pick between
896                          * equiangular and distance sampling strategy */
897                         if(xi < 0.5f) {
898                                 xi *= 2.0f;
899                         }
900                         else {
901                                 xi = (xi - 0.5f)*2.0f;
902                                 distance_sample = false;
903                         }
904
905                         use_mis = true;
906                 }
907                 else {
908                         /* only equiangular sampling */
909                         distance_sample = false;
910                 }
911         }
912
913         /* distance sampling */
914         if(distance_sample) {
915                 /* find step in cdf */
916                 step = segment->steps;
917
918                 float prev_t = 0.0f;
919                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
920
921                 if(segment->numsteps > 1) {
922                         float prev_cdf = 0.0f;
923                         float step_cdf = 1.0f;
924                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
925
926                         for(int i = 0; ; i++, step++) {
927                                 /* todo: optimize using binary search */
928                                 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
929
930                                 if(xi < step_cdf || i == segment->numsteps-1)
931                                         break;
932
933                                 prev_cdf = step_cdf;
934                                 prev_t = step->t;
935                                 prev_cdf_distance = step->cdf_distance;
936                         }
937
938                         /* remap xi so we can reuse it */
939                         xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
940
941                         /* pdf for picking step */
942                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
943                 }
944
945                 /* determine range in which we will sample */
946                 float step_t = step->t - prev_t;
947
948                 /* sample distance and compute transmittance */
949                 float3 distance_pdf;
950                 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
951
952                 /* modify pdf for hit/miss decision */
953                 if(probalistic_scatter)
954                         distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
955
956                 pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
957
958                 /* multiple importance sampling */
959                 if(use_mis) {
960                         float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
961                         mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
962                 }
963         }
964         /* equi-angular sampling */
965         else {
966                 /* sample distance */
967                 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
968
969                 /* find step in which sampled distance is located */
970                 step = segment->steps;
971
972                 float prev_t = 0.0f;
973                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
974
975                 if(segment->numsteps > 1) {
976                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
977
978                         int numsteps = segment->numsteps;
979                         int high = numsteps - 1;
980                         int low = 0;
981                         int mid;
982
983                         while(low < high) {
984                                 mid = (low + high) >> 1;
985
986                                 if(sample_t < step[mid].t)
987                                         high = mid;
988                                 else if(sample_t >= step[mid + 1].t)
989                                         low = mid + 1;
990                                 else {
991                                         /* found our interval in step[mid] .. step[mid+1] */
992                                         prev_t = step[mid].t;
993                                         prev_cdf_distance = step[mid].cdf_distance;
994                                         step += mid+1;
995                                         break;
996                                 }
997                         }
998
999                         if(low >= numsteps - 1) {
1000                                 prev_t = step[numsteps - 1].t;
1001                                 prev_cdf_distance = step[numsteps-1].cdf_distance;
1002                                 step += numsteps - 1;
1003                         }
1004
1005                         /* pdf for picking step with distance sampling */
1006                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
1007                 }
1008
1009                 /* determine range in which we will sample */
1010                 float step_t = step->t - prev_t;
1011                 float step_sample_t = sample_t - prev_t;
1012
1013                 /* compute transmittance */
1014                 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
1015
1016                 /* multiple importance sampling */
1017                 if(use_mis) {
1018                         float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
1019                         float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
1020                         mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
1021                 }
1022         }
1023         if(sample_t < 0.0f || pdf == 0.0f) {
1024                 return VOLUME_PATH_MISSED;
1025         }
1026
1027         /* compute transmittance up to this step */
1028         if(step != segment->steps)
1029                 transmittance *= (step-1)->accum_transmittance;
1030
1031         /* modify throughput */
1032         *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
1033
1034         /* evaluate shader to create closures at shading point */
1035         if(segment->numsteps > 1) {
1036                 sd->P = ray->P + step->shade_t*ray->D;
1037
1038                 VolumeShaderCoefficients coeff;
1039                 volume_shader_sample(kg, sd, state, sd->P, &coeff);
1040         }
1041
1042         /* move to new position */
1043         sd->P = ray->P + sample_t*ray->D;
1044
1045         return VOLUME_PATH_SCATTERED;
1046 }
1047 #endif /* __SPLIT_KERNEL */
1048
1049 /* decide if we need to use decoupled or not */
1050 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
1051 {
1052         /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
1053          * which also means equiangular and multiple importance sampling is not
1054          * support for that case */
1055         if(!kernel_data.integrator.volume_decoupled)
1056                 return false;
1057
1058 #ifdef __KERNEL_GPU__
1059         if(heterogeneous)
1060                 return false;
1061 #endif
1062
1063         /* equiangular and multiple importance sampling only implemented for decoupled */
1064         if(sampling_method != 0)
1065                 return true;
1066
1067         /* for all light sampling use decoupled, reusing shader evaluations is
1068          * typically faster in that case */
1069         if(direct)
1070                 return kernel_data.integrator.sample_all_lights_direct;
1071         else
1072                 return kernel_data.integrator.sample_all_lights_indirect;
1073 }
1074
1075 /* Volume Stack
1076  *
1077  * This is an array of object/shared ID's that the current segment of the path
1078  * is inside of. */
1079
1080 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
1081                                          ShaderData *stack_sd,
1082                                          ccl_addr_space const PathState *state,
1083                                          ccl_addr_space const Ray *ray,
1084                                          ccl_addr_space VolumeStack *stack)
1085 {
1086         /* NULL ray happens in the baker, does it need proper initialization of
1087          * camera in volume?
1088          */
1089         if(!kernel_data.cam.is_inside_volume || ray == NULL) {
1090                 /* Camera is guaranteed to be in the air, only take background volume
1091                  * into account in this case.
1092                  */
1093                 if(kernel_data.background.volume_shader != SHADER_NONE) {
1094                         stack[0].shader = kernel_data.background.volume_shader;
1095                         stack[0].object = PRIM_NONE;
1096                         stack[1].shader = SHADER_NONE;
1097                 }
1098                 else {
1099                         stack[0].shader = SHADER_NONE;
1100                 }
1101                 return;
1102         }
1103
1104         kernel_assert(state->flag & PATH_RAY_CAMERA);
1105
1106         Ray volume_ray = *ray;
1107         volume_ray.t = FLT_MAX;
1108
1109         const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
1110         int stack_index = 0, enclosed_index = 0;
1111
1112 #ifdef __VOLUME_RECORD_ALL__
1113         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1114         uint num_hits = scene_intersect_volume_all(kg,
1115                                                    &volume_ray,
1116                                                    hits,
1117                                                    2*VOLUME_STACK_SIZE,
1118                                                    visibility);
1119         if(num_hits > 0) {
1120                 int enclosed_volumes[VOLUME_STACK_SIZE];
1121                 Intersection *isect = hits;
1122
1123                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1124
1125                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1126                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1127                         if(stack_sd->flag & SD_BACKFACING) {
1128                                 bool need_add = true;
1129                                 for(int i = 0; i < enclosed_index && need_add; ++i) {
1130                                         /* If ray exited the volume and never entered to that volume
1131                                          * it means that camera is inside such a volume.
1132                                          */
1133                                         if(enclosed_volumes[i] == stack_sd->object) {
1134                                                 need_add = false;
1135                                         }
1136                                 }
1137                                 for(int i = 0; i < stack_index && need_add; ++i) {
1138                                         /* Don't add intersections twice. */
1139                                         if(stack[i].object == stack_sd->object) {
1140                                                 need_add = false;
1141                                                 break;
1142                                         }
1143                                 }
1144                                 if(need_add) {
1145                                         stack[stack_index].object = stack_sd->object;
1146                                         stack[stack_index].shader = stack_sd->shader;
1147                                         ++stack_index;
1148                                 }
1149                         }
1150                         else {
1151                                 /* If ray from camera enters the volume, this volume shouldn't
1152                                  * be added to the stack on exit.
1153                                  */
1154                                 enclosed_volumes[enclosed_index++] = stack_sd->object;
1155                         }
1156                 }
1157         }
1158 #else
1159         int enclosed_volumes[VOLUME_STACK_SIZE];
1160         int step = 0;
1161
1162         while(stack_index < VOLUME_STACK_SIZE - 1 &&
1163               enclosed_index < VOLUME_STACK_SIZE - 1 &&
1164               step < 2 * VOLUME_STACK_SIZE)
1165         {
1166                 Intersection isect;
1167                 if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
1168                         break;
1169                 }
1170
1171                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1172                 if(stack_sd->flag & SD_BACKFACING) {
1173                         /* If ray exited the volume and never entered to that volume
1174                          * it means that camera is inside such a volume.
1175                          */
1176                         bool need_add = true;
1177                         for(int i = 0; i < enclosed_index && need_add; ++i) {
1178                                 /* If ray exited the volume and never entered to that volume
1179                                  * it means that camera is inside such a volume.
1180                                  */
1181                                 if(enclosed_volumes[i] == stack_sd->object) {
1182                                         need_add = false;
1183                                 }
1184                         }
1185                         for(int i = 0; i < stack_index && need_add; ++i) {
1186                                 /* Don't add intersections twice. */
1187                                 if(stack[i].object == stack_sd->object) {
1188                                         need_add = false;
1189                                         break;
1190                                 }
1191                         }
1192                         if(need_add) {
1193                                 stack[stack_index].object = stack_sd->object;
1194                                 stack[stack_index].shader = stack_sd->shader;
1195                                 ++stack_index;
1196                         }
1197                 }
1198                 else {
1199                         /* If ray from camera enters the volume, this volume shouldn't
1200                          * be added to the stack on exit.
1201                          */
1202                         enclosed_volumes[enclosed_index++] = stack_sd->object;
1203                 }
1204
1205                 /* Move ray forward. */
1206                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1207                 ++step;
1208         }
1209 #endif
1210         /* stack_index of 0 means quick checks outside of the kernel gave false
1211          * positive, nothing to worry about, just we've wasted quite a few of
1212          * ticks just to come into conclusion that camera is in the air.
1213          *
1214          * In this case we're doing the same above -- check whether background has
1215          * volume.
1216          */
1217         if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1218                 stack[0].shader = kernel_data.background.volume_shader;
1219                 stack[0].object = PRIM_NONE;
1220                 stack[1].shader = SHADER_NONE;
1221         }
1222         else {
1223                 stack[stack_index].shader = SHADER_NONE;
1224         }
1225 }
1226
1227 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
1228 {
1229         /* todo: we should have some way for objects to indicate if they want the
1230          * world shader to work inside them. excluding it by default is problematic
1231          * because non-volume objects can't be assumed to be closed manifolds */
1232
1233         if(!(sd->flag & SD_HAS_VOLUME))
1234                 return;
1235         
1236         if(sd->flag & SD_BACKFACING) {
1237                 /* exit volume object: remove from stack */
1238                 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1239                         if(stack[i].object == sd->object) {
1240                                 /* shift back next stack entries */
1241                                 do {
1242                                         stack[i] = stack[i+1];
1243                                         i++;
1244                                 }
1245                                 while(stack[i].shader != SHADER_NONE);
1246
1247                                 return;
1248                         }
1249                 }
1250         }
1251         else {
1252                 /* enter volume object: add to stack */
1253                 int i;
1254
1255                 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
1256                         /* already in the stack? then we have nothing to do */
1257                         if(stack[i].object == sd->object)
1258                                 return;
1259                 }
1260
1261                 /* if we exceed the stack limit, ignore */
1262                 if(i >= VOLUME_STACK_SIZE-1)
1263                         return;
1264
1265                 /* add to the end of the stack */
1266                 stack[i].shader = sd->shader;
1267                 stack[i].object = sd->object;
1268                 stack[i+1].shader = SHADER_NONE;
1269         }
1270 }
1271
1272 #ifdef __SUBSURFACE__
1273 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1274                                                           ShaderData *stack_sd,
1275                                                           Ray *ray,
1276                                                           ccl_addr_space VolumeStack *stack)
1277 {
1278         kernel_assert(kernel_data.integrator.use_volumes);
1279
1280         Ray volume_ray = *ray;
1281
1282 #  ifdef __VOLUME_RECORD_ALL__
1283         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1284         uint num_hits = scene_intersect_volume_all(kg,
1285                                                    &volume_ray,
1286                                                    hits,
1287                                                    2*VOLUME_STACK_SIZE,
1288                                                    PATH_RAY_ALL_VISIBILITY);
1289         if(num_hits > 0) {
1290                 Intersection *isect = hits;
1291
1292                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1293
1294                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1295                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1296                         kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1297                 }
1298         }
1299 #  else
1300         Intersection isect;
1301         int step = 0;
1302         float3 Pend = ray->P + ray->D*ray->t;
1303         while(step < 2 * VOLUME_STACK_SIZE &&
1304               scene_intersect_volume(kg,
1305                                      &volume_ray,
1306                                      &isect,
1307                                      PATH_RAY_ALL_VISIBILITY))
1308         {
1309                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1310                 kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1311
1312                 /* Move ray forward. */
1313                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1314                 if(volume_ray.t != FLT_MAX) {
1315                         volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
1316                 }
1317                 ++step;
1318         }
1319 #  endif
1320 }
1321 #endif
1322
1323 /* Clean stack after the last bounce.
1324  *
1325  * It is expected that all volumes are closed manifolds, so at the time when ray
1326  * hits nothing (for example, it is a last bounce which goes to environment) the
1327  * only expected volume in the stack is the world's one. All the rest volume
1328  * entries should have been exited already.
1329  *
1330  * This isn't always true because of ray intersection precision issues, which
1331  * could lead us to an infinite non-world volume in the stack, causing render
1332  * artifacts.
1333  *
1334  * Use this function after the last bounce to get rid of all volumes apart from
1335  * the world's one after the last bounce to avoid render artifacts.
1336  */
1337 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
1338                                                  ccl_addr_space VolumeStack *volume_stack)
1339 {
1340         if(kernel_data.background.volume_shader != SHADER_NONE) {
1341                 /* Keep the world's volume in stack. */
1342                 volume_stack[1].shader = SHADER_NONE;
1343         }
1344         else {
1345                 volume_stack[0].shader = SHADER_NONE;
1346         }
1347 }
1348
1349 CCL_NAMESPACE_END