Cycles: random walk subsurface scattering.
[blender.git] / intern / cycles / kernel / kernel_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22         VOLUME_PATH_SCATTERED = 0,
23         VOLUME_PATH_ATTENUATED = 1,
24         VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33         float3 sigma_t;
34         float3 sigma_s;
35         float3 emission;
36 } VolumeShaderCoefficients;
37
38 #ifdef __VOLUME__
39
40 /* evaluate shader to get extinction coefficient at P */
41 ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
42                                                        ShaderData *sd,
43                                                        ccl_addr_space PathState *state,
44                                                        float3 P,
45                                                        float3 *extinction)
46 {
47         sd->P = P;
48         shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, 0);
49
50         if(sd->flag & SD_EXTINCTION) {
51                 *extinction = sd->closure_transparent_extinction;
52                 return true;
53         }
54         else {
55                 return false;
56         }
57 }
58
59 /* evaluate shader to get absorption, scattering and emission at P */
60 ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
61                                             ShaderData *sd,
62                                             ccl_addr_space PathState *state,
63                                             float3 P,
64                                             VolumeShaderCoefficients *coeff)
65 {
66         sd->P = P;
67         shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, kernel_data.integrator.max_closures);
68
69         if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
70                 return false;
71         
72         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
73         coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction:
74                                                      make_float3(0.0f, 0.0f, 0.0f);
75         coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background:
76                                                     make_float3(0.0f, 0.0f, 0.0f);
77
78         if(sd->flag & SD_SCATTER) {
79                 if(state->bounce < kernel_data.integrator.max_bounce &&
80                    state->volume_bounce < kernel_data.integrator.max_volume_bounce) {
81                         for(int i = 0; i < sd->num_closure; i++) {
82                                 const ShaderClosure *sc = &sd->closure[i];
83
84                                 if(CLOSURE_IS_VOLUME(sc->type))
85                                         coeff->sigma_s += sc->weight;
86                         }
87                 }
88                 else {
89                         /* When at the max number of bounces, clear scattering. */
90                         sd->flag &= ~SD_SCATTER;
91                 }
92         }
93
94         return true;
95 }
96
97 #endif /* __VOLUME__ */
98
99 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
100 {
101         return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
102 }
103
104 ccl_device float kernel_volume_channel_get(float3 value, int channel)
105 {
106         return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
107 }
108
109 #ifdef __VOLUME__
110
111 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
112 {
113         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
114                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
115
116                 if(shader_flag & SD_HETEROGENEOUS_VOLUME)
117                         return true;
118         }
119
120         return false;
121 }
122
123 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
124 {
125         if(kernel_data.integrator.num_all_lights == 0)
126                 return 0;
127
128         int method = -1;
129
130         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
131                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
132
133                 if(shader_flag & SD_VOLUME_MIS) {
134                         return SD_VOLUME_MIS;
135                 }
136                 else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
137                         if(method == 0)
138                                 return SD_VOLUME_MIS;
139
140                         method = SD_VOLUME_EQUIANGULAR;
141                 }
142                 else {
143                         if(method == SD_VOLUME_EQUIANGULAR)
144                                 return SD_VOLUME_MIS;
145
146                         method = 0;
147                 }
148         }
149
150         return method;
151 }
152
153 /* Volume Shadows
154  *
155  * These functions are used to attenuate shadow rays to lights. Both absorption
156  * and scattering will block light, represented by the extinction coefficient. */
157
158 /* homogeneous volume: assume shader evaluation at the starts gives
159  * the extinction coefficient for the entire line segment */
160 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
161                                                  ccl_addr_space PathState *state,
162                                                  Ray *ray,
163                                                  ShaderData *sd,
164                                                  float3 *throughput)
165 {
166         float3 sigma_t;
167
168         if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
169                 *throughput *= volume_color_transmittance(sigma_t, ray->t);
170 }
171
172 /* heterogeneous volume: integrate stepping through the volume until we
173  * reach the end, get absorbed entirely, or run out of iterations */
174 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
175                                                    ccl_addr_space PathState *state,
176                                                    Ray *ray,
177                                                    ShaderData *sd,
178                                                    float3 *throughput)
179 {
180         float3 tp = *throughput;
181         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
182
183         /* prepare for stepping */
184         int max_steps = kernel_data.integrator.volume_max_steps;
185         float step = kernel_data.integrator.volume_step_size;
186         float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step;
187
188         /* compute extinction at the start */
189         float t = 0.0f;
190
191         float3 sum = make_float3(0.0f, 0.0f, 0.0f);
192
193         for(int i = 0; i < max_steps; i++) {
194                 /* advance to new position */
195                 float new_t = min(ray->t, (i+1) * step);
196                 float dt = new_t - t;
197
198                 /* use random position inside this segment to sample shader */
199                 if(new_t == ray->t)
200                         random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
201
202                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
203                 float3 sigma_t;
204
205                 /* compute attenuation over segment */
206                 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
207                         /* Compute expf() only for every Nth step, to save some calculations
208                          * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
209
210                         sum += (-sigma_t * (new_t - t));
211                         if((i & 0x07) == 0) { /* ToDo: Other interval? */
212                                 tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
213
214                                 /* stop if nearly all light is blocked */
215                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
216                                         break;
217                         }
218                 }
219
220                 /* stop if at the end of the volume */
221                 t = new_t;
222                 if(t == ray->t) {
223                         /* Update throughput in case we haven't done it above */
224                         tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
225                         break;
226                 }
227         }
228
229         *throughput = tp;
230 }
231
232 /* get the volume attenuation over line segment defined by ray, with the
233  * assumption that there are no surfaces blocking light between the endpoints */
234 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
235                                               ShaderData *shadow_sd,
236                                               ccl_addr_space PathState *state,
237                                               Ray *ray,
238                                               float3 *throughput)
239 {
240         shader_setup_from_volume(kg, shadow_sd, ray);
241
242         if(volume_stack_is_heterogeneous(kg, state->volume_stack))
243                 kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
244         else
245                 kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
246 }
247
248 #endif /* __VOLUME__ */
249
250 /* Equi-angular sampling as in:
251  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
252
253 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
254 {
255         float t = ray->t;
256
257         float delta = dot((light_P - ray->P) , ray->D);
258         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
259         if(UNLIKELY(D == 0.0f)) {
260                 *pdf = 0.0f;
261                 return 0.0f;
262         }
263         float theta_a = -atan2f(delta, D);
264         float theta_b = atan2f(t - delta, D);
265         float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
266         if(UNLIKELY(theta_b == theta_a)) {
267                 *pdf = 0.0f;
268                 return 0.0f;
269         }
270         *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
271
272         return min(t, delta + t_); /* min is only for float precision errors */
273 }
274
275 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
276 {
277         float delta = dot((light_P - ray->P) , ray->D);
278         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
279         if(UNLIKELY(D == 0.0f)) {
280                 return 0.0f;
281         }
282
283         float t = ray->t;
284         float t_ = sample_t - delta;
285
286         float theta_a = -atan2f(delta, D);
287         float theta_b = atan2f(t - delta, D);
288         if(UNLIKELY(theta_b == theta_a)) {
289                 return 0.0f;
290         }
291
292         float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
293
294         return pdf;
295 }
296
297 /* Distance sampling */
298
299 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
300 {
301         /* xi is [0, 1[ so log(0) should never happen, division by zero is
302          * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
303         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
304         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
305         float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
306
307         float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
308
309         *transmittance = volume_color_transmittance(sigma_t, sample_t);
310         *pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
311
312         /* todo: optimization: when taken together with hit/miss decision,
313          * the full_transmittance cancels out drops out and xi does not
314          * need to be remapped */
315
316         return sample_t;
317 }
318
319 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
320 {
321         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
322         float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
323
324         return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
325 }
326
327 /* Emission */
328
329 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
330 {
331         /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
332          * this goes to E * t as sigma_t goes to zero
333          *
334          * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
335         float3 emission = coeff->emission;
336
337         if(closure_flag & SD_EXTINCTION) {
338                 float3 sigma_t = coeff->sigma_t;
339
340                 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
341                 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
342                 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
343         }
344         else
345                 emission *= t;
346         
347         return emission;
348 }
349
350 /* Volume Path */
351
352 ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
353 {
354         /* Sample color channel proportional to throughput and single scattering
355          * albedo, to significantly reduce noise with many bounce, following:
356          *
357          * "Practical and Controllable Subsurface Scattering for Production Path
358          *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
359         float3 weights = fabs(throughput * albedo);
360         float sum_weights = weights.x + weights.y + weights.z;
361
362         if(sum_weights > 0.0f) {
363                 *pdf = weights/sum_weights;
364         }
365         else {
366                 *pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f);
367         }
368
369         if(rand < pdf->x) {
370                 return 0;
371         }
372         else if(rand < pdf->x + pdf->y) {
373                 return 1;
374         }
375         else {
376                 return 2;
377         }
378 }
379
380 #ifdef __VOLUME__
381
382 /* homogeneous volume: assume shader evaluation at the start gives
383  * the volume shading coefficient for the entire line segment */
384 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
385     KernelGlobals *kg,
386     ccl_addr_space PathState *state,
387     Ray *ray,
388     ShaderData *sd,
389     PathRadiance *L,
390     ccl_addr_space float3 *throughput,
391     bool probalistic_scatter)
392 {
393         VolumeShaderCoefficients coeff;
394
395         if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
396                 return VOLUME_PATH_MISSED;
397
398         int closure_flag = sd->flag;
399         float t = ray->t;
400         float3 new_tp;
401
402 #ifdef __VOLUME_SCATTER__
403         /* randomly scatter, and if we do t is shortened */
404         if(closure_flag & SD_SCATTER) {
405                 /* Sample channel, use MIS with balance heuristic. */
406                 float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
407                 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
408                 float3 channel_pdf;
409                 int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
410
411                 /* decide if we will hit or miss */
412                 bool scatter = true;
413                 float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
414
415                 if(probalistic_scatter) {
416                         float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
417                         float sample_transmittance = expf(-sample_sigma_t * t);
418
419                         if(1.0f - xi >= sample_transmittance) {
420                                 scatter = true;
421
422                                 /* rescale random number so we can reuse it */
423                                 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
424
425                         }
426                         else
427                                 scatter = false;
428                 }
429
430                 if(scatter) {
431                         /* scattering */
432                         float3 pdf;
433                         float3 transmittance;
434                         float sample_t;
435
436                         /* distance sampling */
437                         sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
438
439                         /* modify pdf for hit/miss decision */
440                         if(probalistic_scatter)
441                                 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
442
443                         new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
444                         t = sample_t;
445                 }
446                 else {
447                         /* no scattering */
448                         float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
449                         float pdf = dot(channel_pdf, transmittance);
450                         new_tp = *throughput * transmittance / pdf;
451                 }
452         }
453         else 
454 #endif
455         if(closure_flag & SD_EXTINCTION) {
456                 /* absorption only, no sampling needed */
457                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
458                 new_tp = *throughput * transmittance;
459         }
460
461         /* integrate emission attenuated by extinction */
462         if(L && (closure_flag & SD_EMISSION)) {
463                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
464                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
465                 path_radiance_accum_emission(L, state, *throughput, emission);
466         }
467
468         /* modify throughput */
469         if(closure_flag & SD_EXTINCTION) {
470                 *throughput = new_tp;
471
472                 /* prepare to scatter to new direction */
473                 if(t < ray->t) {
474                         /* adjust throughput and move to new location */
475                         sd->P = ray->P + t*ray->D;
476
477                         return VOLUME_PATH_SCATTERED;
478                 }
479         }
480
481         return VOLUME_PATH_ATTENUATED;
482 }
483
484 /* heterogeneous volume distance sampling: integrate stepping through the
485  * volume until we reach the end, get absorbed entirely, or run out of
486  * iterations. this does probabilistically scatter or get transmitted through
487  * for path tracing where we don't want to branch. */
488 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
489     KernelGlobals *kg,
490     ccl_addr_space PathState *state,
491     Ray *ray,
492     ShaderData *sd,
493     PathRadiance *L,
494     ccl_addr_space float3 *throughput)
495 {
496         float3 tp = *throughput;
497         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
498
499         /* prepare for stepping */
500         int max_steps = kernel_data.integrator.volume_max_steps;
501         float step_size = kernel_data.integrator.volume_step_size;
502         float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step_size;
503
504         /* compute coefficients at the start */
505         float t = 0.0f;
506         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
507
508         /* pick random color channel, we use the Veach one-sample
509          * model with balance heuristic for the channels */
510         float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
511         float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
512         bool has_scatter = false;
513
514         for(int i = 0; i < max_steps; i++) {
515                 /* advance to new position */
516                 float new_t = min(ray->t, (i+1) * step_size);
517                 float dt = new_t - t;
518
519                 /* use random position inside this segment to sample shader */
520                 if(new_t == ray->t)
521                         random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
522
523                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
524                 VolumeShaderCoefficients coeff;
525
526                 /* compute segment */
527                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
528                         int closure_flag = sd->flag;
529                         float3 new_tp;
530                         float3 transmittance;
531                         bool scatter = false;
532
533                         /* distance sampling */
534 #ifdef __VOLUME_SCATTER__
535                         if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
536                                 has_scatter = true;
537
538                                 /* Sample channel, use MIS with balance heuristic. */
539                                 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
540                                 float3 channel_pdf;
541                                 int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
542
543                                 /* compute transmittance over full step */
544                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
545
546                                 /* decide if we will scatter or continue */
547                                 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
548
549                                 if(1.0f - xi >= sample_transmittance) {
550                                         /* compute sampling distance */
551                                         float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
552                                         float new_dt = -logf(1.0f - xi)/sample_sigma_t;
553                                         new_t = t + new_dt;
554
555                                         /* transmittance and pdf */
556                                         float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
557                                         float3 pdf = coeff.sigma_t * new_transmittance;
558
559                                         /* throughput */
560                                         new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
561                                         scatter = true;
562                                 }
563                                 else {
564                                         /* throughput */
565                                         float pdf = dot(channel_pdf, transmittance);
566                                         new_tp = tp * transmittance / pdf;
567
568                                         /* remap xi so we can reuse it and keep thing stratified */
569                                         xi = 1.0f - (1.0f - xi)/sample_transmittance;
570                                 }
571                         }
572                         else 
573 #endif
574                         if(closure_flag & SD_EXTINCTION) {
575                                 /* absorption only, no sampling needed */
576                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
577                                 new_tp = tp * transmittance;
578                         }
579
580                         /* integrate emission attenuated by absorption */
581                         if(L && (closure_flag & SD_EMISSION)) {
582                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
583                                 path_radiance_accum_emission(L, state, tp, emission);
584                         }
585
586                         /* modify throughput */
587                         if(closure_flag & SD_EXTINCTION) {
588                                 tp = new_tp;
589
590                                 /* stop if nearly all light blocked */
591                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
592                                         tp = make_float3(0.0f, 0.0f, 0.0f);
593                                         break;
594                                 }
595                         }
596
597                         /* prepare to scatter to new direction */
598                         if(scatter) {
599                                 /* adjust throughput and move to new location */
600                                 sd->P = ray->P + new_t*ray->D;
601                                 *throughput = tp;
602
603                                 return VOLUME_PATH_SCATTERED;
604                         }
605                         else {
606                                 /* accumulate transmittance */
607                                 accum_transmittance *= transmittance;
608                         }
609                 }
610
611                 /* stop if at the end of the volume */
612                 t = new_t;
613                 if(t == ray->t)
614                         break;
615         }
616
617         *throughput = tp;
618
619         return VOLUME_PATH_ATTENUATED;
620 }
621
622 /* get the volume attenuation and emission over line segment defined by
623  * ray, with the assumption that there are no surfaces blocking light
624  * between the endpoints. distance sampling is used to decide if we will
625  * scatter or not. */
626 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
627     KernelGlobals *kg,
628     ccl_addr_space PathState *state,
629     ShaderData *sd,
630     Ray *ray,
631     PathRadiance *L,
632     ccl_addr_space float3 *throughput,
633     bool heterogeneous)
634 {
635         shader_setup_from_volume(kg, sd, ray);
636
637         if(heterogeneous)
638                 return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
639         else
640                 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
641 }
642
643 #ifndef __SPLIT_KERNEL__
644 /* Decoupled Volume Sampling
645  *
646  * VolumeSegment is list of coefficients and transmittance stored at all steps
647  * through a volume. This can then later be used for decoupled sampling as in:
648  * "Importance Sampling Techniques for Path Tracing in Participating Media"
649  *
650  * On the GPU this is only supported (but currently not enabled)
651  * for homogeneous volumes (1 step), due to
652  * no support for malloc/free and too much stack usage with a fix size array. */
653
654 typedef struct VolumeStep {
655         float3 sigma_s;                         /* scatter coefficient */
656         float3 sigma_t;                         /* extinction coefficient */
657         float3 accum_transmittance;     /* accumulated transmittance including this step */
658         float3 cdf_distance;            /* cumulative density function for distance sampling */
659         float t;                                        /* distance at end of this step */
660         float shade_t;                          /* jittered distance where shading was done in step */
661         int closure_flag;                       /* shader evaluation closure flags */
662 } VolumeStep;
663
664 typedef struct VolumeSegment {
665         VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
666         VolumeStep *steps;                      /* recorded steps */
667         int numsteps;                           /* number of steps */
668         int closure_flag;                       /* accumulated closure flags from all steps */
669
670         float3 accum_emission;          /* accumulated emission at end of segment */
671         float3 accum_transmittance;     /* accumulated transmittance at end of segment */
672         float3 accum_albedo;        /* accumulated average albedo over segment */
673
674         int sampling_method;            /* volume sampling method */
675 } VolumeSegment;
676
677 /* record volume steps to the end of the volume.
678  *
679  * it would be nice if we could only record up to the point that we need to scatter,
680  * but the entire segment is needed to do always scattering, rather than probabilistically
681  * hitting or missing the volume. if we don't know the transmittance at the end of the
682  * volume we can't generate stratified distance samples up to that transmittance */
683 #ifdef __VOLUME_DECOUPLED__
684 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
685         Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
686 {
687         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
688
689         /* prepare for volume stepping */
690         int max_steps;
691         float step_size, random_jitter_offset;
692
693         if(heterogeneous) {
694                 const int global_max_steps = kernel_data.integrator.volume_max_steps;
695                 step_size = kernel_data.integrator.volume_step_size;
696                 /* compute exact steps in advance for malloc */
697                 if(ray->t > global_max_steps*step_size) {
698                         max_steps = global_max_steps;
699                         step_size = ray->t / (float)max_steps;
700                 }
701                 else {
702                         max_steps = max((int)ceilf(ray->t/step_size), 1);
703                 }
704 #ifdef __KERNEL_CPU__
705                 /* NOTE: For the branched path tracing it's possible to have direct
706                  * and indirect light integration both having volume segments allocated.
707                  * We detect this using index in the pre-allocated memory. Currently we
708                  * only support two segments allocated at a time, if more needed some
709                  * modifications to the KernelGlobals will be needed.
710                  *
711                  * This gives us restrictions that decoupled record should only happen
712                  * in the stack manner, meaning if there's subsequent call of decoupled
713                  * record it'll need to free memory before it's caller frees memory.
714                  */
715                 const int index = kg->decoupled_volume_steps_index;
716                 assert(index < sizeof(kg->decoupled_volume_steps) /
717                                sizeof(*kg->decoupled_volume_steps));
718                 if(kg->decoupled_volume_steps[index] == NULL) {
719                         kg->decoupled_volume_steps[index] =
720                                 (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
721                 }
722                 segment->steps = kg->decoupled_volume_steps[index];
723                 ++kg->decoupled_volume_steps_index;
724 #else
725                 segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
726 #endif
727                 random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
728         }
729         else {
730                 max_steps = 1;
731                 step_size = ray->t;
732                 random_jitter_offset = 0.0f;
733                 segment->steps = &segment->stack_step;
734         }
735         
736         /* init accumulation variables */
737         float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
738         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
739         float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
740         float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
741         float t = 0.0f;
742
743         segment->numsteps = 0;
744         segment->closure_flag = 0;
745         bool is_last_step_empty = false;
746
747         VolumeStep *step = segment->steps;
748
749         for(int i = 0; i < max_steps; i++, step++) {
750                 /* advance to new position */
751                 float new_t = min(ray->t, (i+1) * step_size);
752                 float dt = new_t - t;
753
754                 /* use random position inside this segment to sample shader */
755                 if(heterogeneous && new_t == ray->t)
756                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
757
758                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
759                 VolumeShaderCoefficients coeff;
760
761                 /* compute segment */
762                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
763                         int closure_flag = sd->flag;
764                         float3 sigma_t = coeff.sigma_t;
765
766                         /* compute average albedo for channel sampling */
767                         if(closure_flag & SD_SCATTER) {
768                                 accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
769                         }
770
771                         /* compute accumulated transmittance */
772                         float3 transmittance = volume_color_transmittance(sigma_t, dt);
773
774                         /* compute emission attenuated by absorption */
775                         if(closure_flag & SD_EMISSION) {
776                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
777                                 accum_emission += accum_transmittance * emission;
778                         }
779
780                         accum_transmittance *= transmittance;
781
782                         /* compute pdf for distance sampling */
783                         float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
784                         cdf_distance = cdf_distance + pdf_distance;
785
786                         /* write step data */
787                         step->sigma_t = sigma_t;
788                         step->sigma_s = coeff.sigma_s;
789                         step->closure_flag = closure_flag;
790
791                         segment->closure_flag |= closure_flag;
792
793                         is_last_step_empty = false;
794                         segment->numsteps++;
795                 }
796                 else {
797                         if(is_last_step_empty) {
798                                 /* consecutive empty step, merge */
799                                 step--;
800                         }
801                         else {
802                                 /* store empty step */
803                                 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
804                                 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
805                                 step->closure_flag = 0;
806
807                                 segment->numsteps++;
808                                 is_last_step_empty = true;
809                         }
810                 }
811
812                 step->accum_transmittance = accum_transmittance;
813                 step->cdf_distance = cdf_distance;
814                 step->t = new_t;
815                 step->shade_t = t + random_jitter_offset;
816
817                 /* stop if at the end of the volume */
818                 t = new_t;
819                 if(t == ray->t)
820                         break;
821
822                 /* stop if nearly all light blocked */
823                 if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
824                         break;
825         }
826
827         /* store total emission and transmittance */
828         segment->accum_emission = accum_emission;
829         segment->accum_transmittance = accum_transmittance;
830         segment->accum_albedo = accum_albedo;
831
832         /* normalize cumulative density function for distance sampling */
833         VolumeStep *last_step = segment->steps + segment->numsteps - 1;
834
835         if(!is_zero(last_step->cdf_distance)) {
836                 VolumeStep *step = &segment->steps[0];
837                 int numsteps = segment->numsteps;
838                 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
839
840                 for(int i = 0; i < numsteps; i++, step++)
841                         step->cdf_distance *= inv_cdf_distance_sum;
842         }
843 }
844
845 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
846 {
847         if(segment->steps != &segment->stack_step) {
848 #ifdef __KERNEL_CPU__
849                 /* NOTE: We only allow free last allocated segment.
850                  * No random order of alloc/free is supported.
851                  */
852                 assert(kg->decoupled_volume_steps_index > 0);
853                 assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
854                 --kg->decoupled_volume_steps_index;
855 #else
856                 free(segment->steps);
857 #endif
858         }
859 }
860 #endif  /* __VOLUME_DECOUPLED__ */
861
862 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
863  * marching.
864  *
865  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
866 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
867         KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
868         float3 *throughput, float rphase, float rscatter,
869         const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
870 {
871         kernel_assert(segment->closure_flag & SD_SCATTER);
872
873         /* Sample color channel, use MIS with balance heuristic. */
874         float3 channel_pdf;
875         int channel = kernel_volume_sample_channel(segment->accum_albedo,
876                                                    *throughput,
877                                                    rphase,
878                                                    &channel_pdf);
879
880         float xi = rscatter;
881
882         /* probabilistic scattering decision based on transmittance */
883         if(probalistic_scatter) {
884                 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
885
886                 if(1.0f - xi >= sample_transmittance) {
887                         /* rescale random number so we can reuse it */
888                         xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
889                 }
890                 else {
891                         *throughput /= sample_transmittance;
892                         return VOLUME_PATH_MISSED;
893                 }
894         }
895
896         VolumeStep *step;
897         float3 transmittance;
898         float pdf, sample_t;
899         float mis_weight = 1.0f;
900         bool distance_sample = true;
901         bool use_mis = false;
902
903         if(segment->sampling_method && light_P) {
904                 if(segment->sampling_method == SD_VOLUME_MIS) {
905                         /* multiple importance sample: randomly pick between
906                          * equiangular and distance sampling strategy */
907                         if(xi < 0.5f) {
908                                 xi *= 2.0f;
909                         }
910                         else {
911                                 xi = (xi - 0.5f)*2.0f;
912                                 distance_sample = false;
913                         }
914
915                         use_mis = true;
916                 }
917                 else {
918                         /* only equiangular sampling */
919                         distance_sample = false;
920                 }
921         }
922
923         /* distance sampling */
924         if(distance_sample) {
925                 /* find step in cdf */
926                 step = segment->steps;
927
928                 float prev_t = 0.0f;
929                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
930
931                 if(segment->numsteps > 1) {
932                         float prev_cdf = 0.0f;
933                         float step_cdf = 1.0f;
934                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
935
936                         for(int i = 0; ; i++, step++) {
937                                 /* todo: optimize using binary search */
938                                 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
939
940                                 if(xi < step_cdf || i == segment->numsteps-1)
941                                         break;
942
943                                 prev_cdf = step_cdf;
944                                 prev_t = step->t;
945                                 prev_cdf_distance = step->cdf_distance;
946                         }
947
948                         /* remap xi so we can reuse it */
949                         xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
950
951                         /* pdf for picking step */
952                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
953                 }
954
955                 /* determine range in which we will sample */
956                 float step_t = step->t - prev_t;
957
958                 /* sample distance and compute transmittance */
959                 float3 distance_pdf;
960                 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
961
962                 /* modify pdf for hit/miss decision */
963                 if(probalistic_scatter)
964                         distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
965
966                 pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
967
968                 /* multiple importance sampling */
969                 if(use_mis) {
970                         float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
971                         mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
972                 }
973         }
974         /* equi-angular sampling */
975         else {
976                 /* sample distance */
977                 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
978
979                 /* find step in which sampled distance is located */
980                 step = segment->steps;
981
982                 float prev_t = 0.0f;
983                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
984
985                 if(segment->numsteps > 1) {
986                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
987
988                         int numsteps = segment->numsteps;
989                         int high = numsteps - 1;
990                         int low = 0;
991                         int mid;
992
993                         while(low < high) {
994                                 mid = (low + high) >> 1;
995
996                                 if(sample_t < step[mid].t)
997                                         high = mid;
998                                 else if(sample_t >= step[mid + 1].t)
999                                         low = mid + 1;
1000                                 else {
1001                                         /* found our interval in step[mid] .. step[mid+1] */
1002                                         prev_t = step[mid].t;
1003                                         prev_cdf_distance = step[mid].cdf_distance;
1004                                         step += mid+1;
1005                                         break;
1006                                 }
1007                         }
1008
1009                         if(low >= numsteps - 1) {
1010                                 prev_t = step[numsteps - 1].t;
1011                                 prev_cdf_distance = step[numsteps-1].cdf_distance;
1012                                 step += numsteps - 1;
1013                         }
1014
1015                         /* pdf for picking step with distance sampling */
1016                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
1017                 }
1018
1019                 /* determine range in which we will sample */
1020                 float step_t = step->t - prev_t;
1021                 float step_sample_t = sample_t - prev_t;
1022
1023                 /* compute transmittance */
1024                 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
1025
1026                 /* multiple importance sampling */
1027                 if(use_mis) {
1028                         float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
1029                         float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
1030                         mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
1031                 }
1032         }
1033         if(sample_t < 0.0f || pdf == 0.0f) {
1034                 return VOLUME_PATH_MISSED;
1035         }
1036
1037         /* compute transmittance up to this step */
1038         if(step != segment->steps)
1039                 transmittance *= (step-1)->accum_transmittance;
1040
1041         /* modify throughput */
1042         *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
1043
1044         /* evaluate shader to create closures at shading point */
1045         if(segment->numsteps > 1) {
1046                 sd->P = ray->P + step->shade_t*ray->D;
1047
1048                 VolumeShaderCoefficients coeff;
1049                 volume_shader_sample(kg, sd, state, sd->P, &coeff);
1050         }
1051
1052         /* move to new position */
1053         sd->P = ray->P + sample_t*ray->D;
1054
1055         return VOLUME_PATH_SCATTERED;
1056 }
1057 #endif /* __SPLIT_KERNEL */
1058
1059 /* decide if we need to use decoupled or not */
1060 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
1061 {
1062         /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
1063          * which also means equiangular and multiple importance sampling is not
1064          * support for that case */
1065         if(!kernel_data.integrator.volume_decoupled)
1066                 return false;
1067
1068 #ifdef __KERNEL_GPU__
1069         if(heterogeneous)
1070                 return false;
1071 #endif
1072
1073         /* equiangular and multiple importance sampling only implemented for decoupled */
1074         if(sampling_method != 0)
1075                 return true;
1076
1077         /* for all light sampling use decoupled, reusing shader evaluations is
1078          * typically faster in that case */
1079         if(direct)
1080                 return kernel_data.integrator.sample_all_lights_direct;
1081         else
1082                 return kernel_data.integrator.sample_all_lights_indirect;
1083 }
1084
1085 /* Volume Stack
1086  *
1087  * This is an array of object/shared ID's that the current segment of the path
1088  * is inside of. */
1089
1090 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
1091                                          ShaderData *stack_sd,
1092                                          ccl_addr_space const PathState *state,
1093                                          ccl_addr_space const Ray *ray,
1094                                          ccl_addr_space VolumeStack *stack)
1095 {
1096         /* NULL ray happens in the baker, does it need proper initialization of
1097          * camera in volume?
1098          */
1099         if(!kernel_data.cam.is_inside_volume || ray == NULL) {
1100                 /* Camera is guaranteed to be in the air, only take background volume
1101                  * into account in this case.
1102                  */
1103                 if(kernel_data.background.volume_shader != SHADER_NONE) {
1104                         stack[0].shader = kernel_data.background.volume_shader;
1105                         stack[0].object = PRIM_NONE;
1106                         stack[1].shader = SHADER_NONE;
1107                 }
1108                 else {
1109                         stack[0].shader = SHADER_NONE;
1110                 }
1111                 return;
1112         }
1113
1114         kernel_assert(state->flag & PATH_RAY_CAMERA);
1115
1116         Ray volume_ray = *ray;
1117         volume_ray.t = FLT_MAX;
1118
1119         const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
1120         int stack_index = 0, enclosed_index = 0;
1121
1122 #ifdef __VOLUME_RECORD_ALL__
1123         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1124         uint num_hits = scene_intersect_volume_all(kg,
1125                                                    &volume_ray,
1126                                                    hits,
1127                                                    2*VOLUME_STACK_SIZE,
1128                                                    visibility);
1129         if(num_hits > 0) {
1130                 int enclosed_volumes[VOLUME_STACK_SIZE];
1131                 Intersection *isect = hits;
1132
1133                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1134
1135                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1136                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1137                         if(stack_sd->flag & SD_BACKFACING) {
1138                                 bool need_add = true;
1139                                 for(int i = 0; i < enclosed_index && need_add; ++i) {
1140                                         /* If ray exited the volume and never entered to that volume
1141                                          * it means that camera is inside such a volume.
1142                                          */
1143                                         if(enclosed_volumes[i] == stack_sd->object) {
1144                                                 need_add = false;
1145                                         }
1146                                 }
1147                                 for(int i = 0; i < stack_index && need_add; ++i) {
1148                                         /* Don't add intersections twice. */
1149                                         if(stack[i].object == stack_sd->object) {
1150                                                 need_add = false;
1151                                                 break;
1152                                         }
1153                                 }
1154                                 if(need_add) {
1155                                         stack[stack_index].object = stack_sd->object;
1156                                         stack[stack_index].shader = stack_sd->shader;
1157                                         ++stack_index;
1158                                 }
1159                         }
1160                         else {
1161                                 /* If ray from camera enters the volume, this volume shouldn't
1162                                  * be added to the stack on exit.
1163                                  */
1164                                 enclosed_volumes[enclosed_index++] = stack_sd->object;
1165                         }
1166                 }
1167         }
1168 #else
1169         int enclosed_volumes[VOLUME_STACK_SIZE];
1170         int step = 0;
1171
1172         while(stack_index < VOLUME_STACK_SIZE - 1 &&
1173               enclosed_index < VOLUME_STACK_SIZE - 1 &&
1174               step < 2 * VOLUME_STACK_SIZE)
1175         {
1176                 Intersection isect;
1177                 if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
1178                         break;
1179                 }
1180
1181                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1182                 if(stack_sd->flag & SD_BACKFACING) {
1183                         /* If ray exited the volume and never entered to that volume
1184                          * it means that camera is inside such a volume.
1185                          */
1186                         bool need_add = true;
1187                         for(int i = 0; i < enclosed_index && need_add; ++i) {
1188                                 /* If ray exited the volume and never entered to that volume
1189                                  * it means that camera is inside such a volume.
1190                                  */
1191                                 if(enclosed_volumes[i] == stack_sd->object) {
1192                                         need_add = false;
1193                                 }
1194                         }
1195                         for(int i = 0; i < stack_index && need_add; ++i) {
1196                                 /* Don't add intersections twice. */
1197                                 if(stack[i].object == stack_sd->object) {
1198                                         need_add = false;
1199                                         break;
1200                                 }
1201                         }
1202                         if(need_add) {
1203                                 stack[stack_index].object = stack_sd->object;
1204                                 stack[stack_index].shader = stack_sd->shader;
1205                                 ++stack_index;
1206                         }
1207                 }
1208                 else {
1209                         /* If ray from camera enters the volume, this volume shouldn't
1210                          * be added to the stack on exit.
1211                          */
1212                         enclosed_volumes[enclosed_index++] = stack_sd->object;
1213                 }
1214
1215                 /* Move ray forward. */
1216                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1217                 ++step;
1218         }
1219 #endif
1220         /* stack_index of 0 means quick checks outside of the kernel gave false
1221          * positive, nothing to worry about, just we've wasted quite a few of
1222          * ticks just to come into conclusion that camera is in the air.
1223          *
1224          * In this case we're doing the same above -- check whether background has
1225          * volume.
1226          */
1227         if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1228                 stack[0].shader = kernel_data.background.volume_shader;
1229                 stack[0].object = PRIM_NONE;
1230                 stack[1].shader = SHADER_NONE;
1231         }
1232         else {
1233                 stack[stack_index].shader = SHADER_NONE;
1234         }
1235 }
1236
1237 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
1238 {
1239         /* todo: we should have some way for objects to indicate if they want the
1240          * world shader to work inside them. excluding it by default is problematic
1241          * because non-volume objects can't be assumed to be closed manifolds */
1242
1243         if(!(sd->flag & SD_HAS_VOLUME))
1244                 return;
1245         
1246         if(sd->flag & SD_BACKFACING) {
1247                 /* exit volume object: remove from stack */
1248                 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1249                         if(stack[i].object == sd->object) {
1250                                 /* shift back next stack entries */
1251                                 do {
1252                                         stack[i] = stack[i+1];
1253                                         i++;
1254                                 }
1255                                 while(stack[i].shader != SHADER_NONE);
1256
1257                                 return;
1258                         }
1259                 }
1260         }
1261         else {
1262                 /* enter volume object: add to stack */
1263                 int i;
1264
1265                 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
1266                         /* already in the stack? then we have nothing to do */
1267                         if(stack[i].object == sd->object)
1268                                 return;
1269                 }
1270
1271                 /* if we exceed the stack limit, ignore */
1272                 if(i >= VOLUME_STACK_SIZE-1)
1273                         return;
1274
1275                 /* add to the end of the stack */
1276                 stack[i].shader = sd->shader;
1277                 stack[i].object = sd->object;
1278                 stack[i+1].shader = SHADER_NONE;
1279         }
1280 }
1281
1282 #ifdef __SUBSURFACE__
1283 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1284                                                           ShaderData *stack_sd,
1285                                                           Ray *ray,
1286                                                           ccl_addr_space VolumeStack *stack)
1287 {
1288         kernel_assert(kernel_data.integrator.use_volumes);
1289
1290         Ray volume_ray = *ray;
1291
1292 #  ifdef __VOLUME_RECORD_ALL__
1293         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1294         uint num_hits = scene_intersect_volume_all(kg,
1295                                                    &volume_ray,
1296                                                    hits,
1297                                                    2*VOLUME_STACK_SIZE,
1298                                                    PATH_RAY_ALL_VISIBILITY);
1299         if(num_hits > 0) {
1300                 Intersection *isect = hits;
1301
1302                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1303
1304                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1305                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1306                         kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1307                 }
1308         }
1309 #  else
1310         Intersection isect;
1311         int step = 0;
1312         float3 Pend = ray->P + ray->D*ray->t;
1313         while(step < 2 * VOLUME_STACK_SIZE &&
1314               scene_intersect_volume(kg,
1315                                      &volume_ray,
1316                                      &isect,
1317                                      PATH_RAY_ALL_VISIBILITY))
1318         {
1319                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1320                 kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1321
1322                 /* Move ray forward. */
1323                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1324                 if(volume_ray.t != FLT_MAX) {
1325                         volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
1326                 }
1327                 ++step;
1328         }
1329 #  endif
1330 }
1331 #endif
1332
1333 /* Clean stack after the last bounce.
1334  *
1335  * It is expected that all volumes are closed manifolds, so at the time when ray
1336  * hits nothing (for example, it is a last bounce which goes to environment) the
1337  * only expected volume in the stack is the world's one. All the rest volume
1338  * entries should have been exited already.
1339  *
1340  * This isn't always true because of ray intersection precision issues, which
1341  * could lead us to an infinite non-world volume in the stack, causing render
1342  * artifacts.
1343  *
1344  * Use this function after the last bounce to get rid of all volumes apart from
1345  * the world's one after the last bounce to avoid render artifacts.
1346  */
1347 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
1348                                                  ccl_addr_space VolumeStack *volume_stack)
1349 {
1350         if(kernel_data.background.volume_shader != SHADER_NONE) {
1351                 /* Keep the world's volume in stack. */
1352                 volume_stack[1].shader = SHADER_NONE;
1353         }
1354         else {
1355                 volume_stack[0].shader = SHADER_NONE;
1356         }
1357 }
1358
1359 #endif /* __VOLUME__ */
1360
1361 CCL_NAMESPACE_END