Cycles: better path termination for transparency.
[blender.git] / intern / cycles / kernel / kernel_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22         VOLUME_PATH_SCATTERED = 0,
23         VOLUME_PATH_ATTENUATED = 1,
24         VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33         float3 sigma_t;
34         float3 sigma_s;
35         float3 emission;
36 } VolumeShaderCoefficients;
37
38 #ifdef __VOLUME__
39
40 /* evaluate shader to get extinction coefficient at P */
41 ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
42                                                        ShaderData *sd,
43                                                        ccl_addr_space PathState *state,
44                                                        float3 P,
45                                                        float3 *extinction)
46 {
47         sd->P = P;
48         shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
49
50         if(sd->flag & SD_EXTINCTION) {
51                 *extinction = sd->closure_transparent_extinction;
52                 return true;
53         }
54         else {
55                 return false;
56         }
57 }
58
59 /* evaluate shader to get absorption, scattering and emission at P */
60 ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
61                                             ShaderData *sd,
62                                             ccl_addr_space PathState *state,
63                                             float3 P,
64                                             VolumeShaderCoefficients *coeff)
65 {
66         sd->P = P;
67         shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
68
69         if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
70                 return false;
71         
72         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
73         coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction:
74                                                      make_float3(0.0f, 0.0f, 0.0f);
75         coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background:
76                                                     make_float3(0.0f, 0.0f, 0.0f);
77
78         if(sd->flag & SD_SCATTER) {
79                 for(int i = 0; i < sd->num_closure; i++) {
80                         const ShaderClosure *sc = &sd->closure[i];
81
82                         if(CLOSURE_IS_VOLUME(sc->type))
83                                 coeff->sigma_s += sc->weight;
84                 }
85         }
86
87         return true;
88 }
89
90 #endif /* __VOLUME__ */
91
92 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
93 {
94         return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
95 }
96
97 ccl_device float kernel_volume_channel_get(float3 value, int channel)
98 {
99         return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
100 }
101
102 #ifdef __VOLUME__
103
104 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
105 {
106         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
107                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
108
109                 if(shader_flag & SD_HETEROGENEOUS_VOLUME)
110                         return true;
111         }
112
113         return false;
114 }
115
116 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
117 {
118         if(kernel_data.integrator.num_all_lights == 0)
119                 return 0;
120
121         int method = -1;
122
123         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
124                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
125
126                 if(shader_flag & SD_VOLUME_MIS) {
127                         return SD_VOLUME_MIS;
128                 }
129                 else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
130                         if(method == 0)
131                                 return SD_VOLUME_MIS;
132
133                         method = SD_VOLUME_EQUIANGULAR;
134                 }
135                 else {
136                         if(method == SD_VOLUME_EQUIANGULAR)
137                                 return SD_VOLUME_MIS;
138
139                         method = 0;
140                 }
141         }
142
143         return method;
144 }
145
146 /* Volume Shadows
147  *
148  * These functions are used to attenuate shadow rays to lights. Both absorption
149  * and scattering will block light, represented by the extinction coefficient. */
150
151 /* homogeneous volume: assume shader evaluation at the starts gives
152  * the extinction coefficient for the entire line segment */
153 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
154                                                  ccl_addr_space PathState *state,
155                                                  Ray *ray,
156                                                  ShaderData *sd,
157                                                  float3 *throughput)
158 {
159         float3 sigma_t;
160
161         if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
162                 *throughput *= volume_color_transmittance(sigma_t, ray->t);
163 }
164
165 /* heterogeneous volume: integrate stepping through the volume until we
166  * reach the end, get absorbed entirely, or run out of iterations */
167 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
168                                                    ccl_addr_space PathState *state,
169                                                    Ray *ray,
170                                                    ShaderData *sd,
171                                                    float3 *throughput)
172 {
173         float3 tp = *throughput;
174         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
175
176         /* prepare for stepping */
177         int max_steps = kernel_data.integrator.volume_max_steps;
178         float step = kernel_data.integrator.volume_step_size;
179         float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step;
180
181         /* compute extinction at the start */
182         float t = 0.0f;
183
184         float3 sum = make_float3(0.0f, 0.0f, 0.0f);
185
186         for(int i = 0; i < max_steps; i++) {
187                 /* advance to new position */
188                 float new_t = min(ray->t, (i+1) * step);
189                 float dt = new_t - t;
190
191                 /* use random position inside this segment to sample shader */
192                 if(new_t == ray->t)
193                         random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
194
195                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
196                 float3 sigma_t;
197
198                 /* compute attenuation over segment */
199                 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
200                         /* Compute expf() only for every Nth step, to save some calculations
201                          * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
202
203                         sum += (-sigma_t * (new_t - t));
204                         if((i & 0x07) == 0) { /* ToDo: Other interval? */
205                                 tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
206
207                                 /* stop if nearly all light is blocked */
208                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
209                                         break;
210                         }
211                 }
212
213                 /* stop if at the end of the volume */
214                 t = new_t;
215                 if(t == ray->t) {
216                         /* Update throughput in case we haven't done it above */
217                         tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
218                         break;
219                 }
220         }
221
222         *throughput = tp;
223 }
224
225 /* get the volume attenuation over line segment defined by ray, with the
226  * assumption that there are no surfaces blocking light between the endpoints */
227 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
228                                               ShaderData *shadow_sd,
229                                               ccl_addr_space PathState *state,
230                                               Ray *ray,
231                                               float3 *throughput)
232 {
233         shader_setup_from_volume(kg, shadow_sd, ray);
234
235         if(volume_stack_is_heterogeneous(kg, state->volume_stack))
236                 kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
237         else
238                 kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
239 }
240
241 #endif /* __VOLUME__ */
242
243 /* Equi-angular sampling as in:
244  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
245
246 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
247 {
248         float t = ray->t;
249
250         float delta = dot((light_P - ray->P) , ray->D);
251         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
252         if(UNLIKELY(D == 0.0f)) {
253                 *pdf = 0.0f;
254                 return 0.0f;
255         }
256         float theta_a = -atan2f(delta, D);
257         float theta_b = atan2f(t - delta, D);
258         float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
259         if(UNLIKELY(theta_b == theta_a)) {
260                 *pdf = 0.0f;
261                 return 0.0f;
262         }
263         *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
264
265         return min(t, delta + t_); /* min is only for float precision errors */
266 }
267
268 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
269 {
270         float delta = dot((light_P - ray->P) , ray->D);
271         float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
272         if(UNLIKELY(D == 0.0f)) {
273                 return 0.0f;
274         }
275
276         float t = ray->t;
277         float t_ = sample_t - delta;
278
279         float theta_a = -atan2f(delta, D);
280         float theta_b = atan2f(t - delta, D);
281         if(UNLIKELY(theta_b == theta_a)) {
282                 return 0.0f;
283         }
284
285         float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
286
287         return pdf;
288 }
289
290 /* Distance sampling */
291
292 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
293 {
294         /* xi is [0, 1[ so log(0) should never happen, division by zero is
295          * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
296         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
297         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
298         float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
299
300         float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
301
302         *transmittance = volume_color_transmittance(sigma_t, sample_t);
303         *pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
304
305         /* todo: optimization: when taken together with hit/miss decision,
306          * the full_transmittance cancels out drops out and xi does not
307          * need to be remapped */
308
309         return sample_t;
310 }
311
312 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
313 {
314         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
315         float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
316
317         return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
318 }
319
320 /* Emission */
321
322 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
323 {
324         /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
325          * this goes to E * t as sigma_t goes to zero
326          *
327          * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
328         float3 emission = coeff->emission;
329
330         if(closure_flag & SD_EXTINCTION) {
331                 float3 sigma_t = coeff->sigma_t;
332
333                 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
334                 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
335                 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
336         }
337         else
338                 emission *= t;
339         
340         return emission;
341 }
342
343 /* Volume Path */
344
345 ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
346 {
347         /* Sample color channel proportional to throughput and single scattering
348          * albedo, to significantly reduce noise with many bounce, following:
349          *
350          * "Practical and Controllable Subsurface Scattering for Production Path
351          *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
352         float3 weights = fabs(throughput * albedo);
353         float sum_weights = weights.x + weights.y + weights.z;
354
355         if(sum_weights > 0.0f) {
356                 *pdf = weights/sum_weights;
357         }
358         else {
359                 *pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f);
360         }
361
362         if(rand < pdf->x) {
363                 return 0;
364         }
365         else if(rand < pdf->x + pdf->y) {
366                 return 1;
367         }
368         else {
369                 return 2;
370         }
371 }
372
373 #ifdef __VOLUME__
374
375 /* homogeneous volume: assume shader evaluation at the start gives
376  * the volume shading coefficient for the entire line segment */
377 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
378     KernelGlobals *kg,
379     ccl_addr_space PathState *state,
380     Ray *ray,
381     ShaderData *sd,
382     PathRadiance *L,
383     ccl_addr_space float3 *throughput,
384     bool probalistic_scatter)
385 {
386         VolumeShaderCoefficients coeff;
387
388         if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
389                 return VOLUME_PATH_MISSED;
390
391         int closure_flag = sd->flag;
392         float t = ray->t;
393         float3 new_tp;
394
395 #ifdef __VOLUME_SCATTER__
396         /* randomly scatter, and if we do t is shortened */
397         if(closure_flag & SD_SCATTER) {
398                 /* Sample channel, use MIS with balance heuristic. */
399                 float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
400                 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
401                 float3 channel_pdf;
402                 int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
403
404                 /* decide if we will hit or miss */
405                 bool scatter = true;
406                 float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
407
408                 if(probalistic_scatter) {
409                         float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
410                         float sample_transmittance = expf(-sample_sigma_t * t);
411
412                         if(1.0f - xi >= sample_transmittance) {
413                                 scatter = true;
414
415                                 /* rescale random number so we can reuse it */
416                                 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
417
418                         }
419                         else
420                                 scatter = false;
421                 }
422
423                 if(scatter) {
424                         /* scattering */
425                         float3 pdf;
426                         float3 transmittance;
427                         float sample_t;
428
429                         /* distance sampling */
430                         sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
431
432                         /* modify pdf for hit/miss decision */
433                         if(probalistic_scatter)
434                                 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
435
436                         new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
437                         t = sample_t;
438                 }
439                 else {
440                         /* no scattering */
441                         float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
442                         float pdf = dot(channel_pdf, transmittance);
443                         new_tp = *throughput * transmittance / pdf;
444                 }
445         }
446         else 
447 #endif
448         if(closure_flag & SD_EXTINCTION) {
449                 /* absorption only, no sampling needed */
450                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
451                 new_tp = *throughput * transmittance;
452         }
453
454         /* integrate emission attenuated by extinction */
455         if(L && (closure_flag & SD_EMISSION)) {
456                 float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
457                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
458                 path_radiance_accum_emission(L, state, *throughput, emission);
459         }
460
461         /* modify throughput */
462         if(closure_flag & SD_EXTINCTION) {
463                 *throughput = new_tp;
464
465                 /* prepare to scatter to new direction */
466                 if(t < ray->t) {
467                         /* adjust throughput and move to new location */
468                         sd->P = ray->P + t*ray->D;
469
470                         return VOLUME_PATH_SCATTERED;
471                 }
472         }
473
474         return VOLUME_PATH_ATTENUATED;
475 }
476
477 /* heterogeneous volume distance sampling: integrate stepping through the
478  * volume until we reach the end, get absorbed entirely, or run out of
479  * iterations. this does probabilistically scatter or get transmitted through
480  * for path tracing where we don't want to branch. */
481 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
482     KernelGlobals *kg,
483     ccl_addr_space PathState *state,
484     Ray *ray,
485     ShaderData *sd,
486     PathRadiance *L,
487     ccl_addr_space float3 *throughput)
488 {
489         float3 tp = *throughput;
490         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
491
492         /* prepare for stepping */
493         int max_steps = kernel_data.integrator.volume_max_steps;
494         float step_size = kernel_data.integrator.volume_step_size;
495         float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step_size;
496
497         /* compute coefficients at the start */
498         float t = 0.0f;
499         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
500
501         /* pick random color channel, we use the Veach one-sample
502          * model with balance heuristic for the channels */
503         float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
504         float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
505         bool has_scatter = false;
506
507         for(int i = 0; i < max_steps; i++) {
508                 /* advance to new position */
509                 float new_t = min(ray->t, (i+1) * step_size);
510                 float dt = new_t - t;
511
512                 /* use random position inside this segment to sample shader */
513                 if(new_t == ray->t)
514                         random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
515
516                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
517                 VolumeShaderCoefficients coeff;
518
519                 /* compute segment */
520                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
521                         int closure_flag = sd->flag;
522                         float3 new_tp;
523                         float3 transmittance;
524                         bool scatter = false;
525
526                         /* distance sampling */
527 #ifdef __VOLUME_SCATTER__
528                         if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
529                                 has_scatter = true;
530
531                                 /* Sample channel, use MIS with balance heuristic. */
532                                 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
533                                 float3 channel_pdf;
534                                 int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
535
536                                 /* compute transmittance over full step */
537                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
538
539                                 /* decide if we will scatter or continue */
540                                 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
541
542                                 if(1.0f - xi >= sample_transmittance) {
543                                         /* compute sampling distance */
544                                         float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
545                                         float new_dt = -logf(1.0f - xi)/sample_sigma_t;
546                                         new_t = t + new_dt;
547
548                                         /* transmittance and pdf */
549                                         float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
550                                         float3 pdf = coeff.sigma_t * new_transmittance;
551
552                                         /* throughput */
553                                         new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
554                                         scatter = true;
555                                 }
556                                 else {
557                                         /* throughput */
558                                         float pdf = dot(channel_pdf, transmittance);
559                                         new_tp = tp * transmittance / pdf;
560
561                                         /* remap xi so we can reuse it and keep thing stratified */
562                                         xi = 1.0f - (1.0f - xi)/sample_transmittance;
563                                 }
564                         }
565                         else 
566 #endif
567                         if(closure_flag & SD_EXTINCTION) {
568                                 /* absorption only, no sampling needed */
569                                 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
570                                 new_tp = tp * transmittance;
571                         }
572
573                         /* integrate emission attenuated by absorption */
574                         if(L && (closure_flag & SD_EMISSION)) {
575                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
576                                 path_radiance_accum_emission(L, state, tp, emission);
577                         }
578
579                         /* modify throughput */
580                         if(closure_flag & SD_EXTINCTION) {
581                                 tp = new_tp;
582
583                                 /* stop if nearly all light blocked */
584                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
585                                         tp = make_float3(0.0f, 0.0f, 0.0f);
586                                         break;
587                                 }
588                         }
589
590                         /* prepare to scatter to new direction */
591                         if(scatter) {
592                                 /* adjust throughput and move to new location */
593                                 sd->P = ray->P + new_t*ray->D;
594                                 *throughput = tp;
595
596                                 return VOLUME_PATH_SCATTERED;
597                         }
598                         else {
599                                 /* accumulate transmittance */
600                                 accum_transmittance *= transmittance;
601                         }
602                 }
603
604                 /* stop if at the end of the volume */
605                 t = new_t;
606                 if(t == ray->t)
607                         break;
608         }
609
610         *throughput = tp;
611
612         return VOLUME_PATH_ATTENUATED;
613 }
614
615 /* get the volume attenuation and emission over line segment defined by
616  * ray, with the assumption that there are no surfaces blocking light
617  * between the endpoints. distance sampling is used to decide if we will
618  * scatter or not. */
619 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
620     KernelGlobals *kg,
621     ccl_addr_space PathState *state,
622     ShaderData *sd,
623     Ray *ray,
624     PathRadiance *L,
625     ccl_addr_space float3 *throughput,
626     bool heterogeneous)
627 {
628         shader_setup_from_volume(kg, sd, ray);
629
630         if(heterogeneous)
631                 return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput);
632         else
633                 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
634 }
635
636 #ifndef __SPLIT_KERNEL__
637 /* Decoupled Volume Sampling
638  *
639  * VolumeSegment is list of coefficients and transmittance stored at all steps
640  * through a volume. This can then later be used for decoupled sampling as in:
641  * "Importance Sampling Techniques for Path Tracing in Participating Media"
642  *
643  * On the GPU this is only supported (but currently not enabled)
644  * for homogeneous volumes (1 step), due to
645  * no support for malloc/free and too much stack usage with a fix size array. */
646
647 typedef struct VolumeStep {
648         float3 sigma_s;                         /* scatter coefficient */
649         float3 sigma_t;                         /* extinction coefficient */
650         float3 accum_transmittance;     /* accumulated transmittance including this step */
651         float3 cdf_distance;            /* cumulative density function for distance sampling */
652         float t;                                        /* distance at end of this step */
653         float shade_t;                          /* jittered distance where shading was done in step */
654         int closure_flag;                       /* shader evaluation closure flags */
655 } VolumeStep;
656
657 typedef struct VolumeSegment {
658         VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
659         VolumeStep *steps;                      /* recorded steps */
660         int numsteps;                           /* number of steps */
661         int closure_flag;                       /* accumulated closure flags from all steps */
662
663         float3 accum_emission;          /* accumulated emission at end of segment */
664         float3 accum_transmittance;     /* accumulated transmittance at end of segment */
665         float3 accum_albedo;        /* accumulated average albedo over segment */
666
667         int sampling_method;            /* volume sampling method */
668 } VolumeSegment;
669
670 /* record volume steps to the end of the volume.
671  *
672  * it would be nice if we could only record up to the point that we need to scatter,
673  * but the entire segment is needed to do always scattering, rather than probabilistically
674  * hitting or missing the volume. if we don't know the transmittance at the end of the
675  * volume we can't generate stratified distance samples up to that transmittance */
676 #ifdef __VOLUME_DECOUPLED__
677 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
678         Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
679 {
680         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
681
682         /* prepare for volume stepping */
683         int max_steps;
684         float step_size, random_jitter_offset;
685
686         if(heterogeneous) {
687                 const int global_max_steps = kernel_data.integrator.volume_max_steps;
688                 step_size = kernel_data.integrator.volume_step_size;
689                 /* compute exact steps in advance for malloc */
690                 if(ray->t > global_max_steps*step_size) {
691                         max_steps = global_max_steps;
692                         step_size = ray->t / (float)max_steps;
693                 }
694                 else {
695                         max_steps = max((int)ceilf(ray->t/step_size), 1);
696                 }
697 #ifdef __KERNEL_CPU__
698                 /* NOTE: For the branched path tracing it's possible to have direct
699                  * and indirect light integration both having volume segments allocated.
700                  * We detect this using index in the pre-allocated memory. Currently we
701                  * only support two segments allocated at a time, if more needed some
702                  * modifications to the KernelGlobals will be needed.
703                  *
704                  * This gives us restrictions that decoupled record should only happen
705                  * in the stack manner, meaning if there's subsequent call of decoupled
706                  * record it'll need to free memory before it's caller frees memory.
707                  */
708                 const int index = kg->decoupled_volume_steps_index;
709                 assert(index < sizeof(kg->decoupled_volume_steps) /
710                                sizeof(*kg->decoupled_volume_steps));
711                 if(kg->decoupled_volume_steps[index] == NULL) {
712                         kg->decoupled_volume_steps[index] =
713                                 (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
714                 }
715                 segment->steps = kg->decoupled_volume_steps[index];
716                 ++kg->decoupled_volume_steps_index;
717 #else
718                 segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
719 #endif
720                 random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
721         }
722         else {
723                 max_steps = 1;
724                 step_size = ray->t;
725                 random_jitter_offset = 0.0f;
726                 segment->steps = &segment->stack_step;
727         }
728         
729         /* init accumulation variables */
730         float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
731         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
732         float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
733         float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
734         float t = 0.0f;
735
736         segment->numsteps = 0;
737         segment->closure_flag = 0;
738         bool is_last_step_empty = false;
739
740         VolumeStep *step = segment->steps;
741
742         for(int i = 0; i < max_steps; i++, step++) {
743                 /* advance to new position */
744                 float new_t = min(ray->t, (i+1) * step_size);
745                 float dt = new_t - t;
746
747                 /* use random position inside this segment to sample shader */
748                 if(heterogeneous && new_t == ray->t)
749                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
750
751                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
752                 VolumeShaderCoefficients coeff;
753
754                 /* compute segment */
755                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
756                         int closure_flag = sd->flag;
757                         float3 sigma_t = coeff.sigma_t;
758
759                         /* compute average albedo for channel sampling */
760                         if(closure_flag & SD_SCATTER) {
761                                 accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
762                         }
763
764                         /* compute accumulated transmittance */
765                         float3 transmittance = volume_color_transmittance(sigma_t, dt);
766
767                         /* compute emission attenuated by absorption */
768                         if(closure_flag & SD_EMISSION) {
769                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
770                                 accum_emission += accum_transmittance * emission;
771                         }
772
773                         accum_transmittance *= transmittance;
774
775                         /* compute pdf for distance sampling */
776                         float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
777                         cdf_distance = cdf_distance + pdf_distance;
778
779                         /* write step data */
780                         step->sigma_t = sigma_t;
781                         step->sigma_s = coeff.sigma_s;
782                         step->closure_flag = closure_flag;
783
784                         segment->closure_flag |= closure_flag;
785
786                         is_last_step_empty = false;
787                         segment->numsteps++;
788                 }
789                 else {
790                         if(is_last_step_empty) {
791                                 /* consecutive empty step, merge */
792                                 step--;
793                         }
794                         else {
795                                 /* store empty step */
796                                 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
797                                 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
798                                 step->closure_flag = 0;
799
800                                 segment->numsteps++;
801                                 is_last_step_empty = true;
802                         }
803                 }
804
805                 step->accum_transmittance = accum_transmittance;
806                 step->cdf_distance = cdf_distance;
807                 step->t = new_t;
808                 step->shade_t = t + random_jitter_offset;
809
810                 /* stop if at the end of the volume */
811                 t = new_t;
812                 if(t == ray->t)
813                         break;
814
815                 /* stop if nearly all light blocked */
816                 if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
817                         break;
818         }
819
820         /* store total emission and transmittance */
821         segment->accum_emission = accum_emission;
822         segment->accum_transmittance = accum_transmittance;
823         segment->accum_albedo = accum_albedo;
824
825         /* normalize cumulative density function for distance sampling */
826         VolumeStep *last_step = segment->steps + segment->numsteps - 1;
827
828         if(!is_zero(last_step->cdf_distance)) {
829                 VolumeStep *step = &segment->steps[0];
830                 int numsteps = segment->numsteps;
831                 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
832
833                 for(int i = 0; i < numsteps; i++, step++)
834                         step->cdf_distance *= inv_cdf_distance_sum;
835         }
836 }
837
838 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
839 {
840         if(segment->steps != &segment->stack_step) {
841 #ifdef __KERNEL_CPU__
842                 /* NOTE: We only allow free last allocated segment.
843                  * No random order of alloc/free is supported.
844                  */
845                 assert(kg->decoupled_volume_steps_index > 0);
846                 assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
847                 --kg->decoupled_volume_steps_index;
848 #else
849                 free(segment->steps);
850 #endif
851         }
852 }
853 #endif  /* __VOLUME_DECOUPLED__ */
854
855 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
856  * marching.
857  *
858  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
859 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
860         KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
861         float3 *throughput, float rphase, float rscatter,
862         const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
863 {
864         kernel_assert(segment->closure_flag & SD_SCATTER);
865
866         /* Sample color channel, use MIS with balance heuristic. */
867         float3 channel_pdf;
868         int channel = kernel_volume_sample_channel(segment->accum_albedo,
869                                                    *throughput,
870                                                    rphase,
871                                                    &channel_pdf);
872
873         float xi = rscatter;
874
875         /* probabilistic scattering decision based on transmittance */
876         if(probalistic_scatter) {
877                 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
878
879                 if(1.0f - xi >= sample_transmittance) {
880                         /* rescale random number so we can reuse it */
881                         xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
882                 }
883                 else {
884                         *throughput /= sample_transmittance;
885                         return VOLUME_PATH_MISSED;
886                 }
887         }
888
889         VolumeStep *step;
890         float3 transmittance;
891         float pdf, sample_t;
892         float mis_weight = 1.0f;
893         bool distance_sample = true;
894         bool use_mis = false;
895
896         if(segment->sampling_method && light_P) {
897                 if(segment->sampling_method == SD_VOLUME_MIS) {
898                         /* multiple importance sample: randomly pick between
899                          * equiangular and distance sampling strategy */
900                         if(xi < 0.5f) {
901                                 xi *= 2.0f;
902                         }
903                         else {
904                                 xi = (xi - 0.5f)*2.0f;
905                                 distance_sample = false;
906                         }
907
908                         use_mis = true;
909                 }
910                 else {
911                         /* only equiangular sampling */
912                         distance_sample = false;
913                 }
914         }
915
916         /* distance sampling */
917         if(distance_sample) {
918                 /* find step in cdf */
919                 step = segment->steps;
920
921                 float prev_t = 0.0f;
922                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
923
924                 if(segment->numsteps > 1) {
925                         float prev_cdf = 0.0f;
926                         float step_cdf = 1.0f;
927                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
928
929                         for(int i = 0; ; i++, step++) {
930                                 /* todo: optimize using binary search */
931                                 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
932
933                                 if(xi < step_cdf || i == segment->numsteps-1)
934                                         break;
935
936                                 prev_cdf = step_cdf;
937                                 prev_t = step->t;
938                                 prev_cdf_distance = step->cdf_distance;
939                         }
940
941                         /* remap xi so we can reuse it */
942                         xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
943
944                         /* pdf for picking step */
945                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
946                 }
947
948                 /* determine range in which we will sample */
949                 float step_t = step->t - prev_t;
950
951                 /* sample distance and compute transmittance */
952                 float3 distance_pdf;
953                 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
954
955                 /* modify pdf for hit/miss decision */
956                 if(probalistic_scatter)
957                         distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
958
959                 pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
960
961                 /* multiple importance sampling */
962                 if(use_mis) {
963                         float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
964                         mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
965                 }
966         }
967         /* equi-angular sampling */
968         else {
969                 /* sample distance */
970                 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
971
972                 /* find step in which sampled distance is located */
973                 step = segment->steps;
974
975                 float prev_t = 0.0f;
976                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
977
978                 if(segment->numsteps > 1) {
979                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
980
981                         int numsteps = segment->numsteps;
982                         int high = numsteps - 1;
983                         int low = 0;
984                         int mid;
985
986                         while(low < high) {
987                                 mid = (low + high) >> 1;
988
989                                 if(sample_t < step[mid].t)
990                                         high = mid;
991                                 else if(sample_t >= step[mid + 1].t)
992                                         low = mid + 1;
993                                 else {
994                                         /* found our interval in step[mid] .. step[mid+1] */
995                                         prev_t = step[mid].t;
996                                         prev_cdf_distance = step[mid].cdf_distance;
997                                         step += mid+1;
998                                         break;
999                                 }
1000                         }
1001
1002                         if(low >= numsteps - 1) {
1003                                 prev_t = step[numsteps - 1].t;
1004                                 prev_cdf_distance = step[numsteps-1].cdf_distance;
1005                                 step += numsteps - 1;
1006                         }
1007
1008                         /* pdf for picking step with distance sampling */
1009                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
1010                 }
1011
1012                 /* determine range in which we will sample */
1013                 float step_t = step->t - prev_t;
1014                 float step_sample_t = sample_t - prev_t;
1015
1016                 /* compute transmittance */
1017                 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
1018
1019                 /* multiple importance sampling */
1020                 if(use_mis) {
1021                         float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
1022                         float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
1023                         mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
1024                 }
1025         }
1026         if(sample_t < 0.0f || pdf == 0.0f) {
1027                 return VOLUME_PATH_MISSED;
1028         }
1029
1030         /* compute transmittance up to this step */
1031         if(step != segment->steps)
1032                 transmittance *= (step-1)->accum_transmittance;
1033
1034         /* modify throughput */
1035         *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
1036
1037         /* evaluate shader to create closures at shading point */
1038         if(segment->numsteps > 1) {
1039                 sd->P = ray->P + step->shade_t*ray->D;
1040
1041                 VolumeShaderCoefficients coeff;
1042                 volume_shader_sample(kg, sd, state, sd->P, &coeff);
1043         }
1044
1045         /* move to new position */
1046         sd->P = ray->P + sample_t*ray->D;
1047
1048         return VOLUME_PATH_SCATTERED;
1049 }
1050 #endif /* __SPLIT_KERNEL */
1051
1052 /* decide if we need to use decoupled or not */
1053 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
1054 {
1055         /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
1056          * which also means equiangular and multiple importance sampling is not
1057          * support for that case */
1058         if(!kernel_data.integrator.volume_decoupled)
1059                 return false;
1060
1061 #ifdef __KERNEL_GPU__
1062         if(heterogeneous)
1063                 return false;
1064 #endif
1065
1066         /* equiangular and multiple importance sampling only implemented for decoupled */
1067         if(sampling_method != 0)
1068                 return true;
1069
1070         /* for all light sampling use decoupled, reusing shader evaluations is
1071          * typically faster in that case */
1072         if(direct)
1073                 return kernel_data.integrator.sample_all_lights_direct;
1074         else
1075                 return kernel_data.integrator.sample_all_lights_indirect;
1076 }
1077
1078 /* Volume Stack
1079  *
1080  * This is an array of object/shared ID's that the current segment of the path
1081  * is inside of. */
1082
1083 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
1084                                          ShaderData *stack_sd,
1085                                          ccl_addr_space const PathState *state,
1086                                          ccl_addr_space const Ray *ray,
1087                                          ccl_addr_space VolumeStack *stack)
1088 {
1089         /* NULL ray happens in the baker, does it need proper initialization of
1090          * camera in volume?
1091          */
1092         if(!kernel_data.cam.is_inside_volume || ray == NULL) {
1093                 /* Camera is guaranteed to be in the air, only take background volume
1094                  * into account in this case.
1095                  */
1096                 if(kernel_data.background.volume_shader != SHADER_NONE) {
1097                         stack[0].shader = kernel_data.background.volume_shader;
1098                         stack[0].object = PRIM_NONE;
1099                         stack[1].shader = SHADER_NONE;
1100                 }
1101                 else {
1102                         stack[0].shader = SHADER_NONE;
1103                 }
1104                 return;
1105         }
1106
1107         kernel_assert(state->flag & PATH_RAY_CAMERA);
1108
1109         Ray volume_ray = *ray;
1110         volume_ray.t = FLT_MAX;
1111
1112         const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
1113         int stack_index = 0, enclosed_index = 0;
1114
1115 #ifdef __VOLUME_RECORD_ALL__
1116         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1117         uint num_hits = scene_intersect_volume_all(kg,
1118                                                    &volume_ray,
1119                                                    hits,
1120                                                    2*VOLUME_STACK_SIZE,
1121                                                    visibility);
1122         if(num_hits > 0) {
1123                 int enclosed_volumes[VOLUME_STACK_SIZE];
1124                 Intersection *isect = hits;
1125
1126                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1127
1128                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1129                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1130                         if(stack_sd->flag & SD_BACKFACING) {
1131                                 bool need_add = true;
1132                                 for(int i = 0; i < enclosed_index && need_add; ++i) {
1133                                         /* If ray exited the volume and never entered to that volume
1134                                          * it means that camera is inside such a volume.
1135                                          */
1136                                         if(enclosed_volumes[i] == stack_sd->object) {
1137                                                 need_add = false;
1138                                         }
1139                                 }
1140                                 for(int i = 0; i < stack_index && need_add; ++i) {
1141                                         /* Don't add intersections twice. */
1142                                         if(stack[i].object == stack_sd->object) {
1143                                                 need_add = false;
1144                                                 break;
1145                                         }
1146                                 }
1147                                 if(need_add) {
1148                                         stack[stack_index].object = stack_sd->object;
1149                                         stack[stack_index].shader = stack_sd->shader;
1150                                         ++stack_index;
1151                                 }
1152                         }
1153                         else {
1154                                 /* If ray from camera enters the volume, this volume shouldn't
1155                                  * be added to the stack on exit.
1156                                  */
1157                                 enclosed_volumes[enclosed_index++] = stack_sd->object;
1158                         }
1159                 }
1160         }
1161 #else
1162         int enclosed_volumes[VOLUME_STACK_SIZE];
1163         int step = 0;
1164
1165         while(stack_index < VOLUME_STACK_SIZE - 1 &&
1166               enclosed_index < VOLUME_STACK_SIZE - 1 &&
1167               step < 2 * VOLUME_STACK_SIZE)
1168         {
1169                 Intersection isect;
1170                 if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
1171                         break;
1172                 }
1173
1174                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1175                 if(stack_sd->flag & SD_BACKFACING) {
1176                         /* If ray exited the volume and never entered to that volume
1177                          * it means that camera is inside such a volume.
1178                          */
1179                         bool need_add = true;
1180                         for(int i = 0; i < enclosed_index && need_add; ++i) {
1181                                 /* If ray exited the volume and never entered to that volume
1182                                  * it means that camera is inside such a volume.
1183                                  */
1184                                 if(enclosed_volumes[i] == stack_sd->object) {
1185                                         need_add = false;
1186                                 }
1187                         }
1188                         for(int i = 0; i < stack_index && need_add; ++i) {
1189                                 /* Don't add intersections twice. */
1190                                 if(stack[i].object == stack_sd->object) {
1191                                         need_add = false;
1192                                         break;
1193                                 }
1194                         }
1195                         if(need_add) {
1196                                 stack[stack_index].object = stack_sd->object;
1197                                 stack[stack_index].shader = stack_sd->shader;
1198                                 ++stack_index;
1199                         }
1200                 }
1201                 else {
1202                         /* If ray from camera enters the volume, this volume shouldn't
1203                          * be added to the stack on exit.
1204                          */
1205                         enclosed_volumes[enclosed_index++] = stack_sd->object;
1206                 }
1207
1208                 /* Move ray forward. */
1209                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1210                 ++step;
1211         }
1212 #endif
1213         /* stack_index of 0 means quick checks outside of the kernel gave false
1214          * positive, nothing to worry about, just we've wasted quite a few of
1215          * ticks just to come into conclusion that camera is in the air.
1216          *
1217          * In this case we're doing the same above -- check whether background has
1218          * volume.
1219          */
1220         if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1221                 stack[0].shader = kernel_data.background.volume_shader;
1222                 stack[0].object = PRIM_NONE;
1223                 stack[1].shader = SHADER_NONE;
1224         }
1225         else {
1226                 stack[stack_index].shader = SHADER_NONE;
1227         }
1228 }
1229
1230 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
1231 {
1232         /* todo: we should have some way for objects to indicate if they want the
1233          * world shader to work inside them. excluding it by default is problematic
1234          * because non-volume objects can't be assumed to be closed manifolds */
1235
1236         if(!(sd->flag & SD_HAS_VOLUME))
1237                 return;
1238         
1239         if(sd->flag & SD_BACKFACING) {
1240                 /* exit volume object: remove from stack */
1241                 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1242                         if(stack[i].object == sd->object) {
1243                                 /* shift back next stack entries */
1244                                 do {
1245                                         stack[i] = stack[i+1];
1246                                         i++;
1247                                 }
1248                                 while(stack[i].shader != SHADER_NONE);
1249
1250                                 return;
1251                         }
1252                 }
1253         }
1254         else {
1255                 /* enter volume object: add to stack */
1256                 int i;
1257
1258                 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
1259                         /* already in the stack? then we have nothing to do */
1260                         if(stack[i].object == sd->object)
1261                                 return;
1262                 }
1263
1264                 /* if we exceed the stack limit, ignore */
1265                 if(i >= VOLUME_STACK_SIZE-1)
1266                         return;
1267
1268                 /* add to the end of the stack */
1269                 stack[i].shader = sd->shader;
1270                 stack[i].object = sd->object;
1271                 stack[i+1].shader = SHADER_NONE;
1272         }
1273 }
1274
1275 #ifdef __SUBSURFACE__
1276 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1277                                                           ShaderData *stack_sd,
1278                                                           Ray *ray,
1279                                                           ccl_addr_space VolumeStack *stack)
1280 {
1281         kernel_assert(kernel_data.integrator.use_volumes);
1282
1283         Ray volume_ray = *ray;
1284
1285 #  ifdef __VOLUME_RECORD_ALL__
1286         Intersection hits[2*VOLUME_STACK_SIZE + 1];
1287         uint num_hits = scene_intersect_volume_all(kg,
1288                                                    &volume_ray,
1289                                                    hits,
1290                                                    2*VOLUME_STACK_SIZE,
1291                                                    PATH_RAY_ALL_VISIBILITY);
1292         if(num_hits > 0) {
1293                 Intersection *isect = hits;
1294
1295                 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1296
1297                 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1298                         shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1299                         kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1300                 }
1301         }
1302 #  else
1303         Intersection isect;
1304         int step = 0;
1305         float3 Pend = ray->P + ray->D*ray->t;
1306         while(step < 2 * VOLUME_STACK_SIZE &&
1307               scene_intersect_volume(kg,
1308                                      &volume_ray,
1309                                      &isect,
1310                                      PATH_RAY_ALL_VISIBILITY))
1311         {
1312                 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1313                 kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1314
1315                 /* Move ray forward. */
1316                 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1317                 if(volume_ray.t != FLT_MAX) {
1318                         volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
1319                 }
1320                 ++step;
1321         }
1322 #  endif
1323 }
1324 #endif
1325
1326 /* Clean stack after the last bounce.
1327  *
1328  * It is expected that all volumes are closed manifolds, so at the time when ray
1329  * hits nothing (for example, it is a last bounce which goes to environment) the
1330  * only expected volume in the stack is the world's one. All the rest volume
1331  * entries should have been exited already.
1332  *
1333  * This isn't always true because of ray intersection precision issues, which
1334  * could lead us to an infinite non-world volume in the stack, causing render
1335  * artifacts.
1336  *
1337  * Use this function after the last bounce to get rid of all volumes apart from
1338  * the world's one after the last bounce to avoid render artifacts.
1339  */
1340 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
1341                                                  ccl_addr_space VolumeStack *volume_stack)
1342 {
1343         if(kernel_data.background.volume_shader != SHADER_NONE) {
1344                 /* Keep the world's volume in stack. */
1345                 volume_stack[1].shader = SHADER_NONE;
1346         }
1347         else {
1348                 volume_stack[0].shader = SHADER_NONE;
1349         }
1350 }
1351
1352 #endif /* __VOLUME__ */
1353
1354 CCL_NAMESPACE_END