Cycles: add Transparent Depth output to Light Path node.
[blender-staging.git] / intern / cycles / kernel / kernel_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22         VOLUME_PATH_SCATTERED = 0,
23         VOLUME_PATH_ATTENUATED = 1,
24         VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33         float3 sigma_a;
34         float3 sigma_s;
35         float3 emission;
36 } VolumeShaderCoefficients;
37
38 /* evaluate shader to get extinction coefficient at P */
39 ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, float3 *extinction)
40 {
41         sd->P = P;
42         shader_eval_volume(kg, sd, state->volume_stack, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
43
44         if(!(sd->flag & (SD_ABSORPTION|SD_SCATTER)))
45                 return false;
46
47         float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
48
49         for(int i = 0; i < sd->num_closure; i++) {
50                 const ShaderClosure *sc = &sd->closure[i];
51
52                 if(CLOSURE_IS_VOLUME(sc->type))
53                         sigma_t += sc->weight;
54         }
55
56         *extinction = sigma_t;
57         return true;
58 }
59
60 /* evaluate shader to get absorption, scattering and emission at P */
61 ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, VolumeShaderCoefficients *coeff)
62 {
63         sd->P = P;
64         shader_eval_volume(kg, sd, state->volume_stack, state->flag, SHADER_CONTEXT_VOLUME);
65
66         if(!(sd->flag & (SD_ABSORPTION|SD_SCATTER|SD_EMISSION)))
67                 return false;
68         
69         coeff->sigma_a = make_float3(0.0f, 0.0f, 0.0f);
70         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
71         coeff->emission = make_float3(0.0f, 0.0f, 0.0f);
72
73         for(int i = 0; i < sd->num_closure; i++) {
74                 const ShaderClosure *sc = &sd->closure[i];
75
76                 if(sc->type == CLOSURE_VOLUME_ABSORPTION_ID)
77                         coeff->sigma_a += sc->weight;
78                 else if(sc->type == CLOSURE_EMISSION_ID)
79                         coeff->emission += sc->weight;
80                 else if(CLOSURE_IS_VOLUME(sc->type))
81                         coeff->sigma_s += sc->weight;
82         }
83
84         /* when at the max number of bounces, treat scattering as absorption */
85         if(sd->flag & SD_SCATTER) {
86                 if(state->volume_bounce >= kernel_data.integrator.max_volume_bounce) {
87                         coeff->sigma_a += coeff->sigma_s;
88                         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
89                         sd->flag &= ~SD_SCATTER;
90                         sd->flag |= SD_ABSORPTION;
91                 }
92         }
93
94         return true;
95 }
96
97 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
98 {
99         return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
100 }
101
102 ccl_device float kernel_volume_channel_get(float3 value, int channel)
103 {
104         return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
105 }
106
107 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
108 {
109         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
110                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
111
112                 if(shader_flag & SD_HETEROGENEOUS_VOLUME)
113                         return true;
114         }
115
116         return false;
117 }
118
119 /* Volume Shadows
120  *
121  * These functions are used to attenuate shadow rays to lights. Both absorption
122  * and scattering will block light, represented by the extinction coefficient. */
123
124 /* homogeneous volume: assume shader evaluation at the starts gives
125  * the extinction coefficient for the entire line segment */
126 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
127 {
128         float3 sigma_t;
129
130         if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
131                 *throughput *= volume_color_transmittance(sigma_t, ray->t);
132 }
133
134 /* heterogeneous volume: integrate stepping through the volume until we
135  * reach the end, get absorbed entirely, or run out of iterations */
136 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
137 {
138         float3 tp = *throughput;
139         const float tp_eps = 1e-10f; /* todo: this is likely not the right value */
140
141         /* prepare for stepping */
142         int max_steps = kernel_data.integrator.volume_max_steps;
143         float step = kernel_data.integrator.volume_step_size;
144         float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step;
145
146         /* compute extinction at the start */
147         float t = 0.0f;
148
149         for(int i = 0; i < max_steps; i++) {
150                 /* advance to new position */
151                 float new_t = min(ray->t, (i+1) * step);
152                 float dt = new_t - t;
153
154                 /* use random position inside this segment to sample shader */
155                 if(new_t == ray->t)
156                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
157
158                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
159                 float3 sigma_t;
160
161                 /* compute attenuation over segment */
162                 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
163                         /* todo: we could avoid computing expf() for each step by summing,
164                          * because exp(a)*exp(b) = exp(a+b), but we still want a quick
165                          * tp_eps check too */
166                         tp *= volume_color_transmittance(sigma_t, new_t - t);
167
168                         /* stop if nearly all light blocked */
169                         if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
170                                 break;
171                 }
172
173                 /* stop if at the end of the volume */
174                 t = new_t;
175                 if(t == ray->t)
176                         break;
177         }
178
179         *throughput = tp;
180 }
181
182 /* get the volume attenuation over line segment defined by ray, with the
183  * assumption that there are no surfaces blocking light between the endpoints */
184 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput)
185 {
186         ShaderData sd;
187         shader_setup_from_volume(kg, &sd, ray, state->bounce, state->transparent_bounce);
188
189         if(volume_stack_is_heterogeneous(kg, state->volume_stack))
190                 kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput);
191         else
192                 kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput);
193 }
194
195 /* Equi-angular sampling as in:
196  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
197
198 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
199 {
200         float t = ray->t;
201
202         float delta = dot((light_P - ray->P) , ray->D);
203         float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
204         float theta_a = -atan2f(delta, D);
205         float theta_b = atan2f(t - delta, D);
206         float t_ = D * tan((xi * theta_b) + (1 - xi) * theta_a);
207
208         *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
209
210         return min(t, delta + t_); /* min is only for float precision errors */
211 }
212
213 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
214 {
215         float delta = dot((light_P - ray->P) , ray->D);
216         float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
217
218         float t = ray->t;
219         float t_ = sample_t - delta;
220
221         float theta_a = -atan2f(delta, D);
222         float theta_b = atan2f(t - delta, D);
223
224         float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
225
226         return pdf;
227 }
228
229 ccl_device bool kernel_volume_equiangular_light_position(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float3 *light_P)
230 {
231         /* light RNGs */
232         float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
233         float light_u, light_v;
234         path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
235
236         /* light sample */
237         LightSample ls;
238         light_sample(kg, light_t, light_u, light_v, ray->time, ray->P, &ls);
239         if(ls.pdf == 0.0f)
240                 return false;
241         
242         *light_P = ls.P;
243         return true;
244 }
245
246 ccl_device float kernel_volume_decoupled_equiangular_pdf(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float sample_t)
247 {
248         float3 light_P;
249
250         if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
251                 return 0.0f;
252
253         return kernel_volume_equiangular_pdf(ray, light_P, sample_t);
254 }
255
256 /* Distance sampling */
257
258 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
259 {
260         /* xi is [0, 1[ so log(0) should never happen, division by zero is
261          * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
262         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
263         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
264         float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
265
266         float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
267
268         *transmittance = volume_color_transmittance(sigma_t, sample_t);
269         *pdf = (sigma_t * *transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
270
271         /* todo: optimization: when taken together with hit/miss decision,
272          * the full_transmittance cancels out drops out and xi does not
273          * need to be remapped */
274
275         return sample_t;
276 }
277
278 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
279 {
280         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
281         float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
282
283         return (sigma_t * transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
284 }
285
286 /* Emission */
287
288 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
289 {
290         /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
291          * this goes to E * t as sigma_t goes to zero
292          *
293          * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
294         float3 emission = coeff->emission;
295
296         if(closure_flag & SD_ABSORPTION) {
297                 float3 sigma_t = coeff->sigma_a + coeff->sigma_s;
298
299                 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
300                 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
301                 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
302         }
303         else
304                 emission *= t;
305         
306         return emission;
307 }
308
309 /* Volume Path */
310
311 /* homogeneous volume: assume shader evaluation at the start gives
312  * the volume shading coefficient for the entire line segment */
313 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
314         PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
315         RNG *rng)
316 {
317         VolumeShaderCoefficients coeff;
318
319         if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
320                 return VOLUME_PATH_MISSED;
321
322         int closure_flag = sd->flag;
323         float t = ray->t;
324         float3 new_tp;
325
326         /* randomly scatter, and if we do t is shortened */
327         if(closure_flag & SD_SCATTER) {
328                 /* extinction coefficient */
329                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
330
331                 /* pick random color channel, we use the Veach one-sample
332                  * model with balance heuristic for the channels */
333                 float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
334                 int channel = (int)(rphase*3.0f);
335                 sd->randb_closure = rphase*3.0f - channel;
336
337                 float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
338
339                 /* decide if we will hit or miss */
340                 float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
341                 float sample_transmittance = expf(-sample_sigma_t * t);
342
343                 if(xi >= sample_transmittance) {
344                         /* scattering */
345                         float3 pdf;
346                         float3 transmittance;
347                         float sample_t;
348
349                         /* rescale random number so we can reuse it */
350                         xi = (xi - sample_transmittance)/(1.0f - sample_transmittance);
351
352                         if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { 
353                                 /* distance sampling */
354                                 sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
355                         }
356                         else {
357                                 /* equiangular sampling */
358                                 float3 light_P;
359                                 float equi_pdf;
360                                 if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
361                                         return VOLUME_PATH_MISSED;
362
363                                 sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &equi_pdf);
364                                 transmittance = volume_color_transmittance(sigma_t, sample_t);
365                                 pdf = make_float3(equi_pdf, equi_pdf, equi_pdf);
366                         }
367
368                         /* modifiy pdf for hit/miss decision */
369                         pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
370
371                         new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
372                         t = sample_t;
373                 }
374                 else {
375                         /* no scattering */
376                         float3 transmittance = volume_color_transmittance(sigma_t, t);
377                         float pdf = average(transmittance);
378                         new_tp = *throughput * transmittance / pdf;
379                 }
380         }
381         else if(closure_flag & SD_ABSORPTION) {
382                 /* absorption only, no sampling needed */
383                 float3 transmittance = volume_color_transmittance(coeff.sigma_a, t);
384                 new_tp = *throughput * transmittance;
385         }
386
387         /* integrate emission attenuated by extinction */
388         if(closure_flag & SD_EMISSION) {
389                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
390                 float3 transmittance = volume_color_transmittance(sigma_t, ray->t);
391                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
392                 path_radiance_accum_emission(L, *throughput, emission, state->bounce);
393         }
394
395         /* modify throughput */
396         if(closure_flag & (SD_ABSORPTION|SD_SCATTER)) {
397                 *throughput = new_tp;
398
399                 /* prepare to scatter to new direction */
400                 if(t < ray->t) {
401                         /* adjust throughput and move to new location */
402                         sd->P = ray->P + t*ray->D;
403
404                         return VOLUME_PATH_SCATTERED;
405                 }
406         }
407
408         return VOLUME_PATH_ATTENUATED;
409 }
410
411 /* heterogeneous volume: integrate stepping through the volume until we
412  * reach the end, get absorbed entirely, or run out of iterations */
413 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlobals *kg,
414         PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
415 {
416         float3 tp = *throughput;
417         const float tp_eps = 1e-10f; /* todo: this is likely not the right value */
418
419         /* prepare for stepping */
420         int max_steps = kernel_data.integrator.volume_max_steps;
421         float step_size = kernel_data.integrator.volume_step_size;
422         float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
423
424         /* compute coefficients at the start */
425         float t = 0.0f;
426         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
427
428         /* cache some constant variables */
429         float xi;
430         int channel = -1;
431         bool has_scatter = false;
432
433         for(int i = 0; i < max_steps; i++) {
434                 /* advance to new position */
435                 float new_t = min(ray->t, (i+1) * step_size);
436                 float dt = new_t - t;
437
438                 /* use random position inside this segment to sample shader */
439                 if(new_t == ray->t)
440                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
441
442                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
443                 VolumeShaderCoefficients coeff;
444
445                 /* compute segment */
446                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
447                         int closure_flag = sd->flag;
448                         float3 new_tp;
449                         float3 transmittance;
450                         bool scatter = false;
451
452                         /* randomly scatter, and if we do dt and new_t are shortened */
453                         if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) {
454                                 has_scatter = true;
455
456                                 /* average sigma_t and sigma_s over segment */
457                                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
458                                 float3 sigma_s = coeff.sigma_s;
459
460                                 /* lazily set up variables for sampling */
461                                 if(channel == -1) {
462                                         /* pick random color channel, we use the Veach one-sample
463                                          * model with balance heuristic for the channels */
464                                         xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
465
466                                         float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
467                                         channel = (int)(rphase*3.0f);
468                                         sd->randb_closure = rphase*3.0f - channel;
469                                 }
470
471                                 /* compute transmittance over full step */
472                                 transmittance = volume_color_transmittance(sigma_t, dt);
473
474                                 /* decide if we will scatter or continue */
475                                 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
476
477                                 if(1.0f - xi >= sample_transmittance) {
478                                         /* compute sampling distance */
479                                         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
480                                         float new_dt = -logf(1.0f - xi)/sample_sigma_t;
481                                         new_t = t + new_dt;
482
483                                         /* transmittance, throughput */
484                                         float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt);
485                                         float pdf = average(sigma_t * new_transmittance);
486                                         new_tp = tp * sigma_s * new_transmittance / pdf;
487                                         scatter = true;
488                                 }
489                                 else {
490                                         /* throughput */
491                                         float pdf = average(transmittance);
492                                         new_tp = tp * transmittance / pdf;
493
494                                         /* remap xi so we can reuse it and keep thing stratified */
495                                         xi = 1.0f - (1.0f - xi)/sample_transmittance;
496                                 }
497                         }
498                         else if(closure_flag & SD_ABSORPTION) {
499                                 /* absorption only, no sampling needed */
500                                 float3 sigma_a = coeff.sigma_a;
501
502                                 transmittance = volume_color_transmittance(sigma_a, dt);
503                                 new_tp = tp * transmittance;
504                         }
505
506                         /* integrate emission attenuated by absorption */
507                         if(closure_flag & SD_EMISSION) {
508                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
509                                 path_radiance_accum_emission(L, tp, emission, state->bounce);
510                         }
511
512                         /* modify throughput */
513                         if(closure_flag & (SD_ABSORPTION|SD_SCATTER)) {
514                                 tp = new_tp;
515
516                                 /* stop if nearly all light blocked */
517                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
518                                         tp = make_float3(0.0f, 0.0f, 0.0f);
519                                         break;
520                                 }
521
522                                 /* prepare to scatter to new direction */
523                                 if(scatter) {
524                                         /* adjust throughput and move to new location */
525                                         sd->P = ray->P + new_t*ray->D;
526                                         *throughput = tp;
527
528                                         return VOLUME_PATH_SCATTERED;
529                                 }
530                                 else {
531                                         /* accumulate transmittance */
532                                         accum_transmittance *= transmittance;
533                                 }
534                         }
535                 }
536
537                 /* stop if at the end of the volume */
538                 t = new_t;
539                 if(t == ray->t)
540                         break;
541         }
542
543         *throughput = tp;
544
545         return VOLUME_PATH_ATTENUATED;
546 }
547
548 /* Decoupled Volume Sampling
549  *
550  * VolumeSegment is list of coefficients and transmittance stored at all steps
551  * through a volume. This can then latter be used for decoupled sampling as in:
552  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
553
554 /* CPU only because of malloc/free */
555 #ifdef __KERNEL_CPU__
556
557 typedef struct VolumeStep {
558         float3 sigma_s;                         /* scatter coefficient */
559         float3 sigma_t;                         /* extinction coefficient */
560         float3 accum_transmittance;     /* accumulated transmittance including this step */
561         float3 cdf_distance;            /* cumulative density function for distance sampling */
562         float t;                                        /* distance at end of this step */
563         float shade_t;                          /* jittered distance where shading was done in step */
564         int closure_flag;                       /* shader evaluation closure flags */
565 } VolumeStep;
566
567 typedef struct VolumeSegment {
568         VolumeStep *steps;                      /* recorded steps */
569         int numsteps;                           /* number of steps */
570         int closure_flag;                       /* accumulated closure flags from all steps */
571
572         float3 accum_emission;          /* accumulated emission at end of segment */
573         float3 accum_transmittance;     /* accumulated transmittance at end of segment */
574 } VolumeSegment;
575
576 /* record volume steps to the end of the volume.
577  *
578  * it would be nice if we could only record up to the point that we need to scatter,
579  * but the entire segment is needed to do always scattering, rather than probalistically
580  * hitting or missing the volume. if we don't know the transmittance at the end of the
581  * volume we can't generate stratitied distance samples up to that transmittance */
582 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
583         Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
584 {
585         /* prepare for volume stepping */
586         int max_steps;
587         float step_size, random_jitter_offset;
588
589         if(heterogeneous) {
590                 max_steps = kernel_data.integrator.volume_max_steps;
591                 step_size = kernel_data.integrator.volume_step_size;
592                 random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
593
594                 /* compute exact steps in advance for malloc */
595                 max_steps = max((int)ceilf(ray->t/step_size), 1);
596         }
597         else {
598                 max_steps = 1;
599                 step_size = ray->t;
600                 random_jitter_offset = 0.0f;
601         }
602         
603         /* init accumulation variables */
604         float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
605         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
606         float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
607         float t = 0.0f;
608
609         segment->closure_flag = 0;
610         segment->numsteps = 0;
611         segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
612
613         VolumeStep *step = segment->steps;
614
615         for(int i = 0; i < max_steps; i++, step++) {
616                 /* advance to new position */
617                 float new_t = min(ray->t, (i+1) * step_size);
618                 float dt = new_t - t;
619
620                 /* use random position inside this segment to sample shader */
621                 if(heterogeneous && new_t == ray->t)
622                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
623
624                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
625                 VolumeShaderCoefficients coeff;
626
627                 /* compute segment */
628                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
629                         int closure_flag = sd->flag;
630                         float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
631
632                         /* compute accumulated transmittance */
633                         float3 transmittance = volume_color_transmittance(sigma_t, dt);
634
635                         /* compute emission attenuated by absorption */
636                         if(closure_flag & SD_EMISSION) {
637                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
638                                 accum_emission += accum_transmittance * emission;
639                         }
640
641                         accum_transmittance *= transmittance;
642
643                         /* compute pdf for distance sampling */
644                         float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
645                         cdf_distance = cdf_distance + pdf_distance;
646
647                         /* write step data */
648                         step->sigma_t = sigma_t;
649                         step->sigma_s = coeff.sigma_s;
650                         step->closure_flag = closure_flag;
651
652                         segment->closure_flag |= closure_flag;
653                 }
654                 else {
655                         /* store empty step (todo: skip consecutive empty steps) */
656                         step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
657                         step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
658                         step->closure_flag = 0;
659                 }
660
661                 step->accum_transmittance = accum_transmittance;
662                 step->cdf_distance = cdf_distance;
663                 step->t = new_t;
664                 step->shade_t = t + random_jitter_offset;
665
666                 segment->numsteps++;
667
668                 /* stop if at the end of the volume */
669                 t = new_t;
670                 if(t == ray->t)
671                         break;
672         }
673
674         /* store total emission and transmittance */
675         segment->accum_emission = accum_emission;
676         segment->accum_transmittance = accum_transmittance;
677
678         /* normalize cumulative density function for distance sampling */
679         VolumeStep *last_step = segment->steps + segment->numsteps - 1;
680
681         if(!is_zero(last_step->cdf_distance)) {
682                 VolumeStep *step = &segment->steps[0];
683                 int numsteps = segment->numsteps;
684                 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
685
686                 for(int i = 0; i < numsteps; i++, step++)
687                         step->cdf_distance *= inv_cdf_distance_sum;
688         }
689 }
690
691 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
692 {
693         free(segment->steps);
694 }
695
696 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
697  * marching. unlike the non-decoupled functions, these do not do probalistic
698  * scattering, they always scatter if there is any non-zero scattering
699  * coefficient.
700  *
701  * these also do not do emission or modify throughput. */
702 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
703         KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
704         float3 *throughput, RNG *rng, VolumeSegment *segment)
705 {
706         int closure_flag = segment->closure_flag;
707
708         if(!(closure_flag & SD_SCATTER))
709                 return VOLUME_PATH_MISSED;
710
711         /* pick random color channel, we use the Veach one-sample
712          * model with balance heuristic for the channels */
713         float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
714         int channel = (int)(rphase*3.0f);
715         sd->randb_closure = rphase*3.0f - channel;
716
717         float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
718
719         VolumeStep *step;
720         float3 transmittance;
721         float pdf, sample_t;
722
723         /* distance sampling */
724         if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { 
725                 /* find step in cdf */
726                 step = segment->steps;
727
728                 float prev_t = 0.0f;
729                 float3 step_pdf = make_float3(1.0f, 1.0f, 1.0f);
730
731                 if(segment->numsteps > 1) {
732                         float prev_cdf = 0.0f;
733                         float step_cdf = 1.0f;
734                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
735
736                         for(int i = 0; ; i++, step++) {
737                                 /* todo: optimize using binary search */
738                                 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
739
740                                 if(xi < step_cdf || i == segment->numsteps-1)
741                                         break;
742
743                                 prev_cdf = step_cdf;
744                                 prev_t = step->t;
745                                 prev_cdf_distance = step->cdf_distance;
746                         }
747
748                         /* remap xi so we can reuse it */
749                         xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
750
751                         /* pdf for picking step */
752                         step_pdf = step->cdf_distance - prev_cdf_distance;
753                 }
754
755                 /* determine range in which we will sample */
756                 float step_t = step->t - prev_t;
757
758                 /* sample distance and compute transmittance */
759                 float3 distance_pdf;
760                 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
761                 pdf = average(distance_pdf * step_pdf);
762         }
763         /* equi-angular sampling */
764         else {
765                 /* pick position on light */
766                 float3 light_P;
767                 if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
768                         return VOLUME_PATH_MISSED;
769
770                 /* sample distance */
771                 sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &pdf);
772
773                 /* find step in which sampled distance is located */
774                 step = segment->steps;
775
776                 float prev_t = 0.0f;
777
778                 if(segment->numsteps > 1) {
779                         /* todo: optimize using binary search */
780                         for(int i = 0; i < segment->numsteps-1; i++, step++) {
781                                 if(sample_t < step->t)
782                                         break;
783
784                                 prev_t = step->t;
785                         }
786                 }
787                 
788                 /* compute transmittance */
789                 transmittance = volume_color_transmittance(step->sigma_t, sample_t - prev_t);
790         }
791
792         /* compute transmittance up to this step */
793         if(step != segment->steps)
794                 transmittance *= (step-1)->accum_transmittance;
795
796         /* modify throughput */
797         *throughput *= step->sigma_s * transmittance / pdf;
798
799         /* evaluate shader to create closures at shading point */
800         if(segment->numsteps > 1) {
801                 sd->P = ray->P + step->shade_t*ray->D;
802
803                 VolumeShaderCoefficients coeff;
804                 volume_shader_sample(kg, sd, state, sd->P, &coeff);
805         }
806
807         /* move to new position */
808         sd->P = ray->P + sample_t*ray->D;
809
810         return VOLUME_PATH_SCATTERED;
811 }
812
813 #endif
814
815 /* get the volume attenuation and emission over line segment defined by
816  * ray, with the assumption that there are no surfaces blocking light
817  * between the endpoints */
818 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
819         PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng)
820 {
821         /* workaround to fix correlation bug in T38710, can find better solution
822          * in random number generator later, for now this is done here to not impact
823          * performance of rendering without volumes */
824         RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
825         bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
826
827 #if 0
828         /* debugging code to compare decoupled ray marching */
829         VolumeSegment segment;
830
831         shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
832         kernel_volume_decoupled_record(kg, state, ray, sd, &segment, heterogeneous);
833
834         VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, state, ray, sd, throughput, &tmp_rng, &segment);
835
836         kernel_volume_decoupled_free(kg, &segment);
837
838         return result;
839 #else
840         shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
841
842         if(heterogeneous)
843                 return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
844         else
845                 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
846 #endif
847 }
848
849 /* Volume Stack
850  *
851  * This is an array of object/shared ID's that the current segment of the path
852  * is inside of. */
853
854 ccl_device void kernel_volume_stack_init(KernelGlobals *kg, VolumeStack *stack)
855 {
856         /* todo: this assumes camera is always in air, need to detect when it isn't */
857         if(kernel_data.background.volume_shader == SHADER_NONE) {
858                 stack[0].shader = SHADER_NONE;
859         }
860         else {
861                 stack[0].shader = kernel_data.background.volume_shader;
862                 stack[0].object = PRIM_NONE;
863                 stack[1].shader = SHADER_NONE;
864         }
865 }
866
867 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack)
868 {
869         /* todo: we should have some way for objects to indicate if they want the
870          * world shader to work inside them. excluding it by default is problematic
871          * because non-volume objects can't be assumed to be closed manifolds */
872
873         if(!(sd->flag & SD_HAS_VOLUME))
874                 return;
875         
876         if(sd->flag & SD_BACKFACING) {
877                 /* exit volume object: remove from stack */
878                 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
879                         if(stack[i].object == sd->object) {
880                                 /* shift back next stack entries */
881                                 do {
882                                         stack[i] = stack[i+1];
883                                         i++;
884                                 }
885                                 while(stack[i].shader != SHADER_NONE);
886
887                                 return;
888                         }
889                 }
890         }
891         else {
892                 /* enter volume object: add to stack */
893                 int i;
894
895                 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
896                         /* already in the stack? then we have nothing to do */
897                         if(stack[i].object == sd->object)
898                                 return;
899                 }
900
901                 /* if we exceed the stack limit, ignore */
902                 if(i >= VOLUME_STACK_SIZE-1)
903                         return;
904
905                 /* add to the end of the stack */
906                 stack[i].shader = sd->shader;
907                 stack[i].object = sd->object;
908                 stack[i+1].shader = SHADER_NONE;
909         }
910 }
911
912 CCL_NAMESPACE_END
913