3d3d871a702fc449c5ded8cc762eb0586f703d08
[blender.git] / intern / cycles / kernel / kernel_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22         VOLUME_PATH_SCATTERED = 0,
23         VOLUME_PATH_ATTENUATED = 1,
24         VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33         float3 sigma_a;
34         float3 sigma_s;
35         float3 emission;
36 } VolumeShaderCoefficients;
37
38 /* evaluate shader to get extinction coefficient at P */
39 ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, float3 *extinction)
40 {
41         sd->P = P;
42         shader_eval_volume(kg, sd, state->volume_stack, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
43
44         if(!(sd->flag & (SD_ABSORPTION|SD_SCATTER)))
45                 return false;
46
47         float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
48
49         for(int i = 0; i < sd->num_closure; i++) {
50                 const ShaderClosure *sc = &sd->closure[i];
51
52                 if(CLOSURE_IS_VOLUME(sc->type))
53                         sigma_t += sc->weight;
54         }
55
56         *extinction = sigma_t;
57         return true;
58 }
59
60 /* evaluate shader to get absorption, scattering and emission at P */
61 ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, VolumeShaderCoefficients *coeff)
62 {
63         sd->P = P;
64         shader_eval_volume(kg, sd, state->volume_stack, state->flag, SHADER_CONTEXT_VOLUME);
65
66         if(!(sd->flag & (SD_ABSORPTION|SD_SCATTER|SD_EMISSION)))
67                 return false;
68         
69         coeff->sigma_a = make_float3(0.0f, 0.0f, 0.0f);
70         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
71         coeff->emission = make_float3(0.0f, 0.0f, 0.0f);
72
73         for(int i = 0; i < sd->num_closure; i++) {
74                 const ShaderClosure *sc = &sd->closure[i];
75
76                 if(sc->type == CLOSURE_VOLUME_ABSORPTION_ID)
77                         coeff->sigma_a += sc->weight;
78                 else if(sc->type == CLOSURE_EMISSION_ID)
79                         coeff->emission += sc->weight;
80                 else if(CLOSURE_IS_VOLUME(sc->type))
81                         coeff->sigma_s += sc->weight;
82         }
83
84         /* when at the max number of bounces, treat scattering as absorption */
85         if(sd->flag & SD_SCATTER) {
86                 if(state->volume_bounce >= kernel_data.integrator.max_volume_bounce) {
87                         coeff->sigma_a += coeff->sigma_s;
88                         coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
89                         sd->flag &= ~SD_SCATTER;
90                         sd->flag |= SD_ABSORPTION;
91                 }
92         }
93
94         return true;
95 }
96
97 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
98 {
99         return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
100 }
101
102 ccl_device float kernel_volume_channel_get(float3 value, int channel)
103 {
104         return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
105 }
106
107 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
108 {
109         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
110                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
111
112                 if(shader_flag & SD_HETEROGENEOUS_VOLUME)
113                         return true;
114         }
115
116         return false;
117 }
118
119 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
120 {
121         if(kernel_data.integrator.num_all_lights == 0)
122                 return 0;
123
124         int method = -1;
125
126         for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
127                 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
128
129                 if(shader_flag & SD_VOLUME_MIS) {
130                         return SD_VOLUME_MIS;
131                 }
132                 else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
133                         if(method == 0)
134                                 return SD_VOLUME_MIS;
135
136                         method = SD_VOLUME_EQUIANGULAR;
137                 }
138                 else {
139                         if(method == SD_VOLUME_EQUIANGULAR)
140                                 return SD_VOLUME_MIS;
141
142                         method = 0;
143                 }
144         }
145
146         return method;
147 }
148
149 /* Volume Shadows
150  *
151  * These functions are used to attenuate shadow rays to lights. Both absorption
152  * and scattering will block light, represented by the extinction coefficient. */
153
154 /* homogeneous volume: assume shader evaluation at the starts gives
155  * the extinction coefficient for the entire line segment */
156 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
157 {
158         float3 sigma_t;
159
160         if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
161                 *throughput *= volume_color_transmittance(sigma_t, ray->t);
162 }
163
164 /* heterogeneous volume: integrate stepping through the volume until we
165  * reach the end, get absorbed entirely, or run out of iterations */
166 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
167 {
168         float3 tp = *throughput;
169         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
170
171         /* prepare for stepping */
172         int max_steps = kernel_data.integrator.volume_max_steps;
173         float step = kernel_data.integrator.volume_step_size;
174         float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step;
175
176         /* compute extinction at the start */
177         float t = 0.0f;
178
179         float3 sum = make_float3(0.0f, 0.0f, 0.0f);
180
181         for(int i = 0; i < max_steps; i++) {
182                 /* advance to new position */
183                 float new_t = min(ray->t, (i+1) * step);
184                 float dt = new_t - t;
185
186                 /* use random position inside this segment to sample shader */
187                 if(new_t == ray->t)
188                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
189
190                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
191                 float3 sigma_t;
192
193                 /* compute attenuation over segment */
194                 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
195                         /* Compute expf() only for every Nth step, to save some calculations
196                          * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
197
198                         sum += (-sigma_t * (new_t - t));
199                         if((i & 0x07) == 0) { /* ToDo: Other interval? */
200                                 tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
201
202                                 /* stop if nearly all light is blocked */
203                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
204                                         break;
205                         }
206                 }
207
208                 /* stop if at the end of the volume */
209                 t = new_t;
210                 if(t == ray->t) {
211                         /* Update throughput in case we haven't done it above */
212                         tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
213                         break;
214                 }
215         }
216
217         *throughput = tp;
218 }
219
220 /* get the volume attenuation over line segment defined by ray, with the
221  * assumption that there are no surfaces blocking light between the endpoints */
222 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput)
223 {
224         ShaderData sd;
225         shader_setup_from_volume(kg, &sd, ray, state->bounce, state->transparent_bounce);
226
227         if(volume_stack_is_heterogeneous(kg, state->volume_stack))
228                 kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput);
229         else
230                 kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput);
231 }
232
233 /* Equi-angular sampling as in:
234  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
235
236 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
237 {
238         float t = ray->t;
239
240         float delta = dot((light_P - ray->P) , ray->D);
241         float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
242         float theta_a = -atan2f(delta, D);
243         float theta_b = atan2f(t - delta, D);
244         float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
245
246         *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
247
248         return min(t, delta + t_); /* min is only for float precision errors */
249 }
250
251 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
252 {
253         float delta = dot((light_P - ray->P) , ray->D);
254         float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
255
256         float t = ray->t;
257         float t_ = sample_t - delta;
258
259         float theta_a = -atan2f(delta, D);
260         float theta_b = atan2f(t - delta, D);
261
262         float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
263
264         return pdf;
265 }
266
267 /* Distance sampling */
268
269 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
270 {
271         /* xi is [0, 1[ so log(0) should never happen, division by zero is
272          * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
273         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
274         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
275         float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
276
277         float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
278
279         *transmittance = volume_color_transmittance(sigma_t, sample_t);
280         *pdf = (sigma_t * *transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
281
282         /* todo: optimization: when taken together with hit/miss decision,
283          * the full_transmittance cancels out drops out and xi does not
284          * need to be remapped */
285
286         return sample_t;
287 }
288
289 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
290 {
291         float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
292         float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
293
294         return (sigma_t * transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
295 }
296
297 /* Emission */
298
299 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
300 {
301         /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
302          * this goes to E * t as sigma_t goes to zero
303          *
304          * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
305         float3 emission = coeff->emission;
306
307         if(closure_flag & SD_ABSORPTION) {
308                 float3 sigma_t = coeff->sigma_a + coeff->sigma_s;
309
310                 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
311                 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
312                 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
313         }
314         else
315                 emission *= t;
316         
317         return emission;
318 }
319
320 /* Volume Path */
321
322 /* homogeneous volume: assume shader evaluation at the start gives
323  * the volume shading coefficient for the entire line segment */
324 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
325         PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
326         RNG *rng, bool probalistic_scatter)
327 {
328         VolumeShaderCoefficients coeff;
329
330         if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
331                 return VOLUME_PATH_MISSED;
332
333         int closure_flag = sd->flag;
334         float t = ray->t;
335         float3 new_tp;
336
337 #ifdef __VOLUME_SCATTER__
338         /* randomly scatter, and if we do t is shortened */
339         if(closure_flag & SD_SCATTER) {
340                 /* extinction coefficient */
341                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
342
343                 /* pick random color channel, we use the Veach one-sample
344                  * model with balance heuristic for the channels */
345                 float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
346                 int channel = (int)(rphase*3.0f);
347                 sd->randb_closure = rphase*3.0f - channel;
348
349                 /* decide if we will hit or miss */
350                 bool scatter = true;
351                 float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
352
353                 if(probalistic_scatter) {
354                         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
355                         float sample_transmittance = expf(-sample_sigma_t * t);
356
357                         if(1.0f - xi >= sample_transmittance) {
358                                 scatter = true;
359
360                                 /* rescale random number so we can reuse it */
361                                 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
362
363                         }
364                         else
365                                 scatter = false;
366                 }
367
368                 if(scatter) {
369                         /* scattering */
370                         float3 pdf;
371                         float3 transmittance;
372                         float sample_t;
373
374                         /* distance sampling */
375                         sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
376
377                         /* modifiy pdf for hit/miss decision */
378                         if(probalistic_scatter)
379                                 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
380
381                         new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
382                         t = sample_t;
383                 }
384                 else {
385                         /* no scattering */
386                         float3 transmittance = volume_color_transmittance(sigma_t, t);
387                         float pdf = average(transmittance);
388                         new_tp = *throughput * transmittance / pdf;
389                 }
390         }
391         else 
392 #endif
393         if(closure_flag & SD_ABSORPTION) {
394                 /* absorption only, no sampling needed */
395                 float3 transmittance = volume_color_transmittance(coeff.sigma_a, t);
396                 new_tp = *throughput * transmittance;
397         }
398
399         /* integrate emission attenuated by extinction */
400         if(L && (closure_flag & SD_EMISSION)) {
401                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
402                 float3 transmittance = volume_color_transmittance(sigma_t, ray->t);
403                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
404                 path_radiance_accum_emission(L, *throughput, emission, state->bounce);
405         }
406
407         /* modify throughput */
408         if(closure_flag & (SD_ABSORPTION|SD_SCATTER)) {
409                 *throughput = new_tp;
410
411                 /* prepare to scatter to new direction */
412                 if(t < ray->t) {
413                         /* adjust throughput and move to new location */
414                         sd->P = ray->P + t*ray->D;
415
416                         return VOLUME_PATH_SCATTERED;
417                 }
418         }
419
420         return VOLUME_PATH_ATTENUATED;
421 }
422
423 /* heterogeneous volume distance sampling: integrate stepping through the
424  * volume until we reach the end, get absorbed entirely, or run out of
425  * iterations. this does probalistically scatter or get transmitted through
426  * for path tracing where we don't want to branch. */
427 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
428         PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
429 {
430         float3 tp = *throughput;
431         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
432
433         /* prepare for stepping */
434         int max_steps = kernel_data.integrator.volume_max_steps;
435         float step_size = kernel_data.integrator.volume_step_size;
436         float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
437
438         /* compute coefficients at the start */
439         float t = 0.0f;
440         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
441
442         /* pick random color channel, we use the Veach one-sample
443          * model with balance heuristic for the channels */
444         float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
445         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
446         int channel = (int)(rphase*3.0f);
447         sd->randb_closure = rphase*3.0f - channel;
448         bool has_scatter = false;
449
450         for(int i = 0; i < max_steps; i++) {
451                 /* advance to new position */
452                 float new_t = min(ray->t, (i+1) * step_size);
453                 float dt = new_t - t;
454
455                 /* use random position inside this segment to sample shader */
456                 if(new_t == ray->t)
457                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
458
459                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
460                 VolumeShaderCoefficients coeff;
461
462                 /* compute segment */
463                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
464                         int closure_flag = sd->flag;
465                         float3 new_tp;
466                         float3 transmittance;
467                         bool scatter = false;
468
469                         /* distance sampling */
470 #ifdef __VOLUME_SCATTER__
471                         if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) {
472                                 has_scatter = true;
473
474                                 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
475                                 float3 sigma_s = coeff.sigma_s;
476
477                                 /* compute transmittance over full step */
478                                 transmittance = volume_color_transmittance(sigma_t, dt);
479
480                                 /* decide if we will scatter or continue */
481                                 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
482
483                                 if(1.0f - xi >= sample_transmittance) {
484                                         /* compute sampling distance */
485                                         float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
486                                         float new_dt = -logf(1.0f - xi)/sample_sigma_t;
487                                         new_t = t + new_dt;
488
489                                         /* transmittance and pdf */
490                                         float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt);
491                                         float3 pdf = sigma_t * new_transmittance;
492
493                                         /* throughput */
494                                         new_tp = tp * sigma_s * new_transmittance / average(pdf);
495                                         scatter = true;
496                                 }
497                                 else {
498                                         /* throughput */
499                                         float pdf = average(transmittance);
500                                         new_tp = tp * transmittance / pdf;
501
502                                         /* remap xi so we can reuse it and keep thing stratified */
503                                         xi = 1.0f - (1.0f - xi)/sample_transmittance;
504                                 }
505                         }
506                         else 
507 #endif
508                         if(closure_flag & SD_ABSORPTION) {
509                                 /* absorption only, no sampling needed */
510                                 float3 sigma_a = coeff.sigma_a;
511
512                                 transmittance = volume_color_transmittance(sigma_a, dt);
513                                 new_tp = tp * transmittance;
514                         }
515
516                         /* integrate emission attenuated by absorption */
517                         if(L && (closure_flag & SD_EMISSION)) {
518                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
519                                 path_radiance_accum_emission(L, tp, emission, state->bounce);
520                         }
521
522                         /* modify throughput */
523                         if(closure_flag & (SD_ABSORPTION|SD_SCATTER)) {
524                                 tp = new_tp;
525
526                                 /* stop if nearly all light blocked */
527                                 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
528                                         tp = make_float3(0.0f, 0.0f, 0.0f);
529                                         break;
530                                 }
531                         }
532
533                         /* prepare to scatter to new direction */
534                         if(scatter) {
535                                 /* adjust throughput and move to new location */
536                                 sd->P = ray->P + new_t*ray->D;
537                                 *throughput = tp;
538
539                                 return VOLUME_PATH_SCATTERED;
540                         }
541                         else {
542                                 /* accumulate transmittance */
543                                 accum_transmittance *= transmittance;
544                         }
545                 }
546
547                 /* stop if at the end of the volume */
548                 t = new_t;
549                 if(t == ray->t)
550                         break;
551         }
552
553         *throughput = tp;
554
555         return VOLUME_PATH_ATTENUATED;
556 }
557
558 /* get the volume attenuation and emission over line segment defined by
559  * ray, with the assumption that there are no surfaces blocking light
560  * between the endpoints. distance sampling is used to decide if we will
561  * scatter or not. */
562 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
563         PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
564 {
565         /* workaround to fix correlation bug in T38710, can find better solution
566          * in random number generator later, for now this is done here to not impact
567          * performance of rendering without volumes */
568         RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
569
570         shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
571
572         if(heterogeneous)
573                 return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
574         else
575                 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
576 }
577
578 /* Decoupled Volume Sampling
579  *
580  * VolumeSegment is list of coefficients and transmittance stored at all steps
581  * through a volume. This can then latter be used for decoupled sampling as in:
582  * "Importance Sampling Techniques for Path Tracing in Participating Media"
583  *
584  * On the GPU this is only supported (but currently not enabled)
585  * for homogeneous volumes (1 step), due to
586  * no support for malloc/free and too much stack usage with a fix size array. */
587
588 typedef struct VolumeStep {
589         float3 sigma_s;                         /* scatter coefficient */
590         float3 sigma_t;                         /* extinction coefficient */
591         float3 accum_transmittance;     /* accumulated transmittance including this step */
592         float3 cdf_distance;            /* cumulative density function for distance sampling */
593         float t;                                        /* distance at end of this step */
594         float shade_t;                          /* jittered distance where shading was done in step */
595         int closure_flag;                       /* shader evaluation closure flags */
596 } VolumeStep;
597
598 typedef struct VolumeSegment {
599         VolumeStep stack_step;      /* stack storage for homogeneous step, to avoid malloc */
600         VolumeStep *steps;                      /* recorded steps */
601         int numsteps;                           /* number of steps */
602         int closure_flag;                       /* accumulated closure flags from all steps */
603
604         float3 accum_emission;          /* accumulated emission at end of segment */
605         float3 accum_transmittance;     /* accumulated transmittance at end of segment */
606
607         int sampling_method;            /* volume sampling method */
608 } VolumeSegment;
609
610 /* record volume steps to the end of the volume.
611  *
612  * it would be nice if we could only record up to the point that we need to scatter,
613  * but the entire segment is needed to do always scattering, rather than probalistically
614  * hitting or missing the volume. if we don't know the transmittance at the end of the
615  * volume we can't generate stratified distance samples up to that transmittance */
616 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
617         Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
618 {
619         const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
620
621         /* prepare for volume stepping */
622         int max_steps;
623         float step_size, random_jitter_offset;
624
625         if(heterogeneous) {
626                 const int global_max_steps = kernel_data.integrator.volume_max_steps;
627                 step_size = kernel_data.integrator.volume_step_size;
628                 /* compute exact steps in advance for malloc */
629                 max_steps = max((int)ceilf(ray->t/step_size), 1);
630                 if(max_steps > global_max_steps) {
631                         max_steps = global_max_steps;
632                         step_size = ray->t / (float)max_steps;
633                 }
634                 segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
635                 random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
636         }
637         else {
638                 max_steps = 1;
639                 step_size = ray->t;
640                 random_jitter_offset = 0.0f;
641                 segment->steps = &segment->stack_step;
642         }
643         
644         /* init accumulation variables */
645         float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
646         float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
647         float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
648         float t = 0.0f;
649
650         segment->numsteps = 0;
651         segment->closure_flag = 0;
652         bool is_last_step_empty = false;
653
654         VolumeStep *step = segment->steps;
655
656         for(int i = 0; i < max_steps; i++, step++) {
657                 /* advance to new position */
658                 float new_t = min(ray->t, (i+1) * step_size);
659                 float dt = new_t - t;
660
661                 /* use random position inside this segment to sample shader */
662                 if(heterogeneous && new_t == ray->t)
663                         random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
664
665                 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
666                 VolumeShaderCoefficients coeff;
667
668                 /* compute segment */
669                 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
670                         int closure_flag = sd->flag;
671                         float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
672
673                         /* compute accumulated transmittance */
674                         float3 transmittance = volume_color_transmittance(sigma_t, dt);
675
676                         /* compute emission attenuated by absorption */
677                         if(closure_flag & SD_EMISSION) {
678                                 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
679                                 accum_emission += accum_transmittance * emission;
680                         }
681
682                         accum_transmittance *= transmittance;
683
684                         /* compute pdf for distance sampling */
685                         float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
686                         cdf_distance = cdf_distance + pdf_distance;
687
688                         /* write step data */
689                         step->sigma_t = sigma_t;
690                         step->sigma_s = coeff.sigma_s;
691                         step->closure_flag = closure_flag;
692
693                         segment->closure_flag |= closure_flag;
694
695                         is_last_step_empty = false;
696                         segment->numsteps++;
697                 }
698                 else {
699                         if(is_last_step_empty) {
700                                 /* consecutive empty step, merge */
701                                 step--;
702                         }
703                         else {
704                                 /* store empty step */
705                                 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
706                                 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
707                                 step->closure_flag = 0;
708
709                                 segment->numsteps++;
710                                 is_last_step_empty = true;
711                         }
712                 }
713
714                 step->accum_transmittance = accum_transmittance;
715                 step->cdf_distance = cdf_distance;
716                 step->t = new_t;
717                 step->shade_t = t + random_jitter_offset;
718
719                 /* stop if at the end of the volume */
720                 t = new_t;
721                 if(t == ray->t)
722                         break;
723
724                 /* stop if nearly all light blocked */
725                 if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
726                         break;
727         }
728
729         /* store total emission and transmittance */
730         segment->accum_emission = accum_emission;
731         segment->accum_transmittance = accum_transmittance;
732
733         /* normalize cumulative density function for distance sampling */
734         VolumeStep *last_step = segment->steps + segment->numsteps - 1;
735
736         if(!is_zero(last_step->cdf_distance)) {
737                 VolumeStep *step = &segment->steps[0];
738                 int numsteps = segment->numsteps;
739                 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
740
741                 for(int i = 0; i < numsteps; i++, step++)
742                         step->cdf_distance *= inv_cdf_distance_sum;
743         }
744 }
745
746 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
747 {
748         if(segment->steps != &segment->stack_step)
749                 free(segment->steps);
750 }
751
752 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
753  * marching. this function does not do emission or modify throughput. 
754  *
755  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
756 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
757         KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
758         float3 *throughput, float rphase, float rscatter,
759         const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
760 {
761         kernel_assert(segment->closure_flag & SD_SCATTER);
762
763         /* pick random color channel, we use the Veach one-sample
764          * model with balance heuristic for the channels */
765         int channel = (int)(rphase*3.0f);
766         sd->randb_closure = rphase*3.0f - channel;
767         float xi = rscatter;
768
769         /* probalistic scattering decision based on transmittance */
770         if(probalistic_scatter) {
771                 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
772
773                 if(1.0f - xi >= sample_transmittance) {
774                         /* rescale random number so we can reuse it */
775                         xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
776                 }
777                 else {
778                         *throughput /= sample_transmittance;
779                         return VOLUME_PATH_MISSED;
780                 }
781         }
782
783         VolumeStep *step;
784         float3 transmittance;
785         float pdf, sample_t;
786         float mis_weight = 1.0f;
787         bool distance_sample = true;
788         bool use_mis = false;
789
790         if(segment->sampling_method && light_P) {
791                 if(segment->sampling_method == SD_VOLUME_MIS) {
792                         /* multiple importance sample: randomly pick between
793                          * equiangular and distance sampling strategy */
794                         if(xi < 0.5f) {
795                                 xi *= 2.0f;
796                         }
797                         else {
798                                 xi = (xi - 0.5f)*2.0f;
799                                 distance_sample = false;
800                         }
801
802                         use_mis = true;
803                 }
804                 else {
805                         /* only equiangular sampling */
806                         distance_sample = false;
807                 }
808         }
809
810         /* distance sampling */
811         if(distance_sample) {
812                 /* find step in cdf */
813                 step = segment->steps;
814
815                 float prev_t = 0.0f;
816                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
817
818                 if(segment->numsteps > 1) {
819                         float prev_cdf = 0.0f;
820                         float step_cdf = 1.0f;
821                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
822
823                         for(int i = 0; ; i++, step++) {
824                                 /* todo: optimize using binary search */
825                                 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
826
827                                 if(xi < step_cdf || i == segment->numsteps-1)
828                                         break;
829
830                                 prev_cdf = step_cdf;
831                                 prev_t = step->t;
832                                 prev_cdf_distance = step->cdf_distance;
833                         }
834
835                         /* remap xi so we can reuse it */
836                         xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
837
838                         /* pdf for picking step */
839                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
840                 }
841
842                 /* determine range in which we will sample */
843                 float step_t = step->t - prev_t;
844
845                 /* sample distance and compute transmittance */
846                 float3 distance_pdf;
847                 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
848
849                 /* modifiy pdf for hit/miss decision */
850                 if(probalistic_scatter)
851                         distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
852
853                 pdf = average(distance_pdf * step_pdf_distance);
854
855                 /* multiple importance sampling */
856                 if(use_mis) {
857                         float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
858                         mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
859                 }
860         }
861         /* equi-angular sampling */
862         else {
863                 /* sample distance */
864                 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
865
866                 /* find step in which sampled distance is located */
867                 step = segment->steps;
868
869                 float prev_t = 0.0f;
870                 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
871
872                 if(segment->numsteps > 1) {
873                         float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
874
875                         int numsteps = segment->numsteps;
876                         int high = numsteps - 1;
877                         int low = 0;
878                         int mid;
879
880                         while(low < high) {
881                                 mid = (low + high) >> 1;
882
883                                 if(sample_t < step[mid].t)
884                                         high = mid;
885                                 else if(sample_t >= step[mid + 1].t)
886                                         low = mid + 1;
887                                 else {
888                                         /* found our interval in step[mid] .. step[mid+1] */
889                                         prev_t = step[mid].t;
890                                         prev_cdf_distance = step[mid].cdf_distance;
891                                         step += mid+1;
892                                         break;
893                                 }
894                         }
895
896                         if(low >= numsteps - 1) {
897                                 prev_t = step[numsteps - 1].t;
898                                 prev_cdf_distance = step[numsteps-1].cdf_distance;
899                                 step += numsteps - 1;
900                         }
901
902                         /* pdf for picking step with distance sampling */
903                         step_pdf_distance = step->cdf_distance - prev_cdf_distance;
904                 }
905
906                 /* determine range in which we will sample */
907                 float step_t = step->t - prev_t;
908                 float step_sample_t = sample_t - prev_t;
909
910                 /* compute transmittance */
911                 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
912
913                 /* multiple importance sampling */
914                 if(use_mis) {
915                         float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
916                         float distance_pdf = average(distance_pdf3 * step_pdf_distance);
917                         mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
918                 }
919         }
920
921         /* compute transmittance up to this step */
922         if(step != segment->steps)
923                 transmittance *= (step-1)->accum_transmittance;
924
925         /* modify throughput */
926         *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
927
928         /* evaluate shader to create closures at shading point */
929         if(segment->numsteps > 1) {
930                 sd->P = ray->P + step->shade_t*ray->D;
931
932                 VolumeShaderCoefficients coeff;
933                 volume_shader_sample(kg, sd, state, sd->P, &coeff);
934         }
935
936         /* move to new position */
937         sd->P = ray->P + sample_t*ray->D;
938
939         return VOLUME_PATH_SCATTERED;
940 }
941
942 /* decide if we need to use decoupled or not */
943 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
944 {
945         /* decoupled ray marching for heterogenous volumes not supported on the GPU,
946          * which also means equiangular and multiple importance sampling is not
947          * support for that case */
948 #ifdef __KERNEL_GPU__
949         if(heterogeneous)
950                 return false;
951 #endif
952
953         /* equiangular and multiple importance sampling only implemented for decoupled */
954         if(sampling_method != 0)
955                 return true;
956
957         /* for all light sampling use decoupled, reusing shader evaluations is
958          * typically faster in that case */
959         if(direct)
960                 return kernel_data.integrator.sample_all_lights_direct;
961         else
962                 return kernel_data.integrator.sample_all_lights_indirect;
963 }
964
965 /* Volume Stack
966  *
967  * This is an array of object/shared ID's that the current segment of the path
968  * is inside of. */
969
970 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
971                                          Ray *ray,
972                                          VolumeStack *stack)
973 {
974         /* NULL ray happens in the baker, does it need proper initialization of
975          * camera in volume?
976          */
977         if(!kernel_data.cam.is_inside_volume || ray == NULL) {
978                 /* Camera is guaranteed to be in the air, only take background volume
979                  * into account in this case.
980                  */
981                 if(kernel_data.background.volume_shader != SHADER_NONE) {
982                         stack[0].shader = kernel_data.background.volume_shader;
983                         stack[0].object = PRIM_NONE;
984                         stack[1].shader = SHADER_NONE;
985                 }
986                 else {
987                         stack[0].shader = SHADER_NONE;
988                 }
989                 return;
990         }
991
992         Ray volume_ray = *ray;
993         volume_ray.t = FLT_MAX;
994
995         int stack_index = 0, enclosed_index = 0;
996         int enclosed_volumes[VOLUME_STACK_SIZE];
997         int step = 0;
998
999         while(stack_index < VOLUME_STACK_SIZE - 1 &&
1000               enclosed_index < VOLUME_STACK_SIZE - 1 &&
1001               step < 2 * VOLUME_STACK_SIZE)
1002         {
1003                 Intersection isect;
1004                 if(!scene_intersect_volume(kg, &volume_ray, &isect)) {
1005                         break;
1006                 }
1007
1008                 ShaderData sd;
1009                 shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0);
1010                 if(sd.flag & SD_BACKFACING) {
1011                         /* If ray exited the volume and never entered to that volume
1012                          * it means that camera is inside such a volume.
1013                          */
1014                         bool is_enclosed = false;
1015                         for(int i = 0; i < enclosed_index; ++i) {
1016                                 if(enclosed_volumes[i] == sd.object) {
1017                                         is_enclosed = true;
1018                                         break;
1019                                 }
1020                         }
1021                         if(is_enclosed == false) {
1022                                 stack[stack_index].object = sd.object;
1023                                 stack[stack_index].shader = sd.shader;
1024                                 ++stack_index;
1025                         }
1026                 }
1027                 else {
1028                         /* If ray from camera enters the volume, this volume shouldn't
1029                          * be added to the stack on exit.
1030                          */
1031                         enclosed_volumes[enclosed_index++] = sd.object;
1032                 }
1033
1034                 /* Move ray forward. */
1035                 volume_ray.P = ray_offset(sd.P, -sd.Ng);
1036                 ++step;
1037         }
1038         /* stack_index of 0 means quick checks outside of the kernel gave false
1039          * positive, nothing to worry about, just we've wasted quite a few of
1040          * ticks just to come into conclusion that camera is in the air.
1041          *
1042          * In this case we're doing the same above -- check whether background has
1043          * volume.
1044          */
1045         if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1046                 stack[0].shader = kernel_data.background.volume_shader;
1047                 stack[0].object = PRIM_NONE;
1048                 stack[1].shader = SHADER_NONE;
1049         }
1050         else {
1051                 stack[stack_index].shader = SHADER_NONE;
1052         }
1053 }
1054
1055 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack)
1056 {
1057         /* todo: we should have some way for objects to indicate if they want the
1058          * world shader to work inside them. excluding it by default is problematic
1059          * because non-volume objects can't be assumed to be closed manifolds */
1060
1061         if(!(sd->flag & SD_HAS_VOLUME))
1062                 return;
1063         
1064         if(sd->flag & SD_BACKFACING) {
1065                 /* exit volume object: remove from stack */
1066                 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1067                         if(stack[i].object == sd->object) {
1068                                 /* shift back next stack entries */
1069                                 do {
1070                                         stack[i] = stack[i+1];
1071                                         i++;
1072                                 }
1073                                 while(stack[i].shader != SHADER_NONE);
1074
1075                                 return;
1076                         }
1077                 }
1078         }
1079         else {
1080                 /* enter volume object: add to stack */
1081                 int i;
1082
1083                 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
1084                         /* already in the stack? then we have nothing to do */
1085                         if(stack[i].object == sd->object)
1086                                 return;
1087                 }
1088
1089                 /* if we exceed the stack limit, ignore */
1090                 if(i >= VOLUME_STACK_SIZE-1)
1091                         return;
1092
1093                 /* add to the end of the stack */
1094                 stack[i].shader = sd->shader;
1095                 stack[i].object = sd->object;
1096                 stack[i+1].shader = SHADER_NONE;
1097         }
1098 }
1099
1100 #ifdef __SUBSURFACE__
1101 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1102                                                           Ray *ray,
1103                                                           VolumeStack *stack)
1104 {
1105         kernel_assert(kernel_data.integrator.use_volumes);
1106
1107         Ray volume_ray = *ray;
1108         Intersection isect;
1109         int step = 0;
1110         while(step < VOLUME_STACK_SIZE &&
1111                   scene_intersect_volume(kg, &volume_ray, &isect))
1112         {
1113                 ShaderData sd;
1114                 shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0);
1115                 kernel_volume_stack_enter_exit(kg, &sd, stack);
1116
1117                 /* Move ray forward. */
1118                 volume_ray.P = ray_offset(sd.P, -sd.Ng);
1119                 volume_ray.t -= sd.ray_length;
1120                 ++step;
1121         }
1122 }
1123 #endif
1124
1125 CCL_NAMESPACE_END