2 * Copyright 2011-2013 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 /* Events for probalistic scattering */
21 typedef enum VolumeIntegrateResult {
22 VOLUME_PATH_SCATTERED = 0,
23 VOLUME_PATH_ATTENUATED = 1,
24 VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
27 /* Volume shader properties
29 * extinction coefficient = absorption coefficient + scattering coefficient
30 * sigma_t = sigma_a + sigma_s */
32 typedef struct VolumeShaderCoefficients {
36 } VolumeShaderCoefficients;
38 /* evaluate shader to get extinction coefficient at P */
39 ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, float3 *extinction)
42 shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
44 if(!(sd->flag & (SD_ABSORPTION|SD_SCATTER)))
47 float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
49 for(int i = 0; i < sd->num_closure; i++) {
50 const ShaderClosure *sc = &sd->closure[i];
52 if(CLOSURE_IS_VOLUME(sc->type))
53 sigma_t += sc->weight;
56 *extinction = sigma_t;
60 /* evaluate shader to get absorption, scattering and emission at P */
61 ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, VolumeShaderCoefficients *coeff)
64 shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, SHADER_CONTEXT_VOLUME);
66 if(!(sd->flag & (SD_ABSORPTION|SD_SCATTER|SD_EMISSION)))
69 coeff->sigma_a = make_float3(0.0f, 0.0f, 0.0f);
70 coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
71 coeff->emission = make_float3(0.0f, 0.0f, 0.0f);
73 for(int i = 0; i < sd->num_closure; i++) {
74 const ShaderClosure *sc = &sd->closure[i];
76 if(sc->type == CLOSURE_VOLUME_ABSORPTION_ID)
77 coeff->sigma_a += sc->weight;
78 else if(sc->type == CLOSURE_EMISSION_ID)
79 coeff->emission += sc->weight;
80 else if(CLOSURE_IS_VOLUME(sc->type))
81 coeff->sigma_s += sc->weight;
84 /* when at the max number of bounces, treat scattering as absorption */
85 if(sd->flag & SD_SCATTER) {
86 if(state->volume_bounce >= kernel_data.integrator.max_volume_bounce) {
87 coeff->sigma_a += coeff->sigma_s;
88 coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
89 sd->flag &= ~SD_SCATTER;
90 sd->flag |= SD_ABSORPTION;
97 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
99 return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
102 ccl_device float kernel_volume_channel_get(float3 value, int channel)
104 return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
107 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
109 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
110 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
112 if(shader_flag & SD_HETEROGENEOUS_VOLUME)
119 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
121 if(kernel_data.integrator.num_all_lights == 0)
126 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
127 int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
129 if(shader_flag & SD_VOLUME_MIS) {
130 return SD_VOLUME_MIS;
132 else if(shader_flag & SD_VOLUME_EQUIANGULAR) {
134 return SD_VOLUME_MIS;
136 method = SD_VOLUME_EQUIANGULAR;
139 if(method == SD_VOLUME_EQUIANGULAR)
140 return SD_VOLUME_MIS;
151 * These functions are used to attenuate shadow rays to lights. Both absorption
152 * and scattering will block light, represented by the extinction coefficient. */
154 /* homogeneous volume: assume shader evaluation at the starts gives
155 * the extinction coefficient for the entire line segment */
156 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
160 if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
161 *throughput *= volume_color_transmittance(sigma_t, ray->t);
164 /* heterogeneous volume: integrate stepping through the volume until we
165 * reach the end, get absorbed entirely, or run out of iterations */
166 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
168 float3 tp = *throughput;
169 const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
171 /* prepare for stepping */
172 int max_steps = kernel_data.integrator.volume_max_steps;
173 float step = kernel_data.integrator.volume_step_size;
174 float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step;
176 /* compute extinction at the start */
179 float3 sum = make_float3(0.0f, 0.0f, 0.0f);
181 for(int i = 0; i < max_steps; i++) {
182 /* advance to new position */
183 float new_t = min(ray->t, (i+1) * step);
184 float dt = new_t - t;
186 /* use random position inside this segment to sample shader */
188 random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
190 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
193 /* compute attenuation over segment */
194 if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
195 /* Compute expf() only for every Nth step, to save some calculations
196 * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
198 sum += (-sigma_t * (new_t - t));
199 if((i & 0x07) == 0) { /* ToDo: Other interval? */
200 tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
202 /* stop if nearly all light is blocked */
203 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
208 /* stop if at the end of the volume */
211 /* Update throughput in case we haven't done it above */
212 tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z));
220 /* get the volume attenuation over line segment defined by ray, with the
221 * assumption that there are no surfaces blocking light between the endpoints */
222 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput)
225 shader_setup_from_volume(kg, &sd, ray);
227 if(volume_stack_is_heterogeneous(kg, state->volume_stack))
228 kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput);
230 kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput);
233 /* Equi-angular sampling as in:
234 * "Importance Sampling Techniques for Path Tracing in Participating Media" */
236 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
240 float delta = dot((light_P - ray->P) , ray->D);
241 float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
242 float theta_a = -atan2f(delta, D);
243 float theta_b = atan2f(t - delta, D);
244 float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
246 *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
248 return min(t, delta + t_); /* min is only for float precision errors */
251 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
253 float delta = dot((light_P - ray->P) , ray->D);
254 float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
257 float t_ = sample_t - delta;
259 float theta_a = -atan2f(delta, D);
260 float theta_b = atan2f(t - delta, D);
262 float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
267 /* Distance sampling */
269 ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
271 /* xi is [0, 1[ so log(0) should never happen, division by zero is
272 * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
273 float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
274 float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
275 float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
277 float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
279 *transmittance = volume_color_transmittance(sigma_t, sample_t);
280 *pdf = (sigma_t * *transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
282 /* todo: optimization: when taken together with hit/miss decision,
283 * the full_transmittance cancels out drops out and xi does not
284 * need to be remapped */
289 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
291 float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
292 float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
294 return (sigma_t * transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
299 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
301 /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
302 * this goes to E * t as sigma_t goes to zero
304 * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
305 float3 emission = coeff->emission;
307 if(closure_flag & SD_ABSORPTION) {
308 float3 sigma_t = coeff->sigma_a + coeff->sigma_s;
310 emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
311 emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
312 emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
322 /* homogeneous volume: assume shader evaluation at the start gives
323 * the volume shading coefficient for the entire line segment */
324 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
325 PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
326 RNG *rng, bool probalistic_scatter)
328 VolumeShaderCoefficients coeff;
330 if(!volume_shader_sample(kg, sd, state, ray->P, &coeff))
331 return VOLUME_PATH_MISSED;
333 int closure_flag = sd->flag;
337 #ifdef __VOLUME_SCATTER__
338 /* randomly scatter, and if we do t is shortened */
339 if(closure_flag & SD_SCATTER) {
340 /* extinction coefficient */
341 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
343 /* pick random color channel, we use the Veach one-sample
344 * model with balance heuristic for the channels */
345 float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
346 int channel = (int)(rphase*3.0f);
347 sd->randb_closure = rphase*3.0f - channel;
349 /* decide if we will hit or miss */
351 float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
353 if(probalistic_scatter) {
354 float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
355 float sample_transmittance = expf(-sample_sigma_t * t);
357 if(1.0f - xi >= sample_transmittance) {
360 /* rescale random number so we can reuse it */
361 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
371 float3 transmittance;
374 /* distance sampling */
375 sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
377 /* modify pdf for hit/miss decision */
378 if(probalistic_scatter)
379 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
381 new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
386 float3 transmittance = volume_color_transmittance(sigma_t, t);
387 float pdf = average(transmittance);
388 new_tp = *throughput * transmittance / pdf;
393 if(closure_flag & SD_ABSORPTION) {
394 /* absorption only, no sampling needed */
395 float3 transmittance = volume_color_transmittance(coeff.sigma_a, t);
396 new_tp = *throughput * transmittance;
399 /* integrate emission attenuated by extinction */
400 if(L && (closure_flag & SD_EMISSION)) {
401 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
402 float3 transmittance = volume_color_transmittance(sigma_t, ray->t);
403 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
404 path_radiance_accum_emission(L, *throughput, emission, state->bounce);
407 /* modify throughput */
408 if(closure_flag & (SD_ABSORPTION|SD_SCATTER)) {
409 *throughput = new_tp;
411 /* prepare to scatter to new direction */
413 /* adjust throughput and move to new location */
414 sd->P = ray->P + t*ray->D;
416 return VOLUME_PATH_SCATTERED;
420 return VOLUME_PATH_ATTENUATED;
423 /* heterogeneous volume distance sampling: integrate stepping through the
424 * volume until we reach the end, get absorbed entirely, or run out of
425 * iterations. this does probabilistically scatter or get transmitted through
426 * for path tracing where we don't want to branch. */
427 ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
428 PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
430 float3 tp = *throughput;
431 const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
433 /* prepare for stepping */
434 int max_steps = kernel_data.integrator.volume_max_steps;
435 float step_size = kernel_data.integrator.volume_step_size;
436 float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
438 /* compute coefficients at the start */
440 float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
442 /* pick random color channel, we use the Veach one-sample
443 * model with balance heuristic for the channels */
444 float xi = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
445 float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
446 int channel = (int)(rphase*3.0f);
447 sd->randb_closure = rphase*3.0f - channel;
448 bool has_scatter = false;
450 for(int i = 0; i < max_steps; i++) {
451 /* advance to new position */
452 float new_t = min(ray->t, (i+1) * step_size);
453 float dt = new_t - t;
455 /* use random position inside this segment to sample shader */
457 random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
459 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
460 VolumeShaderCoefficients coeff;
462 /* compute segment */
463 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
464 int closure_flag = sd->flag;
466 float3 transmittance;
467 bool scatter = false;
469 /* distance sampling */
470 #ifdef __VOLUME_SCATTER__
471 if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) {
474 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
475 float3 sigma_s = coeff.sigma_s;
477 /* compute transmittance over full step */
478 transmittance = volume_color_transmittance(sigma_t, dt);
480 /* decide if we will scatter or continue */
481 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
483 if(1.0f - xi >= sample_transmittance) {
484 /* compute sampling distance */
485 float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
486 float new_dt = -logf(1.0f - xi)/sample_sigma_t;
489 /* transmittance and pdf */
490 float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt);
491 float3 pdf = sigma_t * new_transmittance;
494 new_tp = tp * sigma_s * new_transmittance / average(pdf);
499 float pdf = average(transmittance);
500 new_tp = tp * transmittance / pdf;
502 /* remap xi so we can reuse it and keep thing stratified */
503 xi = 1.0f - (1.0f - xi)/sample_transmittance;
508 if(closure_flag & SD_ABSORPTION) {
509 /* absorption only, no sampling needed */
510 float3 sigma_a = coeff.sigma_a;
512 transmittance = volume_color_transmittance(sigma_a, dt);
513 new_tp = tp * transmittance;
516 /* integrate emission attenuated by absorption */
517 if(L && (closure_flag & SD_EMISSION)) {
518 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
519 path_radiance_accum_emission(L, tp, emission, state->bounce);
522 /* modify throughput */
523 if(closure_flag & (SD_ABSORPTION|SD_SCATTER)) {
526 /* stop if nearly all light blocked */
527 if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
528 tp = make_float3(0.0f, 0.0f, 0.0f);
533 /* prepare to scatter to new direction */
535 /* adjust throughput and move to new location */
536 sd->P = ray->P + new_t*ray->D;
539 return VOLUME_PATH_SCATTERED;
542 /* accumulate transmittance */
543 accum_transmittance *= transmittance;
547 /* stop if at the end of the volume */
555 return VOLUME_PATH_ATTENUATED;
558 /* get the volume attenuation and emission over line segment defined by
559 * ray, with the assumption that there are no surfaces blocking light
560 * between the endpoints. distance sampling is used to decide if we will
562 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
563 PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
565 /* workaround to fix correlation bug in T38710, can find better solution
566 * in random number generator later, for now this is done here to not impact
567 * performance of rendering without volumes */
568 RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
570 shader_setup_from_volume(kg, sd, ray);
573 return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
575 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
578 /* Decoupled Volume Sampling
580 * VolumeSegment is list of coefficients and transmittance stored at all steps
581 * through a volume. This can then later be used for decoupled sampling as in:
582 * "Importance Sampling Techniques for Path Tracing in Participating Media"
584 * On the GPU this is only supported (but currently not enabled)
585 * for homogeneous volumes (1 step), due to
586 * no support for malloc/free and too much stack usage with a fix size array. */
588 typedef struct VolumeStep {
589 float3 sigma_s; /* scatter coefficient */
590 float3 sigma_t; /* extinction coefficient */
591 float3 accum_transmittance; /* accumulated transmittance including this step */
592 float3 cdf_distance; /* cumulative density function for distance sampling */
593 float t; /* distance at end of this step */
594 float shade_t; /* jittered distance where shading was done in step */
595 int closure_flag; /* shader evaluation closure flags */
598 typedef struct VolumeSegment {
599 VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */
600 VolumeStep *steps; /* recorded steps */
601 int numsteps; /* number of steps */
602 int closure_flag; /* accumulated closure flags from all steps */
604 float3 accum_emission; /* accumulated emission at end of segment */
605 float3 accum_transmittance; /* accumulated transmittance at end of segment */
607 int sampling_method; /* volume sampling method */
610 /* record volume steps to the end of the volume.
612 * it would be nice if we could only record up to the point that we need to scatter,
613 * but the entire segment is needed to do always scattering, rather than probabilistically
614 * hitting or missing the volume. if we don't know the transmittance at the end of the
615 * volume we can't generate stratified distance samples up to that transmittance */
616 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
617 Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
619 const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
621 /* prepare for volume stepping */
623 float step_size, random_jitter_offset;
626 const int global_max_steps = kernel_data.integrator.volume_max_steps;
627 step_size = kernel_data.integrator.volume_step_size;
628 /* compute exact steps in advance for malloc */
629 max_steps = max((int)ceilf(ray->t/step_size), 1);
630 /* NOTE: For the branched path tracing it's possible to have direct
631 * and indirect light integration both having volume segments allocated.
632 * We detect this using index in the pre-allocated memory. Currently we
633 * only support two segments allocated at a time, if more needed some
634 * modifications to the KernelGlobals will be needed.
636 * This gives us restrictions that decoupled record should only happen
637 * in the stack manner, meaning if there's subsequent call of decoupled
638 * record it'll need to free memory before it's caller frees memory.
640 const int index = kg->decoupled_volume_steps_index;
641 assert(index < sizeof(kg->decoupled_volume_steps) /
642 sizeof(*kg->decoupled_volume_steps));
643 if(max_steps > global_max_steps) {
644 max_steps = global_max_steps;
645 step_size = ray->t / (float)max_steps;
647 if(kg->decoupled_volume_steps[index] == NULL) {
648 kg->decoupled_volume_steps[index] =
649 (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
651 segment->steps = kg->decoupled_volume_steps[index];
652 random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
653 ++kg->decoupled_volume_steps_index;
658 random_jitter_offset = 0.0f;
659 segment->steps = &segment->stack_step;
662 /* init accumulation variables */
663 float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
664 float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
665 float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
668 segment->numsteps = 0;
669 segment->closure_flag = 0;
670 bool is_last_step_empty = false;
672 VolumeStep *step = segment->steps;
674 for(int i = 0; i < max_steps; i++, step++) {
675 /* advance to new position */
676 float new_t = min(ray->t, (i+1) * step_size);
677 float dt = new_t - t;
679 /* use random position inside this segment to sample shader */
680 if(heterogeneous && new_t == ray->t)
681 random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
683 float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
684 VolumeShaderCoefficients coeff;
686 /* compute segment */
687 if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
688 int closure_flag = sd->flag;
689 float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
691 /* compute accumulated transmittance */
692 float3 transmittance = volume_color_transmittance(sigma_t, dt);
694 /* compute emission attenuated by absorption */
695 if(closure_flag & SD_EMISSION) {
696 float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
697 accum_emission += accum_transmittance * emission;
700 accum_transmittance *= transmittance;
702 /* compute pdf for distance sampling */
703 float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
704 cdf_distance = cdf_distance + pdf_distance;
706 /* write step data */
707 step->sigma_t = sigma_t;
708 step->sigma_s = coeff.sigma_s;
709 step->closure_flag = closure_flag;
711 segment->closure_flag |= closure_flag;
713 is_last_step_empty = false;
717 if(is_last_step_empty) {
718 /* consecutive empty step, merge */
722 /* store empty step */
723 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
724 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
725 step->closure_flag = 0;
728 is_last_step_empty = true;
732 step->accum_transmittance = accum_transmittance;
733 step->cdf_distance = cdf_distance;
735 step->shade_t = t + random_jitter_offset;
737 /* stop if at the end of the volume */
742 /* stop if nearly all light blocked */
743 if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps)
747 /* store total emission and transmittance */
748 segment->accum_emission = accum_emission;
749 segment->accum_transmittance = accum_transmittance;
751 /* normalize cumulative density function for distance sampling */
752 VolumeStep *last_step = segment->steps + segment->numsteps - 1;
754 if(!is_zero(last_step->cdf_distance)) {
755 VolumeStep *step = &segment->steps[0];
756 int numsteps = segment->numsteps;
757 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
759 for(int i = 0; i < numsteps; i++, step++)
760 step->cdf_distance *= inv_cdf_distance_sum;
764 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
766 if(segment->steps != &segment->stack_step) {
767 /* NOTE: We only allow free last allocated segment.
768 * No random order of alloc/free is supported.
770 assert(kg->decoupled_volume_steps_index > 0);
771 assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
772 --kg->decoupled_volume_steps_index;
776 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
779 * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
780 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
781 KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
782 float3 *throughput, float rphase, float rscatter,
783 const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter)
785 kernel_assert(segment->closure_flag & SD_SCATTER);
787 /* pick random color channel, we use the Veach one-sample
788 * model with balance heuristic for the channels */
789 int channel = (int)(rphase*3.0f);
790 sd->randb_closure = rphase*3.0f - channel;
793 /* probabilistic scattering decision based on transmittance */
794 if(probalistic_scatter) {
795 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
797 if(1.0f - xi >= sample_transmittance) {
798 /* rescale random number so we can reuse it */
799 xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance);
802 *throughput /= sample_transmittance;
803 return VOLUME_PATH_MISSED;
808 float3 transmittance;
810 float mis_weight = 1.0f;
811 bool distance_sample = true;
812 bool use_mis = false;
814 if(segment->sampling_method && light_P) {
815 if(segment->sampling_method == SD_VOLUME_MIS) {
816 /* multiple importance sample: randomly pick between
817 * equiangular and distance sampling strategy */
822 xi = (xi - 0.5f)*2.0f;
823 distance_sample = false;
829 /* only equiangular sampling */
830 distance_sample = false;
834 /* distance sampling */
835 if(distance_sample) {
836 /* find step in cdf */
837 step = segment->steps;
840 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
842 if(segment->numsteps > 1) {
843 float prev_cdf = 0.0f;
844 float step_cdf = 1.0f;
845 float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
847 for(int i = 0; ; i++, step++) {
848 /* todo: optimize using binary search */
849 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
851 if(xi < step_cdf || i == segment->numsteps-1)
856 prev_cdf_distance = step->cdf_distance;
859 /* remap xi so we can reuse it */
860 xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
862 /* pdf for picking step */
863 step_pdf_distance = step->cdf_distance - prev_cdf_distance;
866 /* determine range in which we will sample */
867 float step_t = step->t - prev_t;
869 /* sample distance and compute transmittance */
871 sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
873 /* modify pdf for hit/miss decision */
874 if(probalistic_scatter)
875 distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
877 pdf = average(distance_pdf * step_pdf_distance);
879 /* multiple importance sampling */
881 float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
882 mis_weight = 2.0f*power_heuristic(pdf, equi_pdf);
885 /* equi-angular sampling */
887 /* sample distance */
888 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
890 /* find step in which sampled distance is located */
891 step = segment->steps;
894 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
896 if(segment->numsteps > 1) {
897 float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
899 int numsteps = segment->numsteps;
900 int high = numsteps - 1;
905 mid = (low + high) >> 1;
907 if(sample_t < step[mid].t)
909 else if(sample_t >= step[mid + 1].t)
912 /* found our interval in step[mid] .. step[mid+1] */
913 prev_t = step[mid].t;
914 prev_cdf_distance = step[mid].cdf_distance;
920 if(low >= numsteps - 1) {
921 prev_t = step[numsteps - 1].t;
922 prev_cdf_distance = step[numsteps-1].cdf_distance;
923 step += numsteps - 1;
926 /* pdf for picking step with distance sampling */
927 step_pdf_distance = step->cdf_distance - prev_cdf_distance;
930 /* determine range in which we will sample */
931 float step_t = step->t - prev_t;
932 float step_sample_t = sample_t - prev_t;
934 /* compute transmittance */
935 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
937 /* multiple importance sampling */
939 float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
940 float distance_pdf = average(distance_pdf3 * step_pdf_distance);
941 mis_weight = 2.0f*power_heuristic(pdf, distance_pdf);
945 /* compute transmittance up to this step */
946 if(step != segment->steps)
947 transmittance *= (step-1)->accum_transmittance;
949 /* modify throughput */
950 *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
952 /* evaluate shader to create closures at shading point */
953 if(segment->numsteps > 1) {
954 sd->P = ray->P + step->shade_t*ray->D;
956 VolumeShaderCoefficients coeff;
957 volume_shader_sample(kg, sd, state, sd->P, &coeff);
960 /* move to new position */
961 sd->P = ray->P + sample_t*ray->D;
963 return VOLUME_PATH_SCATTERED;
966 /* decide if we need to use decoupled or not */
967 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
969 /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
970 * which also means equiangular and multiple importance sampling is not
971 * support for that case */
972 #ifdef __KERNEL_GPU__
977 /* equiangular and multiple importance sampling only implemented for decoupled */
978 if(sampling_method != 0)
981 /* for all light sampling use decoupled, reusing shader evaluations is
982 * typically faster in that case */
984 return kernel_data.integrator.sample_all_lights_direct;
986 return kernel_data.integrator.sample_all_lights_indirect;
991 * This is an array of object/shared ID's that the current segment of the path
994 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
998 /* NULL ray happens in the baker, does it need proper initialization of
1001 if(!kernel_data.cam.is_inside_volume || ray == NULL) {
1002 /* Camera is guaranteed to be in the air, only take background volume
1003 * into account in this case.
1005 if(kernel_data.background.volume_shader != SHADER_NONE) {
1006 stack[0].shader = kernel_data.background.volume_shader;
1007 stack[0].object = PRIM_NONE;
1008 stack[1].shader = SHADER_NONE;
1011 stack[0].shader = SHADER_NONE;
1016 Ray volume_ray = *ray;
1017 volume_ray.t = FLT_MAX;
1019 int stack_index = 0, enclosed_index = 0;
1021 const uint visibility = PATH_RAY_ALL_VISIBILITY | kernel_data.integrator.layer_flag;
1022 #ifdef __VOLUME_RECORD_ALL__
1023 Intersection hits[2*VOLUME_STACK_SIZE];
1024 uint num_hits = scene_intersect_volume_all(kg,
1027 2*VOLUME_STACK_SIZE,
1030 int enclosed_volumes[VOLUME_STACK_SIZE];
1031 Intersection *isect = hits;
1033 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1035 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1037 shader_setup_from_ray(kg, &sd, isect, &volume_ray);
1038 if(sd.flag & SD_BACKFACING) {
1039 bool need_add = true;
1040 for(int i = 0; i < enclosed_index && need_add; ++i) {
1041 /* If ray exited the volume and never entered to that volume
1042 * it means that camera is inside such a volume.
1044 if(enclosed_volumes[i] == sd.object) {
1048 for(int i = 0; i < stack_index && need_add; ++i) {
1049 /* Don't add intersections twice. */
1050 if(stack[i].object == sd.object) {
1056 stack[stack_index].object = sd.object;
1057 stack[stack_index].shader = sd.shader;
1062 /* If ray from camera enters the volume, this volume shouldn't
1063 * be added to the stack on exit.
1065 enclosed_volumes[enclosed_index++] = sd.object;
1070 int enclosed_volumes[VOLUME_STACK_SIZE];
1073 while(stack_index < VOLUME_STACK_SIZE - 1 &&
1074 enclosed_index < VOLUME_STACK_SIZE - 1 &&
1075 step < 2 * VOLUME_STACK_SIZE)
1078 if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
1083 shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
1084 if(sd.flag & SD_BACKFACING) {
1085 /* If ray exited the volume and never entered to that volume
1086 * it means that camera is inside such a volume.
1088 bool need_add = true;
1089 for(int i = 0; i < enclosed_index && need_add; ++i) {
1090 /* If ray exited the volume and never entered to that volume
1091 * it means that camera is inside such a volume.
1093 if(enclosed_volumes[i] == sd.object) {
1097 for(int i = 0; i < stack_index && need_add; ++i) {
1098 /* Don't add intersections twice. */
1099 if(stack[i].object == sd.object) {
1105 stack[stack_index].object = sd.object;
1106 stack[stack_index].shader = sd.shader;
1111 /* If ray from camera enters the volume, this volume shouldn't
1112 * be added to the stack on exit.
1114 enclosed_volumes[enclosed_index++] = sd.object;
1117 /* Move ray forward. */
1118 volume_ray.P = ray_offset(sd.P, -sd.Ng);
1122 /* stack_index of 0 means quick checks outside of the kernel gave false
1123 * positive, nothing to worry about, just we've wasted quite a few of
1124 * ticks just to come into conclusion that camera is in the air.
1126 * In this case we're doing the same above -- check whether background has
1129 if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1130 stack[0].shader = kernel_data.background.volume_shader;
1131 stack[0].object = PRIM_NONE;
1132 stack[1].shader = SHADER_NONE;
1135 stack[stack_index].shader = SHADER_NONE;
1139 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack)
1141 /* todo: we should have some way for objects to indicate if they want the
1142 * world shader to work inside them. excluding it by default is problematic
1143 * because non-volume objects can't be assumed to be closed manifolds */
1145 if(!(sd->flag & SD_HAS_VOLUME))
1148 if(sd->flag & SD_BACKFACING) {
1149 /* exit volume object: remove from stack */
1150 for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
1151 if(stack[i].object == sd->object) {
1152 /* shift back next stack entries */
1154 stack[i] = stack[i+1];
1157 while(stack[i].shader != SHADER_NONE);
1164 /* enter volume object: add to stack */
1167 for(i = 0; stack[i].shader != SHADER_NONE; i++) {
1168 /* already in the stack? then we have nothing to do */
1169 if(stack[i].object == sd->object)
1173 /* if we exceed the stack limit, ignore */
1174 if(i >= VOLUME_STACK_SIZE-1)
1177 /* add to the end of the stack */
1178 stack[i].shader = sd->shader;
1179 stack[i].object = sd->object;
1180 stack[i+1].shader = SHADER_NONE;
1184 #ifdef __SUBSURFACE__
1185 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1189 kernel_assert(kernel_data.integrator.use_volumes);
1191 Ray volume_ray = *ray;
1193 # ifdef __VOLUME_RECORD_ALL__
1194 Intersection hits[2*VOLUME_STACK_SIZE];
1195 uint num_hits = scene_intersect_volume_all(kg,
1198 2*VOLUME_STACK_SIZE,
1199 PATH_RAY_ALL_VISIBILITY);
1201 Intersection *isect = hits;
1203 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1205 for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
1207 shader_setup_from_ray(kg, &sd, isect, &volume_ray);
1208 kernel_volume_stack_enter_exit(kg, &sd, stack);
1214 while(step < 2 * VOLUME_STACK_SIZE &&
1215 scene_intersect_volume(kg,
1218 PATH_RAY_ALL_VISIBILITY))
1221 shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
1222 kernel_volume_stack_enter_exit(kg, &sd, stack);
1224 /* Move ray forward. */
1225 volume_ray.P = ray_offset(sd.P, -sd.Ng);
1226 volume_ray.t -= sd.ray_length;