return;
}
- KernelGlobals kg = kernel_globals;
-
-#ifdef WITH_OSL
- OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
-#endif
-
+ KernelGlobals kg = thread_kernel_globals_init();
RenderTile tile;
void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
}
}
-#ifdef WITH_OSL
- OSLShader::thread_free(&kg);
-#endif
+ thread_kernel_globals_free(&kg);
}
void thread_film_convert(DeviceTask& task)
{
task_pool.cancel();
}
+
+protected:
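+ /* Make a per-thread copy of KernelGlobals, clearing all lazily
+  * allocated run-time storage so each thread allocates its own
+  * buffers on first use. Also performs per-thread OSL initialization
+  * when OSL is enabled. */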
+ inline KernelGlobals thread_kernel_globals_init()
+ {
+ KernelGlobals kg = kernel_globals;
+ kg.transparent_shadow_intersections = NULL;
+ const int decoupled_count = sizeof(kg.decoupled_volume_steps) /
+ sizeof(*kg.decoupled_volume_steps);
+ for(int i = 0; i < decoupled_count; ++i) {
+ kg.decoupled_volume_steps[i] = NULL;
+ }
+ kg.decoupled_volume_steps_index = 0;
+#ifdef WITH_OSL
+ OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
+#endif
+ return kg;
+ }
+
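+ /* Release the per-thread run-time storage allocated on demand during
+  * path tracing, and free the per-thread OSL data when OSL is
+  * enabled. */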
+ inline void thread_kernel_globals_free(KernelGlobals *kg)
+ {
+ if(kg->transparent_shadow_intersections != NULL) {
+ free(kg->transparent_shadow_intersections);
+ }
+ const int decoupled_count = sizeof(kg->decoupled_volume_steps) /
+ sizeof(*kg->decoupled_volume_steps);
+ for(int i = 0; i < decoupled_count; ++i) {
+ if(kg->decoupled_volume_steps[i] != NULL) {
+ free(kg->decoupled_volume_steps[i]);
+ }
+ }
+#ifdef WITH_OSL
+ OSLShader::thread_free(kg);
+#endif
+ }
};
Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background)
struct OSLShadingSystem;
# endif
+struct Intersection;
+struct VolumeStep;
+
typedef struct KernelGlobals {
texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_IMAGES_CPU];
texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_IMAGES_CPU];
OSLThreadData *osl_tdata;
# endif
+ /* **** Run-time data **** */
+
+ /* Heap-allocated storage for transparent shadow intersections. */
+ Intersection *transparent_shadow_intersections;
+
+ /* Storage for decoupled volume steps. */
+ VolumeStep *decoupled_volume_steps[2];
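+ /* Number of slots in decoupled_volume_steps currently in use, which
+  * is also the index of the next free slot. */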
+ int decoupled_volume_steps_index;
} KernelGlobals;
#endif /* __KERNEL_CPU__ */
/* intersect to find an opaque surface, or record all transparent surface hits */
Intersection hits_stack[STACK_MAX_HITS];
Intersection *hits = hits_stack;
- uint max_hits = kernel_data.integrator.transparent_max_bounce - state->transparent_bounce - 1;
+ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+ uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
/* Prefer to use the stack, but fall back to heap storage when the
 * maximum number of hits is too deep. We need max_hits + 1 entries of
 * storage because scene_intersect_shadow_all() first stores a hit and
 * only then checks whether the limit is exceeded. */
- if(max_hits + 1 > STACK_MAX_HITS)
- hits = (Intersection*)malloc(sizeof(Intersection)*(max_hits + 1));
+ if(max_hits + 1 > STACK_MAX_HITS) {
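+ /* Lazily allocate per-thread storage, sized for the worst case
+  * (transparent_max_bounce + 1 hits) so the same buffer can be
+  * re-used by all subsequent shadow rays without re-allocation. */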
+ if(kg->transparent_shadow_intersections == NULL) {
+ kg->transparent_shadow_intersections =
+ (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
+ }
+ hits = kg->transparent_shadow_intersections;
+ }
uint num_hits;
blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits);
*shadow = throughput;
- if(hits != hits_stack)
- free(hits);
return is_zero(throughput);
}
-
- /* free dynamic storage */
- if(hits != hits_stack)
- free(hits);
}
else {
Intersection isect;
step_size = kernel_data.integrator.volume_step_size;
/* compute exact steps in advance for malloc */
max_steps = max((int)ceilf(ray->t/step_size), 1);
+ /* NOTE: For branched path tracing it is possible for both direct
+  * and indirect light integration to have volume segments allocated
+  * at the same time. We detect this using the index into the
+  * pre-allocated memory. Currently only two segments may be allocated
+  * at a time; if more are ever needed, KernelGlobals will require
+  * modifications.
+  *
+  * This restricts decoupled record to stack-like usage: a nested
+  * decoupled record call must free its memory before its caller
+  * frees memory.
+  */
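+ /* A sketch of a valid stack-ordered sequence, using record/free as
+  * shorthand for kernel_volume_decoupled_record/_free:
+  *   record(&outer)  -> index 0 becomes 1
+  *   record(&inner)  -> index 1 becomes 2
+  *   free(&inner)    -> index back to 1
+  *   free(&outer)    -> index back to 0
+  */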
+ const int index = kg->decoupled_volume_steps_index;
+ assert(index < sizeof(kg->decoupled_volume_steps) /
+ sizeof(*kg->decoupled_volume_steps));
if(max_steps > global_max_steps) {
max_steps = global_max_steps;
step_size = ray->t / (float)max_steps;
}
- segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
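+ /* Lazily allocate per-thread storage for this slot, sized for the
+  * worst case (global_max_steps) so later volume segments can re-use
+  * it without re-allocation. */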
+ if(kg->decoupled_volume_steps[index] == NULL) {
+ kg->decoupled_volume_steps[index] =
+ (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps);
+ }
+ segment->steps = kg->decoupled_volume_steps[index];
random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
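+ /* Mark the slot as used; kernel_volume_decoupled_free() will
+  * decrement the index again. */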
+ ++kg->decoupled_volume_steps_index;
}
else {
max_steps = 1;
ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
{
- if(segment->steps != &segment->stack_step)
- free(segment->steps);
+ if(segment->steps != &segment->stack_step) {
+ /* NOTE: Only the most recently allocated segment may be freed here.
+  * Allocations and frees must happen in stack order.
+  */
+ assert(kg->decoupled_volume_steps_index > 0);
+ assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
+ --kg->decoupled_volume_steps_index;
+ }
}
/* scattering for homogeneous and heterogeneous volumes, using decoupled ray