Cycles: Remove ccl_addr_space from RNG passed to functions
author: Hristo Gueorguiev <prem.nirved@gmail.com>
Tue, 21 Mar 2017 11:24:47 +0000 (12:24 +0100)
committer: Sergey Sharybin <sergey.vfx@gmail.com>
Mon, 27 Mar 2017 08:46:28 +0000 (10:46 +0200)
Simplifies code quite a bit, making it shorter and easier to extend.
There are currently no functional changes for users, but this change is
required for the upcoming work on shadow catcher support with OpenCL.

20 files changed:
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_path_branched.h
intern/cycles/kernel/kernel_path_common.h
intern/cycles/kernel/kernel_path_state.h
intern/cycles/kernel/kernel_path_subsurface.h
intern/cycles/kernel/kernel_path_surface.h
intern/cycles/kernel/kernel_path_volume.h
intern/cycles/kernel/kernel_random.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/kernel/kernel_subsurface.h
intern/cycles/kernel/kernel_volume.h
intern/cycles/kernel/split/kernel_buffer_update.h
intern/cycles/kernel/split/kernel_direct_lighting.h
intern/cycles/kernel/split/kernel_do_volume.h
intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
intern/cycles/kernel/split/kernel_next_iteration_setup.h
intern/cycles/kernel/split/kernel_path_init.h
intern/cycles/kernel/split/kernel_scene_intersect.h
intern/cycles/kernel/split/kernel_shader_eval.h
intern/cycles/kernel/split/kernel_subsurface_scatter.h

index 74631aa..31d5285 100644 (file)
@@ -383,7 +383,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 
                        /* do bssrdf scatter step if we picked a bssrdf closure */
                        if(sc) {
-                               uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
+                               uint lcg_state = lcg_state_init(rng, state->rng_offset, state->sample, 0x68bc21eb);
 
                                float bssrdf_u, bssrdf_v;
                                path_state_rng_2D(kg,
@@ -476,7 +476,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                        }
 
                        extmax = kernel_data.curve.maximum_width;
-                       lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
+                       lcg_state = lcg_state_init(rng, state.rng_offset, state.sample, 0x51633e2d);
                }
 
                if(state.bounce > kernel_data.integrator.ao_bounces) {
index a2d4e34..36fd6c9 100644 (file)
@@ -151,7 +151,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
                        continue;
 
                /* set up random number generator */
-               uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
+               uint lcg_state = lcg_state_init(rng, state->rng_offset, state->sample, 0x68bc21eb);
                int num_samples = kernel_data.integrator.subsurface_samples;
                float num_samples_inv = 1.0f/num_samples;
                RNG bssrdf_rng = cmj_hash(*rng, i);
@@ -285,7 +285,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
                        }
 
                        extmax = kernel_data.curve.maximum_width;
-                       lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
+                       lcg_state = lcg_state_init(rng, state.rng_offset, state.sample, 0x51633e2d);
                }
 
                bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
index 7b90355..596210f 100644 (file)
@@ -22,7 +22,7 @@ ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
                                                ccl_global uint *rng_state,
                                                int sample,
                                                int x, int y,
-                                               ccl_addr_space RNG *rng,
+                                               RNG *rng,
                                                ccl_addr_space Ray *ray)
 {
        float filter_u;
index e85f7dc..c0cd2a6 100644 (file)
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
 ccl_device_inline void path_state_init(KernelGlobals *kg,
                                        ShaderData *stack_sd,
                                        ccl_addr_space PathState *state,
-                                       ccl_addr_space RNG *rng,
+                                       RNG *rng,
                                        int sample,
                                        ccl_addr_space Ray *ray)
 {
index d22ec99..10b568a 100644 (file)
@@ -28,7 +28,7 @@ bool kernel_path_subsurface_scatter(
         ShaderData *emission_sd,
         PathRadiance *L,
         ccl_addr_space PathState *state,
-        ccl_addr_space RNG *rng,
+        RNG *rng,
         ccl_addr_space Ray *ray,
         ccl_addr_space float3 *throughput,
         ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
@@ -47,7 +47,7 @@ bool kernel_path_subsurface_scatter(
                 */
                kernel_assert(!ss_indirect->tracing);
 
-               uint lcg_state = lcg_state_init_addrspace(rng, state, 0x68bc21eb);
+               uint lcg_state = lcg_state_init(rng, state->rng_offset, state->sample, 0x68bc21eb);
 
                SubsurfaceIntersection ss_isect;
                float bssrdf_u, bssrdf_v;
index d6bfe9b..6d7f121 100644 (file)
@@ -197,7 +197,7 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
 #endif
 
 /* path tracing: connect path directly to position on a light and add it to L */
-ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
+ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
        ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
        PathRadiance *L)
 {
@@ -254,7 +254,7 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
 
 /* path tracing: bounce off or through surface to with new direction stored in ray */
 ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
-                                           ccl_addr_space RNG *rng,
+                                           RNG *rng,
                                            ShaderData *sd,
                                            ccl_addr_space float3 *throughput,
                                            ccl_addr_space PathState *state,
index 28e1b5b..371912c 100644 (file)
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline void kernel_path_volume_connect_light(
         KernelGlobals *kg,
-        ccl_addr_space RNG *rng,
+        RNG *rng,
         ShaderData *sd,
         ShaderData *emission_sd,
         float3 throughput,
@@ -69,7 +69,7 @@ ccl_device
 #endif
 bool kernel_path_volume_bounce(
     KernelGlobals *kg,
-    ccl_addr_space RNG *rng,
+    RNG *rng,
     ShaderData *sd,
     ccl_addr_space float3 *throughput,
     ccl_addr_space PathState *state,
index ad3fa32..200f2a6 100644 (file)
@@ -98,7 +98,7 @@ ccl_device uint sobol_lookup(const uint m, const uint frame, const uint ex, cons
        return index;
 }
 
-ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension)
+ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension)
 {
 #ifdef __CMJ__
        if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
@@ -130,7 +130,7 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *
 #endif
 }
 
-ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
+ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
 {
 #ifdef __CMJ__
        if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
@@ -147,7 +147,7 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *r
        }
 }
 
-ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, ccl_addr_space RNG *rng, int x, int y, float *fx, float *fy)
+ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
 {
 #ifdef __SOBOL_FULL_SCREEN__
        uint px, py;
@@ -259,12 +259,12 @@ ccl_device uint lcg_init(uint seed)
  * For branches in the path we must be careful not to reuse the same number
  * in a sequence and offset accordingly. */
 
-ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int dimension)
 {
        return path_rng_1D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension);
 }
 
-ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int dimension)
 {
        /* the rng_offset is not increased for transparent bounces. if we do then
         * fully transparent objects can become subtly visible by the different
@@ -277,29 +277,29 @@ ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, ccl_ad
        return path_rng_1D(kg, rng, state->sample, state->num_samples, rng_offset + dimension);
 }
 
-ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy)
+ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy)
 {
        path_rng_2D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension, fx, fy);
 }
 
-ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
 {
        return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension);
 }
 
-ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension)
 {
        int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
        return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, rng_offset + dimension);
 }
 
-ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
+ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
 {
        path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy);
 }
 
 /* Utitility functions to get light termination value, since it might not be needed in many cases. */
-ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, ccl_addr_space RNG *rng, const ccl_addr_space PathState *state)
+ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state)
 {
        if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
                return path_state_rng_1D_for_decision(kg, rng, state, PRNG_LIGHT_TERMINATE);
@@ -307,7 +307,7 @@ ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, ccl_
        return 0.0f;
 }
 
-ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg, ccl_addr_space RNG *rng, const PathState *state, int branch, int num_branches)
+ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg, RNG *rng, const PathState *state, int branch, int num_branches)
 {
        if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
                return path_branched_rng_1D_for_decision(kg, rng, state, branch, num_branches, PRNG_LIGHT_TERMINATE);
@@ -324,18 +324,9 @@ ccl_device_inline void path_state_branch(PathState *state, int branch, int num_b
        state->num_samples = state->num_samples*num_branches;
 }
 
-ccl_device_inline uint lcg_state_init(RNG *rng, const PathState *state, uint scramble)
-{
-       return lcg_init(*rng + state->rng_offset + state->sample*scramble);
-}
-
-/* TODO(sergey): For until we can use generic address space from OpenCL 2.0. */
-
-ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space RNG *rng,
-                                                const ccl_addr_space PathState *state,
-                                                uint scramble)
+ccl_device_inline uint lcg_state_init(RNG *rng, int rng_offset, int sample, uint scramble)
 {
-       return lcg_init(*rng + state->rng_offset + state->sample*scramble);
+       return lcg_init(*rng + rng_offset + sample*scramble);
 }
 
 ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
index 5d942de..163777b 100644 (file)
@@ -863,7 +863,7 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
 
 /* Surface Evaluation */
 
-ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
+ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, RNG *rng,
        ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
 {
        sd->num_closure = 0;
@@ -888,7 +888,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_
        }
 
        if(rng && (sd->flag & SD_BSDF_NEEDS_LCG)) {
-               sd->lcg_state = lcg_state_init_addrspace(rng, state, 0xb4bc3953);
+               sd->lcg_state = lcg_state_init(rng, state->rng_offset, state->sample, 0xb4bc3953);
        }
 }
 
index 64d240d..f75e933 100644 (file)
@@ -223,7 +223,7 @@ ccl_device_inline int subsurface_scatter_multi_intersect(
         SubsurfaceIntersection *ss_isect,
         ShaderData *sd,
         ShaderClosure *sc,
-        uint *lcg_state,
+        RNG *lcg_state,
         float disk_u,
         float disk_v,
         bool all)
index dcab138..9c08782 100644 (file)
@@ -360,7 +360,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
     ShaderData *sd,
     PathRadiance *L,
     ccl_addr_space float3 *throughput,
-    ccl_addr_space RNG *rng,
+    RNG *rng,
     bool probalistic_scatter)
 {
        VolumeShaderCoefficients coeff;
@@ -469,7 +469,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
     ShaderData *sd,
     PathRadiance *L,
     ccl_addr_space float3 *throughput,
-    ccl_addr_space RNG *rng)
+    RNG *rng)
 {
        float3 tp = *throughput;
        const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -610,7 +610,7 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
     Ray *ray,
     PathRadiance *L,
     ccl_addr_space float3 *throughput,
-    ccl_addr_space RNG *rng,
+    RNG *rng,
     bool heterogeneous)
 {
        shader_setup_from_volume(kg, sd, ray);
index f36899b..fc26473 100644 (file)
@@ -87,7 +87,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
        ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
        ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
        ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
-       ccl_global uint *rng = &kernel_split_state.rng[ray_index];
+       RNG rng = kernel_split_state.rng[ray_index];
        ccl_global float *buffer = kernel_split_params.buffer;
 
        unsigned int work_index;
@@ -120,7 +120,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
 
                /* accumulate result in output buffer */
                kernel_write_pass_float4(buffer, sample, L_rad);
-               path_rng_end(kg, rng_state, *rng);
+               path_rng_end(kg, rng_state, rng);
 
                ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
        }
@@ -146,7 +146,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
                        buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
 
                        /* Initialize random numbers and ray. */
-                       kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
+                       kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, &rng, ray);
 
                        if(ray->t != 0.0f) {
                                /* Initialize throughput, L_transparent, Ray, PathState;
@@ -155,7 +155,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
                                *throughput = make_float3(1.0f, 1.0f, 1.0f);
                                *L_transparent = 0.0f;
                                path_radiance_init(L, kernel_data.film.use_light_pass);
-                               path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng, sample, ray);
+                               path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &rng, sample, ray);
 #ifdef __SUBSURFACE__
                                kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
 #endif
@@ -170,12 +170,13 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
                                float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
                                /* Accumulate result in output buffer. */
                                kernel_write_pass_float4(buffer, sample, L_rad);
-                               path_rng_end(kg, rng_state, *rng);
+                               path_rng_end(kg, rng_state, rng);
 
                                ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
                        }
                }
        }
+       kernel_split_state.rng[ray_index] = rng;
 
 #ifndef __COMPUTE_DEVICE_GPU__
        }
index 3d062cf..0a0733b 100644 (file)
@@ -83,11 +83,12 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
                    (sd->flag & SD_BSDF_HAS_EVAL)))
                {
                        /* Sample illumination from lights to find path contribution. */
-                       ccl_global RNG* rng = &kernel_split_state.rng[ray_index];
-                       float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+                       RNG rng = kernel_split_state.rng[ray_index];
+                       float light_t = path_state_rng_1D(kg, &rng, state, PRNG_LIGHT);
                        float light_u, light_v;
-                       path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
-                       float terminate = path_state_rng_light_termination(kg, rng, state);
+                       path_state_rng_2D(kg, &rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+                       float terminate = path_state_rng_light_termination(kg, &rng, state);
+                       kernel_split_state.rng[ray_index] = rng;
 
                        LightSample ls;
                        if(light_sample(kg,
index b1df45d..47d3c28 100644 (file)
@@ -50,7 +50,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
 
                ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
                ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-               ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
+               RNG rng = kernel_split_state.rng[ray_index];
                ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
                ShaderData *sd = &kernel_split_state.sd[ray_index];
                ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
@@ -69,15 +69,15 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
                        {
                                /* integrate along volume segment with distance sampling */
                                VolumeIntegrateResult result = kernel_volume_integrate(
-                                       kg, state, sd, &volume_ray, L, throughput, rng, heterogeneous);
+                                       kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
 
 #  ifdef __VOLUME_SCATTER__
                                if(result == VOLUME_PATH_SCATTERED) {
                                        /* direct lighting */
-                                       kernel_path_volume_connect_light(kg, rng, sd, sd_input, *throughput, state, L);
+                                       kernel_path_volume_connect_light(kg, &rng, sd, sd_input, *throughput, state, L);
 
                                        /* indirect light bounce */
-                                       if(kernel_path_volume_bounce(kg, rng, sd, throughput, state, L, ray))
+                                       if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray))
                                                ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
                                        else
                                                ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
@@ -85,6 +85,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
 #  endif
                        }
                }
+               kernel_split_state.rng[ray_index] = rng;
        }
 
 #endif
index 1834a79..8e1f547 100644 (file)
@@ -98,7 +98,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
        unsigned int tile_y;
        unsigned int sample;
 
-       ccl_global RNG *rng = 0x0;
+       RNG rng = kernel_split_state.rng[ray_index];
        ccl_global PathState *state = 0x0;
        float3 throughput;
 
@@ -110,7 +110,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 
                throughput = kernel_split_state.throughput[ray_index];
                state = &kernel_split_state.path_state[ray_index];
-               rng = &kernel_split_state.rng[ray_index];
 
                work_index = kernel_split_state.work_array[ray_index];
                sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
@@ -194,7 +193,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 
                if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
                        if(probability != 1.0f) {
-                               float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
+                               float terminate = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_TERMINATE);
                                if(terminate >= probability) {
                                        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
                                        enqueue_flag = 1;
@@ -214,7 +213,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
                {
                        /* todo: solve correlation */
                        float bsdf_u, bsdf_v;
-                       path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+                       path_state_rng_2D(kg, &rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
 
                        float ao_factor = kernel_data.background.ao_factor;
                        float3 ao_N;
@@ -243,6 +242,8 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
                }
        }
 #endif  /* __AO__ */
+       kernel_split_state.rng[ray_index] = rng;
+
 
 #ifndef __COMPUTE_DEVICE_GPU__
        }
index 056fb1d..a9957ee 100644 (file)
@@ -141,15 +141,16 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
        if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
                ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
                ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-               ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
+               RNG rng = kernel_split_state.rng[ray_index];
                state = &kernel_split_state.path_state[ray_index];
                L = &kernel_split_state.path_radiance[ray_index];
 
                /* Compute direct lighting and next bounce. */
-               if(!kernel_path_surface_bounce(kg, rng, &kernel_split_state.sd[ray_index], throughput, state, L, ray)) {
+               if(!kernel_path_surface_bounce(kg, &rng, &kernel_split_state.sd[ray_index], throughput, state, L, ray)) {
                        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
                        enqueue_flag = 1;
                }
+               kernel_split_state.rng[ray_index] = rng;
        }
 
 #ifndef __COMPUTE_DEVICE_GPU__
index f879fca..a7ecde7 100644 (file)
@@ -60,12 +60,14 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
        ccl_global float *buffer = kernel_split_params.buffer;
        buffer += (kernel_split_params.offset + pixel_x + pixel_y * kernel_split_params.stride) * kernel_data.film.pass_stride;
 
+       RNG rng = kernel_split_state.rng[ray_index];
+
        /* Initialize random numbers and ray. */
        kernel_path_trace_setup(kg,
                                rng_state,
                                my_sample,
                                pixel_x, pixel_y,
-                               &kernel_split_state.rng[ray_index],
+                               &rng,
                                &kernel_split_state.ray[ray_index]);
 
        if(kernel_split_state.ray[ray_index].t != 0.0f) {
@@ -78,7 +80,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
                path_state_init(kg,
                                &kernel_split_state.sd_DL_shadow[ray_index],
                                &kernel_split_state.path_state[ray_index],
-                               &kernel_split_state.rng[ray_index],
+                               &rng,
                                my_sample,
                                &kernel_split_state.ray[ray_index]);
 #ifdef __SUBSURFACE__
@@ -97,6 +99,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
                path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
                ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
        }
+       kernel_split_state.rng[ray_index] = rng;
 }
 
 CCL_NAMESPACE_END
index 66f549f..684760e 100644 (file)
@@ -72,7 +72,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
                }
 
                extmax = kernel_data.curve.maximum_width;
-               lcg_state = lcg_state_init(&rng, &state, 0x51633e2d);
+               lcg_state = lcg_state_init(&rng, state.rng_offset, state.sample, 0x51633e2d);
        }
 
        if(state.bounce > kernel_data.integrator.ao_bounces) {
index b739f86..0f1696e 100644 (file)
@@ -54,7 +54,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg,
        /* Continue on with shader evaluation. */
        if((ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
                Intersection isect = kernel_split_state.isect[ray_index];
-               ccl_global uint *rng = &kernel_split_state.rng[ray_index];
+               RNG rng = kernel_split_state.rng[ray_index];
                ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
                Ray ray = kernel_split_state.ray[ray_index];
 
@@ -62,8 +62,9 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg,
                                      &kernel_split_state.sd[ray_index],
                                      &isect,
                                      &ray);
-               float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
-               shader_eval_surface(kg, &kernel_split_state.sd[ray_index], rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
+               float rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
+               shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
+               kernel_split_state.rng[ray_index] = rng;
        }
 }
 
index 4eaa7f5..0b4d50c 100644 (file)
@@ -55,7 +55,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
        ccl_global char *ray_state = kernel_split_state.ray_state;
        ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-       ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
+       RNG rng = kernel_split_state.rng[ray_index];
        ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
        ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
        ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
@@ -69,7 +69,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
                                                          emission_sd,
                                                          L,
                                                          state,
-                                                         rng,
+                                                         &rng,
                                                          ray,
                                                          throughput,
                                                          ss_indirect)) {
@@ -77,6 +77,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
                                enqueue_flag = 1;
                        }
                }
+               kernel_split_state.rng[ray_index] = rng;
        }
 
 #ifndef __COMPUTE_DEVICE_GPU__