Cycles: experimental correlated multi-jittered sampling pattern that can be used
authorBrecht Van Lommel <brechtvanlommel@pandora.be>
Fri, 7 Jun 2013 16:06:22 +0000 (16:06 +0000)
committerBrecht Van Lommel <brechtvanlommel@pandora.be>
Fri, 7 Jun 2013 16:06:22 +0000 (16:06 +0000)
instead of sobol. So far one doesn't seem to be consistently better or worse than
the other for the same number of samples but more testing is needed.

The random number generator itself is slower than sobol for most number of samples,
except 16, 64, 256, .. because they can be computed faster. This can probably be
optimized, but we can do that when/if this actually turns out to be useful.

Paper this implementation is based on:
http://graphics.pixar.com/library/MultiJitteredSampling/

Also includes some refactoring of RNG code, fixing a Sobol correlation issue with
the first BSDF and < 16 samples, skipping some unneeded RNG calls and using a
simpler unit square to unit disk function.

13 files changed:
intern/cycles/blender/addon/properties.py
intern/cycles/blender/addon/ui.py
intern/cycles/blender/blender_sync.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/kernel_jitter.h [new file with mode: 0644]
intern/cycles/kernel/kernel_montecarlo.h
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_random.h
intern/cycles/kernel/kernel_types.h
intern/cycles/render/integrator.cpp
intern/cycles/render/integrator.h
intern/cycles/render/session.cpp
intern/cycles/util/util_types.h

index a94178f1b42abd9b6193a4eed5a14bce0e9f6cd0..7ea8465976469c58fa8c61dee2d13859adeead57 100644 (file)
@@ -114,6 +114,11 @@ enum_use_layer_samples = (
     ('IGNORE', "Ignore", "Ignore per render layer number of samples"),
     )
 
+enum_sampling_pattern = (
+    ('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
+    ('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
+    )
+
 
 class CyclesRenderSettings(bpy.types.PropertyGroup):
     @classmethod
@@ -219,6 +224,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
                 default=1,
                 )
 
+        cls.sampling_pattern = EnumProperty(
+                name="Sampling Pattern",
+                description="Random sampling pattern used by the integrator",
+                items=enum_sampling_pattern,
+                default='SOBOL',
+                )
+
         cls.use_layer_samples = EnumProperty(
                 name="Layer Samples",
                 description="How to use per render layer sample settings",
index f9c516d1963eee3dd9007f3c60014f281851c9b7..b35bdf8f511352b78ff3d27da7089759a20274ee 100644 (file)
@@ -85,6 +85,9 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
             sub.prop(cscene, "mesh_light_samples", text="Mesh Light")
             sub.prop(cscene, "subsurface_samples", text="Subsurface")
 
+        if cscene.feature_set == 'EXPERIMENTAL':
+            layout.row().prop(cscene, "sampling_pattern", text="Pattern")
+
         for rl in scene.render.layers:
             if rl.samples > 0:
                 layout.separator()
index b324385134b1fcb07fc1cd2879c3d5a4458186e0..ef9ce85ddf8080cc8d34e7aff93e6439dfa25026 100644 (file)
@@ -199,6 +199,9 @@ void BlenderSync::sync_integrator()
        integrator->subsurface_samples = get_int(cscene, "subsurface_samples");
        integrator->progressive = get_boolean(cscene, "progressive");
 
+       if(experimental)
+               integrator->sampling_pattern = (SamplingPattern)RNA_enum_get(&cscene, "sampling_pattern");
+
        if(integrator->modified(previntegrator))
                integrator->tag_update(scene);
 }
index 41048c7b379f719bf60b7eea8bb445a1d7b1710e..912a1321d67349cd175e36052a2345acef6370d2 100644 (file)
@@ -33,6 +33,7 @@ set(SRC_HEADERS
        kernel_emission.h
        kernel_film.h
        kernel_globals.h
+       kernel_jitter.h
        kernel_light.h
        kernel_math.h
        kernel_montecarlo.h
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
new file mode 100644 (file)
index 0000000..5ea44cd
--- /dev/null
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2013, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* "Correlated Multi-Jittered Sampling"
+ * Andrew Kensler, Pixar Technical Memo 13-01, 2013 */
+
+/* todo: find good value, suggested 64 gives pattern on cornell box ceiling */
+#define CMJ_RANDOM_OFFSET_LIMIT 4096
+
+__device_inline bool cmj_is_pow2(int i)
+{
+       return (i & (i - 1)) == 0;
+}
+
+__device_inline int cmj_fast_mod_pow2(int a, int b)
+{
+       return (a & (b - 1));
+}
+
+/* a must be > 0 and b must be > 1 */
+__device_inline int cmj_fast_div_pow2(int a, int b)
+{
+#ifdef __KERNEL_SSE2__
+       return a >> __builtin_ctz(b);
+#else
+       return a/b;
+#endif
+}
+
+__device_inline uint cmj_w_mask(uint w)
+{
+#ifdef __KERNEL_SSE2__
+       return ((1 << (32 - __builtin_clz(w))) - 1);
+#else
+       w |= w >> 1;
+       w |= w >> 2;
+       w |= w >> 4;
+       w |= w >> 8;
+       w |= w >> 16;
+
+       return w;
+#endif
+}
+
+__device_inline uint cmj_permute(uint i, uint l, uint p)
+{
+       uint w = l - 1;
+
+       if((l & w) == 0) {
+               /* l is a power of two (fast) */
+               i ^= p;
+               i *= 0xe170893d;
+               i ^= p >> 16;
+               i ^= (i & w) >> 4;
+               i ^= p >> 8;
+               i *= 0x0929eb3f;
+               i ^= p >> 23;
+               i ^= (i & w) >> 1;
+               i *= 1 | p >> 27;
+               i *= 0x6935fa69;
+               i ^= (i & w) >> 11;
+               i *= 0x74dcb303;
+               i ^= (i & w) >> 2;
+               i *= 0x9e501cc3;
+               i ^= (i & w) >> 2;
+               i *= 0xc860a3df;
+               i &= w;
+               i ^= i >> 5;
+
+               return (i + p) & w;
+       }
+       else {
+               /* l is not a power of two (slow) */
+               w = cmj_w_mask(w);
+
+               do {
+                       i ^= p;
+                       i *= 0xe170893d;
+                       i ^= p >> 16;
+                       i ^= (i & w) >> 4;
+                       i ^= p >> 8;
+                       i *= 0x0929eb3f;
+                       i ^= p >> 23;
+                       i ^= (i & w) >> 1;
+                       i *= 1 | p >> 27;
+                       i *= 0x6935fa69;
+                       i ^= (i & w) >> 11;
+                       i *= 0x74dcb303;
+                       i ^= (i & w) >> 2;
+                       i *= 0x9e501cc3;
+                       i ^= (i & w) >> 2;
+                       i *= 0xc860a3df;
+                       i &= w;
+                       i ^= i >> 5;
+               } while (i >= l);
+
+               return (i + p) % l;
+       }
+}
+
+__device_inline uint cmj_hash(uint i, uint p)
+{
+       i ^= p;
+       i ^= i >> 17;
+       i ^= i >> 10;
+       i *= 0xb36534e5;
+       i ^= i >> 12;
+       i ^= i >> 21;
+       i *= 0x93fc4795;
+       i ^= 0xdf6e307f;
+       i ^= i >> 17;
+       i *= 1 | p >> 18;
+
+       return i;
+}
+
+__device_inline float cmj_randfloat(uint i, uint p)
+{
+       return cmj_hash(i, p) * (1.0f / 4294967808.0f);
+}
+
+#ifdef __CMJ__
+__device_noinline float cmj_sample_1D(int s, int N, int p)
+{
+       uint x = cmj_permute(s, N, p * 0x68bc21eb);
+       float jx = cmj_randfloat(s, p * 0x967a889b);
+
+       float invN = 1.0f/N;
+       return (x + jx)*invN;
+}
+
+__device_noinline float2 cmj_sample_2D(int s, int N, int p)
+{
+       int m = float_to_int(sqrtf(N));
+       int n = (N + m - 1)/m;
+       float invN = 1.0f/N;
+       float invm = 1.0f/m;
+       float invn = 1.0f/n;
+
+       s = cmj_permute(s, N, p * 0x51633e2d);
+
+       int sdivm, smodm;
+
+       if(cmj_is_pow2(m)) {
+               sdivm = cmj_fast_div_pow2(s, m);
+               smodm = cmj_fast_mod_pow2(s, m);
+       }
+       else {
+               sdivm = float_to_int(s * invm);
+               smodm = s - sdivm*m;
+       }
+
+       uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
+       uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
+
+       float jx = cmj_randfloat(s, p * 0x967a889b);
+       float jy = cmj_randfloat(s, p * 0x368cc8b7);
+
+       return make_float2((sx + (sy + jx)*invn)*invm, (s + jy)*invN);
+}
+#endif
+
+CCL_NAMESPACE_END
+
index 2ae9508416271485a6bbbf1fbf66d083b54d8be4..f608429da369f794e74f3f6df5a197d0982195db 100644 (file)
 CCL_NAMESPACE_BEGIN
 
 /// Given values x and y on [0,1], convert them in place to values on
-/// [-1,1] uniformly distributed over a unit sphere.  This code is
-/// derived from Peter Shirley, "Realistic Ray Tracing", p. 103.
+/// [-1,1] uniformly distributed over a unit sphere.
 __device void to_unit_disk(float *x, float *y)
 {
-       float r, phi;
-       float a = 2.0f * (*x) - 1.0f;
-       float b = 2.0f * (*y) - 1.0f;
-       if(a > -b) {
-               if(a > b) {
-                       r = a;
-                       phi = M_PI_4_F *(b/a);
-               }
-               else {
-                       r = b;
-                       phi = M_PI_4_F *(2.0f - a/b);
-               }
-       }
-       else {
-               if(a < b) {
-                       r = -a;
-                       phi = M_PI_4_F *(4.0f + b/a);
-               }
-               else {
-                       r = -b;
-                       if(b != 0.0f)
-                               phi = M_PI_4_F *(6.0f - a/b);
-                       else
-                               phi = 0.0f;
-               }
-       }
+       float phi = 2.0f * M_PI_F * (*x);
+       float r = sqrtf(*y);
+
        *x = r * cosf(phi);
        *y = r * sinf(phi);
 }
index 5915dfed08be2aa2b1db37abe08eba4993f9160a..866024ba303d0e43b07a986e10f790a6e8e74a1c 100644 (file)
@@ -233,7 +233,7 @@ __device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ra
        return result;
 }
 
-__device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
+__device float4 kernel_path_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer)
 {
        /* initialize */
        PathRadiance L;
@@ -249,6 +249,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 #endif
        PathState state;
        int rng_offset = PRNG_BASE_NUM;
+       int num_samples = kernel_data.integrator.aa_samples;
 
        path_state_init(&state);
 
@@ -270,7 +271,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                        }
 
                        extmax = kernel_data.curve_kernel_data.maximum_width;
-                       lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
+                       lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d);
                }
 
                bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
@@ -292,7 +293,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                        light_ray.dP = ray.dP;
 
                        /* intersect with lamp */
-                       float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
+                       float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
                        float3 emission;
 
                        if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission))
@@ -323,7 +324,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                /* setup shading */
                ShaderData sd;
                shader_setup_from_ray(kg, &sd, &isect, &ray);
-               float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
+               float rbsdf = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF);
                shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
 
                /* holdout */
@@ -373,12 +374,18 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                 * mainly due to the mixed in MIS that we use. gives too many unneeded
                 * shader evaluations, only need emission if we are going to terminate */
                float probability = path_state_terminate_probability(kg, &state, throughput);
-               float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
 
-               if(terminate >= probability)
+               if(probability == 0.0f) {
                        break;
+               }
+               else if(probability != 1.0f) {
+                       float terminate = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_TERMINATE);
+
+                       if(terminate >= probability)
+                               break;
 
-               throughput /= probability;
+                       throughput /= probability;
+               }
 
 #ifdef __SUBSURFACE__
                /* bssrdf scatter to a different location on the same object, replacing
@@ -392,7 +399,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 
                        /* do bssrdf scatter step if we picked a bssrdf closure */
                        if(sc) {
-                               uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
+                               uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
                                subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
                        }
                }
@@ -402,8 +409,9 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
                        /* todo: solve correlation */
-                       float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-                       float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+                       float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U);
+                       float bsdf_u = bsdf_uv.x;
+                       float bsdf_v = bsdf_uv.y;
 
                        float ao_factor = kernel_data.background.ao_factor;
                        float3 ao_N;
@@ -436,10 +444,15 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                if(kernel_data.integrator.use_direct_light) {
                        /* sample illumination from lights to find path contribution */
                        if(sd.flag & SD_BSDF_HAS_EVAL) {
-                               float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
-                               float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F);
-                               float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U);
-                               float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V);
+                               float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT);
+#ifdef __MULTI_CLOSURE__
+                               float light_o = 0.0f;
+#else
+                               float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F);
+#endif
+                               float2 light_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U);
+                               float light_u = light_uv.x;
+                               float light_v = light_uv.y;
 
                                Ray light_ray;
                                BsdfEval L_light;
@@ -471,8 +484,9 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
                BsdfEval bsdf_eval;
                float3 bsdf_omega_in;
                differential3 bsdf_domega_in;
-               float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-               float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+               float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U);
+               float bsdf_u = bsdf_uv.x;
+               float bsdf_v = bsdf_uv.y;
                int label;
 
                label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -524,8 +538,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
 
 #ifdef __NON_PROGRESSIVE__
 
-__device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer,
-       float3 throughput, float num_samples_adjust,
+__device void kernel_path_indirect(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer,
+       float3 throughput, int num_samples, int num_total_samples,
        float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L)
 {
 #ifdef __LAMP_MIS__
@@ -557,7 +571,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                        light_ray.dP = ray.dP;
 
                        /* intersect with lamp */
-                       float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
+                       float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
                        float3 emission;
 
                        if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission))
@@ -578,7 +592,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                /* setup shading */
                ShaderData sd;
                shader_setup_from_ray(kg, &sd, &isect, &ray);
-               float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
+               float rbsdf = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF);
                shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
                shader_merge_closures(kg, &sd);
 
@@ -604,13 +618,19 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                /* path termination. this is a strange place to put the termination, it's
                 * mainly due to the mixed in MIS that we use. gives too many unneeded
                 * shader evaluations, only need emission if we are going to terminate */
-               float probability = path_state_terminate_probability(kg, &state, throughput*num_samples_adjust);
-               float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
+               float probability = path_state_terminate_probability(kg, &state, throughput*num_samples);
 
-               if(terminate >= probability)
+               if(probability == 0.0f) {
                        break;
+               }
+               else if(probability != 1.0f) {
+                       float terminate = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_TERMINATE);
+
+                       if(terminate >= probability)
+                               break;
 
-               throughput /= probability;
+                       throughput /= probability;
+               }
 
 #ifdef __SUBSURFACE__
                /* bssrdf scatter to a different location on the same object, replacing
@@ -624,7 +644,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
 
                        /* do bssrdf scatter step if we picked a bssrdf closure */
                        if(sc) {
-                               uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
+                               uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
                                subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
                        }
                }
@@ -634,8 +654,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
                        /* todo: solve correlation */
-                       float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-                       float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+                       float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U);
+                       float bsdf_u = bsdf_uv.x;
+                       float bsdf_v = bsdf_uv.y;
 
                        float ao_factor = kernel_data.background.ao_factor;
                        float3 ao_N;
@@ -668,10 +689,15 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                if(kernel_data.integrator.use_direct_light) {
                        /* sample illumination from lights to find path contribution */
                        if(sd.flag & SD_BSDF_HAS_EVAL) {
-                               float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT);
-                               float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F);
-                               float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U);
-                               float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V);
+                               float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT);
+#ifdef __MULTI_CLOSURE__
+                               float light_o = 0.0f;
+#else
+                               float light_o = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_F);
+#endif
+                               float2 light_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U);
+                               float light_u = light_uv.x;
+                               float light_v = light_uv.y;
 
                                Ray light_ray;
                                BsdfEval L_light;
@@ -704,8 +730,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
                BsdfEval bsdf_eval;
                float3 bsdf_omega_in;
                differential3 bsdf_domega_in;
-               float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U);
-               float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V);
+               float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U);
+               float bsdf_u = bsdf_uv.x;
+               float bsdf_v = bsdf_uv.y;
                int label;
 
                label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval,
@@ -740,15 +767,17 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
        }
 }
 
-__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG *rng, int sample,
+__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG rng, int sample,
        ShaderData *sd, float3 throughput, float num_samples_adjust,
        float min_ray_pdf, float ray_pdf, PathState state,
        int rng_offset, PathRadiance *L, __global float *buffer)
 {
+       int aa_samples = kernel_data.integrator.aa_samples;
+
 #ifdef __AO__
        /* ambient occlusion */
        if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
-               int num_samples = ceil(kernel_data.integrator.ao_samples*num_samples_adjust);
+               int num_samples = ceil_to_int(kernel_data.integrator.ao_samples*num_samples_adjust);
                float num_samples_inv = num_samples_adjust/num_samples;
                float ao_factor = kernel_data.background.ao_factor;
                float3 ao_N;
@@ -756,8 +785,9 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
                for(int j = 0; j < num_samples; j++) {
                        /* todo: solve correlation */
-                       float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U);
-                       float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V);
+                       float2 bsdf_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
+                       float bsdf_u = bsdf_uv.x;
+                       float bsdf_v = bsdf_uv.y;
 
                        float3 ao_D;
                        float ao_pdf;
@@ -798,15 +828,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
                /* lamp sampling */
                for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-                       int num_samples = ceil(num_samples_adjust*light_select_num_samples(kg, i));
+                       int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
                        float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
+                       RNG lamp_rng = cmj_hash(rng, i);
 
                        if(kernel_data.integrator.pdf_triangles != 0.0f)
                                num_samples_inv *= 0.5f;
 
                        for(int j = 0; j < num_samples; j++) {
-                               float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
-                               float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
+                               float2 light_uv = path_rng_2D(kg, lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
+                               float light_u = light_uv.x;
+                               float light_v = light_uv.y;
 
                                if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
                                        /* trace shadow ray */
@@ -822,16 +854,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 
                /* mesh light sampling */
                if(kernel_data.integrator.pdf_triangles != 0.0f) {
-                       int num_samples = ceil(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
+                       int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
                        float num_samples_inv = num_samples_adjust/num_samples;
 
                        if(kernel_data.integrator.num_all_lights)
                                num_samples_inv *= 0.5f;
 
                        for(int j = 0; j < num_samples; j++) {
-                               float light_t = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT);
-                               float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
-                               float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
+                               float light_t = path_rng_1D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT);
+                               float2 light_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
+                               float light_u = light_uv.x;
+                               float light_v = light_uv.y;
 
                                /* only sample triangle lights */
                                if(kernel_data.integrator.num_all_lights)
@@ -869,9 +902,10 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
                else
                        num_samples = kernel_data.integrator.transmission_samples;
 
-               num_samples = ceil(num_samples_adjust*num_samples);
+               num_samples = ceil_to_int(num_samples_adjust*num_samples);
 
                float num_samples_inv = num_samples_adjust/num_samples;
+               RNG bsdf_rng = cmj_hash(rng, i);
 
                for(int j = 0; j < num_samples; j++) {
                        /* sample BSDF */
@@ -879,8 +913,9 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
                        BsdfEval bsdf_eval;
                        float3 bsdf_omega_in;
                        differential3 bsdf_domega_in;
-                       float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U);
-                       float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V);
+                       float2 bsdf_uv = path_rng_2D(kg, bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
+                       float bsdf_u = bsdf_uv.x;
+                       float bsdf_v = bsdf_uv.y;
                        int label;
 
                        label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval,
@@ -918,7 +953,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
 #endif
 
                        kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer,
-                               tp*num_samples_inv, num_samples,
+                               tp*num_samples_inv, num_samples, aa_samples*num_samples,
                                min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, L);
 
                        /* for render passes, sum and reset indirect light pass variables
@@ -929,7 +964,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R
        }
 }
 
-__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
+__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer)
 {
        /* initialize */
        PathRadiance L;
@@ -941,6 +976,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
        float ray_pdf = 0.0f;
        PathState state;
        int rng_offset = PRNG_BASE_NUM;
+       int aa_samples = kernel_data.integrator.aa_samples;
 
        path_state_init(&state);
 
@@ -961,7 +997,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
                        }
 
                        extmax = kernel_data.curve_kernel_data.maximum_width;
-                       lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
+                       lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d);
                }
 
                if(!scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax)) {
@@ -990,8 +1026,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
                /* setup shading */
                ShaderData sd;
                shader_setup_from_ray(kg, &sd, &isect, &ray);
-               float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF);
-               shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
+               shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
                shader_merge_closures(kg, &sd);
 
                /* holdout */
@@ -1031,12 +1066,18 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
                         * mainly due to the mixed in MIS that we use. gives too many unneeded
                         * shader evaluations, only need emission if we are going to terminate */
                        float probability = path_state_terminate_probability(kg, &state, throughput);
-                       float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE);
 
-                       if(terminate >= probability)
+                       if(probability == 0.0f) {
                                break;
+                       }
+                       else if(probability != 1.0f) {
+                               float terminate = path_rng_1D(kg, rng, sample, aa_samples, rng_offset + PRNG_TERMINATE);
 
-                       throughput /= probability;
+                               if(terminate >= probability)
+                                       break;
+
+                               throughput /= probability;
+                       }
                }
 
 #ifdef __SUBSURFACE__
@@ -1049,7 +1090,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
                                        continue;
 
                                /* set up random number generator */
-                               uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
+                               uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
                                int num_samples = kernel_data.integrator.subsurface_samples;
                                float num_samples_inv = 1.0f/num_samples;
 
@@ -1112,22 +1153,26 @@ __device void kernel_path_trace(KernelGlobals *kg,
 
        float filter_u;
        float filter_v;
+       int num_samples = kernel_data.integrator.aa_samples;
 
-       path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v);
+       path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v);
 
        /* sample camera ray */
        Ray ray;
-       
+
        float lens_u = 0.0f, lens_v = 0.0f;
-       float time = 0.0f;
-       
+
        if(kernel_data.cam.aperturesize > 0.0f) {
-               lens_u = path_rng(kg, &rng, sample, PRNG_LENS_U);
-               lens_v = path_rng(kg, &rng, sample, PRNG_LENS_V);
+               float2 lens_uv = path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U);
+               lens_u = lens_uv.x;
+               lens_v = lens_uv.y;
        }
+
+       float time = 0.0f;
+
 #ifdef __CAMERA_MOTION__
        if(kernel_data.cam.shuttertime != -1.0f)
-               time = path_rng(kg, &rng, sample, PRNG_TIME);
+               time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
 #endif
 
        camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray);
@@ -1139,10 +1184,10 @@ __device void kernel_path_trace(KernelGlobals *kg,
 #ifdef __NON_PROGRESSIVE__
                if(kernel_data.integrator.progressive)
 #endif
-                       L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
+                       L = kernel_path_progressive(kg, rng, sample, ray, buffer);
 #ifdef __NON_PROGRESSIVE__
                else
-                       L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
+                       L = kernel_path_non_progressive(kg, rng, sample, ray, buffer);
 #endif
        }
        else
index fc33e226051e12a842685b1399a72dc096e8b062..ecf80b817d476de92fd65d6b20b43eb8336586bd 100644 (file)
@@ -16,6 +16,8 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
+#include "kernel_jitter.h"
+
 CCL_NAMESPACE_BEGIN
 
 typedef uint RNG;
@@ -100,10 +102,10 @@ __device uint sobol_lookup(const uint m, const uint frame, const uint ex, const
        return index;
 }
 
-__device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension)
+__device_inline float path_rng(KernelGlobals *kg, RNG rng, int sample, int dimension)
 {
 #ifdef __SOBOL_FULL_SCREEN__
-       uint result = sobol_dimension(kg, *rng, dimension);
+       uint result = sobol_dimension(kg, rng, dimension);
        float r = (float)result * (1.0f/(float)0xFFFFFFFF);
        return r;
 #else
@@ -115,15 +117,44 @@ __device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dime
        float shift;
 
        if(dimension & 1)
-               shift = (*rng >> 16)/((float)0xFFFF);
+               shift = (rng >> 16)*(1.0f/(float)0xFFFF);
        else
-               shift = (*rng & 0xFFFF)/((float)0xFFFF);
+               shift = (rng & 0xFFFF)*(1.0f/(float)0xFFFF);
 
        return r + shift - floorf(r + shift);
 #endif
 }
 
-__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy)
+__device_inline float path_rng_1D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension)
+{
+#ifdef __CMJ__
+       if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
+               /* correlated multi-jittered */
+               int p = rng + dimension;
+               return cmj_sample_1D(sample, num_samples, p);
+       }
+#endif
+
+       /* sobol */
+       return path_rng(kg, rng, sample, dimension);
+}
+
+__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension)
+{
+#ifdef __CMJ__
+       if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
+               /* correlated multi-jittered */
+               int p = rng + dimension;
+               return cmj_sample_2D(sample, num_samples, p);
+       }
+#endif
+
+       /* sobol */
+       return make_float2(path_rng(kg, rng, sample, dimension),
+                          path_rng(kg, rng, sample, dimension + 1));
+}
+
+__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
 {
 #ifdef __SOBOL_FULL_SCREEN__
        uint px, py;
@@ -153,8 +184,10 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state,
                *fy = 0.5f;
        }
        else {
-               *fx = path_rng(kg, rng, sample, PRNG_FILTER_U);
-               *fy = path_rng(kg, rng, sample, PRNG_FILTER_V);
+               float2 fxy = path_rng_2D(kg, *rng, sample, num_samples, PRNG_FILTER_U);
+
+               *fx = fxy.x;
+               *fy = fxy.y;
        }
 #endif
 }
@@ -168,14 +201,25 @@ __device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng)
 
 /* Linear Congruential Generator */
 
-__device float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension)
+__device float path_rng(KernelGlobals *kg, RNGrng, int sample, int dimension)
 {
        /* implicit mod 2^32 */
-       *rng = (1103515245*(*rng) + 12345);
-       return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+       rng = (1103515245*(rng) + 12345);
+       return (float)rng * (1.0f/(float)0xFFFFFFFF);
 }
 
-__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy)
+__device_inline float path_rng_1D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
+{
+       return path_rng(kg, rng, sample, dimension);
+}
+
+__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension)
+{
+       return make_float2(path_rng(kg, rng, sample, dimension),
+                          path_rng(kg, rng, sample, dimension + 1));
+}
+
+__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
 {
        /* load state */
        *rng = *rng_state;
@@ -187,8 +231,10 @@ __device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sam
                *fy = 0.5f;
        }
        else {
-               *fx = path_rng(kg, rng, sample, PRNG_FILTER_U);
-               *fy = path_rng(kg, rng, sample, PRNG_FILTER_V);
+               float2 fxy = path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U);
+
+               *fx = fxy.x;
+               *fy = fxy.y;
        }
 }
 
index f165a1f3839a73573f97a3d66754459910888f3e..abdb609b55fdbb05a5f62f477a67411b3d5345f3 100644 (file)
@@ -117,6 +117,7 @@ CCL_NAMESPACE_BEGIN
 #define __CAMERA_CLIPPING__
 #define __INTERSECTION_REFINE__
 #define __CLAMP_SAMPLE__
+#define __CMJ__
 
 #ifdef __KERNEL_SHADING__
 #define __SVM__
@@ -164,8 +165,10 @@ enum PathTraceDimension {
        PRNG_LENS_V = 3,
 #ifdef __CAMERA_MOTION__
        PRNG_TIME = 4,
-       PRNG_UNUSED = 5,
-       PRNG_BASE_NUM = 6,
+       PRNG_UNUSED_0 = 5,
+       PRNG_UNUSED_1 = 6,      /* for some reason (6, 7) is a bad sobol pattern */
+       PRNG_UNUSED_2 = 7,  /* with a low number of samples (< 64) */
+       PRNG_BASE_NUM = 8,
 #else
        PRNG_BASE_NUM = 4,
 #endif
@@ -181,6 +184,11 @@ enum PathTraceDimension {
        PRNG_BOUNCE_NUM = 8
 };
 
+enum SamplingPattern {
+       SAMPLING_PATTERN_SOBOL = 0,
+       SAMPLING_PATTERN_CMJ = 1
+};
+
 /* these flags values correspond to raytypes in osl.cpp, so keep them in sync!
  *
  * for ray visibility tests in BVH traversal, the upper 20 bits are used for
@@ -728,6 +736,7 @@ typedef struct KernelIntegrator {
 
        /* non-progressive */
        int progressive;
+       int aa_samples;
        int diffuse_samples;
        int glossy_samples;
        int transmission_samples;
@@ -736,7 +745,11 @@ typedef struct KernelIntegrator {
        int use_lamp_mis;
        int subsurface_samples;
 
-       int pad1, pad2, pad3;
+       /* sampler */
+       int sampling_pattern;
+
+       /* padding */
+       int pad;
 } KernelIntegrator;
 
 typedef struct KernelBVH {
index 731ffd9e271f6031280d2fc15ac021f72d20aa7a..cc369e7abc95e8b30469ad70699abb13859cee73 100644 (file)
@@ -49,6 +49,7 @@ Integrator::Integrator()
        sample_clamp = 0.0f;
        motion_blur = false;
 
+       aa_samples = 0;
        diffuse_samples = 1;
        glossy_samples = 1;
        transmission_samples = 1;
@@ -57,6 +58,8 @@ Integrator::Integrator()
        subsurface_samples = 1;
        progressive = true;
 
+       sampling_pattern = SAMPLING_PATTERN_SOBOL;
+
        need_update = true;
 }
 
@@ -104,6 +107,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
        kintegrator->sample_clamp = (sample_clamp == 0.0f)? FLT_MAX: sample_clamp*3.0f;
 
        kintegrator->progressive = progressive;
+       kintegrator->aa_samples = aa_samples;
        kintegrator->diffuse_samples = diffuse_samples;
        kintegrator->glossy_samples = glossy_samples;
        kintegrator->transmission_samples = transmission_samples;
@@ -111,6 +115,8 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
        kintegrator->mesh_light_samples = mesh_light_samples;
        kintegrator->subsurface_samples = subsurface_samples;
 
+       kintegrator->sampling_pattern = sampling_pattern;
+
        /* sobol directions table */
        int max_samples = 1;
 
@@ -160,6 +166,7 @@ bool Integrator::modified(const Integrator& integrator)
                seed == integrator.seed &&
                sample_clamp == integrator.sample_clamp &&
                progressive == integrator.progressive &&
+               aa_samples == integrator.aa_samples &&
                diffuse_samples == integrator.diffuse_samples &&
                glossy_samples == integrator.glossy_samples &&
                transmission_samples == integrator.transmission_samples &&
index 9867e310d4d170c46e6fd6580ac8583a84568277..fff24b506fb23f8b3ce7f7fabb79d0e038432fe9 100644 (file)
@@ -19,6 +19,8 @@
 #ifndef __INTEGRATOR_H__
 #define __INTEGRATOR_H__
 
+#include "kernel_types.h"
+
 CCL_NAMESPACE_BEGIN
 
 class Device;
@@ -49,6 +51,7 @@ public:
        float sample_clamp;
        bool motion_blur;
 
+       int aa_samples;
        int diffuse_samples;
        int glossy_samples;
        int transmission_samples;
@@ -58,6 +61,8 @@ public:
 
        bool progressive;
 
+       SamplingPattern sampling_pattern;
+
        bool need_update;
 
        Integrator();
index bc847d5719c6f5c2d4d665c3e55770bfb077c7b6..44364418dcf6d0c26f0807e51f3ec99c51d18a05 100644 (file)
@@ -22,6 +22,7 @@
 #include "buffers.h"
 #include "camera.h"
 #include "device.h"
+#include "integrator.h"
 #include "scene.h"
 #include "session.h"
 
@@ -728,6 +729,18 @@ void Session::update_scene()
                cam->tag_update();
        }
 
+       /* number of samples is needed by multi jittered sampling pattern */
+       Integrator *integrator = scene->integrator;
+
+       if(integrator->sampling_pattern == SAMPLING_PATTERN_CMJ) {
+               int aa_samples = tile_manager.num_samples;
+
+               if(aa_samples != integrator->aa_samples) {
+                       integrator->aa_samples = aa_samples;
+                       integrator->tag_update(scene);
+               }
+       }
+
        /* update scene */
        if(scene->need_update()) {
                progress.set_status("Updating Scene");
index fe1cb61ffa9597740edcc4147dfd3e2ba823b325..0bc67f0618a4d1f498c061bb76b879bc06a325e3 100644 (file)
@@ -59,6 +59,8 @@
 
 /* SIMD Types */
 
+#ifndef __KERNEL_GPU__
+
 /* not enabled, globally applying it just gives slowdown,
  * but useful for testing. */
 //#define __KERNEL_SSE__
 #endif
 
 #ifndef _WIN32
-#ifndef __KERNEL_GPU__
 
 #include <stdint.h>
 
 #endif
+
 #endif
 
 CCL_NAMESPACE_BEGIN