Merge branch 'master' into blender2.8
author Campbell Barton <ideasman42@gmail.com>
Mon, 14 Aug 2017 02:13:55 +0000 (12:13 +1000)
committer Campbell Barton <ideasman42@gmail.com>
Mon, 14 Aug 2017 02:13:55 +0000 (12:13 +1000)
18 files changed:
build_files/build_environment/cmake/options.cmake
build_files/build_environment/windows/build_deps.cmd
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/kernel_accumulate.h
intern/cycles/kernel/kernel_debug.h [deleted file]
intern/cycles/kernel/kernel_passes.h
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_path_branched.h
intern/cycles/kernel/kernel_random.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/split/kernel_buffer_update.h
intern/cycles/kernel/split/kernel_data_init.h
intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
intern/cycles/kernel/split/kernel_indirect_background.h
intern/cycles/kernel/split/kernel_path_init.h
intern/cycles/kernel/split/kernel_scene_intersect.h
intern/cycles/kernel/split/kernel_split_data_types.h
intern/cycles/util/util_types.h

index 16d79d463f6ac7bb4f82be1a3487e470d0313857..5618fc1255dcee998c9ed4f8ce3a0893acd6a349 100644 (file)
@@ -33,7 +33,8 @@ ELSE(BUILD_MODE STREQUAL "Debug")
        set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Release)
 ENDIF(BUILD_MODE STREQUAL "Debug")
 
-set(DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/downloads)
+set(DOWNLOAD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/downloads" CACHE PATH "Path for downloaded files")  # PATH-typed cache entry (option() is BOOL-only); a -DDOWNLOAD_DIR=... value still takes precedence
+file(TO_CMAKE_PATH "${DOWNLOAD_DIR}" DOWNLOAD_DIR)  # normalise Windows back-slashes; quoted so the value is always passed as exactly one argument
 set(PATCH_DIR ${CMAKE_CURRENT_SOURCE_DIR}/patches)
 set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/build)
 
index 3e458816a5a205eb1edf492c4ac948a98248451a..a18eb085e4fc2d78171c92233f01a764a33d145a 100644 (file)
@@ -54,7 +54,7 @@ set CMAKE_DEBUG_OPTIONS=-DWITH_OPTIMIZED_DEBUG=On
 if "%3" == "debug" set CMAKE_DEBUG_OPTIONS=-DWITH_OPTIMIZED_DEBUG=Off
 
 set SOURCE_DIR=%~dp0\..
-set BUILD_DIR=%~dp0\..\..\..\..\build_windows\deps
+set BUILD_DIR=%cd%\build
 set HARVEST_DIR=%BUILD_DIR%\output
 set STAGING=%BUILD_DIR%\S
 
@@ -62,7 +62,7 @@ rem for python module build
 set MSSdk=1 
 set DISTUTILS_USE_SDK=1  
 rem for python externals source to be shared between the various archs and compilers
-mkdir %SOURCE_DIR%\downloads\externals
+mkdir %BUILD_DIR%\downloads\externals
 
 REM Detect MSVC Installation
 if DEFINED VisualStudioVersion goto msvc_detect_finally
@@ -95,11 +95,11 @@ if %ERRORLEVEL% NEQ 0 (
 )
 
 set StatusFile=%BUILD_DIR%\%1_%2.log
-set path=%SOURCE_DIR%\downloads\mingw\mingw64\msys\1.0\bin\;%SOURCE_DIR%\downloads\nasm-2.12.01\;%path%
+set path=%BUILD_DIR%\downloads\mingw\mingw64\msys\1.0\bin\;%BUILD_DIR%\downloads\nasm-2.12.01\;%path%
 mkdir %STAGING%\%BuildDir%%ARCH%R
 cd %Staging%\%BuildDir%%ARCH%R
 echo %DATE% %TIME% : Start > %StatusFile%
-cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DBUILD_MODE=Release -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/
+cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DDOWNLOAD_DIR=%BUILD_DIR%/downloads -DBUILD_MODE=Release -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/
 echo %DATE% %TIME% : Release Configuration done >> %StatusFile%
 msbuild /m "ll.vcxproj" /p:Configuration=Release /fl /flp:logfile=BlenderDeps_llvm.log 
 msbuild /m "BlenderDependencies.sln" /p:Configuration=Release /fl /flp:logfile=BlenderDeps.log 
@@ -109,7 +109,7 @@ echo %DATE% %TIME% : Release Harvest done >> %StatusFile%
 cd %BUILD_DIR%
 mkdir %STAGING%\%BuildDir%%ARCH%D
 cd %Staging%\%BuildDir%%ARCH%D
-cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DCMAKE_BUILD_TYPE=Debug -DBUILD_MODE=Debug -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/  %CMAKE_DEBUG_OPTIONS%
+cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DDOWNLOAD_DIR=%BUILD_DIR%/downloads -DCMAKE_BUILD_TYPE=Debug -DBUILD_MODE=Debug -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/  %CMAKE_DEBUG_OPTIONS%
 echo %DATE% %TIME% : Debug Configuration done >> %StatusFile%
 msbuild /m "ll.vcxproj" /p:Configuration=Debug /fl /flp:logfile=BlenderDeps_llvm.log 
 msbuild /m "BlenderDependencies.sln" /p:Configuration=Debug /fl /flp:logfile=BlenderDeps.log
index 9fe61515570e4324a18ff6847585ce03b705e495..b4ca16bdb48939bfde31a93f18c37273053873c2 100644 (file)
@@ -79,7 +79,6 @@ set(SRC_HEADERS
        kernel_compat_cpu.h
        kernel_compat_cuda.h
        kernel_compat_opencl.h
-       kernel_debug.h
        kernel_differential.h
        kernel_emission.h
        kernel_film.h
index d139e28b0132af4041b74728b9df3a64e51b591a..82d3c153bf5e0cc97267ec1146bfc6a1b527333e 100644 (file)
@@ -208,6 +208,7 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
                L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f);
                L->path_scatter = make_float3(0.0f, 0.0f, 0.0f);
 
+               L->transparent = 0.0f;
                L->emission = make_float3(0.0f, 0.0f, 0.0f);
                L->background = make_float3(0.0f, 0.0f, 0.0f);
                L->ao = make_float3(0.0f, 0.0f, 0.0f);
@@ -217,6 +218,7 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
        else
 #endif
        {
+               L->transparent = 0.0f;
                L->emission = make_float3(0.0f, 0.0f, 0.0f);
        }
 
@@ -233,7 +235,14 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass)
        L->denoising_normal = make_float3(0.0f, 0.0f, 0.0f);
        L->denoising_albedo = make_float3(0.0f, 0.0f, 0.0f);
        L->denoising_depth = 0.0f;
-#endif  /* __DENOISING_FEATURES__ */
+#endif
+
+#ifdef __KERNEL_DEBUG__
+       L->debug_data.num_bvh_traversed_nodes = 0;
+       L->debug_data.num_bvh_traversed_instances = 0;
+       L->debug_data.num_bvh_intersections = 0;
+       L->debug_data.num_ray_bounces = 0;
+#endif
 }
 
 ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, ccl_addr_space float3 *throughput,
diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h
deleted file mode 100644 (file)
index 5647bba..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright 2011-2014 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device_inline void debug_data_init(DebugData *debug_data)
-{
-       debug_data->num_bvh_traversed_nodes = 0;
-       debug_data->num_bvh_traversed_instances = 0;
-       debug_data->num_bvh_intersections = 0;
-       debug_data->num_ray_bounces = 0;
-}
-
-ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
-                                                 ccl_global float *buffer,
-                                                 ccl_addr_space PathState *state,
-                                                 DebugData *debug_data,
-                                                 int sample)
-{
-       int flag = kernel_data.film.pass_flag;
-       if(flag & PASS_BVH_TRAVERSED_NODES) {
-               kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
-                                       sample,
-                                       debug_data->num_bvh_traversed_nodes);
-       }
-       if(flag & PASS_BVH_TRAVERSED_INSTANCES) {
-               kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
-                                       sample,
-                                       debug_data->num_bvh_traversed_instances);
-       }
-       if(flag & PASS_BVH_INTERSECTIONS) {
-               kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
-                                       sample,
-                                       debug_data->num_bvh_intersections);
-       }
-       if(flag & PASS_RAY_BOUNCES) {
-               kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
-                                       sample,
-                                       debug_data->num_ray_bounces);
-       }
-}
-
-CCL_NAMESPACE_END
index 9cd7ffb181d3920244e9c8f9705c6af881628551..de65e8ef27b979b87bf77709adb04139cf58df4b 100644 (file)
@@ -194,6 +194,36 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
 #endif  /* __DENOISING_FEATURES__ */
 }
 
+#ifdef __KERNEL_DEBUG__
+ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
+                                                 ccl_global float *buffer,
+                                                 PathRadiance *L,
+                                                 int sample)
+{
+       int flag = kernel_data.film.pass_flag;
+       if(flag & PASS_BVH_TRAVERSED_NODES) {
+               kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
+                                       sample,
+                                       L->debug_data.num_bvh_traversed_nodes);
+       }
+       if(flag & PASS_BVH_TRAVERSED_INSTANCES) {
+               kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
+                                       sample,
+                                       L->debug_data.num_bvh_traversed_instances);
+       }
+       if(flag & PASS_BVH_INTERSECTIONS) {
+               kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
+                                       sample,
+                                       L->debug_data.num_bvh_intersections);
+       }
+       if(flag & PASS_RAY_BOUNCES) {
+               kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
+                                       sample,
+                                       L->debug_data.num_ray_bounces);
+       }
+}
+#endif /* __KERNEL_DEBUG__ */
+
 ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
        ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput)
 {
@@ -334,10 +364,12 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global f
 }
 
 ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *buffer,
-       int sample, PathRadiance *L, float alpha, bool is_shadow_catcher)
+       int sample, PathRadiance *L, bool is_shadow_catcher)
 {
        if(L) {
                float3 L_sum;
+               float alpha = 1.0f - L->transparent;
+
 #ifdef __SHADOW_TRICKS__
                if(is_shadow_catcher) {
                        L_sum = path_radiance_sum_shadowcatcher(kg, L, &alpha);
@@ -389,6 +421,11 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *
                                                         sample, L->denoising_depth);
                }
 #endif  /* __DENOISING_FEATURES__ */
+
+
+#ifdef __KERNEL_DEBUG__
+               kernel_write_debug_passes(kg, buffer, L, sample);
+#endif
        }
        else {
                kernel_write_pass_float4(buffer, sample, make_float4(0.0f, 0.0f, 0.0f, 0.0f));
index 2a801597649c56c674ed1104753037c072ee9644..c454228eab52fa1733c401669a97bf3e56405768 100644 (file)
 #include "kernel/kernel_path_volume.h"
 #include "kernel/kernel_path_subsurface.h"
 
-#ifdef __KERNEL_DEBUG__
-#  include "kernel/kernel_debug.h"
-#endif
-
 CCL_NAMESPACE_BEGIN
 
 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
@@ -436,17 +432,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 
 #endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */
 
-ccl_device_inline float kernel_path_integrate(KernelGlobals *kg,
-                                              RNG *rng,
-                                              int sample,
-                                              Ray ray,
-                                              ccl_global float *buffer,
-                                              PathRadiance *L,
-                                              bool *is_shadow_catcher)
+ccl_device_inline void kernel_path_integrate(KernelGlobals *kg,
+                                             RNG *rng,
+                                             int sample,
+                                             Ray ray,
+                                             ccl_global float *buffer,
+                                             PathRadiance *L,
+                                             bool *is_shadow_catcher)
 {
        /* initialize */
        float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-       float L_transparent = 0.0f;
 
        path_radiance_init(L, kernel_data.film.use_light_pass);
 
@@ -458,11 +453,6 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg,
        PathState state;
        path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
 
-#ifdef __KERNEL_DEBUG__
-       DebugData debug_data;
-       debug_data_init(&debug_data);
-#endif  /* __KERNEL_DEBUG__ */
-
 #ifdef __SUBSURFACE__
        SubsurfaceIndirectRays ss_indirect;
        kernel_path_subsurface_init_indirect(&ss_indirect);
@@ -503,11 +493,11 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg,
 
 #ifdef __KERNEL_DEBUG__
                if(state.flag & PATH_RAY_CAMERA) {
-                       debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
-                       debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
-                       debug_data.num_bvh_intersections += isect.num_intersections;
+                       L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
+                       L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
+                       L->debug_data.num_bvh_intersections += isect.num_intersections;
                }
-               debug_data.num_ray_bounces++;
+               L->debug_data.num_ray_bounces++;
 #endif  /* __KERNEL_DEBUG__ */
 
 #ifdef __LAMP_MIS__
@@ -622,7 +612,7 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg,
                if(!hit) {
                        /* eval background shader if nothing hit */
                        if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
-                               L_transparent += average(throughput);
+                               L->transparent += average(throughput);
 
 #ifdef __PASSES__
                                if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
@@ -682,7 +672,7 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg,
                                        holdout_weight = shader_holdout_eval(kg, &sd);
                                }
                                /* any throughput is ok, should all be identical here */
-                               L_transparent += average(holdout_weight*throughput);
+                               L->transparent += average(holdout_weight*throughput);
                        }
 
                        if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
@@ -789,12 +779,6 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg,
 #ifdef __SHADOW_TRICKS__
        *is_shadow_catcher = (state.flag & PATH_RAY_SHADOW_CATCHER) != 0;
 #endif  /* __SHADOW_TRICKS__ */
-
-#ifdef __KERNEL_DEBUG__
-       kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
-#endif  /* __KERNEL_DEBUG__ */
-
-       return 1.0f - L_transparent;
 }
 
 ccl_device void kernel_path_trace(KernelGlobals *kg,
@@ -819,14 +803,12 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
        bool is_shadow_catcher;
 
        if(ray.t != 0.0f) {
-               float alpha = kernel_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher);
-               kernel_write_result(kg, buffer, sample, &L, alpha, is_shadow_catcher);
+               kernel_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher);
+               kernel_write_result(kg, buffer, sample, &L, is_shadow_catcher);
        }
        else {
-               kernel_write_result(kg, buffer, sample, NULL, 0.0f, false);
+               kernel_write_result(kg, buffer, sample, NULL, false);
        }
-
-       path_rng_end(kg, rng_state, rng);
 }
 
 #endif  /* __SPLIT_KERNEL__ */
index 17facfa9a7837d9176191ec4e9d9036866292f20..abc291bc7e3a7d51ae2d69f63edcca6e4ce32d5d 100644 (file)
@@ -269,17 +269,16 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
 }
 #endif  /* __SUBSURFACE__ */
 
-ccl_device float kernel_branched_path_integrate(KernelGlobals *kg,
-                                                RNG *rng,
-                                                int sample,
-                                                Ray ray,
-                                                ccl_global float *buffer,
-                                                PathRadiance *L,
-                                                bool *is_shadow_catcher)
+ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
+                                               RNG *rng,
+                                               int sample,
+                                               Ray ray,
+                                               ccl_global float *buffer,
+                                               PathRadiance *L,
+                                               bool *is_shadow_catcher)
 {
        /* initialize */
        float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
-       float L_transparent = 0.0f;
 
        path_radiance_init(L, kernel_data.film.use_light_pass);
 
@@ -291,11 +290,6 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg,
        PathState state;
        path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
 
-#ifdef __KERNEL_DEBUG__
-       DebugData debug_data;
-       debug_data_init(&debug_data);
-#endif  /* __KERNEL_DEBUG__ */
-
        /* Main Loop
         * Here we only handle transparency intersections from the camera ray.
         * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
@@ -326,10 +320,10 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg,
 #endif  /* __HAIR__ */
 
 #ifdef __KERNEL_DEBUG__
-               debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
-               debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
-               debug_data.num_bvh_intersections += isect.num_intersections;
-               debug_data.num_ray_bounces++;
+               L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
+               L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
+               L->debug_data.num_bvh_intersections += isect.num_intersections;
+               L->debug_data.num_ray_bounces++;
 #endif  /* __KERNEL_DEBUG__ */
 
 #ifdef __VOLUME__
@@ -482,7 +476,7 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg,
                if(!hit) {
                        /* eval background shader if nothing hit */
                        if(kernel_data.background.transparent) {
-                               L_transparent += average(throughput);
+                               L->transparent += average(throughput);
 
 #ifdef __PASSES__
                                if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
@@ -534,7 +528,7 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg,
                                        holdout_weight = shader_holdout_eval(kg, &sd);
                                }
                                /* any throughput is ok, should all be identical here */
-                               L_transparent += average(holdout_weight*throughput);
+                               L->transparent += average(holdout_weight*throughput);
                        }
                        if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
                                break;
@@ -637,12 +631,6 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg,
 #ifdef __SHADOW_TRICKS__
        *is_shadow_catcher = (state.flag & PATH_RAY_SHADOW_CATCHER) != 0;
 #endif  /* __SHADOW_TRICKS__ */
-
-#ifdef __KERNEL_DEBUG__
-       kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
-#endif  /* __KERNEL_DEBUG__ */
-
-       return 1.0f - L_transparent;
 }
 
 ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
@@ -667,14 +655,12 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
        bool is_shadow_catcher;
 
        if(ray.t != 0.0f) {
-               float alpha = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher);
-               kernel_write_result(kg, buffer, sample, &L, alpha, is_shadow_catcher);
+               kernel_branched_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher);
+               kernel_write_result(kg, buffer, sample, &L, is_shadow_catcher);
        }
        else {
-               kernel_write_result(kg, buffer, sample, NULL, 0.0f, false);
+               kernel_write_result(kg, buffer, sample, NULL, false);
        }
-
-       path_rng_end(kg, rng_state, rng);
 }
 
 #endif  /* __SPLIT_KERNEL__ */
index e8a912ccc0b38a00e8db89f3c5f2be4f369b4261..073011ace31570f6fd79ca5fe07d3b6fdc4631ae 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
+/* Pseudo random numbers, uncomment this for debugging correlations. Only run
+ * this single threaded on a CPU for repeatable results. */
+//#define __DEBUG_CORRELATION__
+
+
+/* High Dimensional Sobol.
+ *
+ * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal
+ * to classic Van der Corput and Sobol sequences. */
+
 #ifdef __SOBOL__
 
 /* Skip initial numbers that are not as well distributed, especially the
@@ -26,47 +36,6 @@ CCL_NAMESPACE_BEGIN
  */
 #define SOBOL_SKIP 64
 
-/* High Dimensional Sobol. */
-
-/* Van der Corput radical inverse. */
-ccl_device uint van_der_corput(uint bits)
-{
-       bits = (bits << 16) | (bits >> 16);
-       bits = ((bits & 0x00ff00ff) << 8) | ((bits & 0xff00ff00) >> 8);
-       bits = ((bits & 0x0f0f0f0f) << 4) | ((bits & 0xf0f0f0f0) >> 4);
-       bits = ((bits & 0x33333333) << 2) | ((bits & 0xcccccccc) >> 2);
-       bits = ((bits & 0x55555555) << 1) | ((bits & 0xaaaaaaaa) >> 1);
-       return bits;
-}
-
-/* Sobol radical inverse. */
-ccl_device uint sobol(uint i)
-{
-       uint r = 0;
-       for(uint v = 1U << 31; i; i >>= 1, v ^= v >> 1) {
-               if(i & 1) {
-                       r ^= v;
-               }
-       }
-       return r;
-}
-
-/* Inverse of sobol radical inverse. */
-ccl_device uint sobol_inverse(uint i)
-{
-       const uint msb = 1U << 31;
-       uint r = 0;
-       for(uint v = 1; i; i <<= 1, v ^= v << 1) {
-               if(i & msb) {
-                       r ^= v;
-               }
-       }
-       return r;
-}
-
-/* Multidimensional sobol with generator matrices
- * dimension 0 and 1 are equal to van_der_corput() and sobol() respectively.
- */
 ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
 {
        uint result = 0;
@@ -79,50 +48,31 @@ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
        return result;
 }
 
-/* Lookup index and x/y coordinate, assumes m is a power of two. */
-ccl_device uint sobol_lookup(const uint m,
-                             const uint frame,
-                             const uint ex,
-                             const uint ey,
-                             uint *x, uint *y)
-{
-       /* Shift is constant per frame. */
-       const uint shift = frame << (m << 1);
-       const uint sobol_shift = sobol(shift);
-       /* Van der Corput is its own inverse. */
-       const uint lower = van_der_corput(ex << (32 - m));
-       /* Need to compensate for ey difference and shift. */
-       const uint sobol_lower = sobol(lower);
-       const uint mask = ~-(1 << m) << (32 - m);  /* Only m upper bits. */
-       const uint delta = ((ey << (32 - m)) ^ sobol_lower ^ sobol_shift) & mask;
-       /* Only use m upper bits for the index (m is a power of two). */
-       const uint sobol_result = delta | (delta >> m);
-       const uint upper = sobol_inverse(sobol_result);
-       const uint index = shift | upper | lower;
-       *x = van_der_corput(index);
-       *y = sobol_shift ^ sobol_result ^ sobol_lower;
-       return index;
-}
+#endif /* __SOBOL__ */
+
 
 ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
                                          RNG *rng,
                                          int sample, int num_samples,
                                          int dimension)
 {
+#ifdef __DEBUG_CORRELATION__
+       return (float)drand48();
+#endif
+
 #ifdef __CMJ__
-       if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
+#  ifdef __SOBOL__
+       if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
+#  endif
+       {
                /* Correlated multi-jitter. */
                int p = *rng + dimension;
                return cmj_sample_1D(sample, num_samples, p);
        }
 #endif
 
-#ifdef __SOBOL_FULL_SCREEN__
-       uint result = sobol_dimension(kg, *rng, dimension);
-       float r = (float)result * (1.0f/(float)0xFFFFFFFF);
-       return r;
-#else
-       /* Compute sobol sequence value using direction vectors. */
+#ifdef __SOBOL__
+       /* Sobol sequence value using direction vectors. */
        uint result = sobol_dimension(kg, sample + SOBOL_SKIP, dimension);
        float r = (float)result * (1.0f/(float)0xFFFFFFFF);
 
@@ -145,19 +95,29 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
                                         int dimension,
                                         float *fx, float *fy)
 {
+#ifdef __DEBUG_CORRELATION__
+       *fx = (float)drand48();
+       *fy = (float)drand48();
+       return;
+#endif
+
 #ifdef __CMJ__
-       if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
+#  ifdef __SOBOL__
+       if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
+#  endif
+       {
                /* Correlated multi-jitter. */
                int p = *rng + dimension;
                cmj_sample_2D(sample, num_samples, p, fx, fy);
+               return;
        }
-       else
 #endif
-       {
-               /* Sobol. */
-               *fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
-               *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
-       }
+
+#ifdef __SOBOL__
+       /* Sobol. */
+       *fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
+       *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
+#endif
 }
 
 ccl_device_inline void path_rng_init(KernelGlobals *kg,
@@ -167,81 +127,13 @@ ccl_device_inline void path_rng_init(KernelGlobals *kg,
                                      int x, int y,
                                      float *fx, float *fy)
 {
-#ifdef __SOBOL_FULL_SCREEN__
-       uint px, py;
-       uint bits = 16; /* limits us to 65536x65536 and 65536 samples */
-       uint size = 1 << bits;
-       uint frame = sample;
-
-       *rng = sobol_lookup(bits, frame, x, y, &px, &py);
-
-       *rng ^= kernel_data.integrator.seed;
-
-       if(sample == 0) {
-               *fx = 0.5f;
-               *fy = 0.5f;
-       }
-       else {
-               *fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x;
-               *fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y;
-       }
-#else
+       /* load state */
        *rng = *rng_state;
-
        *rng ^= kernel_data.integrator.seed;
 
-       if(sample == 0) {
-               *fx = 0.5f;
-               *fy = 0.5f;
-       }
-       else {
-               path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U, fx, fy);
-       }
+#ifdef __DEBUG_CORRELATION__
+       srand48(*rng + sample);
 #endif
-}
-
-ccl_device void path_rng_end(KernelGlobals *kg,
-                             ccl_global uint *rng_state,
-                             RNG rng)
-{
-       /* nothing to do */
-}
-
-#else  /* __SOBOL__ */
-
-/* Linear Congruential Generator */
-
-ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
-                                         RNG *rng,
-                                         int sample, int num_samples,
-                                         int dimension)
-{
-       /* implicit mod 2^32 */
-       *rng = (1103515245*(*rng) + 12345);
-       return (float)*rng * (1.0f/(float)0xFFFFFFFF);
-}
-
-ccl_device_inline void path_rng_2D(KernelGlobals *kg,
-                                   RNG *rng,
-                                   int sample, int num_samples,
-                                   int dimension,
-                                   float *fx, float *fy)
-{
-       *fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
-       *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
-}
-
-ccl_device void path_rng_init(KernelGlobals *kg,
-                              ccl_global uint *rng_state,
-                              int sample, int num_samples,
-                              RNG *rng,
-                              int x, int y,
-                              float *fx, float *fy)
-{
-       /* load state */
-       *rng = *rng_state;
-
-       *rng ^= kernel_data.integrator.seed;
 
        if(sample == 0) {
                *fx = 0.5f;
@@ -252,16 +144,6 @@ ccl_device void path_rng_init(KernelGlobals *kg,
        }
 }
 
-ccl_device void path_rng_end(KernelGlobals *kg,
-                             ccl_global uint *rng_state,
-                             RNG rng)
-{
-       /* store state for next sample */
-       *rng_state = rng;
-}
-
-#endif  /* __SOBOL__ */
-
 /* Linear Congruential Generator */
 
 ccl_device uint lcg_step_uint(uint *rng)
index d5f720778ff0c445d097568863623271de8d4d17..f1b82eee3524ce43d61a5368a21a3d7b3fa6ee2c 100644 (file)
@@ -468,11 +468,24 @@ typedef enum DenoiseFlag {
        DENOISING_CLEAN_ALL_PASSES       = (1 << 8)-1,
 } DenoiseFlag;
 
+#ifdef __KERNEL_DEBUG__
+/* NOTE: This is a runtime-only struct, alignment is not
+ * really important here.
+ */
+typedef struct DebugData {
+       int num_bvh_traversed_nodes;
+       int num_bvh_traversed_instances;
+       int num_bvh_intersections;
+       int num_ray_bounces;
+} DebugData;
+#endif
+
 typedef ccl_addr_space struct PathRadiance {
 #ifdef __PASSES__
        int use_light_pass;
 #endif
 
+       float transparent;
        float3 emission;
 #ifdef __PASSES__
        float3 background;
@@ -538,6 +551,10 @@ typedef ccl_addr_space struct PathRadiance {
        float3 denoising_albedo;
        float denoising_depth;
 #endif  /* __DENOISING_FEATURES__ */
+
+#ifdef __KERNEL_DEBUG__
+       DebugData debug_data;
+#endif /* __KERNEL_DEBUG__ */
 } PathRadiance;
 
 typedef struct BsdfEval {
@@ -1345,18 +1362,6 @@ typedef struct KernelData {
 } KernelData;
 static_assert_align(KernelData, 16);
 
-#ifdef __KERNEL_DEBUG__
-/* NOTE: This is a runtime-only struct, alignment is not
- * really important here.
- */
-typedef ccl_addr_space struct DebugData {
-       int num_bvh_traversed_nodes;
-       int num_bvh_traversed_instances;
-       int num_bvh_intersections;
-       int num_ray_bounces;
-} DebugData;
-#endif
-
 /* Declarations required for split kernel */
 
 /* Macro for queues */
index 4c1fdd2d69ce28b164e7e4fa5b105e1a622eb0c8..6aa0d6948d032fe51355c11c725a28d11b068ba5 100644 (file)
@@ -79,14 +79,10 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
        int stride = kernel_split_params.stride;
 
        ccl_global char *ray_state = kernel_split_state.ray_state;
-#ifdef __KERNEL_DEBUG__
-       DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
-#endif
        ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
        ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
        ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-       ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
        RNG rng = kernel_split_state.rng[ray_index];
        ccl_global float *buffer = kernel_split_params.buffer;
 
@@ -111,15 +107,9 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
        buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
 
        if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
-#ifdef __KERNEL_DEBUG__
-               kernel_write_debug_passes(kg, buffer, state, debug_data, sample);
-#endif
-
                /* accumulate result in output buffer */
                bool is_shadow_catcher = (state->flag & PATH_RAY_SHADOW_CATCHER);
-               kernel_write_result(kg, buffer, sample, L, 1.0f - (*L_transparent), is_shadow_catcher);
-
-               path_rng_end(kg, rng_state, rng);
+               kernel_write_result(kg, buffer, sample, L, is_shadow_catcher);
 
                ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
        }
@@ -148,18 +138,14 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
                        kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, &rng, ray);
 
                        if(ray->t != 0.0f) {
-                               /* Initialize throughput, L_transparent, Ray, PathState;
+                               /* Initialize throughput, path radiance, Ray, PathState;
                                 * These rays proceed with path-iteration.
                                 */
                                *throughput = make_float3(1.0f, 1.0f, 1.0f);
-                               *L_transparent = 0.0f;
                                path_radiance_init(L, kernel_data.film.use_light_pass);
                                path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &rng, sample, ray);
 #ifdef __SUBSURFACE__
                                kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
-#endif
-#ifdef __KERNEL_DEBUG__
-                               debug_data_init(debug_data);
 #endif
                                ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
                                enqueue_flag = 1;
@@ -169,7 +155,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
                                float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
                                /* Accumulate result in output buffer. */
                                kernel_write_pass_float4(buffer, sample, L_rad);
-                               path_rng_end(kg, rng_state, rng);
 
                                ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
                        }
index 6f3297de34220e9d2d307215ad80f57ab36147fe..2c042dfde6fe53fd7f48075130383799d91b2223 100644 (file)
@@ -124,14 +124,25 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 
        /* zero the tiles pixels and initialize rng_state if this is the first sample */
        if(start_sample == 0) {
-               parallel_for(kg, i, sw * sh * kernel_data.film.pass_stride) {
-                       int pixel = i / kernel_data.film.pass_stride;
-                       int pass = i % kernel_data.film.pass_stride;
+               int pass_stride = kernel_data.film.pass_stride;
+
+#ifdef __KERNEL_CPU__
+               for(int y = sy; y < sy + sh; y++) {
+                       int index = offset + y * stride;
+                       memset(buffer + (sx + index) * pass_stride, 0, sizeof(float) * pass_stride * sw);
+                       for(int x = sx; x < sx + sw; x++) {
+                               rng_state[index + x] = hash_int_2d(x, y);
+                       }
+               }
+#else
+               parallel_for(kg, i, sw * sh * pass_stride) {
+                       int pixel = i / pass_stride;
+                       int pass = i % pass_stride;
 
                        int x = sx + pixel % sw;
                        int y = sy + pixel / sw;
 
-                       int index = (offset + x + y*stride) * kernel_data.film.pass_stride + pass;
+                       int index = (offset + x + y*stride) * pass_stride + pass;
 
                        *(buffer + index) = 0.0f;
                }
@@ -143,6 +154,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
                        int index = (offset + x + y*stride);
                        *(rng_state + index) = hash_int_2d(x, y);
                }
+#endif
        }
 
 #endif  /* KERNEL_STUB */
index adcb1bdc3773417b5bb365021eec9faf79eb9e65..95f57fbff57630c89fc1471f54e603df75c6c7b5 100644 (file)
@@ -162,7 +162,8 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
                                        holdout_weight = shader_holdout_eval(kg, sd);
                                }
                                /* any throughput is ok, should all be identical here */
-                               kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput);
+                               PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+                               L->transparent += average(holdout_weight*throughput);
                        }
                        if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
                                kernel_split_path_end(kg, ray_index);
index f0ebb90f60ad450dc6262fa42467676842fd6f1e..04d5769ef0db233b2c7dc1ffc7c75fdbed65ce06 100644 (file)
@@ -54,12 +54,11 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
        ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
        ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-       ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
 
        if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
                /* eval background shader if nothing hit */
                if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
-                       *L_transparent = (*L_transparent) + average((*throughput));
+                       L->transparent += average((*throughput));
 #ifdef __PASSES__
                        if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
 #endif
index a7ecde7c80d23904d0e5689284ea613ca0fdafd3..8315b0b2bd3c86cd6cd839c5929a3ffc1b5f56d9 100644 (file)
@@ -71,11 +71,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
                                &kernel_split_state.ray[ray_index]);
 
        if(kernel_split_state.ray[ray_index].t != 0.0f) {
-               /* Initialize throughput, L_transparent, Ray, PathState;
+               /* Initialize throughput, path radiance, Ray, PathState;
                 * These rays proceed with path-iteration.
                 */
                kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
-               kernel_split_state.L_transparent[ray_index] = 0.0f;
                path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
                path_state_init(kg,
                                &kernel_split_state.sd_DL_shadow[ray_index],
@@ -86,17 +85,12 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
 #ifdef __SUBSURFACE__
                kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
 #endif
-
-#ifdef __KERNEL_DEBUG__
-               debug_data_init(&kernel_split_state.debug_data[ray_index]);
-#endif
        }
        else {
                /* These rays do not participate in path-iteration. */
                float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
                /* Accumulate result in output buffer. */
                kernel_write_pass_float4(buffer, my_sample, L_rad);
-               path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
                ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
        }
        kernel_split_state.rng[ray_index] = rng;
index 45984ca509b5087aa9de075c29a2836091996436..5c6d90eecc40841e99577585038a9d1eb32f548a 100644 (file)
@@ -59,9 +59,6 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
                return;
        }
 
-#ifdef __KERNEL_DEBUG__
-       DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
-#endif
        Intersection isect;
        PathState state = kernel_split_state.path_state[ray_index];
        Ray ray = kernel_split_state.ray[ray_index];
@@ -97,12 +94,14 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
        kernel_split_state.isect[ray_index] = isect;
 
 #ifdef __KERNEL_DEBUG__
+       PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
        if(state.flag & PATH_RAY_CAMERA) {
-               debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes;
-               debug_data->num_bvh_traversed_instances += isect.num_traversed_instances;
-               debug_data->num_bvh_intersections += isect.num_intersections;
+               L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
+               L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
+               L->debug_data.num_bvh_intersections += isect.num_intersections;
        }
-       debug_data->num_ray_bounces++;
+       L->debug_data.num_ray_bounces++;
 #endif
 
        if(!hit) {
index 4bb2f0d3d80971815ab4c74dd824443c94440130..4f32c68d630f2c51d423889d50d07fde96521cdc 100644 (file)
@@ -56,14 +56,6 @@ typedef struct SplitParams {
 
 /* SPLIT_DATA_ENTRY(type, name, num) */
 
-#if defined(WITH_CYCLES_DEBUG) || defined(__KERNEL_DEBUG__)
-/* DebugData memory */
-#  define SPLIT_DATA_DEBUG_ENTRIES \
-       SPLIT_DATA_ENTRY(DebugData, debug_data, 1)
-#else
-#  define SPLIT_DATA_DEBUG_ENTRIES
-#endif  /* DEBUG */
-
 #ifdef __BRANCHED_PATH__
 
 typedef ccl_global struct SplitBranchedState {
@@ -124,7 +116,6 @@ typedef ccl_global struct SplitBranchedState {
 #define SPLIT_DATA_ENTRIES \
        SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
        SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
-       SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \
        SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
        SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
        SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
@@ -139,13 +130,11 @@ typedef ccl_global struct SplitBranchedState {
        SPLIT_DATA_SUBSURFACE_ENTRIES \
        SPLIT_DATA_VOLUME_ENTRIES \
        SPLIT_DATA_BRANCHED_ENTRIES \
-       SPLIT_DATA_DEBUG_ENTRIES \
 
 /* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
 #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
        SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
        SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
-       SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \
        SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
        SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
        SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
@@ -158,7 +147,6 @@ typedef ccl_global struct SplitBranchedState {
        SPLIT_DATA_SUBSURFACE_ENTRIES \
        SPLIT_DATA_VOLUME_ENTRIES \
        SPLIT_DATA_BRANCHED_ENTRIES \
-       SPLIT_DATA_DEBUG_ENTRIES \
 
 /* struct that holds pointers to data in the shared state buffer */
 typedef struct SplitData {
index 733b97e14329be8c7e92d9b29638a386e4a76c54..aabca6c81fcc511c1354e1feddb0d1a113b8cdcf 100644 (file)
@@ -28,9 +28,9 @@
 #endif
 
 #include "util/util_defines.h"
-#include "util/util_optimization.h"
 
 #ifndef __KERNEL_GPU__
+#  include "util/util_optimization.h"
 #  include "util/util_simd.h"
 #endif