Cycles: Make CUDA adaptive feature compile a Debug flag.
author Thomas Dinges <blender@dingto.org>
Fri, 6 May 2016 20:34:15 +0000 (22:34 +0200)
committer Thomas Dinges <blender@dingto.org>
Fri, 6 May 2016 21:13:33 +0000 (23:13 +0200)
If the CUDA Toolkit is installed and the user is on Linux,
adaptive, feature-based CUDA runtime compilation can now be enabled via:

* Environment flag CYCLES_CUDA_ADAPTIVE_COMPILE or
* Debug menu (Debug value 256) in the Cycles UI.

intern/cycles/blender/addon/properties.py
intern/cycles/blender/addon/ui.py
intern/cycles/blender/blender_python.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/util/util_debug.cpp
intern/cycles/util/util_debug.h

index 6f8383fc0afe299fd70b81b45b4cccae5be700b2..eed86a6e65a85be31ad6e199167119cdd25c9620 100644 (file)
@@ -594,6 +594,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True)
         cls.debug_use_qbvh = BoolProperty(name="QBVH", default=True)
 
+        cls.debug_use_cuda_adaptive_compile = BoolProperty(name="Adaptive Compile", default=False)
+
         cls.debug_opencl_kernel_type = EnumProperty(
             name="OpenCL Kernel Type",
             default='DEFAULT',
index a4e6809390d9e959b94c5bdbf46e4ae33beda0c6..023841a7a17a637b4088408087440a6ebb95d171 100644 (file)
@@ -1553,6 +1553,10 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
         row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
         col.prop(cscene, "debug_use_qbvh")
 
+        col = layout.column()
+        col.label('CUDA Flags:')
+        col.prop(cscene, "debug_use_cuda_adaptive_compile")
+
         col = layout.column()
         col.label('OpenCL Flags:')
         col.prop(cscene, "debug_opencl_kernel_type", text="Kernel")
index ceb9cbf242efa1a89b14dd2dd4efcca78b0c3450..0161b5b192c2772a08b618ef914295f901b8a62b 100644 (file)
@@ -70,6 +70,8 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
        flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
        flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
        flags.cpu.qbvh = get_boolean(cscene, "debug_use_qbvh");
+       /* Synchronize CUDA flags. */
+       flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
        /* Synchronize OpenCL kernel type. */
        switch(get_enum(cscene, "debug_opencl_kernel_type")) {
                case 0:
index f7c985e787dc8efe2feeae16c34e286bff1f9061..5362623247f5ff48000aca50e1b8fb4ef16f773b 100644 (file)
 #include "util_types.h"
 #include "util_time.h"
 
-/* use feature-adaptive kernel compilation.
- * Requires CUDA toolkit to be installed and currently only works on Linux.
- */
-/* #define KERNEL_USE_ADAPTIVE */
-
 CCL_NAMESPACE_BEGIN
 
 #ifndef WITH_CUDA_DYNLOAD
@@ -245,6 +240,11 @@ public:
                return true;
        }
 
+       /* Report whether feature-adaptive kernel compilation is requested,
+        * as stored in the CUDA section of DebugFlags(). */
+       bool use_adaptive_compilation()
+       {
+               const bool adaptive_requested = DebugFlags().cuda.adaptive_compile;
+               return adaptive_requested;
+       }
+
        string compile_kernel(const DeviceRequestedFeatures& requested_features)
        {
                /* compute cubin name */
@@ -252,6 +252,9 @@ public:
                cuDeviceComputeCapability(&major, &minor, cuDevId);
                string cubin;
 
+               /* adaptive compile */
+               bool use_adaptive_compile = use_adaptive_compilation();
+
                /* attempt to use kernel provided with blender */
                cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
                VLOG(1) << "Testing for pre-compiled kernel " << cubin;
@@ -264,17 +267,19 @@ public:
                string kernel_path = path_get("kernel");
                string md5 = path_files_md5_hash(kernel_path);
 
-#ifdef KERNEL_USE_ADAPTIVE
-               string feature_build_options = requested_features.get_build_options();
-               string device_md5 = util_md5_string(feature_build_options);
-               cubin = string_printf("cycles_kernel_%s_sm%d%d_%s.cubin",
-                                     device_md5.c_str(),
-                                     major, minor,
-                                     md5.c_str());
-#else
-               (void)requested_features;
-               cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
-#endif
+               string feature_build_options;
+               if(use_adaptive_compile) {
+                       feature_build_options = requested_features.get_build_options();
+                       string device_md5 = util_md5_string(feature_build_options);
+                       cubin = string_printf("cycles_kernel_%s_sm%d%d_%s.cubin",
+                                         device_md5.c_str(),
+                                         major, minor,
+                                         md5.c_str());
+               }
+               else {
+                       (void)requested_features;
+                       cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
+               }
 
                cubin = path_user_get(path_join("cache", cubin));
                VLOG(1) << "Testing for locally compiled kernel " << cubin;
@@ -331,9 +336,8 @@ public:
                        "-DNVCC -D__KERNEL_CUDA_VERSION__=%d",
                        nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
 
-#ifdef KERNEL_USE_ADAPTIVE
-               command += " " + feature_build_options;
-#endif
+               if(use_adaptive_compile)
+                       command += " " + feature_build_options;
 
                const char* extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
                if(extra_cflags) {
index 448c6223765ddc6a91626d61ee658cbe6191a225..80d177d2caeb862cb2fd42de9b14c9b8a10311de 100644 (file)
@@ -57,6 +57,18 @@ void DebugFlags::CPU::reset()
        qbvh = true;
 }
 
+/* Construct the CUDA flag set with adaptive compilation disabled, then run
+ * reset() so the CYCLES_CUDA_ADAPTIVE_COMPILE environment variable is
+ * taken into account. */
+DebugFlags::CUDA::CUDA() : adaptive_compile(false)
+{
+       this->reset();
+}
+
+/* Restore the CUDA flags to their defaults.
+ *
+ * Adaptive compilation defaults to enabled only while the
+ * CYCLES_CUDA_ADAPTIVE_COMPILE environment variable is present. The flag is
+ * assigned unconditionally, so a value that was switched on earlier (for
+ * example from the scene's debug settings) is really cleared again when the
+ * variable is not set. */
+void DebugFlags::CUDA::reset()
+{
+       adaptive_compile = (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL);
+}
+
 DebugFlags::OpenCL::OpenCL()
   : device_type(DebugFlags::OpenCL::DEVICE_ALL),
     kernel_type(DebugFlags::OpenCL::KERNEL_DEFAULT),
@@ -123,6 +135,9 @@ std::ostream& operator <<(std::ostream &os,
           << "  SSE3   : " << string_from_bool(debug_flags.cpu.sse3)  << "\n"
           << "  SSE2   : " << string_from_bool(debug_flags.cpu.sse2)  << "\n";
 
+       os << "CUDA flags:\n"
+          << " Adaptive Compile: " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n";
+
        const char *opencl_device_type,
                   *opencl_kernel_type;
        switch(debug_flags.opencl.device_type) {
index 6ec5188049d5a7070f5a6bb608236902a0d53ac4..641abcc0668ed481ee7587519340116852789a69 100644 (file)
@@ -46,6 +46,18 @@ public:
                bool qbvh;
        };
 
+       /* Descriptor of CUDA feature-set to be used.
+        * Currently carries a single switch, adaptive_compile. */
+       struct CUDA {
+               CUDA();
+
+               /* Reset flags to their defaults.
+                * The CYCLES_CUDA_ADAPTIVE_COMPILE environment variable is
+                * consulted while doing so. */
+               void reset();
+
+               /* Whether adaptive feature based runtime compile is enabled or not.
+                * Requires the CUDA Toolkit and only works on Linux atm.
+                * Initialized to false by the constructor; it is also written from
+                * the "debug_use_cuda_adaptive_compile" scene property when debug
+                * flags are synchronized from the scene. */
+               bool adaptive_compile;
+       };
+
        /* Descriptor of OpenCL feature-set to be used. */
        struct OpenCL {
                OpenCL();
@@ -107,6 +119,9 @@ public:
        /* Requested CPU flags. */
        CPU cpu;
 
+       /* Requested CUDA flags. */
+       CUDA cuda;
+
        /* Requested OpenCL flags. */
        OpenCL opencl;