Cycles: Use feature-selective base kernel compilation when using split kernel
author Sergey Sharybin <sergey.vfx@gmail.com>
Sat, 18 Jul 2015 13:09:25 +0000 (15:09 +0200)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Sat, 18 Jul 2015 14:02:08 +0000 (16:02 +0200)
The idea is to make all kernels as small as possible to work around possible
issues with buggy drivers which might fail building feature-complete kernels.

It's indeed just a workaround to make at least simple test scenes render
on OpenCL. The real fix should happen on the driver side.

intern/cycles/device/device_opencl.cpp

index 49c4f13e7075577c0e7f65c8416c055d32a2ff8e..3305ef4bb1430d8dacde31e4580215911ad14a20 100644 (file)
@@ -930,7 +930,7 @@ public:
                return md5.get_hex();
        }
 
-       bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/)
+       bool load_kernels(const DeviceRequestedFeatures& requested_features)
        {
                /* Verify if device was initialized. */
                if(!device_initialized) {
@@ -940,20 +940,23 @@ public:
 
                /* Try to use cached kernel. */
                thread_scoped_lock cache_locker;
-               cpProgram = OpenCLCache::get_program(cpPlatform,
-                                                    cdDevice,
-                                                    OpenCLCache::OCL_DEV_BASE_PROGRAM,
-                                                    cache_locker);
+               cpProgram = load_cached_kernel(requested_features,
+                                              OpenCLCache::OCL_DEV_BASE_PROGRAM,
+                                              cache_locker);
 
                if(!cpProgram) {
+                       VLOG(2) << "No cached OpenCL kernel.";
+
                        /* Verify we have right opencl version. */
                        if(!opencl_version_check())
                                return false;
 
+                       string build_flags = build_options_for_base_program(requested_features);
+
                        /* Calculate md5 hashes to detect changes. */
                        string kernel_path = path_get("kernel");
                        string kernel_md5 = path_files_md5_hash(kernel_path);
-                       string device_md5 = device_md5_hash();
+                       string device_md5 = device_md5_hash(build_flags);
 
                        /* Path to cached binary.
                         *
@@ -978,27 +981,41 @@ public:
                        }
 
                        /* If binary kernel exists already, try use it. */
-                       if(path_exists(clbin) && load_binary(kernel_path, clbin, "", &cpProgram)) {
+                       if(path_exists(clbin) && load_binary(kernel_path,
+                                                            clbin,
+                                                            build_flags,
+                                                            &cpProgram)) {
                                /* Kernel loaded from binary, nothing to do. */
+                               VLOG(2) << "Loaded kernel from " << clbin << ".";
                        }
                        else {
                                string init_kernel_source = "#include \"kernels/opencl/kernel.cl\" // " + kernel_md5 + "\n";
 
                                /* If does not exist or loading binary failed, compile kernel. */
-                               if(!compile_kernel(kernel_path, init_kernel_source, "", &cpProgram, debug_src))
+                               if(!compile_kernel(kernel_path,
+                                                  init_kernel_source,
+                                                  build_flags,
+                                                  &cpProgram,
+                                                  debug_src))
+                               {
                                        return false;
+                               }
 
                                /* Save binary for reuse. */
-                               if(!save_binary(&cpProgram, clbin))
+                               if(!save_binary(&cpProgram, clbin)) {
                                        return false;
+                               }
                        }
 
                        /* Cache the program. */
-                       OpenCLCache::store_program(cpPlatform,
-                                                  cdDevice,
-                                                  cpProgram,
-                                                  OpenCLCache::OCL_DEV_BASE_PROGRAM,
-                                                  cache_locker);
+                       store_cached_kernel(cpPlatform,
+                                           cdDevice,
+                                           cpProgram,
+                                           OpenCLCache::OCL_DEV_BASE_PROGRAM,
+                                           cache_locker);
+               }
+               else {
+                       VLOG(2) << "Found cached OpenCL kernel.";
                }
 
                /* Find kernels. */
@@ -1032,9 +1049,9 @@ public:
                }
 
                if(ckFilmConvertByteKernel)
-                       clReleaseKernel(ckFilmConvertByteKernel);  
+                       clReleaseKernel(ckFilmConvertByteKernel);
                if(ckFilmConvertHalfFloatKernel)
-                       clReleaseKernel(ckFilmConvertHalfFloatKernel);  
+                       clReleaseKernel(ckFilmConvertHalfFloatKernel);
                if(ckShaderKernel)
                        clReleaseKernel(ckShaderKernel);
                if(ckBakeKernel)
@@ -1354,7 +1371,6 @@ public:
        virtual void thread_run(DeviceTask * /*task*/) = 0;
 
 protected:
-
        string kernel_build_options(const string *debug_src = NULL)
        {
                string build_options = " -cl-fast-relaxed-math ";
@@ -1546,6 +1562,44 @@ protected:
                }
                return build_options;
        }
+
+       /* ** These hooks are for working around some compiler-specific bugs ** */
+
+       virtual cl_program load_cached_kernel(
+               const DeviceRequestedFeatures& /*requested_features*/,
+               OpenCLCache::ProgramName program_name,
+               thread_scoped_lock& cache_locker)
+       {
+               return OpenCLCache::get_program(cpPlatform,
+                                               cdDevice,
+                                               program_name,
+                                               cache_locker);
+       }
+
+       virtual void store_cached_kernel(cl_platform_id platform,
+                                        cl_device_id device,
+                                        cl_program program,
+                                        OpenCLCache::ProgramName program_name,
+                                        thread_scoped_lock& cache_locker)
+       {
+               OpenCLCache::store_program(platform,
+                                          device,
+                                          program,
+                                          program_name,
+                                          cache_locker);
+       }
+
+       /* Extra build flags for the base program; the default implementation
+        * ignores the requested features and returns an empty string.
+        *
+        * TODO(sergey): By default we compile all features, meaning the
+        * mega kernel is not getting feature-based optimizations.
+        *
+        * Ideally we should always compile the kernel with as few features
+        * enabled as possible to keep performance at its max.
+        */
+       virtual string build_options_for_base_program(
+               const DeviceRequestedFeatures& /*requested_features*/)
+       {
+               return string();
+       }
 };
 
 class OpenCLDeviceMegaKernel : public OpenCLDeviceBase
@@ -3490,6 +3544,34 @@ protected:
                opencl_assert_err(ciErr, "clCreateBuffer");
                return ptr;
        }
+
+       /* ** These hooks are for working around some compiler-specific bugs ** */
+
+       cl_program load_cached_kernel(
+               const DeviceRequestedFeatures& /*requested_features*/,
+               OpenCLCache::ProgramName /*program_name*/,
+               thread_scoped_lock /*cache_locker*/)
+       {
+               VLOG(2) << "Skip loading kernel from cache, "
+                       << "not supported by split kernel.";
+               return NULL;
+       }
+
+       void store_cached_kernel(cl_platform_id /*platform*/,
+                                cl_device_id /*device*/,
+                                cl_program /*program*/,
+                                OpenCLCache::ProgramName /*program_name*/,
+                                thread_scoped_lock& /*slot_locker*/)
+       {
+               VLOG(2) << "Skip storing kernel in cache, "
+                       << "not supported by split kernel.";
+       }
+
+       /* Base program build flags for the split kernel are derived from the
+        * requested features via build_options_from_requested_features().
+        */
+       string build_options_for_base_program(
+               const DeviceRequestedFeatures& requested_features)
+       {
+               string feature_flags =
+                       build_options_from_requested_features(requested_features);
+               return feature_flags;
+       }
 };
 
 Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background)