Cycles: Support multithreaded compilation of kernels
authorBrecht Van Lommel <brechtvanlommel@gmail.com>
Fri, 15 Feb 2019 07:18:38 +0000 (08:18 +0100)
committerJeroen Bakker <j.bakker@atmind.nl>
Fri, 15 Feb 2019 07:56:20 +0000 (08:56 +0100)
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.

Patch by lukasstockner97, jbakker, brecht

    job    |   scene_name    | compilation_time
----------+-----------------+------------------
    Baseline | empty           |            22.73
    D2264    | empty           |            13.94
    Baseline | bmw             |            56.44
    D2264    | bmw             |            41.32
    Baseline | fishycat        |            59.50
    D2264    | fishycat        |            45.19
    Baseline | barbershop      |           212.28
    D2264    | barbershop      |           169.81
    Baseline | victor          |            67.51
    D2264    | victor          |            53.60
    Baseline | classroom       |            51.46
    D2264    | classroom       |            39.02
    Baseline | koro            |            62.48
    D2264    | koro            |            49.03
    Baseline | pavillion       |            54.37
    D2264    | pavillion       |            38.82
    Baseline | splash279       |            47.43
    D2264    | splash279       |            37.94
    Baseline | volume_emission |           145.22
    D2264    | volume_emission |           121.10

This patch reduces compilation time, as the split kernels and base
kernels are compiled in parallel. In Cycles debug mode (256) you can
unmark the OpenCL single program file option, which reduces the compilation
time even further (bmw 17 seconds, barbershop 53 seconds).

Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97

Reviewed By: brecht

Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli

Differential Revision: https://developer.blender.org/D2264

12 files changed:
intern/cycles/blender/CMakeLists.txt
intern/cycles/blender/blender_python.cpp
intern/cycles/device/device_intern.h
intern/cycles/device/opencl/opencl.h
intern/cycles/device/opencl/opencl_base.cpp
intern/cycles/device/opencl/opencl_mega.cpp
intern/cycles/device/opencl/opencl_split.cpp
intern/cycles/device/opencl/opencl_util.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl [new file with mode: 0644]
intern/cycles/util/util_system.cpp
intern/cycles/util/util_system.h

index 84e2690333e1115b915a78804f23dafd823ab465..f8720de366f0e876873e8bafdd768fe963b53088 100644 (file)
@@ -51,6 +51,10 @@ set(ADDON_FILES
 
 add_definitions(${GL_DEFINITIONS})
 
+if(WITH_CYCLES_DEVICE_OPENCL)
+    add_definitions(-DWITH_OPENCL)
+endif()
+
 if(WITH_CYCLES_NETWORK)
        add_definitions(-DWITH_NETWORK)
 endif()
index bf7605ed5b141754068322e1aae25102512f5210..513941b1fcce79874b8c1a2b4490668f969f66ff 100644 (file)
 #include <OSL/oslconfig.h>
 #endif
 
+#ifdef WITH_OPENCL
+#include "device/device_intern.h"
+#endif
+
 CCL_NAMESPACE_BEGIN
 
 namespace {
@@ -624,6 +628,31 @@ static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
        DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
        Py_RETURN_NONE;
 }
+
+static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
+{
+       PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence");
+       if(sequence == NULL) {
+               Py_RETURN_FALSE;
+       }
+
+       vector<string> parameters;
+       for(Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
+               PyObject *item = PySequence_Fast_GET_ITEM(sequence, i);
+               PyObject *item_as_string = PyObject_Str(item);
+               const char *parameter_string = PyUnicode_AsUTF8(item_as_string);
+               parameters.push_back(parameter_string);
+               Py_DECREF(item_as_string);
+       }
+       Py_DECREF(sequence);
+
+       if (device_opencl_compile_kernel(parameters)) {
+               Py_RETURN_TRUE;
+       }
+       else {
+               Py_RETURN_FALSE;
+       }
+}
 #endif
 
 static bool denoise_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepaths)
@@ -899,6 +928,7 @@ static PyMethodDef methods[] = {
        {"system_info", system_info_func, METH_NOARGS, ""},
 #ifdef WITH_OPENCL
        {"opencl_disable", opencl_disable_func, METH_NOARGS, ""},
+       {"opencl_compile", opencl_compile_func, METH_VARARGS, ""},
 #endif
 
        /* Standalone denoising */
index 0b26057c3ba00dbb676e45970dada7adc17c1c3f..94df1e009eb8e1b07c25901da9388601c4a45aa4 100644 (file)
@@ -24,6 +24,7 @@ class Device;
 Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
 bool device_opencl_init();
 Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
+bool device_opencl_compile_kernel(const vector<string>& parameters);
 bool device_cuda_init();
 Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
 Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
index 9b7631674594ca23d9a09d520e8867b9369b6bd2..a2c0e53b3e7cc611f2e0a2c2f0c27e75a2394462 100644 (file)
@@ -268,6 +268,7 @@ public:
        cl_platform_id cpPlatform;
        cl_device_id cdDevice;
        cl_int ciErr;
+       int device_num;
 
        class OpenCLProgram {
        public:
@@ -293,7 +294,15 @@ public:
 
        private:
                bool build_kernel(const string *debug_src);
+               /* Build the program by launching a new process of the running executable.
+                * This is required for multithreaded OpenCL compilation, since most frameworks
+                * serialize build calls internally if they come from the same process.
+                * If that is not supported, this function just returns false.
+                */
+               bool compile_separate(const string& clbin);
+               /* Build the program by calling OpenCL directly. */
                bool compile_kernel(const string *debug_src);
+               /* Loading and saving the program from/to disk. */
                bool load_binary(const string& clbin, const string *debug_src = NULL);
                bool save_binary(const string& clbin);
 
@@ -342,12 +351,17 @@ public:
        bool opencl_version_check();
 
        string device_md5_hash(string kernel_custom_build_options = "");
-       bool load_kernels(const DeviceRequestedFeatures& requested_features);
+       virtual bool load_kernels(const DeviceRequestedFeatures& requested_features);
 
        /* Has to be implemented by the real device classes.
         * The base device will then load all these programs. */
-       virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
-                                 vector<OpenCLProgram*> &programs) = 0;
+       virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
+                                        vector<OpenCLProgram*> &programs) = 0;
+
+       /* Get the name of the opencl program for the given kernel */
+       virtual const string get_opencl_program_name(bool single_program, const string& kernel_name) = 0;
+       /* Get the program file name to compile (*.cl) for the given kernel */
+       virtual const string get_opencl_program_filename(bool single_program, const string& kernel_name) = 0;
 
        void mem_alloc(device_memory& mem);
        void mem_copy_to(device_memory& mem);
index 4417065bb7fb5770c4da628b7ac5e9cc53301fe9..d8f9a242ac839361ec18b7821de8d19dca4ab5cb 100644 (file)
@@ -93,6 +93,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
        }
        assert(info.num < usable_devices.size());
        OpenCLPlatformDevice& platform_device = usable_devices[info.num];
+       device_num = info.num;
        cpPlatform = platform_device.platform_id;
        cdDevice = platform_device.device_id;
        platform_name = platform_device.platform_name;
@@ -143,7 +144,6 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
        texture_info.resize(1);
        memory_manager.alloc("texture_info", texture_info);
 
-       fprintf(stderr, "Device init success\n");
        device_initialized = true;
 }
 
@@ -251,15 +251,13 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
        programs.push_back(&base_program);
        programs.push_back(&denoising_program);
        /* Call actual class to fill the vector with its programs. */
-       if(!load_kernels(requested_features, programs)) {
+       if(!add_kernel_programs(requested_features, programs)) {
                return false;
        }
 
-       /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
-        * serialize the calls internally, so it's not much use right now.
-        * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor
-        * should be set to false as well. */
-#if 0
+       /* Parallel compilation of Cycles kernels, this launches multiple
+        * processes to workaround OpenCL frameworks serializing the calls
+        * internally within a single process. */
        TaskPool task_pool;
        foreach(OpenCLProgram *program, programs) {
                task_pool.push(function_bind(&OpenCLProgram::load, program));
@@ -273,14 +271,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
                        return false;
                }
        }
-#else
-       foreach(OpenCLProgram *program, programs) {
-               program->load();
-               if(!program->is_loaded()) {
-                       return false;
-               }
-       }
-#endif
 
        return true;
 }
index 0a7bf96fed7edfb0d5d82f7895c789e528cfa1a6..c0b9e81d4d3204686161da60a542ecf15f5158ba 100644 (file)
@@ -35,19 +35,35 @@ public:
 
        OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
        : OpenCLDeviceBase(info, stats, profiler, background_),
-         path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
+         path_trace_program(this,
+                            get_opencl_program_name(false, "megakernel"),
+                            get_opencl_program_filename(false, "megakernel"),
+                            "-D__COMPILE_ONLY_MEGAKERNEL__ ")
        {
        }
 
-       virtual bool show_samples() const {
+
+       virtual bool show_samples() const
+       {
                return true;
        }
 
-       virtual BVHLayoutMask get_bvh_layout_mask() const {
+       virtual BVHLayoutMask get_bvh_layout_mask() const
+       {
                return BVH_LAYOUT_BVH2;
        }
 
-       virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+       const string get_opencl_program_name(bool /*single_program*/, const string& kernel_name)
+       {
+               return kernel_name;
+       }
+
+       const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/)
+       {
+               return "kernel.cl";
+       }
+
+       virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/,
                                  vector<OpenCLProgram*> &programs)
        {
                path_trace_program.add_kernel(ustring("path_trace"));
index 5a2555f9f8084ab62b5ea16210e27bd0d238dc6c..b759f69d3ab4c86eb94827408f707e8508be765d 100644 (file)
@@ -79,6 +79,27 @@ public:
        OpenCLProgram program_data_init;
        OpenCLProgram program_state_buffer_size;
 
+       OpenCLProgram program_split;
+
+       OpenCLProgram program_path_init;
+       OpenCLProgram program_scene_intersect;
+       OpenCLProgram program_lamp_emission;
+       OpenCLProgram program_do_volume;
+       OpenCLProgram program_queue_enqueue;
+       OpenCLProgram program_indirect_background;
+       OpenCLProgram program_shader_setup;
+       OpenCLProgram program_shader_sort;
+       OpenCLProgram program_shader_eval;
+       OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+       OpenCLProgram program_subsurface_scatter;
+       OpenCLProgram program_direct_lighting;
+       OpenCLProgram program_shadow_blocked_ao;
+       OpenCLProgram program_shadow_blocked_dl;
+       OpenCLProgram program_enqueue_inactive;
+       OpenCLProgram program_next_iteration_setup;
+       OpenCLProgram program_indirect_subsurface;
+       OpenCLProgram program_buffer_update;
+
        OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
 
        ~OpenCLDeviceSplitKernel()
@@ -99,26 +120,150 @@ public:
                return BVH_LAYOUT_BVH2;
        }
 
-       virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+       virtual bool load_kernels(const DeviceRequestedFeatures& requested_features)
+       {
+               if (!OpenCLDeviceBase::load_kernels(requested_features)) {
+                       return false;
+               }
+               return split_kernel->load_kernels(requested_features);
+       }
+
+       const string fast_compiled_kernels =
+               "path_init "
+               "scene_intersect "
+               "queue_enqueue "
+               "shader_setup "
+               "shader_sort "
+               "enqueue_inactive "
+               "next_iteration_setup "
+               "indirect_subsurface "
+               "buffer_update";
+
+       const string get_opencl_program_name(bool single_program, const string& kernel_name)
+       {
+               if (single_program) {
+                       return "split";
+               }
+               else {
+                       if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+                               return "split_bundle";
+                       }
+                       else {
+                               return "split_" + kernel_name;
+                       }
+               }
+       }
+
+       const string get_opencl_program_filename(bool single_program, const string& kernel_name)
+       {
+               if (single_program) {
+                       return "kernel_split.cl";
+               }
+               else {
+                       if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+                               return "kernel_split_bundle.cl";
+                       }
+                       else {
+                               return "kernel_" + kernel_name + ".cl";
+                       }
+               }
+       }
+
+       virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
                                  vector<OpenCLDeviceBase::OpenCLProgram*> &programs)
        {
                bool single_program = OpenCLInfo::use_single_program();
-               program_data_init = OpenCLDeviceBase::OpenCLProgram(this,
-                                                 single_program ? "split" : "split_data_init",
-                                                 single_program ? "kernel_split.cl" : "kernel_data_init.cl",
-                                                 get_build_options(this, requested_features));
-
+               program_data_init = OpenCLDeviceBase::OpenCLProgram(
+                       this,
+                       get_opencl_program_name(single_program, "data_init"),
+                       get_opencl_program_filename(single_program, "data_init"),
+                       get_build_options(this, requested_features));
                program_data_init.add_kernel(ustring("path_trace_data_init"));
                programs.push_back(&program_data_init);
 
-               program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this,
-                                                 single_program ? "split" : "split_state_buffer_size",
-                                                 single_program ? "kernel_split.cl" : "kernel_state_buffer_size.cl",
-                                                 get_build_options(this, requested_features));
+               program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(
+                       this,
+                       get_opencl_program_name(single_program, "state_buffer_size"),
+                       get_opencl_program_filename(single_program, "state_buffer_size"),
+                       get_build_options(this, requested_features));
+
                program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size"));
                programs.push_back(&program_state_buffer_size);
 
-               return split_kernel->load_kernels(requested_features);
+
+#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
+#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \
+                       program_##kernel_name = \
+                               OpenCLDeviceBase::OpenCLProgram(this, \
+                                                                                               "split_"#kernel_name, \
+                                                                                               "kernel_"#kernel_name".cl", \
+                                                                                               get_build_options(this, requested_features)); \
+                       program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
+                       programs.push_back(&program_##kernel_name);
+
+               if (single_program) {
+                       program_split = OpenCLDeviceBase::OpenCLProgram(
+                               this,
+                               "split" ,
+                               "kernel_split.cl",
+                               get_build_options(this, requested_features));
+
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+
+                       programs.push_back(&program_split);
+               }
+               else {
+                       /* Ordered with most complex kernels first, to reduce overall compile time. */
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval);
+
+                       /* Quick kernels bundled in a single program to reduce overhead of starting
+                        * Blender processes. */
+                       program_split = OpenCLDeviceBase::OpenCLProgram(
+                               this,
+                               "split_bundle" ,
+                               "kernel_split_bundle.cl",
+                               get_build_options(this, requested_features));
+
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+                       programs.push_back(&program_split);
+               }
+#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM
+#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM
+
+               return true;
        }
 
        void thread_run(DeviceTask *task)
@@ -281,8 +426,8 @@ public:
                bool single_program = OpenCLInfo::use_single_program();
                kernel->program =
                        OpenCLDeviceBase::OpenCLProgram(device,
-                                                       single_program ? "split" : "split_" + kernel_name,
-                                                       single_program ? "kernel_split.cl" : "kernel_" + kernel_name + ".cl",
+                                                       device->get_opencl_program_name(single_program, kernel_name),
+                                                       device->get_opencl_program_filename(single_program, kernel_name),
                                                        get_build_options(device, requested_features));
 
                kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
index f43aa5f350a46fae4e417da4e54707e6e99d2fca..fe5ba4886a9740fbb506d40517d626c176846d9e 100644 (file)
 #ifdef WITH_OPENCL
 
 #include "device/opencl/opencl.h"
+#include "device/device_intern.h"
 
 #include "util/util_debug.h"
 #include "util/util_logging.h"
 #include "util/util_md5.h"
 #include "util/util_path.h"
 #include "util/util_time.h"
+#include "util/util_system.h"
 
 using std::cerr;
 using std::endl;
@@ -369,17 +371,119 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
        }
 
        double starttime = time_dt();
-       add_log(string("Compiling OpenCL program ") + program_name.c_str(), false);
+       add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
        add_log(string("Build flags: ") + kernel_build_options, true);
 
        if(!build_kernel(debug_src))
                return false;
 
-       add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false);
+       double elapsed = time_dt() - starttime;
+       add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
 
        return true;
 }
 
+bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin)
+{
+       vector<string> args;
+       args.push_back("--background");
+       args.push_back("--factory-startup");
+       args.push_back("--python-expr");
+
+       args.push_back(
+               string_printf(
+                       "import _cycles; _cycles.opencl_compile('%s', '%d', '%s', '%s', '%s', '%s', '%s')",
+                       (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? "true" : "false",
+                       device->device_num,
+                       device->device_name.c_str(),
+                       device->platform_name.c_str(),
+                       (device->kernel_build_options(NULL) + kernel_build_options).c_str(),
+                       kernel_file.c_str(),
+                       clbin.c_str()));
+
+       double starttime = time_dt();
+       add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
+       add_log(string("Build flags: ") + kernel_build_options, true);
+       if(!system_call_self(args) || !path_exists(clbin)) {
+               return false;
+       }
+
+       double elapsed = time_dt() - starttime;
+       add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
+
+       return load_binary(clbin);
+}
+
+/* Compile opencl kernel. This method is called from the _cycles Python
+ * module compile kernels. Parameters must match function above. */
+bool device_opencl_compile_kernel(const vector<string>& parameters)
+{
+       bool force_all_platforms = parameters[0] == "true";
+       int device_platform_id = std::stoi(parameters[1]);
+       const string& device_name = parameters[2];
+       const string& platform_name = parameters[3];
+       const string& build_options = parameters[4];
+       const string& kernel_file = parameters[5];
+       const string& binary_path = parameters[6];
+
+       if(clewInit() != CLEW_SUCCESS) {
+               return false;
+       }
+
+       vector<OpenCLPlatformDevice> usable_devices;
+       OpenCLInfo::get_usable_devices(&usable_devices, force_all_platforms);
+       if(device_platform_id >= usable_devices.size()) {
+               return false;
+       }
+
+       OpenCLPlatformDevice& platform_device = usable_devices[device_platform_id];
+       if(platform_device.platform_name != platform_name ||
+          platform_device.device_name != device_name)
+       {
+               return false;
+       }
+
+       cl_platform_id platform = platform_device.platform_id;
+       cl_device_id device = platform_device.device_id;
+       const cl_context_properties context_props[] = {
+               CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
+               0, 0
+       };
+
+       cl_int err;
+       cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
+       if(err != CL_SUCCESS) {
+               return false;
+       }
+
+       string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
+       source = path_source_replace_includes(source, path_get("source"));
+       size_t source_len = source.size();
+       const char *source_str = source.c_str();
+       cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
+       bool result = false;
+
+       if(err == CL_SUCCESS) {
+               err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
+
+               if(err == CL_SUCCESS) {
+                       size_t size = 0;
+                       clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
+                       if(size > 0) {
+                               vector<uint8_t> binary(size);
+                               uint8_t *bytes = &binary[0];
+                               clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
+                               result = path_write_binary(binary_path, binary);
+                       }
+               }
+               clReleaseProgram(program);
+       }
+
+       clReleaseContext(context);
+
+       return result;
+}
+
 bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
                                                   const string *debug_src)
 {
@@ -467,15 +571,31 @@ void OpenCLDeviceBase::OpenCLProgram::load()
                }
                else {
                        add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
-
-                       /* If does not exist or loading binary failed, compile kernel. */
-                       if(!compile_kernel(debug_src)) {
-                               return;
+                       if(!path_exists(clbin)) {
+                               if(compile_separate(clbin)) {
+                                       add_log(string("Built and loaded program from ") + clbin + ".", true);
+                                       loaded = true;
+                               }
+                               else {
+                                       add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
+
+                                       /* If does not exist or loading binary failed, compile kernel. */
+                                       if(!compile_kernel(debug_src)) {
+                                               return;
+                                       }
+
+                                       /* Save binary for reuse. */
+                                       if(!save_binary(clbin)) {
+                                               add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+                                       }
+                               }
                        }
-
-                       /* Save binary for reuse. */
-                       if(!save_binary(clbin)) {
-                               add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+                       else {
+                               add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
+                               /* Fall back to compiling. */
+                               if(!compile_kernel(debug_src)) {
+                                       return;
+                               }
                        }
                }
 
index 163aacf19f9c8f010717af70ef34208c266bfa32..f7041ee278389f413247c9e23ed2b80e290a76c0 100644 (file)
@@ -39,6 +39,7 @@ set(SRC_OPENCL_KERNELS
        kernels/opencl/kernel.cl
        kernels/opencl/kernel_state_buffer_size.cl
        kernels/opencl/kernel_split.cl
+       kernels/opencl/kernel_split_bundle.cl
        kernels/opencl/kernel_data_init.cl
        kernels/opencl/kernel_path_init.cl
        kernels/opencl/kernel_queue_enqueue.cl
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
new file mode 100644 (file)
index 0000000..71ea683
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"  // PRECOMPILED
+#include "kernel/split/kernel_split_common.h"  // PRECOMPILED
+
+#include "kernel/kernels/opencl/kernel_path_init.cl"
+#include "kernel/kernels/opencl/kernel_scene_intersect.cl"
+#include "kernel/kernels/opencl/kernel_queue_enqueue.cl"
+#include "kernel/kernels/opencl/kernel_shader_setup.cl"
+#include "kernel/kernels/opencl/kernel_shader_sort.cl"
+#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl"
+#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
+#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
+#include "kernel/kernels/opencl/kernel_buffer_update.cl"
index fc6db1f66623af7660e66b81902de0a26d07c929..a79829a3dd9ca829a0bc0c741ed19b32489266d2 100644 (file)
@@ -22,6 +22,9 @@
 
 #include <numaapi.h>
 
+#include <OpenImageIO/sysutil.h>
+OIIO_NAMESPACE_USING
+
 #ifdef _WIN32
 #  if(!defined(FREE_WINDOWS))
 #    include <intrin.h>
@@ -329,6 +332,25 @@ bool system_cpu_support_avx2()
 
 #endif
 
+bool system_call_self(const vector<string>& args)
+{
+       /* Escape program and arguments in case they contain spaces. */
+       string cmd = "\"" + Sysutil::this_program_path() + "\"";
+
+       for(int i = 0; i < args.size(); i++) {
+               cmd += " \"" + args[i] + "\"";
+       }
+
+       /* Quiet output. */
+#ifdef _WIN32
+       cmd += " > nul";
+#else
+       cmd += " > /dev/null";
+#endif
+
+       return (system(cmd.c_str()) == 0);
+}
+
 size_t system_physical_ram()
 {
 #ifdef _WIN32
index 1e7cf1d9f2a00648045bad6693b5c4379e46fedf..2590b31a59d0188cf052c13d0a87c074a9aa9f0b 100644 (file)
@@ -18,6 +18,7 @@
 #define __UTIL_SYSTEM_H__
 
 #include "util/util_string.h"
+#include "util/util_vector.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -61,6 +62,9 @@ bool system_cpu_support_avx2();
 
 size_t system_physical_ram();
 
+/* Start a new process of the current application with the given arguments. */
+bool system_call_self(const vector<string>& args);
+
 CCL_NAMESPACE_END
 
 #endif  /* __UTIL_SYSTEM_H__ */