Cycles: Support multithreaded compilation of kernels
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Fri, 15 Feb 2019 07:18:38 +0000 (08:18 +0100)
committer Jeroen Bakker <j.bakker@atmind.nl>
Fri, 15 Feb 2019 07:49:25 +0000 (08:49 +0100)
This patch implements a workaround to get the multithreaded compilation from D2231 working.
So far, it only works for Blender, not for Cycles Standalone. Also, I have only tested the Linux codepath in the helper function.
Depends on D2231.

Reviewers: brecht, dingto, sergey, juicyfruit, lukasstockner97

Reviewed By: brecht

Subscribers: Loner, jbakker, candreacchio, 3dLuver, LazyDodo, bliblubli

Differential Revision: https://developer.blender.org/D2264

12 files changed:
intern/cycles/blender/CMakeLists.txt
intern/cycles/blender/blender_python.cpp
intern/cycles/device/device_intern.h
intern/cycles/device/opencl/opencl.h
intern/cycles/device/opencl/opencl_base.cpp
intern/cycles/device/opencl/opencl_mega.cpp
intern/cycles/device/opencl/opencl_split.cpp
intern/cycles/device/opencl/opencl_util.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl [new file with mode: 0644]
intern/cycles/util/util_system.cpp
intern/cycles/util/util_system.h

index 84e2690333e1115b915a78804f23dafd823ab465..f8720de366f0e876873e8bafdd768fe963b53088 100644 (file)
@@ -51,6 +51,10 @@ set(ADDON_FILES
 
 add_definitions(${GL_DEFINITIONS})
 
+if(WITH_CYCLES_DEVICE_OPENCL)
+    add_definitions(-DWITH_OPENCL)
+endif()
+
 if(WITH_CYCLES_NETWORK)
        add_definitions(-DWITH_NETWORK)
 endif()
index de702337f98ff057192dc6cce5a2eba1f1e96f8d..a720a60c05b48b803afd203b31e83dfabe4672ad 100644 (file)
 #include <OSL/oslconfig.h>
 #endif
 
+#ifdef WITH_OPENCL
+#include "device/device_intern.h"
+#endif
+
 CCL_NAMESPACE_BEGIN
 
 namespace {
@@ -628,6 +632,31 @@ static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
        DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
        Py_RETURN_NONE;
 }
+
+static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
+{
+       PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence");
+       if(sequence == NULL) {
+               Py_RETURN_FALSE;
+       }
+
+       vector<string> parameters;
+       for(Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
+               PyObject *item = PySequence_Fast_GET_ITEM(sequence, i);
+               PyObject *item_as_string = PyObject_Str(item);
+               const char *parameter_string = PyUnicode_AsUTF8(item_as_string);
+               parameters.push_back(parameter_string);
+               Py_DECREF(item_as_string);
+       }
+       Py_DECREF(sequence);
+
+       if (device_opencl_compile_kernel(parameters)) {
+               Py_RETURN_TRUE;
+       }
+       else {
+               Py_RETURN_FALSE;
+       }
+}
 #endif
 
 static bool denoise_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepaths)
@@ -903,6 +932,7 @@ static PyMethodDef methods[] = {
        {"system_info", system_info_func, METH_NOARGS, ""},
 #ifdef WITH_OPENCL
        {"opencl_disable", opencl_disable_func, METH_NOARGS, ""},
+       {"opencl_compile", opencl_compile_func, METH_VARARGS, ""},
 #endif
 
        /* Standalone denoising */
index 0b26057c3ba00dbb676e45970dada7adc17c1c3f..94df1e009eb8e1b07c25901da9388601c4a45aa4 100644 (file)
@@ -24,6 +24,7 @@ class Device;
 Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
 bool device_opencl_init();
 Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
+bool device_opencl_compile_kernel(const vector<string>& parameters);
 bool device_cuda_init();
 Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
 Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
index 9b7631674594ca23d9a09d520e8867b9369b6bd2..a2c0e53b3e7cc611f2e0a2c2f0c27e75a2394462 100644 (file)
@@ -268,6 +268,7 @@ public:
        cl_platform_id cpPlatform;
        cl_device_id cdDevice;
        cl_int ciErr;
+       int device_num;
 
        class OpenCLProgram {
        public:
@@ -293,7 +294,15 @@ public:
 
        private:
                bool build_kernel(const string *debug_src);
+               /* Build the program by launching a separate instance of this process.
+                * This is required for multithreaded OpenCL compilation, since most frameworks
+                * serialize build calls internally if they come from the same process.
+                * If that is not supported, this function just returns false.
+                */
+               bool compile_separate(const string& clbin);
+               /* Build the program by calling OpenCL directly. */
                bool compile_kernel(const string *debug_src);
+               /* Loading and saving the program from/to disk. */
                bool load_binary(const string& clbin, const string *debug_src = NULL);
                bool save_binary(const string& clbin);
 
@@ -342,12 +351,17 @@ public:
        bool opencl_version_check();
 
        string device_md5_hash(string kernel_custom_build_options = "");
-       bool load_kernels(const DeviceRequestedFeatures& requested_features);
+       virtual bool load_kernels(const DeviceRequestedFeatures& requested_features);
 
        /* Has to be implemented by the real device classes.
         * The base device will then load all these programs. */
-       virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
-                                 vector<OpenCLProgram*> &programs) = 0;
+       virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
+                                        vector<OpenCLProgram*> &programs) = 0;
+
+       /* Get the name of the opencl program for the given kernel */
+       virtual const string get_opencl_program_name(bool single_program, const string& kernel_name) = 0;
+       /* Get the program file name to compile (*.cl) for the given kernel */
+       virtual const string get_opencl_program_filename(bool single_program, const string& kernel_name) = 0;
 
        void mem_alloc(device_memory& mem);
        void mem_copy_to(device_memory& mem);
index 4417065bb7fb5770c4da628b7ac5e9cc53301fe9..d8f9a242ac839361ec18b7821de8d19dca4ab5cb 100644 (file)
@@ -93,6 +93,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
        }
        assert(info.num < usable_devices.size());
        OpenCLPlatformDevice& platform_device = usable_devices[info.num];
+       device_num = info.num;
        cpPlatform = platform_device.platform_id;
        cdDevice = platform_device.device_id;
        platform_name = platform_device.platform_name;
@@ -143,7 +144,6 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, Profiler &pro
        texture_info.resize(1);
        memory_manager.alloc("texture_info", texture_info);
 
-       fprintf(stderr, "Device init success\n");
        device_initialized = true;
 }
 
@@ -251,15 +251,13 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
        programs.push_back(&base_program);
        programs.push_back(&denoising_program);
        /* Call actual class to fill the vector with its programs. */
-       if(!load_kernels(requested_features, programs)) {
+       if(!add_kernel_programs(requested_features, programs)) {
                return false;
        }
 
-       /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks
-        * serialize the calls internally, so it's not much use right now.
-        * Note: When enabling parallel compilation, use_stdout in the OpenCLProgram constructor
-        * should be set to false as well. */
-#if 0
+       /* Parallel compilation of Cycles kernels, this launches multiple
+        * processes to work around OpenCL frameworks serializing the calls
+        * internally within a single process. */
        TaskPool task_pool;
        foreach(OpenCLProgram *program, programs) {
                task_pool.push(function_bind(&OpenCLProgram::load, program));
@@ -273,14 +271,6 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
                        return false;
                }
        }
-#else
-       foreach(OpenCLProgram *program, programs) {
-               program->load();
-               if(!program->is_loaded()) {
-                       return false;
-               }
-       }
-#endif
 
        return true;
 }
index 0a7bf96fed7edfb0d5d82f7895c789e528cfa1a6..c0b9e81d4d3204686161da60a542ecf15f5158ba 100644 (file)
@@ -35,19 +35,35 @@ public:
 
        OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
        : OpenCLDeviceBase(info, stats, profiler, background_),
-         path_trace_program(this, "megakernel", "kernel.cl", "-D__COMPILE_ONLY_MEGAKERNEL__ ")
+         path_trace_program(this,
+                            get_opencl_program_name(false, "megakernel"),
+                            get_opencl_program_filename(false, "megakernel"),
+                            "-D__COMPILE_ONLY_MEGAKERNEL__ ")
        {
        }
 
-       virtual bool show_samples() const {
+
+       virtual bool show_samples() const
+       {
                return true;
        }
 
-       virtual BVHLayoutMask get_bvh_layout_mask() const {
+       virtual BVHLayoutMask get_bvh_layout_mask() const
+       {
                return BVH_LAYOUT_BVH2;
        }
 
-       virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
+       /* Program name for the megakernel device: the kernel name itself,
+        * the single_program flag is ignored. */
+       const string get_opencl_program_name(bool /*single_program*/, const string& kernel_name)
+       {
+               return kernel_name;
+       }
+
+       /* Source file for the megakernel device: always kernel.cl, both
+        * arguments are ignored. */
+       const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/)
+       {
+               return "kernel.cl";
+       }
+
+       virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/,
                                  vector<OpenCLProgram*> &programs)
        {
                path_trace_program.add_kernel(ustring("path_trace"));
index 5a2555f9f8084ab62b5ea16210e27bd0d238dc6c..b759f69d3ab4c86eb94827408f707e8508be765d 100644 (file)
@@ -79,6 +79,27 @@ public:
        OpenCLProgram program_data_init;
        OpenCLProgram program_state_buffer_size;
 
+       OpenCLProgram program_split;
+
+       OpenCLProgram program_path_init;
+       OpenCLProgram program_scene_intersect;
+       OpenCLProgram program_lamp_emission;
+       OpenCLProgram program_do_volume;
+       OpenCLProgram program_queue_enqueue;
+       OpenCLProgram program_indirect_background;
+       OpenCLProgram program_shader_setup;
+       OpenCLProgram program_shader_sort;
+       OpenCLProgram program_shader_eval;
+       OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+       OpenCLProgram program_subsurface_scatter;
+       OpenCLProgram program_direct_lighting;
+       OpenCLProgram program_shadow_blocked_ao;
+       OpenCLProgram program_shadow_blocked_dl;
+       OpenCLProgram program_enqueue_inactive;
+       OpenCLProgram program_next_iteration_setup;
+       OpenCLProgram program_indirect_subsurface;
+       OpenCLProgram program_buffer_update;
+
        OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
 
        ~OpenCLDeviceSplitKernel()
@@ -99,26 +120,150 @@ public:
                return BVH_LAYOUT_BVH2;
        }
 
-       virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
+       virtual bool load_kernels(const DeviceRequestedFeatures& requested_features)
+       {
+               if (!OpenCLDeviceBase::load_kernels(requested_features)) {
+                       return false;
+               }
+               return split_kernel->load_kernels(requested_features);
+       }
+
+       const string fast_compiled_kernels =
+               "path_init "
+               "scene_intersect "
+               "queue_enqueue "
+               "shader_setup "
+               "shader_sort "
+               "enqueue_inactive "
+               "next_iteration_setup "
+               "indirect_subsurface "
+               "buffer_update";
+
+       const string get_opencl_program_name(bool single_program, const string& kernel_name)
+       {
+               if (single_program) {
+                       return "split";
+               }
+               else {
+                       if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+                               return "split_bundle";
+                       }
+                       else {
+                               return "split_" + kernel_name;
+                       }
+               }
+       }
+
+       const string get_opencl_program_filename(bool single_program, const string& kernel_name)
+       {
+               if (single_program) {
+                       return "kernel_split.cl";
+               }
+               else {
+                       if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+                               return "kernel_split_bundle.cl";
+                       }
+                       else {
+                               return "kernel_" + kernel_name + ".cl";
+                       }
+               }
+       }
+
+       virtual bool add_kernel_programs(const DeviceRequestedFeatures& requested_features,
                                  vector<OpenCLDeviceBase::OpenCLProgram*> &programs)
        {
                bool single_program = OpenCLInfo::use_single_program();
-               program_data_init = OpenCLDeviceBase::OpenCLProgram(this,
-                                                 single_program ? "split" : "split_data_init",
-                                                 single_program ? "kernel_split.cl" : "kernel_data_init.cl",
-                                                 get_build_options(this, requested_features));
-
+               program_data_init = OpenCLDeviceBase::OpenCLProgram(
+                       this,
+                       get_opencl_program_name(single_program, "data_init"),
+                       get_opencl_program_filename(single_program, "data_init"),
+                       get_build_options(this, requested_features));
                program_data_init.add_kernel(ustring("path_trace_data_init"));
                programs.push_back(&program_data_init);
 
-               program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this,
-                                                 single_program ? "split" : "split_state_buffer_size",
-                                                 single_program ? "kernel_split.cl" : "kernel_state_buffer_size.cl",
-                                                 get_build_options(this, requested_features));
+               program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(
+                       this,
+                       get_opencl_program_name(single_program, "state_buffer_size"),
+                       get_opencl_program_filename(single_program, "state_buffer_size"),
+                       get_build_options(this, requested_features));
+
                program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size"));
                programs.push_back(&program_state_buffer_size);
 
-               return split_kernel->load_kernels(requested_features);
+
+#define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
+#define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \
+                       program_##kernel_name = \
+                               OpenCLDeviceBase::OpenCLProgram(this, \
+                                                                                               "split_"#kernel_name, \
+                                                                                               "kernel_"#kernel_name".cl", \
+                                                                                               get_build_options(this, requested_features)); \
+                       program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
+                       programs.push_back(&program_##kernel_name);
+
+               if (single_program) {
+                       program_split = OpenCLDeviceBase::OpenCLProgram(
+                               this,
+                               "split" ,
+                               "kernel_split.cl",
+                               get_build_options(this, requested_features));
+
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+
+                       programs.push_back(&program_split);
+               }
+               else {
+                       /* Ordered with most complex kernels first, to reduce overall compile time. */
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background);
+                       ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval);
+
+                       /* Quick kernels bundled in a single program to reduce overhead of starting
+                        * Blender processes. */
+                       program_split = OpenCLDeviceBase::OpenCLProgram(
+                               this,
+                               "split_bundle" ,
+                               "kernel_split_bundle.cl",
+                               get_build_options(this, requested_features));
+
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface);
+                       ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update);
+                       programs.push_back(&program_split);
+               }
+#undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM
+#undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM
+
+               return true;
        }
 
        void thread_run(DeviceTask *task)
@@ -281,8 +426,8 @@ public:
                bool single_program = OpenCLInfo::use_single_program();
                kernel->program =
                        OpenCLDeviceBase::OpenCLProgram(device,
-                                                       single_program ? "split" : "split_" + kernel_name,
-                                                       single_program ? "kernel_split.cl" : "kernel_" + kernel_name + ".cl",
+                                                       device->get_opencl_program_name(single_program, kernel_name),
+                                                       device->get_opencl_program_filename(single_program, kernel_name),
                                                        get_build_options(device, requested_features));
 
                kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
index f43aa5f350a46fae4e417da4e54707e6e99d2fca..fe5ba4886a9740fbb506d40517d626c176846d9e 100644 (file)
 #ifdef WITH_OPENCL
 
 #include "device/opencl/opencl.h"
+#include "device/device_intern.h"
 
 #include "util/util_debug.h"
 #include "util/util_logging.h"
 #include "util/util_md5.h"
 #include "util/util_path.h"
 #include "util/util_time.h"
+#include "util/util_system.h"
 
 using std::cerr;
 using std::endl;
@@ -369,17 +371,119 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
        }
 
        double starttime = time_dt();
-       add_log(string("Compiling OpenCL program ") + program_name.c_str(), false);
+       add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
        add_log(string("Build flags: ") + kernel_build_options, true);
 
        if(!build_kernel(debug_src))
                return false;
 
-       add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false);
+       double elapsed = time_dt() - starttime;
+       add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
 
        return true;
 }
 
+bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin)
+{
+       vector<string> args;
+       args.push_back("--background");
+       args.push_back("--factory-startup");
+       args.push_back("--python-expr");
+
+       args.push_back(
+               string_printf(
+                       "import _cycles; _cycles.opencl_compile('%s', '%d', '%s', '%s', '%s', '%s', '%s')",
+                       (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? "true" : "false",
+                       device->device_num,
+                       device->device_name.c_str(),
+                       device->platform_name.c_str(),
+                       (device->kernel_build_options(NULL) + kernel_build_options).c_str(),
+                       kernel_file.c_str(),
+                       clbin.c_str()));
+
+       double starttime = time_dt();
+       add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
+       add_log(string("Build flags: ") + kernel_build_options, true);
+       if(!system_call_self(args) || !path_exists(clbin)) {
+               return false;
+       }
+
+       double elapsed = time_dt() - starttime;
+       add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
+
+       return load_binary(clbin);
+}
+
+/* Compile opencl kernel. This method is called from the _cycles Python
+ * module compile kernels. Parameters must match function above. */
+bool device_opencl_compile_kernel(const vector<string>& parameters)
+{
+       bool force_all_platforms = parameters[0] == "true";
+       int device_platform_id = std::stoi(parameters[1]);
+       const string& device_name = parameters[2];
+       const string& platform_name = parameters[3];
+       const string& build_options = parameters[4];
+       const string& kernel_file = parameters[5];
+       const string& binary_path = parameters[6];
+
+       if(clewInit() != CLEW_SUCCESS) {
+               return false;
+       }
+
+       vector<OpenCLPlatformDevice> usable_devices;
+       OpenCLInfo::get_usable_devices(&usable_devices, force_all_platforms);
+       if(device_platform_id >= usable_devices.size()) {
+               return false;
+       }
+
+       OpenCLPlatformDevice& platform_device = usable_devices[device_platform_id];
+       if(platform_device.platform_name != platform_name ||
+          platform_device.device_name != device_name)
+       {
+               return false;
+       }
+
+       cl_platform_id platform = platform_device.platform_id;
+       cl_device_id device = platform_device.device_id;
+       const cl_context_properties context_props[] = {
+               CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
+               0, 0
+       };
+
+       cl_int err;
+       cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
+       if(err != CL_SUCCESS) {
+               return false;
+       }
+
+       string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
+       source = path_source_replace_includes(source, path_get("source"));
+       size_t source_len = source.size();
+       const char *source_str = source.c_str();
+       cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
+       bool result = false;
+
+       if(err == CL_SUCCESS) {
+               err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
+
+               if(err == CL_SUCCESS) {
+                       size_t size = 0;
+                       clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
+                       if(size > 0) {
+                               vector<uint8_t> binary(size);
+                               uint8_t *bytes = &binary[0];
+                               clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
+                               result = path_write_binary(binary_path, binary);
+                       }
+               }
+               clReleaseProgram(program);
+       }
+
+       clReleaseContext(context);
+
+       return result;
+}
+
 bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
                                                   const string *debug_src)
 {
@@ -467,15 +571,31 @@ void OpenCLDeviceBase::OpenCLProgram::load()
                }
                else {
                        add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
-
-                       /* If does not exist or loading binary failed, compile kernel. */
-                       if(!compile_kernel(debug_src)) {
-                               return;
+                       if(!path_exists(clbin)) {
+                               if(compile_separate(clbin)) {
+                                       add_log(string("Built and loaded program from ") + clbin + ".", true);
+                                       loaded = true;
+                               }
+                               else {
+                                       add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
+
+                                       /* If does not exist or loading binary failed, compile kernel. */
+                                       if(!compile_kernel(debug_src)) {
+                                               return;
+                                       }
+
+                                       /* Save binary for reuse. */
+                                       if(!save_binary(clbin)) {
+                                               add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+                                       }
+                               }
                        }
-
-                       /* Save binary for reuse. */
-                       if(!save_binary(clbin)) {
-                               add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
+                       else {
+                               add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
+                               /* Fall back to compiling. */
+                               if(!compile_kernel(debug_src)) {
+                                       return;
+                               }
                        }
                }
 
index 163aacf19f9c8f010717af70ef34208c266bfa32..f7041ee278389f413247c9e23ed2b80e290a76c0 100644 (file)
@@ -39,6 +39,7 @@ set(SRC_OPENCL_KERNELS
        kernels/opencl/kernel.cl
        kernels/opencl/kernel_state_buffer_size.cl
        kernels/opencl/kernel_split.cl
+       kernels/opencl/kernel_split_bundle.cl
        kernels/opencl/kernel_data_init.cl
        kernels/opencl/kernel_path_init.cl
        kernels/opencl/kernel_queue_enqueue.cl
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
new file mode 100644 (file)
index 0000000..71ea683
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"  // PRECOMPILED
+#include "kernel/split/kernel_split_common.h"  // PRECOMPILED
+
+#include "kernel/kernels/opencl/kernel_path_init.cl"
+#include "kernel/kernels/opencl/kernel_scene_intersect.cl"
+#include "kernel/kernels/opencl/kernel_queue_enqueue.cl"
+#include "kernel/kernels/opencl/kernel_shader_setup.cl"
+#include "kernel/kernels/opencl/kernel_shader_sort.cl"
+#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl"
+#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
+#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
+#include "kernel/kernels/opencl/kernel_buffer_update.cl"
index fc6db1f66623af7660e66b81902de0a26d07c929..a79829a3dd9ca829a0bc0c741ed19b32489266d2 100644 (file)
@@ -22,6 +22,9 @@
 
 #include <numaapi.h>
 
+#include <OpenImageIO/sysutil.h>
+OIIO_NAMESPACE_USING
+
 #ifdef _WIN32
 #  if(!defined(FREE_WINDOWS))
 #    include <intrin.h>
@@ -329,6 +332,25 @@ bool system_cpu_support_avx2()
 
 #endif
 
+bool system_call_self(const vector<string>& args)
+{
+       /* Escape program and arguments in case they contain spaces. */
+       string cmd = "\"" + Sysutil::this_program_path() + "\"";
+
+       for(int i = 0; i < args.size(); i++) {
+               cmd += " \"" + args[i] + "\"";
+       }
+
+       /* Quiet output. */
+#ifdef _WIN32
+       cmd += " > nul";
+#else
+       cmd += " > /dev/null";
+#endif
+
+       return (system(cmd.c_str()) == 0);
+}
+
 size_t system_physical_ram()
 {
 #ifdef _WIN32
index 1e7cf1d9f2a00648045bad6693b5c4379e46fedf..2590b31a59d0188cf052c13d0a87c074a9aa9f0b 100644 (file)
@@ -18,6 +18,7 @@
 #define __UTIL_SYSTEM_H__
 
 #include "util/util_string.h"
+#include "util/util_vector.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -61,6 +62,9 @@ bool system_cpu_support_avx2();
 
 size_t system_physical_ram();
 
+/* Start a new process of the current application with the given arguments. */
+bool system_call_self(const vector<string>& args);
+
 CCL_NAMESPACE_END
 
 #endif  /* __UTIL_SYSTEM_H__ */