Merge branch 'blender2.7'
author Jeroen Bakker <j.bakker@atmind.nl>
Fri, 15 Mar 2019 15:28:33 +0000 (16:28 +0100)
committer Jeroen Bakker <j.bakker@atmind.nl>
Fri, 15 Mar 2019 15:28:33 +0000 (16:28 +0100)
13 files changed:
intern/cycles/blender/blender_session.cpp
intern/cycles/blender/blender_session.h
intern/cycles/device/device.h
intern/cycles/device/device_multi.cpp
intern/cycles/device/opencl/opencl.h
intern/cycles/device/opencl/opencl_split.cpp
intern/cycles/device/opencl/opencl_util.cpp
intern/cycles/kernel/kernel_types.h
intern/cycles/render/session.cpp
intern/cycles/render/session.h
intern/cycles/util/util_progress.h
intern/cycles/util/util_task.cpp
intern/cycles/util/util_task.h

index 501e4fec13f10ad0cceda4261828ed5abecee5c5..f1cdda5cb13c0613c268ebbe7ad3f0f4de10625e 100644 (file)
@@ -987,6 +987,11 @@ void BlenderSession::get_status(string& status, string& substatus)
        session->progress.get_status(status, substatus);
 }
 
+void BlenderSession::get_kernel_status(string& kernel_status)
+{
+       session->progress.get_kernel_status(kernel_status);
+}
+
 void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
 {
        session->progress.get_time(total_time, render_time);
@@ -1005,7 +1010,7 @@ void BlenderSession::update_bake_progress()
 
 void BlenderSession::update_status_progress()
 {
-       string timestatus, status, substatus;
+       string timestatus, status, substatus, kernel_status;
        string scene_status = "";
        float progress;
        double total_time, remaining_time = 0, render_time;
@@ -1014,6 +1019,7 @@ void BlenderSession::update_status_progress()
        float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
 
        get_status(status, substatus);
+       get_kernel_status(kernel_status);
        get_progress(progress, total_time, render_time);
 
        if(progress > 0)
@@ -1038,6 +1044,8 @@ void BlenderSession::update_status_progress()
                        status = " | " + status;
                if(substatus.size() > 0)
                        status += " | " + substatus;
+               if(kernel_status.size() > 0)
+                       status += " | " + kernel_status;
        }
 
        double current_time = time_dt();
index 1915cdb36f1e051c5353bd14bfbfb602c3fa7a3b..2bfb9e56c376ea7dfad0bac5ecc2714279d34da8 100644 (file)
@@ -90,6 +90,7 @@ public:
        void tag_redraw();
        void tag_update();
        void get_status(string& status, string& substatus);
+       void get_kernel_status(string& kernel_status);
        void get_progress(float& progress, double& total_time, double& render_time);
        void test_cancel();
        void update_status_progress();
index 08b0e7435fe7b40415f08b85b5455784711a7b17..4db8d10a4aafc0dcbf6b58f3607cdb447efaaa88 100644 (file)
@@ -56,6 +56,14 @@ enum DeviceTypeMask {
        DEVICE_MASK_ALL = ~0
 };
 
+enum DeviceKernelStatus {
+       DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0,
+       DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE,
+       DEVICE_KERNEL_USING_FEATURE_KERNEL,
+       DEVICE_KERNEL_FEATURE_KERNEL_INVALID,
+       DEVICE_KERNEL_UNKNOWN,
+};
+
 #define DEVICE_MASK(type) (DeviceTypeMask)(1 << type)
 
 class DeviceInfo {
@@ -334,6 +342,20 @@ public:
                const DeviceRequestedFeatures& /*requested_features*/)
        { return true; }
 
+       /* Wait for the device to become available to upload data and receive tasks.
+        * This method is used by the OpenCL device to load the
+        * optimized kernels or, when they are not (yet) available, load the
+        * generic kernels (only during foreground rendering). */
+       virtual bool wait_for_availability(
+               const DeviceRequestedFeatures& /*requested_features*/)
+       { return true; }
+       /* Check if there are 'better' kernels available to be used,
+        * so that we can switch over to these kernels.
+        * This method is used to determine if we can switch from the preview kernels
+        * to the regular kernels. */
+       virtual DeviceKernelStatus get_active_kernel_switch_state()
+       { return DEVICE_KERNEL_USING_FEATURE_KERNEL; }
+
        /* tasks */
        virtual int get_split_task_count(DeviceTask& task) = 0;
        virtual void task_add(DeviceTask& task) = 0;
index 3308af4f53ff29fc2b3ae8a971ce8b72eac9211c..efb4d9dd288b8e92ee52a8be4f4d7d2fc631ebe4 100644 (file)
@@ -120,6 +120,37 @@ public:
                return true;
        }
 
+       bool wait_for_availability(const DeviceRequestedFeatures& requested_features)
+       {
+               foreach(SubDevice& sub, devices)
+                       if(!sub.device->wait_for_availability(requested_features))
+                               return false;
+
+               return true;
+       }
+
+       DeviceKernelStatus get_active_kernel_switch_state()
+       {
+               DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
+
+               foreach(SubDevice& sub, devices) {
+                       DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
+                       switch (subresult) {
+                               case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
+                                       result = subresult;
+                                       break;
+
+                               case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
+                               case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
+                                       return subresult;
+
+                               case DEVICE_KERNEL_USING_FEATURE_KERNEL:
+                                       break;
+                       }
+               }
+               return result;
+       }
+
        void mem_alloc(device_memory& mem)
        {
                device_ptr key = unique_key++;
index 2a4e07419ac5abee324275226512a173fa5c9f74..bb507be4c7218986bd608ec60b4ae245646d4a47 100644 (file)
@@ -261,16 +261,22 @@ class OpenCLDevice : public Device
 {
 public:
        DedicatedTaskPool task_pool;
+
+       /* Task pool for required kernels (base, AO kernels during foreground rendering) */
+       TaskPool load_required_kernel_task_pool;
+       /* Task pool for optional kernels (feature kernels during foreground rendering) */
+       TaskPool load_kernel_task_pool;
        cl_context cxContext;
        cl_command_queue cqCommandQueue;
        cl_platform_id cpPlatform;
        cl_device_id cdDevice;
        cl_int ciErr;
        int device_num;
+       bool use_preview_kernels;
 
        class OpenCLProgram {
        public:
-               OpenCLProgram() : loaded(false), program(NULL), device(NULL) {}
+               OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL) {}
                OpenCLProgram(OpenCLDevice *device,
                              const string& program_name,
                              const string& kernel_name,
@@ -279,12 +285,24 @@ public:
                ~OpenCLProgram();
 
                void add_kernel(ustring name);
-               void load();
+
+               /* Try to load the program from device cache or disk */
+               bool load();
+               /* Compile the kernel (first separate, fallback to local) */
+               void compile();
+               /* Create the OpenCL kernels after loading or compiling */
+               void create_kernels();
 
                bool is_loaded() const { return loaded; }
                const string& get_log() const { return log; }
                void report_error();
 
+               /* Wait until this kernel is available to be used.
+                * It will return true when the kernel is available.
+                * It will return false when the kernel is not available
+                * or could not be loaded. */
+               bool wait_for_availability();
+
                cl_kernel operator()();
                cl_kernel operator()(ustring name);
 
@@ -308,6 +326,8 @@ public:
                void add_error(const string& msg);
 
                bool loaded;
+               bool needs_compiling;
+
                cl_program program;
                OpenCLDevice *device;
 
@@ -323,19 +343,32 @@ public:
                map<ustring, cl_kernel> kernels;
        };
 
-       DeviceSplitKernel *split_kernel;
-
-       OpenCLProgram program_split;
+       /* Container for all types of split programs. */
+       class OpenCLSplitPrograms {
+               public:
+                       OpenCLDevice *device;
+                       OpenCLProgram program_split;
+                       OpenCLProgram program_lamp_emission;
+                       OpenCLProgram program_do_volume;
+                       OpenCLProgram program_indirect_background;
+                       OpenCLProgram program_shader_eval;
+                       OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+                       OpenCLProgram program_subsurface_scatter;
+                       OpenCLProgram program_direct_lighting;
+                       OpenCLProgram program_shadow_blocked_ao;
+                       OpenCLProgram program_shadow_blocked_dl;
+
+                       OpenCLSplitPrograms(OpenCLDevice *device);
+                       ~OpenCLSplitPrograms();
+
+                       /* Load the kernels and put the created kernels in the given `programs`
+                        * parameter. */
+                       void load_kernels(vector<OpenCLProgram*> &programs,
+                                         const DeviceRequestedFeatures& requested_features,
+                                         bool is_preview=false);
+       };
 
-       OpenCLProgram program_lamp_emission;
-       OpenCLProgram program_do_volume;
-       OpenCLProgram program_indirect_background;
-       OpenCLProgram program_shader_eval;
-       OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
-       OpenCLProgram program_subsurface_scatter;
-       OpenCLProgram program_direct_lighting;
-       OpenCLProgram program_shadow_blocked_ao;
-       OpenCLProgram program_shadow_blocked_dl;
+       DeviceSplitKernel *split_kernel;
 
        OpenCLProgram base_program;
        OpenCLProgram bake_program;
@@ -343,6 +376,9 @@ public:
        OpenCLProgram background_program;
        OpenCLProgram denoising_program;
 
+       OpenCLSplitPrograms kernel_programs;
+       OpenCLSplitPrograms preview_programs;
+
        typedef map<string, device_vector<uchar>*> ConstMemMap;
        typedef map<string, device_ptr> MemMap;
 
@@ -358,22 +394,30 @@ public:
        void opencl_error(const string& message);
        void opencl_assert_err(cl_int err, const char* where);
 
-       OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
+       OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
        ~OpenCLDevice();
 
        static void CL_CALLBACK context_notify_callback(const char *err_info,
                const void * /*private_info*/, size_t /*cb*/, void *user_data);
 
        bool opencl_version_check();
+       OpenCLSplitPrograms* get_split_programs();
 
        string device_md5_hash(string kernel_custom_build_options = "");
        bool load_kernels(const DeviceRequestedFeatures& requested_features);
+       void load_required_kernels(const DeviceRequestedFeatures& requested_features);
+       void load_preview_kernels();
+
+       bool wait_for_availability(const DeviceRequestedFeatures& requested_features);
+       DeviceKernelStatus get_active_kernel_switch_state();
 
        /* Get the name of the opencl program for the given kernel */
        const string get_opencl_program_name(const string& kernel_name);
        /* Get the program file name to compile (*.cl) for the given kernel */
        const string get_opencl_program_filename(const string& kernel_name);
-       string get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name);
+       string get_build_options(const DeviceRequestedFeatures& requested_features,
+                                const string& opencl_program_name,
+                                bool preview_kernel=false);
        /* Enable the default features to reduce recompilation events */
        void enable_default_features(DeviceRequestedFeatures& features);
 
index 422813c2e0744dbec29bed44370664ad4e889944..555707cecd552a797f0177918a9987e3e24a5290 100644 (file)
@@ -40,7 +40,13 @@ struct texture_slot_t {
        int slot;
 };
 
-static const string fast_compiled_kernels =
+static const string NON_SPLIT_KERNELS =
+       "denoising "
+       "base "
+       "background "
+       "displace ";
+
+static const string SPLIT_BUNDLE_KERNELS =
        "data_init "
        "path_init "
        "state_buffer_size "
@@ -55,7 +61,10 @@ static const string fast_compiled_kernels =
 
 const string OpenCLDevice::get_opencl_program_name(const string& kernel_name)
 {
-       if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+       if (NON_SPLIT_KERNELS.find(kernel_name) != std::string::npos) {
+               return kernel_name;
+       }
+       else if (SPLIT_BUNDLE_KERNELS.find(kernel_name) != std::string::npos) {
                return "split_bundle";
        }
        else {
@@ -65,7 +74,10 @@ const string OpenCLDevice::get_opencl_program_name(const string& kernel_name)
 
 const string OpenCLDevice::get_opencl_program_filename(const string& kernel_name)
 {
-       if (fast_compiled_kernels.find(kernel_name) != std::string::npos) {
+       if (kernel_name == "denoising") {
+               return "filter.cl";
+       }
+       else if (SPLIT_BUNDLE_KERNELS.find(kernel_name) != std::string::npos) {
                return "kernel_split_bundle.cl";
        }
        else {
@@ -92,7 +104,7 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures& features)
        }
 }
 
-string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name)
+string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name, bool preview_kernel)
 {
        /* first check for non-split kernel programs */
        if (opencl_program_name == "base" || opencl_program_name == "denoising") {
@@ -169,7 +181,13 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_
        enable_default_features(nofeatures);
 
        /* Add program specific optimized compile directives */
-       if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
+       if (preview_kernel) {
+               DeviceRequestedFeatures preview_features;
+               preview_features.use_hair = true;
+               build_options += "-D__KERNEL_OPENCL_PREVIEW__ ";
+               build_options += preview_features.get_build_options();
+       }
+       else if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
                build_options += nofeatures.get_build_options();
        }
        else {
@@ -196,6 +214,77 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_
        return build_options;
 }
 
+OpenCLDevice::OpenCLSplitPrograms::OpenCLSplitPrograms(OpenCLDevice *device_)
+{
+       device = device_;
+}
+
+OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms()
+{
+       program_split.release();
+       program_lamp_emission.release();
+       program_do_volume.release();
+       program_indirect_background.release();
+       program_shader_eval.release();
+       program_holdout_emission_blurring_pathtermination_ao.release();
+       program_subsurface_scatter.release();
+       program_direct_lighting.release();
+       program_shadow_blocked_ao.release();
+       program_shadow_blocked_dl.release();
+}
+
+void OpenCLDevice::OpenCLSplitPrograms::load_kernels(vector<OpenCLProgram*> &programs, const DeviceRequestedFeatures& requested_features, bool is_preview)
+{
+       if (!requested_features.use_baking) {
+#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
+#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
+               const string program_name_##kernel_name = "split_"#kernel_name; \
+               program_##kernel_name = \
+                       OpenCLDevice::OpenCLProgram(device, \
+                                                   program_name_##kernel_name, \
+                                                   "kernel_"#kernel_name".cl", \
+                                                   device->get_build_options(requested_features, program_name_##kernel_name, is_preview)); \
+               program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
+               programs.push_back(&program_##kernel_name);
+
+               /* Ordered with most complex kernels first, to reduce overall compile time. */
+               ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
+               if (requested_features.use_volume || is_preview) {
+                       ADD_SPLIT_KERNEL_PROGRAM(do_volume);
+               }
+               ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
+               ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
+               ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+               ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
+               ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
+               ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
+               ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
+
+               /* Quick kernels bundled in a single program to reduce overhead of starting
+                       * Blender processes. */
+               program_split = OpenCLDevice::OpenCLProgram(device,
+                                                           "split_bundle" ,
+                                                           "kernel_split_bundle.cl",
+                                                           device->get_build_options(requested_features, "split_bundle", is_preview));
+
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
+               programs.push_back(&program_split);
+
+#undef ADD_SPLIT_KERNEL_PROGRAM
+#undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM
+       }
+}
+
 namespace {
 
 /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
@@ -307,7 +396,9 @@ public:
                        OpenCLDevice::OpenCLProgram(device,
                                                    program_name,
                                                    device->get_opencl_program_filename(kernel_name),
-                                                   device->get_build_options(requested_features, program_name));
+                                                   device->get_build_options(requested_features, 
+                                                                             program_name, 
+                                                                             device->use_preview_kernels));
 
                kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
                kernel->program.load();
@@ -327,7 +418,8 @@ public:
                size_buffer.zero_to_device();
 
                uint threads = num_threads;
-               cl_kernel kernel_state_buffer_size = device->program_split(ustring("path_trace_state_buffer_size"));
+               OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
+               cl_kernel kernel_state_buffer_size = programs->program_split(ustring("path_trace_state_buffer_size"));
                device->kernel_set_args(kernel_state_buffer_size, 0, kg, data, threads, size_buffer);
 
                size_t global_size = 64;
@@ -377,7 +469,8 @@ public:
                cl_int start_sample = rtile.start_sample;
                cl_int end_sample = rtile.start_sample + rtile.num_samples;
 
-               cl_kernel kernel_data_init = device->program_split(ustring("path_trace_data_init"));
+               OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
+               cl_kernel kernel_data_init = programs->program_split(ustring("path_trace_data_init"));
 
                cl_uint start_arg_index =
                        device->kernel_set_args(kernel_data_init,
@@ -510,6 +603,8 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char* where)
 
 OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background)
 : Device(info, stats, profiler, background),
+  kernel_programs(this),
+  preview_programs(this),
   memory_manager(this),
   texture_info(this, "__texture_info", MEM_TEXTURE)
 {
@@ -520,6 +615,7 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b
        null_mem = 0;
        device_initialized = false;
        textures_need_update = true;
+       use_preview_kernels = !background;
 
        vector<OpenCLPlatformDevice> usable_devices;
        OpenCLInfo::get_usable_devices(&usable_devices);
@@ -583,11 +679,16 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b
        device_initialized = true;
 
        split_kernel = new OpenCLSplitKernel(this);
+       if (!background) {
+               load_preview_kernels();
+       }
 }
 
 OpenCLDevice::~OpenCLDevice()
 {
        task_pool.stop();
+       load_required_kernel_task_pool.stop();
+       load_kernel_task_pool.stop();
 
        memory_manager.free();
 
@@ -603,7 +704,7 @@ OpenCLDevice::~OpenCLDevice()
        bake_program.release();
        displace_program.release();
        background_program.release();
-       program_split.release();
+       denoising_program.release();
 
        if(cqCommandQueue)
                clReleaseCommandQueue(cqCommandQueue);
@@ -669,8 +770,51 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
        /* Verify we have right opencl version. */
        if(!opencl_version_check())
                return false;
+       
+       load_required_kernels(requested_features);
+
+       vector<OpenCLProgram*> programs;
+       kernel_programs.load_kernels(programs, requested_features, false);
+
+       if (!requested_features.use_baking && requested_features.use_denoising) {
+               denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising"));
+               denoising_program.add_kernel(ustring("filter_divide_shadow"));
+               denoising_program.add_kernel(ustring("filter_get_feature"));
+               denoising_program.add_kernel(ustring("filter_write_feature"));
+               denoising_program.add_kernel(ustring("filter_detect_outliers"));
+               denoising_program.add_kernel(ustring("filter_combine_halves"));
+               denoising_program.add_kernel(ustring("filter_construct_transform"));
+               denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
+               denoising_program.add_kernel(ustring("filter_nlm_blur"));
+               denoising_program.add_kernel(ustring("filter_nlm_calc_weight"));
+               denoising_program.add_kernel(ustring("filter_nlm_update_output"));
+               denoising_program.add_kernel(ustring("filter_nlm_normalize"));
+               denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
+               denoising_program.add_kernel(ustring("filter_finalize"));
+               programs.push_back(&denoising_program);
+       }
+
+       load_required_kernel_task_pool.wait_work();
+
+       /* Parallel compilation of Cycles kernels, this launches multiple
+        * processes to workaround OpenCL frameworks serializing the calls
+        * internally within a single process. */
+       foreach(OpenCLProgram *program, programs) {
+               if (!program->load()) {
+                       load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+               }
+       }
+       return true;
+}
 
+void OpenCLDevice::load_required_kernels(const DeviceRequestedFeatures& requested_features)
+{
        vector<OpenCLProgram*> programs;
+       base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
+       base_program.add_kernel(ustring("convert_to_byte"));
+       base_program.add_kernel(ustring("convert_to_half_float"));
+       base_program.add_kernel(ustring("zero_buffer"));
+       programs.push_back(&base_program);
 
        if (requested_features.use_true_displacement) {
                displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", get_build_options(requested_features, "displace"));
@@ -684,101 +828,89 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
                programs.push_back(&background_program);
        }
 
-#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
-#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
-               const string program_name_##kernel_name = "split_"#kernel_name; \
-               program_##kernel_name = \
-                       OpenCLDevice::OpenCLProgram(this, \
-                                                   program_name_##kernel_name, \
-                                                   "kernel_"#kernel_name".cl", \
-                                                   get_build_options(requested_features, program_name_##kernel_name)); \
-               program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
-               programs.push_back(&program_##kernel_name);
-
-       /* Ordered with most complex kernels first, to reduce overall compile time. */
-       ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
-       if (requested_features.use_volume) {
-               ADD_SPLIT_KERNEL_PROGRAM(do_volume);
-       }
-       ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
-       ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
-       ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
-       ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
-       ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
-       ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
-       ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
-
-       /* Quick kernels bundled in a single program to reduce overhead of starting
-               * Blender processes. */
-       program_split = OpenCLDevice::OpenCLProgram(this,
-                                                                                               "split_bundle" ,
-                                                                                               "kernel_split_bundle.cl",
-                                                                                               get_build_options(requested_features, "split_bundle"));
-
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
-       programs.push_back(&program_split);
-
-#undef ADD_SPLIT_KERNEL_PROGRAM
-#undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM
-
-       base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
-       base_program.add_kernel(ustring("convert_to_byte"));
-       base_program.add_kernel(ustring("convert_to_half_float"));
-       base_program.add_kernel(ustring("zero_buffer"));
-       programs.push_back(&base_program);
-
        if (requested_features.use_baking) {
                bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", get_build_options(requested_features, "bake"));
                bake_program.add_kernel(ustring("bake"));
                programs.push_back(&bake_program);
        }
 
-       denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising"));
-       denoising_program.add_kernel(ustring("filter_divide_shadow"));
-       denoising_program.add_kernel(ustring("filter_get_feature"));
-       denoising_program.add_kernel(ustring("filter_write_feature"));
-       denoising_program.add_kernel(ustring("filter_detect_outliers"));
-       denoising_program.add_kernel(ustring("filter_combine_halves"));
-       denoising_program.add_kernel(ustring("filter_construct_transform"));
-       denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
-       denoising_program.add_kernel(ustring("filter_nlm_blur"));
-       denoising_program.add_kernel(ustring("filter_nlm_calc_weight"));
-       denoising_program.add_kernel(ustring("filter_nlm_update_output"));
-       denoising_program.add_kernel(ustring("filter_nlm_normalize"));
-       denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
-       denoising_program.add_kernel(ustring("filter_finalize"));
-       programs.push_back(&denoising_program);
-
-       /* Parallel compilation of Cycles kernels, this launches multiple
-        * processes to workaround OpenCL frameworks serializing the calls
-        * internally within a single process. */
-       TaskPool task_pool;
        foreach(OpenCLProgram *program, programs) {
-               task_pool.push(function_bind(&OpenCLProgram::load, program));
+               if (!program->load()) {
+                       load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+               }
        }
-       task_pool.wait_work();
+}
+
+void OpenCLDevice::load_preview_kernels()
+{
+       DeviceRequestedFeatures no_features;
+       vector<OpenCLProgram*> programs;
+       preview_programs.load_kernels(programs, no_features, true);
 
        foreach(OpenCLProgram *program, programs) {
-               VLOG(2) << program->get_log();
-               if(!program->is_loaded()) {
-                       program->report_error();
-                       return false;
+               if (!program->load()) {
+                       load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
                }
        }
+}
 
+bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures& requested_features)
+{
+       if (background) {
+               load_kernel_task_pool.wait_work();
+               use_preview_kernels = false;
+       }
+       else {
+               /* We use a device setting to determine whether to load preview kernels or not.
+                * It is better to check on device level than per kernel, as mixing preview and
+                * non-preview kernels does not work due to different data types. */
+               if (use_preview_kernels) {
+                       use_preview_kernels = !load_kernel_task_pool.finished();
+               }
+       }
        return split_kernel->load_kernels(requested_features);
 }
 
+OpenCLDevice::OpenCLSplitPrograms* OpenCLDevice::get_split_programs()
+{
+       return use_preview_kernels?&preview_programs:&kernel_programs;
+}
+
+DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
+{
+       /* Do not switch kernels for background renderings.
+        * When we do foreground rendering but use the preview kernels,
+        * check for the optimized kernels.
+        *
+        * This also works the other way around, where we are using
+        * optimized kernels but new ones are being compiled due
+        * to other features that are needed. */
+       if (background) {
+               /* The if-statements below would find the same result,
+                * but as the `finished` method uses a mutex we added
+                * this as an early exit. */
+               return DEVICE_KERNEL_USING_FEATURE_KERNEL;
+       }
+       
+       bool other_kernels_finished = load_kernel_task_pool.finished();
+       if (use_preview_kernels) {
+               if (other_kernels_finished) {
+                       return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
+               }
+               else {
+                       return DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL;
+               }
+       }
+       else {
+               if (other_kernels_finished) {
+                       return DEVICE_KERNEL_USING_FEATURE_KERNEL;
+               }
+               else {
+                       return DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
+               }
+       }
+}
+
 void OpenCLDevice::mem_alloc(device_memory& mem)
 {
        if(mem.name) {
@@ -880,6 +1012,7 @@ void OpenCLDevice::mem_copy_from(device_memory& mem, int y, int w, int h, int el
 
 void OpenCLDevice::mem_zero_kernel(device_ptr mem, size_t size)
 {
+       base_program.wait_for_availability();
        cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
 
        size_t global_size[] = {1024, 1024};
@@ -1707,17 +1840,15 @@ void OpenCLDevice::shader(DeviceTask& task)
        cl_int d_shader_w = task.shader_w;
        cl_int d_offset = task.offset;
 
-       cl_kernel kernel;
-
+       OpenCLDevice::OpenCLProgram *program = &background_program;
        if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
-               kernel = bake_program(ustring("bake"));
+               program = &bake_program;
        }
        else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
-               kernel = displace_program(ustring("displace"));
-       }
-       else {
-               kernel = background_program(ustring("background"));
+               program = &displace_program;
        }
+       program->wait_for_availability();
+       cl_kernel kernel = (*program)();
 
        cl_uint start_arg_index =
                kernel_set_args(kernel,
index ef0deaeff62b9af6e67f9e12897e002882dcba58..920c8dc4e6ae58fe267112e68f4ceb0bb9c2abed 100644 (file)
@@ -243,6 +243,18 @@ string OpenCLCache::get_kernel_md5()
        return self.kernel_md5;
 }
 
+/* Build the OpenCL source text for `kernel_file`.
+ *
+ * The result is an #include of the kernel file, passed through
+ * path_source_replace_includes(), followed by a trailing comment line that
+ * holds the md5 of that processed text. */
+static string get_program_source(const string& kernel_file)
+{
+       const string include_line = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
+
+       /* Kernels consist of many files, and OpenCL kernel caches do not seem to
+        * notice changes in included files. Appending the md5 of the processed
+        * source forces a recompile whenever that text changes. */
+       string expanded = path_source_replace_includes(include_line, path_get("source"));
+       const string digest = util_md5_string(expanded);
+       expanded += "\n// " + digest + "\n";
+       return expanded;
+}
+
 OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
                                                const string& program_name,
                                                const string& kernel_file,
@@ -255,6 +267,7 @@ OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
    use_stdout(use_stdout)
 {
        loaded = false;
+       needs_compiling = true;
        program = NULL;
 }
 
@@ -343,13 +356,7 @@ bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src)
 
 bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src)
 {
-       string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
-       /* We compile kernels consisting of many files. unfortunately OpenCL
-        * kernel caches do not seem to recognize changes in included files.
-        * so we force recompile on changes by adding the md5 hash of all files.
-        */
-       source = path_source_replace_includes(source, path_get("source"));
-       source += "\n// " + util_md5_string(source) + "\n";
+       string source = get_program_source(kernel_file);
 
        if(debug_src) {
                path_write_text(*debug_src, source);
@@ -473,8 +480,7 @@ bool device_opencl_compile_kernel(const vector<string>& parameters)
                return false;
        }
 
-       string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
-       source = path_source_replace_includes(source, path_get("source"));
+       string source = get_program_source(kernel_file);
        size_t source_len = source.size();
        const char *source_str = source.c_str();
        cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
@@ -548,11 +554,54 @@ bool OpenCLDevice::OpenCLProgram::save_binary(const string& clbin)
        return path_write_binary(clbin, binary);
 }
 
-void OpenCLDevice::OpenCLProgram::load()
+/* Try to make this program available without compiling it.
+ *
+ * The device's kernel cache is queried first. On a miss, a .clbin file whose
+ * name is derived from the program name, the device md5 and the md5 of the
+ * program source is looked up in the "kernels" cache directory.
+ *
+ * When `program` is set afterwards, the kernels are created, `loaded` is set
+ * and `needs_compiling` is cleared. Returns `loaded`. */
+bool OpenCLDevice::OpenCLProgram::load()
 {
-       assert(device);
-
        loaded = false;
+       string device_md5 = device->device_md5_hash(kernel_build_options);
+
+       /* Try to use cached kernel. */
+       /* NOTE(review): presumably load_cached_kernel() leaves `cache_locker`
+        * held on a cache miss, which is why both branches below either store
+        * the program or unlock explicitly - confirm against its definition. */
+       thread_scoped_lock cache_locker;
+       ustring cache_key(program_name + device_md5);
+       program = device->load_cached_kernel(cache_key,
+                                            cache_locker);
+       if (!program) {
+               add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
+
+               /* need to create source to get md5 */
+               string source = get_program_source(kernel_file);
+
+               /* The binary's file name encodes program, device and source md5. */
+               string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
+               basename = path_cache_get(path_join("kernels", basename));
+               string clbin = basename + ".clbin";
+
+               /* If binary kernel exists already, try use it. */
+               if(path_exists(clbin) && load_binary(clbin)) {
+                       /* Kernel loaded from binary, nothing to do. */
+                       add_log(string("Loaded program from ") + clbin + ".", true);
+
+                       /* Cache the program. */
+                       device->store_cached_kernel(program,
+                                                   cache_key,
+                                                   cache_locker);
+               }
+               else {
+                       /* No usable binary: release the cache lock without storing anything. */
+                       add_log(string("OpenCL program ") + program_name + " not found on disk.", true);
+                       cache_locker.unlock();
+               }
+       }
+
+       /* NOTE(review): `loaded` is set without checking whether create_kernels()
+        * succeeded (it returns void) - confirm failures are reported elsewhere. */
+       if (program) {
+               create_kernels();
+               loaded = true;
+               needs_compiling = false;
+       }
+
+       return loaded;
+}
+
+void OpenCLDevice::OpenCLProgram::compile()
+{
+       assert(device);
 
        string device_md5 = device->device_md5_hash(kernel_build_options);
 
@@ -562,12 +611,13 @@ void OpenCLDevice::OpenCLProgram::load()
        program = device->load_cached_kernel(cache_key,
                                             cache_locker);
 
-       if(!program) {
+       if (!program)
+       {
+
                add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
 
                /* need to create source to get md5 */
-               string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
-               source = path_source_replace_includes(source, path_get("source"));
+               string source = get_program_source(kernel_file);
 
                string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
                basename = path_cache_get(path_join("kernels", basename));
@@ -582,49 +632,38 @@ void OpenCLDevice::OpenCLProgram::load()
                }
 
                /* If binary kernel exists already, try use it. */
-               if(path_exists(clbin) && load_binary(clbin)) {
-                       /* Kernel loaded from binary, nothing to do. */
-                       add_log(string("Loaded program from ") + clbin + ".", true);
+               if(compile_separate(clbin)) {
+                       add_log(string("Built and loaded program from ") + clbin + ".", true);
+                       loaded = true;
                }
                else {
-                       add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
-                       if(!path_exists(clbin)) {
-                               if(compile_separate(clbin)) {
-                                       add_log(string("Built and loaded program from ") + clbin + ".", true);
-                                       loaded = true;
-                               }
-                               else {
-                                       add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
-
-                                       /* If does not exist or loading binary failed, compile kernel. */
-                                       if(!compile_kernel(debug_src)) {
-                                               return;
-                                       }
-
-                                       /* Save binary for reuse. */
-                                       if(!save_binary(clbin)) {
-                                               add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
-                                       }
-                               }
+                       add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
+
+                       /* If does not exist or loading binary failed, compile kernel. */
+                       if(!compile_kernel(debug_src)) {
+                               needs_compiling = false;
+                               return;
                        }
-                       else {
-                               add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
-                               /* Fall back to compiling. */
-                               if(!compile_kernel(debug_src)) {
-                                       return;
-                               }
+
+                       /* Save binary for reuse. */
+                       if(!save_binary(clbin)) {
+                               add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
                        }
                }
 
                /* Cache the program. */
                device->store_cached_kernel(program,
-                                           cache_key,
-                                           cache_locker);
-       }
-       else {
-               add_log(string("Found cached OpenCL program ") + program_name + ".", true);
+                                                                       cache_key,
+                                                                       cache_locker);
        }
 
+       create_kernels();
+       needs_compiling = false;
+       loaded = true;
+}
+
+void OpenCLDevice::OpenCLProgram::create_kernels()
+{
        for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
                assert(kernel->second == NULL);
                cl_int ciErr;
@@ -635,8 +674,15 @@ void OpenCLDevice::OpenCLProgram::load()
                        return;
                }
        }
+}
 
-       loaded = true;
+/* Block until this program is no longer flagged as needing compilation,
+ * polling the flag with a time_sleep(0.1) between checks, and then report
+ * whether the program ended up loaded.
+ *
+ * NOTE(review): `needs_compiling` is polled without visible synchronization;
+ * confirm its declaration makes this safe across threads. */
+bool OpenCLDevice::OpenCLProgram::wait_for_availability()
+{
+       const string message = string("Waiting for availability of ") + program_name + ".";
+       add_log(message, true);
+
+       while(needs_compiling) {
+               time_sleep(0.1);
+       }
+
+       return loaded;
 }
 
 void OpenCLDevice::OpenCLProgram::report_error()
index caa0057d9972d2a1a600ee7f81a72ed1af13761f..281d9a250473829ee352029549e5541782811462 100644 (file)
@@ -121,52 +121,62 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __KERNEL_OPENCL__
 
-/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
+#  if defined(__KERNEL_OPENCL_AMD__) || defined(__KERNEL_OPENCL_INTEL_CPU__)
+#    define __CL_USE_NATIVE__
+#  endif
 
-#  ifdef __KERNEL_OPENCL_NVIDIA__
-#    define __KERNEL_SHADING__
-#    define __KERNEL_ADV_SHADING__
-#    define __SUBSURFACE__
-#    define __PRINCIPLED__
-#    define __VOLUME__
-#    define __VOLUME_SCATTER__
-#    define __SHADOW_RECORD_ALL__
-#    define __CMJ__
-#    define __BRANCHED_PATH__
-#  endif  /* __KERNEL_OPENCL_NVIDIA__ */
+/* Preview kernel is used as a small kernel when the optimized kernel is still being compiled. */
+#  ifdef __KERNEL_OPENCL_PREVIEW__
+#    define __AO__
+#    define __PASSES__
+#    define __HAIR__
+#  else
+
+/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
 
-#  ifdef __KERNEL_OPENCL_APPLE__
-#    define __KERNEL_SHADING__
-#    define __KERNEL_ADV_SHADING__
-#    define __PRINCIPLED__
-#    define __CMJ__
+#    ifdef __KERNEL_OPENCL_NVIDIA__
+#      define __KERNEL_SHADING__
+#      define __KERNEL_ADV_SHADING__
+#      define __SUBSURFACE__
+#      define __PRINCIPLED__
+#      define __VOLUME__
+#      define __VOLUME_SCATTER__
+#      define __SHADOW_RECORD_ALL__
+#      define __CMJ__
+#      define __BRANCHED_PATH__
+#    endif  /* __KERNEL_OPENCL_NVIDIA__ */
+
+#    ifdef __KERNEL_OPENCL_APPLE__
+#      define __KERNEL_SHADING__
+#      define __KERNEL_ADV_SHADING__
+#      define __PRINCIPLED__
+#      define __CMJ__
 /* TODO(sergey): Currently experimental section is ignored here,
  * this is because megakernel in device_opencl does not support
  * custom cflags depending on the scene features.
  */
-#  endif  /* __KERNEL_OPENCL_APPLE__ */
-
-#  ifdef __KERNEL_OPENCL_AMD__
-#    define __CL_USE_NATIVE__
-#    define __KERNEL_SHADING__
-#    define __KERNEL_ADV_SHADING__
-#    define __SUBSURFACE__
-#    define __PRINCIPLED__
-#    define __VOLUME__
-#    define __VOLUME_SCATTER__
-#    define __SHADOW_RECORD_ALL__
-#    define __CMJ__
-#    define __BRANCHED_PATH__
-#  endif  /* __KERNEL_OPENCL_AMD__ */
-
-#  ifdef __KERNEL_OPENCL_INTEL_CPU__
-#    define __CL_USE_NATIVE__
-#    define __KERNEL_SHADING__
-#    define __KERNEL_ADV_SHADING__
-#    define __PRINCIPLED__
-#    define __CMJ__
-#  endif  /* __KERNEL_OPENCL_INTEL_CPU__ */
-
+#    endif  /* __KERNEL_OPENCL_APPLE__ */
+
+#    ifdef __KERNEL_OPENCL_AMD__
+#      define __KERNEL_SHADING__
+#      define __KERNEL_ADV_SHADING__
+#      define __SUBSURFACE__
+#      define __PRINCIPLED__
+#      define __VOLUME__
+#      define __VOLUME_SCATTER__
+#      define __SHADOW_RECORD_ALL__
+#      define __CMJ__
+#      define __BRANCHED_PATH__
+#    endif  /* __KERNEL_OPENCL_AMD__ */
+
+#    ifdef __KERNEL_OPENCL_INTEL_CPU__
+#      define __KERNEL_SHADING__
+#      define __KERNEL_ADV_SHADING__
+#      define __PRINCIPLED__
+#      define __CMJ__
+#    endif  /* __KERNEL_OPENCL_INTEL_CPU__ */
+
+#  endif  /* __KERNEL_OPENCL_PREVIEW__ */
 #endif  /* __KERNEL_OPENCL__ */
 
 /* Kernel features */
index 866832333ebaad67d81c09e07b82734caba94f5a..e9274fbf49e707aa7a5aa51bd50411c9166e9aa8 100644 (file)
@@ -212,6 +212,11 @@ void Session::run_gpu()
                /* advance to next tile */
                bool no_tiles = !tile_manager.next();
 
+               DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+               if (no_tiles) {
+                       kernel_state = device->get_active_kernel_switch_state();
+               }
+
                if(params.background) {
                        /* if no work left and in background mode, we can stop immediately */
                        if(no_tiles) {
@@ -219,6 +224,16 @@ void Session::run_gpu()
                                break;
                        }
                }
+
+               /* Don't go into pause mode when the image was rendered with preview kernels.
+                * When feature kernels become available the session will be reset. */
+               else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+                       time_sleep(0.1);
+               }
+               else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+                       reset_gpu(tile_manager.params, params.samples);
+               }
+
                else {
                        /* if in interactive mode, and we are either paused or done for now,
                         * wait for pause condition notify to wake up again */
@@ -540,6 +555,11 @@ void Session::run_cpu()
                bool no_tiles = !tile_manager.next();
                bool need_tonemap = false;
 
+               DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+               if (no_tiles) {
+                       kernel_state = device->get_active_kernel_switch_state();
+               }
+
                if(params.background) {
                        /* if no work left and in background mode, we can stop immediately */
                        if(no_tiles) {
@@ -547,6 +567,16 @@ void Session::run_cpu()
                                break;
                        }
                }
+
+               /* Don't go into pause mode when preview kernels are used.
+                * When feature kernels become available the session will be reset. */
+               else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+                       time_sleep(0.1);
+               }
+               else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+                       reset_cpu(tile_manager.params, params.samples);
+               }
+
                else {
                        /* if in interactive mode, and we are either paused or done for now,
                         * wait for pause condition notify to wake up again */
@@ -699,7 +729,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
        return requested_features;
 }
 
-void Session::load_kernels(bool lock_scene)
+bool Session::load_kernels(bool lock_scene)
 {
        thread_scoped_lock scene_lock;
        if(lock_scene) {
@@ -722,7 +752,7 @@ void Session::load_kernels(bool lock_scene)
                        progress.set_error(message);
                        progress.set_status("Error", message);
                        progress.set_update();
-                       return;
+                       return false;
                }
 
                progress.add_skip_time(timer, false);
@@ -730,14 +760,13 @@ void Session::load_kernels(bool lock_scene)
 
                kernels_loaded = true;
                loaded_kernel_features = requested_features;
+               return true;
        }
+       return false;
 }
 
 void Session::run()
 {
-       /* load kernels */
-       load_kernels();
-
        if(params.use_profiling && (params.device.type == DEVICE_CPU)) {
                profiler.start();
        }
@@ -879,7 +908,7 @@ bool Session::update_scene()
 
        /* update scene */
        if(scene->need_update()) {
-               load_kernels(false);
+               bool new_kernels_needed = load_kernels(false);
 
                /* Update max_closures. */
                KernelIntegrator *kintegrator = &scene->dscene.data.integrator;
@@ -894,6 +923,21 @@ bool Session::update_scene()
                progress.set_status("Updating Scene");
                MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
 
+               DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state();
+               bool kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE ||
+                                           kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
+               if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+                       progress.set_kernel_status("Compiling render kernels");
+               }
+               if (new_kernels_needed || kernel_switch_needed) {
+                       progress.set_kernel_status("Compiling render kernels");
+                       device->wait_for_availability(loaded_kernel_features);
+                       progress.set_kernel_status("");
+               }
+
+               if (kernel_switch_needed) {
+                       reset(tile_manager.params, params.samples);
+               }
                return true;
        }
        return false;
index cbdfc75a9052cfce06f9838f04c3c9b4925e8d28..404b7b7a94509dd8bcd012edc23659a69fdb051a 100644 (file)
@@ -162,7 +162,7 @@ public:
        void set_pause(bool pause);
 
        bool update_scene();
-       void load_kernels(bool lock_scene=true);
+       bool load_kernels(bool lock_scene=true);
 
        void device_free();
 
index 4ed9ebd60ffda756313f4ad3cfbdd79b3cedf8b9..06900d14cdc2da694929c5c712c15c0edba69eca 100644 (file)
@@ -46,6 +46,7 @@ public:
                substatus = "";
                sync_status = "";
                sync_substatus = "";
+               kernel_status = "";
                update_cb = function_null;
                cancel = false;
                cancel_message = "";
@@ -86,6 +87,7 @@ public:
                substatus = "";
                sync_status = "";
                sync_substatus = "";
+               kernel_status = "";
                cancel = false;
                cancel_message = "";
                error = false;
@@ -313,6 +315,25 @@ public:
                }
        }
 
+
+       /* kernel status */
+
+       /* Store a new kernel status string under `progress_mutex`, then call
+        * set_update(). The mutex is released before set_update() runs. */
+       void set_kernel_status(const string &kernel_status_)
+       {
+               thread_scoped_lock status_lock(progress_mutex);
+               kernel_status = kernel_status_;
+               status_lock.unlock();
+
+               set_update();
+       }
+
+       /* Copy the current kernel status string into `kernel_status_` while
+        * holding `progress_mutex`. */
+       void get_kernel_status(string &kernel_status_)
+       {
+               thread_scoped_lock status_lock(progress_mutex);
+               kernel_status_ = kernel_status;
+       }
+
        /* callback */
 
        void set_update()
@@ -356,6 +377,8 @@ protected:
        string sync_status;
        string sync_substatus;
 
+       string kernel_status;
+
        volatile bool cancel;
        string cancel_message;
 
index 2a705c2432bc76361312734c0e206227902123c1..ce166af206a1315a188d2d497c5022a4115f032a 100644 (file)
@@ -148,6 +148,12 @@ bool TaskPool::canceled()
        return do_cancel;
 }
 
+/* Check whether all work of this pool has been completed, i.e. whether
+ * `num` is zero. The counter is read while holding `num_mutex`. */
+bool TaskPool::finished()
+{
+       num_mutex.lock();
+       const bool all_done = (num == 0);
+       num_mutex.unlock();
+
+       return all_done;
+}
+
 void TaskPool::num_decrease(int done)
 {
        num_mutex.lock();
index 15f0d341be70c2e0cf94e514f937440e26144538..a7e19d1ab7505344f4ef162e96f6b84a5b83625e 100644 (file)
@@ -93,6 +93,7 @@ public:
        void wait_work(Summary *stats = NULL);  /* work and wait until all tasks are done */
        void cancel();          /* cancel all tasks, keep worker threads running */
        void stop();            /* stop all worker threads */
+       bool finished();        /* check if all work has been completed */
 
        bool canceled();        /* for worker threads, test if canceled */