Cycles/OpenCL: Compile Kernels During Scene Update
authorJeroen Bakker <j.bakker@atmind.nl>
Wed, 13 Mar 2019 11:31:48 +0000 (12:31 +0100)
committerJeroen Bakker <j.bakker@atmind.nl>
Fri, 15 Mar 2019 15:18:21 +0000 (16:18 +0100)
The main goal of this change is faster startup when using foreground
rendering.

This patch will build kernels in parallel to the update process of
the scene. When these optimized kernels are not available (yet) an AO
kernel will be used.

These AO kernels are fast to compile (3-7 seconds) and can be
reused by all scenes. When the final kernels become available we
will switch to these kernels.

In background mode the AO kernels will not be used.
Some kernels are used during the scene update (displace, background
light). When these kernels are needed, the process can block until
they become available.

Reviewed By: brecht, #cycles

Maniphest Tasks: T61752

Differential Revision: https://developer.blender.org/D4428

13 files changed:
intern/cycles/blender/blender_session.cpp
intern/cycles/blender/blender_session.h
intern/cycles/device/device.h
intern/cycles/device/device_multi.cpp
intern/cycles/device/opencl/opencl.h
intern/cycles/device/opencl/opencl_split.cpp
intern/cycles/device/opencl/opencl_util.cpp
intern/cycles/kernel/kernel_types.h
intern/cycles/render/session.cpp
intern/cycles/render/session.h
intern/cycles/util/util_progress.h
intern/cycles/util/util_task.cpp
intern/cycles/util/util_task.h

index ab08b9e146d128fe12924c11eb50eefc0de107fa..27541800804151f563b3eb580740559591aa4a52 100644 (file)
@@ -933,6 +933,11 @@ void BlenderSession::get_status(string& status, string& substatus)
        session->progress.get_status(status, substatus);
 }
 
+void BlenderSession::get_kernel_status(string& kernel_status)
+{
+       session->progress.get_kernel_status(kernel_status);
+}
+
 void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
 {
        session->progress.get_time(total_time, render_time);
@@ -951,7 +956,7 @@ void BlenderSession::update_bake_progress()
 
 void BlenderSession::update_status_progress()
 {
-       string timestatus, status, substatus;
+       string timestatus, status, substatus, kernel_status;
        string scene = "";
        float progress;
        double total_time, remaining_time = 0, render_time;
@@ -960,6 +965,7 @@ void BlenderSession::update_status_progress()
        float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
 
        get_status(status, substatus);
+       get_kernel_status(kernel_status);
        get_progress(progress, total_time, render_time);
 
        if(progress > 0)
@@ -989,6 +995,8 @@ void BlenderSession::update_status_progress()
                status = " | " + status;
        if(substatus.size() > 0)
                status += " | " + substatus;
+       if(kernel_status.size() > 0)
+               status += " | " + kernel_status;
 
        double current_time = time_dt();
        /* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date.
index 2aa3c77c37d9bc235e4fd190bd099cd38a2663cc..2c0a83cf6e766ce60356d5ba70a1d16fbc98d73c 100644 (file)
@@ -90,6 +90,7 @@ public:
        void tag_redraw();
        void tag_update();
        void get_status(string& status, string& substatus);
+       void get_kernel_status(string& kernel_status);
        void get_progress(float& progress, double& total_time, double& render_time);
        void test_cancel();
        void update_status_progress();
index 3bf978600d5a444175ac316db40c8cd9becbb582..6f3208e955f18e71ac7af5058b0fdaae9d7e65e1 100644 (file)
@@ -56,6 +56,14 @@ enum DeviceTypeMask {
        DEVICE_MASK_ALL = ~0
 };
 
+enum DeviceKernelStatus {
+       DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0,
+       DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE,
+       DEVICE_KERNEL_USING_FEATURE_KERNEL,
+       DEVICE_KERNEL_FEATURE_KERNEL_INVALID,
+       DEVICE_KERNEL_UNKNOWN,
+};
+
 #define DEVICE_MASK(type) (DeviceTypeMask)(1 << type)
 
 class DeviceInfo {
@@ -321,6 +329,20 @@ public:
                const DeviceRequestedFeatures& /*requested_features*/)
        { return true; }
 
+       /* Wait for device to become available to upload data and receive tasks
+        * This method is used by the OpenCL device to load the
+        * optimized kernels or when not (yet) available load the
+        * generic kernels (only during foreground rendering) */
+       virtual bool wait_for_availability(
+               const DeviceRequestedFeatures& /*requested_features*/)
+       { return true; }
+       /* Check if there are 'better' kernels available to be used
+        * We can switch over to these kernels
+        * This method is used to determine if we can switch the preview kernels
+        * to regular kernels */
+       virtual DeviceKernelStatus get_active_kernel_switch_state()
+       { return DEVICE_KERNEL_USING_FEATURE_KERNEL; }
+
        /* tasks */
        virtual int get_split_task_count(DeviceTask& task) = 0;
        virtual void task_add(DeviceTask& task) = 0;
index 2fac4fa071bddaea02248f2acc24d44fbc5134b7..516b86654aa9a6e496bb59d6383d65626657fcc8 100644 (file)
@@ -120,6 +120,37 @@ public:
                return true;
        }
 
+       bool wait_for_availability(const DeviceRequestedFeatures& requested_features)
+       {
+               foreach(SubDevice& sub, devices)
+                       if(!sub.device->wait_for_availability(requested_features))
+                               return false;
+
+               return true;
+       }
+
+       DeviceKernelStatus get_active_kernel_switch_state()
+       {
+               DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
+
+               foreach(SubDevice& sub, devices) {
+                       DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
+                       switch (subresult) {
+                               case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
+                                       result = subresult;
+                                       break;
+
+                               case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
+                               case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
+                                       return subresult;
+
+                               case DEVICE_KERNEL_USING_FEATURE_KERNEL:
+                                       break;
+                       }
+               }
+               return result;
+       }
+
        void mem_alloc(device_memory& mem)
        {
                device_ptr key = unique_key++;
index 2a4e07419ac5abee324275226512a173fa5c9f74..bb507be4c7218986bd608ec60b4ae245646d4a47 100644 (file)
@@ -261,16 +261,22 @@ class OpenCLDevice : public Device
 {
 public:
        DedicatedTaskPool task_pool;
+
+       /* Task pool for required kernels (base, AO kernels during foreground rendering) */
+       TaskPool load_required_kernel_task_pool;
+       /* Task pool for optional kernels (feature kernels during foreground rendering) */
+       TaskPool load_kernel_task_pool;
        cl_context cxContext;
        cl_command_queue cqCommandQueue;
        cl_platform_id cpPlatform;
        cl_device_id cdDevice;
        cl_int ciErr;
        int device_num;
+       bool use_preview_kernels;
 
        class OpenCLProgram {
        public:
-               OpenCLProgram() : loaded(false), program(NULL), device(NULL) {}
+               OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL) {}
                OpenCLProgram(OpenCLDevice *device,
                              const string& program_name,
                              const string& kernel_name,
@@ -279,12 +285,24 @@ public:
                ~OpenCLProgram();
 
                void add_kernel(ustring name);
-               void load();
+
+               /* Try to load the program from device cache or disk */
+               bool load();
+               /* Compile the kernel (first separate, fallback to local) */
+               void compile();
+               /* Create the OpenCL kernels after loading or compiling */
+               void create_kernels();
 
                bool is_loaded() const { return loaded; }
                const string& get_log() const { return log; }
                void report_error();
 
+               /* Wait until this kernel is available to be used 
+                * It will return true when the kernel is available.
+                * It will return false when the kernel is not available 
+                * or could not be loaded. */
+               bool wait_for_availability();
+
                cl_kernel operator()();
                cl_kernel operator()(ustring name);
 
@@ -308,6 +326,8 @@ public:
                void add_error(const string& msg);
 
                bool loaded;
+               bool needs_compiling;
+
                cl_program program;
                OpenCLDevice *device;
 
@@ -323,19 +343,32 @@ public:
                map<ustring, cl_kernel> kernels;
        };
 
-       DeviceSplitKernel *split_kernel;
-
-       OpenCLProgram program_split;
+       /* Container for all types of split programs. */
+       class OpenCLSplitPrograms {
+               public:
+                       OpenCLDevice *device;
+                       OpenCLProgram program_split;
+                       OpenCLProgram program_lamp_emission;
+                       OpenCLProgram program_do_volume;
+                       OpenCLProgram program_indirect_background;
+                       OpenCLProgram program_shader_eval;
+                       OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
+                       OpenCLProgram program_subsurface_scatter;
+                       OpenCLProgram program_direct_lighting;
+                       OpenCLProgram program_shadow_blocked_ao;
+                       OpenCLProgram program_shadow_blocked_dl;
+
+                       OpenCLSplitPrograms(OpenCLDevice *device);
+                       ~OpenCLSplitPrograms();
+
+                       /* Load the kernels and put the created kernels in the given `programs`
+                        * parameter. */
+                       void load_kernels(vector<OpenCLProgram*> &programs,
+                                         const DeviceRequestedFeatures& requested_features,
+                                         bool is_preview=false);
+       };
 
-       OpenCLProgram program_lamp_emission;
-       OpenCLProgram program_do_volume;
-       OpenCLProgram program_indirect_background;
-       OpenCLProgram program_shader_eval;
-       OpenCLProgram program_holdout_emission_blurring_pathtermination_ao;
-       OpenCLProgram program_subsurface_scatter;
-       OpenCLProgram program_direct_lighting;
-       OpenCLProgram program_shadow_blocked_ao;
-       OpenCLProgram program_shadow_blocked_dl;
+       DeviceSplitKernel *split_kernel;
 
        OpenCLProgram base_program;
        OpenCLProgram bake_program;
@@ -343,6 +376,9 @@ public:
        OpenCLProgram background_program;
        OpenCLProgram denoising_program;
 
+       OpenCLSplitPrograms kernel_programs;
+       OpenCLSplitPrograms preview_programs;
+
        typedef map<string, device_vector<uchar>*> ConstMemMap;
        typedef map<string, device_ptr> MemMap;
 
@@ -358,22 +394,30 @@ public:
        void opencl_error(const string& message);
        void opencl_assert_err(cl_int err, const char* where);
 
-       OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_);
+       OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
        ~OpenCLDevice();
 
        static void CL_CALLBACK context_notify_callback(const char *err_info,
                const void * /*private_info*/, size_t /*cb*/, void *user_data);
 
        bool opencl_version_check();
+       OpenCLSplitPrograms* get_split_programs();
 
        string device_md5_hash(string kernel_custom_build_options = "");
        bool load_kernels(const DeviceRequestedFeatures& requested_features);
+       void load_required_kernels(const DeviceRequestedFeatures& requested_features);
+       void load_preview_kernels();
+
+       bool wait_for_availability(const DeviceRequestedFeatures& requested_features);
+       DeviceKernelStatus get_active_kernel_switch_state();
 
        /* Get the name of the opencl program for the given kernel */
        const string get_opencl_program_name(const string& kernel_name);
        /* Get the program file name to compile (*.cl) for the given kernel */
        const string get_opencl_program_filename(const string& kernel_name);
-       string get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name);
+       string get_build_options(const DeviceRequestedFeatures& requested_features,
+                                const string& opencl_program_name,
+                                bool preview_kernel=false);
        /* Enable the default features to reduce recompilation events */
        void enable_default_features(DeviceRequestedFeatures& features);
 
index 57612098b34548cdcd99de23a94852c2578f5e8b..555707cecd552a797f0177918a9987e3e24a5290 100644 (file)
@@ -104,7 +104,7 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures& features)
        }
 }
 
-string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name)
+string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name, bool preview_kernel)
 {
        /* first check for non-split kernel programs */
        if (opencl_program_name == "base" || opencl_program_name == "denoising") {
@@ -181,7 +181,13 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_
        enable_default_features(nofeatures);
 
        /* Add program specific optimized compile directives */
-       if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
+       if (preview_kernel) {
+               DeviceRequestedFeatures preview_features;
+               preview_features.use_hair = true;
+               build_options += "-D__KERNEL_OPENCL_PREVIEW__ ";
+               build_options += preview_features.get_build_options();
+       }
+       else if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
                build_options += nofeatures.get_build_options();
        }
        else {
@@ -208,6 +214,77 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_
        return build_options;
 }
 
+OpenCLDevice::OpenCLSplitPrograms::OpenCLSplitPrograms(OpenCLDevice *device_)
+{
+       device = device_;
+}
+
+OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms()
+{
+       program_split.release();
+       program_lamp_emission.release();
+       program_do_volume.release();
+       program_indirect_background.release();
+       program_shader_eval.release();
+       program_holdout_emission_blurring_pathtermination_ao.release();
+       program_subsurface_scatter.release();
+       program_direct_lighting.release();
+       program_shadow_blocked_ao.release();
+       program_shadow_blocked_dl.release();
+}
+
+void OpenCLDevice::OpenCLSplitPrograms::load_kernels(vector<OpenCLProgram*> &programs, const DeviceRequestedFeatures& requested_features, bool is_preview)
+{
+       if (!requested_features.use_baking) {
+#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
+#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
+               const string program_name_##kernel_name = "split_"#kernel_name; \
+               program_##kernel_name = \
+                       OpenCLDevice::OpenCLProgram(device, \
+                                                   program_name_##kernel_name, \
+                                                   "kernel_"#kernel_name".cl", \
+                                                   device->get_build_options(requested_features, program_name_##kernel_name, is_preview)); \
+               program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
+               programs.push_back(&program_##kernel_name);
+
+               /* Ordered with most complex kernels first, to reduce overall compile time. */
+               ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
+               if (requested_features.use_volume || is_preview) {
+                       ADD_SPLIT_KERNEL_PROGRAM(do_volume);
+               }
+               ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
+               ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
+               ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
+               ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
+               ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
+               ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
+               ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
+
+               /* Quick kernels bundled in a single program to reduce overhead of starting
+                       * Blender processes. */
+               program_split = OpenCLDevice::OpenCLProgram(device,
+                                                           "split_bundle" ,
+                                                           "kernel_split_bundle.cl",
+                                                           device->get_build_options(requested_features, "split_bundle", is_preview));
+
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
+               ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
+               programs.push_back(&program_split);
+
+#undef ADD_SPLIT_KERNEL_PROGRAM
+#undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM
+       }
+}
+
 namespace {
 
 /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
@@ -319,7 +396,9 @@ public:
                        OpenCLDevice::OpenCLProgram(device,
                                                    program_name,
                                                    device->get_opencl_program_filename(kernel_name),
-                                                   device->get_build_options(requested_features, program_name));
+                                                   device->get_build_options(requested_features, 
+                                                                             program_name, 
+                                                                             device->use_preview_kernels));
 
                kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
                kernel->program.load();
@@ -339,7 +418,8 @@ public:
                size_buffer.zero_to_device();
 
                uint threads = num_threads;
-               cl_kernel kernel_state_buffer_size = device->program_split(ustring("path_trace_state_buffer_size"));
+               OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
+               cl_kernel kernel_state_buffer_size = programs->program_split(ustring("path_trace_state_buffer_size"));
                device->kernel_set_args(kernel_state_buffer_size, 0, kg, data, threads, size_buffer);
 
                size_t global_size = 64;
@@ -389,7 +469,8 @@ public:
                cl_int start_sample = rtile.start_sample;
                cl_int end_sample = rtile.start_sample + rtile.num_samples;
 
-               cl_kernel kernel_data_init = device->program_split(ustring("path_trace_data_init"));
+               OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
+               cl_kernel kernel_data_init = programs->program_split(ustring("path_trace_data_init"));
 
                cl_uint start_arg_index =
                        device->kernel_set_args(kernel_data_init,
@@ -522,6 +603,8 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char* where)
 
 OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background)
 : Device(info, stats, profiler, background),
+  kernel_programs(this),
+  preview_programs(this),
   memory_manager(this),
   texture_info(this, "__texture_info", MEM_TEXTURE)
 {
@@ -532,6 +615,7 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b
        null_mem = 0;
        device_initialized = false;
        textures_need_update = true;
+       use_preview_kernels = !background;
 
        vector<OpenCLPlatformDevice> usable_devices;
        OpenCLInfo::get_usable_devices(&usable_devices);
@@ -595,11 +679,16 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b
        device_initialized = true;
 
        split_kernel = new OpenCLSplitKernel(this);
+       if (!background) {
+               load_preview_kernels();
+       }
 }
 
 OpenCLDevice::~OpenCLDevice()
 {
        task_pool.stop();
+       load_required_kernel_task_pool.stop();
+       load_kernel_task_pool.stop();
 
        memory_manager.free();
 
@@ -615,7 +704,7 @@ OpenCLDevice::~OpenCLDevice()
        bake_program.release();
        displace_program.release();
        background_program.release();
-       program_split.release();
+       denoising_program.release();
 
        if(cqCommandQueue)
                clReleaseCommandQueue(cqCommandQueue);
@@ -681,8 +770,51 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
        /* Verify we have right opencl version. */
        if(!opencl_version_check())
                return false;
+       
+       load_required_kernels(requested_features);
+
+       vector<OpenCLProgram*> programs;
+       kernel_programs.load_kernels(programs, requested_features, false);
+
+       if (!requested_features.use_baking && requested_features.use_denoising) {
+               denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising"));
+               denoising_program.add_kernel(ustring("filter_divide_shadow"));
+               denoising_program.add_kernel(ustring("filter_get_feature"));
+               denoising_program.add_kernel(ustring("filter_write_feature"));
+               denoising_program.add_kernel(ustring("filter_detect_outliers"));
+               denoising_program.add_kernel(ustring("filter_combine_halves"));
+               denoising_program.add_kernel(ustring("filter_construct_transform"));
+               denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
+               denoising_program.add_kernel(ustring("filter_nlm_blur"));
+               denoising_program.add_kernel(ustring("filter_nlm_calc_weight"));
+               denoising_program.add_kernel(ustring("filter_nlm_update_output"));
+               denoising_program.add_kernel(ustring("filter_nlm_normalize"));
+               denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
+               denoising_program.add_kernel(ustring("filter_finalize"));
+               programs.push_back(&denoising_program);
+       }
+
+       load_required_kernel_task_pool.wait_work();
+
+       /* Parallel compilation of Cycles kernels, this launches multiple
+        * processes to workaround OpenCL frameworks serializing the calls
+        * internally within a single process. */
+       foreach(OpenCLProgram *program, programs) {
+               if (!program->load()) {
+                       load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+               }
+       }
+       return true;
+}
 
+void OpenCLDevice::load_required_kernels(const DeviceRequestedFeatures& requested_features)
+{
        vector<OpenCLProgram*> programs;
+       base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
+       base_program.add_kernel(ustring("convert_to_byte"));
+       base_program.add_kernel(ustring("convert_to_half_float"));
+       base_program.add_kernel(ustring("zero_buffer"));
+       programs.push_back(&base_program);
 
        if (requested_features.use_true_displacement) {
                displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", get_build_options(requested_features, "displace"));
@@ -696,101 +828,89 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
                programs.push_back(&background_program);
        }
 
-#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
-#define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
-               const string program_name_##kernel_name = "split_"#kernel_name; \
-               program_##kernel_name = \
-                       OpenCLDevice::OpenCLProgram(this, \
-                                                   program_name_##kernel_name, \
-                                                   "kernel_"#kernel_name".cl", \
-                                                   get_build_options(requested_features, program_name_##kernel_name)); \
-               program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \
-               programs.push_back(&program_##kernel_name);
-
-       /* Ordered with most complex kernels first, to reduce overall compile time. */
-       ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
-       if (requested_features.use_volume) {
-               ADD_SPLIT_KERNEL_PROGRAM(do_volume);
-       }
-       ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
-       ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
-       ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
-       ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
-       ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
-       ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
-       ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
-
-       /* Quick kernels bundled in a single program to reduce overhead of starting
-               * Blender processes. */
-       program_split = OpenCLDevice::OpenCLProgram(this,
-                                                                                               "split_bundle" ,
-                                                                                               "kernel_split_bundle.cl",
-                                                                                               get_build_options(requested_features, "split_bundle"));
-
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(path_init);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(scene_intersect);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
-       ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
-       programs.push_back(&program_split);
-
-#undef ADD_SPLIT_KERNEL_PROGRAM
-#undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM
-
-       base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
-       base_program.add_kernel(ustring("convert_to_byte"));
-       base_program.add_kernel(ustring("convert_to_half_float"));
-       base_program.add_kernel(ustring("zero_buffer"));
-       programs.push_back(&base_program);
-
        if (requested_features.use_baking) {
                bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", get_build_options(requested_features, "bake"));
                bake_program.add_kernel(ustring("bake"));
                programs.push_back(&bake_program);
        }
 
-       denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising"));
-       denoising_program.add_kernel(ustring("filter_divide_shadow"));
-       denoising_program.add_kernel(ustring("filter_get_feature"));
-       denoising_program.add_kernel(ustring("filter_write_feature"));
-       denoising_program.add_kernel(ustring("filter_detect_outliers"));
-       denoising_program.add_kernel(ustring("filter_combine_halves"));
-       denoising_program.add_kernel(ustring("filter_construct_transform"));
-       denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
-       denoising_program.add_kernel(ustring("filter_nlm_blur"));
-       denoising_program.add_kernel(ustring("filter_nlm_calc_weight"));
-       denoising_program.add_kernel(ustring("filter_nlm_update_output"));
-       denoising_program.add_kernel(ustring("filter_nlm_normalize"));
-       denoising_program.add_kernel(ustring("filter_nlm_construct_gramian"));
-       denoising_program.add_kernel(ustring("filter_finalize"));
-       programs.push_back(&denoising_program);
-
-       /* Parallel compilation of Cycles kernels, this launches multiple
-        * processes to workaround OpenCL frameworks serializing the calls
-        * internally within a single process. */
-       TaskPool task_pool;
        foreach(OpenCLProgram *program, programs) {
-               task_pool.push(function_bind(&OpenCLProgram::load, program));
+               if (!program->load()) {
+                       load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+               }
        }
-       task_pool.wait_work();
+}
+
+void OpenCLDevice::load_preview_kernels()
+{
+       DeviceRequestedFeatures no_features;
+       vector<OpenCLProgram*> programs;
+       preview_programs.load_kernels(programs, no_features, true);
 
        foreach(OpenCLProgram *program, programs) {
-               VLOG(2) << program->get_log();
-               if(!program->is_loaded()) {
-                       program->report_error();
-                       return false;
+               if (!program->load()) {
+                       load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
                }
        }
+}
 
+bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures& requested_features)
+{
+       if (background) {
+               load_kernel_task_pool.wait_work();
+               use_preview_kernels = false;
+       }
+       else {
+               /* We use a device setting to determine to load preview kernels or not
+                * Better to check on device level than per kernel as mixing preview and
+                * non-preview kernels does not work due to different data types */
+               if (use_preview_kernels) {
+                       use_preview_kernels = !load_kernel_task_pool.finished();
+               }
+       }
        return split_kernel->load_kernels(requested_features);
 }
 
+OpenCLDevice::OpenCLSplitPrograms* OpenCLDevice::get_split_programs()
+{
+       return use_preview_kernels?&preview_programs:&kernel_programs;
+}
+
+DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
+{
+       /* Do not switch kernels for background renderings
+        * We do foreground rendering but use the preview kernels
+        * Check for the optimized kernels 
+        *
+        * This works also the other way around, where we are using
+        * optimized kernels but new ones are being compiled due
+        * to other features that are needed */
+       if (background) {
+               /* The if-statements below would find the same result,
+                * But as the `finished` method uses a mutex we added
+                * this as an early exit */
+               return DEVICE_KERNEL_USING_FEATURE_KERNEL;
+       }
+       
+       bool other_kernels_finished = load_kernel_task_pool.finished();
+       if (use_preview_kernels) {
+               if (other_kernels_finished) {
+                       return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
+               }
+               else {
+                       return DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL;
+               }
+       }
+       else {
+               if (other_kernels_finished) {
+                       return DEVICE_KERNEL_USING_FEATURE_KERNEL;
+               }
+               else {
+                       return DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
+               }
+       }
+}
+
 void OpenCLDevice::mem_alloc(device_memory& mem)
 {
        if(mem.name) {
@@ -892,6 +1012,7 @@ void OpenCLDevice::mem_copy_from(device_memory& mem, int y, int w, int h, int el
 
 void OpenCLDevice::mem_zero_kernel(device_ptr mem, size_t size)
 {
+       base_program.wait_for_availability();
        cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer"));
 
        size_t global_size[] = {1024, 1024};
@@ -1719,17 +1840,15 @@ void OpenCLDevice::shader(DeviceTask& task)
        cl_int d_shader_w = task.shader_w;
        cl_int d_offset = task.offset;
 
-       cl_kernel kernel;
-
+       OpenCLDevice::OpenCLProgram *program = &background_program;
        if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
-               kernel = bake_program(ustring("bake"));
+               program = &bake_program;
        }
        else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
-               kernel = displace_program(ustring("displace"));
-       }
-       else {
-               kernel = background_program(ustring("background"));
+               program = &displace_program;
        }
+       program->wait_for_availability();
+       cl_kernel kernel = (*program)();
 
        cl_uint start_arg_index =
                kernel_set_args(kernel,
index ef0deaeff62b9af6e67f9e12897e002882dcba58..920c8dc4e6ae58fe267112e68f4ceb0bb9c2abed 100644 (file)
@@ -243,6 +243,18 @@ string OpenCLCache::get_kernel_md5()
        return self.kernel_md5;
 }
 
+static string get_program_source(const string& kernel_file)
+{
+       string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
+       /* We compile kernels consisting of many files. Unfortunately, OpenCL
+        * kernel caches do not seem to recognize changes in included files,
+        * so we force a recompile on changes by adding the md5 hash of all files.
+        */
+       source = path_source_replace_includes(source, path_get("source"));
+       source += "\n// " + util_md5_string(source) + "\n";
+       return source;
+}
+
 OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
                                                const string& program_name,
                                                const string& kernel_file,
@@ -255,6 +267,7 @@ OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
    use_stdout(use_stdout)
 {
        loaded = false;
+       needs_compiling = true;
        program = NULL;
 }
 
@@ -343,13 +356,7 @@ bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src)
 
 bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src)
 {
-       string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
-       /* We compile kernels consisting of many files. unfortunately OpenCL
-        * kernel caches do not seem to recognize changes in included files.
-        * so we force recompile on changes by adding the md5 hash of all files.
-        */
-       source = path_source_replace_includes(source, path_get("source"));
-       source += "\n// " + util_md5_string(source) + "\n";
+       string source = get_program_source(kernel_file);
 
        if(debug_src) {
                path_write_text(*debug_src, source);
@@ -473,8 +480,7 @@ bool device_opencl_compile_kernel(const vector<string>& parameters)
                return false;
        }
 
-       string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
-       source = path_source_replace_includes(source, path_get("source"));
+       string source = get_program_source(kernel_file);
        size_t source_len = source.size();
        const char *source_str = source.c_str();
        cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
@@ -548,11 +554,54 @@ bool OpenCLDevice::OpenCLProgram::save_binary(const string& clbin)
        return path_write_binary(clbin, binary);
 }
 
-void OpenCLDevice::OpenCLProgram::load()
+bool OpenCLDevice::OpenCLProgram::load()
 {
-       assert(device);
-
        loaded = false;
+       string device_md5 = device->device_md5_hash(kernel_build_options);
+
+       /* Try to use cached kernel. */
+       thread_scoped_lock cache_locker;
+       ustring cache_key(program_name + device_md5);
+       program = device->load_cached_kernel(cache_key,
+                                            cache_locker);
+       if (!program) {
+               add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
+
+               /* need to create source to get md5 */
+               string source = get_program_source(kernel_file);
+
+               string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
+               basename = path_cache_get(path_join("kernels", basename));
+               string clbin = basename + ".clbin";
+
+               /* If binary kernel exists already, try use it. */
+               if(path_exists(clbin) && load_binary(clbin)) {
+                       /* Kernel loaded from binary, nothing to do. */
+                       add_log(string("Loaded program from ") + clbin + ".", true);
+
+                       /* Cache the program. */
+                       device->store_cached_kernel(program,
+                                                   cache_key,
+                                                   cache_locker);
+               }
+               else {
+                       add_log(string("OpenCL program ") + program_name + " not found on disk.", true);
+                       cache_locker.unlock();
+               }
+       }
+
+       if (program) {
+               create_kernels();
+               loaded = true;
+               needs_compiling = false;
+       }
+
+       return loaded;
+}
+
+void OpenCLDevice::OpenCLProgram::compile()
+{
+       assert(device);
 
        string device_md5 = device->device_md5_hash(kernel_build_options);
 
@@ -562,12 +611,13 @@ void OpenCLDevice::OpenCLProgram::load()
        program = device->load_cached_kernel(cache_key,
                                             cache_locker);
 
-       if(!program) {
+       if (!program)
+       {
+
                add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
 
                /* need to create source to get md5 */
-               string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
-               source = path_source_replace_includes(source, path_get("source"));
+               string source = get_program_source(kernel_file);
 
                string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
                basename = path_cache_get(path_join("kernels", basename));
@@ -582,49 +632,38 @@ void OpenCLDevice::OpenCLProgram::load()
                }
 
                /* If binary kernel exists already, try use it. */
-               if(path_exists(clbin) && load_binary(clbin)) {
-                       /* Kernel loaded from binary, nothing to do. */
-                       add_log(string("Loaded program from ") + clbin + ".", true);
+               if(compile_separate(clbin)) {
+                       add_log(string("Built and loaded program from ") + clbin + ".", true);
+                       loaded = true;
                }
                else {
-                       add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
-                       if(!path_exists(clbin)) {
-                               if(compile_separate(clbin)) {
-                                       add_log(string("Built and loaded program from ") + clbin + ".", true);
-                                       loaded = true;
-                               }
-                               else {
-                                       add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
-
-                                       /* If does not exist or loading binary failed, compile kernel. */
-                                       if(!compile_kernel(debug_src)) {
-                                               return;
-                                       }
-
-                                       /* Save binary for reuse. */
-                                       if(!save_binary(clbin)) {
-                                               add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
-                                       }
-                               }
+                       add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
+
+                       /* Separate-process building failed, fall back to regular building. */
+                       if(!compile_kernel(debug_src)) {
+                               needs_compiling = false;
+                               return;
                        }
-                       else {
-                               add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
-                               /* Fall back to compiling. */
-                               if(!compile_kernel(debug_src)) {
-                                       return;
-                               }
+
+                       /* Save binary for reuse. */
+                       if(!save_binary(clbin)) {
+                               add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
                        }
                }
 
                /* Cache the program. */
                device->store_cached_kernel(program,
-                                           cache_key,
-                                           cache_locker);
-       }
-       else {
-               add_log(string("Found cached OpenCL program ") + program_name + ".", true);
+                                                                       cache_key,
+                                                                       cache_locker);
        }
 
+       create_kernels();
+       needs_compiling = false;
+       loaded = true;
+}
+
+void OpenCLDevice::OpenCLProgram::create_kernels()
+{
        for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
                assert(kernel->second == NULL);
                cl_int ciErr;
@@ -635,8 +674,15 @@ void OpenCLDevice::OpenCLProgram::load()
                        return;
                }
        }
+}
 
-       loaded = true;
+bool OpenCLDevice::OpenCLProgram::wait_for_availability()
+{
+       add_log(string("Waiting for availability of ") + program_name + ".", true);
+       while (needs_compiling) {
+               time_sleep(0.1);
+       }
+       return loaded;
 }
 
 void OpenCLDevice::OpenCLProgram::report_error()
index caa0057d9972d2a1a600ee7f81a72ed1af13761f..281d9a250473829ee352029549e5541782811462 100644 (file)
@@ -121,52 +121,62 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __KERNEL_OPENCL__
 
-/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
+#  if defined(__KERNEL_OPENCL_AMD__) || defined(__KERNEL_OPENCL_INTEL_CPU__)
+#    define __CL_USE_NATIVE__
+#  endif
 
-#  ifdef __KERNEL_OPENCL_NVIDIA__
-#    define __KERNEL_SHADING__
-#    define __KERNEL_ADV_SHADING__
-#    define __SUBSURFACE__
-#    define __PRINCIPLED__
-#    define __VOLUME__
-#    define __VOLUME_SCATTER__
-#    define __SHADOW_RECORD_ALL__
-#    define __CMJ__
-#    define __BRANCHED_PATH__
-#  endif  /* __KERNEL_OPENCL_NVIDIA__ */
+/* Preview kernel is used as a small kernel when the optimized kernel is still being compiled. */
+#  ifdef __KERNEL_OPENCL_PREVIEW__
+#    define __AO__
+#    define __PASSES__
+#    define __HAIR__
+#  else
+
+/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
 
-#  ifdef __KERNEL_OPENCL_APPLE__
-#    define __KERNEL_SHADING__
-#    define __KERNEL_ADV_SHADING__
-#    define __PRINCIPLED__
-#    define __CMJ__
+#    ifdef __KERNEL_OPENCL_NVIDIA__
+#      define __KERNEL_SHADING__
+#      define __KERNEL_ADV_SHADING__
+#      define __SUBSURFACE__
+#      define __PRINCIPLED__
+#      define __VOLUME__
+#      define __VOLUME_SCATTER__
+#      define __SHADOW_RECORD_ALL__
+#      define __CMJ__
+#      define __BRANCHED_PATH__
+#    endif  /* __KERNEL_OPENCL_NVIDIA__ */
+
+#    ifdef __KERNEL_OPENCL_APPLE__
+#      define __KERNEL_SHADING__
+#      define __KERNEL_ADV_SHADING__
+#      define __PRINCIPLED__
+#      define __CMJ__
 /* TODO(sergey): Currently experimental section is ignored here,
  * this is because megakernel in device_opencl does not support
  * custom cflags depending on the scene features.
  */
-#  endif  /* __KERNEL_OPENCL_APPLE__ */
-
-#  ifdef __KERNEL_OPENCL_AMD__
-#    define __CL_USE_NATIVE__
-#    define __KERNEL_SHADING__
-#    define __KERNEL_ADV_SHADING__
-#    define __SUBSURFACE__
-#    define __PRINCIPLED__
-#    define __VOLUME__
-#    define __VOLUME_SCATTER__
-#    define __SHADOW_RECORD_ALL__
-#    define __CMJ__
-#    define __BRANCHED_PATH__
-#  endif  /* __KERNEL_OPENCL_AMD__ */
-
-#  ifdef __KERNEL_OPENCL_INTEL_CPU__
-#    define __CL_USE_NATIVE__
-#    define __KERNEL_SHADING__
-#    define __KERNEL_ADV_SHADING__
-#    define __PRINCIPLED__
-#    define __CMJ__
-#  endif  /* __KERNEL_OPENCL_INTEL_CPU__ */
-
+#    endif  /* __KERNEL_OPENCL_APPLE__ */
+
+#    ifdef __KERNEL_OPENCL_AMD__
+#      define __KERNEL_SHADING__
+#      define __KERNEL_ADV_SHADING__
+#      define __SUBSURFACE__
+#      define __PRINCIPLED__
+#      define __VOLUME__
+#      define __VOLUME_SCATTER__
+#      define __SHADOW_RECORD_ALL__
+#      define __CMJ__
+#      define __BRANCHED_PATH__
+#    endif  /* __KERNEL_OPENCL_AMD__ */
+
+#    ifdef __KERNEL_OPENCL_INTEL_CPU__
+#      define __KERNEL_SHADING__
+#      define __KERNEL_ADV_SHADING__
+#      define __PRINCIPLED__
+#      define __CMJ__
+#    endif  /* __KERNEL_OPENCL_INTEL_CPU__ */
+
+#  endif  /* __KERNEL_OPENCL_PREVIEW__ */
 #endif  /* __KERNEL_OPENCL__ */
 
 /* Kernel features */
index dea50d52cfaa94bcf012b9444f4400bf138fdf28..d4b1a5e843bb44b05664ee73d7494f7044f7e775 100644 (file)
@@ -212,6 +212,11 @@ void Session::run_gpu()
                /* advance to next tile */
                bool no_tiles = !tile_manager.next();
 
+               DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+               if (no_tiles) {
+                       kernel_state = device->get_active_kernel_switch_state();
+               }
+
                if(params.background) {
                        /* if no work left and in background mode, we can stop immediately */
                        if(no_tiles) {
@@ -219,6 +224,16 @@ void Session::run_gpu()
                                break;
                        }
                }
+
+               /* Don't go into pause mode when the image was rendered with preview kernels.
+                * When feature kernels become available the session will be reset. */
+               else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+                       time_sleep(0.1);
+               }
+               else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+                       reset_gpu(tile_manager.params, params.samples);
+               }
+
                else {
                        /* if in interactive mode, and we are either paused or done for now,
                         * wait for pause condition notify to wake up again */
@@ -540,6 +555,11 @@ void Session::run_cpu()
                bool no_tiles = !tile_manager.next();
                bool need_tonemap = false;
 
+               DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+               if (no_tiles) {
+                       kernel_state = device->get_active_kernel_switch_state();
+               }
+
                if(params.background) {
                        /* if no work left and in background mode, we can stop immediately */
                        if(no_tiles) {
@@ -547,6 +567,16 @@ void Session::run_cpu()
                                break;
                        }
                }
+
+               /* Don't go into pause mode when preview kernels are used.
+                * When feature kernels become available the session will be reset. */
+               else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+                       time_sleep(0.1);
+               }
+               else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+                       reset_cpu(tile_manager.params, params.samples);
+               }
+
                else {
                        /* if in interactive mode, and we are either paused or done for now,
                         * wait for pause condition notify to wake up again */
@@ -699,7 +729,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
        return requested_features;
 }
 
-void Session::load_kernels(bool lock_scene)
+bool Session::load_kernels(bool lock_scene)
 {
        thread_scoped_lock scene_lock;
        if(lock_scene) {
@@ -722,7 +752,7 @@ void Session::load_kernels(bool lock_scene)
                        progress.set_error(message);
                        progress.set_status("Error", message);
                        progress.set_update();
-                       return;
+                       return false;
                }
 
                progress.add_skip_time(timer, false);
@@ -730,14 +760,13 @@ void Session::load_kernels(bool lock_scene)
 
                kernels_loaded = true;
                loaded_kernel_features = requested_features;
+               return true;
        }
+       return false;
 }
 
 void Session::run()
 {
-       /* load kernels */
-       load_kernels();
-
        if(params.use_profiling && (params.device.type == DEVICE_CPU)) {
                profiler.start();
        }
@@ -879,7 +908,7 @@ bool Session::update_scene()
 
        /* update scene */
        if(scene->need_update()) {
-               load_kernels(false);
+               bool new_kernels_needed = load_kernels(false);
 
                /* Update max_closures. */
                KernelIntegrator *kintegrator = &scene->dscene.data.integrator;
@@ -894,6 +923,21 @@ bool Session::update_scene()
                progress.set_status("Updating Scene");
                MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
 
+               DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state();
+               bool kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE ||
+                                           kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
+               if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+                       progress.set_kernel_status("Compiling render kernels");
+               }
+               if (new_kernels_needed || kernel_switch_needed) {
+                       progress.set_kernel_status("Compiling render kernels");
+                       device->wait_for_availability(loaded_kernel_features);
+                       progress.set_kernel_status("");
+               }
+
+               if (kernel_switch_needed) {
+                       reset(tile_manager.params, params.samples);
+               }
                return true;
        }
        return false;
index cbdfc75a9052cfce06f9838f04c3c9b4925e8d28..404b7b7a94509dd8bcd012edc23659a69fdb051a 100644 (file)
@@ -162,7 +162,7 @@ public:
        void set_pause(bool pause);
 
        bool update_scene();
-       void load_kernels(bool lock_scene=true);
+       bool load_kernels(bool lock_scene=true);
 
        void device_free();
 
index 4ed9ebd60ffda756313f4ad3cfbdd79b3cedf8b9..06900d14cdc2da694929c5c712c15c0edba69eca 100644 (file)
@@ -46,6 +46,7 @@ public:
                substatus = "";
                sync_status = "";
                sync_substatus = "";
+               kernel_status = "";
                update_cb = function_null;
                cancel = false;
                cancel_message = "";
@@ -86,6 +87,7 @@ public:
                substatus = "";
                sync_status = "";
                sync_substatus = "";
+               kernel_status = "";
                cancel = false;
                cancel_message = "";
                error = false;
@@ -313,6 +315,25 @@ public:
                }
        }
 
+
+       /* kernel status */
+
+       void set_kernel_status(const string &kernel_status_)
+       {
+               {
+                       thread_scoped_lock lock(progress_mutex);
+                       kernel_status = kernel_status_;
+               }
+
+               set_update();
+       }
+
+       void get_kernel_status(string &kernel_status_)
+       {
+               thread_scoped_lock lock(progress_mutex);
+               kernel_status_ = kernel_status;
+       }
+
        /* callback */
 
        void set_update()
@@ -356,6 +377,8 @@ protected:
        string sync_status;
        string sync_substatus;
 
+       string kernel_status;
+
        volatile bool cancel;
        string cancel_message;
 
index 2a705c2432bc76361312734c0e206227902123c1..ce166af206a1315a188d2d497c5022a4115f032a 100644 (file)
@@ -148,6 +148,12 @@ bool TaskPool::canceled()
        return do_cancel;
 }
 
+bool TaskPool::finished()
+{
+       thread_scoped_lock num_lock(num_mutex);
+       return num == 0;
+}
+
 void TaskPool::num_decrease(int done)
 {
        num_mutex.lock();
index 15f0d341be70c2e0cf94e514f937440e26144538..a7e19d1ab7505344f4ef162e96f6b84a5b83625e 100644 (file)
@@ -93,6 +93,7 @@ public:
        void wait_work(Summary *stats = NULL);  /* work and wait until all tasks are done */
        void cancel();          /* cancel all tasks, keep worker threads running */
        void stop();            /* stop all worker threads */
+       bool finished();        /* check if all work has been completed */
 
        bool canceled();        /* for worker threads, test if canceled */