Merge remote-tracking branch 'origin/blender-v2.93-release'
author Sybren A. Stüvel <sybren@blender.org>
Thu, 20 May 2021 11:00:07 +0000 (13:00 +0200)
committer Sybren A. Stüvel <sybren@blender.org>
Thu, 20 May 2021 11:00:07 +0000 (13:00 +0200)
14 files changed:
intern/cycles/bvh/bvh_optix.cpp
intern/cycles/bvh/bvh_optix.h
intern/cycles/device/device.h
intern/cycles/device/device_memory.cpp
intern/cycles/device/device_memory.h
intern/cycles/device/device_multi.cpp
intern/cycles/device/device_optix.cpp
intern/cycles/device/opencl/device_opencl.h
intern/cycles/device/opencl/device_opencl_impl.cpp
intern/cycles/kernel/kernel_types.h
intern/cycles/render/scene.cpp
intern/cycles/render/session.cpp
intern/cycles/util/util_vector.h
release/scripts/modules/bpy_extras/anim_utils.py

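diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp
index d630e8965dcb6107832624808d950d80b009345c..cd266f72f89a22b2968c800bb36e9cba604a7c40 100644
--- a/intern/cycles/bvh/bvh_optix.cpp
+++ b/intern/cycles/bvh/bvh_optix.cpp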
@@ -17,6 +17,8 @@
 
 #ifdef WITH_OPTIX
 
+#  include "device/device.h"
+
 #  include "bvh/bvh_optix.h"
 
 CCL_NAMESPACE_BEGIN
@@ -26,6 +28,7 @@ BVHOptiX::BVHOptiX(const BVHParams &params_,
                    const vector<Object *> &objects_,
                    Device *device)
     : BVH(params_, geometry_, objects_),
+      device(device),
       traversable_handle(0),
       as_data(device, params_.top_level ? "optix tlas" : "optix blas", false),
       motion_transform_data(device, "optix motion transform", false)
@@ -34,7 +37,9 @@ BVHOptiX::BVHOptiX(const BVHParams &params_,
 
 BVHOptiX::~BVHOptiX()
 {
-  // Acceleration structure memory is freed via the 'as_data' destructor
+  // Acceleration structure memory is delayed freed on device, since deleting the
+  // BVH may happen while still being used for rendering.
+  device->release_optix_bvh(this);
 }
 
 CCL_NAMESPACE_END
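diff --git a/intern/cycles/bvh/bvh_optix.h b/intern/cycles/bvh/bvh_optix.h
index aa514beae0d8a00e9a2ee789145d074e256f9e8c..ba5d90471d1da338e7eaf6e96d4de8f1fd21df4c 100644
--- a/intern/cycles/bvh/bvh_optix.h
+++ b/intern/cycles/bvh/bvh_optix.h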
@@ -28,6 +28,7 @@ CCL_NAMESPACE_BEGIN
 
 class BVHOptiX : public BVH {
  public:
+  Device *device;
   uint64_t traversable_handle;
   device_only_memory<char> as_data;
   device_only_memory<char> motion_transform_data;
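diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index b5468248e5ac62f250be12ba8da56775037aa5f4..bdf18d09b31ccd85f3463c73863f8d7b3f5b02e5 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h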
@@ -61,7 +61,6 @@ enum DeviceTypeMask {
 };
 
 enum DeviceKernelStatus {
-  DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0,
   DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE,
   DEVICE_KERNEL_USING_FEATURE_KERNEL,
   DEVICE_KERNEL_FEATURE_KERNEL_INVALID,
@@ -427,6 +426,9 @@ class Device {
   /* acceleration structure building */
   virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
 
+  /* OptiX specific destructor. */
+  virtual void release_optix_bvh(BVH *){};
+
 #ifdef WITH_NETWORK
   /* networking */
   void server_run();
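diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp
index 9eee86b0814eba39d0b4a924296e773513dab3d3..80a05fc32fe1e791e69198a8be17fae1eecb06aa 100644
--- a/intern/cycles/device/device_memory.cpp
+++ b/intern/cycles/device/device_memory.cpp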
@@ -35,10 +35,54 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
       device_pointer(0),
       host_pointer(0),
       shared_pointer(0),
-      shared_counter(0)
+      shared_counter(0),
+      original_device_ptr(0),
+      original_device_size(0),
+      original_device(0),
+      need_realloc_(false),
+      modified(false)
 {
 }
 
+device_memory::device_memory(device_memory &&other) noexcept
+    : data_type(other.data_type),
+      data_elements(other.data_elements),
+      data_size(other.data_size),
+      device_size(other.device_size),
+      data_width(other.data_width),
+      data_height(other.data_height),
+      data_depth(other.data_depth),
+      type(other.type),
+      name(other.name),
+      device(other.device),
+      device_pointer(other.device_pointer),
+      host_pointer(other.host_pointer),
+      shared_pointer(other.shared_pointer),
+      shared_counter(other.shared_counter),
+      original_device_ptr(other.original_device_ptr),
+      original_device_size(other.original_device_size),
+      original_device(other.original_device),
+      need_realloc_(other.need_realloc_),
+      modified(other.modified)
+{
+  other.data_elements = 0;
+  other.data_size = 0;
+  other.device_size = 0;
+  other.data_width = 0;
+  other.data_height = 0;
+  other.data_depth = 0;
+  other.device = 0;
+  other.device_pointer = 0;
+  other.host_pointer = 0;
+  other.shared_pointer = 0;
+  other.shared_counter = 0;
+  other.original_device_ptr = 0;
+  other.original_device_size = 0;
+  other.original_device = 0;
+  other.need_realloc_ = false;
+  other.modified = false;
+}
+
 device_memory::~device_memory()
 {
   assert(shared_pointer == 0);
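diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index 97459b9ae6af683755bb2a98ea6fc5b2a4aa20fc..80f4d7b046892840a8afcd8e723213e6b0b1b206 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h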
@@ -238,6 +238,7 @@ class device_memory {
 
   /* Only create through subclasses. */
   device_memory(Device *device, const char *name, MemoryType type);
+  device_memory(device_memory &&other) noexcept;
 
   /* No copying allowed. */
   device_memory(const device_memory &) = delete;
@@ -277,6 +278,10 @@ template<typename T> class device_only_memory : public device_memory {
     data_elements = max(device_type_traits<T>::num_elements, 1);
   }
 
+  device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other))
+  {
+  }
+
   virtual ~device_only_memory()
   {
     free();
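diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 35faadcbec505cb60cbc07f553723340bfde1dce..85ffa5fcd525296c7d2129d670e7efb69d5d29f7 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp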
@@ -232,10 +232,6 @@ class MultiDevice : public Device {
     foreach (SubDevice &sub, devices) {
       DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
       switch (subresult) {
-        case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
-          result = subresult;
-          break;
-
         case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
         case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
           return subresult;
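diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index cce11507fa131f1eca021c3a11a7d22d372c790f..01de0724cb2958ad52d5b316cabc02377cd44ccd 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp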
@@ -193,6 +193,9 @@ class OptiXDevice : public CUDADevice {
   device_only_memory<unsigned char> denoiser_state;
   int denoiser_input_passes = 0;
 
+  vector<device_only_memory<char>> delayed_free_bvh_memory;
+  thread_mutex delayed_free_bvh_mutex;
+
  public:
   OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
       : CUDADevice(info_, stats_, profiler_, background_),
@@ -258,6 +261,8 @@ class OptiXDevice : public CUDADevice {
     // Make CUDA context current
     const CUDAContextScope scope(cuContext);
 
+    free_bvh_memory_delayed();
+
     sbt_data.free();
     texture_info.free();
     launch_params.free();
@@ -1297,6 +1302,8 @@ class OptiXDevice : public CUDADevice {
       return;
     }
 
+    free_bvh_memory_delayed();
+
     BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
 
     progress.set_substatus("Building OptiX acceleration structure");
@@ -1767,6 +1774,24 @@ class OptiXDevice : public CUDADevice {
     }
   }
 
+  void release_optix_bvh(BVH *bvh) override
+  {
+    thread_scoped_lock lock(delayed_free_bvh_mutex);
+    /* Do delayed free of BVH memory, since geometry holding BVH might be deleted
+     * while GPU is still rendering. */
+    BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
+
+    delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data));
+    delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data));
+    bvh_optix->traversable_handle = 0;
+  }
+
+  void free_bvh_memory_delayed()
+  {
+    thread_scoped_lock lock(delayed_free_bvh_mutex);
+    delayed_free_bvh_memory.free_memory();
+  }
+
   void const_copy_to(const char *name, void *host, size_t size) override
   {
     // Set constant memory for CUDA module
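diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h
index 2d6c6d04214d7ca4a63d01ff361d1b33b4c6bbc2..a65e764b0d41cef8b9cbbb555583e5463ed902fa 100644
--- a/intern/cycles/device/opencl/device_opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h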
@@ -269,7 +269,6 @@ class OpenCLDevice : public Device {
   cl_device_id cdDevice;
   cl_int ciErr;
   int device_num;
-  bool use_preview_kernels;
 
   class OpenCLProgram {
    public:
@@ -369,8 +368,7 @@ class OpenCLDevice : public Device {
     /* Load the kernels and put the created kernels in the given
      * `programs` parameter. */
     void load_kernels(vector<OpenCLProgram *> &programs,
-                      const DeviceRequestedFeatures &requested_features,
-                      bool is_preview = false);
+                      const DeviceRequestedFeatures &requested_features);
   };
 
   DeviceSplitKernel *split_kernel;
@@ -382,7 +380,6 @@ class OpenCLDevice : public Device {
   OpenCLProgram denoising_program;
 
   OpenCLSplitPrograms kernel_programs;
-  OpenCLSplitPrograms preview_programs;
 
   typedef map<string, device_vector<uchar> *> ConstMemMap;
   typedef map<string, device_ptr> MemMap;
@@ -412,7 +409,6 @@ class OpenCLDevice : public Device {
   string device_md5_hash(string kernel_custom_build_options = "");
   bool load_kernels(const DeviceRequestedFeatures &requested_features);
   void load_required_kernels(const DeviceRequestedFeatures &requested_features);
-  void load_preview_kernels();
 
   bool wait_for_availability(const DeviceRequestedFeatures &requested_features);
   DeviceKernelStatus get_active_kernel_switch_state();
@@ -422,8 +418,7 @@ class OpenCLDevice : public Device {
   /* Get the program file name to compile (*.cl) for the given kernel */
   const string get_opencl_program_filename(const string &kernel_name);
   string get_build_options(const DeviceRequestedFeatures &requested_features,
-                           const string &opencl_program_name,
-                           bool preview_kernel = false);
+                           const string &opencl_program_name);
   /* Enable the default features to reduce recompilation events */
   void enable_default_features(DeviceRequestedFeatures &features);
 
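diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index b1d6284171a16d9b0af26f5387a11123d6839cc3..715213175c9b8b1c4dd43e2e423f7deafbc01f62 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp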
@@ -107,8 +107,7 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures &features)
 }
 
 string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_features,
-                                       const string &opencl_program_name,
-                                       bool preview_kernel)
+                                       const string &opencl_program_name)
 {
   /* first check for non-split kernel programs */
   if (opencl_program_name == "base" || opencl_program_name == "denoising") {
@@ -185,13 +184,7 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_
   enable_default_features(nofeatures);
 
   /* Add program specific optimized compile directives */
-  if (preview_kernel) {
-    DeviceRequestedFeatures preview_features;
-    preview_features.use_hair = true;
-    build_options += "-D__KERNEL_AO_PREVIEW__ ";
-    build_options += preview_features.get_build_options();
-  }
-  else if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
+  if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) {
     build_options += nofeatures.get_build_options();
   }
   else {
@@ -238,9 +231,7 @@ OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms()
 }
 
 void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
-    vector<OpenCLProgram *> &programs,
-    const DeviceRequestedFeatures &requested_features,
-    bool is_preview)
+    vector<OpenCLProgram *> &programs, const DeviceRequestedFeatures &requested_features)
 {
   if (!requested_features.use_baking) {
 #  define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) \
@@ -251,7 +242,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
         device, \
         program_name_##kernel_name, \
         "kernel_" #kernel_name ".cl", \
-        device->get_build_options(requested_features, program_name_##kernel_name, is_preview)); \
+        device->get_build_options(requested_features, program_name_##kernel_name)); \
     program_##kernel_name.add_kernel(ustring("path_trace_" #kernel_name)); \
     programs.push_back(&program_##kernel_name);
 
@@ -259,7 +250,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
     ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
     ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
     ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
-    if (requested_features.use_volume || is_preview) {
+    if (requested_features.use_volume) {
       ADD_SPLIT_KERNEL_PROGRAM(do_volume);
     }
     ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
@@ -274,7 +265,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
         device,
         "split_bundle",
         "kernel_split_bundle.cl",
-        device->get_build_options(requested_features, "split_bundle", is_preview));
+        device->get_build_options(requested_features, "split_bundle"));
 
     ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init);
     ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
@@ -403,7 +394,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
         device,
         program_name,
         device->get_opencl_program_filename(kernel_name),
-        device->get_build_options(requested_features, program_name, device->use_preview_kernels));
+        device->get_build_options(requested_features, program_name));
 
     kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
     kernel->program.load();
@@ -617,7 +608,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
     : Device(info, stats, profiler, background),
       load_kernel_num_compiling(0),
       kernel_programs(this),
-      preview_programs(this),
       memory_manager(this),
       texture_info(this, "__texture_info", MEM_GLOBAL)
 {
@@ -627,7 +617,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
   cqCommandQueue = NULL;
   device_initialized = false;
   textures_need_update = true;
-  use_preview_kernels = !background;
 
   vector<OpenCLPlatformDevice> usable_devices;
   OpenCLInfo::get_usable_devices(&usable_devices);
@@ -683,9 +672,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
   device_initialized = true;
 
   split_kernel = new OpenCLSplitKernel(this);
-  if (use_preview_kernels) {
-    load_preview_kernels();
-  }
 }
 
 OpenCLDevice::~OpenCLDevice()
@@ -776,7 +762,7 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature
   load_required_kernels(requested_features);
 
   vector<OpenCLProgram *> programs;
-  kernel_programs.load_kernels(programs, requested_features, false);
+  kernel_programs.load_kernels(programs, requested_features);
 
   if (!requested_features.use_baking && requested_features.use_denoising) {
     denoising_program = OpenCLProgram(
@@ -854,19 +840,6 @@ void OpenCLDevice::load_required_kernels(const DeviceRequestedFeatures &requeste
   }
 }
 
-void OpenCLDevice::load_preview_kernels()
-{
-  DeviceRequestedFeatures no_features;
-  vector<OpenCLProgram *> programs;
-  preview_programs.load_kernels(programs, no_features, true);
-
-  foreach (OpenCLProgram *program, programs) {
-    if (!program->load()) {
-      load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
-    }
-  }
-}
-
 bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requested_features)
 {
   if (requested_features.use_baking) {
@@ -874,59 +847,18 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste
     return true;
   }
 
-  if (background) {
-    load_kernel_task_pool.wait_work();
-    use_preview_kernels = false;
-  }
-  else {
-    /* We use a device setting to determine to load preview kernels or not
-     * Better to check on device level than per kernel as mixing preview and
-     * non-preview kernels does not work due to different data types */
-    if (use_preview_kernels) {
-      use_preview_kernels = load_kernel_num_compiling.load() > 0;
-    }
-  }
+  load_kernel_task_pool.wait_work();
   return split_kernel->load_kernels(requested_features);
 }
 
 OpenCLDevice::OpenCLSplitPrograms *OpenCLDevice::get_split_programs()
 {
-  return use_preview_kernels ? &preview_programs : &kernel_programs;
+  return &kernel_programs;
 }
 
 DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
 {
-  /* Do not switch kernels for background renderings
-   * We do foreground rendering but use the preview kernels
-   * Check for the optimized kernels
-   *
-   * This works also the other way around, where we are using
-   * optimized kernels but new ones are being compiled due
-   * to other features that are needed */
-  if (background) {
-    /* The if-statements below would find the same result,
-     * But as the `finished` method uses a mutex we added
-     * this as an early exit */
-    return DEVICE_KERNEL_USING_FEATURE_KERNEL;
-  }
-
-  bool other_kernels_finished = load_kernel_num_compiling.load() == 0;
-  if (use_preview_kernels) {
-    if (other_kernels_finished) {
-      return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
-    }
-    else {
-      return DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL;
-    }
-  }
-  else {
-    if (other_kernels_finished) {
-      return DEVICE_KERNEL_USING_FEATURE_KERNEL;
-    }
-    else {
-      return DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
-    }
-  }
+  return DEVICE_KERNEL_USING_FEATURE_KERNEL;
 }
 
 void OpenCLDevice::mem_alloc(device_memory &mem)
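diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 18c4d2f86ada0acbc2fa1250682d11d22e3d7545..74fa2826cd43f21759915093d6e86f7ced003b3a 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h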
@@ -99,27 +99,23 @@ CCL_NAMESPACE_BEGIN
 #define __AO__
 #define __PASSES__
 #define __HAIR__
-
-/* Without these we get an AO render, used by OpenCL preview kernel. */
-#ifndef __KERNEL_AO_PREVIEW__
-#  define __SVM__
-#  define __EMISSION__
-#  define __HOLDOUT__
-#  define __MULTI_CLOSURE__
-#  define __TRANSPARENT_SHADOWS__
-#  define __BACKGROUND_MIS__
-#  define __LAMP_MIS__
-#  define __CAMERA_MOTION__
-#  define __OBJECT_MOTION__
-#  define __BAKING__
-#  define __PRINCIPLED__
-#  define __SUBSURFACE__
-#  define __VOLUME__
-#  define __VOLUME_SCATTER__
-#  define __CMJ__
-#  define __SHADOW_RECORD_ALL__
-#  define __BRANCHED_PATH__
-#endif
+#define __SVM__
+#define __EMISSION__
+#define __HOLDOUT__
+#define __MULTI_CLOSURE__
+#define __TRANSPARENT_SHADOWS__
+#define __BACKGROUND_MIS__
+#define __LAMP_MIS__
+#define __CAMERA_MOTION__
+#define __OBJECT_MOTION__
+#define __BAKING__
+#define __PRINCIPLED__
+#define __SUBSURFACE__
+#define __VOLUME__
+#define __VOLUME_SCATTER__
+#define __CMJ__
+#define __SHADOW_RECORD_ALL__
+#define __BRANCHED_PATH__
 
 /* Device specific features */
 #ifdef __KERNEL_CPU__
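diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index f753bb43c428fe69f21377e0b221fdbde75896cf..c4e7d2c79d61ce08799f4fd331726f8feff7c679 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp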
@@ -542,9 +542,6 @@ bool Scene::update(Progress &progress, bool &kernel_switch_needed)
     DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state();
     kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE ||
                            kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
-    if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
-      progress.set_kernel_status("Compiling render kernels");
-    }
     if (new_kernels_needed || kernel_switch_needed) {
       progress.set_kernel_status("Compiling render kernels");
       device->wait_for_availability(loaded_kernel_features);
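diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 3c601e18126135f83fc3ad57d116a89c9517b71a..7830ca2293aa71321d8a09661f0d66315251e9b4 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp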
@@ -243,11 +243,6 @@ void Session::run_gpu()
       }
     }
 
-    /* Don't go in pause mode when image was rendered with preview kernels
-     * When feature kernels become available the session will be reset. */
-    else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
-      time_sleep(0.1);
-    }
     else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
       reset_gpu(tile_manager.params, params.samples);
     }
@@ -762,11 +757,6 @@ void Session::run_cpu()
       }
     }
 
-    /* Don't go in pause mode when preview kernels are used
-     * When feature kernels become available the session will be reset. */
-    else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
-      time_sleep(0.1);
-    }
     else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
       reset_cpu(tile_manager.params, params.samples);
     }
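diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 04fb33368d931f2f71642ad0081b900e3648cd19..87cd4de843842d37ab76659695b5561838719262 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h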
@@ -43,8 +43,8 @@ class vector : public std::vector<value_type, allocator_type> {
   /* Try as hard as possible to use zero memory. */
   void free_memory()
   {
-    BaseClass::resize(0);
-    BaseClass::shrink_to_fit();
+    vector<value_type, allocator_type> empty;
+    BaseClass::swap(empty);
   }
 
   /* Some external API might demand working with std::vector. */
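diff --git a/release/scripts/modules/bpy_extras/anim_utils.py b/release/scripts/modules/bpy_extras/anim_utils.py
index 269592b3bb1d032de6cf4fcfb68e130aac61fe4a..0868b772a2be3041359ed48548e5c798aa73c5fd 100644
--- a/release/scripts/modules/bpy_extras/anim_utils.py
+++ b/release/scripts/modules/bpy_extras/anim_utils.py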
@@ -296,7 +296,7 @@ def bake_action_iter(
                     pbone.keyframe_insert("rotation_axis_angle", index=-1, frame=f, group=name)
                 else:  # euler, XYZ, ZXY etc
                     if euler_prev is not None:
-                        euler = pbone.matrix_basis.to_euler(obj.rotation_mode, euler_prev)
+                        euler = pbone.matrix_basis.to_euler(pbone.rotation_mode, euler_prev)
                         pbone.rotation_euler = euler
                         del euler
                     euler_prev = pbone.rotation_euler.copy()