Merge remote-tracking branch 'origin/blender-v2.93-release'
author Sybren A. Stüvel <sybren@blender.org>
Thu, 20 May 2021 11:00:07 +0000 (13:00 +0200)
committer Sybren A. Stüvel <sybren@blender.org>
Thu, 20 May 2021 11:00:07 +0000 (13:00 +0200)
1  2 
intern/cycles/device/device_optix.cpp
intern/cycles/kernel/kernel_types.h
intern/cycles/render/scene.cpp

index cce11507fa131f1eca021c3a11a7d22d372c790f,bb6027254f9da15819fbb93b81e0253a87fa3abf..01de0724cb2958ad52d5b316cabc02377cd44ccd
@@@ -193,6 -193,9 +193,9 @@@ class OptiXDevice : public CUDADevice 
    device_only_memory<unsigned char> denoiser_state;
    int denoiser_input_passes = 0;
  
+   vector<device_only_memory<char>> delayed_free_bvh_memory;
+   thread_mutex delayed_free_bvh_mutex;
   public:
    OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
        : CUDADevice(info_, stats_, profiler_, background_),
      // Make CUDA context current
      const CUDAContextScope scope(cuContext);
  
+     free_bvh_memory_delayed();
      sbt_data.free();
      texture_info.free();
      launch_params.free();
          // Create OptiX denoiser handle on demand when it is first used
          OptixDenoiserOptions denoiser_options = {};
          assert(task.denoising.input_passes >= 1 && task.denoising.input_passes <= 3);
 +#  if OPTIX_ABI_VERSION >= 47
 +        denoiser_options.guideAlbedo = task.denoising.input_passes >= 2;
 +        denoiser_options.guideNormal = task.denoising.input_passes >= 3;
 +        check_result_optix_ret(optixDenoiserCreate(
 +            context, OPTIX_DENOISER_MODEL_KIND_HDR, &denoiser_options, &denoiser));
 +#  else
          denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>(
              OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1));
 -#  if OPTIX_ABI_VERSION < 28
 +#    if OPTIX_ABI_VERSION < 28
          denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
 -#  endif
 +#    endif
          check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
          check_result_optix_ret(
              optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
 +#  endif
  
          // OptiX denoiser handle was created with the requested number of input passes
          denoiser_input_passes = task.denoising.input_passes;
  #  endif
        output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
  
 +#  if OPTIX_ABI_VERSION >= 47
 +      OptixDenoiserLayer image_layers = {};
 +      image_layers.input = input_layers[0];
 +      image_layers.output = output_layers[0];
 +
 +      OptixDenoiserGuideLayer guide_layers = {};
 +      guide_layers.albedo = input_layers[1];
 +      guide_layers.normal = input_layers[2];
 +#  endif
 +
        // Finally run denonising
        OptixDenoiserParams params = {};  // All parameters are disabled/zero
 +#  if OPTIX_ABI_VERSION >= 47
        check_result_optix_ret(optixDenoiserInvoke(denoiser,
 -                                                 0,
 +                                                 NULL,
 +                                                 &params,
 +                                                 denoiser_state.device_pointer,
 +                                                 scratch_offset,
 +                                                 &guide_layers,
 +                                                 &image_layers,
 +                                                 1,
 +                                                 overlap_offset.x,
 +                                                 overlap_offset.y,
 +                                                 denoiser_state.device_pointer + scratch_offset,
 +                                                 scratch_size));
 +#  else
 +      check_result_optix_ret(optixDenoiserInvoke(denoiser,
 +                                                 NULL,
                                                   &params,
                                                   denoiser_state.device_pointer,
                                                   scratch_offset,
                                                   output_layers,
                                                   denoiser_state.device_pointer + scratch_offset,
                                                   scratch_size));
 +#  endif
  
  #  if OPTIX_DENOISER_NO_PIXEL_STRIDE
        void *output_args[] = {&input_ptr,
        return;
      }
  
+     free_bvh_memory_delayed();
      BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
  
      progress.set_substatus("Building OptiX acceleration structure");
      }
    }
  
+   void release_optix_bvh(BVH *bvh) override
+   {
+     thread_scoped_lock lock(delayed_free_bvh_mutex);
+     /* Do delayed free of BVH memory, since geometry holding BVH might be deleted
+      * while GPU is still rendering.
+      *
+      * `bvh` is cast to `BVHOptiX` with a `static_cast` (no runtime type check).
+      * Its `as_data` and `motion_transform_data` buffers are moved into
+      * `delayed_free_bvh_memory` while `delayed_free_bvh_mutex` is held, and
+      * `free_bvh_memory_delayed()` later calls `free_memory()` on that queue.
+      * The traversable handle is reset to 0 once the buffers have been moved out. */
+     BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
+     delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data));
+     delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data));
+     bvh_optix->traversable_handle = 0;
+   }
+   void free_bvh_memory_delayed()
+   { /* Takes `delayed_free_bvh_mutex`, then calls `free_memory()` on the queue that
+      * `release_optix_bvh()` fills.
+      * NOTE(review): presumably this releases the queued device buffers; the
+      * definition of `free_memory()` is not visible here -- confirm. */
+     thread_scoped_lock lock(delayed_free_bvh_mutex);
+     delayed_free_bvh_memory.free_memory();
+   }
    void const_copy_to(const char *name, void *host, size_t size) override
    {
      // Set constant memory for CUDA module
index 18c4d2f86ada0acbc2fa1250682d11d22e3d7545,c661d77edb1f74ba1f640e2fadf0a7eeaa85f08d..74fa2826cd43f21759915093d6e86f7ced003b3a
@@@ -99,27 -99,23 +99,23 @@@ CCL_NAMESPACE_BEGI
  #define __AO__
  #define __PASSES__
  #define __HAIR__
- /* Without these we get an AO render, used by OpenCL preview kernel. */
- #ifndef __KERNEL_AO_PREVIEW__
- #  define __SVM__
- #  define __EMISSION__
- #  define __HOLDOUT__
- #  define __MULTI_CLOSURE__
- #  define __TRANSPARENT_SHADOWS__
- #  define __BACKGROUND_MIS__
- #  define __LAMP_MIS__
- #  define __CAMERA_MOTION__
- #  define __OBJECT_MOTION__
- #  define __BAKING__
- #  define __PRINCIPLED__
- #  define __SUBSURFACE__
- #  define __VOLUME__
- #  define __VOLUME_SCATTER__
- #  define __CMJ__
- #  define __SHADOW_RECORD_ALL__
- #  define __BRANCHED_PATH__
- #endif
+ #define __SVM__
+ #define __EMISSION__
+ #define __HOLDOUT__
+ #define __MULTI_CLOSURE__
+ #define __TRANSPARENT_SHADOWS__
+ #define __BACKGROUND_MIS__
+ #define __LAMP_MIS__
+ #define __CAMERA_MOTION__
+ #define __OBJECT_MOTION__
+ #define __BAKING__
+ #define __PRINCIPLED__
+ #define __SUBSURFACE__
+ #define __VOLUME__
+ #define __VOLUME_SCATTER__
+ #define __CMJ__
+ #define __SHADOW_RECORD_ALL__
+ #define __BRANCHED_PATH__
  
  /* Device specific features */
  #ifdef __KERNEL_CPU__
@@@ -895,8 -891,6 +891,8 @@@ enum ShaderDataFlag 
    SD_HAS_CONSTANT_EMISSION = (1 << 27),
    /* Needs to access attributes for volume rendering */
    SD_NEED_VOLUME_ATTRIBUTES = (1 << 28),
 +  /* Shader has emission */
 +  SD_HAS_EMISSION = (1 << 29),
  
    SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME |
                       SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR |
index f753bb43c428fe69f21377e0b221fdbde75896cf,70474368a4ef84fd47ea0adaa124e68bf650cbeb..c4e7d2c79d61ce08799f4fd331726f8feff7c679
@@@ -143,27 -143,21 +143,27 @@@ void Scene::free_memory(bool final
    delete bvh;
    bvh = NULL;
  
 -  foreach (Shader *s, shaders)
 -    delete s;
 -  /* delete procedurals before other types as they may hold pointers to those types */
 +  /* The order of deletion is important to make sure data is freed based on possible dependencies
 +   * as the Nodes' reference counts are decremented in the destructors:
 +   *
 +   * - Procedurals can create and hold pointers to any other types.
 +   * - Objects can hold pointers to Geometries and ParticleSystems
 +   * - Lights and Geometries can hold pointers to Shaders.
 +   *
 +   * Similarly, we first delete all nodes and their associated device data, and then the managers
 +   * and their associated device data.
 +   */
    foreach (Procedural *p, procedurals)
      delete p;
 -  foreach (Geometry *g, geometry)
 -    delete g;
    foreach (Object *o, objects)
      delete o;
 -  foreach (Light *l, lights)
 -    delete l;
 +  foreach (Geometry *g, geometry)
 +    delete g;
    foreach (ParticleSystem *p, particle_systems)
      delete p;
 +  foreach (Light *l, lights)
 +    delete l;
  
 -  shaders.clear();
    geometry.clear();
    objects.clear();
    lights.clear();
      film->device_free(device, &dscene, this);
      background->device_free(device, &dscene);
      integrator->device_free(device, &dscene, true);
 +  }
 +
 +  if (final) {
 +    delete camera;
 +    delete dicing_camera;
 +    delete film;
 +    delete background;
 +    delete integrator;
 +  }
 +
 +  /* Delete Shaders after every other nodes to ensure that we do not try to decrement the reference
 +   * count on some dangling pointer. */
 +  foreach (Shader *s, shaders)
 +    delete s;
 +
 +  shaders.clear();
  
 +  /* Now that all nodes have been deleted, we can safely delete managers and device data. */
 +  if (device) {
      object_manager->device_free(device, &dscene, true);
      geometry_manager->device_free(device, &dscene, true);
      shader_manager->device_free(device, &dscene, this);
  
    if (final) {
      delete lookup_tables;
 -    delete camera;
 -    delete dicing_camera;
 -    delete film;
 -    delete background;
 -    delete integrator;
      delete object_manager;
      delete geometry_manager;
      delete shader_manager;
@@@ -523,6 -504,9 +523,6 @@@ bool Scene::update(Progress &progress, 
  {
    /* update scene */
    if (need_update()) {
 -    /* Updated used shader tag so we know which features are need for the kernel. */
 -    shader_manager->update_shaders_used(this);
 -
      /* Update max_closures. */
      KernelIntegrator *kintegrator = &dscene.data.integrator;
      if (params.background) {
      DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state();
      kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE ||
                             kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
-     if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
-       progress.set_kernel_status("Compiling render kernels");
-     }
      if (new_kernels_needed || kernel_switch_needed) {
        progress.set_kernel_status("Compiling render kernels");
        device->wait_for_availability(loaded_kernel_features);
@@@ -600,7 -581,7 +597,7 @@@ int Scene::get_max_closure_count(
    int max_closures = 0;
    for (int i = 0; i < shaders.size(); i++) {
      Shader *shader = shaders[i];
 -    if (shader->used) {
 +    if (shader->reference_count()) {
        int num_closures = shader->graph->get_num_closures();
        max_closures = max(max_closures, num_closures);
      }
@@@ -761,10 -742,9 +758,10 @@@ template<> void Scene::delete_node_impl
    particle_system_manager->tag_update(this);
  }
  
 -template<> void Scene::delete_node_impl(Shader * /*node*/)
 +template<> void Scene::delete_node_impl(Shader *shader)
  {
    /* don't delete unused shaders, not supported
     *
     * The shader object itself is not deleted by this specialization; the only
     * action taken is the `clear_reference_count()` call on it below. */
 +  shader->clear_reference_count();
  }
  
  template<> void Scene::delete_node_impl(Procedural *node)
@@@ -831,12 -811,9 +828,12 @@@ template<> void Scene::delete_nodes(con
    particle_system_manager->tag_update(this);
  }
  
 -template<> void Scene::delete_nodes(const set<Shader *> & /*nodes*/, const NodeOwner * /*owner*/)
 +template<> void Scene::delete_nodes(const set<Shader *> &nodes, const NodeOwner * /*owner*/)
  {
    /* don't delete unused shaders, not supported
     *
     * No shader in `nodes` is deleted by this specialization; each one only has
     * `clear_reference_count()` called on it. The `owner` argument is unused. */
 +  for (Shader *shader : nodes) {
 +    shader->clear_reference_count();
 +  }
  }
  
  template<> void Scene::delete_nodes(const set<Procedural *> &nodes, const NodeOwner *owner)