Cycles: add Intel OpenImageDenoise support for viewport denoising
authorBrecht Van Lommel <brecht@blender.org>
Sun, 31 May 2020 22:11:17 +0000 (00:11 +0200)
committerBrecht Van Lommel <brecht@blender.org>
Wed, 24 Jun 2020 13:17:36 +0000 (15:17 +0200)
Compared to OptiX denoising, this is usually slower since there is no GPU
acceleration. Some optimizations may still be possible, such as avoiding copies
to the GPU and/or denoising less often.

The main thing is that this adds viewport denoising support for computers
without an NVIDIA GPU (as long as the CPU supports SSE 4.1, which is nearly
all of them).

Ref T76259

12 files changed:
intern/cycles/blender/CMakeLists.txt
intern/cycles/blender/addon/properties.py
intern/cycles/blender/addon/ui.py
intern/cycles/blender/blender_python.cpp
intern/cycles/blender/blender_sync.cpp
intern/cycles/device/CMakeLists.txt
intern/cycles/device/device.cpp
intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_task.h
intern/cycles/render/session.cpp
intern/cycles/util/CMakeLists.txt
intern/cycles/util/util_openimagedenoise.h [new file with mode: 0644]

index 4589f4573d95d4687d4f2e440a1e301951310481..2316800e21e55571a98aa062745f59ea0c62e963 100644 (file)
@@ -102,6 +102,13 @@ if(WITH_OPENVDB)
   )
 endif()
 
+# Expose OpenImageDenoise headers and the WITH_OPENIMAGEDENOISE define to the
+# Blender integration sources when the option is enabled.
+if(WITH_OPENIMAGEDENOISE)
+  list(APPEND INC_SYS
+    ${OPENIMAGEDENOISE_INCLUDE_DIRS}
+  )
+  add_definitions(-DWITH_OPENIMAGEDENOISE)
+endif()
+
 blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
 
 # avoid link failure with clang 3.4 debug
index 061e3784b0dca178945f34915486138f39db4a6e..053de16529c6d64bccaeb8dfbd465440ed80a324 100644 (file)
@@ -182,14 +182,30 @@ enum_aov_types = (
     ('COLOR', "Color", "Write a Color pass", 1),
 )
 
+def enum_openimagedenoise_denoiser(self, context):
+    """Return the OpenImageDenoise enum item as a one-element list, or an
+    empty list when `_cycles.with_openimagedenoise` is false."""
+    if not _cycles.with_openimagedenoise:
+        return []
+
+    oidn_item = (
+        'OPENIMAGEDENOISE',
+        "OpenImageDenoise",
+        "Use Intel OpenImageDenoise AI denoiser running on the CPU",
+        4,
+    )
+    return [oidn_item]
+
 def enum_optix_denoiser(self, context):
     if not context or bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX')):
         return [('OPTIX', "OptiX", "Use the OptiX AI denoiser with GPU acceleration, only available on NVIDIA GPUs", 2)]
     return []
 
 def enum_preview_denoiser(self, context):
-    items = [('AUTO', "Auto", "Use the fastest available denoiser for viewport rendering", 0)]
-    items += enum_optix_denoiser(self, context)
+    optix_items = enum_optix_denoiser(self, context)
+    oidn_items = enum_openimagedenoise_denoiser(self, context)
+
+    if len(optix_items):
+        auto_label = "Fastest (Optix)"
+    elif len(oidn_items):
+        auto_label = "Fatest (OpenImageDenoise)"
+    else:
+        auto_label = "None"
+
+    items = [('AUTO', auto_label, "Use the fastest available denoiser for viewport rendering", 0)]
+    items += optix_items
+    items += oidn_items
     return items
 
 def enum_denoiser(self, context):
index e689ec90983850963a4f56354619e3218a4a1e73..aa0a47eb9c7f57badce2c039d171bec712d31671 100644 (file)
@@ -1006,6 +1006,8 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
         if denoiser == 'OPTIX':
             col.prop(cycles_view_layer, "denoising_optix_input_passes")
             return
+        elif denoiser == 'OPENIMAGEDENOISE':
+            return
 
         col.prop(cycles_view_layer, "denoising_radius", text="Radius")
 
index 5595d6576402a33dd60be26a2b92f5fa59981133..3e595c3ee52726834cd6f7e917e78b3a5e9fd74f 100644 (file)
@@ -31,6 +31,7 @@
 #include "util/util_logging.h"
 #include "util/util_md5.h"
 #include "util/util_opengl.h"
+#include "util/util_openimagedenoise.h"
 #include "util/util_path.h"
 #include "util/util_string.h"
 #include "util/util_task.h"
@@ -1076,5 +1077,14 @@ void *CCL_python_module_init()
   Py_INCREF(Py_False);
 #endif /* WITH_EMBREE */
 
+  if (ccl::openimagedenoise_supported()) {
+    PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
+    Py_INCREF(Py_True);
+  }
+  else {
+    PyModule_AddObject(mod, "with_openimagedenoise", Py_False);
+    Py_INCREF(Py_False);
+  }
+
   return (void *)mod;
 }
index aed92cf137640889d9deed17b6b847e8eb5fdd49..bf065cc54927a21d77420ca8321c5f16f09dd5c4 100644 (file)
@@ -38,6 +38,7 @@
 #include "util/util_foreach.h"
 #include "util/util_hash.h"
 #include "util/util_opengl.h"
+#include "util/util_openimagedenoise.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -957,6 +958,9 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
       if (!Device::available_devices(DEVICE_MASK_OPTIX).empty()) {
         denoising.type = DENOISER_OPTIX;
       }
+      else if (openimagedenoise_supported()) {
+        denoising.type = DENOISER_OPENIMAGEDENOISE;
+      }
       else {
         denoising.use = false;
       }
index aa5b65a2b730651c2dcb39d5d84ffde3d33826cd..ca366722eb7e6060be7f49847c5fdbcf70767733 100644 (file)
@@ -99,6 +99,18 @@ if(WITH_CYCLES_DEVICE_MULTI)
   add_definitions(-DWITH_MULTI)
 endif()
 
+# Build the device library against OpenImageDenoise when the option is
+# enabled: add its defines, its include directories, and link it with TBB.
+if(WITH_OPENIMAGEDENOISE)
+  add_definitions(
+    -DWITH_OPENIMAGEDENOISE
+    -DOIDN_STATIC_LIB
+  )
+  list(APPEND INC_SYS
+    ${OPENIMAGEDENOISE_INCLUDE_DIRS}
+  )
+  list(APPEND LIB
+    ${OPENIMAGEDENOISE_LIBRARIES}
+    ${TBB_LIBRARIES}
+  )
+endif()
+
 include_directories(${INC})
 include_directories(SYSTEM ${INC_SYS})
 
index 73415d5f9c62ad40c9fed1e0dbf3062ea3066077..9dbb33980b480b7e2987a8c285d6c38d4eab3587 100644 (file)
@@ -706,6 +706,18 @@ void DeviceInfo::add_denoising_devices(DenoiserType denoiser_type)
       denoisers = denoiser_type;
     }
   }
+  else if (denoiser_type == DENOISER_OPENIMAGEDENOISE && type != DEVICE_CPU) {
+    /* Convert to a special multi device with separate denoising devices. */
+    if (multi_devices.empty()) {
+      multi_devices.push_back(*this);
+    }
+
+    /* Add CPU denoising devices. */
+    DeviceInfo cpu_device = Device::available_devices(DEVICE_MASK_CPU).front();
+    denoising_devices.push_back(cpu_device);
+
+    denoisers = denoiser_type;
+  }
 }
 
 CCL_NAMESPACE_END
index 1f760a1553074fec0b8ceb867c327904fdd71562..8f68e66a1b4a8c8b745ae98d3b01282dcf474ac7 100644 (file)
@@ -51,6 +51,7 @@
 #include "util/util_function.h"
 #include "util/util_logging.h"
 #include "util/util_map.h"
+#include "util/util_openimagedenoise.h"
 #include "util/util_opengl.h"
 #include "util/util_optimization.h"
 #include "util/util_progress.h"
@@ -177,6 +178,10 @@ class CPUDevice : public Device {
 #ifdef WITH_OSL
   OSLGlobals osl_globals;
 #endif
+#ifdef WITH_OPENIMAGEDENOISE
+  oidn::DeviceRef oidn_device;
+  oidn::FilterRef oidn_filter;
+#endif
 
   bool use_split_kernel;
 
@@ -943,6 +948,70 @@ class CPUDevice : public Device {
     }
   }
 
+  void denoise_openimagedenoise(DeviceTask &task, RenderTile &rtile)
+  {
+    /* Run the OpenImageDenoise "RT" filter over the area described by rtile,
+     * reading the denoising passes and writing the result at pass offset 0 of
+     * the same buffer. Does nothing when built without WITH_OPENIMAGEDENOISE. */
+#ifdef WITH_OPENIMAGEDENOISE
+    assert(openimagedenoise_supported());
+
+    /* OpenImageDenoise is multithreaded by itself, so run only one at a time. */
+    static thread_mutex mutex;
+    thread_scoped_lock lock(mutex);
+
+    /* Lazily create the device and filter, and keep them around for reuse. */
+    if (!oidn_device) {
+      oidn_device = oidn::newDevice();
+      oidn_device.commit();
+    }
+    if (!oidn_filter) {
+      oidn_filter = oidn_device.newFilter("RT");
+    }
+
+    /* Bring pixels over from the compute device (no-op for CPU device). */
+    rtile.buffers->buffer.copy_from_device();
+
+    /* Passes are stored interleaved, so all images share the same pixel and
+     * row strides and only differ in the float offset of their pass. */
+    const int64_t first_pixel = rtile.offset + rtile.x + rtile.y * rtile.stride;
+    const int64_t pixel_stride = task.pass_stride * sizeof(float);
+    const int64_t row_stride = rtile.stride * pixel_stride;
+
+    const struct {
+      const char *name;
+      int offset;
+    } images[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR},
+                  {"normal", task.pass_denoising_data + DENOISING_PASS_NORMAL},
+                  {"albedo", task.pass_denoising_data + DENOISING_PASS_ALBEDO},
+                  {"output", 0}};
+
+    for (const auto &image : images) {
+      const int64_t byte_offset = (first_pixel * task.pass_stride + image.offset) * sizeof(float);
+
+      oidn_filter.setImage(image.name,
+                           (char *)rtile.buffer + byte_offset,
+                           oidn::Format::Float3,
+                           rtile.w,
+                           rtile.h,
+                           0,
+                           pixel_stride,
+                           row_stride);
+    }
+
+    /* Configure for linear HDR input, then run the filter. */
+    oidn_filter.set("hdr", true);
+    oidn_filter.set("srgb", false);
+    oidn_filter.commit();
+    oidn_filter.execute();
+
+    /* todo: it may be possible to avoid this copy, but we have to ensure that
+     * when other code copies data from the device it doesn't overwrite the
+     * denoiser buffers. */
+    rtile.buffers->buffer.copy_to_device();
+#else
+    /* Built without OpenImageDenoise: silence unused parameter warnings. */
+    (void)task;
+    (void)rtile;
+#endif
+  }
+
   void denoise_nlm(DenoisingTask &denoising, RenderTile &tile)
   {
     ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING);
@@ -1018,7 +1087,10 @@ class CPUDevice : public Device {
         render(task, tile, kg);
       }
       else if (tile.task == RenderTile::DENOISE) {
-        if (task.denoising.type == DENOISER_NLM) {
+        if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+          denoise_openimagedenoise(task, tile);
+        }
+        else if (task.denoising.type == DENOISER_NLM) {
           if (denoising == NULL) {
             denoising = new DenoisingTask(this, task);
             denoising->profiler = &kg->profiler;
@@ -1060,16 +1132,22 @@ class CPUDevice : public Device {
     tile.stride = task.stride;
     tile.buffers = task.buffers;
 
-    DenoisingTask denoising(this, task);
+    if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+      denoise_openimagedenoise(task, tile);
+    }
+    else {
+      DenoisingTask denoising(this, task);
 
-    ProfilingState denoising_profiler_state;
-    profiler.add_state(&denoising_profiler_state);
-    denoising.profiler = &denoising_profiler_state;
+      ProfilingState denoising_profiler_state;
+      profiler.add_state(&denoising_profiler_state);
+      denoising.profiler = &denoising_profiler_state;
 
-    denoise_nlm(denoising, tile);
-    task.update_progress(&tile, tile.w * tile.h);
+      denoise_nlm(denoising, tile);
 
-    profiler.remove_state(&denoising_profiler_state);
+      profiler.remove_state(&denoising_profiler_state);
+    }
+
+    task.update_progress(&tile, tile.w * tile.h);
   }
 
   void thread_film_convert(DeviceTask &task)
@@ -1143,10 +1221,17 @@ class CPUDevice : public Device {
     /* split task into smaller ones */
     list<DeviceTask> tasks;
 
-    if (task.type == DeviceTask::SHADER)
+    if (task.type == DeviceTask::DENOISE_BUFFER &&
+        task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+      /* Denoise entire buffer at once with OIDN, it has own threading. */
+      tasks.push_back(task);
+    }
+    else if (task.type == DeviceTask::SHADER) {
       task.split(tasks, info.cpu_threads, 256);
-    else
+    }
+    else {
       task.split(tasks, info.cpu_threads);
+    }
 
     foreach (DeviceTask &task, tasks) {
       task_pool.push([=] {
@@ -1351,6 +1436,9 @@ void device_cpu_info(vector<DeviceInfo> &devices)
   info.has_half_images = true;
   info.has_profiling = true;
   info.denoisers = DENOISER_NLM;
+  if (openimagedenoise_supported()) {
+    info.denoisers |= DENOISER_OPENIMAGEDENOISE;
+  }
 
   devices.insert(devices.begin(), info);
 }
index 1ad8e0d9485bfcfb9a47d78bad2a57f62313c04d..aeb5ff72e890995685a0556baef3d03181886751 100644 (file)
@@ -34,6 +34,7 @@ class Tile;
 enum DenoiserType {
   DENOISER_NLM = 1,
   DENOISER_OPTIX = 2,
+  DENOISER_OPENIMAGEDENOISE = 4,
   DENOISER_NUM,
 
   DENOISER_NONE = 0,
index f11722ac9a93bf4bd3ff05ee1955434dace71a59..c7531adcaf4f91d8169b2e57e98bc3c00abf0196 100644 (file)
@@ -1086,7 +1086,7 @@ void Session::update_status_time(bool show_pause, bool show_done)
        */
       substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
     }
-    if (params.denoising.use) {
+    if (params.denoising.use && params.denoising.type != DENOISER_OPENIMAGEDENOISE) {
       substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles());
     }
     else if (params.denoising.store_passes && params.denoising.type == DENOISER_NLM) {
index 4f66ced1c5afad309f564fb0ff2bc3b1f4c28a05..ad4ea9c86e0f629e55314d1766320855ab6e58ae 100644 (file)
@@ -86,6 +86,7 @@ set(SRC_HEADERS
   util_math_matrix.h
   util_md5.h
   util_murmurhash.h
+  util_openimagedenoise.h
   util_opengl.h
   util_optimization.h
   util_param.h
diff --git a/intern/cycles/util/util_openimagedenoise.h b/intern/cycles/util/util_openimagedenoise.h
new file mode 100644 (file)
index 0000000..aafa69c
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_OPENIMAGEDENOISE_H__
+#define __UTIL_OPENIMAGEDENOISE_H__
+
+#ifdef WITH_OPENIMAGEDENOISE
+#  include <OpenImageDenoise/oidn.hpp>
+#endif
+
+#include "util_system.h"
+
+CCL_NAMESPACE_BEGIN
+
+static inline bool openimagedenoise_supported()
+{
+  /* Usable only when compiled in (WITH_OPENIMAGEDENOISE) and the CPU reports
+   * SSE 4.1 support at runtime. */
+#ifndef WITH_OPENIMAGEDENOISE
+  return false;
+#else
+  return system_cpu_support_sse41();
+#endif
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_OPENIMAGEDENOISE_H__ */