Cycles: limit number of processes compiling OpenCL kernel based on memory
author Brecht Van Lommel <brecht@blender.org>
Wed, 25 Mar 2020 12:11:09 +0000 (13:11 +0100)
committer Brecht Van Lommel <brecht@blender.org>
Wed, 25 Mar 2020 15:39:37 +0000 (16:39 +0100)
The numbers here can probably be tweaked to be better, but it's hard to
predict and this should at least avoid excessive memory swapping.

Fixes T57064.

intern/cycles/device/opencl/device_opencl_impl.cpp
intern/cycles/device/opencl/opencl_util.cpp
intern/cycles/util/CMakeLists.txt
intern/cycles/util/util_semaphore.h [new file with mode: 0644]

index b7a2be79804791f9d641194e69ff77b5bd6c7f0d..2766f85d17c05cb2c07ffa9e7c8265e94c6f12ce 100644 (file)
@@ -257,16 +257,16 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
 
     /* Ordered with most complex kernels first, to reduce overall compile time. */
     ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
+    ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
+    ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
     if (requested_features.use_volume || is_preview) {
       ADD_SPLIT_KERNEL_PROGRAM(do_volume);
     }
+    ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
+    ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
+    ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
     ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
     ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
-    ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
-    ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
-    ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
-    ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
-    ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
 
     /* Quick kernels bundled in a single program to reduce overhead of starting
      * Blender processes. */
index 978c75d2e2c5c3ffbf99a0eb4bbc41749c847047..b8b07cf29475803f26d567c968e4ac10dd526fba 100644 (file)
@@ -23,6 +23,7 @@
 #  include "util/util_logging.h"
 #  include "util/util_md5.h"
 #  include "util/util_path.h"
+#  include "util/util_semaphore.h"
 #  include "util/util_system.h"
 #  include "util/util_time.h"
 
@@ -390,8 +391,27 @@ static void escape_python_string(string &str)
   string_replace(str, "'", "\'");
 }
 
+static int opencl_compile_process_limit()
+{
+  /* Limit number of concurrent processes compiling, with a heuristic based
+   * on total physical RAM and an estimate of the memory usage needed when
+   * compiling with all Cycles features enabled.
+   *
+   * This is somewhat arbitrary as we don't know the actual available RAM or
+   * how much memory the kernel compilation will need depending on the
+   * features, but it is better than not limiting at all.
+   *
+   * The limit is (system_memory - base_memory) / process_memory: 2 GB is set
+   * aside as a base, and each compiling process is budgeted 2 GB of what
+   * remains. The result is clamped to at least 1, so a machine with less
+   * than 4 GB still gets one process.
+   *
+   * NOTE(review): assumes system_physical_ram() reports bytes, as the
+   * constants here are byte counts -- confirm against its definition. */
+  static const int64_t GB = 1024LL * 1024LL * 1024LL;
+  static const int64_t process_memory = 2 * GB;
+  static const int64_t base_memory = 2 * GB;
+  static const int64_t system_memory = system_physical_ram();
+  static const int64_t process_limit = (system_memory - base_memory) / process_memory;
+
+  return max((int)process_limit, 1);
+}
+
 bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
 {
+  /* Construct arguments. */
   vector<string> args;
   args.push_back("--background");
   args.push_back("--factory-startup");
@@ -419,14 +439,23 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
       kernel_file_escaped.c_str(),
       clbin_escaped.c_str()));
 
-  double starttime = time_dt();
+  /* Limit number of concurrent processes compiling. */
+  static thread_counting_semaphore semaphore(opencl_compile_process_limit());
+  semaphore.acquire();
+
+  /* Compile. */
+  const double starttime = time_dt();
   add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
   add_log(string("Build flags: ") + kernel_build_options, true);
-  if (!system_call_self(args) || !path_exists(clbin)) {
+  const bool success = system_call_self(args);
+  const double elapsed = time_dt() - starttime;
+
+  semaphore.release();
+
+  if (!success || !path_exists(clbin)) {
     return false;
   }
 
-  double elapsed = time_dt() - starttime;
   add_log(
       string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
       false);
index ef100c124532f9b7c98ff80f646edeefb7282fc7..c1f71461dfd1c89c9ddd5a601ca69b0f98a596f6 100644 (file)
@@ -102,6 +102,7 @@ set(SRC_HEADERS
   util_sky_model_data.h
   util_avxf.h
   util_avxb.h
+  util_semaphore.h
   util_sseb.h
   util_ssef.h
   util_ssei.h
diff --git a/intern/cycles/util/util_semaphore.h b/intern/cycles/util/util_semaphore.h
new file mode 100644 (file)
index 0000000..d995b07
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_SEMAPHORE_H__
+#define __UTIL_SEMAPHORE_H__
+
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Counting Semaphore
+ *
+ * To restrict concurrent access to a resource to a specified number
+ * of threads. Similar to std::counting_semaphore from C++20.
+ *
+ * The constructor argument is the initial number of free slots. acquire()
+ * takes one slot, waiting on the condition variable while count is zero and
+ * re-checking count after every wakeup. release() returns one slot and calls
+ * notify_one() on the condition variable. Both hold the mutex while they
+ * read or modify count.
+ *
+ * Copy construction is deleted.
+ *
+ * NOTE(review): acquire() only waits while count == 0, so a negative
+ * initial count would not block and would just be decremented further.
+ * Callers should pass a count >= 0. */
+
+class thread_counting_semaphore {
+ public:
+  explicit thread_counting_semaphore(const int count) : count(count)
+  {
+  }
+
+  thread_counting_semaphore(const thread_counting_semaphore &) = delete;
+
+  void acquire()
+  {
+    thread_scoped_lock lock(mutex);
+    while (count == 0) {
+      condition.wait(lock);
+    }
+    count--;
+  }
+
+  void release()
+  {
+    thread_scoped_lock lock(mutex);
+    count++;
+    condition.notify_one();
+  }
+
+ protected:
+  thread_mutex mutex;
+  thread_condition_variable condition;
+  int count;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_SEMAPHORE_H__ */