Cycles: Always use guarded allocator for vectors
authorSergey Sharybin <sergey.vfx@gmail.com>
Sat, 6 Feb 2016 22:40:41 +0000 (03:40 +0500)
committerSergey Sharybin <sergey.vfx@gmail.com>
Fri, 12 Feb 2016 14:43:26 +0000 (15:43 +0100)
We don't have vector re-allocation happening multiple times from inside
a loop anymore, so we can safely switch to a memory guarded allocator for
vectors and keep track of the memory usage at various stages of rendering.

Additionally, when building from inside the Blender repository, Cycles will
use Blender's guarded allocator, so actual memory usage will be displayed
in the Space Info header.

There are a couple of tricky aspects of the patch:

- TaskScheduler::exit() now explicitly frees memory used by `threads`.
  This is needed because `threads` is a static member whose destructor
  isn't called on Blender's exit, which caused a memory leak report to
  be printed.

  This shouldn't cause any measurable slowdown: reallocation of that
  vector is only one of a few zillion other allocations happening during
  synchronization.

- Use regular guarded malloc (not aligned one). No idea why it was
  made to be aligned in the first place. Perhaps some corner case tests
  or so. Vector was never expected to be aligned anyway. Let's see if
  we'll have actual bugs with this.

Reviewers: dingto, lukasstockner97, juicyfruit, brecht

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D1774

14 files changed:
intern/cycles/CMakeLists.txt
intern/cycles/blender/addon/__init__.py
intern/cycles/blender/addon/engine.py
intern/cycles/blender/blender_python.cpp
intern/cycles/device/device.cpp
intern/cycles/device/device.h
intern/cycles/render/scene.cpp
intern/cycles/render/shader.cpp
intern/cycles/render/shader.h
intern/cycles/util/CMakeLists.txt
intern/cycles/util/util_guarded_allocator.h
intern/cycles/util/util_task.cpp
intern/cycles/util/util_task.h
intern/cycles/util/util_vector.h

index 2a0894522ebb345d8250813106452912ed831f87..179cba2ae5f40a974791ec3ddfb197c958a070e3 100644 (file)
@@ -210,6 +210,7 @@ endif()
 # Subdirectories
 
 if(WITH_CYCLES_BLENDER)
+       add_definitions(-DWITH_BLENDER_GUARDEDALLOC)
        add_subdirectory(blender)
 endif()
 
index c4ae6f90521a89790c129f092c35358ee4091899..8d4438cae24a4988c3163f4448d22a5b25043a16 100644 (file)
@@ -88,10 +88,17 @@ class CyclesRender(bpy.types.RenderEngine):
             self.report({'ERROR'}, "OSL support disabled in this build.")
 
 
+def engine_exit():
+    engine.exit()
+
+
 def register():
     from . import ui
     from . import properties
     from . import presets
+    import atexit
+
+    atexit.register(engine_exit)
 
     engine.init()
 
@@ -107,6 +114,7 @@ def unregister():
     from . import ui
     from . import properties
     from . import presets
+    import atexit
 
     bpy.app.handlers.version_update.remove(version_update.do_versions)
 
@@ -114,3 +122,6 @@ def unregister():
     properties.unregister()
     presets.unregister()
     bpy.utils.unregister_module(__name__)
+
+    atexit.unregister(engine_exit)
+    engine_exit()
index 42ec253613f900e3bd42f8e2f570cc30d0741c84..96dc3a59ef2df73319241f85a5f5ad8635e3934b 100644 (file)
@@ -74,6 +74,10 @@ def init():
     _cycles.init(path, user_path, bpy.app.background)
 
 
+def exit():
+    import _cycles
+    _cycles.exit()
+
 def create(engine, data, scene, region=None, v3d=None, rv3d=None, preview_osl=False):
     import bpy
     import _cycles
index 4792f9612a9d57deab2db8d0eac524372e65822b..27eab0c7f681184675534ef1f4f7678be18fcace 100644 (file)
@@ -39,6 +39,10 @@ CCL_NAMESPACE_BEGIN
 
 namespace {
 
+/* Device list stored static (used by compute_device_list()). */
+static ccl::vector<CCLDeviceInfo> device_list;
+static ccl::DeviceType device_type = DEVICE_NONE;
+
 /* Flag describing whether debug flags were synchronized from scene. */
 bool debug_flags_set = false;
 
@@ -172,6 +176,16 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
        Py_RETURN_NONE;
 }
 
+
+static PyObject *exit_func(PyObject * /*self*/, PyObject * /*args*/)
+{
+       ShaderManager::free_memory();
+       TaskScheduler::free_memory();
+       Device::free_memory();
+       device_list.free_memory();
+       Py_RETURN_NONE;
+}
+
 static PyObject *create_func(PyObject * /*self*/, PyObject *args)
 {
        PyObject *pyengine, *pyuserpref, *pydata, *pyscene, *pyregion, *pyv3d, *pyrv3d;
@@ -616,6 +630,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
 
 static PyMethodDef methods[] = {
        {"init", init_func, METH_VARARGS, ""},
+       {"exit", exit_func, METH_VARARGS, ""},
        {"create", create_func, METH_VARARGS, ""},
        {"free", free_func, METH_O, ""},
        {"render", render_func, METH_O, ""},
@@ -648,10 +663,6 @@ static struct PyModuleDef module = {
 
 static CCLDeviceInfo *compute_device_list(DeviceType type)
 {
-       /* device list stored static */
-       static ccl::vector<CCLDeviceInfo> device_list;
-       static ccl::DeviceType device_type = DEVICE_NONE;
-
        /* create device list if it's not already done */
        if(type != device_type) {
                ccl::vector<DeviceInfo>& devices = ccl::Device::available_devices();
index 02e9b0551c9f1dc218ea33d405b3864dcd270666..90211b23aa1901246e206be260795b7f5e75f416 100644 (file)
@@ -34,6 +34,8 @@ CCL_NAMESPACE_BEGIN
 
 bool Device::need_types_update = true;
 bool Device::need_devices_update = true;
+vector<DeviceType> Device::types;
+vector<DeviceInfo> Device::devices;
 
 /* Device Requested Features */
 
@@ -280,8 +282,6 @@ string Device::string_from_type(DeviceType type)
 
 vector<DeviceType>& Device::available_types()
 {
-       static vector<DeviceType> types;
-
        if(need_types_update) {
                types.clear();
                types.push_back(DEVICE_CPU);
@@ -311,8 +311,6 @@ vector<DeviceType>& Device::available_types()
 
 vector<DeviceInfo>& Device::available_devices()
 {
-       static vector<DeviceInfo> devices;
-
        if(need_devices_update) {
                devices.clear();
 #ifdef WITH_CUDA
@@ -368,4 +366,10 @@ void Device::tag_update()
        need_devices_update = true;
 }
 
+void Device::free_memory()
+{
+       types.free_memory();
+       devices.free_memory();
+}
+
 CCL_NAMESPACE_END
index c53cd8887756437077c9c8a6ffd78edd2162e8f7..30d0003b94070f45e1768f9b0fac50c362e09cb4 100644 (file)
@@ -269,9 +269,12 @@ public:
        /* Tag devices lists for update. */
        static void tag_update();
 
+       static void free_memory();
 private:
        /* Indicted whether device types and devices lists were initialized. */
        static bool need_types_update, need_devices_update;
+       static vector<DeviceType> types;
+       static vector<DeviceInfo> devices;
 };
 
 CCL_NAMESPACE_END
index 25f812221ac33151020cfc1ada3144b24f7669b9..62951af7f6b2f52e770bc7ba12d5eb8a914fccb4 100644 (file)
 #include "tables.h"
 
 #include "util_foreach.h"
+#include "util_guarded_allocator.h"
+#include "util_logging.h"
 #include "util_progress.h"
 
-#ifdef WITH_CYCLES_DEBUG
-#  include "util_guarded_allocator.h"
-#  include "util_logging.h"
-#endif
-
 CCL_NAMESPACE_BEGIN
 
 Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_)
@@ -245,11 +242,9 @@ void Scene::device_update(Device *device_, Progress& progress)
                device->const_copy_to("__data", &dscene.data, sizeof(dscene.data));
        }
 
-#ifdef WITH_CYCLES_DEBUG
        VLOG(1) << "System memory statistics after full device sync:\n"
                << "  Usage: " << util_guarded_get_mem_used() << "\n"
                << "  Peak: " << util_guarded_get_mem_peak();
-#endif
 }
 
 Scene::MotionType Scene::need_motion(bool advanced_shading)
index 0b3509fa1b16a235a08eccbd3b7430eaddca7ce5..09a6061abea8561b4f784835deef3fb3f54fff5b 100644 (file)
@@ -531,5 +531,10 @@ void ShaderManager::get_requested_features(Scene *scene,
        }
 }
 
+void ShaderManager::free_memory()
+{
+       beckmann_table.free_memory();
+}
+
 CCL_NAMESPACE_END
 
index 8b3969de88e7989684e285606313a23872c699aa..d7692a2b6f5c75b6f33634b4dd3a9b7f9cb5a74a 100644 (file)
@@ -169,6 +169,8 @@ public:
        void get_requested_features(Scene *scene,
                                    DeviceRequestedFeatures *requested_features);
 
+       static void free_memory();
+
 protected:
        ShaderManager();
 
index 8367d21bfc6ebdda537cfc8ce3152935d08f93f3..d4f6a49953bbe24c56eacf3bec43365c11897e10 100644 (file)
@@ -40,8 +40,10 @@ set(SRC_HEADERS
        util_atomic.h
        util_boundbox.h
        util_debug.h
+       util_guarded_allocator.cpp
        util_foreach.h
        util_function.h
+       util_guarded_allocator.h
        util_half.h
        util_hash.h
        util_image.h
@@ -77,15 +79,6 @@ set(SRC_HEADERS
        util_xml.h
 )
 
-if(WITH_CYCLES_DEBUG)
-       list(APPEND SRC
-               util_guarded_allocator.cpp
-       )
-       list(APPEND SRC_HEADERS
-               util_guarded_allocator.h
-       )
-endif()
-
 include_directories(${INC})
 include_directories(SYSTEM ${INC_SYS})
 
index 2df717253e32752cbe4c113b7a67e5b77d1351ce..2cef14943690f7f2f0ba4a814d52af1f501608b8 100644 (file)
 #ifndef __UTIL_GUARDED_ALLOCATOR_H__
 #define __UTIL_GUARDED_ALLOCATOR_H__
 
-/* Define this in order to use Blender's guarded allocator to keep
- * track of allocated buffers, their sizes and peak memory usage.
- *
- * This is usually a bad level call, but it's really handy to keep
- * track of overall peak memory consumption during the scene
- * synchronization step.
- */
-#undef WITH_BLENDER_GUARDEDALLOC
-
+#include <cstddef>
 #include <memory>
 
+#include "util_debug.h"
 #include "util_types.h"
 
 #ifdef WITH_BLENDER_GUARDEDALLOC
@@ -42,39 +35,85 @@ void util_guarded_mem_free(size_t n);
 
 /* Guarded allocator for the use with STL. */
 template <typename T>
-class GuardedAllocator : public std::allocator<T> {
+class GuardedAllocator {
 public:
-       template<typename _Tp1>
-       struct rebind {
-               typedef GuardedAllocator<_Tp1> other;
-       };
+       typedef size_t size_type;
+       typedef ptrdiff_t difference_type;
+       typedef T *pointer;
+       typedef const T *const_pointer;
+       typedef T& reference;
+       typedef const T& const_reference;
+       typedef T value_type;
+
+       GuardedAllocator() {}
+       GuardedAllocator(const GuardedAllocator&) {}
 
        T *allocate(size_t n, const void *hint = 0)
        {
                util_guarded_mem_alloc(n * sizeof(T));
-#ifdef WITH_BLENDER_GUARDEDALLOC
                (void)hint;
-               return (T*)MEM_mallocN_aligned(n * sizeof(T), 16, "Cycles Alloc");
+#ifdef WITH_BLENDER_GUARDEDALLOC
+               if(n == 0) {
+                       return NULL;
+               }
+               return (T*)MEM_mallocN(n * sizeof(T), "Cycles Alloc");
 #else
-               return std::allocator<T>::allocate(n, hint);
+               return (T*)malloc(n * sizeof(T));
 #endif
        }
 
        void deallocate(T *p, size_t n)
        {
                util_guarded_mem_free(n * sizeof(T));
+               if(p != NULL) {
 #ifdef WITH_BLENDER_GUARDEDALLOC
-               MEM_freeN((void*)p);
+                       MEM_freeN(p);
 #else
-               std::allocator<T>::deallocate(p, n);
+                       free(p);
 #endif
+               }
+       }
+
+       T *address(T& x) const
+       {
+               return &x;
+       }
+
+       const T *address(const T& x) const
+       {
+               return &x;
+       }
+
+       GuardedAllocator<T>& operator=(const GuardedAllocator&)
+       {
+               return *this;
+       }
+
+       void construct(T *p, const T& val)
+       {
+               new ((T *)p) T(val);
+       }
+
+       void destroy(T *p)
+       {
+               p->~T();
+       }
+
+       size_t max_size() const
+       {
+               return size_t(-1);
        }
 
-       GuardedAllocator() : std::allocator<T>() {  }
-       GuardedAllocator(const GuardedAllocator &a) : std::allocator<T>(a) { }
        template <class U>
-       GuardedAllocator(const GuardedAllocator<U> &a) : std::allocator<T>(a) { }
-       ~GuardedAllocator() { }
+       struct rebind {
+               typedef GuardedAllocator<U> other;
+       };
+
+       template <class U>
+       GuardedAllocator(const GuardedAllocator<U>&) {}
+
+       template <class U>
+       GuardedAllocator& operator=(const GuardedAllocator<U>&) { return *this; }
 };
 
 /* Get memory usage and peak from the guarded STL allocator. */
index d56553d1d4a5fc9632533ae61f95f7360de011a5..5523f3738249ae3a801051bf1d629ce0b10f7bf4 100644 (file)
@@ -219,6 +219,12 @@ void TaskScheduler::exit()
        }
 }
 
+void TaskScheduler::free_memory()
+{
+       assert(users == 0);
+       threads.free_memory();
+}
+
 bool TaskScheduler::thread_wait_pop(Entry& entry)
 {
        thread_scoped_lock queue_lock(queue_mutex);
index debcff3b7767ce8136ee9c43918a84a2a8754619..a8e1963252a2ab346cfa2e9d5b696e755fc41dbe 100644 (file)
@@ -91,6 +91,7 @@ class TaskScheduler
 public:
        static void init(int num_threads = 0);
        static void exit();
+       static void free_memory();
 
        /* number of threads that can work on task */
        static int num_threads() { return threads.size(); }
index 623436483a068259665bbac7a31cd683cf9c5627..830aa15291dce3dc8584d97362e375743617bf40 100644 (file)
 #include <vector>
 
 #include "util_aligned_malloc.h"
+#include "util_guarded_allocator.h"
 #include "util_types.h"
 
-#ifdef WITH_CYCLES_DEBUG
-#  include "util_guarded_allocator.h"
-#endif
-
 CCL_NAMESPACE_BEGIN
 
 /* Vector
  *
  * Own subclass-ed vestion of std::vector. Subclass is needed because:
  *
- * - When building with WITH_CYCLES_DEBUG we need to use own allocator which
- *   keeps track of used/peak memory.
+ * - Use own allocator which keeps track of used/peak memory.
  *
  * - Have method to ensure capacity is re-set to 0.
  */
 template<typename value_type,
-#ifdef WITH_CYCLES_DEBUG
-         typename allocator_type = GuardedAllocator<value_type>
-#else
-         typename allocator_type = std::allocator<value_type>
-#endif
-        >
+         typename allocator_type = GuardedAllocator<value_type> >
 class vector : public std::vector<value_type, allocator_type>
 {
 public: