Cycles: threading optimizations
author Brecht Van Lommel <brechtvanlommel@pandora.be>
Sat, 5 May 2012 19:44:33 +0000 (19:44 +0000)
committer Brecht Van Lommel <brechtvanlommel@pandora.be>
Sat, 5 May 2012 19:44:33 +0000 (19:44 +0000)
* Multithreaded image loading: each thread can load a separate image.
* Better multithreading for multiple instanced meshes: different threads can now
  build BVHs for different meshes, rather than all cooperating on the same mesh.
  Especially noticeable for dynamic BVH building for the viewport; gave about a
  2x faster build on 8 cores in a fairly complex scene with many objects.
* The main thread, while waiting for worker threads, can now also do work
  itself, so (num_cores + 1) threads will be working. This supposedly gives
  better performance on some operating systems, but the effect has not been
  measured in detail yet.

intern/cycles/bvh/bvh_build.cpp
intern/cycles/bvh/bvh_build.h
intern/cycles/device/device_cpu.cpp
intern/cycles/render/image.cpp
intern/cycles/render/image.h
intern/cycles/render/mesh.cpp
intern/cycles/render/mesh.h
intern/cycles/util/util_task.cpp
intern/cycles/util/util_task.h

index d865426304abc7d8b56afb5494eac425c2560fb7..28237aea611b0b82ab12263e8e57819a707422d3 100644 (file)
@@ -36,12 +36,12 @@ CCL_NAMESPACE_BEGIN
 
 class BVHBuildTask : public Task {
 public:
-       BVHBuildTask(InnerNode *node_, int child_, BVHObjectBinning& range_, int level_)
-       : node(node_), child(child_), level(level_), range(range_) {}
+       BVHBuildTask(BVHBuild *build, InnerNode *node, int child, BVHObjectBinning& range_, int level)
+       : range(range_)
+       {
+               run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range, level);
+       }
 
-       InnerNode *node;
-       int child;
-       int level;
        BVHObjectBinning range;
 };
 
@@ -55,8 +55,7 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_,
   prim_object(prim_object_),
   params(params_),
   progress(progress_),
-  progress_start_time(0.0),
-  task_pool(function_bind(&BVHBuild::thread_build_node, this, _1, _2))
+  progress_start_time(0.0)
 {
        spatial_min_overlap = 0.0f;
 }
@@ -177,7 +176,7 @@ BVHNode* BVHBuild::run()
                /* multithreaded binning build */
                BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
                rootnode = build_node(rootbin, 0);
-               task_pool.wait();
+               task_pool.wait_work();
        }
 
        /* delete if we cancelled */
@@ -210,25 +209,24 @@ void BVHBuild::progress_update()
        progress_start_time = time_dt(); 
 }
 
-void BVHBuild::thread_build_node(Task *task_, int thread_id)
+void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
 {
        if(progress.get_cancel())
                return;
 
        /* build nodes */
-       BVHBuildTask *task = (BVHBuildTask*)task_;
-       BVHNode *node = build_node(task->range, task->level);
+       BVHNode *node = build_node(*range, level);
 
        /* set child in inner node */
-       task->node->children[task->child] = node;
+       inner->children[child] = node;
 
        /* update progress */
-       if(task->range.size() < THREAD_TASK_SIZE) {
+       if(range->size() < THREAD_TASK_SIZE) {
                /*rotate(node, INT_MAX, 5);*/
 
                thread_scoped_lock lock(build_mutex);
 
-               progress_count += task->range.size();
+               progress_count += range->size();
                progress_update();
        }
 }
@@ -262,8 +260,8 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
                /* threaded build */
                inner = new InnerNode(range.bounds());
 
-               task_pool.push(new BVHBuildTask(inner, 0, left, level + 1), true);
-               task_pool.push(new BVHBuildTask(inner, 1, right, level + 1), true);
+               task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
+               task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
        }
 
        return inner;
index 84e14632b4b34ce099e499f9e7b82f496ec69e8c..44ef918b3269fe0c187e42caa4f9a9c89a29ff99 100644 (file)
@@ -29,7 +29,9 @@
 
 CCL_NAMESPACE_BEGIN
 
+class BVHBuildTask;
 class BVHParams;
+class InnerNode;
 class Mesh;
 class Object;
 class Progress;
@@ -54,6 +56,7 @@ protected:
        friend class BVHMixedSplit;
        friend class BVHObjectSplit;
        friend class BVHSpatialSplit;
+       friend class BVHBuildTask;
 
        /* adding references */
        void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
@@ -68,7 +71,7 @@ protected:
 
        /* threads */
        enum { THREAD_TASK_SIZE = 4096 };
-       void thread_build_node(Task *task_, int thread_id);
+       void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
        thread_mutex build_mutex;
 
        /* progress */
index ec84047c44fe8e9fb88489de27a72f810b85cc91..07988d32aff4a4d83c9c8cac9d80cd63dae8ec1e 100644 (file)
@@ -44,7 +44,6 @@ public:
        KernelGlobals *kg;
        
        CPUDevice(int threads_num)
-       : task_pool(function_bind(&CPUDevice::thread_run, this, _1, _2))
        {
                kg = kernel_globals_create();
 
@@ -113,10 +112,8 @@ public:
 #endif
        }
 
-       void thread_run(Task *task_, int thread_id)
+       void thread_run(DeviceTask *task)
        {
-               DeviceTask *task = (DeviceTask*)task_;
-
                if(task->type == DeviceTask::PATH_TRACE)
                        thread_path_trace(*task);
                else if(task->type == DeviceTask::TONEMAP)
@@ -125,6 +122,15 @@ public:
                        thread_shader(*task);
        }
 
+       class CPUDeviceTask : public DeviceTask {
+       public:
+               CPUDeviceTask(CPUDevice *device, DeviceTask& task)
+               : DeviceTask(task)
+               {
+                       run = function_bind(&CPUDevice::thread_run, device, this);
+               }
+       };
+
        void thread_path_trace(DeviceTask& task)
        {
                if(task_pool.cancelled())
@@ -226,12 +232,12 @@ public:
                task.split(tasks, TaskScheduler::num_threads()*10);
 
                foreach(DeviceTask& task, tasks)
-                       task_pool.push(new DeviceTask(task));
+                       task_pool.push(new CPUDeviceTask(this, task));
        }
 
        void task_wait()
        {
-               task_pool.wait();
+               task_pool.wait_work();
        }
 
        void task_cancel()
index 78b8f06c7b468cb419eb1ace34053fbea5276a02..3e6052338c0c65d617dae1ab85f99e5ce2ac7721 100644 (file)
@@ -324,8 +324,10 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
        return true;
 }
 
-void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int slot)
+void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progress)
 {
+       if(progress->get_cancel())
+               return;
        if(osl_texture_system)
                return;
 
@@ -342,6 +344,9 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
        }
 
        if(is_float) {
+               string filename = path_filename(float_images[slot]->filename);
+               progress->set_status("Updating Images", "Loading " + filename);
+
                device_vector<float4>& tex_img = dscene->tex_float_image[slot - TEX_IMAGE_FLOAT_START];
 
                if(tex_img.device_pointer)
@@ -365,6 +370,9 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
                device->tex_alloc(name.c_str(), tex_img, true, true);
        }
        else {
+               string filename = path_filename(images[slot]->filename);
+               progress->set_status("Updating Images", "Loading " + filename);
+
                device_vector<uchar4>& tex_img = dscene->tex_image[slot];
 
                if(tex_img.device_pointer)
@@ -387,6 +395,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
 
                device->tex_alloc(name.c_str(), tex_img, true, true);
        }
+
+       img->need_load = false;
 }
 
 void ImageManager::device_free_image(Device *device, DeviceScene *dscene, int slot)
@@ -431,39 +441,37 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
 {
        if(!need_update)
                return;
+       
+       TaskPool pool;
 
        for(size_t slot = 0; slot < images.size(); slot++) {
-               if(images[slot]) {
-                       if(images[slot]->users == 0) {
-                               device_free_image(device, dscene, slot);
-                       }
-                       else if(images[slot]->need_load) {
-                               string name = path_filename(images[slot]->filename);
-                               progress.set_status("Updating Images", "Loading " + name);
-                               device_load_image(device, dscene, slot);
-                               images[slot]->need_load = false;
-                       }
+               if(!images[slot])
+                       continue;
 
-                       if(progress.get_cancel()) return;
+               if(images[slot]->users == 0) {
+                       device_free_image(device, dscene, slot);
+               }
+               else if(images[slot]->need_load) {
+                       if(!osl_texture_system) 
+                               pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot, &progress));
                }
        }
 
        for(size_t slot = 0; slot < float_images.size(); slot++) {
-               if(float_images[slot]) {
-                       if(float_images[slot]->users == 0) {
-                               device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
-                       }
-                       else if(float_images[slot]->need_load) {
-                               string name = path_filename(float_images[slot]->filename);
-                               progress.set_status("Updating Images", "Loading " + name);
-                               device_load_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
-                               float_images[slot]->need_load = false;
-                       }
+               if(!float_images[slot])
+                       continue;
 
-                       if(progress.get_cancel()) return;
+               if(float_images[slot]->users == 0) {
+                       device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
+               }
+               else if(float_images[slot]->need_load) {
+                       if(!osl_texture_system) 
+                               pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot + TEX_IMAGE_FLOAT_START, &progress));
                }
        }
 
+       pool.wait_work();
+
        need_update = false;
 }
 
index d789e6885e3a6f420c955c53b0fb43af3ea681b5..cc01b4a8e4cbbe9458aabd0d584bf963741f01de 100644 (file)
@@ -65,7 +65,7 @@ private:
        bool file_load_image(Image *img, device_vector<uchar4>& tex_img);
        bool file_load_float_image(Image *img, device_vector<float4>& tex_img);
 
-       void device_load_image(Device *device, DeviceScene *dscene, int slot);
+       void device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progess);
        void device_free_image(Device *device, DeviceScene *dscene, int slot);
 };
 
index 5d96611ff26a3c7e9918c43468dec49f171dcb2e..0422f97a7066dfd46fe4df303dbfd2c97c979520 100644 (file)
@@ -242,31 +242,47 @@ void Mesh::pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset)
        }
 }
 
-void Mesh::compute_bvh(SceneParams *params, Progress& progress)
+void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total)
 {
-       Object object;
-       object.mesh = this;
+       if(progress->get_cancel())
+               return;
 
-       vector<Object*> objects;
-       objects.push_back(&object);
+       compute_bounds();
 
-       if(bvh && !need_update_rebuild) {
-               progress.set_substatus("Refitting BVH");
-               bvh->objects = objects;
-               bvh->refit(progress);
-       }
-       else {
-               progress.set_substatus("Building BVH");
+       if(!transform_applied) {
+               string msg = "Updating Mesh BVH ";
+               if(name == "")
+                       msg += string_printf("%u/%u", (uint)(n+1), (uint)total);
+               else
+                       msg += string_printf("%s %u/%u", name.c_str(), (uint)(n+1), (uint)total);
+
+               Object object;
+               object.mesh = this;
+
+               vector<Object*> objects;
+               objects.push_back(&object);
 
-               BVHParams bparams;
-               bparams.use_cache = params->use_bvh_cache;
-               bparams.use_spatial_split = params->use_bvh_spatial_split;
-               bparams.use_qbvh = params->use_qbvh;
+               if(bvh && !need_update_rebuild) {
+                       progress->set_status(msg, "Refitting BVH");
+                       bvh->objects = objects;
+                       bvh->refit(*progress);
+               }
+               else {
+                       progress->set_status(msg, "Building BVH");
+
+                       BVHParams bparams;
+                       bparams.use_cache = params->use_bvh_cache;
+                       bparams.use_spatial_split = params->use_bvh_spatial_split;
+                       bparams.use_qbvh = params->use_qbvh;
 
-               delete bvh;
-               bvh = BVH::create(bparams, objects);
-               bvh->build(progress);
+                       delete bvh;
+                       bvh = BVH::create(bparams, objects);
+                       bvh->build(*progress);
+               }
        }
+
+       need_update = false;
+       need_update_rebuild = false;
 }
 
 void Mesh::tag_update(Scene *scene, bool rebuild)
@@ -686,35 +702,22 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
        }
 
        /* update bvh */
-       size_t i = 0, num_instance_bvh = 0;
+       size_t i = 0, num_bvh = 0;
 
        foreach(Mesh *mesh, scene->meshes)
                if(mesh->need_update && !mesh->transform_applied)
-                       num_instance_bvh++;
+                       num_bvh++;
+
+       TaskPool pool;
 
        foreach(Mesh *mesh, scene->meshes) {
                if(mesh->need_update) {
-                       mesh->compute_bounds();
-
-                       if(!mesh->transform_applied) {
-                               string msg = "Updating Mesh BVH ";
-                               if(mesh->name == "")
-                                       msg += string_printf("%u/%u", (uint)(i+1), (uint)num_instance_bvh);
-                               else
-                                       msg += string_printf("%s %u/%u", mesh->name.c_str(), (uint)(i+1), (uint)num_instance_bvh);
-                               progress.set_status(msg, "Building BVH");
-
-                               mesh->compute_bvh(&scene->params, progress);
-
-                               i++;
-                       }
-
-                       if(progress.get_cancel()) return;
-
-                       mesh->need_update = false;
-                       mesh->need_update_rebuild = false;
+                       pool.push(function_bind(&Mesh::compute_bvh, mesh, &scene->params, &progress, i, num_bvh));
+                       i++;
                }
        }
+
+       pool.wait_work();
        
        foreach(Shader *shader, scene->shaders)
                shader->need_update_attributes = false;
index 047a2d2624d420a22f9bc3b1696b98dc785230e4..637143f5adf41916f8e67723d3d7a9a0c69a96a9 100644 (file)
@@ -96,7 +96,7 @@ public:
 
        void pack_normals(Scene *scene, float4 *normal, float4 *vnormal);
        void pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset);
-       void compute_bvh(SceneParams *params, Progress& progress);
+       void compute_bvh(SceneParams *params, Progress *progress, int n, int total);
 
        bool need_attribute(Scene *scene, AttributeStandard std);
        bool need_attribute(Scene *scene, ustring name);
index 6da9a70ec0c68b034dcd7432bdf09f7075f029a0..fc806275ea42257dd33d5ce5502daff5b1fb8203 100644 (file)
@@ -25,14 +25,12 @@ CCL_NAMESPACE_BEGIN
 
 /* Task Pool */
 
-TaskPool::TaskPool(const TaskRunFunction& run_)
+TaskPool::TaskPool()
 {
        num = 0;
        num_done = 0;
 
        do_cancel = false;
-
-       run = run_;
 }
 
 TaskPool::~TaskPool()
@@ -50,12 +48,55 @@ void TaskPool::push(Task *task, bool front)
        TaskScheduler::push(entry, front);
 }
 
-void TaskPool::wait()
+void TaskPool::push(const TaskRunFunction& run, bool front)
 {
-       thread_scoped_lock lock(done_mutex);
+       push(new Task(run), front);
+}
+
+void TaskPool::wait_work()
+{
+       thread_scoped_lock done_lock(done_mutex);
+
+       while(num_done != num) {
+               thread_scoped_lock queue_lock(TaskScheduler::queue_mutex);
+
+               /* find task from this pool. if we get a task from another pool,
+                * we can get into deadlock */
+               TaskScheduler::Entry work_entry;
+               bool found_entry = false;
+               list<TaskScheduler::Entry>::iterator it;
+
+               for(it = TaskScheduler::queue.begin(); it != TaskScheduler::queue.end(); it++) {
+                       TaskScheduler::Entry& entry = *it;
+
+                       if(entry.pool == this) {
+                               work_entry = entry;
+                               found_entry = true;
+                               TaskScheduler::queue.erase(it);
+                               break;
+                       }
+               }
+
+               queue_lock.unlock();
+
+               /* if found task, do it, otherwise wait until other tasks are done */
+               if(found_entry) {
+                       done_lock.unlock();
+
+                       /* run task */
+                       work_entry.task->run();
+
+                       /* delete task */
+                       delete work_entry.task;
 
-       while(num_done != num)
-               done_cond.wait(lock);
+                       /* notify pool task was done */
+                       done_increase(1);
+
+                       done_lock.lock();
+               }
+               else
+                       done_cond.wait(done_lock);
+       }
 }
 
 void TaskPool::cancel()
@@ -63,7 +104,12 @@ void TaskPool::cancel()
        TaskScheduler::clear(this);
 
        do_cancel = true;
-       wait();
+       {
+               thread_scoped_lock lock(done_mutex);
+
+               while(num_done != num)
+                       done_cond.wait(lock);
+       }
        do_cancel = false;
 }
 
@@ -94,6 +140,7 @@ void TaskPool::done_increase(int done)
 thread_mutex TaskScheduler::mutex;
 int TaskScheduler::users = 0;
 vector<thread*> TaskScheduler::threads;
+vector<int> TaskScheduler::thread_level;
 volatile bool TaskScheduler::do_exit = false;
 
 list<TaskScheduler::Entry> TaskScheduler::queue;
@@ -114,9 +161,12 @@ void TaskScheduler::init(int num_threads)
                        num_threads = system_cpu_thread_count();
 
                threads.resize(num_threads);
+               thread_level.resize(num_threads);
 
-               for(size_t i = 0; i < threads.size(); i++)
+               for(size_t i = 0; i < threads.size(); i++) {
                        threads[i] = new thread(function_bind(&TaskScheduler::thread_run, i));
+                       thread_level[i] = 0;
+               }
        }
        
        users++;
@@ -140,6 +190,7 @@ void TaskScheduler::exit()
                }
 
                threads.clear();
+               thread_level.clear();
        }
 }
 
@@ -170,7 +221,7 @@ void TaskScheduler::thread_run(int thread_id)
        /* keep popping off tasks */
        while(thread_wait_pop(entry)) {
                /* run task */
-               entry.pool->run(entry.task, thread_id);
+               entry.task->run();
 
                /* delete task */
                delete entry.task;
@@ -196,20 +247,20 @@ void TaskScheduler::push(Entry& entry, bool front)
 
 void TaskScheduler::clear(TaskPool *pool)
 {
-       thread_scoped_lock lock(TaskScheduler::queue_mutex);
+       thread_scoped_lock lock(queue_mutex);
 
        /* erase all tasks from this pool from the queue */
-       list<TaskScheduler::Entry>::iterator it = TaskScheduler::queue.begin();
+       list<Entry>::iterator it = queue.begin();
        int done = 0;
 
-       while(it != TaskScheduler::queue.end()) {
-               TaskScheduler::Entry& entry = *it;
+       while(it != queue.end()) {
+               Entry& entry = *it;
 
                if(entry.pool == pool) {
                        done++;
                        delete entry.task;
 
-                       it = TaskScheduler::queue.erase(it);
+                       it = queue.erase(it);
                }
                else
                        it++;
index acdb2cb50a20d6932072f38296cf57aa863731bc..6b7562c22674ae615387ed450244262cedd49a5c 100644 (file)
@@ -29,7 +29,7 @@ class Task;
 class TaskPool;
 class TaskScheduler;
 
-typedef boost::function<void(Task*,int)> TaskRunFunction;
+typedef boost::function<void(void)> TaskRunFunction;
 
 /* Task
  *
@@ -39,7 +39,11 @@ class Task
 {
 public:
        Task() {};
+       Task(const TaskRunFunction& run_) : run(run_) {}
+
        virtual ~Task() {}
+
+       TaskRunFunction run;
 };
 
 /* Task Pool
@@ -54,12 +58,13 @@ public:
 class TaskPool
 {
 public:
-       TaskPool(const TaskRunFunction& run);
+       TaskPool();
        ~TaskPool();
 
        void push(Task *task, bool front = false);
+       void push(const TaskRunFunction& run, bool front = false);
 
-       void wait();            /* wait until all tasks are done */
+       void wait_work();       /* work and wait until all tasks are done */
        void cancel();          /* cancel all tasks, keep worker threads running */
        void stop();            /* stop all worker threads */
 
@@ -70,8 +75,6 @@ protected:
 
        void done_increase(int done);
 
-       TaskRunFunction run;
-
        thread_mutex done_mutex;
        thread_condition_variable done_cond;
 
@@ -103,6 +106,7 @@ protected:
        static thread_mutex mutex;
        static int users;
        static vector<thread*> threads;
+       static vector<int> thread_level;
        static volatile bool do_exit;
 
        static list<Entry> queue;