Cycles: Implement threaded SVM nodes compilation
author: Sergey Sharybin <sergey.vfx@gmail.com>
Fri, 9 Sep 2016 10:27:51 +0000 (12:27 +0200)
committer: Sergey Sharybin <sergey.vfx@gmail.com>
Tue, 13 Sep 2016 11:46:49 +0000 (13:46 +0200)
The title says it all, actually. In tests with the barber shop scene this
gives a 2-3x speedup for shader compilation on my oldie i7 machine. The
gain is mainly due to texture metadata queries from jpeg files (which
seem to require de-compression before the metadata can be read). But in
theory it could give nice improvements for scenes with huge node trees
as well (I'm talking about node trees of fractal-like complexity, which
we had reports about in the past).

Reviewers: juicyfruit, dingto, lukasstockner97, brecht

Reviewed By: brecht

Subscribers: monio, Blendify

Differential Revision: https://developer.blender.org/D2215

intern/cycles/render/image.cpp
intern/cycles/render/svm.cpp
intern/cycles/render/svm.h

index 24543601ef9bacc2ea8ac4b4bc330860857a859d..b387c2c2f981698358a108377cb0d21a18cee798 100644 (file)
@@ -280,6 +280,8 @@ int ImageManager::add_image(const string& filename,
 
        ImageDataType type = get_image_metadata(filename, builtin_data, is_linear);
 
+       thread_scoped_lock device_lock(device_mutex);
+
        /* Do we have a float? */
        if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
                is_float = true;
index 8b3affb011eb9df2be4c04a4430e97f782b1e38d..9d3f49a3c84fd1887735897c5598a71fab401f49 100644 (file)
@@ -27,6 +27,7 @@
 #include "util_logging.h"
 #include "util_foreach.h"
 #include "util_progress.h"
+#include "util_task.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -44,6 +45,51 @@ void SVMShaderManager::reset(Scene * /*scene*/)
 {
 }
 
+/* Compile a single shader and append its nodes to the shared node array.
+ *
+ * Pushed to a TaskPool once per shader by device_update(), so several
+ * invocations may run concurrently; every access to state shared between
+ * them is guarded by nodes_lock_.
+ *
+ * scene            - provides the managers used for compilation.
+ * shader           - shader to compile; its graph must be non-NULL.
+ * progress         - polled for cancellation before any work starts.
+ * global_svm_nodes - shared node array; the entry at shader->id is the jump
+ *                    node which gets patched to point at this shader's nodes.
+ */
+void SVMShaderManager::device_update_shader(Scene *scene,
+                                            Shader *shader,
+                                            Progress *progress,
+                                            vector<int4> *global_svm_nodes)
+{
+       if(progress->get_cancel()) {
+               return;
+       }
+       assert(shader->graph);
+
+       /* Compile into a task-local list. Slot 0 is a placeholder jump node
+        * which the compiler fills with offsets local to this list.
+        */
+       vector<int4> svm_nodes;
+       svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
+
+       SVMCompiler::Summary summary;
+       SVMCompiler compiler(scene->shader_manager, scene->image_manager);
+       compiler.background = (shader == scene->default_background);
+       compiler.compile(scene, shader, svm_nodes, 0, &summary);
+
+       VLOG(2) << "Compilation summary:\n"
+               << "Shader name: " << shader->name << "\n"
+               << summary.full_report();
+
+       /* Everything below touches state shared with the other compilation
+        * tasks, so all of it happens under the lock. In particular the jump
+        * node patching and the copy must not run unlocked: a resize() done by
+        * another task may reallocate the array, which would leave us writing
+        * into freed memory.
+        */
+       nodes_lock_.lock();
+       if(shader->use_mis && shader->has_surface_emission) {
+               scene->light_manager->need_update = true;
+       }
+       /* NOTE: This allocates svm_nodes.size() entries while only
+        * svm_nodes.size() - 1 are copied below, the trailing entry is left
+        * value-initialized. Kept that way to not alter the array layout.
+        */
+       const size_t global_nodes_size = global_svm_nodes->size();
+       global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
+       /* Offset local SVM nodes to a global address space. Local index 1 ends
+        * up at global_nodes_size, hence the -1.
+        */
+       int4& jump_node = global_svm_nodes->at(shader->id);
+       jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
+       jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
+       jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
+       /* Copy new nodes to global storage, skipping the local jump node.
+        * Guarded so svm_nodes[1] is never indexed when nothing was generated.
+        */
+       if(svm_nodes.size() > 1) {
+               memcpy(&global_svm_nodes->at(global_nodes_size),
+                      &svm_nodes[1],
+                      sizeof(int4) * (svm_nodes.size() - 1));
+       }
+       nodes_lock_.unlock();
+}
+
 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
 {
        if(!need_update)
@@ -67,23 +113,20 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
                svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
        }
 
+       TaskPool task_pool;
        foreach(Shader *shader, scene->shaders) {
-               if(progress.get_cancel()) return;
-
-               assert(shader->graph);
-
-               SVMCompiler::Summary summary;
-               SVMCompiler compiler(scene->shader_manager, scene->image_manager);
-               compiler.background = (shader == scene->default_background);
-               compiler.compile(scene, shader, svm_nodes, shader->id, &summary);
-
-               if(shader->use_mis && shader->has_surface_emission) {
-                       scene->light_manager->need_update = true;
-               }
+               task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
+                                            this,
+                                            scene,
+                                            shader,
+                                            &progress,
+                                            &svm_nodes),
+                              false);
+       }
+       task_pool.wait_work();
 
-               VLOG(2) << "Compilation summary:\n"
-                       << "Shader name: " << shader->name << "\n"
-                       << summary.full_report();
+       if(progress.get_cancel()) {
+               return;
        }
 
        dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
@@ -323,17 +366,17 @@ uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
 
 void SVMCompiler::add_node(int a, int b, int c, int d)
 {
-       svm_nodes.push_back(make_int4(a, b, c, d));
+       current_svm_nodes.push_back(make_int4(a, b, c, d));
 }
 
 void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
 {
-       svm_nodes.push_back(make_int4(type, a, b, c));
+       current_svm_nodes.push_back(make_int4(type, a, b, c));
 }
 
 void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
 {
-       svm_nodes.push_back(make_int4(type,
+       current_svm_nodes.push_back(make_int4(type,
                __float_as_int(f.x),
                __float_as_int(f.y),
                __float_as_int(f.z)));
@@ -341,7 +384,7 @@ void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
 
 void SVMCompiler::add_node(const float4& f)
 {
-       svm_nodes.push_back(make_int4(
+       current_svm_nodes.push_back(make_int4(
                __float_as_int(f.x),
                __float_as_int(f.y),
                __float_as_int(f.z),
@@ -578,26 +621,38 @@ void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
 
                        /* generate instructions for input closure 1 */
                        if(cl1in->link) {
-                               /* add instruction to skip closure and its dependencies if mix weight is zero */
-                               svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE, 0, stack_assign(facin), 0));
-                               int node_jump_skip_index = svm_nodes.size() - 1;
+                               /* Add instruction to skip closure and its dependencies if mix
+                                * weight is zero.
+                                */
+                               current_svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE,
+                                                                     0,
+                                                                     stack_assign(facin),
+                                                                     0));
+                               int node_jump_skip_index = current_svm_nodes.size() - 1;
 
                                generate_multi_closure(root_node, cl1in->link->parent, state);
 
-                               /* fill in jump instruction location to be after closure */
-                               svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1;
+                               /* Fill in jump instruction location to be after closure. */
+                               current_svm_nodes[node_jump_skip_index].y =
+                                       current_svm_nodes.size() - node_jump_skip_index - 1;
                        }
 
                        /* generate instructions for input closure 2 */
                        if(cl2in->link) {
-                               /* add instruction to skip closure and its dependencies if mix weight is zero */
-                               svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO, 0, stack_assign(facin), 0));
-                               int node_jump_skip_index = svm_nodes.size() - 1;
+                               /* Add instruction to skip closure and its dependencies if mix
+                                * weight is zero.
+                                */
+                               current_svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO,
+                                                                     0,
+                                                                     stack_assign(facin),
+                                                                     0));
+                               int node_jump_skip_index = current_svm_nodes.size() - 1;
 
                                generate_multi_closure(root_node, cl2in->link->parent, state);
 
-                               /* fill in jump instruction location to be after closure */
-                               svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1;
+                               /* Fill in jump instruction location to be after closure. */
+                               current_svm_nodes[node_jump_skip_index].y =
+                                       current_svm_nodes.size() - node_jump_skip_index - 1;
                        }
 
                        /* unassign */
@@ -667,7 +722,7 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
 
        /* clear all compiler state */
        memset(&active_stack, 0, sizeof(active_stack));
-       svm_nodes.clear();
+       current_svm_nodes.clear();
 
        foreach(ShaderNode *node_iter, graph->nodes) {
                foreach(ShaderInput *input, node_iter->inputs)
@@ -727,7 +782,7 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
 
        /* if compile failed, generate empty shader */
        if(compile_failed) {
-               svm_nodes.clear();
+               current_svm_nodes.clear();
                compile_failed = false;
        }
 
@@ -739,13 +794,13 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
 
 void SVMCompiler::compile(Scene *scene,
                           Shader *shader,
-                          vector<int4>& global_svm_nodes,
+                          vector<int4>& svm_nodes,
                           int index,
                           Summary *summary)
 {
        /* copy graph for shader with bump mapping */
        ShaderNode *node = shader->graph->output();
-       int start_num_svm_nodes = global_svm_nodes.size();
+       int start_num_svm_nodes = svm_nodes.size();
 
        const double time_start = time_dt();
 
@@ -789,8 +844,10 @@ void SVMCompiler::compile(Scene *scene,
        if(shader->displacement_method != DISPLACE_TRUE && shader->graph_bump) {
                scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL);
                compile_type(shader, shader->graph_bump, SHADER_TYPE_BUMP);
-               global_svm_nodes[index].y = global_svm_nodes.size();
-               global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
+               svm_nodes[index].y = svm_nodes.size();
+               svm_nodes.insert(svm_nodes.end(),
+                                current_svm_nodes.begin(),
+                                current_svm_nodes.end());
        }
 
        /* generate surface shader */
@@ -799,32 +856,38 @@ void SVMCompiler::compile(Scene *scene,
                compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
                /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */
                if(shader->displacement_method == DISPLACE_TRUE || !shader->graph_bump) {
-                       global_svm_nodes[index].y = global_svm_nodes.size();
+                       svm_nodes[index].y = svm_nodes.size();
                }
-               global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
+               svm_nodes.insert(svm_nodes.end(),
+                                current_svm_nodes.begin(),
+                                current_svm_nodes.end());
        }
 
        /* generate volume shader */
        {
                scoped_timer timer((summary != NULL)? &summary->time_generate_volume: NULL);
                compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
-               global_svm_nodes[index].z = global_svm_nodes.size();
-               global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
+               svm_nodes[index].z = svm_nodes.size();
+               svm_nodes.insert(svm_nodes.end(),
+                                current_svm_nodes.begin(),
+                                current_svm_nodes.end());
        }
 
        /* generate displacement shader */
        {
                scoped_timer timer((summary != NULL)? &summary->time_generate_displacement: NULL);
                compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
-               global_svm_nodes[index].w = global_svm_nodes.size();
-               global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
+               svm_nodes[index].w = svm_nodes.size();
+               svm_nodes.insert(svm_nodes.end(),
+                                current_svm_nodes.begin(),
+                                current_svm_nodes.end());
        }
 
        /* Fill in summary information. */
        if(summary != NULL) {
                summary->time_total = time_dt() - time_start;
                summary->peak_stack_usage = max_stack_use;
-               summary->num_svm_nodes = global_svm_nodes.size() - start_num_svm_nodes;
+               summary->num_svm_nodes = svm_nodes.size() - start_num_svm_nodes;
        }
 }
 
index 99e91ca0c3e1c0cfdf8f316e6ad8e54556518f6b..a501b6bc8b17162995e24875e386da9f1b3830e9 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "util_set.h"
 #include "util_string.h"
+#include "util_thread.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -46,6 +47,15 @@ public:
 
        void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
        void device_free(Device *device, DeviceScene *dscene, Scene *scene);
+
+protected:
+       /* Lock used to synchronize threaded nodes compilation. */
+       thread_spin_lock nodes_lock_;
+
+       void device_update_shader(Scene *scene,
+                                 Shader *shader,
+                                 Progress *progress,
+                                 vector<int4> *global_svm_nodes);
 };
 
 /* Graph Compiler */
@@ -200,7 +210,7 @@ protected:
        /* compile */
        void compile_type(Shader *shader, ShaderGraph *graph, ShaderType type);
 
-       vector<int4> svm_nodes;
+       vector<int4> current_svm_nodes;
        ShaderType current_type;
        Shader *current_shader;
        ShaderGraph *current_graph;