Make texture node threaded
authorSergey Sharybin <sergey.vfx@gmail.com>
Thu, 3 Mar 2016 10:59:20 +0000 (15:59 +0500)
committerSergey Sharybin <sergey.vfx@gmail.com>
Thu, 3 Mar 2016 11:02:31 +0000 (16:02 +0500)
Quite a trivial idea -- just pass the thread ID to the texture sampling function.

Implemented using TLS (thread-local storage) to avoid passing a huge amount of extra context around.
Should work on all platforms, but a compilation test is required.

Reviewers: juicyfruit, campbellbarton

Reviewed By: campbellbarton

Differential Revision: https://developer.blender.org/D1831

source/blender/blenkernel/intern/effect.c
source/blender/compositor/intern/COM_CPUDevice.cpp
source/blender/compositor/intern/COM_CPUDevice.h
source/blender/compositor/intern/COM_WorkScheduler.cpp
source/blender/compositor/intern/COM_WorkScheduler.h
source/blender/compositor/operations/COM_TextureOperation.cpp
source/blender/compositor/operations/COM_TextureOperation.h
source/blender/makesrna/intern/rna_texture_api.c
source/blender/render/extern/include/RE_shader_ext.h
source/blender/render/intern/source/render_texture.c

index 30696012221c4b00b9cb4cc2dc8d9ece1c0a8590..12bce70594bcea628e263d2f170054984a358db3 100644 (file)
@@ -757,7 +757,7 @@ static void do_texture_effector(EffectorCache *eff, EffectorData *efd, EffectedP
 
        scene_color_manage = BKE_scene_check_color_management_enabled(eff->scene);
 
-       hasrgb = multitex_ext(eff->pd->tex, tex_co, NULL, NULL, 0, result, NULL, scene_color_manage, false);
+       hasrgb = multitex_ext(eff->pd->tex, tex_co, NULL, NULL, 0, result, 0, NULL, scene_color_manage, false);
 
        if (hasrgb && mode==PFIELD_TEX_RGB) {
                force[0] = (0.5f - result->tr) * strength;
@@ -768,15 +768,15 @@ static void do_texture_effector(EffectorCache *eff, EffectorData *efd, EffectedP
                strength/=nabla;
 
                tex_co[0] += nabla;
-               multitex_ext(eff->pd->tex, tex_co, NULL, NULL, 0, result+1, NULL, scene_color_manage, false);
+               multitex_ext(eff->pd->tex, tex_co, NULL, NULL, 0, result+1, 0, NULL, scene_color_manage, false);
 
                tex_co[0] -= nabla;
                tex_co[1] += nabla;
-               multitex_ext(eff->pd->tex, tex_co, NULL, NULL, 0, result+2, NULL, scene_color_manage, false);
+               multitex_ext(eff->pd->tex, tex_co, NULL, NULL, 0, result+2, 0, NULL, scene_color_manage, false);
 
                tex_co[1] -= nabla;
                tex_co[2] += nabla;
-               multitex_ext(eff->pd->tex, tex_co, NULL, NULL, 0, result+3, NULL, scene_color_manage, false);
+               multitex_ext(eff->pd->tex, tex_co, NULL, NULL, 0, result+3, 0, NULL, scene_color_manage, false);
 
                if (mode == PFIELD_TEX_GRAD || !hasrgb) { /* if we don't have rgb fall back to grad */
                        /* generate intensity if texture only has rgb value */
index c7c3f7769fee06cea0031b04dd3bfea63d074b28..a5824ec5248d7d3cb8e17b53367fc4c400b896e2 100644 (file)
 
 #include "COM_CPUDevice.h"
 
+CPUDevice::CPUDevice(int thread_id)
+  : Device(),
+    m_thread_id(thread_id)
+{
+}
+
 void CPUDevice::execute(WorkPackage *work)
 {
        const unsigned int chunkNumber = work->getChunkNumber();
index 3dc8fff66a3bbeb6a94c21399931a1f001a5a750..d12666593d45e88358f15cb0e517f899276ccfa5 100644 (file)
  */
 class CPUDevice : public Device {
 public:
+       CPUDevice(int thread_id);
+
        /**
         * @brief execute a WorkPackage
         * @param work the WorkPackage to execute
         */
        void execute(WorkPackage *work);
+
+       int thread_id() { return m_thread_id; }
+
+protected:
+       int m_thread_id;
 };
 
 #endif
index fc6ea1299cf3554d07515f8f5774387a4a50f256..4c85f11f655d9af93981c59dd215d3184bfd0ce7 100644 (file)
@@ -50,7 +50,8 @@
 
 
 /// @brief list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
-static vector<CPUDevice *> g_cpudevices;
+static vector<CPUDevice*> g_cpudevices;
+static ThreadLocal(CPUDevice*) g_thread_device;
 
 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
 /// @brief list of all thread for every CPUDevice in cpudevices a thread exists
@@ -153,9 +154,9 @@ int COM_isHighlightedbNode(bNode *bnode)
 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
 void *WorkScheduler::thread_execute_cpu(void *data)
 {
-       Device *device = (Device *)data;
+       CPUDevice *device = (CPUDevice *)data;
        WorkPackage *work;
-       
+       BLI_thread_local_set(g_thread_device, device);
        while ((work = (WorkPackage *)BLI_thread_queue_pop(g_cpuqueue))) {
                HIGHLIGHT(work);
                device->execute(work);
@@ -310,18 +311,20 @@ void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads)
                        device->deinitialize();
                        delete device;
                }
-
+               if (g_cpuInitialized) {
+                       BLI_thread_local_delete(g_thread_device);
+               }
                g_cpuInitialized = false;
        }
 
        /* initialize CPU threads */
        if (!g_cpuInitialized) {
                for (int index = 0; index < num_cpu_threads; index++) {
-                       CPUDevice *device = new CPUDevice();
+                       CPUDevice *device = new CPUDevice(index);
                        device->initialize();
                        g_cpudevices.push_back(device);
                }
-
+               BLI_thread_local_create(g_thread_device);
                g_cpuInitialized = true;
        }
 
@@ -407,7 +410,7 @@ void WorkScheduler::deinitialize()
                        device->deinitialize();
                        delete device;
                }
-
+               BLI_thread_local_delete(g_thread_device);
                g_cpuInitialized = false;
        }
 
@@ -450,3 +453,8 @@ void WorkScheduler::deinitialize()
        }
 }
 
+int WorkScheduler::current_thread_id()
+{
+       CPUDevice *device = (CPUDevice *)BLI_thread_local_get(g_thread_device);
+       return device->thread_id();
+}
index 27afdf6efd08c685f6ed52e4cdd75e638347fe8b..67d3fc87ce17b9b4d3f93a39691b6be862b3ad72 100644 (file)
@@ -113,6 +113,8 @@ public:
         */
        static bool hasGPUDevices();
 
+       static int current_thread_id();
+
 #ifdef WITH_CXX_GUARDEDALLOC
        MEM_CXX_CLASS_ALLOC_FUNCS("COM:WorkScheduler")
 #endif
index 7d1d24a974792d5e127b7e93bb2f2ed2efc3feba..665bffc2c1cc76fa6f1aca31e9a7ad2794dbbf98 100644 (file)
@@ -21,6 +21,7 @@
  */
 
 #include "COM_TextureOperation.h"
+#include "COM_WorkScheduler.h"
 
 #include "BLI_listbase.h"
 #include "BLI_threads.h"
@@ -30,9 +31,7 @@ extern "C" {
 #include "BKE_node.h"
 }
 
-static ThreadMutex mutex_lock = BLI_MUTEX_INITIALIZER;
-
-TextureBaseOperation::TextureBaseOperation() : SingleThreadedOperation()
+TextureBaseOperation::TextureBaseOperation() : NodeOperation()
 {
        this->addInputSocket(COM_DT_VECTOR); //offset
        this->addInputSocket(COM_DT_VECTOR); //size
@@ -63,7 +62,7 @@ void TextureBaseOperation::initExecution()
        {
                ntreeTexBeginExecTree(this->m_texture->nodetree);
        }
-       SingleThreadedOperation::initExecution();
+       NodeOperation::initExecution();
 }
 void TextureBaseOperation::deinitExecution()
 {
@@ -78,7 +77,7 @@ void TextureBaseOperation::deinitExecution()
        {
                ntreeTexEndExecTree(this->m_texture->nodetree->execdata);
        }
-       SingleThreadedOperation::deinitExecution();
+       NodeOperation::deinitExecution();
 }
 
 void TextureBaseOperation::determineResolution(unsigned int resolution[2], unsigned int preferredResolution[2])
@@ -121,12 +120,16 @@ void TextureBaseOperation::executePixelSampled(float output[4], float x, float y
        vec[1] = textureSize[1] * (v + textureOffset[1]);
        vec[2] = textureSize[2] * textureOffset[2];
 
-       /* TODO(sergey): Need to pass thread ID to the multitex code,
-        * then we can avoid having mutex here.
-        */
-       BLI_mutex_lock(&mutex_lock);
-       retval = multitex_ext(this->m_texture, vec, NULL, NULL, 0, &texres, m_pool, m_sceneColorManage, false);
-       BLI_mutex_unlock(&mutex_lock);
+       const int thread_id = WorkScheduler::current_thread_id();
+       retval = multitex_ext(this->m_texture,
+                             vec,
+                             NULL, NULL,
+                             0,
+                             &texres,
+                             thread_id,
+                             m_pool,
+                             m_sceneColorManage,
+                             false);
 
        if (texres.talpha)
                output[3] = texres.ta;
index 47ef40882c598582db21e9318ade42dacb2ca376..4cc203b54a2b1a5380c8cb3471d0f9cb6d78e4b4 100644 (file)
@@ -24,7 +24,7 @@
 #ifndef _COM_TextureOperation_h
 #define _COM_TextureOperation_h
 
-#include "COM_SingleThreadedOperation.h"
+#include "COM_NodeOperation.h"
 #include "DNA_texture_types.h"
 #include "BLI_listbase.h"
 extern "C" {
@@ -39,7 +39,7 @@ extern "C" {
  *
  * @todo: rename to operation.
  */
-class TextureBaseOperation : public SingleThreadedOperation {
+class TextureBaseOperation : public NodeOperation {
 private:
        Tex *m_texture;
        const RenderData *m_rd;
index a27ba6ea06df23120e3ecfc589b863c3e0e8e35a..ef1ef5e1469ff8eb48cfb67647d4f58d0fb12106 100644 (file)
@@ -74,7 +74,7 @@ static void texture_evaluate(struct Tex *tex, float value[3], float r_color[4])
        TexResult texres = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0, NULL};
 
        /* TODO(sergey): always use color management now.  */
-       multitex_ext(tex, value, NULL, NULL, 1, &texres, NULL, true, false);
+       multitex_ext(tex, value, NULL, NULL, 1, &texres, 0, NULL, true, false);
 
        r_color[0] = texres.tr;
        r_color[1] = texres.tg;
index 2b07ace26dbb92add97390eab9061e5b4e89a4ba..12b97aedbd30cc3702d015d8b7ff9ef6d805699f 100644 (file)
@@ -198,7 +198,15 @@ struct ImagePool;
 struct Object;
 
 /* this one uses nodes */
-int    multitex_ext(struct Tex *tex, float texvec[3], float dxt[3], float dyt[3], int osatex, struct TexResult *texres, struct ImagePool *pool, bool scene_color_manage, const bool skip_load_image);
+int multitex_ext(struct Tex *tex,
+                 float texvec[3],
+                 float dxt[3], float dyt[3],
+                 int osatex,
+                 struct TexResult *texres,
+                 const short thread,
+                 struct ImagePool *pool,
+                 bool scene_color_manage,
+                 const bool skip_load_image);
 /* nodes disabled */
 int multitex_ext_safe(struct Tex *tex, float texvec[3], struct TexResult *texres, struct ImagePool *pool, bool scene_color_manage, const bool skip_load_image);
 /* only for internal node usage */
index 172fc999897260c2056b25ebb4d4faaf97f8d066..193f619c09ce6ec338a7221c18c0817994508f1e 100644 (file)
@@ -1377,9 +1377,28 @@ static int multitex_mtex(ShadeInput *shi, MTex *mtex, float texvec[3], float dxt
  *
  * Use it for stuff which is out of render pipeline.
  */
-int multitex_ext(Tex *tex, float texvec[3], float dxt[3], float dyt[3], int osatex, TexResult *texres, struct ImagePool *pool, bool scene_color_manage, const bool skip_load_image)
+int multitex_ext(Tex *tex,
+                 float texvec[3],
+                 float dxt[3], float dyt[3],
+                 int osatex,
+                 TexResult *texres,
+                 const short thread,
+                 struct ImagePool *pool,
+                 bool scene_color_manage,
+                 const bool skip_load_image)
 {
-       return multitex_nodes_intern(tex, texvec, dxt, dyt, osatex, texres, 0, 0, NULL, NULL, pool, scene_color_manage, skip_load_image, false);
+       return multitex_nodes_intern(tex,
+                                    texvec,
+                                    dxt, dyt,
+                                    osatex,
+                                    texres,
+                                    thread,
+                                    0,
+                                    NULL, NULL,
+                                    pool,
+                                    scene_color_manage,
+                                    skip_load_image,
+                                    false);
 }
 
 /* extern-tex doesn't support nodes (ntreeBeginExec() can't be called when rendering is going on)\