Bump maximum threads number to 1024
author Sergey Sharybin <sergey.vfx@gmail.com>
Sun, 7 Jun 2015 20:48:51 +0000 (22:48 +0200)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Fri, 15 Jul 2016 14:47:30 +0000 (16:47 +0200)
This commit contains all the changes required to bump the maximum number of
threads in the most optimal way. This is needed to avoid unnecessary
initialization or data allocation on systems with a lower thread count.

TODO: Still need to review arrays in render data structures from render_types.h.

P.S. We might remove the actual bump of max threads from this patch, so that
when applying the patch we can do all the preparation work first and then do
the actual bump of max threads.

Reviewers: mont29, campbellbarton

Reviewed By: mont29, campbellbarton

Maniphest Tasks: T43306

Differential Revision: https://developer.blender.org/D1343

source/blender/blenlib/BLI_threads.h
source/blender/render/intern/include/render_types.h
source/blender/render/intern/include/rendercore.h
source/blender/render/intern/source/convertblender.c
source/blender/render/intern/source/occlusion.c
source/blender/render/intern/source/rayshade.c
source/blender/render/intern/source/render_texture.c

index b4a465bbc74377904d92c15138def4da5d82a77b..0b1b4d8ee8cca1f66b70b4a5b02ad92643990bb4 100644 (file)
@@ -42,7 +42,7 @@ extern "C" {
 #endif
 
 /* for tables, button in UI, etc */
-#define BLENDER_MAX_THREADS     64
+#define BLENDER_MAX_THREADS     1024
 
 struct ListBase;
 struct TaskScheduler;
index 6de5da3795a1b2fdfd5bc44c8d4229e797f37115..b3a5ccdae170af19ea30384daa710e6b166cac62 100644 (file)
@@ -180,6 +180,7 @@ struct Render {
        float jit[32][2];
        float mblur_jit[32][2];
        ListBase *qmcsamplers;
+       int num_qmc_samplers;
        
        /* shadow counter, detect shadow-reuse for shaders */
        int shadowsamplenr[BLENDER_MAX_THREADS];
index 308903c6c6df00e21eefe188b96e2fd0bea31925..7254fd25ee67194cf1ff52fb069b688b7992a2a1 100644 (file)
@@ -90,7 +90,7 @@ extern void ray_shadow(ShadeInput *shi, LampRen *lar, float shadfac[4]);
 extern void ray_trace(ShadeInput *shi, ShadeResult *);
 extern void ray_ao(ShadeInput *shi, float ao[3], float env[3]);
 extern void init_jitter_plane(LampRen *lar);
-extern void init_ao_sphere(struct World *wrld);
+extern void init_ao_sphere(Render *re, struct World *wrld);
 extern void init_render_qmcsampler(Render *re);
 extern void free_render_qmcsampler(Render *re);
 
index 907974e20dcf1164edcfc8af60f6bad6e2db8459..ab828a0c04e18bb40f0e0b5b20e845148e3c6dcb 100644 (file)
@@ -5230,7 +5230,7 @@ void RE_Database_FromScene(Render *re, Main *bmain, Scene *scene, unsigned int l
 
                if (re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT))
                        if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
-                               init_ao_sphere(&re->wrld);
+                               init_ao_sphere(re, &re->wrld);
        }
        
        /* still bad... doing all */
@@ -5956,7 +5956,7 @@ void RE_Database_Baking(Render *re, Main *bmain, Scene *scene, unsigned int lay,
                
                if (re->wrld.mode & (WO_AMB_OCC|WO_ENV_LIGHT|WO_INDIRECT_LIGHT))
                        if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
-                               init_ao_sphere(&re->wrld);
+                               init_ao_sphere(re, &re->wrld);
        }
        
        /* still bad... doing all */
index c5c3b6bbf9401a43369f6cc52faaaad15421b2a1..b3d31e3b93ad5f39c34aecd719e20576f979e8ca 100644 (file)
@@ -115,6 +115,8 @@ typedef struct OcclusionTree {
        int doindirect;
 
        OcclusionCache *cache;
+
+       int num_threads;
 } OcclusionTree;
 
 typedef struct OcclusionThread {
@@ -641,6 +643,7 @@ static void occ_build_sh_normalize(OccNode *node)
 
 static OcclusionTree *occ_tree_build(Render *re)
 {
+       const int num_threads = re->r.threads;
        OcclusionTree *tree;
        ObjectInstanceRen *obi;
        ObjectRen *obr;
@@ -679,7 +682,7 @@ static OcclusionTree *occ_tree_build(Render *re)
        BLI_memarena_use_calloc(tree->arena);
 
        if (re->wrld.aomode & WO_AOCACHE)
-               tree->cache = MEM_callocN(sizeof(OcclusionCache) * BLENDER_MAX_THREADS, "OcclusionCache");
+               tree->cache = MEM_callocN(sizeof(OcclusionCache) * num_threads, "OcclusionCache");
 
        tree->face = MEM_callocN(sizeof(OccFace) * totface, "OcclusionFace");
        tree->co = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionCo");
@@ -730,9 +733,11 @@ static OcclusionTree *occ_tree_build(Render *re)
        if (!(re->test_break(re->tbh)))
                occ_build_sh_normalize(tree->root);
 
-       for (a = 0; a < BLENDER_MAX_THREADS; a++)
+       for (a = 0; a < num_threads; a++)
                tree->stack[a] = MEM_callocN(sizeof(OccNode) * TOTCHILD * (tree->maxdepth + 1), "OccStack");
 
+       tree->num_threads = num_threads;
+
        return tree;
 }
 
@@ -742,7 +747,7 @@ static void occ_free_tree(OcclusionTree *tree)
 
        if (tree) {
                if (tree->arena) BLI_memarena_free(tree->arena);
-               for (a = 0; a < BLENDER_MAX_THREADS; a++)
+               for (a = 0; a < tree->num_threads; a++)
                        if (tree->stack[a])
                                MEM_freeN(tree->stack[a]);
                if (tree->occlusion) MEM_freeN(tree->occlusion);
index 9aac5ed1f1d5b143cab59140591bd91385c54896..26a0b0c71b48cffc3597afb8b1fcde09765edd96 100644 (file)
@@ -174,10 +174,11 @@ void freeraytree(Render *re)
        
 #ifdef RE_RAYCOUNTER
        {
+               const int num_threads = re->r.threads;
                RayCounter sum;
                memset(&sum, 0, sizeof(sum));
                int i;
-               for (i=0; i<BLENDER_MAX_THREADS; i++)
+               for (i=0; i<num_threads; i++)
                        RE_RC_MERGE(&sum, re_rc_counter+i);
                RE_RC_INFO(&sum);
        }
@@ -1186,7 +1187,9 @@ static void QMC_sampleHemiCosine(float vec[3], QMCSampler *qsa, int thread, int
 /* called from convertBlenderScene.c */
 void init_render_qmcsampler(Render *re)
 {
-       re->qmcsamplers= MEM_callocN(sizeof(ListBase)*BLENDER_MAX_THREADS, "QMCListBase");
+       const int num_threads = re->r.threads;
+       re->qmcsamplers= MEM_callocN(sizeof(ListBase)*num_threads, "QMCListBase");
+       re->num_qmc_samplers = num_threads;
 }
 
 static QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot)
@@ -1220,7 +1223,7 @@ void free_render_qmcsampler(Render *re)
        if (re->qmcsamplers) {
                QMCSampler *qsa, *next;
                int a;
-               for (a=0; a<BLENDER_MAX_THREADS; a++) {
+               for (a = 0; a < re->num_qmc_samplers; a++) {
                        for (qsa=re->qmcsamplers[a].first; qsa; qsa=next) {
                                next= qsa->next;
                                QMC_freeSampler(qsa);
@@ -1695,9 +1698,10 @@ static void DS_energy(float *sphere, int tot, float vec[3])
 /* called from convertBlenderScene.c */
 /* creates an equally distributed spherical sample pattern */
 /* and allocates threadsafe memory */
-void init_ao_sphere(World *wrld)
+void init_ao_sphere(Render *re, World *wrld)
 {
        /* fixed random */
+       const int num_threads = re->r.threads;
        RNG *rng;
        float *fp;
        int a, tot, iter= 16;
@@ -1721,7 +1725,7 @@ void init_ao_sphere(World *wrld)
        }
        
        /* tables */
-       wrld->aotables= MEM_mallocN(BLENDER_MAX_THREADS*3*tot*sizeof(float), "AO tables");
+       wrld->aotables= MEM_mallocN(num_threads*3*tot*sizeof(float), "AO tables");
 
        BLI_rng_free(rng);
 }
index 513cfa6df7de585c219403f1786c0caf14bb8745..b4a14f5337da27d14c9d0da69eb62bff62a6ab3a 100644 (file)
@@ -3653,7 +3653,8 @@ void render_realtime_texture(ShadeInput *shi, Image *ima)
        if (firsttime) {
                BLI_lock_thread(LOCK_IMAGE);
                if (firsttime) {
-                       for (a=0; a<BLENDER_MAX_THREADS; a++) {
+                       const int num_threads = BLI_system_thread_count();
+                       for (a = 0; a < num_threads; a++) {
                                memset(&imatex[a], 0, sizeof(Tex));
                                BKE_texture_default(&imatex[a]);
                                imatex[a].type= TEX_IMAGE;