Merge branch 'master' into blender2.8
author Sergey Sharybin <sergey.vfx@gmail.com>
Thu, 11 Jan 2018 14:14:30 +0000 (15:14 +0100)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Thu, 11 Jan 2018 14:14:30 +0000 (15:14 +0100)
source/blender/blenkernel/BKE_subsurf.h
source/blender/blenkernel/intern/CCGSubSurf_legacy.c
source/blender/blenkernel/intern/mesh_evaluate.c
source/blender/blenkernel/intern/subsurf_ccg.c
source/blender/depsgraph/intern/eval/deg_eval.cc
source/blender/depsgraph/intern/eval/deg_eval_flush.cc
source/blender/modifiers/intern/MOD_meshdeform.c

index d7b9d20d7b0994cc93fbaa841a86e3e581f1f5de..96320415b16164d3b22134f107f82961a8fa5882 100644 (file)
@@ -144,7 +144,7 @@ typedef struct CCGDerivedMesh {
 
        struct EdgeHash *ehash;
 
-       ThreadRWMutex loops_cache_rwlock;
+       ThreadMutex loops_cache_lock;
        ThreadRWMutex origindex_cache_rwlock;
 } CCGDerivedMesh;
 
index d567b50af563a6594d5a15f9be597ba9977ef8fb..2b331eae950b33f73602a2a3876bd07f09454be9 100644 (file)
@@ -34,6 +34,9 @@
 
 #define FACE_calcIFNo(f, lvl, S, x, y, no)  _face_calcIFNo(f, lvl, S, x, y, no, subdivLevels, vertDataSize)
 
+/* TODO(sergey): This actually depends on subsurf level as well. */
+#define CCG_TASK_LIMIT 16
+
 /* TODO(sergey): Deduplicate the following functions/ */
 static void *_edge_getCoVert(CCGEdge *e, CCGVert *v, int lvl, int x, int dataSize)
 {
@@ -340,7 +343,7 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss,
        {
                ParallelRangeSettings settings;
                BLI_parallel_range_settings_defaults(&settings);
-               settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT);
+               settings.min_iter_per_thread = CCG_TASK_LIMIT;
                BLI_task_parallel_range(0, numEffectedF,
                                        &data,
                                        ccgSubSurf__calcVertNormals_faces_accumulate_cb,
@@ -374,7 +377,7 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss,
        {
                ParallelRangeSettings settings;
                BLI_parallel_range_settings_defaults(&settings);
-               settings.use_threading = (numEffectedE * edgeSize * 4 >= CCG_OMP_LIMIT);
+               settings.min_iter_per_thread = CCG_TASK_LIMIT;
                BLI_task_parallel_range(0, numEffectedE,
                                        &data,
                                        ccgSubSurf__calcVertNormals_edges_accumulate_cb,
@@ -384,7 +387,7 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss,
        {
                ParallelRangeSettings settings;
                BLI_parallel_range_settings_defaults(&settings);
-               settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT);
+               settings.min_iter_per_thread = CCG_TASK_LIMIT;
                BLI_task_parallel_range(0, numEffectedF,
                                        &data,
                                        ccgSubSurf__calcVertNormals_faces_finalize_cb,
@@ -683,7 +686,7 @@ static void ccgSubSurf__calcSubdivLevel(
        {
                ParallelRangeSettings settings;
                BLI_parallel_range_settings_defaults(&settings);
-               settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT);
+               settings.min_iter_per_thread = CCG_TASK_LIMIT;
                BLI_task_parallel_range(0, numEffectedF,
                                        &data,
                                        ccgSubSurf__calcSubdivLevel_interior_faces_edges_midpoints_cb,
@@ -966,7 +969,7 @@ static void ccgSubSurf__calcSubdivLevel(
        {
                ParallelRangeSettings settings;
                BLI_parallel_range_settings_defaults(&settings);
-               settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT);
+               settings.min_iter_per_thread = CCG_TASK_LIMIT;
                BLI_task_parallel_range(0, numEffectedF,
                                        &data,
                                        ccgSubSurf__calcSubdivLevel_interior_faces_edges_centerpoints_shift_cb,
@@ -986,7 +989,7 @@ static void ccgSubSurf__calcSubdivLevel(
        {
                ParallelRangeSettings settings;
                BLI_parallel_range_settings_defaults(&settings);
-               settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT);
+               settings.min_iter_per_thread = CCG_TASK_LIMIT;
                BLI_task_parallel_range(0, numEffectedF,
                                        &data,
                                        ccgSubSurf__calcSubdivLevel_verts_copydata_cb,
index f25b6ed41c78d89f1a7e6d592c5094d283eedf95..4e03155ea6c92cd8b2bbe15c57d4add7fb0995a3 100644 (file)
@@ -287,12 +287,11 @@ void BKE_mesh_calc_normals_poly(
         int numLoops, int numPolys, float (*r_polynors)[3],
         const bool only_face_normals)
 {
-       const bool do_threaded = (numPolys > BKE_MESH_OMP_LIMIT);
        float (*pnors)[3] = r_polynors;
 
        ParallelRangeSettings settings;
        BLI_parallel_range_settings_defaults(&settings);
-       settings.use_threading = do_threaded;
+       settings.min_iter_per_thread = 1024;
 
        if (only_face_normals) {
                BLI_assert((pnors != NULL) || (numPolys == 0));
index 24bcd4c84767b1e31f7fe367621232eed8376b2e..84b82ed53bb1ab8a5ce8852f1d41e620f17b19f2 100644 (file)
@@ -58,6 +58,7 @@
 #include "BLI_edgehash.h"
 #include "BLI_math.h"
 #include "BLI_memarena.h"
+#include "BLI_task.h"
 #include "BLI_threads.h"
 
 #include "BKE_pbvh.h"
@@ -1476,19 +1477,70 @@ static void ccgDM_copyFinalFaceArray(DerivedMesh *dm, MFace *mface)
        }
 }
 
+typedef struct CopyFinalLoopArrayData { /* Userdata shared by every copyFinalLoopArray_task_cb iteration. */
+       CCGDerivedMesh *ccgdm; /* Source mesh; the callback reads its ss, faceMap and ehash. */
+       MLoop *mloop; /* Destination loop array the callback writes into. */
+       int grid_size; /* Forwarded to getFaceIndex(); also bounds the per-face x/y loops. */
+       int *grid_offset; /* Indexed by face iteration index to locate where that face's output starts. */
+       int edge_size; /* Forwarded to getFaceIndex(). */
+       size_t mloop_index; /* NOTE(review): not read by copyFinalLoopArray_task_cb -- confirm it is still needed. */
+} CopyFinalLoopArrayData;
+
+static void copyFinalLoopArray_task_cb( /* Parallel-range body: writes the MLoop entries of the single face at index `iter`. */
+        void *__restrict userdata, /* Points to a CopyFinalLoopArrayData. */
+        const int iter, /* Face index into ccgdm->faceMap and data->grid_offset. */
+        const ParallelRangeTLS *__restrict UNUSED(tls))
+{
+       CopyFinalLoopArrayData *data = userdata;
+       CCGDerivedMesh *ccgdm = data->ccgdm;
+       CCGSubSurf *ss = ccgdm->ss;
+       const int grid_size = data->grid_size;
+       const int edge_size = data->edge_size;
+       CCGFace *f = ccgdm->faceMap[iter].face;
+       const int num_verts = ccgSubSurf_getFaceNumVerts(f);
+       const int grid_index = data->grid_offset[iter];
+       const size_t loop_index = 4 * (size_t)grid_index * (grid_size - 1) * (grid_size - 1); /* 4 loops per quad, (grid_size - 1)^2 quads per S pass; presumably grid_offset counts the grids of preceding faces -- TODO confirm. */
+       MLoop *ml = &data->mloop[loop_index]; /* Output cursor for this face, derived from iter alone. */
+       for (int S = 0; S < num_verts; S++) { /* One pass per face vertex. */
+               for (int y = 0; y < grid_size - 1; y++) {
+                       for (int x = 0; x < grid_size - 1; x++) {
+
+                               uint v1 = getFaceIndex(ss, f, S, x + 0, y + 0, /* Quad corners in order: (x, y), (x, y+1), (x+1, y+1), (x+1, y). */
+                                                      edge_size, grid_size);
+                               uint v2 = getFaceIndex(ss, f, S, x + 0, y + 1,
+                                                      edge_size, grid_size);
+                               uint v3 = getFaceIndex(ss, f, S, x + 1, y + 1,
+                                                      edge_size, grid_size);
+                               uint v4 = getFaceIndex(ss, f, S, x + 1, y + 0,
+                                                      edge_size, grid_size);
+
+                               ml->v = v1; /* Each loop stores its start vertex and the ehash entry for (this vertex, next vertex). */
+                               ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v1, v2));
+                               ml++;
+
+                               ml->v = v2;
+                               ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v2, v3));
+                               ml++;
+
+                               ml->v = v3;
+                               ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v3, v4));
+                               ml++;
+
+                               ml->v = v4;
+                               ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v4, v1)); /* Closing edge back to the first corner. */
+                               ml++;
+                       }
+               }
+       }
+}
+
 static void ccgDM_copyFinalLoopArray(DerivedMesh *dm, MLoop *mloop)
 {
        CCGDerivedMesh *ccgdm = (CCGDerivedMesh *) dm;
        CCGSubSurf *ss = ccgdm->ss;
-       int index;
-       int totface;
-       int gridSize = ccgSubSurf_getGridSize(ss);
-       int edgeSize = ccgSubSurf_getEdgeSize(ss);
-       MLoop *ml;
-       /* DMFlagMat *faceFlags = ccgdm->faceFlags; */ /* UNUSED */
 
        if (!ccgdm->ehash) {
-               BLI_rw_mutex_lock(&ccgdm->loops_cache_rwlock, THREAD_LOCK_WRITE);
+               BLI_mutex_lock(&ccgdm->loops_cache_lock);
                if (!ccgdm->ehash) {
                        MEdge *medge;
                        EdgeHash *ehash;
@@ -1502,53 +1554,30 @@ static void ccgDM_copyFinalLoopArray(DerivedMesh *dm, MLoop *mloop)
 
                        atomic_cas_ptr((void**)&ccgdm->ehash, ccgdm->ehash, ehash);
                }
-               BLI_rw_mutex_unlock(&ccgdm->loops_cache_rwlock);
+               BLI_mutex_unlock(&ccgdm->loops_cache_lock);
        }
 
-       BLI_rw_mutex_lock(&ccgdm->loops_cache_rwlock, THREAD_LOCK_READ);
-       totface = ccgSubSurf_getNumFaces(ss);
-       ml = mloop;
-       for (index = 0; index < totface; index++) {
-               CCGFace *f = ccgdm->faceMap[index].face;
-               int x, y, S, numVerts = ccgSubSurf_getFaceNumVerts(f);
-               /* int flag = (faceFlags) ? faceFlags[index * 2]: ME_SMOOTH; */ /* UNUSED */
-               /* int mat_nr = (faceFlags) ? faceFlags[index * 2 + 1]: 0; */ /* UNUSED */
-
-               for (S = 0; S < numVerts; S++) {
-                       for (y = 0; y < gridSize - 1; y++) {
-                               for (x = 0; x < gridSize - 1; x++) {
-                                       unsigned int v1, v2, v3, v4;
-
-                                       v1 = getFaceIndex(ss, f, S, x + 0, y + 0,
-                                                         edgeSize, gridSize);
-
-                                       v2 = getFaceIndex(ss, f, S, x + 0, y + 1,
-                                                         edgeSize, gridSize);
-                                       v3 = getFaceIndex(ss, f, S, x + 1, y + 1,
-                                                         edgeSize, gridSize);
-                                       v4 = getFaceIndex(ss, f, S, x + 1, y + 0,
-                                                         edgeSize, gridSize);
+       CopyFinalLoopArrayData data;
+       data.ccgdm = ccgdm;
+       data.mloop = mloop;
+       data.grid_size = ccgSubSurf_getGridSize(ss);
+       data.grid_offset = dm->getGridOffset(dm);
+       data.edge_size = ccgSubSurf_getEdgeSize(ss);
 
-                                       ml->v = v1;
-                                       ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v1, v2));
-                                       ml++;
-
-                                       ml->v = v2;
-                                       ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v2, v3));
-                                       ml++;
+       /* NOTE: For a dense subdivision we've got enough work for each face and
+        * hence can dedicate whole thread to single face. For less dense
+        * subdivision we handle multiple faces per thread.
+        */
+       data.mloop_index = data.grid_size >= 5 ? 1 : 8;
 
-                                       ml->v = v3;
-                                       ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v3, v4));
-                                       ml++;
+       ParallelRangeSettings settings;
+       BLI_parallel_range_settings_defaults(&settings);
+       settings.min_iter_per_thread = 1;
 
-                                       ml->v = v4;
-                                       ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v4, v1));
-                                       ml++;
-                               }
-                       }
-               }
-       }
-       BLI_rw_mutex_unlock(&ccgdm->loops_cache_rwlock);
+       BLI_task_parallel_range(0, ccgSubSurf_getNumFaces(ss),
+                               &data,
+                               copyFinalLoopArray_task_cb,
+                               &settings);
 }
 
 static void ccgDM_copyFinalPolyArray(DerivedMesh *dm, MPoly *mpoly)
@@ -3796,7 +3825,7 @@ static void ccgDM_release(DerivedMesh *dm)
                        MEM_freeN(ccgdm->faceMap);
                }
 
-               BLI_rw_mutex_end(&ccgdm->loops_cache_rwlock);
+               BLI_mutex_end(&ccgdm->loops_cache_lock);
                BLI_rw_mutex_end(&ccgdm->origindex_cache_rwlock);
 
                MEM_freeN(ccgdm);
@@ -4787,7 +4816,7 @@ static CCGDerivedMesh *getCCGDerivedMesh(CCGSubSurf *ss,
        ccgdm->dm.numLoopData = ccgdm->dm.numPolyData * 4;
        ccgdm->dm.numTessFaceData = 0;
 
-       BLI_rw_mutex_init(&ccgdm->loops_cache_rwlock);
+       BLI_mutex_init(&ccgdm->loops_cache_lock);
        BLI_rw_mutex_init(&ccgdm->origindex_cache_rwlock);
 
        return ccgdm;
index 116f853ebdf988f73dc801d259fb03e8f38c5b97..a6c6a16a52803732972353de8dab34a4e8d81010 100644 (file)
@@ -132,12 +132,11 @@ static void calculate_pending_func(
 static void calculate_pending_parents(Depsgraph *graph)
 {
        const int num_operations = graph->operations.size();
-       const bool do_threads = (num_operations > 256);
        CalculatePengindData data;
        data.graph = graph;
        ParallelRangeSettings settings;
        BLI_parallel_range_settings_defaults(&settings);
-       settings.use_threading = do_threads;
+       settings.min_iter_per_thread = 1024;
        BLI_task_parallel_range(0,
                                num_operations,
                                &data,
index c3b1f56a71b02bdde5d7d6e83fc7434b848ed4a4..74c3cd28455fac68b486dc4ad99df7412c925877 100644 (file)
@@ -100,7 +100,7 @@ BLI_INLINE void flush_prepare(Depsgraph *graph)
                const int num_operations = graph->operations.size();
                ParallelRangeSettings settings;
                BLI_parallel_range_settings_defaults(&settings);
-               settings.use_threading = (num_operations > 256);
+               settings.min_iter_per_thread = 1024;
                BLI_task_parallel_range(0, num_operations,
                                        graph,
                                        flush_init_operation_node_func,
@@ -110,7 +110,7 @@ BLI_INLINE void flush_prepare(Depsgraph *graph)
                const int num_id_nodes = graph->id_nodes.size();
                ParallelRangeSettings settings;
                BLI_parallel_range_settings_defaults(&settings);
-               settings.use_threading = (num_id_nodes > 256);
+               settings.min_iter_per_thread = 1024;
                BLI_task_parallel_range(0, num_id_nodes,
                                        graph,
                                        flush_init_id_node_func,
@@ -310,10 +310,9 @@ void deg_graph_clear_tags(Depsgraph *graph)
 {
        /* Go over all operation nodes, clearing tags. */
        const int num_operations = graph->operations.size();
-       const bool do_threads = num_operations > 256;
        ParallelRangeSettings settings;
        BLI_parallel_range_settings_defaults(&settings);
-       settings.use_threading = do_threads;
+       settings.min_iter_per_thread = 1024;
        BLI_task_parallel_range(0, num_operations,
                                graph,
                                graph_clear_func,
index 3976433db932dc93d27914a493e93fdf83c89702..47b51ac170b6efefc942b60e371c7e7988e8d85f 100644 (file)
@@ -399,7 +399,7 @@ static void meshdeformModifier_do(
        /* Do deformation. */
        ParallelRangeSettings settings;
        BLI_parallel_range_settings_defaults(&settings);
-       settings.use_threading = (totvert > 1000);
+       settings.min_iter_per_thread = 16;
        BLI_task_parallel_range(0, totvert,
                                &data,
                                meshdeform_vert_task,