Nuke OMP usage in multires.c.
author Bastien Montagne <montagne29@wanadoo.fr>
Thu, 11 Jan 2018 18:39:24 +0000 (19:39 +0100)
committer Bastien Montagne <montagne29@wanadoo.fr>
Thu, 11 Jan 2018 18:39:24 +0000 (19:39 +0100)
The new code is over three times faster than the old one here (e.g. with
Suzanne at subdiv level 4, 250k tris, the threaded part now takes 1.4ms
instead of 4.5ms with OMP).

source/blender/blenkernel/intern/CCGSubSurf.h
source/blender/blenkernel/intern/CCGSubSurf_legacy.c
source/blender/blenkernel/intern/multires.c

index 4c913e79586a0312d801cb97fdc666d776ccb43e..8cdbd2a7a98786fc94e4302cb90e8c54ba12b31f 100644 (file)
@@ -73,6 +73,9 @@ typedef enum {
 
 #define CCG_OMP_LIMIT  1000000
 
+/* TODO(sergey): This actually depends on subsurf level as well. */
+#define CCG_TASK_LIMIT 16
+
 /***/
 
 CCGSubSurf*    ccgSubSurf_new  (CCGMeshIFC *ifc, int subdivisionLevels, CCGAllocatorIFC *allocatorIFC, CCGAllocatorHDL allocator);
index 2b331eae950b33f73602a2a3876bd07f09454be9..756935b50ba65ff574f906d9f6d652f366209d9a 100644 (file)
@@ -34,9 +34,6 @@
 
 #define FACE_calcIFNo(f, lvl, S, x, y, no)  _face_calcIFNo(f, lvl, S, x, y, no, subdivLevels, vertDataSize)
 
-/* TODO(sergey): This actually depends on subsurf level as well. */
-#define CCG_TASK_LIMIT 16
-
 /* TODO(sergey): Deduplicate the following functions/ */
 static void *_edge_getCoVert(CCGEdge *e, CCGVert *v, int lvl, int x, int dataSize)
 {
index 4eb550a9f4c73791af910cd9c40c3eb50ec5e35d..6bc4e359bbdce80d6dd4b22e736d82ef24411930 100644 (file)
@@ -1003,6 +1003,115 @@ static void grid_tangent_matrix(float mat[3][3], const CCGKey *key,
        copy_v3_v3(mat[2], CCG_grid_elem_no(key, grid, x, y));
 }
 
+
+typedef struct MultiresDispRunData {
+       DispOp op;
+       CCGElem **gridData, **subGridData;
+       CCGKey *key;
+       MPoly *mpoly;
+       MDisps *mdisps;
+       GridPaintMask *grid_paint_mask;
+       int *gridOffset;
+       int gridSize, dGridSize, dSkip;
+} MultiresDispRunData;
+
+static void multires_disp_run_cb(
+        void *__restrict userdata,
+        const int pidx,
+        const ParallelRangeTLS *__restrict UNUSED(tls))
+{
+       MultiresDispRunData *tdata = userdata;
+
+       DispOp op = tdata->op;
+       CCGElem **gridData = tdata->gridData;
+       CCGElem **subGridData = tdata->subGridData;
+       CCGKey *key = tdata->key;
+       MPoly *mpoly = tdata->mpoly;
+       MDisps *mdisps = tdata->mdisps;
+       GridPaintMask *grid_paint_mask = tdata->grid_paint_mask;
+       int *gridOffset = tdata->gridOffset;
+       int gridSize = tdata->gridSize;
+       int dGridSize = tdata->dGridSize;
+       int dSkip = tdata->dSkip;
+
+       const int numVerts = mpoly[pidx].totloop;
+       int S, x, y, gIndex = gridOffset[pidx];
+
+       for (S = 0; S < numVerts; ++S, ++gIndex) {
+               GridPaintMask *gpm = grid_paint_mask ? &grid_paint_mask[gIndex] : NULL;
+               MDisps *mdisp = &mdisps[mpoly[pidx].loopstart + S];
+               CCGElem *grid = gridData[gIndex];
+               CCGElem *subgrid = subGridData[gIndex];
+               float (*dispgrid)[3] = NULL;
+
+               dispgrid = mdisp->disps;
+
+               /* if needed, reallocate multires paint mask */
+               if (gpm && gpm->level < key->level) {
+                       gpm->level = key->level;
+                       if (gpm->data) {
+                               MEM_freeN(gpm->data);
+                       }
+                       gpm->data = MEM_callocN(sizeof(float) * key->grid_area, "gpm.data");
+               }
+
+               for (y = 0; y < gridSize; y++) {
+                       for (x = 0; x < gridSize; x++) {
+                               float *co = CCG_grid_elem_co(key, grid, x, y);
+                               float *sco = CCG_grid_elem_co(key, subgrid, x, y);
+                               float *data = dispgrid[dGridSize * y * dSkip + x * dSkip];
+                               float mat[3][3], disp[3], d[3], mask;
+
+                               /* construct tangent space matrix */
+                               grid_tangent_matrix(mat, key, x, y, subgrid);
+
+                               switch (op) {
+                                       case APPLY_DISPLACEMENTS:
+                                               /* Convert displacement to object space
+                                                * and add to grid points */
+                                               mul_v3_m3v3(disp, mat, data);
+                                               add_v3_v3v3(co, sco, disp);
+                                               break;
+                                       case CALC_DISPLACEMENTS:
+                                               /* Calculate displacement between new and old
+                                                * grid points and convert to tangent space */
+                                               sub_v3_v3v3(disp, co, sco);
+                                               invert_m3(mat);
+                                               mul_v3_m3v3(data, mat, disp);
+                                               break;
+                                       case ADD_DISPLACEMENTS:
+                                               /* Convert subdivided displacements to tangent
+                                                * space and add to the original displacements */
+                                               invert_m3(mat);
+                                               mul_v3_m3v3(d, mat, co);
+                                               add_v3_v3(data, d);
+                                               break;
+                               }
+
+                               if (gpm) {
+                                       switch (op) {
+                                               case APPLY_DISPLACEMENTS:
+                                                       /* Copy mask from gpm to DM */
+                                                       *CCG_grid_elem_mask(key, grid, x, y) =
+                                                           paint_grid_paint_mask(gpm, key->level, x, y);
+                                                       break;
+                                               case CALC_DISPLACEMENTS:
+                                                       /* Copy mask from DM to gpm */
+                                                       mask = *CCG_grid_elem_mask(key, grid, x, y);
+                                                       gpm->data[y * gridSize + x] = CLAMPIS(mask, 0, 1);
+                                                       break;
+                                               case ADD_DISPLACEMENTS:
+                                                       /* Add mask displacement to gpm */
+                                                       gpm->data[y * gridSize + x] +=
+                                                           *CCG_grid_elem_mask(key, grid, x, y);
+                                                       break;
+                                       }
+                               }
+                       }
+               }
+       }
+}
+
 /* XXX WARNING: subsurf elements from dm and oldGridData *must* be of the same format (size),
  *              because this code uses CCGKey's info from dm to access oldGridData's normals
  *              (through the call to grid_tangent_matrix())! */
@@ -1015,7 +1124,7 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm
        MDisps *mdisps = CustomData_get_layer(&me->ldata, CD_MDISPS);
        GridPaintMask *grid_paint_mask = NULL;
        int *gridOffset;
-       int i, k, /*numGrids, */ gridSize, dGridSize, dSkip;
+       int i, gridSize, dGridSize, dSkip;
        int totloop, totpoly;
        
        /* this happens in the dm made by bmesh_mdisps_space_set */
@@ -1051,8 +1160,6 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm
        if (key.has_mask)
                grid_paint_mask = CustomData_get_layer(&me->ldata, CD_GRID_PAINT_MASK);
 
-       k = 0; /*current loop/mdisp index within the mloop array*/
-
        /* when adding new faces in edit mode, need to allocate disps */
        for (i = 0; i < totloop; ++i) {
                if (mdisps[i].disps == NULL) {
@@ -1061,90 +1168,25 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm
                }
        }
 
-       BLI_begin_threaded_malloc();
-
-#pragma omp parallel for private(i) if (totloop * gridSize * gridSize >= CCG_OMP_LIMIT)
-
-       for (i = 0; i < totpoly; ++i) {
-               const int numVerts = mpoly[i].totloop;
-               int S, x, y, gIndex = gridOffset[i];
-
-               for (S = 0; S < numVerts; ++S, ++gIndex, ++k) {
-                       GridPaintMask *gpm = grid_paint_mask ? &grid_paint_mask[gIndex] : NULL;
-                       MDisps *mdisp = &mdisps[mpoly[i].loopstart + S];
-                       CCGElem *grid = gridData[gIndex];
-                       CCGElem *subgrid = subGridData[gIndex];
-                       float (*dispgrid)[3] = NULL;
-
-                       dispgrid = mdisp->disps;
-
-                       /* if needed, reallocate multires paint mask */
-                       if (gpm && gpm->level < key.level) {
-                               gpm->level = key.level;
-                               if (gpm->data) {
-                                       MEM_freeN(gpm->data);
-                               }
-                               gpm->data = MEM_callocN(sizeof(float) * key.grid_area, "gpm.data");
-                       }
-
-                       for (y = 0; y < gridSize; y++) {
-                               for (x = 0; x < gridSize; x++) {
-                                       float *co = CCG_grid_elem_co(&key, grid, x, y);
-                                       float *sco = CCG_grid_elem_co(&key, subgrid, x, y);
-                                       float *data = dispgrid[dGridSize * y * dSkip + x * dSkip];
-                                       float mat[3][3], disp[3], d[3], mask;
-
-                                       /* construct tangent space matrix */
-                                       grid_tangent_matrix(mat, &key, x, y, subgrid);
-
-                                       switch (op) {
-                                               case APPLY_DISPLACEMENTS:
-                                                       /* Convert displacement to object space
-                                                        * and add to grid points */
-                                                       mul_v3_m3v3(disp, mat, data);
-                                                       add_v3_v3v3(co, sco, disp);
-                                                       break;
-                                               case CALC_DISPLACEMENTS:
-                                                       /* Calculate displacement between new and old
-                                                        * grid points and convert to tangent space */
-                                                       sub_v3_v3v3(disp, co, sco);
-                                                       invert_m3(mat);
-                                                       mul_v3_m3v3(data, mat, disp);
-                                                       break;
-                                               case ADD_DISPLACEMENTS:
-                                                       /* Convert subdivided displacements to tangent
-                                                        * space and add to the original displacements */
-                                                       invert_m3(mat);
-                                                       mul_v3_m3v3(d, mat, co);
-                                                       add_v3_v3(data, d);
-                                                       break;
-                                       }
-
-                                       if (gpm) {
-                                               switch (op) {
-                                                       case APPLY_DISPLACEMENTS:
-                                                               /* Copy mask from gpm to DM */
-                                                               *CCG_grid_elem_mask(&key, grid, x, y) =
-                                                                   paint_grid_paint_mask(gpm, key.level, x, y);
-                                                               break;
-                                                       case CALC_DISPLACEMENTS:
-                                                               /* Copy mask from DM to gpm */
-                                                               mask = *CCG_grid_elem_mask(&key, grid, x, y);
-                                                               gpm->data[y * gridSize + x] = CLAMPIS(mask, 0, 1);
-                                                               break;
-                                                       case ADD_DISPLACEMENTS:
-                                                               /* Add mask displacement to gpm */
-                                                               gpm->data[y * gridSize + x] +=
-                                                                   *CCG_grid_elem_mask(&key, grid, x, y);
-                                                               break;
-                                               }
-                                       }
-                               }
-                       }
-               }
-       }
-       
-       BLI_end_threaded_malloc();
+       ParallelRangeSettings settings;
+       BLI_parallel_range_settings_defaults(&settings);
+       settings.min_iter_per_thread = CCG_TASK_LIMIT;
+
+       MultiresDispRunData data = {
+           .op = op,
+           .gridData = gridData,
+           .subGridData = subGridData,
+           .key = &key,
+           .mpoly = mpoly,
+           .mdisps = mdisps,
+           .grid_paint_mask = grid_paint_mask,
+           .gridOffset = gridOffset,
+           .gridSize = gridSize,
+           .dGridSize = dGridSize,
+           .dSkip = dSkip
+       };
+
+       BLI_task_parallel_range(0, totpoly, &data, multires_disp_run_cb, &settings);
 
        if (op == APPLY_DISPLACEMENTS) {
                ccgSubSurf_stitchFaces(ccgdm->ss, 0, NULL, 0);