Sculpt: OMP -> BLI_task, step I.
author: Bastien Montagne <montagne29@wanadoo.fr>
Fri, 1 Jan 2016 20:18:23 +0000 (21:18 +0100)
committer: Bastien Montagne <montagne29@wanadoo.fr>
Fri, 1 Jan 2016 20:27:16 +0000 (21:27 +0100)
This gives the usual nice boost in the parallelized sections themselves. For a whole sculpting stroke
it is harder to say: there seems to be an average 5% speedup, but it varies a lot...

Note that this area is only parallelized when you have a big brush over dense geometry; otherwise
there is no change at all.

As a side note, this commit also factors out duplicated code here: nearly the same code was repeated three times, tsk...

source/blender/editors/sculpt_paint/sculpt.c

index fad7f089dca2ac238f3085d75607c19e36a46676..a78a79a8ac9c6c4b24acadd9ccfe577b36b2ef24 100644 (file)
@@ -38,6 +38,8 @@
 #include "BLI_math.h"
 #include "BLI_blenlib.h"
 #include "BLI_dial.h"
+#include "BLI_task.h"
+#include "BLI_threads.h"
 #include "BLI_utildefines.h"
 #include "BLI_ghash.h"
 
@@ -776,127 +778,172 @@ static float calc_symmetry_feather(Sculpt *sd, StrokeCache *cache)
  * \note These are all _very_ similar, when changing one, check others.
  * \{ */
 
-static void calc_area_center(
-        Sculpt *sd, Object *ob,
-        PBVHNode **nodes, int totnode,
-        float r_area_co[3])
-{
-       const Brush *brush = BKE_paint_brush(&sd->paint);
-       SculptSession *ss = ob->sculpt;
-       const bool has_bm_orco = ss->bm && sculpt_stroke_is_dynamic_topology(ss, brush);
-       int n;
+typedef struct SculptCalcAreaData {
+       Sculpt *sd;
+       Object *ob;
+    PBVHNode **nodes;
+       int totnode;
+       bool has_bm_orco;
 
        /* 0=towards view, 1=flipped */
-       float area_co[2][3] = {{0.0f}};
+       float (*area_co)[3];
+       float (*area_no)[3];
+       int *count;
 
-       int count[2] = {0};
+       ThreadMutex mutex;
+} SculptCalcAreaData;
 
-#pragma omp parallel for schedule(guided) if ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_OMP_LIMIT)
-       for (n = 0; n < totnode; n++) {
-               PBVHVertexIter vd;
-               SculptBrushTest test;
-               SculptUndoNode *unode;
-               float private_co[2][3] = {{0.0f}};
-               int   private_count[2] = {0};
-               bool use_original;
+static void calc_area_normal_and_centr_task_cb(void *userdata, void *UNUSED(userdata_chunk), int n)
+{
+       SculptCalcAreaData *data = userdata;
+       SculptSession *ss = data->ob->sculpt;
+       PBVHVertexIter vd;
+       SculptBrushTest test;
+       SculptUndoNode *unode;
 
-               unode = sculpt_undo_push_node(ob, nodes[n], SCULPT_UNDO_COORDS);
-               sculpt_brush_test_init(ss, &test);
+       float private_co[2][3] = {{0.0f}};
+       float private_no[2][3] = {{0.0f}};
+       int   private_count[2] = {0};
+       bool use_original;
+
+       unode = sculpt_undo_push_node(data->ob, data->nodes[n], SCULPT_UNDO_COORDS);
+       sculpt_brush_test_init(ss, &test);
 
-               use_original = (ss->cache->original && (unode->co || unode->bm_entry));
+       use_original = (ss->cache->original && (unode->co || unode->bm_entry));
 
-               /* when the mesh is edited we can't rely on original coords
-                * (original mesh may not even have verts in brush radius) */
-               if (use_original && has_bm_orco) {
-                       float (*orco_coords)[3];
-                       int   (*orco_tris)[3];
-                       int     orco_tris_num;
-                       int i;
+       /* when the mesh is edited we can't rely on original coords
+        * (original mesh may not even have verts in brush radius) */
+       if (use_original && data->has_bm_orco) {
+               float (*orco_coords)[3];
+               int   (*orco_tris)[3];
+               int     orco_tris_num;
+               int i;
 
-                       BKE_pbvh_node_get_bm_orco_data(
-                               nodes[n],
-                               &orco_tris, &orco_tris_num, &orco_coords);
+               BKE_pbvh_node_get_bm_orco_data(
+                       data->nodes[n],
+                       &orco_tris, &orco_tris_num, &orco_coords);
 
-                       for (i = 0; i < orco_tris_num; i++) {
-                               const float *co_tri[3] = {
-                                   orco_coords[orco_tris[i][0]],
-                                   orco_coords[orco_tris[i][1]],
-                                   orco_coords[orco_tris[i][2]],
-                               };
-                               float co[3];
+               for (i = 0; i < orco_tris_num; i++) {
+                       const float *co_tri[3] = {
+                           orco_coords[orco_tris[i][0]],
+                           orco_coords[orco_tris[i][1]],
+                           orco_coords[orco_tris[i][2]],
+                       };
+                       float co[3];
 
-                               closest_on_tri_to_point_v3(co, test.location, UNPACK3(co_tri));
+                       closest_on_tri_to_point_v3(co, test.location, UNPACK3(co_tri));
 
-                               if (sculpt_brush_test_fast(&test, co)) {
-                                       float no[3];
-                                       int flip_index;
+                       if (sculpt_brush_test_fast(&test, co)) {
+                               float no[3];
+                               int flip_index;
 
-                                       cross_tri_v3(no, UNPACK3(co_tri));
+                               normal_tri_v3(no, UNPACK3(co_tri));
 
-                                       flip_index = (dot_v3v3(ss->cache->view_normal, no) <= 0.0f);
+                               flip_index = (dot_v3v3(ss->cache->view_normal, no) <= 0.0f);
+                               if (data->area_co)
                                        add_v3_v3(private_co[flip_index], co);
-                                       private_count[flip_index] += 1;
-                               }
+                               if (data->area_no)
+                                       add_v3_v3(private_no[flip_index], no);
+                               private_count[flip_index] += 1;
                        }
                }
-               else {
-                       BKE_pbvh_vertex_iter_begin(ss->pbvh, nodes[n], vd, PBVH_ITER_UNIQUE)
-                       {
-                               const float *co;
-                               const short *no_s;  /* bm_vert only */
+       }
+       else {
+               BKE_pbvh_vertex_iter_begin(ss->pbvh, data->nodes[n], vd, PBVH_ITER_UNIQUE)
+               {
+                       const float *co;
+                       const short *no_s;  /* bm_vert only */
 
-                               if (use_original) {
-                                       if (unode->bm_entry) {
-                                               BM_log_original_vert_data(ss->bm_log, vd.bm_vert, &co, &no_s);
-                                       }
-                                       else {
-                                               co = unode->co[vd.i];
-                                               no_s = unode->no[vd.i];
-                                       }
+                       if (use_original) {
+                               if (unode->bm_entry) {
+                                       BM_log_original_vert_data(ss->bm_log, vd.bm_vert, &co, &no_s);
                                }
                                else {
-                                       co = vd.co;
+                                       co = unode->co[vd.i];
+                                       no_s = unode->no[vd.i];
                                }
+                       }
+                       else {
+                               co = vd.co;
+                       }
 
-                               if (sculpt_brush_test_fast(&test, co)) {
-                                       float no_buf[3];
-                                       const float *no;
-                                       int flip_index;
+                       if (sculpt_brush_test_fast(&test, co)) {
+                               float no_buf[3];
+                               const float *no;
+                               int flip_index;
 
-                                       if (use_original) {
-                                               normal_short_to_float_v3(no_buf, no_s);
+                               if (use_original) {
+                                       normal_short_to_float_v3(no_buf, no_s);
+                                       no = no_buf;
+                               }
+                               else {
+                                       if (vd.no) {
+                                               normal_short_to_float_v3(no_buf, vd.no);
                                                no = no_buf;
                                        }
                                        else {
-                                               if (vd.no) {
-                                                       normal_short_to_float_v3(no_buf, vd.no);
-                                                       no = no_buf;
-                                               }
-                                               else {
-                                                       no = vd.fno;
-                                               }
+                                               no = vd.fno;
                                        }
+                               }
 
-                                       flip_index = (dot_v3v3(ss->cache->view_normal, no) <= 0.0f);
+                               flip_index = (dot_v3v3(ss->cache->view_normal, no) <= 0.0f);
+                               if (data->area_co)
                                        add_v3_v3(private_co[flip_index], co);
-                                       private_count[flip_index] += 1;
-                               }
+                               if (data->area_no)
+                                       add_v3_v3(private_no[flip_index], no);
+                               private_count[flip_index] += 1;
                        }
-                       BKE_pbvh_vertex_iter_end;
                }
+               BKE_pbvh_vertex_iter_end;
+       }
 
-#pragma omp critical
-               {
-                       /* for flatten center */
-                       add_v3_v3(area_co[0], private_co[0]);
-                       add_v3_v3(area_co[1], private_co[1]);
+       BLI_mutex_lock(&data->mutex);
 
-                       /* weights */
-                       count[0] += private_count[0];
-                       count[1] += private_count[1];
-               }
+       /* for flatten center */
+       if (data->area_co) {
+               add_v3_v3(data->area_co[0], private_co[0]);
+               add_v3_v3(data->area_co[1], private_co[1]);
        }
 
+       /* for area normal */
+       if (data->area_no) {
+               add_v3_v3(data->area_no[0], private_no[0]);
+               add_v3_v3(data->area_no[1], private_no[1]);
+       }
+
+       /* weights */
+       data->count[0] += private_count[0];
+       data->count[1] += private_count[1];
+
+       BLI_mutex_unlock(&data->mutex);
+}
+
+static void calc_area_center(
+        Sculpt *sd, Object *ob,
+        PBVHNode **nodes, int totnode,
+        float r_area_co[3])
+{
+       const Brush *brush = BKE_paint_brush(&sd->paint);
+       SculptSession *ss = ob->sculpt;
+       const bool has_bm_orco = ss->bm && sculpt_stroke_is_dynamic_topology(ss, brush);
+       int n;
+
+       /* 0=towards view, 1=flipped */
+       float area_co[2][3] = {{0.0f}};
+
+       int count[2] = {0};
+
+       SculptCalcAreaData data = {
+               .sd = sd, .ob = ob, .nodes = nodes, .totnode = totnode, .has_bm_orco = has_bm_orco,
+               .area_co = area_co, .area_no = NULL, .count = count,
+       };
+       BLI_mutex_init(&data.mutex);
+
+       BLI_task_parallel_range_ex(0, totnode, &data, NULL, 0, calc_area_normal_and_centr_task_cb,
+                                  ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_OMP_LIMIT), false);
+
+       BLI_mutex_end(&data.mutex);
+
        /* for flatten center */
        for (n = 0; n < ARRAY_SIZE(area_co); n++) {
                if (count[n] != 0) {
@@ -925,111 +972,16 @@ static void calc_area_normal(
 
        int count[2] = {0};
 
-#pragma omp parallel for schedule(guided) if ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_OMP_LIMIT)
-       for (n = 0; n < totnode; n++) {
-               PBVHVertexIter vd;
-               SculptBrushTest test;
-               SculptUndoNode *unode;
-               float private_no[2][3] = {{0.0f}};
-               int   private_count[2] = {0};
-               bool use_original;
-
-               unode = sculpt_undo_push_node(ob, nodes[n], SCULPT_UNDO_COORDS);
-               sculpt_brush_test_init(ss, &test);
-
-               use_original = (ss->cache->original && (unode->co || unode->bm_entry));
-
-               /* when the mesh is edited we can't rely on original coords
-                * (original mesh may not even have verts in brush radius) */
-               if (use_original && has_bm_orco) {
-                       float (*orco_coords)[3];
-                       int   (*orco_tris)[3];
-                       int     orco_tris_num;
-                       int i;
-
-                       BKE_pbvh_node_get_bm_orco_data(
-                               nodes[n],
-                               &orco_tris, &orco_tris_num, &orco_coords);
-
-                       for (i = 0; i < orco_tris_num; i++) {
-                               const float *co_tri[3] = {
-                                   orco_coords[orco_tris[i][0]],
-                                   orco_coords[orco_tris[i][1]],
-                                   orco_coords[orco_tris[i][2]],
-                               };
-                               float co[3];
-
-                               closest_on_tri_to_point_v3(co, test.location, UNPACK3(co_tri));
-
-                               if (sculpt_brush_test_fast(&test, co)) {
-                                       float no[3];
-                                       int flip_index;
-
-                                       normal_tri_v3(no, UNPACK3(co_tri));
-
-                                       flip_index = (dot_v3v3(ss->cache->view_normal, no) <= 0.0f);
-                                       add_v3_v3(private_no[flip_index], no);
-                                       private_count[flip_index] += 1;
-                               }
-                       }
-               }
-               else {
-                       BKE_pbvh_vertex_iter_begin(ss->pbvh, nodes[n], vd, PBVH_ITER_UNIQUE)
-                       {
-                               const float *co;
-                               const short *no_s;  /* bm_vert only */
-
-                               if (use_original) {
-                                       if (unode->bm_entry) {
-                                               BM_log_original_vert_data(ss->bm_log, vd.bm_vert, &co, &no_s);
-                                       }
-                                       else {
-                                               co = unode->co[vd.i];
-                                               no_s = unode->no[vd.i];
-                                       }
-                               }
-                               else {
-                                       co = vd.co;
-                               }
-
-                               if (sculpt_brush_test_fast(&test, co)) {
-                                       float no_buf[3];
-                                       const float *no;
-                                       int flip_index;
-
-                                       if (use_original) {
-                                               normal_short_to_float_v3(no_buf, no_s);
-                                               no = no_buf;
-                                       }
-                                       else {
-                                               if (vd.no) {
-                                                       normal_short_to_float_v3(no_buf, vd.no);
-                                                       no = no_buf;
-                                               }
-                                               else {
-                                                       no = vd.fno;
-                                               }
-                                       }
+       SculptCalcAreaData data = {
+               .sd = sd, .ob = ob, .nodes = nodes, .totnode = totnode, .has_bm_orco = has_bm_orco,
+               .area_co = NULL, .area_no = area_no, .count = count,
+       };
+       BLI_mutex_init(&data.mutex);
 
-                                       flip_index = (dot_v3v3(ss->cache->view_normal, no) <= 0.0f);
-                                       add_v3_v3(private_no[flip_index], no);
-                                       private_count[flip_index] += 1;
-                               }
-                       }
-                       BKE_pbvh_vertex_iter_end;
-               }
+       BLI_task_parallel_range_ex(0, totnode, &data, NULL, 0, calc_area_normal_and_centr_task_cb,
+                                  ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_OMP_LIMIT), false);
 
-#pragma omp critical
-               {
-                       /* for area normal */
-                       add_v3_v3(area_no[0], private_no[0]);
-                       add_v3_v3(area_no[1], private_no[1]);
-
-                       /* weights */
-                       count[0] += private_count[0];
-                       count[1] += private_count[1];
-               }
-       }
+       BLI_mutex_end(&data.mutex);
 
        /* for area normal */
        for (n = 0; n < ARRAY_SIZE(area_no); n++) {
@@ -1057,118 +1009,16 @@ static void calc_area_normal_and_center(
 
        int count[2] = {0};
 
-#pragma omp parallel for schedule(guided) if ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_OMP_LIMIT)
-       for (n = 0; n < totnode; n++) {
-               PBVHVertexIter vd;
-               SculptBrushTest test;
-               SculptUndoNode *unode;
-               float private_co[2][3] = {{0.0f}};
-               float private_no[2][3] = {{0.0f}};
-               int   private_count[2] = {0};
-               bool use_original;
-
-               unode = sculpt_undo_push_node(ob, nodes[n], SCULPT_UNDO_COORDS);
-               sculpt_brush_test_init(ss, &test);
-
-               use_original = (ss->cache->original && (unode->co || unode->bm_entry));
-
-               /* when the mesh is edited we can't rely on original coords
-                * (original mesh may not even have verts in brush radius) */
-               if (use_original && has_bm_orco) {
-                       float (*orco_coords)[3];
-                       int   (*orco_tris)[3];
-                       int     orco_tris_num;
-                       int i;
-
-                       BKE_pbvh_node_get_bm_orco_data(
-                               nodes[n],
-                               &orco_tris, &orco_tris_num, &orco_coords);
-
-                       for (i = 0; i < orco_tris_num; i++) {
-                               const float *co_tri[3] = {
-                                   orco_coords[orco_tris[i][0]],
-                                   orco_coords[orco_tris[i][1]],
-                                   orco_coords[orco_tris[i][2]],
-                               };
-                               float co[3];
-
-                               closest_on_tri_to_point_v3(co, test.location, UNPACK3(co_tri));
-
-                               if (sculpt_brush_test_fast(&test, co)) {
-                                       float no[3];
-                                       int flip_index;
-
-                                       normal_tri_v3(no, UNPACK3(co_tri));
-
-                                       flip_index = (dot_v3v3(ss->cache->view_normal, no) <= 0.0f);
-                                       add_v3_v3(private_co[flip_index], co);
-                                       add_v3_v3(private_no[flip_index], no);
-                                       private_count[flip_index] += 1;
-                               }
-                       }
-               }
-               else {
-                       BKE_pbvh_vertex_iter_begin(ss->pbvh, nodes[n], vd, PBVH_ITER_UNIQUE)
-                       {
-                               const float *co;
-                               const short *no_s;  /* bm_vert only */
-
-                               if (use_original) {
-                                       if (unode->bm_entry) {
-                                               BM_log_original_vert_data(ss->bm_log, vd.bm_vert, &co, &no_s);
-                                       }
-                                       else {
-                                               co = unode->co[vd.i];
-                                               no_s = unode->no[vd.i];
-                                       }
-                               }
-                               else {
-                                       co = vd.co;
-                               }
-
-                               if (sculpt_brush_test_fast(&test, co)) {
-                                       float no_buf[3];
-                                       const float *no;
-                                       int flip_index;
-
-                                       if (use_original) {
-                                               normal_short_to_float_v3(no_buf, no_s);
-                                               no = no_buf;
-                                       }
-                                       else {
-                                               if (vd.no) {
-                                                       normal_short_to_float_v3(no_buf, vd.no);
-                                                       no = no_buf;
-                                               }
-                                               else {
-                                                       no = vd.fno;
-                                               }
-                                       }
-
-                                       flip_index = (dot_v3v3(ss->cache->view_normal, no) <= 0.0f);
-                                       add_v3_v3(private_co[flip_index], co);
-                                       add_v3_v3(private_no[flip_index], no);
-                                       private_count[flip_index] += 1;
-                               }
-                       }
-                       BKE_pbvh_vertex_iter_end;
-               }
-
-#pragma omp critical
-               {
-                       /* for flatten center */
-                       add_v3_v3(area_co[0], private_co[0]);
-                       add_v3_v3(area_co[1], private_co[1]);
+       SculptCalcAreaData data = {
+               .sd = sd, .ob = ob, .nodes = nodes, .totnode = totnode, .has_bm_orco = has_bm_orco,
+               .area_co = area_co, .area_no = area_no, .count = count,
+       };
+       BLI_mutex_init(&data.mutex);
 
-                       /* for area normal */
-                       add_v3_v3(area_no[0], private_no[0]);
-                       add_v3_v3(area_no[1], private_no[1]);
+       BLI_task_parallel_range_ex(0, totnode, &data, NULL, 0, calc_area_normal_and_centr_task_cb,
+                                  ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_OMP_LIMIT), false);
 
-                       /* weights */
-                       count[0] += private_count[0];
-                       count[1] += private_count[1];
-               }
-       }
+       BLI_mutex_end(&data.mutex);
 
        /* for flatten center */
        for (n = 0; n < ARRAY_SIZE(area_co); n++) {