Sculpt/dyntopo: Make the omp threads configurable to overcome performance issues
authorJens Verwiebe <info@jensverwiebe.de>
Mon, 31 Mar 2014 11:51:40 +0000 (13:51 +0200)
committerJens Verwiebe <info@jensverwiebe.de>
Mon, 31 Mar 2014 11:51:49 +0000 (13:51 +0200)
- autodetect optimal default, which typically avoids HT threads
- can store setting in .blend per scene
- this does not touch the general OpenMP max thread count, because I found other areas where the calculations do benefit from a high core count
- Intel notes that some older-generation processors with Hyper-Threading do not get a significant performance boost for FPU-intensive applications. On those systems you might want to set OMP_NUM_THREADS to the total number of cores (not the total number of hardware threads).

release/scripts/startup/bl_ui/space_view3d_toolbar.py
source/blender/blenkernel/BKE_scene.h
source/blender/blenkernel/intern/scene.c
source/blender/blenlib/BLI_threads.h
source/blender/blenlib/intern/threads.c
source/blender/editors/sculpt_paint/sculpt.c
source/blender/makesdna/DNA_scene_types.h
source/blender/makesrna/intern/rna_scene.c

index 99e3ffb490b8e07ddd76577f4badd7c823691e7a..70ebf6aac79c1582d164ec73837f0c397042cc1a 100644 (file)
@@ -1283,7 +1283,8 @@ class VIEW3D_PT_sculpt_options(Panel, View3DPaintPanel):
 
     def draw(self, context):
         layout = self.layout
-
+        scene = context.scene
+               
         toolsettings = context.tool_settings
         sculpt = toolsettings.sculpt
         capabilities = sculpt.brush.sculpt_capabilities
@@ -1293,6 +1294,14 @@ class VIEW3D_PT_sculpt_options(Panel, View3DPaintPanel):
         col.label(text="Gravity:")
         col.prop(sculpt, "gravity", slider=True, text="Factor")
         col.prop(sculpt, "gravity_object")
+               
+        col.separator()
+        col.label(text="OpenMP Threads:")
+        col.row(align=True).prop(scene, "omp_mode", expand=True)
+        sub = col.column(align=True)
+        sub.enabled = scene.omp_mode == 'MANUAL'
+        sub.prop(scene, "omp_num_threads")
+        col.separator()
 
         layout.prop(sculpt, "use_threaded", text="Threaded Sculpt")
         layout.prop(sculpt, "show_low_resolution")
index a10a3f3f59fca4438329da6adfac71d5fce704fa..972db36d5a6c4b973b95f486c91f62e18e7e4d25 100644 (file)
@@ -137,6 +137,8 @@ bool BKE_scene_check_rigidbody_active(const struct Scene *scene);
 int BKE_scene_num_threads(const struct Scene *scene);
 int BKE_render_num_threads(const struct RenderData *r);
 
+int BKE_scene_num_omp_threads(const struct Scene *scene);
+void BKE_scene_omp_threads_update(const struct Scene *scene);
 #ifdef __cplusplus
 }
 #endif
index 28cc4305da84fc02eff3d33b9d72a3911ade3290..02bc1fcb699a0f98fdba7972861db2f6700b781c 100644 (file)
@@ -638,6 +638,9 @@ Scene *BKE_scene_add(Main *bmain, const char *name)
 
        sce->gm.exitkey = 218; // Blender key code for ESC
 
+       sce->omp_mode = SCE_OMP_AUTO;
+       sce->omp_num_threads = 1;
+
        sound_create_scene(sce);
 
        /* color management */
@@ -1868,3 +1871,10 @@ int BKE_scene_num_threads(const Scene *scene)
        return BKE_render_num_threads(&scene->r);
 }
 
+/**
+ * Number of OpenMP threads to use for \a scene.
+ *
+ * In auto mode (SCE_OMP_AUTO) the count comes from BLI_omp_thread_count(),
+ * in any other mode the scene's own omp_num_threads value is used.
+ *
+ * \return a thread count that is always >= 1.
+ */
+int BKE_scene_num_omp_threads(const struct Scene *scene)
+{
+       int threads;
+
+       if (scene->omp_mode == SCE_OMP_AUTO)
+               threads = BLI_omp_thread_count();
+       else
+               threads = scene->omp_num_threads;
+
+       /* omp_num_threads is only assigned in BKE_scene_add(); a scene that did
+        * not go through it (presumably one read from an older file - confirm)
+        * can still hold 0 here. The result ends up in omp_set_num_threads(),
+        * which needs a positive count, so never report less than one thread. */
+       return (threads < 1) ? 1 : threads;
+}
index 62eadb8a8b559d00b6dc64ad3ae331cb7cd8c968..b522d95ddaec99a93ea7e6b8b127962d4acd9dc6 100644 (file)
@@ -75,6 +75,8 @@ int     BLI_system_thread_count(void); /* gets the number of threads the system
 void    BLI_system_num_threads_override_set(int num);
 int     BLI_system_num_threads_override_get(void);
 
+int     BLI_omp_thread_count(void); /* gets the number of openmp threads the system can make use of */
+       
 /* Global Mutex Locks
  * 
  * One custom lock available now. can be extended. */
index ded2fd7e06d71abde0063444577107a8787ccfe4..78752fde608e15aba3cb4d4675a813ca860899b2 100644 (file)
 #  include <sys/time.h>
 #endif
 
-#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#if defined(__APPLE__)
+#if defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
 #  define USE_APPLE_OMP_FIX
 #endif
 
+/* How many physical cores the machine has, i.e. not counting the extra
+ * logical cores added by Hyper-Threading (HT).
+ *
+ * Falls back to the logical CPU count ("hw.ncpu") when the physical count
+ * can not be queried, and to 1 when that fails as well, so the result is
+ * always >= 1. */
+static int system_physical_thread_count(void)
+{
+       int ptcount = 0;
+       size_t ptcount_len = sizeof(ptcount);
+
+       /* sysctlbyname() returns 0 on success; only trust ptcount in that case */
+       if (sysctlbyname("hw.physicalcpu", &ptcount, &ptcount_len, NULL, 0) == 0 && ptcount > 0)
+               return ptcount;
+
+       /* the length argument is in/out, so reset it before asking again */
+       ptcount = 0;
+       ptcount_len = sizeof(ptcount);
+       if (sysctlbyname("hw.ncpu", &ptcount, &ptcount_len, NULL, 0) == 0 && ptcount > 0)
+               return ptcount;
+
+       return 1;
+}
+#endif // __APPLE__
+
 #ifdef USE_APPLE_OMP_FIX
 /* ************** libgomp (Apple gcc 4.2.1) TLS bug workaround *************** */
 extern pthread_key_t gomp_tls_key;
@@ -335,6 +350,22 @@ void BLI_end_threads(ListBase *threadbase)
 
 /* System Information */
 
+/* Default number of OpenMP threads for this system:
+ * - OpenMP build on Apple: the physical core count (HT cores not counted),
+ * - any other OpenMP build: the processor count reported by OpenMP,
+ * - build without OpenMP: always 1. */
+int BLI_omp_thread_count(void)
+{
+#if defined(_OPENMP) && defined(__APPLE__)
+       return system_physical_thread_count();
+#elif defined(_OPENMP)
+       return omp_get_num_procs();
+#else
+       return 1;
+#endif
+}
+
 /* how many threads are native on this system? */
 int BLI_system_thread_count(void)
 {
index 8b65d2c9432784fdbda11f61bdecc2c5dfd972cf..c04f8439fe371572152bb196d9d81321c867924d 100644 (file)
@@ -67,6 +67,7 @@
 #include "BKE_multires.h"
 #include "BKE_paint.h"
 #include "BKE_report.h"
+#include "BKE_scene.h"
 #include "BKE_lattice.h" /* for armature_deform_verts */
 #include "BKE_node.h"
 #include "BKE_object.h"
@@ -1541,10 +1542,10 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no
 
        grid_hidden = BKE_pbvh_grid_hidden(ss->pbvh);
 
-       thread_num = 0;
 #ifdef _OPENMP
-       if (sd->flags & SCULPT_USE_OPENMP)
-               thread_num = omp_get_thread_num();
+       thread_num = omp_get_thread_num();
+#else
+       thread_num = 0;
 #endif
        tmpgrid_co = ss->cache->tmpgrid_co[thread_num];
        tmprow_co = ss->cache->tmprow_co[thread_num];
@@ -3769,7 +3770,7 @@ static void sculpt_init_mirror_clipping(Object *ob, SculptSession *ss)
        }
 }
 
-static void sculpt_omp_start(Sculpt *sd, SculptSession *ss)
+static void sculpt_omp_start(Scene *scene, Sculpt *sd, SculptSession *ss)
 {
        StrokeCache *cache = ss->cache;
 
@@ -3779,15 +3780,17 @@ static void sculpt_omp_start(Sculpt *sd, SculptSession *ss)
         * Justification: Empirically I've found that two threads per
         * processor gives higher throughput. */
        if (sd->flags & SCULPT_USE_OPENMP) {
-               cache->num_threads = omp_get_num_procs();
+               cache->num_threads = BKE_scene_num_omp_threads(scene);
        }
        else {
                cache->num_threads = 1;
        }
+       omp_set_num_threads(cache->num_threads);
 #else
        (void)sd;
        cache->num_threads = 1;
 #endif
+//     printf("Sculpt omp threadcount: %d\n", cache->num_threads);
        if (ss->multires) {
                int i, gridsize, array_mem_size;
                BKE_pbvh_node_get_grids(ss->pbvh, NULL, NULL, NULL, NULL,
@@ -4002,7 +4005,7 @@ static void sculpt_update_cache_invariants(bContext *C, Sculpt *sd, SculptSessio
        cache->previous_vertex_rotation = 0;
        cache->init_dir_set = false;
 
-       sculpt_omp_start(sd, ss);
+       sculpt_omp_start(scene, sd, ss);
 }
 
 static void sculpt_update_brush_delta(UnifiedPaintSettings *ups, Object *ob, Brush *brush)
@@ -4626,6 +4629,12 @@ static void sculpt_stroke_done(const bContext *C, struct PaintStroke *UNUSED(str
                WM_event_add_notifier(C, NC_OBJECT | ND_DRAW, ob);
        }
 
+#ifdef _OPENMP
+       if (!(sd->flags & SCULPT_USE_OPENMP))
+               omp_set_num_threads(BLI_system_thread_count());
+//             printf("Reseted to omp threadcount: %d\n", BLI_system_thread_count());
+#endif
+
        sculpt_brush_exit_tex(sd);
 }
 
index b9621b4753c47f4df8fbe2f42d1b873ac4adb545..cc16ccd201ddfa2e8f16a3e3bace8a95b51b0ebc 100644 (file)
@@ -1224,6 +1224,10 @@ typedef struct Scene {
        
        /* RigidBody simulation world+settings */
        struct RigidBodyWorld *rigidbody_world;
+
+       /* Openmp Global Settings */
+       int omp_num_threads;
+       int omp_mode;
 } Scene;
 
 
@@ -1769,6 +1773,10 @@ typedef enum SculptFlags {
 #define        USER_UNIT_OPT_SPLIT             1
 #define USER_UNIT_ROT_RADIANS  2
 
+/* OpenMP settings */
+#define SCE_OMP_AUTO 0
+#define SCE_OMP_MANUAL 1
+
 #ifdef __cplusplus
 }
 #endif
index 0c70e3320530f29680b0ffdb968f8c544364fffa..d1b04bdc1a98ebe3e7141ef292ff65dbc67d2052 100644 (file)
@@ -43,6 +43,7 @@
 #include "BKE_freestyle.h"
 #include "BKE_editmesh.h"
 #include "BKE_paint.h"
+#include "BKE_scene.h"
 
 #include "RNA_define.h"
 #include "RNA_enum_types.h"
@@ -680,6 +681,17 @@ static char *rna_RenderSettings_path(PointerRNA *UNUSED(ptr))
        return BLI_sprintfN("render");
 }
 
+/* RNA update callback: hands the scene over to BKE_scene_omp_threads_update().
+ * The bmain and ptr arguments are not used.
+ * NOTE(review): none of the property definitions visible alongside this
+ * function registers it as an update callback, and the body of
+ * BKE_scene_omp_threads_update() is not visible here either - confirm both. */
+static void rna_omp_threads_update(Main *UNUSED(bmain), Scene *scene, PointerRNA *UNUSED(ptr))
+{
+       BKE_scene_omp_threads_update(scene);
+}
+
+/* RNA getter used for Scene.omp_num_threads: instead of the raw stored value
+ * it reports the count BKE_scene_num_omp_threads() resolves for the scene
+ * held in ptr->data. */
+static int rna_omp_threads_get(PointerRNA *ptr)
+{
+       return BKE_scene_num_omp_threads((Scene *)ptr->data);
+}
+
 static int rna_RenderSettings_threads_get(PointerRNA *ptr)
 {
        RenderData *rd = (RenderData *)ptr->data;
@@ -5088,6 +5100,12 @@ void RNA_def_scene(BlenderRNA *brna)
                {0, NULL, 0, NULL, NULL}
        };
 
+       static EnumPropertyItem omp_threads_mode_items[] = {
+               {SCE_OMP_AUTO, "AUTO", 0, "Auto-detect", "Automatically determine the number of threads, based on CPUs"},
+               {SCE_OMP_MANUAL, "MANUAL", 0, "Manual", "Manually determine the number of threads"},
+               {0, NULL, 0, NULL, NULL}
+       };
+
        /* Struct definition */
        srna = RNA_def_struct(brna, "Scene", "ID");
        RNA_def_struct_ui_text(srna, "Scene",
@@ -5450,6 +5468,17 @@ void RNA_def_scene(BlenderRNA *brna)
        RNA_def_property_struct_type(prop, "ColorManagedSequencerColorspaceSettings");
        RNA_def_property_ui_text(prop, "Sequencer Color Space Settings", "Settings of color space sequencer is working in");
 
+       prop = RNA_def_property(srna, "omp_num_threads", PROP_INT, PROP_NONE);
+       RNA_def_property_range(prop, 1, BLENDER_MAX_THREADS);
+       RNA_def_property_int_funcs(prop, "rna_omp_threads_get", NULL, NULL);
+       RNA_def_property_ui_text(prop, "OpenMP Threads",
+                                                        "Number of CPU threads to use simultaneously for openmp"
+                                                        "(for multi-core/CPU systems)");
+
+       prop = RNA_def_property(srna, "omp_mode", PROP_ENUM, PROP_NONE);
+       RNA_def_property_enum_items(prop, omp_threads_mode_items);
+       RNA_def_property_ui_text(prop, "OpenMP Mode", "Determine the amount of openmp threads used");
+
        /* Nestled Data  */
        /* *** Non-Animated *** */
        RNA_define_animate_sdna(false);