Optimization and threading fix for shapekeys weights calculation
author Sergey Sharybin <sergey.vfx@gmail.com>
Mon, 19 Aug 2013 10:36:39 +0000 (10:36 +0000)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Mon, 19 Aug 2013 10:36:39 +0000 (10:36 +0000)
This commit fixes two different issues, both caused by
how weights were calculated for relative shapekeys.

Weights for a key block used to be saved in the KeyBlock DNA
structure, which led to situations where different objects could
start writing to the same weights array if they shared the same
key datablock.

Solved this by never storing weights in KeyBlock; instead they
are passed to the shapekey routines as an array of pointers.
This way weights are still computed at run time (meaning they're
calculated before shapekey evaluation and freed afterwards).

This required some changes to GameEngine as well, to make it
never cache weights in the key blocks.

Another aspect of this commit ensures that the weights for a given
vertex group are only computed once. So if multiple key blocks
use the same influence vertex group, they'll share the exact
same weights array. This gave around a 1.7x speedup in the
test chinchilla file, which is close to what we would get by
caching weights permanently in DNA (the test machine is a dual-core,
4-thread laptop; the speedup was measured in the depsgraph_mt branch,
so trunk might not see as large a speedup).

Some further speed optimization is possible, but it can be
done later.

Thanks to Brecht for the idea of how this could be solved in
a really clear way.

--
svn merge -r58786:58787  ^/branches/soc-2013-depsgraph_mt

source/blender/blenkernel/BKE_key.h
source/blender/blenkernel/intern/key.c
source/blender/editors/mesh/meshtools.c
source/blender/makesdna/DNA_key_types.h
source/gameengine/Converter/BL_ShapeDeformer.cpp
source/gameengine/Rasterizer/RAS_MeshObject.cpp
source/gameengine/Rasterizer/RAS_MeshObject.h

index de60d316426936b5aaa0f314cd84728588398f72..19264feaf489f9d7e58c20bd2092b3241554f004 100644 (file)
@@ -41,6 +41,7 @@ struct Object;
 struct Scene;
 struct Lattice;
 struct Mesh;
+struct WeightsArrayCache;
 
 /* Kernel prototypes */
 #ifdef __cplusplus
@@ -73,8 +74,17 @@ struct KeyBlock *BKE_keyblock_from_key(struct Key *key, int index);
 struct KeyBlock *BKE_keyblock_find_name(struct Key *key, const char name[]);
 void             BKE_keyblock_copy_settings(struct KeyBlock *kb_dst, const struct KeyBlock *kb_src);
 char            *BKE_keyblock_curval_rnapath_get(struct Key *key, struct KeyBlock *kb);
+
 // needed for the GE
-void BKE_key_evaluate_relative(const int start, int end, const int tot, char *basispoin, struct Key *key, struct KeyBlock *actkb, const int mode);
+typedef struct WeightsArrayCache {
+       int num_defgroup_weights;
+       float **defgroup_weights;
+} WeightsArrayCache;
+
+float **BKE_keyblock_get_per_block_weights(struct Object *ob, struct Key *key, struct WeightsArrayCache *cache);
+void BKE_keyblock_free_per_block_weights(struct Key *key, float **per_keyblock_weights, struct WeightsArrayCache *cache);
+void BKE_key_evaluate_relative(const int start, int end, const int tot, char *basispoin, struct Key *key, struct KeyBlock *actkb,
+                               float **per_keyblock_weights, const int mode);
 
 /* conversion functions */
 void    BKE_key_convert_to_mesh(struct KeyBlock *kb, struct Mesh *me);
index d2d2cb1c2d002705e6812c31f35d72493e029825..aaac17ac1edb34829e0f73869b3923bd1a2a7f07 100644 (file)
@@ -734,12 +734,13 @@ static void cp_cu_key(Curve *cu, Key *key, KeyBlock *actkb, KeyBlock *kb, const
        }
 }
 
-void BKE_key_evaluate_relative(const int start, int end, const int tot, char *basispoin, Key *key, KeyBlock *actkb, const int mode)
+void BKE_key_evaluate_relative(const int start, int end, const int tot, char *basispoin, Key *key, KeyBlock *actkb,
+                               float **per_keyblock_weights, const int mode)
 {
        KeyBlock *kb;
        int *ofsp, ofs[3], elemsize, b;
        char *cp, *poin, *reffrom, *from, elemstr[8];
-       int poinsize;
+       int poinsize, keyblock_index;
 
        /* currently always 0, in future key_pointer_size may assign */
        ofs[1] = 0;
@@ -763,14 +764,14 @@ void BKE_key_evaluate_relative(const int start, int end, const int tot, char *ba
        
        /* step 2: do it */
        
-       for (kb = key->block.first; kb; kb = kb->next) {
+       for (kb = key->block.first, keyblock_index = 0; kb; kb = kb->next, keyblock_index++) {
                if (kb != key->refkey) {
                        float icuval = kb->curval;
                        
                        /* only with value, and no difference allowed */
                        if (!(kb->flag & KEYBLOCK_MUTE) && icuval != 0.0f && kb->totelem == tot) {
                                KeyBlock *refb;
-                               float weight, *weights = kb->weights;
+                               float weight, *weights = per_keyblock_weights ? per_keyblock_weights[keyblock_index] : NULL;
                                char *freefrom = NULL, *freereffrom = NULL;
 
                                /* reference now can be any block */
@@ -1058,7 +1059,7 @@ static void do_key(const int start, int end, const int tot, char *poin, Key *key
        if (freek4) MEM_freeN(freek4);
 }
 
-static float *get_weights_array(Object *ob, char *vgroup)
+static float *get_weights_array(Object *ob, char *vgroup, WeightsArrayCache *cache)
 {
        MDeformVert *dvert = NULL;
        BMEditMesh *em = NULL;
@@ -1091,7 +1092,21 @@ static float *get_weights_array(Object *ob, char *vgroup)
        if (defgrp_index != -1) {
                float *weights;
                int i;
-               
+
+               if (cache) {
+                       if (cache->defgroup_weights == NULL) {
+                               int num_defgroup = BLI_countlist(&ob->defbase);
+                               cache->defgroup_weights =
+                                   MEM_callocN(sizeof(*cache->defgroup_weights) * num_defgroup,
+                                               "cached defgroup weights");
+                               cache->num_defgroup_weights = num_defgroup;
+                       }
+
+                       if (cache->defgroup_weights[defgrp_index]) {
+                               return cache->defgroup_weights[defgrp_index];
+                       }
+               }
+
                weights = MEM_mallocN(totvert * sizeof(float), "weights");
 
                if (em) {
@@ -1107,11 +1122,61 @@ static float *get_weights_array(Object *ob, char *vgroup)
                        }
                }
 
+               if (cache) {
+                       cache->defgroup_weights[defgrp_index] = weights;
+               }
+
                return weights;
        }
        return NULL;
 }
 
+float **BKE_keyblock_get_per_block_weights(Object *ob, Key *key, WeightsArrayCache *cache)
+{
+       KeyBlock *keyblock;
+       float **per_keyblock_weights;
+       int keyblock_index;
+
+       per_keyblock_weights =
+               MEM_mallocN(sizeof(*per_keyblock_weights) * key->totkey,
+                           "per keyblock weights");
+
+       for (keyblock = key->block.first, keyblock_index = 0;
+            keyblock;
+            keyblock = keyblock->next, keyblock_index++)
+       {
+               per_keyblock_weights[keyblock_index] = get_weights_array(ob, keyblock->vgroup, cache);
+       }
+
+       return per_keyblock_weights;
+}
+
+void BKE_keyblock_free_per_block_weights(Key *key, float **per_keyblock_weights, WeightsArrayCache *cache)
+{
+       int a;
+
+       if (cache) {
+               if (cache->num_defgroup_weights) {
+                       for (a = 0; a < cache->num_defgroup_weights; a++) {
+                               if (cache->defgroup_weights[a]) {
+                                       MEM_freeN(cache->defgroup_weights[a]);
+                               }
+                       }
+                       MEM_freeN(cache->defgroup_weights);
+               }
+               cache->defgroup_weights = NULL;
+       }
+       else {
+               for (a = 0; a < key->totkey; a++) {
+                       if (per_keyblock_weights[a]) {
+                               MEM_freeN(per_keyblock_weights[a]);
+                       }
+               }
+       }
+
+       MEM_freeN(per_keyblock_weights);
+}
+
 static void do_mesh_key(Scene *scene, Object *ob, Key *key, char *out, const int tot)
 {
        KeyBlock *k[4], *actkb = BKE_keyblock_from_object(ob);
@@ -1144,17 +1209,11 @@ static void do_mesh_key(Scene *scene, Object *ob, Key *key, char *out, const int
        }
        else {
                if (key->type == KEY_RELATIVE) {
-                       KeyBlock *kb;
-                       for (kb = key->block.first; kb; kb = kb->next) {
-                               kb->weights = get_weights_array(ob, kb->vgroup);
-                       }
-
-                       BKE_key_evaluate_relative(0, tot, tot, (char *)out, key, actkb, KEY_MODE_DUMMY);
-                       
-                       for (kb = key->block.first; kb; kb = kb->next) {
-                               if (kb->weights) MEM_freeN(kb->weights);
-                               kb->weights = NULL;
-                       }
+                       WeightsArrayCache cache = {0, NULL};
+                       float **per_keyblock_weights;
+                       per_keyblock_weights = BKE_keyblock_get_per_block_weights(ob, key, &cache);
+                       BKE_key_evaluate_relative(0, tot, tot, (char *)out, key, actkb, per_keyblock_weights, KEY_MODE_DUMMY);
+                       BKE_keyblock_free_per_block_weights(key, per_keyblock_weights, &cache);
                }
                else {
                        const float ctime_scaled = key->ctime / 100.0f;
@@ -1197,11 +1256,11 @@ static void do_rel_cu_key(Curve *cu, Key *key, KeyBlock *actkb, char *out, const
        for (a = 0, nu = cu->nurb.first; nu; nu = nu->next, a += step) {
                if (nu->bp) {
                        step = nu->pntsu * nu->pntsv;
-                       BKE_key_evaluate_relative(a, a + step, tot, out, key, actkb, KEY_MODE_BPOINT);
+                       BKE_key_evaluate_relative(a, a + step, tot, out, key, actkb, NULL, KEY_MODE_BPOINT);
                }
                else if (nu->bezt) {
                        step = 3 * nu->pntsu;
-                       BKE_key_evaluate_relative(a, a + step, tot, out, key, actkb, KEY_MODE_BEZTRIPLE);
+                       BKE_key_evaluate_relative(a, a + step, tot, out, key, actkb, NULL, KEY_MODE_BEZTRIPLE);
                }
                else {
                        step = 0;
@@ -1315,17 +1374,10 @@ static void do_latt_key(Scene *scene, Object *ob, Key *key, char *out, const int
        }
        else {
                if (key->type == KEY_RELATIVE) {
-                       KeyBlock *kb;
-                       
-                       for (kb = key->block.first; kb; kb = kb->next)
-                               kb->weights = get_weights_array(ob, kb->vgroup);
-                       
-                       BKE_key_evaluate_relative(0, tot, tot, out, key, actkb, KEY_MODE_DUMMY);
-                       
-                       for (kb = key->block.first; kb; kb = kb->next) {
-                               if (kb->weights) MEM_freeN(kb->weights);
-                               kb->weights = NULL;
-                       }
+                       float **per_keyblock_weights;
+                       per_keyblock_weights = BKE_keyblock_get_per_block_weights(ob, key, NULL);
+                       BKE_key_evaluate_relative(0, tot, tot, (char *)out, key, actkb, per_keyblock_weights, KEY_MODE_DUMMY);
+                       BKE_keyblock_free_per_block_weights(key, per_keyblock_weights, NULL);
                }
                else {
                        const float ctime_scaled = key->ctime / 100.0f;
@@ -1415,7 +1467,7 @@ float *BKE_key_evaluate_object_ex(Scene *scene, Object *ob, int *r_totelem,
                }
                
                if (OB_TYPE_SUPPORT_VGROUP(ob->type)) {
-                       float *weights = get_weights_array(ob, kb->vgroup);
+                       float *weights = get_weights_array(ob, kb->vgroup, NULL);
 
                        cp_key(0, tot, tot, out, key, actkb, kb, weights, 0);
 
index d012a8ac656d4447d1721e3c40c040c8fb9192bf..4ae48b99bb8d487922174de154f53f5c66f9a7db 100644 (file)
@@ -178,7 +178,6 @@ int join_mesh_exec(bContext *C, wmOperator *op)
                        if (kb->data) MEM_freeN(kb->data);
                        kb->data = MEM_callocN(sizeof(float) * 3 * totvert, "join_shapekey");
                        kb->totelem = totvert;
-                       kb->weights = NULL;
                }
        }
        else if (haskey) {
index 0a09a82b2bb7ceefaa8eb84aaba2f92e1e7ea7d6..f5ce3c8d8c1decded8392418acaaadc0d057f156 100644 (file)
@@ -60,7 +60,6 @@ typedef struct KeyBlock {
        int uid;           /* for meshes only, match the unique number with the customdata layer */
        
        void  *data;       /* array of shape key values, size is (Key->elemsize * KeyBlock->totelem) */
-       float *weights;    /* store an aligned array of weights from 'vgroup' */
        char   name[64];   /* MAX_NAME (unique name, user assigned) */
        char   vgroup[64]; /* MAX_VGROUP_NAME (optional vertex group), array gets allocated into 'weights' when set */
 
index 59cf10d326a422a8bca83d87858e8307732fd536..5aec3c9e965b7ed5949abab433d03a94943d471d 100644 (file)
@@ -159,16 +159,20 @@ bool BL_ShapeDeformer::Update(void)
                /* the key coefficient have been set already, we just need to blend the keys */
                Object* blendobj = m_gameobj->GetBlendObject();
                
-               // make sure the vertex weight cache is in line with this object
-               m_pMeshObject->CheckWeightCache(blendobj);
-
                /* we will blend the key directly in m_transverts array: it is used by armature as the start position */
                /* m_key can be NULL in case of Modifier deformer */
                if (m_key) {
+                       WeightsArrayCache cache = {0, NULL};
+                       float **per_keyblock_weights;
+
                        /* store verts locally */
                        VerifyStorage();
 
-                       BKE_key_evaluate_relative(0, m_bmesh->totvert, m_bmesh->totvert, (char *)(float *)m_transverts, m_key, NULL, 0); /* last arg is ignored */
+                       per_keyblock_weights = BKE_keyblock_get_per_block_weights(blendobj, m_key, &cache);
+                       BKE_key_evaluate_relative(0, m_bmesh->totvert, m_bmesh->totvert, (char *)(float *)m_transverts,
+                                                 m_key, NULL, per_keyblock_weights, 0); /* last arg is ignored */
+                       BKE_keyblock_free_per_block_weights(m_key, per_keyblock_weights, &cache);
+
                        m_bDynamic = true;
                }
 
index 2af71c5efa997dc72c9fd172854050ea394d6d0b..92f134c17020e3ee4942752bf451d3c31e0bdb55 100644 (file)
@@ -125,17 +125,6 @@ RAS_MeshObject::~RAS_MeshObject()
 {
        vector<RAS_Polygon*>::iterator it;
 
-       if (m_mesh && m_mesh->key) 
-       {
-               KeyBlock *kb;
-               // remove the weight cache to avoid memory leak 
-               for (kb = (KeyBlock *)m_mesh->key->block.first; kb; kb = (KeyBlock *)kb->next) {
-                       if (kb->weights) 
-                               MEM_freeN(kb->weights);
-                       kb->weights= NULL;
-               }
-       }
-
        for (it=m_Polygons.begin(); it!=m_Polygons.end(); it++)
                delete (*it);
 
@@ -571,49 +560,3 @@ static int get_def_index(Object* ob, const char* vgroup)
 
        return -1;
 }
-
-void RAS_MeshObject::CheckWeightCache(Object* obj)
-{
-       KeyBlock *kb;
-       int kbindex, defindex;
-       MDeformVert *dv= NULL;
-       int totvert, i;
-       float *weights;
-
-       if (!m_mesh->key)
-               return;
-
-       for (kbindex = 0, kb = (KeyBlock *)m_mesh->key->block.first; kb; kb = kb->next, kbindex++)
-       {
-               // first check the cases where the weight must be cleared
-               if (kb->vgroup[0] == 0 ||
-                       m_mesh->dvert == NULL ||
-                       (defindex = get_def_index(obj, kb->vgroup)) == -1) {
-                       if (kb->weights) {
-                               MEM_freeN(kb->weights);
-                               kb->weights = NULL;
-                       }
-                       m_cacheWeightIndex[kbindex] = -1;
-               } else if (m_cacheWeightIndex[kbindex] != defindex) {
-                       // a weight array is required but the cache is not matching
-                       if (kb->weights) {
-                               MEM_freeN(kb->weights);
-                               kb->weights = NULL;
-                       }
-
-                       dv= m_mesh->dvert;
-                       totvert= m_mesh->totvert;
-               
-                       weights= (float*)MEM_mallocN(totvert*sizeof(float), "weights");
-               
-                       for (i=0; i < totvert; i++, dv++) {
-                               weights[i] = defvert_find_weight(dv, defindex);
-                       }
-
-                       kb->weights = weights;
-                       m_cacheWeightIndex[kbindex] = defindex;
-               }
-       }
-}
-
-
index d77d048302499fd9a0c63904b84d695c5015bb87..e5ae78d006e09cf8d77dc536abaa82dcea8272dd 100644 (file)
@@ -83,9 +83,6 @@ public:
        virtual ~RAS_MeshObject();
 
 
-       // for shape keys, 
-       void CheckWeightCache(struct Object* obj);
-       
        /* materials */
        int                                     NumMaterials();
        const STR_String&       GetMaterialName(unsigned int matid);