Meshdeform: Use SSE intrinsics in meshdeform_dynamic_bind()
author Sergey Sharybin <sergey.vfx@gmail.com>
Tue, 21 Oct 2014 09:37:29 +0000 (11:37 +0200)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Wed, 22 Oct 2014 10:21:11 +0000 (12:21 +0200)
This is quite a straightforward change; I don't think any extra explanation is needed.

This gives about 15% speedup of the modifier evaluation on my laptop.

Reviewers: campbellbarton

Differential Revision: https://developer.blender.org/D836

source/blender/modifiers/intern/MOD_meshdeform.c

index bd6ce26103cf365557ada64b3d20384e1223cd60..c3cbc0761e986ae804174981590adc87a78c54ef 100644 (file)
@@ -54,6 +54,9 @@
 
 #include "MOD_util.h"
 
+#ifdef __SSE2__
+#  include <emmintrin.h>
+#endif
 
 static void initData(ModifierData *md)
 {
@@ -134,11 +137,15 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
 {
        MDefCell *cell;
        MDefInfluence *inf;
-       float gridvec[3], dvec[3], ivec[3], co[3], wx, wy, wz;
+       float gridvec[3], dvec[3], ivec[3], wx, wy, wz;
        float weight, cageweight, totweight, *cageco;
        int i, j, a, x, y, z, size;
+#ifdef __SSE2__
+       __m128 co = _mm_setzero_ps();
+#else
+       float co[3] = {0.0f, 0.0f, 0.0f};
+#endif
 
-       zero_v3(co);
        totweight = 0.0f;
        size = mmd->dyngridsize;
 
@@ -170,14 +177,30 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
                for (j = 0; j < cell->totinfluence; j++, inf++) {
                        cageco = dco[inf->vertex];
                        cageweight = weight * inf->weight;
+#ifdef __SSE2__
+                       {
+                               __m128 cageweight_r = _mm_set1_ps(cageweight);
+                               /* This will load one extra element; this is OK because
+                                * we ignore that part of the register anyway.
+                                */
+                               __m128 cageco_r = _mm_loadu_ps(cageco);
+                               co = _mm_add_ps(co,
+                                               _mm_mul_ps(cageco_r, cageweight_r));
+                       }
+#else
                        co[0] += cageweight * cageco[0];
                        co[1] += cageweight * cageco[1];
                        co[2] += cageweight * cageco[2];
+#endif
                        totweight += cageweight;
                }
        }
 
+#ifdef __SSE2__
+       copy_v3_v3(vec, (float*)&co);
+#else
        copy_v3_v3(vec, co);
+#endif
 
        return totweight;
 }
@@ -344,7 +367,11 @@ static void meshdeformModifier_do(
        cagedm->getVertCos(cagedm, cagecos);
        bindcagecos = (float(*)[3])mmd->bindcagecos;
 
-       dco = MEM_callocN(sizeof(*dco) * totcagevert, "MDefDco");
+       /* We allocate 1 element extra to make it possible to
+        * load the values to SSE registers, which are float4.
+        */
+       dco = MEM_callocN(sizeof(*dco) * (totcagevert + 1), "MDefDco");
+       zero_v3(dco[totcagevert]);
        for (a = 0; a < totcagevert; a++) {
                /* get cage vertex in world space with binding transform */
                copy_v3_v3(co, cagecos[a]);