Fixed numerical issues, hardened it again.
authorDaniel Genrich <daniel.genrich@gmx.net>
Thu, 22 Nov 2007 17:02:37 +0000 (17:02 +0000)
committerDaniel Genrich <daniel.genrich@gmx.net>
Thu, 22 Nov 2007 17:02:37 +0000 (17:02 +0000)
source/blender/blenkernel/BKE_cloth.h
source/blender/blenkernel/intern/cloth.c
source/blender/blenkernel/intern/implicit.c

index f3f566d283283d23359c76749ac43936654daf8c..38cd54085f5f3699d2eebae98b21d7ff1009f035 100644 (file)
@@ -72,8 +72,8 @@ typedef struct ClothSpring {
        int     matrix_index;   /* needed for implicit solver (fast lookup) */
        int     type;           /* types defined in BKE_cloth.h ("springType") */
        int     flags;          /* defined in BKE_cloth.h, e.g. deactivated due to tearing */
-       float dfdx[3][4];
-       float dfdv[3][4];
+       float dfdx[3][3];
+       float dfdv[3][3];
        float f[3];
 } ClothSpring;
 
@@ -91,13 +91,13 @@ typedef struct Cloth {
        unsigned int            numothersprings;
        unsigned int            numspringssave;
        unsigned int            old_solver_type;
-       float                   (*x)[4]; /* The current position of all vertices.*/
-       float                   (*xold)[4]; /* The previous position of all vertices.*/
-       float                   (*current_x)[4]; /* The TEMPORARY current position of all vertices.*/
-       float                   (*current_xold)[4]; /* The TEMPORARY previous position of all vertices.*/
+       float                   (*x)[3]; /* The current position of all vertices.*/
+       float                   (*xold)[3]; /* The previous position of all vertices.*/
+       float                   (*current_x)[3]; /* The TEMPORARY current position of all vertices.*/
+       float                   (*current_xold)[3]; /* The TEMPORARY previous position of all vertices.*/
        float                   (*v)[4]; /* the current velocity of all vertices */
-       float                   (*current_v)[4];
-       float                   (*xconst)[4];
+       float                   (*current_v)[3];
+       float                   (*xconst)[3];
 } Cloth;
 
 /* goal defines */
index d0ab7ce4ffb269d7d14c1e9f2b04a65becddaa59..436a14d1d6c8888a5f5166b430e5e1aedc1d7618 100644 (file)
@@ -90,9 +90,9 @@ double tval()
 }
 #else
 #include <sys/time.h>
-static struct timeval _tstart, _tend;
-static struct timezone tz;
-void tstart ( void )
+                        static struct timeval _tstart, _tend;
+        static struct timezone tz;
+        void tstart ( void )
 {
        gettimeofday ( &_tstart, &tz );
 }
@@ -133,11 +133,11 @@ static void cloth_apply_vgroup(ClothModifierData *clmd, DerivedMesh *dm, short v
 *
 ******************************************************************************/
 /**
-* cloth_init -  creates a new cloth simulation.
-*
-* 1. create object
-* 2. fill object with standard values or with the GUI settings if given 
-*/
+ * cloth_init -  creates a new cloth simulation.
+ *
+ * 1. create object
+ * 2. fill object with standard values or with the GUI settings if given 
+ */
 void cloth_init (ClothModifierData *clmd)
 {
        /* Initialize our new data structure to reasonable values. */
@@ -202,38 +202,38 @@ DerivedMesh *CDDM_convert_to_triangle(DerivedMesh *dm)
 
        for(i = 0; i < numfaces; i++)
        {
-               if(mface[i].v4)
-                       numquads++;
-               else
-                       numtris++;      
-       }
+       if(mface[i].v4)
+       numquads++;
+       else
+       numtris++;      
+}
 
        result = CDDM_from_template(dm, numverts, 0, numtris + 2*numquads);
 
        if(!result)
-               return NULL;
+       return NULL;
 
        // do verts
        mvert2 = CDDM_get_verts(result);
        for(a=0; a<numverts; a++) 
        {
-               MVert *inMV;
-               MVert *mv = &mvert2[a];
+       MVert *inMV;
+       MVert *mv = &mvert2[a];
 
-               inMV = &mvert[a];
+       inMV = &mvert[a];
 
-               DM_copy_vert_data(dm, result, a, a, 1);
-               *mv = *inMV;
-       }
+       DM_copy_vert_data(dm, result, a, a, 1);
+       *mv = *inMV;
+}
 
 
        // do faces
        mface2 = CDDM_get_faces(result);
        for(a=0, i=0; a<numfaces; a++) 
        {
-               MFace *mf = &mface2[i];
-               MFace *inMF;
-               inMF = &mface[a];
+       MFace *mf = &mface2[i];
+       MFace *inMF;
+       inMF = &mface[a];
 
                
                // DM_copy_face_data(dm, result, a, i, 1);
@@ -241,57 +241,57 @@ DerivedMesh *CDDM_convert_to_triangle(DerivedMesh *dm)
                // *mf = *inMF;
                
 
-               if(mface[a].v4 && random==1)
-               {
-                       mf->v1 = mface[a].v2;
-                       mf->v2 = mface[a].v3;
-                       mf->v3 = mface[a].v4;
-               }
-               else
-               {
-                       mf->v1 = mface[a].v1;
-                       mf->v2 = mface[a].v2;
-                       mf->v3 = mface[a].v3;
-               }
+       if(mface[a].v4 && random==1)
+       {
+       mf->v1 = mface[a].v2;
+       mf->v2 = mface[a].v3;
+       mf->v3 = mface[a].v4;
+}
+       else
+       {
+       mf->v1 = mface[a].v1;
+       mf->v2 = mface[a].v2;
+       mf->v3 = mface[a].v3;
+}
 
-               mf->v4 = 0;
-               mf->flag |= ME_SMOOTH;
+       mf->v4 = 0;
+       mf->flag |= ME_SMOOTH;
 
-               test_index_face(mf, NULL, 0, 3);
+       test_index_face(mf, NULL, 0, 3);
 
-               if(mface[a].v4)
-               {
-                       MFace *mf2;
+       if(mface[a].v4)
+       {
+       MFace *mf2;
 
-                       i++;
+       i++;
 
-                       mf2 = &mface2[i];
+       mf2 = &mface2[i];
                        
                        // DM_copy_face_data(dm, result, a, i, 1);
 
                        // *mf2 = *inMF;
                        
 
-                       if(random==1)
-                       {
-                               mf2->v1 = mface[a].v1;
-                               mf2->v2 = mface[a].v2;
-                               mf2->v3 = mface[a].v4;
-                       }
-                       else
-                       {
-                               mf2->v1 = mface[a].v4;
-                               mf2->v2 = mface[a].v1;
-                               mf2->v3 = mface[a].v3;
-                       }
-                       mf2->v4 = 0;
-                       mf2->flag |= ME_SMOOTH;
+       if(random==1)
+       {
+       mf2->v1 = mface[a].v1;
+       mf2->v2 = mface[a].v2;
+       mf2->v3 = mface[a].v4;
+}
+       else
+       {
+       mf2->v1 = mface[a].v4;
+       mf2->v2 = mface[a].v1;
+       mf2->v3 = mface[a].v3;
+}
+       mf2->v4 = 0;
+       mf2->flag |= ME_SMOOTH;
 
-                       test_index_face(mf2, NULL, 0, 3);
-               }
+       test_index_face(mf2, NULL, 0, 3);
+}
 
-               i++;
-       }
+       i++;
+}
 
        CDDM_calc_edges(result);
        CDDM_calc_normals(result);
@@ -330,43 +330,43 @@ DerivedMesh *CDDM_create_tearing(ClothModifierData *clmd, DerivedMesh *dm)
        
        for(i = 0; i < numsprings; i++)
        {
-               if((springs[i].flags & CSPRING_FLAG_DEACTIVATE)
-               &&(!BLI_edgehash_haskey(edgehash, springs[i].ij, springs[i].kl)))
-               {
-                       BLI_edgehash_insert(edgehash, springs[i].ij, springs[i].kl, NULL);
-                       BLI_edgehash_insert(edgehash, springs[i].kl, springs[i].ij, NULL);
-                       j++;
-               }
-       }
+       if((springs[i].flags & CSPRING_FLAG_DEACTIVATE)
+       &&(!BLI_edgehash_haskey(edgehash, springs[i].ij, springs[i].kl)))
+       {
+       BLI_edgehash_insert(edgehash, springs[i].ij, springs[i].kl, NULL);
+       BLI_edgehash_insert(edgehash, springs[i].kl, springs[i].ij, NULL);
+       j++;
+}
+}
        
        // printf("found %d tears\n", j);
        
        result = CDDM_from_template(dm, numverts, 0, numfaces);
 
        if(!result)
-               return NULL;
+       return NULL;
 
        // do verts
        mvert2 = CDDM_get_verts(result);
        for(a=0; a<numverts; a++) 
        {
-               MVert *inMV;
-               MVert *mv = &mvert2[a];
+       MVert *inMV;
+       MVert *mv = &mvert2[a];
 
-               inMV = &mvert[a];
+       inMV = &mvert[a];
 
-               DM_copy_vert_data(dm, result, a, a, 1);
-               *mv = *inMV;
-       }
+       DM_copy_vert_data(dm, result, a, a, 1);
+       *mv = *inMV;
+}
 
 
        // do faces
        mface2 = CDDM_get_faces(result);
        for(a=0, i=0; a<numfaces; a++) 
        {
-               MFace *mf = &mface2[i];
-               MFace *inMF;
-               inMF = &mface[a];
+       MFace *mf = &mface2[i];
+       MFace *inMF;
+       inMF = &mface[a];
 
                
                // DM_copy_face_data(dm, result, a, i, 1);
@@ -374,21 +374,21 @@ DerivedMesh *CDDM_create_tearing(ClothModifierData *clmd, DerivedMesh *dm)
                // *mf = *inMF;
                
                
-               if((!BLI_edgehash_haskey(edgehash, mface[a].v1, mface[a].v2))
-               &&(!BLI_edgehash_haskey(edgehash, mface[a].v2, mface[a].v3))
-               &&(!BLI_edgehash_haskey(edgehash, mface[a].v3, mface[a].v4))
-               &&(!BLI_edgehash_haskey(edgehash, mface[a].v4, mface[a].v1)))
-               {
-                       mf->v1 = mface[a].v1;
-                       mf->v2 = mface[a].v2;
-                       mf->v3 = mface[a].v3;
-                       mf->v4 = mface[a].v4;
+       if((!BLI_edgehash_haskey(edgehash, mface[a].v1, mface[a].v2))
+       &&(!BLI_edgehash_haskey(edgehash, mface[a].v2, mface[a].v3))
+       &&(!BLI_edgehash_haskey(edgehash, mface[a].v3, mface[a].v4))
+       &&(!BLI_edgehash_haskey(edgehash, mface[a].v4, mface[a].v1)))
+       {
+       mf->v1 = mface[a].v1;
+       mf->v2 = mface[a].v2;
+       mf->v3 = mface[a].v3;
+       mf->v4 = mface[a].v4;
        
-                       test_index_face(mf, NULL, 0, 4);
+       test_index_face(mf, NULL, 0, 4);
        
-                       i++;
-               }
-       }
+       i++;
+}
+}
 
        CDDM_lower_num_faces(result, i);
        CDDM_calc_edges(result);
@@ -527,40 +527,40 @@ DerivedMesh *clothModifier_do(ClothModifierData *clmd,Object *ob, DerivedMesh *d
        /*
        if ( clmd->clothObject )
        {
-               if ( clmd->sim_parms->cache )
-               {
-                       if ( current_time < clmd->sim_parms->firstframe )
-                       {
-                               int frametime = cloth_cache_first_frame ( clmd );
-                               if ( cloth_cache_search_frame ( clmd, frametime ) )
-                               {
-                                       cloth_cache_get_frame ( clmd, frametime );
-                                       cloth_to_object ( ob, result, clmd );
-                               }
-                               return result;
-                       }
-                       else if ( current_time > clmd->sim_parms->lastframe )
-                       {
-                               int frametime = cloth_cache_last_frame ( clmd );
-                               if ( cloth_cache_search_frame ( clmd, frametime ) )
-                               {
-                                       cloth_cache_get_frame ( clmd, frametime );
-                                       cloth_to_object ( ob, result, clmd );
-                               }
-                               return result;
-                       }
-                       else if ( ABS ( deltaTime ) >= 2.0f ) // no timewarps allowed
-                       {
-                               if ( cloth_cache_search_frame ( clmd, framenr ) )
-                               {
-                                       cloth_cache_get_frame ( clmd, framenr );
-                                       cloth_to_object ( ob, result, clmd );
-                               }
-                               clmd->sim_parms->sim_time = current_time;
-                               return result;
-                       }
-               }
-       }
+       if ( clmd->sim_parms->cache )
+       {
+       if ( current_time < clmd->sim_parms->firstframe )
+       {
+       int frametime = cloth_cache_first_frame ( clmd );
+       if ( cloth_cache_search_frame ( clmd, frametime ) )
+       {
+       cloth_cache_get_frame ( clmd, frametime );
+       cloth_to_object ( ob, result, clmd );
+}
+       return result;
+}
+       else if ( current_time > clmd->sim_parms->lastframe )
+       {
+       int frametime = cloth_cache_last_frame ( clmd );
+       if ( cloth_cache_search_frame ( clmd, frametime ) )
+       {
+       cloth_cache_get_frame ( clmd, frametime );
+       cloth_to_object ( ob, result, clmd );
+}
+       return result;
+}
+       else if ( ABS ( deltaTime ) >= 2.0f ) // no timewarps allowed
+       {
+       if ( cloth_cache_search_frame ( clmd, framenr ) )
+       {
+       cloth_cache_get_frame ( clmd, framenr );
+       cloth_to_object ( ob, result, clmd );
+}
+       clmd->sim_parms->sim_time = current_time;
+       return result;
+}
+}
+}
        */
        
        if(deltaTime == 1.0f)
@@ -737,10 +737,10 @@ void cloth_free_modifier (ClothModifierData *clmd)
 ******************************************************************************/
 
 /**
-* cloth_to_object - copies the deformed vertices to the object.
-*
-* This function is a modified version of the softbody.c:softbody_to_object() function.
-**/
+ * cloth_to_object - copies the deformed vertices to the object.
+ *
+ * This function is a modified version of the softbody.c:softbody_to_object() function.
+ **/
 static void cloth_to_object (Object *ob,  DerivedMesh *dm, ClothModifierData *clmd)
 {
        unsigned int    i = 0;
@@ -765,9 +765,9 @@ static void cloth_to_object (Object *ob,  DerivedMesh *dm, ClothModifierData *cl
 
 
 /**
-* cloth_apply_vgroup - applies a vertex group as specified by type
-*
-**/
+ * cloth_apply_vgroup - applies a vertex group as specified by type
+ *
+ **/
 static void cloth_apply_vgroup(ClothModifierData *clmd, DerivedMesh *dm, short vgroup)
 {
        unsigned int i = 0;
@@ -862,52 +862,51 @@ static int cloth_from_object(Object *ob, ClothModifierData *clmd, DerivedMesh *d
                                /* create springs */
                                clmd->clothObject->springs = NULL;
                                clmd->clothObject->numsprings = -1;
+                                       
+                               /* set initial values */
+                               for (i = 0; i < numverts; ++i)
+                               {
+                                       VECCOPY (clmd->clothObject->x[i], mvert[i].co);
+                                       Mat4MulVecfl(ob->obmat, clmd->clothObject->x[i]);
+       
+                                       clmd->clothObject->verts [i].mass = clmd->sim_parms->mass;
+                                       if ( clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL )
+                                               clmd->clothObject->verts [i].goal= clmd->sim_parms->defgoal;
+                                       else
+                                               clmd->clothObject->verts [i].goal= 0.0;
+                                       clmd->clothObject->verts [i].flags = 0;
+                                       VECCOPY(clmd->clothObject->xold[i], clmd->clothObject->x[i]);
+                                       VECCOPY(clmd->clothObject->xconst[i], clmd->clothObject->x[i]);
+                                       VECCOPY(clmd->clothObject->current_xold[i], clmd->clothObject->x[i]);
+                                       VecMulf(clmd->clothObject->v[i], 0.0);
+       
+                                       clmd->clothObject->verts [i].impulse_count = 0;
+                                       VECCOPY ( clmd->clothObject->verts [i].impulse, tnull );
+                               }
                                
-                       /* set initial values */
-                       for (i = 0; i < numverts; ++i)
-                       {
-                               VECCOPY (clmd->clothObject->x[i], mvert[i].co);
-                               Mat4MulVecfl(ob->obmat, clmd->clothObject->x[i]);
-
-                               clmd->clothObject->verts [i].mass = clmd->sim_parms->mass;
-                               if ( clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL )
-                                       clmd->clothObject->verts [i].goal= clmd->sim_parms->defgoal;
-                               else
-                                       clmd->clothObject->verts [i].goal= 0.0;
-                               clmd->clothObject->verts [i].flags = 0;
-                               VECCOPY(clmd->clothObject->xold[i], clmd->clothObject->x[i]);
-                               VECCOPY(clmd->clothObject->xconst[i], clmd->clothObject->x[i]);
-                               VECCOPY(clmd->clothObject->current_xold[i], clmd->clothObject->x[i]);
-                               VecMulf(clmd->clothObject->v[i], 0.0);
-
-                               clmd->clothObject->verts [i].impulse_count = 0;
-                               VECCOPY ( clmd->clothObject->verts [i].impulse, tnull );
+                               if (!cloth_build_springs (clmd->clothObject, dm) )
+                               {
+                                       modifier_setError (&(clmd->modifier), "Can't build springs.");
+                                       return 0;
+                               }  
+       
+                               /* apply / set vertex groups */
+                               if (clmd->sim_parms->vgroup_mass > 0)
+                                       cloth_apply_vgroup (clmd, olddm, clmd->sim_parms->vgroup_mass);
+       
+                               /* init our solver */
+                               if (solvers [clmd->sim_parms->solver_type].init)
+                                       solvers [clmd->sim_parms->solver_type].init (ob, clmd);
+       
+                               clmd->clothObject->tree = bvh_build_from_float3(CDDM_get_faces(dm), dm->getNumFaces(dm), clmd->clothObject->x, numverts, clmd->coll_parms->epsilon);
+                               
+                               clmd->clothObject->selftree = bvh_build_from_float3(NULL, 0, clmd->clothObject->x, numverts, clmd->coll_parms->selfepsilon);
+                               
+                               // save initial state
+                               cloth_write_cache(ob, clmd, framenr-1);
                        }
-                       
-                       if (!cloth_build_springs (clmd->clothObject, dm) )
-                       {
-                               modifier_setError (&(clmd->modifier), "Can't build springs.");
-                               return 0;
-                       }  
-
-                       /* apply / set vertex groups */
-                       if (clmd->sim_parms->vgroup_mass > 0)
-                               cloth_apply_vgroup (clmd, olddm, clmd->sim_parms->vgroup_mass);
-
-                       /* init our solver */
-                       if (solvers [clmd->sim_parms->solver_type].init)
-                               solvers [clmd->sim_parms->solver_type].init (ob, clmd);
-
-                       clmd->clothObject->tree = bvh_build_from_float4(CDDM_get_faces(dm), dm->getNumFaces(dm), clmd->clothObject->x, numverts, clmd->coll_parms->epsilon);
-                       
-                       clmd->clothObject->selftree = bvh_build_from_float4(NULL, 0, clmd->clothObject->x, numverts, clmd->coll_parms->selfepsilon);
-                       
-                       // save initial state
-                       cloth_write_cache(ob, clmd, framenr-1);
-               }
-
-               return 1;
-               default: return 0; // TODO - we do not support changing meshes
+                       return 1;
+                       default: return 0; // TODO - we do not support changing meshes
        }
        
        return 0;
@@ -1119,15 +1118,15 @@ int cloth_build_springs ( Cloth *cloth, DerivedMesh *dm )
                        spring->ij = mface[i].v2;
                        spring->kl = mface[i].v4;
                        VECSUB ( temp, cloth->x[spring->kl], cloth->x[spring->ij] );
-                               spring->restlen =  sqrt ( INPR ( temp, temp ) );
-                               spring->type = CLOTH_SPRING_TYPE_SHEAR;
+                       spring->restlen =  sqrt ( INPR ( temp, temp ) );
+                       spring->type = CLOTH_SPRING_TYPE_SHEAR;
 
-                               BLI_linklist_append ( &edgelist[spring->ij], spring );
-                               BLI_linklist_append ( &edgelist[spring->kl], spring );
-                               shear_springs++;
+                       BLI_linklist_append ( &edgelist[spring->ij], spring );
+                       BLI_linklist_append ( &edgelist[spring->kl], spring );
+                       shear_springs++;
 
-                               node2 = BLI_linklist_append_fast ( &node->next, spring );
-                               node = node2;
+                       node2 = BLI_linklist_append_fast ( &node->next, spring );
+                       node = node2;
                }
        }
        
@@ -1148,8 +1147,8 @@ int cloth_build_springs ( Cloth *cloth, DerivedMesh *dm )
                        // check for existing spring
                        // check also if startpoint is equal to endpoint
                        if ( !BLI_edgehash_haskey ( edgehash, index2, tspring2->ij )
-                               && !BLI_edgehash_haskey ( edgehash, tspring2->ij, index2 )
-                               && ( index2!=tspring2->ij ) )
+                       && !BLI_edgehash_haskey ( edgehash, tspring2->ij, index2 )
+                       && ( index2!=tspring2->ij ) )
                        {
                                spring = ( ClothSpring * ) MEM_callocN ( sizeof ( ClothSpring ), "cloth spring" );
 
index 3f3b3a662534585569de99212c56f2856aab3b33..6f8f96e58fb89027579b640680dcb8f686a44d09 100644 (file)
 #include "BKE_global.h"
 #include  "BIF_editdeform.h"
 
-#include "Bullet-C-Api.h"
-
-#ifdef __SSE3__
-#include <emmintrin.h>
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-#endif
-
 #ifdef _WIN32
 #include <windows.h>
 static LARGE_INTEGER _itstart, _itend;
@@ -91,14 +83,14 @@ void itend(void)
 double itval()
 {
        return ((double)_itend.QuadPart -
-               (double)_itstart.QuadPart)/((double)ifreq.QuadPart);
+                       (double)_itstart.QuadPart)/((double)ifreq.QuadPart);
 }
 #else
 #include <sys/time.h>
 
-static struct timeval _itstart, _itend;
-static struct timezone itz;
-void itstart(void)
+                        static struct timeval _itstart, _itend;
+        static struct timezone itz;
+        void itstart(void)
 {
        gettimeofday(&_itstart, &itz);
 }
@@ -122,39 +114,20 @@ struct Cloth;
 /////////////////////////////////////////
 
 /* DEFINITIONS */
-#ifdef __GNUC__
-typedef float lfVector[4] __attribute__ ((aligned (16)));
-#else
-typedef __declspec(align(16)) lfVector[4];
-#endif
-
-#ifdef __GNUC__
-typedef struct fmatrix3x3 {
-       float m[3][4] __attribute__ ((aligned (16))); /* 3x3 matrix */
-       unsigned int c,r; /* column and row number */
-       int pinned; /* is this vertex allowed to move? */
-       float n1,n2,n3; /* three normal vectors for collision constrains */
-       unsigned int vcount; /* vertex count */
-       unsigned int scount; /* spring count */ 
-} fmatrix3x3;
-#else
+typedef float lfVector[3];
 typedef struct fmatrix3x3 {
-       __declspec(align(16))
-       float m[3][4]; /* 3x3 matrix */
+       float m[3][3]; /* 4x4 matrix */
        unsigned int c,r; /* column and row number */
        int pinned; /* is this vertex allowed to move? */
        float n1,n2,n3; /* three normal vectors for collision constrains */
        unsigned int vcount; /* vertex count */
        unsigned int scount; /* spring count */ 
 } fmatrix3x3;
-#endif
-
 
 ///////////////////////////
 // float[3] vector
 ///////////////////////////
 /* simple vector code */
-
 /* STATUS: verified */
 DO_INLINE void mul_fvector_S(float to[3], float from[3], float scalar)
 {
@@ -166,18 +139,13 @@ DO_INLINE void mul_fvector_S(float to[3], float from[3], float scalar)
 /* STATUS: verified */
 DO_INLINE void cross_fvector(float to[3], float vectorA[3], float vectorB[3])
 {
-       float temp[3];
-       
-       temp[0] = vectorA[1] * vectorB[2] - vectorA[2] * vectorB[1];
-       temp[1] = vectorA[2] * vectorB[0] - vectorA[0] * vectorB[2];
-       temp[2] = vectorA[0] * vectorB[1] - vectorA[1] * vectorB[0];
-       
-       VECCOPY(to, temp);
+       to[0] = vectorA[1] * vectorB[2] - vectorA[2] * vectorB[1];
+       to[1] = vectorA[2] * vectorB[0] - vectorA[0] * vectorB[2];
+       to[2] = vectorA[0] * vectorB[1] - vectorA[1] * vectorB[0];
 }
-
 /* simple v^T * v product ("outer product") */
 /* STATUS: HAS TO BE verified (*should* work) */
-DO_INLINE void mul_fvectorT_fvector(float to[3][4], float vectorA[3], float vectorB[3])
+DO_INLINE void mul_fvectorT_fvector(float to[3][3], float vectorA[3], float vectorB[3])
 {
        mul_fvector_S(to[0], vectorB, vectorA[0]);
        mul_fvector_S(to[1], vectorB, vectorA[1]);
@@ -185,7 +153,7 @@ DO_INLINE void mul_fvectorT_fvector(float to[3][4], float vectorA[3], float vect
 }
 /* simple v^T * v product with scalar ("outer product") */
 /* STATUS: HAS TO BE verified (*should* work) */
-DO_INLINE void mul_fvectorT_fvectorS(float to[3][4], float vectorA[3], float vectorB[3], float aS)
+DO_INLINE void mul_fvectorT_fvectorS(float to[3][3], float vectorA[3], float vectorB[3], float aS)
 {
        mul_fvector_S(to[0], vectorB, vectorA[0]* aS);
        mul_fvector_S(to[1], vectorB, vectorA[1]* aS);
@@ -227,7 +195,7 @@ DO_INLINE void del_lfvector(lfVector *fLongVector)
        }
 }
 /* copy long vector */
-DO_INLINE void cp_lfvector(lfVector *to, lfVector *from, unsigned int verts)
+DO_INLINE void cp_lfvector(float (*to)[3], float (*from)[3], unsigned int verts)
 {
        memcpy(to, from, verts * sizeof(lfVector));
 }
@@ -241,12 +209,12 @@ DO_INLINE void init_lfvector(lfVector *fLongVector, float vector[3], unsigned in
        }
 }
 /* zero long vector with float[3] */
-DO_INLINE void zero_lfvector(lfVector *to, unsigned int verts)
+DO_INLINE void zero_lfvector(float (*to)[3], unsigned int verts)
 {
        memset(to, 0.0f, verts * sizeof(lfVector));
 }
 /* multiply long vector with scalar*/
-DO_INLINE void mul_lfvectorS(lfVector *to, lfVector *fLongVector, float scalar, unsigned int verts)
+DO_INLINE void mul_lfvectorS(float (*to)[3], lfVector *fLongVector, float scalar, unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -257,7 +225,7 @@ DO_INLINE void mul_lfvectorS(lfVector *to, lfVector *fLongVector, float scalar,
 }
 /* multiply long vector with scalar*/
 /* A -= B * float */
-DO_INLINE void submul_lfvectorS(lfVector *to, lfVector *fLongVector, float scalar, unsigned int verts)
+DO_INLINE void submul_lfvectorS(float (*to)[3], lfVector *fLongVector, float scalar, unsigned int verts)
 {
        unsigned int i = 0;
        for(i = 0; i < verts; i++)
@@ -271,7 +239,7 @@ DO_INLINE float dot_lfvector(lfVector *fLongVectorA, lfVector *fLongVectorB, uns
        unsigned int i = 0;
        float temp = 0.0;
 // schedule(guided, 2)
-#pragma omp parallel for reduction(+: temp) private(i) schedule(static)
+#pragma omp parallel for reduction(+: temp)
        for(i = 0; i < verts; i++)
        {
                temp += INPR(fLongVectorA[i], fLongVectorB[i]);
@@ -279,7 +247,7 @@ DO_INLINE float dot_lfvector(lfVector *fLongVectorA, lfVector *fLongVectorB, uns
        return temp;
 }
 /* A = B + C  --> for big vector */
-DO_INLINE void add_lfvector_lfvector(lfVector *to, lfVector *fLongVectorA, lfVector *fLongVectorB, unsigned int verts)
+DO_INLINE void add_lfvector_lfvector(float (*to)[3], lfVector *fLongVectorA, lfVector *fLongVectorB, unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -287,36 +255,10 @@ DO_INLINE void add_lfvector_lfvector(lfVector *to, lfVector *fLongVectorA, lfVec
        {
                VECADD(to[i], fLongVectorA[i], fLongVectorB[i]);
        }
-}
-/*
-#ifdef __SSE3__
-DO_INLINE void add_lfvector(lfVector *to, lfVector *fLongVectorA, unsigned int verts) {
-       __m128 v1, v2;
-       unsigned int i = 0;
-       
-       for(i = 0; i < verts; i++)
-       {
-               v1 = _mm_load_ps(to[i]);
-               v2 = _mm_load_ps(fLongVectorA[i]);
-               
-               v1 = _mm_add_ps(v1, v2);
-               
-               _mm_store_ps(to[i], v1);
-       }
-}
-#else */
-DO_INLINE void add_lfvector(lfVector *to, lfVector *fLongVectorA, unsigned int verts) {
-       unsigned int i = 0;
 
-       for(i = 0; i < verts; i++)
-       {
-               VECADD(to[i], to[i], fLongVectorA[i]);
-       }
 }
-// #endif
-
 /* A = B + C * float --> for big vector */
-DO_INLINE void add_lfvector_lfvectorS(lfVector *to, lfVector *fLongVectorA, lfVector *fLongVectorB, float bS, unsigned int verts)
+DO_INLINE void add_lfvector_lfvectorS(float (*to)[3], lfVector *fLongVectorA, lfVector *fLongVectorB, float bS, unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -326,78 +268,8 @@ DO_INLINE void add_lfvector_lfvectorS(lfVector *to, lfVector *fLongVectorA, lfVe
 
        }
 }
-
-/* A = A + B * float --> for big vector */
-// tested
-/*
-#ifdef __SSE3__
-DO_INLINE void add_lfvectorS(lfVector *to, lfVector *fLongVectorA, float bS, unsigned int verts) {
-       __m128 v1, v2, v3;
-       unsigned int i = 0;
-       
-       for(i = 0; i < verts; i++)
-       {
-               v1 = _mm_load_ps(to[i]);
-               v2 = _mm_load_ps(fLongVectorA[i]);
-               v3 = _mm_set1_ps(bS);
-               
-               v2 = _mm_mul_ps(v2, v3);
-               v1 = _mm_add_ps(v1, v2);
-               
-               _mm_store_ps(to[i], v1);
-       }
-}
-#else */
-DO_INLINE void add_lfvectorS(lfVector *to, lfVector *fLongVectorA, float bS, unsigned int verts) {
-       unsigned int i = 0;
-
-       for(i = 0; i < verts; i++)
-       {
-               VECADDS(to[i], to[i], fLongVectorA[i], bS);
-       }
-}
-// #endif
-
-
-// tested
-/*
-#ifdef __SSE3__
-DO_INLINE float add_lfvectorS_dot(lfVector *to, lfVector *fLongVectorA, float bS, unsigned int verts) {
-       register __m128 v1, v2, v3, v4;
-       unsigned int i = 0;
-       float temp;
-       
-       v4 = _mm_setzero_ps();
-// #pragma omp parallel for reduction(+: v4) private(i, v1, v2, v3) schedule(static)
-       for(i = 0; i < verts; i++)
-       {
-               v1 = _mm_load_ps(to[i]);
-               v2 = _mm_load_ps(fLongVectorA[i]);
-               v3 = _mm_set1_ps(bS);
-               
-               v2 = _mm_mul_ps(v2, v3);
-               v1 = _mm_add_ps(v1, v2);
-               
-               _mm_stream_ps(to[i], v1);
-               
-               v4 = _mm_add_ps(v4, _mm_mul_ps(v1,v1));
-       }
-       
-       v4 = _mm_hadd_ps(v4, v4);
-       v4 = _mm_hadd_ps(v4, v4);
-       _mm_store_ss(&temp, v4);
-       
-       return temp;
-}
-#else */
-DO_INLINE float add_lfvectorS_dot(lfVector *to, lfVector *fLongVectorA, float bS, unsigned int verts) {
-       add_lfvectorS(to, fLongVectorA, bS, verts);
-       return dot_lfvector(to, to, verts);
-}
-// #endif
-
 /* A = B * float + C * float --> for big vector */
-DO_INLINE void add_lfvectorS_lfvectorS(lfVector *to, lfVector *fLongVectorA, float aS, lfVector *fLongVectorB, float bS, unsigned int verts)
+DO_INLINE void add_lfvectorS_lfvectorS(float (*to)[3], lfVector *fLongVectorA, float aS, lfVector *fLongVectorB, float bS, unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -407,7 +279,7 @@ DO_INLINE void add_lfvectorS_lfvectorS(lfVector *to, lfVector *fLongVectorA, flo
        }
 }
 /* A = B - C * float --> for big vector */
-DO_INLINE void sub_lfvector_lfvectorS(lfVector *to, lfVector *fLongVectorA, lfVector *fLongVectorB, float bS, unsigned int verts)
+DO_INLINE void sub_lfvector_lfvectorS(float (*to)[3], lfVector *fLongVectorA, lfVector *fLongVectorB, float bS, unsigned int verts)
 {
        unsigned int i = 0;
        for(i = 0; i < verts; i++)
@@ -417,7 +289,7 @@ DO_INLINE void sub_lfvector_lfvectorS(lfVector *to, lfVector *fLongVectorA, lfVe
 
 }
 /* A = B - C --> for big vector */
-DO_INLINE void sub_lfvector_lfvector(lfVector *to, lfVector *fLongVectorA, lfVector *fLongVectorB, unsigned int verts)
+DO_INLINE void sub_lfvector_lfvector(float (*to)[3], lfVector *fLongVectorA, lfVector *fLongVectorB, unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -428,32 +300,30 @@ DO_INLINE void sub_lfvector_lfvector(lfVector *to, lfVector *fLongVectorA, lfVec
 
 }
 ///////////////////////////
-// 3x3 matrix
+// 4x4 matrix
 ///////////////////////////
-/* printf 3x3 matrix on console: for debug output */
-void print_fmatrix(float m3[3][4])
+/* printf 4x4 matrix on console: for debug output */
+void print_fmatrix(float m3[3][3])
 {
        printf("%f\t%f\t%f\n",m3[0][0],m3[0][1],m3[0][2]);
        printf("%f\t%f\t%f\n",m3[1][0],m3[1][1],m3[1][2]);
        printf("%f\t%f\t%f\n\n",m3[2][0],m3[2][1],m3[2][2]);
 }
-/* copy 3x3 matrix */
-DO_INLINE void cp_fmatrix(float to[3][4], float from[3][4])
+/* copy 4x4 matrix */
+DO_INLINE void cp_fmatrix(float to[3][3], float from[3][3])
 {
-       memcpy(to, from, sizeof (float) * 12);
-       /*
+       // memcpy(to, from, sizeof (float) * 9);
        VECCOPY(to[0], from[0]);
        VECCOPY(to[1], from[1]);
        VECCOPY(to[2], from[2]);
-       */
 }
-/* calculate determinant of 3x3 matrix */
-DO_INLINE float det_fmatrix(float m[3][4])
+/* calculate determinant of 4x4 matrix */
+DO_INLINE float det_fmatrix(float m[3][3])
 {
        return  m[0][0]*m[1][1]*m[2][2] + m[1][0]*m[2][1]*m[0][2] + m[0][1]*m[1][2]*m[2][0] 
-       -m[0][0]*m[1][2]*m[2][1] - m[0][1]*m[1][0]*m[2][2] - m[2][0]*m[1][1]*m[0][2];
+                       -m[0][0]*m[1][2]*m[2][1] - m[0][1]*m[1][0]*m[2][2] - m[2][0]*m[1][1]*m[0][2];
 }
-DO_INLINE void inverse_fmatrix(float to[3][4], float from[3][4])
+DO_INLINE void inverse_fmatrix(float to[3][3], float from[3][3])
 {
        unsigned int i, j;
        float d;
@@ -484,115 +354,84 @@ DO_INLINE void inverse_fmatrix(float to[3][4], float from[3][4])
 
 }
 
-/* 3x3 matrix multiplied by a scalar */
+/* 4x4 matrix multiplied by a scalar */
 /* STATUS: verified */
-DO_INLINE void mul_fmatrix_S(float matrix[3][4], float scalar)
+DO_INLINE void mul_fmatrix_S(float matrix[3][3], float scalar)
 {
        mul_fvector_S(matrix[0], matrix[0],scalar);
        mul_fvector_S(matrix[1], matrix[1],scalar);
        mul_fvector_S(matrix[2], matrix[2],scalar);
 }
 
-/* a vector multiplied by a 3x3 matrix */
+/* a vector multiplied by a 4x4 matrix */
 /* STATUS: verified */
-DO_INLINE void mul_fvector_fmatrix(float *to, float *from, float matrix[3][4])
+DO_INLINE void mul_fvector_fmatrix(float *to, float *from, float matrix[3][3])
 {
-       float temp[3];
-       
-       VECCOPY(temp, from);
-       
-       to[0] = matrix[0][0]*temp[0] + matrix[1][0]*temp[1] + matrix[2][0]*temp[2];
-       to[1] = matrix[0][1]*temp[0] + matrix[1][1]*temp[1] + matrix[2][1]*temp[2];
-       to[2] = matrix[0][2]*temp[0] + matrix[1][2]*temp[1] + matrix[2][2]*temp[2];
+       to[0] = matrix[0][0]*from[0] + matrix[1][0]*from[1] + matrix[2][0]*from[2];
+       to[1] = matrix[0][1]*from[0] + matrix[1][1]*from[1] + matrix[2][1]*from[2];
+       to[2] = matrix[0][2]*from[0] + matrix[1][2]*from[1] + matrix[2][2]*from[2];
 }
 
-/* 3x3 matrix multiplied by a vector */
+/* 4x4 matrix multiplied by a vector */
 /* STATUS: verified */
-/*
-#ifdef __SSE3__
-DO_INLINE void mul_fmatrix_fvector(float to[4], float matrix[3][4], float from[4]) {
-       __m128 v1, v2, v3, v4;
-       
-       v1 = _mm_load_ps(&matrix[0][0]);
-       v2 = _mm_load_ps(&matrix[1][0]);
-       v3 = _mm_load_ps(&matrix[2][0]);
-       v4 = _mm_load_ps(from);
-
-       // stuff
-       v1 = _mm_mul_ps(v1, v4);
-       v2 = _mm_mul_ps(v2, v4);
-       v3 = _mm_mul_ps(v3, v4);
-       v1 = _mm_hadd_ps(v1, v2);
-       v3 = _mm_hadd_ps(v3, _mm_setzero_ps());
-       v4 = _mm_hadd_ps(v1, v3);
-       
-       _mm_store_ps(to, v4);
-}
-#else */
-DO_INLINE void mul_fmatrix_fvector(float to[4], float matrix[3][4], float from[4])
+DO_INLINE void mul_fmatrix_fvector(float *to, float matrix[3][3], float *from)
 {
-       float temp[3] = {0,0,0};
-       
-       temp[0] = INPR(matrix[0],from);
-       temp[1] = INPR(matrix[1],from);
-       temp[2] = INPR(matrix[2],from);
-       
-       VECCOPY(to, temp);
+       to[0] = INPR(matrix[0],from);
+       to[1] = INPR(matrix[1],from);
+       to[2] = INPR(matrix[2],from);
 }
-// #endif
-               
-/* 3x3 matrix multiplied by a 3x3 matrix */
+/* 4x4 matrix multiplied by a 4x4 matrix */
 /* STATUS: verified */
-DO_INLINE void mul_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+DO_INLINE void mul_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        mul_fvector_fmatrix(to[0], matrixA[0],matrixB);
        mul_fvector_fmatrix(to[1], matrixA[1],matrixB);
        mul_fvector_fmatrix(to[2], matrixA[2],matrixB);
 }
-/* 3x3 matrix addition with 3x3 matrix */
-DO_INLINE void add_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+/* 4x4 matrix addition with 4x4 matrix */
+DO_INLINE void add_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECADD(to[0], matrixA[0], matrixB[0]);
        VECADD(to[1], matrixA[1], matrixB[1]);
        VECADD(to[2], matrixA[2], matrixB[2]);
 }
-/* 3x3 matrix add-addition with 3x3 matrix */
-DO_INLINE void addadd_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+/* 4x4 matrix add-addition with 4x4 matrix */
+DO_INLINE void addadd_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECADDADD(to[0], matrixA[0], matrixB[0]);
        VECADDADD(to[1], matrixA[1], matrixB[1]);
        VECADDADD(to[2], matrixA[2], matrixB[2]);
 }
-/* 3x3 matrix sub-addition with 3x3 matrix */
-DO_INLINE void addsub_fmatrixS_fmatrixS(float to[3][4], float matrixA[3][4], float aS, float matrixB[3][4], float bS)
+/* 4x4 matrix sub-addition with 4x4 matrix */
+DO_INLINE void addsub_fmatrixS_fmatrixS(float to[3][3], float matrixA[3][3], float aS, float matrixB[3][3], float bS)
 {
        VECADDSUBSS(to[0], matrixA[0], aS, matrixB[0], bS);
        VECADDSUBSS(to[1], matrixA[1], aS, matrixB[1], bS);
        VECADDSUBSS(to[2], matrixA[2], aS, matrixB[2], bS);
 }
-/* A -= B + C (3x3 matrix sub-addition with 3x3 matrix) */
-DO_INLINE void subadd_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+/* A -= B + C (4x4 matrix sub-addition with 4x4 matrix) */
+DO_INLINE void subadd_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECSUBADD(to[0], matrixA[0], matrixB[0]);
        VECSUBADD(to[1], matrixA[1], matrixB[1]);
        VECSUBADD(to[2], matrixA[2], matrixB[2]);
 }
-/* A -= B*x + C*y (3x3 matrix sub-addition with 3x3 matrix) */
-DO_INLINE void subadd_fmatrixS_fmatrixS(float to[3][4], float matrixA[3][4], float aS, float matrixB[3][4], float bS)
+/* A -= B*x + C*y (4x4 matrix sub-addition with 4x4 matrix) */
+DO_INLINE void subadd_fmatrixS_fmatrixS(float to[3][3], float matrixA[3][3], float aS, float matrixB[3][3], float bS)
 {
        VECSUBADDSS(to[0], matrixA[0], aS, matrixB[0], bS);
        VECSUBADDSS(to[1], matrixA[1], aS, matrixB[1], bS);
        VECSUBADDSS(to[2], matrixA[2], aS, matrixB[2], bS);
 }
-/* A = B - C (3x3 matrix subtraction with 3x3 matrix) */
-DO_INLINE void sub_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+/* A = B - C (4x4 matrix subtraction with 4x4 matrix) */
+DO_INLINE void sub_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECSUB(to[0], matrixA[0], matrixB[0]);
        VECSUB(to[1], matrixA[1], matrixB[1]);
        VECSUB(to[2], matrixA[2], matrixB[2]);
 }
-/* A += B - C (3x3 matrix add-subtraction with 3x3 matrix) */
-DO_INLINE void addsub_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+/* A += B - C (4x4 matrix add-subtraction with 4x4 matrix) */
+DO_INLINE void addsub_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECADDSUB(to[0], matrixA[0], matrixB[0]);
        VECADDSUB(to[1], matrixA[1], matrixB[1]);
@@ -601,93 +440,53 @@ DO_INLINE void addsub_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float
 /////////////////////////////////////////////////////////////////
 // special functions
 /////////////////////////////////////////////////////////////////
-/* a vector multiplied and added to/by a 3x3 matrix */
-/*
+/* a vector multiplied and added to/by a 4x4 matrix */
 DO_INLINE void muladd_fvector_fmatrix(float to[3], float from[3], float matrix[3][3])
 {
        to[0] += matrix[0][0]*from[0] + matrix[1][0]*from[1] + matrix[2][0]*from[2];
        to[1] += matrix[0][1]*from[0] + matrix[1][1]*from[1] + matrix[2][1]*from[2];
        to[2] += matrix[0][2]*from[0] + matrix[1][2]*from[1] + matrix[2][2]*from[2];
 }
-*/
-/* 3x3 matrix multiplied and added  to/by a 3x3 matrix  and added to another 3x3 matrix */
-/*
+/* 4x4 matrix multiplied and added  to/by a 4x4 matrix  and added to another 4x4 matrix */
 DO_INLINE void muladd_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        muladd_fvector_fmatrix(to[0], matrixA[0],matrixB);
        muladd_fvector_fmatrix(to[1], matrixA[1],matrixB);
        muladd_fvector_fmatrix(to[2], matrixA[2],matrixB);
 }
-*/
-/* a vector multiplied and sub'd to/by a 3x3 matrix */
-/*
+/* a vector multiplied and sub'd to/by a 4x4 matrix */
 DO_INLINE void mulsub_fvector_fmatrix(float to[3], float from[3], float matrix[3][3])
 {
        to[0] -= matrix[0][0]*from[0] + matrix[1][0]*from[1] + matrix[2][0]*from[2];
        to[1] -= matrix[0][1]*from[0] + matrix[1][1]*from[1] + matrix[2][1]*from[2];
        to[2] -= matrix[0][2]*from[0] + matrix[1][2]*from[1] + matrix[2][2]*from[2];
 }
-*/
-/* 3x3 matrix multiplied and sub'd  to/by a 3x3 matrix  and added to another 3x3 matrix */
-/*
+/* 4x4 matrix multiplied and sub'd  to/by a 4x4 matrix  and added to another 4x4 matrix */
 DO_INLINE void mulsub_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        mulsub_fvector_fmatrix(to[0], matrixA[0],matrixB);
        mulsub_fvector_fmatrix(to[1], matrixA[1],matrixB);
        mulsub_fvector_fmatrix(to[2], matrixA[2],matrixB);
 }
-*/
-/* 3x3 matrix multiplied+added by a vector */
+/* 4x4 matrix multiplied+added by a vector */
 /* STATUS: verified */
-/*
-#ifdef __SSE3__
-DO_INLINE void muladd_fmatrix_fvector(float to[4], float matrix[3][4], float from[4]) {
-       __m128 v1, v2, v3, v4;
-       
-       v1 = _mm_load_ps(&matrix[0][0]);
-       v2 = _mm_load_ps(&matrix[1][0]);
-       v3 = _mm_load_ps(&matrix[2][0]);
-       v4 = _mm_load_ps(from);
-
-       // stuff
-       v1 = _mm_mul_ps(v1, v4);
-       v2 = _mm_mul_ps(v2, v4);
-       v3 = _mm_mul_ps(v3, v4);
-       v1 = _mm_hadd_ps(v1, v2);
-       v3 = _mm_hadd_ps(v3, _mm_setzero_ps());
-       v1 = _mm_hadd_ps(v1, v3);
-       
-       v4 = _mm_load_ps(to);
-       v4 = _mm_add_ps(v4,v1);
-
-       _mm_store_ps(to, v4);
-}
-#else */
-DO_INLINE void muladd_fmatrix_fvector(float to[4], float matrix[3][4], float from[4])
+DO_INLINE void muladd_fmatrix_fvector(float to[3], float matrix[3][3], float from[3])
 {
-       float temp[3] = { 0,0,0 };
-       
-       temp[0] = INPR(matrix[0],from);
-       temp[1] = INPR(matrix[1],from);
-       temp[2] = INPR(matrix[2],from); 
-       
-       VECADD(to, to, temp);
+       to[0] += INPR(matrix[0],from);
+       to[1] += INPR(matrix[1],from);
+       to[2] += INPR(matrix[2],from);  
 }
-// #endif
-
-/* 3x3 matrix multiplied+sub'ed by a vector */
-/*
+/* 4x4 matrix multiplied+sub'ed by a vector */
 DO_INLINE void mulsub_fmatrix_fvector(float to[3], float matrix[3][3], float from[3])
 {
        to[0] -= INPR(matrix[0],from);
        to[1] -= INPR(matrix[1],from);
        to[2] -= INPR(matrix[2],from);
 }
-*/
 /////////////////////////////////////////////////////////////////
 
 ///////////////////////////
-// SPARSE SYMMETRIC big matrix with 3x3 matrix entries
+// SPARSE SYMMETRIC big matrix with 4x4 matrix entries
 ///////////////////////////
 /* printf a big matrix on console: for debug output */
 void print_bfmatrix(fmatrix3x3 *m3)
@@ -724,10 +523,10 @@ DO_INLINE void cp_bfmatrix(fmatrix3x3 *to, fmatrix3x3 *from)
 }
 /* init the diagonal of big matrix */
 // slow in parallel
-DO_INLINE void initdiag_bfmatrix(fmatrix3x3 *matrix, float m3[3][4])
+DO_INLINE void initdiag_bfmatrix(fmatrix3x3 *matrix, float m3[3][3])
 {
        unsigned int i,j;
-       float tmatrix[3][4] = {{0,0,0,0},{0,0,0,0},{0,0,0,0}};
+       float tmatrix[3][3] = {{0,0,0},{0,0,0},{0,0,0}};
 
        for(i = 0; i < matrix[0].vcount; i++)
        {               
@@ -739,7 +538,7 @@ DO_INLINE void initdiag_bfmatrix(fmatrix3x3 *matrix, float m3[3][4])
        }
 }
 /* init big matrix */
-DO_INLINE void init_bfmatrix(fmatrix3x3 *matrix, float m3[3][4])
+DO_INLINE void init_bfmatrix(fmatrix3x3 *matrix, float m3[3][3])
 {
        unsigned int i;
 
@@ -759,55 +558,36 @@ DO_INLINE void mul_bfmatrix_S(fmatrix3x3 *matrix, float scalar)
 }
 /* SPARSE SYMMETRIC multiply big matrix with long vector*/
 /* STATUS: verified */
-void mul_bfmatrix_lfvector( lfVector *to, fmatrix3x3 *from, lfVector *fLongVector)
+DO_INLINE void mul_bfmatrix_lfvector( float (*to)[3], fmatrix3x3 *from, lfVector *fLongVector)
 {
-       unsigned int i = 0, numverts = from[0].vcount;
-       // lfVector *tflongvector = create_lfvector(numverts);
-       float temp[4]={0,0,0,0};
+       unsigned int i = 0;
+       lfVector *temp = create_lfvector(from[0].vcount);
        
-       zero_lfvector(to, numverts);
-       /*
+       zero_lfvector(to, from[0].vcount);
+
 #pragma omp parallel sections private(i)
-{      
-#pragma omp section
        {
-               for(i = numverts; i < numverts+from[0].scount; i++)
+#pragma omp section
                {
-                       muladd_fmatrix_fvector(to[from[i].c], from[i].m, fLongVector[from[i].r]);
-                       
-               }
-       }
+                       for(i = from[0].vcount; i < from[0].vcount+from[0].scount; i++)
+                       {
+                               muladd_fmatrix_fvector(to[from[i].c], from[i].m, fLongVector[from[i].r]);
+                       }
+               }       
 #pragma omp section
-       {
-               for(i = 0; i < numverts+from[0].scount; i++)
                {
-                       muladd_fmatrix_fvector(tflongvector[from[i].r], from[i].m, fLongVector[from[i].c]);
+                       for(i = 0; i < from[0].vcount+from[0].scount; i++)
+                       {
+                               muladd_fmatrix_fvector(temp[from[i].r], from[i].m, fLongVector[from[i].c]);
+                       }
                }
        }
-}
-       
-       add_lfvector(to, tflongvector, numverts);
+       add_lfvector_lfvector(to, to, temp, from[0].vcount);
        
-       del_lfvector(tflongvector);
-       */
-       // alternative NON OpenMP code
-       /* process diagonal elements */ 
-       
-       for(i = 0; i < from[0].vcount; i++)
-       {
-               mul_fmatrix_fvector(to[from[i].r], from[i].m, fLongVector[from[i].c]);
-       }
+       del_lfvector(temp);
        
-       /* process off-diagonal entries (every off-diagonal entry needs to be symmetric) */
-       
-       for(i = from[0].vcount; i < from[0].vcount+from[0].scount; i++)
-       {
-               muladd_fmatrix_fvector(to[from[i].c], from[i].m, fLongVector[from[i].r]);
-               muladd_fmatrix_fvector(to[from[i].r], from[i].m, fLongVector[from[i].c]);
-       }
        
 }
-
 /* SPARSE SYMMETRIC add big matrix with big matrix: A = B + C*/
 DO_INLINE void add_bfmatrix_bfmatrix( fmatrix3x3 *to, fmatrix3x3 *from,  fmatrix3x3 *matrix)
 {
@@ -898,8 +678,8 @@ DO_INLINE void subadd_bfmatrixS_bfmatrixS( fmatrix3x3 *to, fmatrix3x3 *from, flo
 ///////////////////////////////////////////////////////////////////
 // simulator start
 ///////////////////////////////////////////////////////////////////
-static float I[3][4] = {{1,0,0,0},{0,1,0,0},{0,0,1,0}};
-static float ZERO[3][4] = {{0,0,0,0},{0,0,0,0},{0,0,0,0}};
+static float I[3][3] = {{1,0,0},{0,1,0},{0,0,1}};
+static float ZERO[3][3] = {{0,0,0}, {0,0,0}, {0,0,0}};
 typedef struct Implicit_Data 
 {
        lfVector *X, *V, *Xnew, *Vnew, *F, *B, *dV, *z;
@@ -941,7 +721,6 @@ int implicit_init (Object *ob, ClothModifierData *clmd)
        id->F = create_lfvector(cloth->numverts);
        id->B = create_lfvector(cloth->numverts);
        id->dV = create_lfvector(cloth->numverts);
-       zero_lfvector(id->dV, cloth->numverts);
        id->z = create_lfvector(cloth->numverts);
        
        for(i=0;i<cloth->numverts;i++) 
@@ -971,7 +750,7 @@ int implicit_init (Object *ob, ClothModifierData *clmd)
 
                // dFdV_start[i].c = big_I[i].c = big_zero[i].c = 
                id->A[i+cloth->numverts].c = id->dFdV[i+cloth->numverts].c = id->dFdX[i+cloth->numverts].c = 
-                       id->P[i+cloth->numverts].c = id->Pinv[i+cloth->numverts].c = id->bigI[i+cloth->numverts].c = spring->kl;
+                               id->P[i+cloth->numverts].c = id->Pinv[i+cloth->numverts].c = id->bigI[i+cloth->numverts].c = spring->kl;
 
                spring->matrix_index = i + cloth->numverts;
                
@@ -985,7 +764,7 @@ int implicit_init (Object *ob, ClothModifierData *clmd)
 
        return 1;
 }
-int implicit_free (ClothModifierData *clmd)
+int    implicit_free (ClothModifierData *clmd)
 {
        Implicit_Data *id;
        Cloth *cloth;
@@ -1096,7 +875,7 @@ DO_INLINE void filter(lfVector *V, fmatrix3x3 *S)
        }
 }
 
-int  cg_filtered(lfVector *ldV, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatrix3x3 *S)
+int cg_filtered(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatrix3x3 *S)
 {
        // Solves for unknown X in equation AX=B
        unsigned int conjgrad_loopcount=0, conjgrad_looplimit=100;
@@ -1108,22 +887,24 @@ int  cg_filtered(lfVector *ldV, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatr
        d = create_lfvector(numverts);
        tmp = create_lfvector(numverts);
        r = create_lfvector(numverts);
-       
-       // zero_lfvector(ldV, numverts);
-       filter(ldV, S);
-       add_lfvector_lfvector(ldV, ldV, z, numverts);
+
+       // zero_lfvector(dv, CLOTHPARTICLES);
+       filter(dv, S);
+
+       add_lfvector_lfvector(dv, dv, z, numverts);
 
        // r = B - Mul(tmp,A,X);    // just use B if X known to be zero
-       mul_bfmatrix_lfvector(r, lA, ldV);
-       sub_lfvector_lfvector(r, lB, r, numverts);
-       filter(r, S);
+       cp_lfvector(r, lB, numverts);
+       mul_bfmatrix_lfvector(tmp, lA, dv);
+       sub_lfvector_lfvector(r, r, tmp, numverts);
+
+       filter(r,S);
 
        cp_lfvector(d, r, numverts);
 
        s = dot_lfvector(r, r, numverts);
-       starget = s * conjgrad_epsilon;
-       
-       // itstart();
+       starget = s * sqrt(conjgrad_epsilon);
+
        while((s>starget && conjgrad_loopcount < conjgrad_looplimit))
        {       
                // Mul(q,A,d); // q = A*d;
@@ -1134,14 +915,13 @@ int  cg_filtered(lfVector *ldV, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatr
                a = s/dot_lfvector(d, q, numverts);
 
                // X = X + d*a;
-               add_lfvector_lfvectorS(ldV, ldV, d, a, numverts);
-               
-               s_prev = s;
+               add_lfvector_lfvectorS(dv, dv, d, a, numverts);
 
                // r = r - q*a;
-               add_lfvector_lfvectorS(r, r, q, -a, numverts);
+               sub_lfvector_lfvectorS(r, r, q, a, numverts);
+
+               s_prev = s;
                s = dot_lfvector(r, r, numverts);
-               // s = add_lfvectorS_dot(r, q, -a, numverts);
 
                //d = r+d*(s/s_prev);
                add_lfvector_lfvectorS(d, r, d, (s/s_prev), numverts);
@@ -1195,7 +975,7 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
        BuildPPinv(lA, P, Pinv);
        
        filter(dv, S);
-       add_lfvector(dv, z, numverts);
+       add_lfvector_lfvector(dv, dv, z, numverts);
        
        mul_bfmatrix_lfvector(r, lA, dv);
        sub_lfvector_lfvector(r, lB, r, numverts);
@@ -1204,11 +984,13 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
        mul_bfmatrix_lfvector(p, Pinv, r);
        filter(p, S);
        
-       deltaNew = delta0 = dot_lfvector(r, p, numverts);
+       deltaNew = dot_lfvector(r, p, numverts);
+       
+       delta0 = deltaNew * sqrt(conjgrad_epsilon);
        
        // itstart();
        
-       while ((deltaNew > (conjgrad_epsilon*delta0)) && (iterations < conjgrad_looplimit))
+       while ((deltaNew > delta0) && (iterations < conjgrad_looplimit))
        {
                iterations++;
                
@@ -1217,9 +999,9 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
                
                alpha = deltaNew / dot_lfvector(p, s, numverts);
                
-               add_lfvectorS(dv, p, alpha, numverts);
+               add_lfvector_lfvectorS(dv, dv, p, alpha, numverts);
                
-               add_lfvectorS(r, s, -alpha, numverts);
+               add_lfvector_lfvectorS(r, r, s, -alpha, numverts);
                
                mul_bfmatrix_lfvector(h, Pinv, r);
                filter(h, S);
@@ -1247,11 +1029,11 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
 }
 
 // outer product is NOT cross product!!!
-DO_INLINE void dfdx_spring_type1(float to[3][4], float dir[3],float length,float L,float k)
+DO_INLINE void dfdx_spring_type1(float to[3][3], float dir[3],float length,float L,float k)
 {
        // dir is unit length direction, rest is spring's restlength, k is spring constant.
        // return  (outerprod(dir,dir)*k + (I - outerprod(dir,dir))*(k - ((k*L)/length)));
-       float temp[3][4];
+       float temp[3][3];
        mul_fvectorT_fvector(temp, dir, dir);
        sub_fmatrix_fmatrix(to, I, temp);
        mul_fmatrix_S(to, k* (1.0f-(L/length)));
@@ -1259,20 +1041,20 @@ DO_INLINE void dfdx_spring_type1(float to[3][4], float dir[3],float length,float
        add_fmatrix_fmatrix(to, temp, to);
 }
 
-DO_INLINE void dfdx_spring_type2(float to[3][4], float dir[3],float length,float L,float k, float cb)
+DO_INLINE void dfdx_spring_type2(float to[3][3], float dir[3],float length,float L,float k, float cb)
 {
        // return  outerprod(dir,dir)*fbstar_jacobi(length, L, k, cb);
        mul_fvectorT_fvectorS(to, dir, dir, fbstar_jacobi(length, L, k, cb));
 }
 
-DO_INLINE void dfdv_damp(float to[3][4], float dir[3], float damping)
+DO_INLINE void dfdv_damp(float to[3][3], float dir[3], float damping)
 {
        // derivative of force wrt velocity.  
        // return outerprod(dir,dir) * damping;
        mul_fvectorT_fvectorS(to, dir, dir, damping);
 }
 
-DO_INLINE void dfdx_spring(float to[3][4],  float dir[3],float length,float L,float k)
+DO_INLINE void dfdx_spring(float to[3][3],  float dir[3],float length,float L,float k)
 {
        // dir is unit length direction, rest is spring's restlength, k is spring constant.
        //return  ( (I-outerprod(dir,dir))*Min(1.0f,rest/length) - I) * -k;
@@ -1283,7 +1065,7 @@ DO_INLINE void dfdx_spring(float to[3][4],  float dir[3],float length,float L,fl
        mul_fmatrix_S(to, -k);
 }
 
-DO_INLINE void dfdx_damp(float to[3][4],  float dir[3],float length,const float vel[3],float rest,float damping)
+DO_INLINE void dfdx_damp(float to[3][3],  float dir[3],float length,const float vel[3],float rest,float damping)
 {
        // inner spring damping   vel is the relative velocity  of the endpoints.  
        //      return (I-outerprod(dir,dir)) * (-damping * -(dot(dir,vel)/Max(length,rest)));
@@ -1293,12 +1075,12 @@ DO_INLINE void dfdx_damp(float to[3][4],  float dir[3],float length,const float
 
 }
 
-DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s, lfVector *lF, lfVector *X, lfVector *V, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, float dt)
+DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s, lfVector *lF, lfVector *X, lfVector *V, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX)
 {
        float extent[3];
        float length = 0;
        float dir[3] = {0,0,0};
-       float vel[3] = {0,0,0};
+       float vel[3];
        float k = 0.0f;
        float L = s->restlen;
        float cb = clmd->sim_parms->structural;
@@ -1307,7 +1089,7 @@ DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s,
        float stretch_force[3] = {0,0,0};
        float bending_force[3] = {0,0,0};
        float damping_force[3] = {0,0,0};
-       float nulldfdx[3][4]={ {0,0,0,0}, {0,0,0,0}, {0,0,0,0}};
+       float nulldfdx[3][3]={ {0,0,0}, {0,0,0}, {0,0,0}};
        
        VECCOPY(s->f, nullf);
        cp_fmatrix(s->dfdx, nulldfdx);
@@ -1325,13 +1107,13 @@ DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s,
                /*
                if(length>L)
                {
-                       if((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) 
-                       && ((((length-L)*100.0f/L) > clmd->sim_parms->maxspringlen))) // cut spring!
-                       {
-                               s->flags |= CSPRING_FLAG_DEACTIVATE;
-                               return;
-                       }
-               
+               if((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) 
+               && ((((length-L)*100.0f/L) > clmd->sim_parms->maxspringlen))) // cut spring!
+               {
+               s->flags |= CSPRING_FLAG_DEACTIVATE;
+               return;
+       }
+       } 
                */
                mul_fvector_S(dir, extent, 1.0f/length);
        }
@@ -1348,24 +1130,19 @@ DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s,
                {
                        s->flags |= CLOTH_SPRING_FLAG_NEEDED;
 
-                       k = (clmd->sim_parms->structural*(length-L));   
+                       k = clmd->sim_parms->structural;        
 
-                       mul_fvector_S(stretch_force, dir, k); 
+                       mul_fvector_S(stretch_force, dir, (k*(length-L))); 
 
                        VECADD(s->f, s->f, stretch_force);
 
                        // Ascher & Boxman, p.21: Damping only during elonglation
-                       mul_fvector_S(damping_force, extent, clmd->sim_parms->Cdis * 0.01 * ((INPR(vel,extent)/length))); 
+                       mul_fvector_S(damping_force, extent, clmd->sim_parms->Cdis * ((INPR(vel,extent)/length))); 
                        VECADD(s->f, s->f, damping_force);
-                       
-                       // Formula from Ascher / Boxman, Speeding up cloth simulation
-                       // couldn't see any speedup
-                       // if((dt * (k*dt + 2 * clmd->sim_parms->Cdis * 0.01)) > 0.01 )
-                       {
-                               dfdx_spring_type1(s->dfdx, dir,length,L,clmd->sim_parms->structural);
-                               dfdv_damp(s->dfdv, dir,clmd->sim_parms->Cdis * 0.01);
-                       }
-                       // printf("(dt*k*dt) ): %f, k: %f\n", (dt * (k*dt + 2 * clmd->sim_parms->Cdis * 0.01) ), k);
+
+                       dfdx_spring_type1(s->dfdx, dir,length,L,k);
+
+                       dfdv_damp(s->dfdv, dir,clmd->sim_parms->Cdis);
                }
        }
        else // calculate force of bending springs
@@ -1376,19 +1153,17 @@ DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s,
                        
                        s->flags |= CLOTH_SPRING_FLAG_NEEDED;
                        
-                       k = fbstar(length, L, clmd->sim_parms->bending, cb);    
+                       k = clmd->sim_parms->bending;   
 
-                       mul_fvector_S(bending_force, dir, k);
+                       mul_fvector_S(bending_force, dir, fbstar(length, L, k, cb));
                        VECADD(s->f, s->f, bending_force);
                        
-                       // DG: My formula to handle bending for the AIMEX scheme 
-                       // multiply with 1000 because of numerical problems
-                       // if( ((k*1000.0)*dt*dt) < -0.18 )
+                       if(INPR(bending_force,bending_force) > 0.13*0.13)
                        {
-                               dfdx_spring_type2(s->dfdx, dir,length,L,clmd->sim_parms->bending, cb);
                                clmd->sim_parms->flags |= CLOTH_SIMSETTINGS_FLAG_BIG_FORCE;
                        }
-                       // printf("(dt*k*dt) ): %f, k: %f\n", (dt*dt*(1000.0*k)), k);
+                       
+                       dfdx_spring_type2(s->dfdx, dir,length,L,k, cb);
                }
        }
 }
@@ -1406,8 +1181,8 @@ DO_INLINE int cloth_apply_spring_force(ClothModifierData *clmd, ClothSpring *s,
                        sub_fmatrix_fmatrix(dFdV[s->kl].m, dFdV[s->kl].m, s->dfdv);
                        add_fmatrix_fmatrix(dFdV[s->matrix_index].m, dFdV[s->matrix_index].m, s->dfdv); 
                }
-               // else if(!(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_BIG_FORCE))
-               //      return 0;
+               else if(!(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_BIG_FORCE))
+                       return 0;
                
                sub_fmatrix_fmatrix(dFdX[s->ij].m, dFdX[s->ij].m, s->dfdx);
                sub_fmatrix_fmatrix(dFdX[s->kl].m, dFdX[s->kl].m, s->dfdx);
@@ -1459,14 +1234,14 @@ DO_INLINE void calc_triangle_force(ClothModifierData *clmd, MFace mface, lfVecto
 
 }
 
-void cloth_calc_force(ClothModifierData *clmd, lfVector *lF, lfVector *lX, lfVector *lV, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, ListBase *effectors, float time, float dt)
+void cloth_calc_force(ClothModifierData *clmd, lfVector *lF, lfVector *lX, lfVector *lV, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, ListBase *effectors, float time)
 {
        /* Collect forces and derivatives:  F,dFdX,dFdV */
        Cloth           *cloth          = clmd->clothObject;
        unsigned int    i               = 0;
        float           spring_air      = clmd->sim_parms->Cvi * 0.01f; /* viscosity of air scaled in percent */
        float           gravity[3];
-       float           tm2[3][4]       = {{-spring_air,0,0,0}, {0,-spring_air,0,0},{0,0,-spring_air,0}};
+       float           tm2[3][3]       = {{-spring_air,0,0}, {0,-spring_air,0},{0,0,-spring_air}};
        ClothVertex *verts = cloth->verts;
        MFace           *mfaces         = cloth->mfaces;
        float wind_normalized[3];
@@ -1518,7 +1293,7 @@ void cloth_calc_force(ClothModifierData *clmd, lfVector *lF, lfVector *lX, lfVec
                float speed[3] = {0.0f, 0.0f,0.0f};
                float force[3]= {0.0f, 0.0f, 0.0f};
                
-               #pragma omp parallel for private (i) shared(lF) schedule(static)
+#pragma omp parallel for private (i) shared(lF)
                for(i = 0; i < cloth->numverts; i++)
                {
                        float vertexnormal[3]={0,0,0};
@@ -1543,7 +1318,7 @@ void cloth_calc_force(ClothModifierData *clmd, lfVector *lF, lfVector *lX, lfVec
        {
                // only handle active springs
                // if(((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) && !(springs[i].flags & CSPRING_FLAG_DEACTIVATE))|| !(clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED)){}
-               cloth_calc_spring_force(clmd, search->link, lF, lX, lV, dFdV, dFdX, dt);
+               cloth_calc_spring_force(clmd, search->link, lF, lX, lV, dFdV, dFdX);
 
                search = search->next;
        }
@@ -1578,52 +1353,38 @@ void cloth_calc_force(ClothModifierData *clmd, lfVector *lF, lfVector *lX, lfVec
        
 }
 
-
 void simulate_implicit_euler(lfVector *Vnew, lfVector *lX, lfVector *lV, lfVector *lF, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, float dt, fmatrix3x3 *A, lfVector *B, lfVector *dV, fmatrix3x3 *S, lfVector *z, fmatrix3x3 *P, fmatrix3x3 *Pinv)
 {
        unsigned int numverts = dFdV[0].vcount;
 
        lfVector *dFdXmV = create_lfvector(numverts);
-       
        initdiag_bfmatrix(A, I);
-       
+       zero_lfvector(dV, numverts);
+
        subadd_bfmatrixS_bfmatrixS(A, dFdV, dt, dFdX, (dt*dt));
 
        mul_bfmatrix_lfvector(dFdXmV, dFdX, lV);
 
        add_lfvectorS_lfvectorS(B, lF, dt, dFdXmV, (dt*dt), numverts);
        
-       // cg_filtered(dV, A, B, z, S); // conjugate gradient algorithm to solve Ax=b 
-       cg_filtered_pre(dV, A, B, z, S, P, Pinv);
+       // itstart();
        
-       // advance velocities
-       add_lfvector_lfvector(Vnew, lV, dV, numverts);
+       cg_filtered(dV, A, B, z, S); /* conjugate gradient algorithm to solve Ax=b */
        
-       del_lfvector(dFdXmV);
-}
-
-/*
-// this version solves for the new velocity
-void simulate_implicit_euler(lfVector *Vnew, lfVector *lX, lfVector *lV, lfVector *lF, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, float dt, fmatrix3x3 *A, lfVector *B, lfVector *dV, fmatrix3x3 *S, lfVector *z, fmatrix3x3 *P, fmatrix3x3 *Pinv)
-{
-       unsigned int numverts = dFdV[0].vcount;
-
-       lfVector *dFdXmV = create_lfvector(numverts);
+       // TODO: if anyone finds a way to correct this function =>
+       // explodes with stiffness = 3000 and 16k verts + pinned at 2 corners
+       // cg_filtered_pre(dV, A, B, z, S, P, Pinv);
        
-       initdiag_bfmatrix(A, I);
+       // itend();
+       // printf("cg_filtered calc time: %f\n", (float)itval());
        
-       subadd_bfmatrixS_bfmatrixS(A, dFdV, dt, dFdX, (dt*dt));
-
-       mul_bfmatrix_lfvector(dFdXmV, dFdV, lV);
-
-       add_lfvectorS_lfvectorS(B, lF, dt, dFdXmV, -dt, numverts);
-       add_lfvector_lfvector(B, B, lV, numverts);
-
-       cg_filtered_pre(Vnew, A, B, z, S, P, Pinv);
+       // advance velocities
+       add_lfvector_lfvector(Vnew, lV, dV, numverts);
        
+
        del_lfvector(dFdXmV);
 }
-*/
+
 int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase *effectors)
 {              
        unsigned int i=0;
@@ -1635,7 +1396,7 @@ int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase
        Implicit_Data *id = cloth->implicit;
        int result = 0;
        float force = 0, lastforce = 0;
-       lfVector *dx;
+       // lfVector *dx;
        
        if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) /* do goal stuff */
        {
@@ -1655,7 +1416,7 @@ int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase
                effectors= pdInitEffectors(ob,NULL);
                
                // calculate 
-               cloth_calc_force(clmd, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step, dt );
+               cloth_calc_force(clmd, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step );
                
                // check for sleeping
                // if(!(clmd->coll_parms->flags & CLOTH_SIMSETTINGS_FLAG_SLEEP))
@@ -1664,19 +1425,17 @@ int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase
                
                        add_lfvector_lfvectorS(id->Xnew, id->X, id->Vnew, dt, numverts);
                }
-               
+               /*
                dx = create_lfvector(numverts);
                sub_lfvector_lfvector(dx, id->Xnew, id->X, numverts);
                force = dot_lfvector(dx, dx, numverts);
                del_lfvector(dx);
                
-               /*
                if((force < 0.00001) && (lastforce >= force))
-                       clmd->coll_parms->flags |= CLOTH_SIMSETTINGS_FLAG_SLEEP;
+               clmd->coll_parms->flags |= CLOTH_SIMSETTINGS_FLAG_SLEEP;
                else if((lastforce*2 < force))
+               clmd->coll_parms->flags &= ~CLOTH_SIMSETTINGS_FLAG_SLEEP;
                */
-                       clmd->coll_parms->flags &= ~CLOTH_SIMSETTINGS_FLAG_SLEEP;
-               
                lastforce = force;
                
                if(clmd->coll_parms->flags & CLOTH_COLLISIONSETTINGS_FLAG_ENABLED)
@@ -1706,7 +1465,7 @@ int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase
                        }
                        
                        // call collision function
-                       result = cloth_bvh_objcollision(clmd, step + dt, step, dt);
+                       result = 0; // cloth_bvh_objcollision(clmd, step + dt, step, dt);
                        
                        // copy corrected positions back to simulation
                        if(result)
@@ -1735,7 +1494,7 @@ int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase
                                cp_lfvector(id->V, id->Vnew, numverts);
                                
                                // calculate 
-                               cloth_calc_force(clmd, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step, dt);   
+                               cloth_calc_force(clmd, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step);       
                                simulate_implicit_euler(id->Vnew, id->X, id->V, id->F, id->dFdV, id->dFdX, dt / 2.0f, id->A, id->B, id->dV, id->S, id->z, id->P, id->Pinv);
                        }
                        
@@ -1775,19 +1534,11 @@ int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase
        }
        else
        {
-               for(i = 0; i < numverts; i++)
-               {
-                       VECCOPY(cloth->current_xold[i], id->X[i]);
-                       VECCOPY(cloth->x[i], id->X[i]);
-               }
-               // memcpy(cloth->current_xold, id->X, sizeof(lfVector) * numverts);
-               // memcpy(cloth->x, id->X, sizeof(lfVector) * numverts);
+               memcpy(cloth->current_xold, id->X, sizeof(lfVector) * numverts);
+               memcpy(cloth->x, id->X, sizeof(lfVector) * numverts);
        }
        
-       for(i = 0; i < numverts; i++)
-               VECCOPY(cloth->v[i], id->V[i]);
-       
-       // memcpy(cloth->v, id->V, sizeof(lfVector) * numverts);
+       memcpy(cloth->v, id->V, sizeof(lfVector) * numverts);
        
        return 1;
 }
@@ -1795,18 +1546,11 @@ int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase
 void implicit_set_positions (ClothModifierData *clmd)
 {              
        Cloth *cloth = clmd->clothObject;
-       unsigned int numverts = cloth->numverts, i = 0;
+       unsigned int numverts = cloth->numverts;
        Implicit_Data *id = cloth->implicit;
        
-       
-       for(i = 0; i < numverts; i++)
-       {
-               VECCOPY(id->X[i], cloth->x[i]);
-               VECCOPY(id->V[i], cloth->v[i]);
-       }
-       
-       // memcpy(id->X, cloth->x, sizeof(lfVector) * numverts);        
-       // memcpy(id->V, cloth->v, sizeof(lfVector) * numverts);        
+       memcpy(id->X, cloth->x, sizeof(lfVector) * numverts);   
+       memcpy(id->V, cloth->v, sizeof(lfVector) * numverts);   
 }
 
 
@@ -1825,7 +1569,7 @@ int collisions_collision_response_static(ClothModifierData *clmd, ClothModifierD
        cloth1 = clmd->clothObject;
        cloth2 = coll_clmd->clothObject;
 
-       // search = clmd->coll_parms->collision_list;
+       // search = clmd->coll_parms.collision_list;
        
        while(search)
        {
@@ -1868,10 +1612,10 @@ int collisions_collision_response_static(ClothModifierData *clmd, ClothModifierD
        float vrel_t_pre[3];
        float vrel_t[3];
        double impulse;
-       float epsilon = clmd->coll_parms->epsilon;
+       float epsilon = clmd->coll_parms.epsilon;
        float overlap = (epsilon + ALMOST_ZERO-collpair->distance);
                        
-                       // calculateFrictionImpulse(tangential, relativeVelocity, collpair->normal, magrelVel, clmd->coll_parms->friction*0.01, magrelVel);
+                       // calculateFrictionImpulse(tangential, relativeVelocity, collpair->normal, magrelVel, clmd->coll_parms.friction*0.01, magrelVel);
                        
                        // magtangent = INPR(tangential, tangential);
                        
@@ -2071,20 +1815,20 @@ int collisions_are_edges_adjacent(ClothModifierData *clmd, ClothModifierData *co
        
        VECSUB(temp, verts1[edgecollpair->p11].xold, verts2[edgecollpair->p21].xold);
        if(ABS(INPR(temp, temp)) < ALMOST_ZERO)
-               return 1;
+       return 1;
        
        VECSUB(temp, verts1[edgecollpair->p11].xold, verts2[edgecollpair->p22].xold);
        if(ABS(INPR(temp, temp)) < ALMOST_ZERO)
-               return 1;
+       return 1;
        
        VECSUB(temp, verts1[edgecollpair->p12].xold, verts2[edgecollpair->p21].xold);
        if(ABS(INPR(temp, temp)) < ALMOST_ZERO)
-               return 1;
+       return 1;
        
        VECSUB(temp, verts1[edgecollpair->p12].xold, verts2[edgecollpair->p22].xold);
        if(ABS(INPR(temp, temp)) < ALMOST_ZERO)
-               return 1;
-               */
+       return 1;
+        */
        return 0;
 }
 
@@ -2345,7 +2089,7 @@ int cloth_bvh_objcollision(ClothModifierData * clmd, float step, float prevstep,
        LinkNode *collision_list = NULL; 
        unsigned int i = 0, j = 0;
        int collisions = 0, count = 0;
-       float (*current_x)[4];
+       float (*current_x)[3];
 
        if (!(((Cloth *)clmd->clothObject)->tree))
        {
@@ -2362,57 +2106,57 @@ int cloth_bvh_objcollision(ClothModifierData * clmd, float step, float prevstep,
        ////////////////////////////////////////////////////////////
        
        // update cloth bvh
-       bvh_update_from_float4(bvh1, cloth->current_xold, cloth->numverts, cloth->current_x, 0); // 0 means STATIC, 1 means MOVING (see later in this function)
+       bvh_update_from_float3(bvh1, cloth->current_xold, cloth->numverts, cloth->current_x, 0); // 0 means STATIC, 1 means MOVING (see later in this function)
 /*
        // check all collision objects
        for (base = G.scene->base.first; base; base = base->next)
        {
-               ob2 = base->object;
-               collmd = (CollisionModifierData *) modifiers_findByType (ob2, eModifierType_Collision);
+       ob2 = base->object;
+       collmd = (CollisionModifierData *) modifiers_findByType (ob2, eModifierType_Collision);
                
-               if (!collmd)
-                       continue;
+       if (!collmd)
+       continue;
                
                // check if there is a bounding volume hierarchy
-               if (collmd->tree) 
-               {                       
-                       bvh2 = collmd->tree;
+       if (collmd->tree) 
+       {                       
+       bvh2 = collmd->tree;
                        
                        // update position + bvh of collision object
-                       collision_move_object(collmd, step, prevstep);
-                       bvh_update_from_mvert(collmd->tree, collmd->current_x, collmd->numverts, NULL, 0);
+       collision_move_object(collmd, step, prevstep);
+       bvh_update_from_mvert(collmd->tree, collmd->current_x, collmd->numverts, NULL, 0);
                        
                        // fill collision list 
-                       collisions += bvh_traverse(bvh1->root, bvh2->root, &collision_list);
+       collisions += bvh_traverse(bvh1->root, bvh2->root, &collision_list);
                        
                        // call static collision response
                        
                        // free collision list
-                       if(collision_list)
-                       {
-                               LinkNode *search = collision_list;
+       if(collision_list)
+       {
+       LinkNode *search = collision_list;
                                
-                               while(search)
-                               {
-                                       CollisionPair *coll_pair = search->link;
+       while(search)
+       {
+       CollisionPair *coll_pair = search->link;
                                        
-                                       MEM_freeN(coll_pair);
-                                       search = search->next;
-                               }
-                               BLI_linklist_free(collision_list,NULL);
+       MEM_freeN(coll_pair);
+       search = search->next;
+}
+       BLI_linklist_free(collision_list,NULL);
        
-                               collision_list = NULL;
-                       }
-               }
-       }
+       collision_list = NULL;
+}
+}
+}
        
        //////////////////////////////////////////////
        // update velocities + positions
        //////////////////////////////////////////////
        for(i = 0; i < cloth->numverts; i++)
        {
-               VECADD(cloth->current_x[i], cloth->current_xold[i], cloth->current_v[i]);
-       }
+       VECADD(cloth->current_x[i], cloth->current_xold[i], cloth->current_v[i]);
+}
        //////////////////////////////////////////////
 */     
        /*
@@ -2424,68 +2168,68 @@ int cloth_bvh_objcollision(ClothModifierData * clmd, float step, float prevstep,
        // free collision list
        if(collision_list)
        {
-               LinkNode *search = collision_list;
+       LinkNode *search = collision_list;
                
-               while(search)
-               {
-                       float distance = 0;
-                       float mindistance = cloth->selftree->epsilon;
-                       CollisionPair *collpair = (CollisionPair *)search->link;
+       while(search)
+       {
+       float distance = 0;
+       float mindistance = cloth->selftree->epsilon;
+       CollisionPair *collpair = (CollisionPair *)search->link;
                        
                        // get distance of faces
-                       distance = plNearestPoints(
-                                       cloth->current_x[collpair->point_indexA[0]], cloth->current_x[collpair->point_indexA[1]], cloth->current_x[collpair->point_indexA[2]], cloth->current_x[collpair->point_indexB[0]], cloth->current_x[collpair->point_indexB[1]], cloth->current_x[collpair->point_indexB[2]], collpair->pa,collpair->pb,collpair->vector);
+       distance = plNearestPoints(
+       cloth->current_x[collpair->point_indexA[0]], cloth->current_x[collpair->point_indexA[1]], cloth->current_x[collpair->point_indexA[2]], cloth->current_x[collpair->point_indexB[0]], cloth->current_x[collpair->point_indexB[1]], cloth->current_x[collpair->point_indexB[2]], collpair->pa,collpair->pb,collpair->vector);
                                        
-                       if(distance < mindistance)
-                       {
-                               ///////////////////////////////////////////
+       if(distance < mindistance)
+       {
+       ///////////////////////////////////////////
                                // TODO: take velocity of the collision points into account!
-                               ///////////////////////////////////////////
+       ///////////////////////////////////////////
                                
-                               float correction = mindistance - distance;
-                               float temp[3];
+       float correction = mindistance - distance;
+       float temp[3];
                                
-                               VECCOPY(temp, collpair->vector);
-                               Normalize(temp);
-                               VecMulf(temp, -correction*0.5);
+       VECCOPY(temp, collpair->vector);
+       Normalize(temp);
+       VecMulf(temp, -correction*0.5);
                                
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[0]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexA[0]], cloth->current_x[collpair->point_indexA[0]], temp); 
+       if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[0]].goal >= SOFTGOALSNAP)))
+       VECSUB(cloth->current_x[collpair->point_indexA[0]], cloth->current_x[collpair->point_indexA[0]], temp); 
                                
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[1]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexA[1]], cloth->current_x[collpair->point_indexA[1]], temp);
+       if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[1]].goal >= SOFTGOALSNAP)))
+       VECSUB(cloth->current_x[collpair->point_indexA[1]], cloth->current_x[collpair->point_indexA[1]], temp);
                                
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[2]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexA[2]], cloth->current_x[collpair->point_indexA[2]], temp);
+       if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[2]].goal >= SOFTGOALSNAP)))
+       VECSUB(cloth->current_x[collpair->point_indexA[2]], cloth->current_x[collpair->point_indexA[2]], temp);
                                
                                
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[0]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexB[0]], cloth->current_x[collpair->point_indexB[0]], temp); 
+       if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[0]].goal >= SOFTGOALSNAP)))
+       VECSUB(cloth->current_x[collpair->point_indexB[0]], cloth->current_x[collpair->point_indexB[0]], temp); 
                                
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[1]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexB[1]], cloth->current_x[collpair->point_indexB[1]], temp);
+       if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[1]].goal >= SOFTGOALSNAP)))
+       VECSUB(cloth->current_x[collpair->point_indexB[1]], cloth->current_x[collpair->point_indexB[1]], temp);
                                
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[2]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexB[2]], cloth->current_x[collpair->point_indexB[2]], temp);
+       if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[2]].goal >= SOFTGOALSNAP)))
+       VECSUB(cloth->current_x[collpair->point_indexB[2]], cloth->current_x[collpair->point_indexB[2]], temp);
                                        
-                               collisions = 1;
+       collisions = 1;
                                
-                       }
+}
                        
-               }
+}
                
-               search = collision_list;
-               while(search)
-               {
-                       CollisionPair *coll_pair = search->link;
+       search = collision_list;
+       while(search)
+       {
+       CollisionPair *coll_pair = search->link;
                        
-                       MEM_freeN(coll_pair);
-                       search = search->next;
-               }
-               BLI_linklist_free(collision_list,NULL);
+       MEM_freeN(coll_pair);
+       search = search->next;
+}
+       BLI_linklist_free(collision_list,NULL);
 
-               collision_list = NULL;
-       }
+       collision_list = NULL;
+}
        */
        // Test on *simple* selfcollisions
        collisions = 1;
@@ -2495,62 +2239,62 @@ int cloth_bvh_objcollision(ClothModifierData * clmd, float step, float prevstep,
 #pragma omp parallel for private(i,j, collisions) shared(current_x)
        for(count = 0; count < 6; count++)
        {
-               collisions = 0;
+       collisions = 0;
                
-               for(i = 0; i < cloth->numverts; i++)
-               {
-                       for(j = i + 1; j < cloth->numverts; j++)
-                       {
-                               float temp[3];
-                               float length = 0;
-                               float mindistance = cloth->selftree->epsilon;
+       for(i = 0; i < cloth->numverts; i++)
+       {
+       for(j = i + 1; j < cloth->numverts; j++)
+       {
+       float temp[3];
+       float length = 0;
+       float mindistance = cloth->selftree->epsilon;
                                        
-                               if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL)
-                               {                       
-                                       if((cloth->verts [i].goal >= SOFTGOALSNAP)
-                                       && (cloth->verts [j].goal >= SOFTGOALSNAP))
-                                       {
-                                               continue;
-                                       }
-                               }
+       if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL)
+       {                       
+       if((cloth->verts [i].goal >= SOFTGOALSNAP)
+       && (cloth->verts [j].goal >= SOFTGOALSNAP))
+       {
+       continue;
+}
+}
                                        
                                        // check for adjacent points
-                               if(BLI_edgehash_haskey ( cloth->edgehash, i, j ))
-                               {
-                                       continue;
-                               }
+       if(BLI_edgehash_haskey ( cloth->edgehash, i, j ))
+       {
+       continue;
+}
                                        
-                               VECSUB(temp, current_x[i], current_x[j]);
+       VECSUB(temp, current_x[i], current_x[j]);
                                        
-                               length = Normalize(temp);
+       length = Normalize(temp);
                                        
-                               if(length < mindistance)
-                               {
-                                       float correction = mindistance - length;
+       if(length < mindistance)
+       {
+       float correction = mindistance - length;
                                                
-                                       if((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [i].goal >= SOFTGOALSNAP))
-                                       {
-                                               VecMulf(temp, -correction);
-                                               VECADD(current_x[j], current_x[j], temp);
-                                       }
-                                       else if((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [j].goal >= SOFTGOALSNAP))
-                                       {
-                                               VecMulf(temp, correction);
-                                               VECADD(current_x[i], current_x[i], temp);
-                                       }
-                                       else
-                                       {
-                                               VecMulf(temp, -correction*0.5);
-                                               VECADD(current_x[j], current_x[j], temp);
+       if((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [i].goal >= SOFTGOALSNAP))
+       {
+       VecMulf(temp, -correction);
+       VECADD(current_x[j], current_x[j], temp);
+}
+       else if((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [j].goal >= SOFTGOALSNAP))
+       {
+       VecMulf(temp, correction);
+       VECADD(current_x[i], current_x[i], temp);
+}
+       else
+       {
+       VecMulf(temp, -correction*0.5);
+       VECADD(current_x[j], current_x[j], temp);
                                                
-                                               VECSUB(current_x[i], current_x[i], temp);       
-                                       }
+       VECSUB(current_x[i], current_x[i], temp);       
+}
                                        
-                                       collisions = 1;
-                               }
-                       }
-               }
-       }
+       collisions = 1;
+}
+}
+}
+}
 
        
        //////////////////////////////////////////////
@@ -2558,8 +2302,8 @@ int cloth_bvh_objcollision(ClothModifierData * clmd, float step, float prevstep,
        //////////////////////////////////////////////
        for(i = 0; i < cloth->numverts; i++)
        {
-               VECSUB(cloth->current_v[i], cloth->current_x[i], cloth->current_xold[i]);
-       }
+       VECSUB(cloth->current_v[i], cloth->current_x[i], cloth->current_xold[i]);
+}
        //////////////////////////////////////////////
 */     
        return 1;