svn merge -r 15392:15551 https://svn.blender.org/svnroot/bf-blender/trunk/blender
[blender.git] / source / blender / blenkernel / intern / implicit.c
index f562bd49fcc8b434947b26071d1a43b50df0bda7..297ac0b1530c874aa0434b6b6e7840524ab4fb7e 100644 (file)
@@ -1,15 +1,12 @@
 /*  implicit.c      
 * 
 *
-* ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+* ***** BEGIN GPL LICENSE BLOCK *****
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
-* of the License, or (at your option) any later version. The Blender
-* Foundation also sells licenses for use in proprietary software under
-* the Blender License.  See http://www.blender.org/BL/ for information
-* about this.
+* of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 *
 * Contributor(s): none yet.
 *
-* ***** END GPL/BL DUAL LICENSE BLOCK *****
+* ***** END GPL LICENSE BLOCK *****
 */
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
+
 #include "MEM_guardedalloc.h"
-/* types */
-#include "DNA_curve_types.h"
-#include "DNA_object_types.h"
-#include "DNA_object_force.h"  
+
+#include "BKE_cloth.h"
+
 #include "DNA_cloth_types.h"   
-#include "DNA_key_types.h"
-#include "DNA_mesh_types.h"
-#include "DNA_modifier_types.h"
-#include "DNA_meshdata_types.h"
-#include "DNA_lattice_types.h"
 #include "DNA_scene_types.h"
-#include "DNA_modifier_types.h"
-#include "BLI_arithb.h"
-#include "BLI_blenlib.h"
-#include "BLI_edgehash.h"
-#include "BLI_threads.h"
-#include "BKE_collisions.h"
-#include "BKE_curve.h"
-#include "BKE_displist.h"
+
 #include "BKE_effect.h"
 #include "BKE_global.h"
-#include "BKE_key.h"
-#include "BKE_object.h"
 #include "BKE_cloth.h"
-#include "BKE_modifier.h"
 #include "BKE_utildefines.h"
-#include "BKE_global.h"
-#include  "BIF_editdeform.h"
-
-#include "Bullet-C-Api.h"
-
-#ifdef __SSE3__
-#include <emmintrin.h>
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-#endif
 
 #ifdef _WIN32
 #include <windows.h>
@@ -91,14 +59,18 @@ void itend(void)
 double itval()
 {
        return ((double)_itend.QuadPart -
-               (double)_itstart.QuadPart)/((double)ifreq.QuadPart);
+                       (double)_itstart.QuadPart)/((double)ifreq.QuadPart);
 }
 #else
 #include <sys/time.h>
+// intrinsics need better compile flag checking
+// #include <xmmintrin.h>
+// #include <pmmintrin.h>
+// #include <pthread.h>
 
-static struct timeval _itstart, _itend;
-static struct timezone itz;
-void itstart(void)
+                        static struct timeval _itstart, _itend;
+        static struct timezone itz;
+        void itstart(void)
 {
        gettimeofday(&_itstart, &itz);
 }
@@ -115,6 +87,17 @@ double itval()
 }
 #endif
 
+static float I[3][3] = {{1,0,0},{0,1,0},{0,0,1}};
+static float ZERO[3][3] = {{0,0,0}, {0,0,0}, {0,0,0}};
+
+/*
+#define C99
+#ifdef C99
+#defineDO_INLINE inline 
+#else 
+#defineDO_INLINE static 
+#endif
+*/
 struct Cloth;
 
 //////////////////////////////////////////
@@ -122,39 +105,20 @@ struct Cloth;
 /////////////////////////////////////////
 
 /* DEFINITIONS */
-#ifdef __GNUC__
-typedef float lfVector[4] __attribute__ ((aligned (16)));
-#else
-typedef __declspec(align(16)) lfVector[4];
-#endif
-
-#ifdef __GNUC__
-typedef struct fmatrix3x3 {
-       float m[3][4] __attribute__ ((aligned (16))); /* 3x3 matrix */
-       unsigned int c,r; /* column and row number */
-       int pinned; /* is this vertex allowed to move? */
-       float n1,n2,n3; /* three normal vectors for collision constrains */
-       unsigned int vcount; /* vertex count */
-       unsigned int scount; /* spring count */ 
-} fmatrix3x3;
-#else
+typedef float lfVector[3];
 typedef struct fmatrix3x3 {
-       __declspec(align(16))
-       float m[3][4]; /* 3x3 matrix */
+       float m[3][3]; /* 3x3 matrix */
        unsigned int c,r; /* column and row number */
        int pinned; /* is this vertex allowed to move? */
        float n1,n2,n3; /* three normal vectors for collision constrains */
        unsigned int vcount; /* vertex count */
        unsigned int scount; /* spring count */ 
 } fmatrix3x3;
-#endif
-
 
 ///////////////////////////
 // float[3] vector
 ///////////////////////////
 /* simple vector code */
-
 /* STATUS: verified */
 DO_INLINE void mul_fvector_S(float to[3], float from[3], float scalar)
 {
@@ -166,18 +130,13 @@ DO_INLINE void mul_fvector_S(float to[3], float from[3], float scalar)
 /* STATUS: verified */
 DO_INLINE void cross_fvector(float to[3], float vectorA[3], float vectorB[3])
 {
-       float temp[3];
-       
-       temp[0] = vectorA[1] * vectorB[2] - vectorA[2] * vectorB[1];
-       temp[1] = vectorA[2] * vectorB[0] - vectorA[0] * vectorB[2];
-       temp[2] = vectorA[0] * vectorB[1] - vectorA[1] * vectorB[0];
-       
-       VECCOPY(to, temp);
+       to[0] = vectorA[1] * vectorB[2] - vectorA[2] * vectorB[1];
+       to[1] = vectorA[2] * vectorB[0] - vectorA[0] * vectorB[2];
+       to[2] = vectorA[0] * vectorB[1] - vectorA[1] * vectorB[0];
 }
-
 /* simple v^T * v product ("outer product") */
 /* STATUS: HAS TO BE verified (*should* work) */
-DO_INLINE void mul_fvectorT_fvector(float to[3][4], float vectorA[3], float vectorB[3])
+DO_INLINE void mul_fvectorT_fvector(float to[3][3], float vectorA[3], float vectorB[3])
 {
        mul_fvector_S(to[0], vectorB, vectorA[0]);
        mul_fvector_S(to[1], vectorB, vectorA[1]);
@@ -185,13 +144,16 @@ DO_INLINE void mul_fvectorT_fvector(float to[3][4], float vectorA[3], float vect
 }
 /* simple v^T * v product with scalar ("outer product") */
 /* STATUS: HAS TO BE verified (*should* work) */
-DO_INLINE void mul_fvectorT_fvectorS(float to[3][4], float vectorA[3], float vectorB[3], float aS)
-{
-       mul_fvector_S(to[0], vectorB, vectorA[0]* aS);
-       mul_fvector_S(to[1], vectorB, vectorA[1]* aS);
-       mul_fvector_S(to[2], vectorB, vectorA[2]* aS);
+DO_INLINE void mul_fvectorT_fvectorS(float to[3][3], float vectorA[3], float vectorB[3], float aS)
+{      
+       mul_fvectorT_fvector(to, vectorA, vectorB);
+       
+       mul_fvector_S(to[0], to[0], aS);
+       mul_fvector_S(to[1], to[1], aS);
+       mul_fvector_S(to[2], to[2], aS);
 }
 
+
 /* printf vector[3] on console: for debug output */
 void print_fvector(float m3[3])
 {
@@ -202,7 +164,7 @@ void print_fvector(float m3[3])
 // long float vector float (*)[3]
 ///////////////////////////
 /* print long vector on console: for debug output */
-DO_INLINE void print_lfvector(lfVector *fLongVector, unsigned int verts)
+DO_INLINE void print_lfvector(float (*fLongVector)[3], unsigned int verts)
 {
        unsigned int i = 0;
        for(i = 0; i < verts; i++)
@@ -218,7 +180,7 @@ DO_INLINE lfVector *create_lfvector(unsigned int verts)
        // return (lfVector *)cloth_aligned_malloc(&MEMORY_BASE, verts * sizeof(lfVector));
 }
 /* delete long vector */
-DO_INLINE void del_lfvector(lfVector *fLongVector)
+DO_INLINE void del_lfvector(float (*fLongVector)[3])
 {
        if (fLongVector != NULL)
        {
@@ -227,12 +189,12 @@ DO_INLINE void del_lfvector(lfVector *fLongVector)
        }
 }
 /* copy long vector */
-DO_INLINE void cp_lfvector(lfVector *to, lfVector *from, unsigned int verts)
+DO_INLINE void cp_lfvector(float (*to)[3], float (*from)[3], unsigned int verts)
 {
        memcpy(to, from, verts * sizeof(lfVector));
 }
 /* init long vector with float[3] */
-DO_INLINE void init_lfvector(lfVector *fLongVector, float vector[3], unsigned int verts)
+DO_INLINE void init_lfvector(float (*fLongVector)[3], float vector[3], unsigned int verts)
 {
        unsigned int i = 0;
        for(i = 0; i < verts; i++)
@@ -241,12 +203,12 @@ DO_INLINE void init_lfvector(lfVector *fLongVector, float vector[3], unsigned in
        }
 }
 /* zero long vector with float[3] */
-DO_INLINE void zero_lfvector(lfVector *to, unsigned int verts)
+DO_INLINE void zero_lfvector(float (*to)[3], unsigned int verts)
 {
        memset(to, 0.0f, verts * sizeof(lfVector));
 }
 /* multiply long vector with scalar*/
-DO_INLINE void mul_lfvectorS(lfVector *to, lfVector *fLongVector, float scalar, unsigned int verts)
+DO_INLINE void mul_lfvectorS(float (*to)[3], float (*fLongVector)[3], float scalar, unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -257,7 +219,7 @@ DO_INLINE void mul_lfvectorS(lfVector *to, lfVector *fLongVector, float scalar,
 }
 /* multiply long vector with scalar*/
 /* A -= B * float */
-DO_INLINE void submul_lfvectorS(lfVector *to, lfVector *fLongVector, float scalar, unsigned int verts)
+DO_INLINE void submul_lfvectorS(float (*to)[3], float (*fLongVector)[3], float scalar, unsigned int verts)
 {
        unsigned int i = 0;
        for(i = 0; i < verts; i++)
@@ -266,20 +228,20 @@ DO_INLINE void submul_lfvectorS(lfVector *to, lfVector *fLongVector, float scala
        }
 }
 /* dot product for big vector */
-DO_INLINE float dot_lfvector(lfVector *fLongVectorA, lfVector *fLongVectorB, unsigned int verts)
+DO_INLINE float dot_lfvector(float (*fLongVectorA)[3], float (*fLongVectorB)[3], unsigned int verts)
 {
-       unsigned int i = 0;
+       long i = 0;
        float temp = 0.0;
 // schedule(guided, 2)
-#pragma omp parallel for reduction(+: temp) schedule(static)
-       for(i = 0; i < verts; i++)
+#pragma omp parallel for reduction(+: temp)
+       for(i = 0; i < (long)verts; i++)
        {
                temp += INPR(fLongVectorA[i], fLongVectorB[i]);
        }
        return temp;
 }
 /* A = B + C  --> for big vector */
-DO_INLINE void add_lfvector_lfvector(lfVector *to, lfVector *fLongVectorA, lfVector *fLongVectorB, unsigned int verts)
+DO_INLINE void add_lfvector_lfvector(float (*to)[3], float (*fLongVectorA)[3], float (*fLongVectorB)[3], unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -290,7 +252,7 @@ DO_INLINE void add_lfvector_lfvector(lfVector *to, lfVector *fLongVectorA, lfVec
 
 }
 /* A = B + C * float --> for big vector */
-DO_INLINE void add_lfvector_lfvectorS(lfVector *to, lfVector *fLongVectorA, lfVector *fLongVectorB, float bS, unsigned int verts)
+DO_INLINE void add_lfvector_lfvectorS(float (*to)[3], float (*fLongVectorA)[3], float (*fLongVectorB)[3], float bS, unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -301,7 +263,7 @@ DO_INLINE void add_lfvector_lfvectorS(lfVector *to, lfVector *fLongVectorA, lfVe
        }
 }
 /* A = B * float + C * float --> for big vector */
-DO_INLINE void add_lfvectorS_lfvectorS(lfVector *to, lfVector *fLongVectorA, float aS, lfVector *fLongVectorB, float bS, unsigned int verts)
+DO_INLINE void add_lfvectorS_lfvectorS(float (*to)[3], float (*fLongVectorA)[3], float aS, float (*fLongVectorB)[3], float bS, unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -311,7 +273,7 @@ DO_INLINE void add_lfvectorS_lfvectorS(lfVector *to, lfVector *fLongVectorA, flo
        }
 }
 /* A = B - C * float --> for big vector */
-DO_INLINE void sub_lfvector_lfvectorS(lfVector *to, lfVector *fLongVectorA, lfVector *fLongVectorB, float bS, unsigned int verts)
+DO_INLINE void sub_lfvector_lfvectorS(float (*to)[3], float (*fLongVectorA)[3], float (*fLongVectorB)[3], float bS, unsigned int verts)
 {
        unsigned int i = 0;
        for(i = 0; i < verts; i++)
@@ -321,7 +283,7 @@ DO_INLINE void sub_lfvector_lfvectorS(lfVector *to, lfVector *fLongVectorA, lfVe
 
 }
 /* A = B - C --> for big vector */
-DO_INLINE void sub_lfvector_lfvector(lfVector *to, lfVector *fLongVectorA, lfVector *fLongVectorB, unsigned int verts)
+DO_INLINE void sub_lfvector_lfvector(float (*to)[3], float (*fLongVectorA)[3], float (*fLongVectorB)[3], unsigned int verts)
 {
        unsigned int i = 0;
 
@@ -335,29 +297,40 @@ DO_INLINE void sub_lfvector_lfvector(lfVector *to, lfVector *fLongVectorA, lfVec
 // 3x3 matrix
 ///////////////////////////
 /* printf 3x3 matrix on console: for debug output */
-void print_fmatrix(float m3[3][4])
+void print_fmatrix(float m3[3][3])
 {
        printf("%f\t%f\t%f\n",m3[0][0],m3[0][1],m3[0][2]);
        printf("%f\t%f\t%f\n",m3[1][0],m3[1][1],m3[1][2]);
        printf("%f\t%f\t%f\n\n",m3[2][0],m3[2][1],m3[2][2]);
 }
+
 /* copy 3x3 matrix */
-DO_INLINE void cp_fmatrix(float to[3][4], float from[3][4])
+DO_INLINE void cp_fmatrix(float to[3][3], float from[3][3])
 {
-       memcpy(to, from, sizeof (float) * 12);
-       /*
+       // memcpy(to, from, sizeof (float) * 9);
        VECCOPY(to[0], from[0]);
        VECCOPY(to[1], from[1]);
        VECCOPY(to[2], from[2]);
-       */
 }
+
+/* copy 3x3 matrix */
+DO_INLINE void initdiag_fmatrixS(float to[3][3], float aS)
+{
+       cp_fmatrix(to, ZERO);
+       
+       to[0][0] = aS;
+       to[1][1] = aS;
+       to[2][2] = aS;
+}
+
 /* calculate determinant of 3x3 matrix */
-DO_INLINE float det_fmatrix(float m[3][4])
+DO_INLINE float det_fmatrix(float m[3][3])
 {
        return  m[0][0]*m[1][1]*m[2][2] + m[1][0]*m[2][1]*m[0][2] + m[0][1]*m[1][2]*m[2][0] 
-       -m[0][0]*m[1][2]*m[2][1] - m[0][1]*m[1][0]*m[2][2] - m[2][0]*m[1][1]*m[0][2];
+                       -m[0][0]*m[1][2]*m[2][1] - m[0][1]*m[1][0]*m[2][2] - m[2][0]*m[1][1]*m[0][2];
 }
-DO_INLINE void inverse_fmatrix(float to[3][4], float from[3][4])
+
+DO_INLINE void inverse_fmatrix(float to[3][3], float from[3][3])
 {
        unsigned int i, j;
        float d;
@@ -390,7 +363,7 @@ DO_INLINE void inverse_fmatrix(float to[3][4], float from[3][4])
 
 /* 3x3 matrix multiplied by a scalar */
 /* STATUS: verified */
-DO_INLINE void mul_fmatrix_S(float matrix[3][4], float scalar)
+DO_INLINE void mul_fmatrix_S(float matrix[3][3], float scalar)
 {
        mul_fvector_S(matrix[0], matrix[0],scalar);
        mul_fvector_S(matrix[1], matrix[1],scalar);
@@ -399,99 +372,73 @@ DO_INLINE void mul_fmatrix_S(float matrix[3][4], float scalar)
 
 /* a vector multiplied by a 3x3 matrix */
 /* STATUS: verified */
-DO_INLINE void mul_fvector_fmatrix(float *to, float *from, float matrix[3][4])
+DO_INLINE void mul_fvector_fmatrix(float *to, float *from, float matrix[3][3])
 {
-       float temp[3];
-       
-       VECCOPY(temp, from);
-       
-       to[0] = matrix[0][0]*temp[0] + matrix[1][0]*temp[1] + matrix[2][0]*temp[2];
-       to[1] = matrix[0][1]*temp[0] + matrix[1][1]*temp[1] + matrix[2][1]*temp[2];
-       to[2] = matrix[0][2]*temp[0] + matrix[1][2]*temp[1] + matrix[2][2]*temp[2];
+       to[0] = matrix[0][0]*from[0] + matrix[1][0]*from[1] + matrix[2][0]*from[2];
+       to[1] = matrix[0][1]*from[0] + matrix[1][1]*from[1] + matrix[2][1]*from[2];
+       to[2] = matrix[0][2]*from[0] + matrix[1][2]*from[1] + matrix[2][2]*from[2];
 }
 
 /* 3x3 matrix multiplied by a vector */
 /* STATUS: verified */
-#ifdef __SSE3__
-DO_INLINE void mul_fmatrix_fvector(float *to, float matrix[3][4], float *from) {
-       __m128 v1, v2, v3, v4;
-       
-       v1 = _mm_load_ps(&matrix[0][0]);
-       v2 = _mm_load_ps(&matrix[1][0]);
-       v3 = _mm_load_ps(&matrix[2][0]);
-       v4 = _mm_load_ps(from);
-
-       // stuff
-       v1 = _mm_mul_ps(v1, v4);
-       v2 = _mm_mul_ps(v2, v4);
-       v3 = _mm_mul_ps(v3, v4);
-       v1 = _mm_hadd_ps(v1, v2);
-       v3 = _mm_hadd_ps(v3, _mm_setzero_ps());
-       v4 = _mm_hadd_ps(v1, v3);
-       
-       _mm_store_ps(to, v4);
-}
-#else
-DO_INLINE void mul_fmatrix_fvector(float *to, float matrix[3][4], float *from)
+DO_INLINE void mul_fmatrix_fvector(float *to, float matrix[3][3], float *from)
 {
        to[0] = INPR(matrix[0],from);
        to[1] = INPR(matrix[1],from);
        to[2] = INPR(matrix[2],from);
 }
-#endif
-               
 /* 3x3 matrix multiplied by a 3x3 matrix */
 /* STATUS: verified */
-DO_INLINE void mul_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+DO_INLINE void mul_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        mul_fvector_fmatrix(to[0], matrixA[0],matrixB);
        mul_fvector_fmatrix(to[1], matrixA[1],matrixB);
        mul_fvector_fmatrix(to[2], matrixA[2],matrixB);
 }
 /* 3x3 matrix addition with 3x3 matrix */
-DO_INLINE void add_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+DO_INLINE void add_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECADD(to[0], matrixA[0], matrixB[0]);
        VECADD(to[1], matrixA[1], matrixB[1]);
        VECADD(to[2], matrixA[2], matrixB[2]);
 }
 /* 3x3 matrix add-addition with 3x3 matrix */
-DO_INLINE void addadd_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+DO_INLINE void addadd_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECADDADD(to[0], matrixA[0], matrixB[0]);
        VECADDADD(to[1], matrixA[1], matrixB[1]);
        VECADDADD(to[2], matrixA[2], matrixB[2]);
 }
 /* 3x3 matrix sub-addition with 3x3 matrix */
-DO_INLINE void addsub_fmatrixS_fmatrixS(float to[3][4], float matrixA[3][4], float aS, float matrixB[3][4], float bS)
+DO_INLINE void addsub_fmatrixS_fmatrixS(float to[3][3], float matrixA[3][3], float aS, float matrixB[3][3], float bS)
 {
        VECADDSUBSS(to[0], matrixA[0], aS, matrixB[0], bS);
        VECADDSUBSS(to[1], matrixA[1], aS, matrixB[1], bS);
        VECADDSUBSS(to[2], matrixA[2], aS, matrixB[2], bS);
 }
 /* A -= B + C (3x3 matrix sub-addition with 3x3 matrix) */
-DO_INLINE void subadd_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+DO_INLINE void subadd_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECSUBADD(to[0], matrixA[0], matrixB[0]);
        VECSUBADD(to[1], matrixA[1], matrixB[1]);
        VECSUBADD(to[2], matrixA[2], matrixB[2]);
 }
 /* A -= B*x + C*y (3x3 matrix sub-addition with 3x3 matrix) */
-DO_INLINE void subadd_fmatrixS_fmatrixS(float to[3][4], float matrixA[3][4], float aS, float matrixB[3][4], float bS)
+DO_INLINE void subadd_fmatrixS_fmatrixS(float to[3][3], float matrixA[3][3], float aS, float matrixB[3][3], float bS)
 {
        VECSUBADDSS(to[0], matrixA[0], aS, matrixB[0], bS);
        VECSUBADDSS(to[1], matrixA[1], aS, matrixB[1], bS);
        VECSUBADDSS(to[2], matrixA[2], aS, matrixB[2], bS);
 }
 /* A = B - C (3x3 matrix subtraction with 3x3 matrix) */
-DO_INLINE void sub_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+DO_INLINE void sub_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECSUB(to[0], matrixA[0], matrixB[0]);
        VECSUB(to[1], matrixA[1], matrixB[1]);
        VECSUB(to[2], matrixA[2], matrixB[2]);
 }
 /* A += B - C (3x3 matrix add-subtraction with 3x3 matrix) */
-DO_INLINE void addsub_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float matrixB[3][4])
+DO_INLINE void addsub_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        VECADDSUB(to[0], matrixA[0], matrixB[0]);
        VECADDSUB(to[1], matrixA[1], matrixB[1]);
@@ -501,88 +448,48 @@ DO_INLINE void addsub_fmatrix_fmatrix(float to[3][4], float matrixA[3][4], float
 // special functions
 /////////////////////////////////////////////////////////////////
 /* a vector multiplied and added to/by a 3x3 matrix */
-/*
 DO_INLINE void muladd_fvector_fmatrix(float to[3], float from[3], float matrix[3][3])
 {
        to[0] += matrix[0][0]*from[0] + matrix[1][0]*from[1] + matrix[2][0]*from[2];
        to[1] += matrix[0][1]*from[0] + matrix[1][1]*from[1] + matrix[2][1]*from[2];
        to[2] += matrix[0][2]*from[0] + matrix[1][2]*from[1] + matrix[2][2]*from[2];
 }
-*/
 /* 3x3 matrix multiplied and added  to/by a 3x3 matrix  and added to another 3x3 matrix */
-/*
 DO_INLINE void muladd_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        muladd_fvector_fmatrix(to[0], matrixA[0],matrixB);
        muladd_fvector_fmatrix(to[1], matrixA[1],matrixB);
        muladd_fvector_fmatrix(to[2], matrixA[2],matrixB);
 }
-*/
 /* a vector multiplied and sub'd to/by a 3x3 matrix */
-/*
 DO_INLINE void mulsub_fvector_fmatrix(float to[3], float from[3], float matrix[3][3])
 {
        to[0] -= matrix[0][0]*from[0] + matrix[1][0]*from[1] + matrix[2][0]*from[2];
        to[1] -= matrix[0][1]*from[0] + matrix[1][1]*from[1] + matrix[2][1]*from[2];
        to[2] -= matrix[0][2]*from[0] + matrix[1][2]*from[1] + matrix[2][2]*from[2];
 }
-*/
 /* 3x3 matrix multiplied and sub'd  to/by a 3x3 matrix  and added to another 3x3 matrix */
-/*
 DO_INLINE void mulsub_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
 {
        mulsub_fvector_fmatrix(to[0], matrixA[0],matrixB);
        mulsub_fvector_fmatrix(to[1], matrixA[1],matrixB);
        mulsub_fvector_fmatrix(to[2], matrixA[2],matrixB);
 }
-*/
 /* 3x3 matrix multiplied+added by a vector */
 /* STATUS: verified */
-
-#ifdef __SSE3__
-DO_INLINE void muladd_fmatrix_fvector(float to[3], float matrix[3][4], float from[3]) {
-       __m128 v1, v2, v3, v4;
-       
-       v1 = _mm_load_ps(&matrix[0][0]);
-       v2 = _mm_load_ps(&matrix[1][0]);
-       v3 = _mm_load_ps(&matrix[2][0]);
-       v4 = _mm_load_ps(from);
-
-       // stuff
-       v1 = _mm_mul_ps(v1, v4);
-       v2 = _mm_mul_ps(v2, v4);
-       v3 = _mm_mul_ps(v3, v4);
-       v1 = _mm_hadd_ps(v1, v2);
-       v3 = _mm_hadd_ps(v3, _mm_setzero_ps());
-       v1 = _mm_hadd_ps(v1, v3);
-       
-       v4 = _mm_load_ps(to);
-       v4 = _mm_add_ps(v4,v1);
-
-       _mm_store_ps(to, v4);
-}
-#else
-DO_INLINE void muladd_fmatrix_fvector(float to[3], float matrix[3][4], float from[3])
+DO_INLINE void muladd_fmatrix_fvector(float to[3], float matrix[3][3], float from[3])
 {
-       float temp[3] = { 0,0,0 };
-       
-       temp[0] = INPR(matrix[0],from);
-       temp[1] = INPR(matrix[1],from);
-       temp[2] = INPR(matrix[2],from); 
-       
-       VECADD(to, to, temp);
+       to[0] += INPR(matrix[0],from);
+       to[1] += INPR(matrix[1],from);
+       to[2] += INPR(matrix[2],from);  
 }
-#endif
-
 /* 3x3 matrix multiplied+sub'ed by a vector */
-/*
 DO_INLINE void mulsub_fmatrix_fvector(float to[3], float matrix[3][3], float from[3])
 {
        to[0] -= INPR(matrix[0],from);
        to[1] -= INPR(matrix[1],from);
        to[2] -= INPR(matrix[2],from);
 }
-*/
 /////////////////////////////////////////////////////////////////
 
 ///////////////////////////
@@ -615,18 +522,32 @@ DO_INLINE void del_bfmatrix(fmatrix3x3 *matrix)
                MEM_freeN (matrix);
        }
 }
+
 /* copy big matrix */
 DO_INLINE void cp_bfmatrix(fmatrix3x3 *to, fmatrix3x3 *from)
 {      
        // TODO bounds checking 
        memcpy(to, from, sizeof(fmatrix3x3) * (from[0].vcount+from[0].scount) );
 }
+
+/* init big matrix */
+// slow in parallel
+DO_INLINE void init_bfmatrix(fmatrix3x3 *matrix, float m3[3][3])
+{
+       unsigned int i;
+
+       for(i = 0; i < matrix[0].vcount+matrix[0].scount; i++)
+       {               
+               cp_fmatrix(matrix[i].m, m3); 
+       }
+}
+
 /* init the diagonal of big matrix */
 // slow in parallel
-DO_INLINE void initdiag_bfmatrix(fmatrix3x3 *matrix, float m3[3][4])
+DO_INLINE void initdiag_bfmatrix(fmatrix3x3 *matrix, float m3[3][3])
 {
        unsigned int i,j;
-       float tmatrix[3][4] = {{0,0,0,0},{0,0,0,0},{0,0,0,0}};
+       float tmatrix[3][3] = {{0,0,0},{0,0,0},{0,0,0}};
 
        for(i = 0; i < matrix[0].vcount; i++)
        {               
@@ -637,16 +558,7 @@ DO_INLINE void initdiag_bfmatrix(fmatrix3x3 *matrix, float m3[3][4])
                cp_fmatrix(matrix[j].m, tmatrix); 
        }
 }
-/* init big matrix */
-DO_INLINE void init_bfmatrix(fmatrix3x3 *matrix, float m3[3][4])
-{
-       unsigned int i;
 
-       for(i = 0; i < matrix[0].vcount+matrix[0].scount; i++)
-       {
-               cp_fmatrix(matrix[i].m, m3); 
-       }
-}
 /* multiply big matrix with scalar*/
 DO_INLINE void mul_bfmatrix_S(fmatrix3x3 *matrix, float scalar)
 {
@@ -656,30 +568,49 @@ DO_INLINE void mul_bfmatrix_S(fmatrix3x3 *matrix, float scalar)
                mul_fmatrix_S(matrix[i].m, scalar);
        }
 }
+
 /* SPARSE SYMMETRIC multiply big matrix with long vector*/
 /* STATUS: verified */
-void mul_bfmatrix_lfvector( lfVector *to, fmatrix3x3 *from, lfVector *fLongVector)
+DO_INLINE void mul_bfmatrix_lfvector( float (*to)[3], fmatrix3x3 *from, lfVector *fLongVector)
 {
        unsigned int i = 0;
-       float *tflongvector;
-       float temp[4]={0,0,0,0};
+       lfVector *temp = create_lfvector(from[0].vcount);
        
        zero_lfvector(to, from[0].vcount);
-       
-       /* process diagonal elements */ 
-       for(i = 0; i < from[0].vcount; i++)
+
+#pragma omp parallel sections private(i)
        {
-               mul_fmatrix_fvector(to[from[i].r], from[i].m, fLongVector[from[i].c]);
+#pragma omp section
+               {
+                       for(i = from[0].vcount; i < from[0].vcount+from[0].scount; i++)
+                       {
+                               muladd_fmatrix_fvector(to[from[i].c], from[i].m, fLongVector[from[i].r]);
+                       }
+               }       
+#pragma omp section
+               {
+                       for(i = 0; i < from[0].vcount+from[0].scount; i++)
+                       {
+                               muladd_fmatrix_fvector(temp[from[i].r], from[i].m, fLongVector[from[i].c]);
+                       }
+               }
        }
+       add_lfvector_lfvector(to, to, temp, from[0].vcount);
+       
+       del_lfvector(temp);
+       
+       
+}
 
-       /* process off-diagonal entries (every off-diagonal entry needs to be symmetric) */
-       // TODO: pragma below is wrong, correct it!
-// #pragma omp parallel for shared(to,from, fLongVector) private(i) 
+/* SPARSE SYMMETRIC multiply big matrix with long vector (for diagonal preconditioner) */
+/* STATUS: verified */
+DO_INLINE void mul_prevfmatrix_lfvector( float (*to)[3], fmatrix3x3 *from, lfVector *fLongVector)
+{
+       unsigned int i = 0;
        
-       for(i = from[0].vcount; i < from[0].vcount+from[0].scount; i++)
+       for(i = 0; i < from[0].vcount; i++)
        {
-               muladd_fmatrix_fvector(to[from[i].c], from[i].m, fLongVector[from[i].r]);
-               muladd_fmatrix_fvector(to[from[i].r], from[i].m, fLongVector[from[i].c]);
+               mul_fmatrix_fvector(to[from[i].r], from[i].m, fLongVector[from[i].c]);
        }
 }
 
@@ -773,12 +704,10 @@ DO_INLINE void subadd_bfmatrixS_bfmatrixS( fmatrix3x3 *to, fmatrix3x3 *from, flo
 ///////////////////////////////////////////////////////////////////
 // simulator start
 ///////////////////////////////////////////////////////////////////
-static float I[3][4] = {{1,0,0,0},{0,1,0,0},{0,0,1,0}};
-static float ZERO[3][4] = {{0,0,0,0},{0,0,0,0},{0,0,0,0}};
 typedef struct Implicit_Data 
 {
-       lfVector *X, *V, *Xnew, *Vnew, *F, *B, *dV, *z;
-       fmatrix3x3 *A, *dFdV, *dFdX, *S, *P, *Pinv, *bigI; 
+       lfVector *X, *V, *Xnew, *Vnew, *olddV, *F, *B, *dV, *z;
+       fmatrix3x3 *A, *dFdV, *dFdX, *S, *P, *Pinv, *bigI, *M
 } Implicit_Data;
 
 int implicit_init (Object *ob, ClothModifierData *clmd)
@@ -790,6 +719,9 @@ int implicit_init (Object *ob, ClothModifierData *clmd)
        ClothSpring *spring = NULL;
        Implicit_Data *id = NULL;
        LinkNode *search = NULL;
+       
+       if(G.rt > 0)
+               printf("implicit_init\n");
 
        // init memory guard
        // MEMORY_BASE.first = MEMORY_BASE.last = NULL;
@@ -809,32 +741,36 @@ int implicit_init (Object *ob, ClothModifierData *clmd)
        id->Pinv = create_bfmatrix(cloth->numverts, cloth->numsprings);
        id->P = create_bfmatrix(cloth->numverts, cloth->numsprings);
        id->bigI = create_bfmatrix(cloth->numverts, cloth->numsprings); // TODO 0 springs
+       id->M = create_bfmatrix(cloth->numverts, cloth->numsprings);
        id->X = create_lfvector(cloth->numverts);
        id->Xnew = create_lfvector(cloth->numverts);
        id->V = create_lfvector(cloth->numverts);
        id->Vnew = create_lfvector(cloth->numverts);
+       id->olddV = create_lfvector(cloth->numverts);
+       zero_lfvector(id->olddV, cloth->numverts);
        id->F = create_lfvector(cloth->numverts);
        id->B = create_lfvector(cloth->numverts);
        id->dV = create_lfvector(cloth->numverts);
-       zero_lfvector(id->dV, cloth->numverts);
        id->z = create_lfvector(cloth->numverts);
        
        for(i=0;i<cloth->numverts;i++) 
        {
-               id->A[i].r = id->A[i].c = id->dFdV[i].r = id->dFdV[i].c = id->dFdX[i].r = id->dFdX[i].c = id->P[i].c = id->P[i].r = id->Pinv[i].c = id->Pinv[i].r = id->bigI[i].c = id->bigI[i].r = i;
+               id->A[i].r = id->A[i].c = id->dFdV[i].r = id->dFdV[i].c = id->dFdX[i].r = id->dFdX[i].c = id->P[i].c = id->P[i].r = id->Pinv[i].c = id->Pinv[i].r = id->bigI[i].c = id->bigI[i].r = id->M[i].r = id->M[i].c = i;
 
-               if(verts [i].goal >= SOFTGOALSNAP)
+               if(verts [i].flags & CLOTH_VERT_FLAG_PINNED)
                {
                        id->S[pinned].pinned = 1;
                        id->S[pinned].c = id->S[pinned].r = i;
                        pinned++;
                }
+               
+               initdiag_fmatrixS(id->M[i].m, verts[i].mass);
        }
 
        // S is special and needs specific vcount and scount
        id->S[0].vcount = pinned; id->S[0].scount = 0;
 
-       // init springs */
+       // init springs 
        search = cloth->springs;
        for(i=0;i<cloth->numsprings;i++) 
        {
@@ -842,20 +778,22 @@ int implicit_init (Object *ob, ClothModifierData *clmd)
                
                // dFdV_start[i].r = big_I[i].r = big_zero[i].r = 
                id->A[i+cloth->numverts].r = id->dFdV[i+cloth->numverts].r = id->dFdX[i+cloth->numverts].r = 
-                               id->P[i+cloth->numverts].r = id->Pinv[i+cloth->numverts].r = id->bigI[i+cloth->numverts].r = spring->ij;
+                               id->P[i+cloth->numverts].r = id->Pinv[i+cloth->numverts].r = id->bigI[i+cloth->numverts].r = id->M[i+cloth->numverts].r = spring->ij;
 
                // dFdV_start[i].c = big_I[i].c = big_zero[i].c = 
                id->A[i+cloth->numverts].c = id->dFdV[i+cloth->numverts].c = id->dFdX[i+cloth->numverts].c = 
-                       id->P[i+cloth->numverts].c = id->Pinv[i+cloth->numverts].c = id->bigI[i+cloth->numverts].c = spring->kl;
+                               id->P[i+cloth->numverts].c = id->Pinv[i+cloth->numverts].c = id->bigI[i+cloth->numverts].c = id->M[i+cloth->numverts].c = spring->kl;
 
                spring->matrix_index = i + cloth->numverts;
                
                search = search->next;
        }
+       
+       initdiag_bfmatrix(id->bigI, I);
 
        for(i = 0; i < cloth->numverts; i++)
        {               
-               VECCOPY(id->X[i], cloth->x[i]);
+               VECCOPY(id->X[i], verts[i].x);
        }
 
        return 1;
@@ -879,11 +817,13 @@ int       implicit_free (ClothModifierData *clmd)
                        del_bfmatrix(id->P);
                        del_bfmatrix(id->Pinv);
                        del_bfmatrix(id->bigI);
+                       del_bfmatrix(id->M);
 
                        del_lfvector(id->X);
                        del_lfvector(id->Xnew);
                        del_lfvector(id->V);
                        del_lfvector(id->Vnew);
+                       del_lfvector(id->olddV);
                        del_lfvector(id->F);
                        del_lfvector(id->B);
                        del_lfvector(id->dV);
@@ -896,31 +836,6 @@ int        implicit_free (ClothModifierData *clmd)
        return 1;
 }
 
-void cloth_bending_mode(ClothModifierData *clmd, int enabled)
-{
-       Cloth *cloth = clmd->clothObject;
-       Implicit_Data *id;
-       
-       if(cloth)
-       {
-               id = cloth->implicit;
-               
-               if(id)
-               {
-                       if(enabled)
-                       {
-                               cloth->numsprings = cloth->numspringssave;
-                       }
-                       else
-                       {
-                               cloth->numsprings = cloth->numothersprings;
-                       }
-                       
-                       id->A[0].scount = id->dFdV[0].scount = id->dFdX[0].scount = id->P[0].scount = id->Pinv[0].scount = id->bigI[0].scount = cloth->numsprings;
-               }       
-       }
-}
-
 DO_INLINE float fb(float length, float L)
 {
        float x = length/L;
@@ -939,13 +854,14 @@ DO_INLINE float fbstar(float length, float L, float kb, float cb)
        float tempfb = kb * fb(length, L);
 
        float fbstar = cb * (length - L);
-
+       
        if(tempfb < fbstar)
                return fbstar;
        else
                return tempfb;          
 }
 
+// function to calculae bending spring force (taken from Choi & Co)
 DO_INLINE float fbstar_jacobi(float length, float L, float kb, float cb)
 {
        float tempfb = kb * fb(length, L);
@@ -983,22 +899,24 @@ int  cg_filtered(lfVector *ldV, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatr
        d = create_lfvector(numverts);
        tmp = create_lfvector(numverts);
        r = create_lfvector(numverts);
-       
-       // zero_lfvector(ldV, numverts);
+
+       // zero_lfvector(ldV, CLOTHPARTICLES);
        filter(ldV, S);
+
        add_lfvector_lfvector(ldV, ldV, z, numverts);
 
        // r = B - Mul(tmp,A,X);    // just use B if X known to be zero
-       mul_bfmatrix_lfvector(r, lA, ldV);
-       sub_lfvector_lfvector(r, lB, r, numverts);
-       filter(r, S);
+       cp_lfvector(r, lB, numverts);
+       mul_bfmatrix_lfvector(tmp, lA, ldV);
+       sub_lfvector_lfvector(r, r, tmp, numverts);
+
+       filter(r,S);
 
        cp_lfvector(d, r, numverts);
 
        s = dot_lfvector(r, r, numverts);
-       starget = s * conjgrad_epsilon;
-       
-       // itstart();
+       starget = s * sqrt(conjgrad_epsilon);
+
        while((s>starget && conjgrad_loopcount < conjgrad_looplimit))
        {       
                // Mul(q,A,d); // q = A*d;
@@ -1024,16 +942,12 @@ int  cg_filtered(lfVector *ldV, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatr
 
                conjgrad_loopcount++;
        }
-       // itend();
-       // printf("cg_filtered time: %f\n", (float)itval());
-       
        conjgrad_lasterror = s;
 
        del_lfvector(q);
        del_lfvector(d);
        del_lfvector(tmp);
        del_lfvector(r);
-       
        // printf("W/O conjgrad_loopcount: %d\n", conjgrad_loopcount);
 
        return conjgrad_loopcount<conjgrad_looplimit;  // true means we reached desired accuracy in given time - ie stable
@@ -1054,7 +968,7 @@ DO_INLINE void BuildPPinv(fmatrix3x3 *lA, fmatrix3x3 *P, fmatrix3x3 *Pinv)
                
        }
 }
-
+/*
 // version 1.3
 int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatrix3x3 *S, fmatrix3x3 *P, fmatrix3x3 *Pinv)
 {
@@ -1075,14 +989,117 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
        sub_lfvector_lfvector(r, lB, r, numverts);
        filter(r, S);
        
-       mul_bfmatrix_lfvector(p, Pinv, r);
+       mul_prevfmatrix_lfvector(p, Pinv, r);
+       filter(p, S);
+       
+       deltaNew = dot_lfvector(r, p, numverts);
+       
+       delta0 = deltaNew * sqrt(conjgrad_epsilon);
+       
+       // itstart();
+       
+       while ((deltaNew > delta0) && (iterations < conjgrad_looplimit))
+       {
+               iterations++;
+               
+               mul_bfmatrix_lfvector(s, lA, p);
+               filter(s, S);
+               
+               alpha = deltaNew / dot_lfvector(p, s, numverts);
+               
+               add_lfvector_lfvectorS(dv, dv, p, alpha, numverts);
+               
+               add_lfvector_lfvectorS(r, r, s, -alpha, numverts);
+               
+               mul_prevfmatrix_lfvector(h, Pinv, r);
+               filter(h, S);
+               
+               deltaOld = deltaNew;
+               
+               deltaNew = dot_lfvector(r, h, numverts);
+               
+               add_lfvector_lfvectorS(p, h, p, deltaNew / deltaOld, numverts);
+               
+               filter(p, S);
+               
+       }
+       
+       // itend();
+       // printf("cg_filtered_pre time: %f\n", (float)itval());
+       
+       del_lfvector(h);
+       del_lfvector(s);
+       del_lfvector(p);
+       del_lfvector(r);
+       
+       printf("iterations: %d\n", iterations);
+       
+       return iterations<conjgrad_looplimit;
+}
+*/
+// version 1.4
+int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatrix3x3 *S, fmatrix3x3 *P, fmatrix3x3 *Pinv, fmatrix3x3 *bigI)
+{
+       unsigned int numverts = lA[0].vcount, iterations = 0, conjgrad_looplimit=100;
+       float delta0 = 0, deltaNew = 0, deltaOld = 0, alpha = 0, tol = 0;
+       lfVector *r = create_lfvector(numverts);
+       lfVector *p = create_lfvector(numverts);
+       lfVector *s = create_lfvector(numverts);
+       lfVector *h = create_lfvector(numverts);
+       lfVector *bhat = create_lfvector(numverts);
+       lfVector *btemp = create_lfvector(numverts);
+       
+       BuildPPinv(lA, P, Pinv);
+       
+       initdiag_bfmatrix(bigI, I);
+       sub_bfmatrix_Smatrix(bigI, bigI, S);
+       
+       // x = Sx_0+(I-S)z
+       filter(dv, S);
+       add_lfvector_lfvector(dv, dv, z, numverts);
+       
+       // b_hat = S(b-A(I-S)z)
+       mul_bfmatrix_lfvector(r, lA, z);
+       mul_bfmatrix_lfvector(bhat, bigI, r);
+       sub_lfvector_lfvector(bhat, lB, bhat, numverts);
+       
+       // r = S(b-Ax)
+       mul_bfmatrix_lfvector(r, lA, dv);
+       sub_lfvector_lfvector(r, lB, r, numverts);
+       filter(r, S);
+       
+       // p = SP^-1r
+       mul_prevfmatrix_lfvector(p, Pinv, r);
+       filter(p, S);
+       
+       // delta0 = bhat^TP^-1bhat
+       mul_prevfmatrix_lfvector(btemp, Pinv, bhat);
+       delta0 = dot_lfvector(bhat, btemp, numverts);
+       
+       // deltaNew = r^TP
+       deltaNew = dot_lfvector(r, p, numverts);
+       
+       /*
+       filter(dv, S);
+       add_lfvector_lfvector(dv, dv, z, numverts);
+       
+       mul_bfmatrix_lfvector(r, lA, dv);
+       sub_lfvector_lfvector(r, lB, r, numverts);
+       filter(r, S);
+       
+       mul_prevfmatrix_lfvector(p, Pinv, r);
        filter(p, S);
        
-       deltaNew = delta0 = dot_lfvector(r, p, numverts);
+       deltaNew = dot_lfvector(r, p, numverts);
+       
+       delta0 = deltaNew * sqrt(conjgrad_epsilon);
+       */
        
        // itstart();
        
-       while ((deltaNew > (conjgrad_epsilon*delta0)) && (iterations < conjgrad_looplimit))
+       tol = (0.01*0.2);
+       
+       while ((deltaNew > delta0*tol*tol) && (iterations < conjgrad_looplimit))
        {
                iterations++;
                
@@ -1093,9 +1110,9 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
                
                add_lfvector_lfvectorS(dv, dv, p, alpha, numverts);
                
-               sub_lfvector_lfvectorS(r, r, s, alpha, numverts);
+               add_lfvector_lfvectorS(r, r, s, -alpha, numverts);
                
-               mul_bfmatrix_lfvector(h, Pinv, r);
+               mul_prevfmatrix_lfvector(h, Pinv, r);
                filter(h, S);
                
                deltaOld = deltaNew;
@@ -1103,6 +1120,7 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
                deltaNew = dot_lfvector(r, h, numverts);
                
                add_lfvector_lfvectorS(p, h, p, deltaNew / deltaOld, numverts);
+               
                filter(p, S);
                
        }
@@ -1110,6 +1128,8 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
        // itend();
        // printf("cg_filtered_pre time: %f\n", (float)itval());
        
+       del_lfvector(btemp);
+       del_lfvector(bhat);
        del_lfvector(h);
        del_lfvector(s);
        del_lfvector(p);
@@ -1121,32 +1141,43 @@ int cg_filtered_pre(lfVector *dv, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fma
 }
 
 // outer product is NOT cross product!!!
-DO_INLINE void dfdx_spring_type1(float to[3][4], float dir[3],float length,float L,float k)
+DO_INLINE void dfdx_spring_type1(float to[3][3], float extent[3], float length, float L, float dot, float k)
 {
        // dir is unit length direction, rest is spring's restlength, k is spring constant.
        // return  (outerprod(dir,dir)*k + (I - outerprod(dir,dir))*(k - ((k*L)/length)));
-       float temp[3][4];
+       float temp[3][3];
+       float temp1 = k*(1.0 - (L/length));     
+       
+       mul_fvectorT_fvectorS(temp, extent, extent, 1.0 / dot);
+       sub_fmatrix_fmatrix(to, I, temp);
+       mul_fmatrix_S(to, temp1);
+       
+       mul_fvectorT_fvectorS(temp, extent, extent, k/ dot);
+       add_fmatrix_fmatrix(to, to, temp);
+       
+       /*
        mul_fvectorT_fvector(temp, dir, dir);
        sub_fmatrix_fmatrix(to, I, temp);
        mul_fmatrix_S(to, k* (1.0f-(L/length)));
        mul_fmatrix_S(temp, k);
        add_fmatrix_fmatrix(to, temp, to);
+       */
 }
 
-DO_INLINE void dfdx_spring_type2(float to[3][4], float dir[3],float length,float L,float k, float cb)
+DO_INLINE void dfdx_spring_type2(float to[3][3], float dir[3], float length, float L, float k, float cb)
 {
        // return  outerprod(dir,dir)*fbstar_jacobi(length, L, k, cb);
        mul_fvectorT_fvectorS(to, dir, dir, fbstar_jacobi(length, L, k, cb));
 }
 
-DO_INLINE void dfdv_damp(float to[3][4], float dir[3], float damping)
+DO_INLINE void dfdv_damp(float to[3][3], float dir[3], float damping)
 {
        // derivative of force wrt velocity.  
-       // return outerprod(dir,dir) * damping;
        mul_fvectorT_fvectorS(to, dir, dir, damping);
+       
 }
 
-DO_INLINE void dfdx_spring(float to[3][4],  float dir[3],float length,float L,float k)
+DO_INLINE void dfdx_spring(float to[3][3],  float dir[3],float length,float L,float k)
 {
        // dir is unit length direction, rest is spring's restlength, k is spring constant.
        //return  ( (I-outerprod(dir,dir))*Min(1.0f,rest/length) - I) * -k;
@@ -1157,7 +1188,8 @@ DO_INLINE void dfdx_spring(float to[3][4],  float dir[3],float length,float L,fl
        mul_fmatrix_S(to, -k);
 }
 
-DO_INLINE void dfdx_damp(float to[3][4],  float dir[3],float length,const float vel[3],float rest,float damping)
+// unused atm
+DO_INLINE void dfdx_damp(float to[3][3],  float dir[3],float length,const float vel[3],float rest,float damping)
 {
        // inner spring damping   vel is the relative velocity  of the endpoints.  
        //      return (I-outerprod(dir,dir)) * (-damping * -(dot(dir,vel)/Max(length,rest)));
@@ -1167,12 +1199,14 @@ DO_INLINE void dfdx_damp(float to[3][4],  float dir[3],float length,const float
 
 }
 
-DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s, lfVector *lF, lfVector *X, lfVector *V, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, float dt)
+DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s, lfVector *lF, lfVector *X, lfVector *V, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, float time)
 {
+       Cloth *cloth = clmd->clothObject;
+       ClothVertex *verts = cloth->verts;
        float extent[3];
-       float length = 0;
+       float length = 0, dot = 0;
        float dir[3] = {0,0,0};
-       float vel[3] = {0,0,0};
+       float vel[3];
        float k = 0.0f;
        float L = s->restlen;
        float cb = clmd->sim_parms->structural;
@@ -1181,7 +1215,9 @@ DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s,
        float stretch_force[3] = {0,0,0};
        float bending_force[3] = {0,0,0};
        float damping_force[3] = {0,0,0};
-       float nulldfdx[3][4]={ {0,0,0,0}, {0,0,0,0}, {0,0,0,0}};
+       float nulldfdx[3][3]={ {0,0,0}, {0,0,0}, {0,0,0}};
+       
+       float scaling = 0.0;
        
        VECCOPY(s->f, nullf);
        cp_fmatrix(s->dfdx, nulldfdx);
@@ -1190,22 +1226,23 @@ DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s,
        // calculate elonglation
        VECSUB(extent, X[s->kl], X[s->ij]);
        VECSUB(vel, V[s->kl], V[s->ij]);
-       length = sqrt(INPR(extent, extent));
+       dot = INPR(extent, extent);
+       length = sqrt(dot);
        
        s->flags &= ~CLOTH_SPRING_FLAG_NEEDED;
        
-       if(length > ABS(ALMOST_ZERO))
+       if(length > ALMOST_ZERO)
        {
                /*
                if(length>L)
                {
-                       if((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) 
-                       && ((((length-L)*100.0f/L) > clmd->sim_parms->maxspringlen))) // cut spring!
-                       {
-                               s->flags |= CSPRING_FLAG_DEACTIVATE;
-                               return;
-                       }
-               
+               if((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) 
+               && ((((length-L)*100.0f/L) > clmd->sim_parms->maxspringlen))) // cut spring!
+               {
+               s->flags |= CSPRING_FLAG_DEACTIVATE;
+               return;
+       }
+       } 
                */
                mul_fvector_S(dir, extent, 1.0f/length);
        }
@@ -1214,140 +1251,129 @@ DO_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s,
                mul_fvector_S(dir, extent, 0.0f);
        }
        
-       
        // calculate force of structural + shear springs
-       if(s->type != CLOTH_SPRING_TYPE_BENDING)
+       if((s->type & CLOTH_SPRING_TYPE_STRUCTURAL) || (s->type & CLOTH_SPRING_TYPE_SHEAR))
        {
                if(length > L) // only on elonglation
                {
                        s->flags |= CLOTH_SPRING_FLAG_NEEDED;
-
-                       k = (clmd->sim_parms->structural*(length-L));   
-
-                       mul_fvector_S(stretch_force, dir, k); 
+                       
+                       k = clmd->sim_parms->structural;
+                               
+                       scaling = k + s->stiffness * ABS(clmd->sim_parms->max_struct-k);
+                       
+                       k = scaling / (clmd->sim_parms->avg_spring_len + FLT_EPSILON);
+                       
+                       // TODO: verify, half verified (couldn't see error)
+                       mul_fvector_S(stretch_force, dir, k*(length-L)); 
 
                        VECADD(s->f, s->f, stretch_force);
 
                        // Ascher & Boxman, p.21: Damping only during elonglation
-                       mul_fvector_S(damping_force, extent, clmd->sim_parms->Cdis * 0.01 * ((INPR(vel,extent)/length))); 
+                       // something wrong with it...
+                       mul_fvector_S(damping_force, dir, clmd->sim_parms->Cdis * INPR(vel,dir));
                        VECADD(s->f, s->f, damping_force);
                        
-                       // Formula from Ascher / Boxman, Speeding up cloth simulation
-                       // if((dt * (k*dt + 2 * clmd->sim_parms->Cdis * 0.01)) > 0.01 )
-                       {
-                               dfdx_spring_type1(s->dfdx, dir,length,L,clmd->sim_parms->structural);
-                               dfdv_damp(s->dfdv, dir,clmd->sim_parms->Cdis * 0.01);
-                       }
-                       // printf("(dt*k*dt) ): %f, k: %f\n", (dt * (k*dt + 2 * clmd->sim_parms->Cdis * 0.01) ), k);
+                       /* VERIFIED */
+                       dfdx_spring(s->dfdx, dir, length, L, k);
+                       
+                       /* VERIFIED */
+                       dfdv_damp(s->dfdv, dir, clmd->sim_parms->Cdis);
+                       
                }
        }
+       else if(s->type & CLOTH_SPRING_TYPE_GOAL)
+       {
+               float tvect[3];
+               
+               s->flags |= CLOTH_SPRING_FLAG_NEEDED;
+               
+               // current_position = xold + t * (newposition - xold)
+               VECSUB(tvect, verts[s->ij].xconst, verts[s->ij].xold);
+               mul_fvector_S(tvect, tvect, time);
+               VECADD(tvect, tvect, verts[s->ij].xold);
+
+               VECSUB(extent, X[s->ij], tvect);
+               
+               dot = INPR(extent, extent);
+               length = sqrt(dot);
+               
+               k = clmd->sim_parms->goalspring;
+               
+               scaling = k + s->stiffness * ABS(clmd->sim_parms->max_struct-k);
+                       
+               k = verts [s->ij].goal * scaling / (clmd->sim_parms->avg_spring_len + FLT_EPSILON);
+               
+               VECADDS(s->f, s->f, extent, -k);
+               
+               mul_fvector_S(damping_force, dir, clmd->sim_parms->goalfrict * 0.01 * INPR(vel,dir));
+               VECADD(s->f, s->f, damping_force);
+               
+               // HERE IS THE PROBLEM!!!!
+               // dfdx_spring(s->dfdx, dir, length, 0.0, k);
+               // dfdv_damp(s->dfdv, dir, MIN2(1.0, (clmd->sim_parms->goalfrict/100.0)));
+       }
        else // calculate force of bending springs
        {
                if(length < L)
                {
-                       // clmd->sim_parms->flags |= CLOTH_SIMSETTINGS_FLAG_BIG_FORCE;
-                       
                        s->flags |= CLOTH_SPRING_FLAG_NEEDED;
                        
-                       k = fbstar(length, L, clmd->sim_parms->bending, cb);    
+                       k = clmd->sim_parms->bending;   
+                       
+                       scaling = k + s->stiffness * ABS(clmd->sim_parms->max_bend-k);                  
+                       cb = k = scaling / (20.0*(clmd->sim_parms->avg_spring_len + FLT_EPSILON));
 
-                       mul_fvector_S(bending_force, dir, k);
+                       mul_fvector_S(bending_force, dir, fbstar(length, L, k, cb));
                        VECADD(s->f, s->f, bending_force);
-                       
-                       // DG: My formula to handle bending for the AIMEX scheme 
-                       // multiply with 1000 because of numerical problems
-                       // if( ((k*1000)*dt*dt) < -0.18 )
-                       {
-                               dfdx_spring_type2(s->dfdx, dir,length,L,clmd->sim_parms->bending, cb);
-                               clmd->sim_parms->flags |= CLOTH_SIMSETTINGS_FLAG_BIG_FORCE;
-                       }
-                       // printf("(dt*k*dt) ): %f, k: %f\n", (dt*dt*k*-1.0), k);
+
+                       dfdx_spring_type2(s->dfdx, dir, length,L, k, cb);
                }
        }
 }
 
-DO_INLINE int cloth_apply_spring_force(ClothModifierData *clmd, ClothSpring *s, lfVector *lF, lfVector *X, lfVector *V, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX)
+DO_INLINE void cloth_apply_spring_force(ClothModifierData *clmd, ClothSpring *s, lfVector *lF, lfVector *X, lfVector *V, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX)
 {
        if(s->flags & CLOTH_SPRING_FLAG_NEEDED)
        {
-               VECADD(lF[s->ij], lF[s->ij], s->f);
-               VECSUB(lF[s->kl], lF[s->kl], s->f);     
-                       
-               if(s->type != CLOTH_SPRING_TYPE_BENDING)
+               if(!(s->type & CLOTH_SPRING_TYPE_BENDING))
                {
                        sub_fmatrix_fmatrix(dFdV[s->ij].m, dFdV[s->ij].m, s->dfdv);
                        sub_fmatrix_fmatrix(dFdV[s->kl].m, dFdV[s->kl].m, s->dfdv);
                        add_fmatrix_fmatrix(dFdV[s->matrix_index].m, dFdV[s->matrix_index].m, s->dfdv); 
                }
-               else if(!(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_BIG_FORCE))
-                       return 0;
+
+               VECADD(lF[s->ij], lF[s->ij], s->f);
+               
+               if(!(s->type & CLOTH_SPRING_TYPE_GOAL))
+                       VECSUB(lF[s->kl], lF[s->kl], s->f);
                
-               sub_fmatrix_fmatrix(dFdX[s->ij].m, dFdX[s->ij].m, s->dfdx);
                sub_fmatrix_fmatrix(dFdX[s->kl].m, dFdX[s->kl].m, s->dfdx);
-
+               sub_fmatrix_fmatrix(dFdX[s->ij].m, dFdX[s->ij].m, s->dfdx);
                add_fmatrix_fmatrix(dFdX[s->matrix_index].m, dFdX[s->matrix_index].m, s->dfdx);
-       }
-       
-       return 1;
-}
-
-DO_INLINE void calculateTriangleNormal(float to[3], lfVector *X, MFace mface)
-{
-       float v1[3], v2[3];
-
-       VECSUB(v1, X[mface.v2], X[mface.v1]);
-       VECSUB(v2, X[mface.v3], X[mface.v1]);
-       cross_fvector(to, v1, v2);
-}
-
-DO_INLINE void calculatQuadNormal(float to[3], lfVector *X, MFace mface)
-{
-       float temp = CalcNormFloat4(X[mface.v1],X[mface.v2],X[mface.v3],X[mface.v4],to);
-       mul_fvector_S(to, to, temp);
+       }       
 }
 
-void calculateWeightedVertexNormal(ClothModifierData *clmd, MFace *mfaces, float to[3], int index, lfVector *X)
-{
-       float temp[3]; 
-       int i;
-       Cloth *cloth = clmd->clothObject;
-
-       for(i = 0; i < cloth->numfaces; i++)
-       {
-               // check if this triangle contains the selected vertex
-               if(mfaces[i].v1 == index || mfaces[i].v2 == index || mfaces[i].v3 == index || mfaces[i].v4 == index)
-               {
-                       calculatQuadNormal(temp, X, mfaces[i]);
-                       VECADD(to, to, temp);
-               }
-       }
-}
 float calculateVertexWindForce(float wind[3], float vertexnormal[3])  
 {
-       return fabs(INPR(wind, vertexnormal) * 0.5f);
-}
-
-DO_INLINE void calc_triangle_force(ClothModifierData *clmd, MFace mface, lfVector *F, lfVector *X, lfVector *V, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, ListBase *effectors)
-{      
-
+       return fabs(INPR(wind, vertexnormal));
 }
 
-void cloth_calc_force(ClothModifierData *clmd, lfVector *lF, lfVector *lX, lfVector *lV, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, ListBase *effectors, float time, float dt)
+void cloth_calc_force(ClothModifierData *clmd, lfVector *lF, lfVector *lX, lfVector *lV, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, ListBase *effectors, float time, fmatrix3x3 *M)
 {
        /* Collect forces and derivatives:  F,dFdX,dFdV */
        Cloth           *cloth          = clmd->clothObject;
-       unsigned int    i               = 0;
+       long            i               = 0;
        float           spring_air      = clmd->sim_parms->Cvi * 0.01f; /* viscosity of air scaled in percent */
        float           gravity[3];
-       float           tm2[3][4]       = {{-spring_air,0,0,0}, {0,-spring_air,0,0},{0,0,-spring_air,0}};
-       ClothVertex *verts = cloth->verts;
+       float           tm2[3][3]       = {{-spring_air,0,0}, {0,-spring_air,0},{0,0,-spring_air}};
        MFace           *mfaces         = cloth->mfaces;
+       //ClothVertex   *verts          = cloth->verts;
        float wind_normalized[3];
        unsigned int numverts = cloth->numverts;
-       float auxvect[3], velgoal[3], tvect[3];
-       float kd, ks;
        LinkNode *search = cloth->springs;
 
+
        VECCOPY(gravity, clmd->sim_parms->gravity);
        mul_fvector_S(gravity, gravity, 0.001f); /* scale gravity force */
 
@@ -1357,108 +1383,119 @@ void cloth_calc_force(ClothModifierData *clmd, lfVector *lF, lfVector *lX, lfVec
        initdiag_bfmatrix(dFdV, tm2);
 
        init_lfvector(lF, gravity, numverts);
-
-       submul_lfvectorS(lF, lV, spring_air, numverts);
-
-       /* do goal stuff */
-       if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) 
-       {       
-               for(i = 0; i < numverts; i++)
-               {                       
-                       if(verts [i].goal < SOFTGOALSNAP)
-                       {                       
-                               // current_position = xold + t * (newposition - xold)
-                               VECSUB(tvect, cloth->xconst[i], cloth->xold[i]);
-                               mul_fvector_S(tvect, tvect, time);
-                               VECADD(tvect, tvect, cloth->xold[i]);
-
-                               VECSUB(auxvect, tvect, lX[i]);
-                               ks  = 1.0f/(1.0f- verts [i].goal*clmd->sim_parms->goalspring)-1.0f ;
-                               VECADDS(lF[i], lF[i], auxvect, -ks);
-
-                               // calulate damping forces generated by goals                           
-                               VECSUB(velgoal, cloth->xold[i], cloth->xconst[i]);
-                               kd =  clmd->sim_parms->goalfrict * 0.01f; // friction force scale taken from SB
-                               VECSUBADDSS(lF[i], velgoal, kd, lV[i], kd);
-                               
-                       }
-               }       
+       
+       /* multiply lF with mass matrix
+       // force = mass * acceleration (in this case: gravity)
+       */
+       for(i = 0; i < (long)numverts; i++)
+       {
+               float temp[3];
+               VECCOPY(temp, lF[i]);
+               mul_fmatrix_fvector(lF[i], M[i].m, temp);
        }
 
+       submul_lfvectorS(lF, lV, spring_air, numverts);
+       
        /* handle external forces like wind */
        if(effectors)
-       {
-               float speed[3] = {0.0f, 0.0f,0.0f};
-               float force[3]= {0.0f, 0.0f, 0.0f};
-               
-               #pragma omp parallel for private (i) shared(lF)
-               for(i = 0; i < cloth->numverts; i++)
+       {       
+               for(i = 0; i < cloth->numfaces; i++)
                {
                        float vertexnormal[3]={0,0,0};
-                       float fieldfactor = 1000.0f, windfactor  = 250.0f; // from sb
+                       float speed[3] = {0.0f, 0.0f,0.0f};
+                       float force[3]= {0.0f, 0.0f, 0.0f};
                        
-                       pdDoEffectors(effectors, lX[i], force, speed, (float)G.scene->r.cfra, 0.0f, PE_WIND_AS_SPEED);          
+                       if(mfaces[i].v4)
+                               CalcNormFloat4(lX[mfaces[i].v1],lX[mfaces[i].v2],lX[mfaces[i].v3],lX[mfaces[i].v4],vertexnormal);
+                       else
+                               CalcNormFloat(lX[mfaces[i].v1],lX[mfaces[i].v2],lX[mfaces[i].v3],vertexnormal);
                        
-                       // TODO apply forcefields here
-                       VECADDS(lF[i], lF[i], force, fieldfactor*0.01f);
-
+                       pdDoEffectors(effectors, lX[mfaces[i].v1], force, speed, (float)G.scene->r.cfra, 0.0f, PE_WIND_AS_SPEED);
                        VECCOPY(wind_normalized, speed);
                        Normalize(wind_normalized);
+                       VecMulf(wind_normalized, -calculateVertexWindForce(speed, vertexnormal));
+                       
+                       if(mfaces[i].v4)
+                       {
+                               VECADDS(lF[mfaces[i].v1], lF[mfaces[i].v1], wind_normalized, 0.25);
+                       }
+                       else
+                       {
+                               VECADDS(lF[mfaces[i].v1], lF[mfaces[i].v1], wind_normalized, 1.0 / 3.0);
+                       }
+                       
+                       speed[0] = speed[1] = speed[2] = 0.0;
+                       pdDoEffectors(effectors, lX[mfaces[i].v2], force, speed, (float)G.scene->r.cfra, 0.0f, PE_WIND_AS_SPEED);
+                       VECCOPY(wind_normalized, speed);
+                       Normalize(wind_normalized);
+                       VecMulf(wind_normalized, -calculateVertexWindForce(speed, vertexnormal));
+                       if(mfaces[i].v4)
+                       {
+                               VECADDS(lF[mfaces[i].v2], lF[mfaces[i].v2], wind_normalized, 0.25);
+                       }
+                       else
+                       {
+                               VECADDS(lF[mfaces[i].v2], lF[mfaces[i].v2], wind_normalized, 1.0 / 3.0);
+                       }
+                       
+                       speed[0] = speed[1] = speed[2] = 0.0;
+                       pdDoEffectors(effectors, lX[mfaces[i].v3], force, speed, (float)G.scene->r.cfra, 0.0f, PE_WIND_AS_SPEED);
+                       VECCOPY(wind_normalized, speed);
+                       Normalize(wind_normalized);
+                       VecMulf(wind_normalized, -calculateVertexWindForce(speed, vertexnormal));
+                       if(mfaces[i].v4)
+                       {
+                               VECADDS(lF[mfaces[i].v3], lF[mfaces[i].v3], wind_normalized, 0.25);
+                       }
+                       else
+                       {
+                               VECADDS(lF[mfaces[i].v3], lF[mfaces[i].v3], wind_normalized, 1.0 / 3.0);
+                       }
+                       
+                       speed[0] = speed[1] = speed[2] = 0.0;
+                       if(mfaces[i].v4)
+                       {
+                               pdDoEffectors(effectors, lX[mfaces[i].v4], force, speed, (float)G.scene->r.cfra, 0.0f, PE_WIND_AS_SPEED);
+                               VECCOPY(wind_normalized, speed);
+                               Normalize(wind_normalized);
+                               VecMulf(wind_normalized, -calculateVertexWindForce(speed, vertexnormal));
+                               VECADDS(lF[mfaces[i].v4], lF[mfaces[i].v4], wind_normalized, 0.25);
+                       }
                        
-                       calculateWeightedVertexNormal(clmd, mfaces, vertexnormal, i, lX);
-                       VECADDS(lF[i], lF[i], wind_normalized, -calculateVertexWindForce(speed, vertexnormal));
                }
        }
-       
+               
        // calculate spring forces
        search = cloth->springs;
        while(search)
        {
                // only handle active springs
                // if(((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) && !(springs[i].flags & CSPRING_FLAG_DEACTIVATE))|| !(clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED)){}
-               cloth_calc_spring_force(clmd, search->link, lF, lX, lV, dFdV, dFdX, dt);
+               cloth_calc_spring_force(clmd, search->link, lF, lX, lV, dFdV, dFdX, time);
 
                search = search->next;
        }
        
-       if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_BIG_FORCE)
-       {       
-               if(cloth->numspringssave != cloth->numsprings)
-               {
-                       cloth_bending_mode(clmd, 1);
-               }
-       }
-       else
-       {
-               if(cloth->numspringssave == cloth->numsprings)
-               {
-                       cloth_bending_mode(clmd, 0);
-               }
-       }
-       
        // apply spring forces
        search = cloth->springs;
        while(search)
        {
                // only handle active springs
                // if(((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) && !(springs[i].flags & CSPRING_FLAG_DEACTIVATE))|| !(clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED))  
-               if(!cloth_apply_spring_force(clmd, search->link, lF, lX, lV, dFdV, dFdX))
-                       break;
+               cloth_apply_spring_force(clmd, search->link, lF, lX, lV, dFdV, dFdX);
                search = search->next;
        }
-       
-       clmd->sim_parms->flags &= ~CLOTH_SIMSETTINGS_FLAG_BIG_FORCE;
-       
+       // printf("\n");
 }
 
-
-void simulate_implicit_euler(lfVector *Vnew, lfVector *lX, lfVector *lV, lfVector *lF, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, float dt, fmatrix3x3 *A, lfVector *B, lfVector *dV, fmatrix3x3 *S, lfVector *z, fmatrix3x3 *P, fmatrix3x3 *Pinv)
+void simulate_implicit_euler(lfVector *Vnew, lfVector *lX, lfVector *lV, lfVector *lF, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, float dt, fmatrix3x3 *A, lfVector *B, lfVector *dV, fmatrix3x3 *S, lfVector *z, lfVector *olddV, fmatrix3x3 *P, fmatrix3x3 *Pinv, fmatrix3x3 *M, fmatrix3x3 *bigI)
 {
        unsigned int numverts = dFdV[0].vcount;
 
        lfVector *dFdXmV = create_lfvector(numverts);
+       zero_lfvector(dV, numverts);
        
-       initdiag_bfmatrix(A, I);
+       cp_bfmatrix(A, M);
        
        subadd_bfmatrixS_bfmatrixS(A, dFdV, dt, dFdX, (dt*dt));
 
@@ -1466,150 +1503,133 @@ void simulate_implicit_euler(lfVector *Vnew, lfVector *lX, lfVector *lV, lfVecto
 
        add_lfvectorS_lfvectorS(B, lF, dt, dFdXmV, (dt*dt), numverts);
        
-       // cg_filtered(dV, A, B, z, S); // conjugate gradient algorithm to solve Ax=b 
-       cg_filtered_pre(dV, A, B, z, S, P, Pinv);
-       
-       // advance velocities
-       add_lfvector_lfvector(Vnew, lV, dV, numverts);
+       itstart();
        
-       del_lfvector(dFdXmV);
-}
-
-/*
-// this version solves for the new velocity
-void simulate_implicit_euler(lfVector *Vnew, lfVector *lX, lfVector *lV, lfVector *lF, fmatrix3x3 *dFdV, fmatrix3x3 *dFdX, float dt, fmatrix3x3 *A, lfVector *B, lfVector *dV, fmatrix3x3 *S, lfVector *z, fmatrix3x3 *P, fmatrix3x3 *Pinv)
-{
-       unsigned int numverts = dFdV[0].vcount;
-
-       lfVector *dFdXmV = create_lfvector(numverts);
+       cg_filtered(dV, A, B, z, S); /* conjugate gradient algorithm to solve Ax=b */
+       // cg_filtered_pre(dV, A, B, z, S, P, Pinv, bigI);
        
-       initdiag_bfmatrix(A, I);
+       itend();
+       // printf("cg_filtered calc time: %f\n", (float)itval());
        
-       subadd_bfmatrixS_bfmatrixS(A, dFdV, dt, dFdX, (dt*dt));
-
-       mul_bfmatrix_lfvector(dFdXmV, dFdV, lV);
-
-       add_lfvectorS_lfvectorS(B, lF, dt, dFdXmV, -dt, numverts);
-       add_lfvector_lfvector(B, B, lV, numverts);
+       cp_lfvector(olddV, dV, numverts);
 
-       cg_filtered_pre(Vnew, A, B, z, S, P, Pinv);
+       // advance velocities
+       add_lfvector_lfvector(Vnew, lV, dV, numverts);
        
+
        del_lfvector(dFdXmV);
 }
-*/
+
 int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase *effectors)
 {              
        unsigned int i=0;
-       float step=0.0f, tf=1.0f;
+       float step=0.0f, tf=clmd->sim_parms->timescale;
        Cloth *cloth = clmd->clothObject;
        ClothVertex *verts = cloth->verts;
        unsigned int numverts = cloth->numverts;
-       float dt = 1.0f / clmd->sim_parms->stepsPerFrame;
+       float dt = clmd->sim_parms->timescale / clmd->sim_parms->stepsPerFrame;
        Implicit_Data *id = cloth->implicit;
        int result = 0;
-       float force = 0, lastforce = 0;
-       lfVector *dx;
        
        if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) /* do goal stuff */
        {
                for(i = 0; i < numverts; i++)
                {                       
                        // update velocities with constrained velocities from pinned verts
-                       if(verts [i].goal >= SOFTGOALSNAP)
+                       if(verts [i].flags & CLOTH_VERT_FLAG_PINNED)
                        {                       
-                               VECSUB(id->V[i], cloth->xconst[i], cloth->xold[i]);
-                               // VecMulf(id->V[i], 1.0 / dt);
+                               VECSUB(id->V[i], verts[i].xconst, verts[i].xold);
+                               // VecMulf(id->V[i], clmd->sim_parms->stepsPerFrame);
                        }
                }       
        }
-
+       
        while(step < tf)
-       {               
+       {       
+               // calculate forces
                effectors= pdInitEffectors(ob,NULL);
+               cloth_calc_force(clmd, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step, id->M);
+               if(effectors) pdEndEffectors(effectors);
                
-               // calculate 
-               cloth_calc_force(clmd, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step, dt );
+               // calculate new velocity
+               simulate_implicit_euler(id->Vnew, id->X, id->V, id->F, id->dFdV, id->dFdX, dt, id->A, id->B, id->dV, id->S, id->z, id->olddV, id->P, id->Pinv, id->M, id->bigI);
                
-               // check for sleeping
-               // if(!(clmd->coll_parms->flags & CLOTH_SIMSETTINGS_FLAG_SLEEP))
-               {
-                       simulate_implicit_euler(id->Vnew, id->X, id->V, id->F, id->dFdV, id->dFdX, dt, id->A, id->B, id->dV, id->S, id->z, id->P, id->Pinv);
+               // advance positions
+               add_lfvector_lfvectorS(id->Xnew, id->X, id->Vnew, dt, numverts);
                
-                       add_lfvector_lfvectorS(id->Xnew, id->X, id->Vnew, dt, numverts);
+               /* move pinned verts to correct position */
+               for(i = 0; i < numverts; i++)
+               {       
+                       if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) 
+                       {                       
+                               if(verts [i].flags & CLOTH_VERT_FLAG_PINNED)
+                               {                       
+                                       float tvect[3] = {.0,.0,.0};
+                                       VECSUB(tvect, verts[i].xconst, verts[i].xold);
+                                       mul_fvector_S(tvect, tvect, step+dt);
+                                       VECADD(tvect, tvect, verts[i].xold);
+                                       VECCOPY(id->Xnew[i], tvect);
+                               }       
+                       }
+                       
+                       VECCOPY(verts[i].txold, id->X[i]);
                }
                
-               dx = create_lfvector(numverts);
-               sub_lfvector_lfvector(dx, id->Xnew, id->X, numverts);
-               force = dot_lfvector(dx, dx, numverts);
-               del_lfvector(dx);
-               
-               /*
-               if((force < 0.00001) && (lastforce >= force))
-                       clmd->coll_parms->flags |= CLOTH_SIMSETTINGS_FLAG_SLEEP;
-               else if((lastforce*2 < force))
-               */
-                       clmd->coll_parms->flags &= ~CLOTH_SIMSETTINGS_FLAG_SLEEP;
-               
-               lastforce = force;
-               
-               if(clmd->coll_parms->flags & CLOTH_COLLISIONSETTINGS_FLAG_ENABLED)
+               if(clmd->coll_parms->flags & CLOTH_COLLSETTINGS_FLAG_ENABLED)
                {
                        // collisions 
                        // itstart();
                        
                        // update verts to current positions
                        for(i = 0; i < numverts; i++)
-                       {               
-                               if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) /* do goal stuff */
-                               {                       
-                                       if(verts [i].goal >= SOFTGOALSNAP)
-                                       {                       
-                                               float tvect[3] = {.0,.0,.0};
-                                               // VECSUB(tvect, id->Xnew[i], verts[i].xold);
-                                               mul_fvector_S(tvect, id->V[i], step+dt);
-                                               VECADD(tvect, tvect, cloth->xold[i]);
-                                               VECCOPY(id->Xnew[i], tvect);
-                                       }
-                                               
-                               }
+                       {       
+                               VECCOPY(verts[i].tx, id->Xnew[i]);
                                
-                               VECCOPY(cloth->current_x[i], id->Xnew[i]);
-                               VECSUB(cloth->current_v[i], cloth->current_x[i], cloth->current_xold[i]);
-                               VECCOPY(cloth->v[i], cloth->current_v[i]);
+                               VECSUB(verts[i].tv, verts[i].tx, verts[i].txold);
+                               VECCOPY(verts[i].v, verts[i].tv);
                        }
                        
                        // call collision function
-                       result = cloth_bvh_objcollision(clmd, step + dt, step, dt);
+                       // TODO: check if "step" or "step+dt" is correct - dg
+                       result = cloth_bvh_objcollision(ob, clmd, step, dt);
                        
-                       // copy corrected positions back to simulation
-                       if(result)
+                       // correct velocity again, just to be sure we had to change it due to adaptive collisions
+                       for(i = 0; i < numverts; i++)
                        {
-                               memcpy(cloth->current_xold, cloth->current_x, sizeof(lfVector) * numverts);
-                               memcpy(id->Xnew, cloth->current_x, sizeof(lfVector) * numverts);
-                               
-                               for(i = 0; i < numverts; i++)
-                               {       
-                                       VECCOPY(id->Vnew[i], cloth->current_v[i]);
-                                       VecMulf(id->Vnew[i], 1.0f / dt);
-                               }
+                               VECSUB(verts[i].tv, verts[i].tx, id->X[i]);
                        }
-                       else
-                       {
-                               memcpy(cloth->current_xold, id->Xnew, sizeof(lfVector) * numverts);
+                       
+                       // copy corrected positions back to simulation
+                       for(i = 0; i < numverts; i++)
+                       {               
+                               if(result)
+                               {
+                                       
+                                       if((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (verts [i].flags & CLOTH_VERT_FLAG_PINNED))
+                                               continue;
+                                       
+                                       VECCOPY(id->Xnew[i], verts[i].tx);
+                                       VECCOPY(id->Vnew[i], verts[i].tv);
+                                       VecMulf(id->Vnew[i], clmd->sim_parms->stepsPerFrame);
+                               }
                        }
                        
                        // X = Xnew;
                        cp_lfvector(id->X, id->Xnew, numverts);
                        
                        // if there were collisions, advance the velocity from v_n+1/2 to v_n+1
+                       
                        if(result)
                        {
                                // V = Vnew;
                                cp_lfvector(id->V, id->Vnew, numverts);
                                
                                // calculate 
-                               cloth_calc_force(clmd, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step, dt);   
-                               simulate_implicit_euler(id->Vnew, id->X, id->V, id->F, id->dFdV, id->dFdX, dt / 2.0f, id->A, id->B, id->dV, id->S, id->z, id->P, id->Pinv);
+                               effectors= pdInitEffectors(ob,NULL);
+                               cloth_calc_force(clmd, id->F, id->X, id->V, id->dFdV, id->dFdX, effectors, step+dt, id->M);     
+                               if(effectors) pdEndEffectors(effectors);
+                               
+                               simulate_implicit_euler(id->Vnew, id->X, id->V, id->F, id->dFdV, id->dFdX, dt / 2.0f, id->A, id->B, id->dV, id->S, id->z, id->olddV, id->P, id->Pinv, id->M, id->bigI);
                        }
                        
                }
@@ -1626,814 +1646,41 @@ int implicit_solver (Object *ob, float frame, ClothModifierData *clmd, ListBase
                cp_lfvector(id->V, id->Vnew, numverts);
                
                step += dt;
-
-               if(effectors) pdEndEffectors(effectors);
+               
        }
-       
-       if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL)
-       {
-               for(i = 0; i < numverts; i++)
+
+       for(i = 0; i < numverts; i++)
+       {                               
+               if((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (verts [i].flags & CLOTH_VERT_FLAG_PINNED))
                {
-                       if(verts [i].goal < SOFTGOALSNAP)
-                       {
-                               VECCOPY(cloth->current_xold[i], id->X[i]);
-                               VECCOPY(cloth->x[i], id->X[i]);
-                       }
-                       else
-                       {
-                               VECCOPY(cloth->current_xold[i], cloth->xconst[i]);
-                               VECCOPY(cloth->x[i], cloth->xconst[i]);
-                       }
+                       VECCOPY(verts[i].txold, verts[i].xconst); // TODO: test --> should be .x 
+                       VECCOPY(verts[i].x, verts[i].xconst);
+                       VECCOPY(verts[i].v, id->V[i]);
                }
-       }
-       else
-       {
-               for(i = 0; i < numverts; i++)
+               else
                {
-                       VECCOPY(cloth->current_xold[i], id->X[i]);
-                       VECCOPY(cloth->x[i], id->X[i]);
+                       VECCOPY(verts[i].txold, id->X[i]);
+                       VECCOPY(verts[i].x, id->X[i]);
+                       VECCOPY(verts[i].v, id->V[i]);
                }
-               // memcpy(cloth->current_xold, id->X, sizeof(lfVector) * numverts);
-               // memcpy(cloth->x, id->X, sizeof(lfVector) * numverts);
        }
        
-       for(i = 0; i < numverts; i++)
-               VECCOPY(cloth->v[i], id->V[i]);
-       
-       // memcpy(cloth->v, id->V, sizeof(lfVector) * numverts);
-       
        return 1;
 }
 
 void implicit_set_positions (ClothModifierData *clmd)
 {              
        Cloth *cloth = clmd->clothObject;
-       unsigned int numverts = cloth->numverts, i = 0;
+       ClothVertex *verts = cloth->verts;
+       unsigned int numverts = cloth->numverts, i;
        Implicit_Data *id = cloth->implicit;
        
-       
        for(i = 0; i < numverts; i++)
-       {
-               VECCOPY(id->X[i], cloth->x[i]);
-               VECCOPY(id->V[i], cloth->v[i]);
+       {                               
+               VECCOPY(id->X[i], verts[i].x);
+               VECCOPY(id->V[i], verts[i].v);
        }
-       
-       // memcpy(id->X, cloth->x, sizeof(lfVector) * numverts);        
-       // memcpy(id->V, cloth->v, sizeof(lfVector) * numverts);        
-}
-
-
-int collisions_collision_response_static(ClothModifierData *clmd, ClothModifierData *coll_clmd)
-{
-       /*
-       unsigned int i = 0;
-       int result = 0;
-       LinkNode *search = NULL;
-       CollPair *collpair = NULL;
-       Cloth *cloth1, *cloth2;
-       float w1, w2, w3, u1, u2, u3;
-       float v1[3], v2[3], relativeVelocity[3];
-       float magrelVel;
-       
-       cloth1 = clmd->clothObject;
-       cloth2 = coll_clmd->clothObject;
-
-       // search = clmd->coll_parms->collision_list;
-       
-       while(search)
-       {
-       collpair = search->link;
-               
-               // compute barycentric coordinates for both collision points
-       collisions_compute_barycentric(collpair->pa,
-       cloth1->verts[collpair->ap1].txold,
-       cloth1->verts[collpair->ap2].txold,
-       cloth1->verts[collpair->ap3].txold, 
-       &w1, &w2, &w3);
-       
-       collisions_compute_barycentric(collpair->pb,
-       cloth2->verts[collpair->bp1].txold,
-       cloth2->verts[collpair->bp2].txold,
-       cloth2->verts[collpair->bp3].txold,
-       &u1, &u2, &u3);
-       
-               // Calculate relative "velocity".
-       interpolateOnTriangle(v1, cloth1->verts[collpair->ap1].tv, cloth1->verts[collpair->ap2].tv, cloth1->verts[collpair->ap3].tv, w1, w2, w3);
-               
-       interpolateOnTriangle(v2, cloth2->verts[collpair->bp1].tv, cloth2->verts[collpair->bp2].tv, cloth2->verts[collpair->bp3].tv, u1, u2, u3);
-               
-       VECSUB(relativeVelocity, v1, v2);
-                       
-               // Calculate the normal component of the relative velocity (actually only the magnitude - the direction is stored in 'normal').
-       magrelVel = INPR(relativeVelocity, collpair->normal);
-               
-               // printf("magrelVel: %f\n", magrelVel);
-                               
-               // Calculate masses of points.
-               
-               // If v_n_mag < 0 the edges are approaching each other.
-       if(magrelVel < -ALMOST_ZERO) 
-       {
-                       // Calculate Impulse magnitude to stop all motion in normal direction.
-                       // const double I_mag = v_n_mag / (1/m1 + 1/m2);
-       float magnitude_i = magrelVel / 2.0f; // TODO implement masses
-       float tangential[3], magtangent, magnormal, collvel[3];
-       float vrel_t_pre[3];
-       float vrel_t[3];
-       double impulse;
-       float epsilon = clmd->coll_parms->epsilon;
-       float overlap = (epsilon + ALMOST_ZERO-collpair->distance);
-                       
-                       // calculateFrictionImpulse(tangential, relativeVelocity, collpair->normal, magrelVel, clmd->coll_parms->friction*0.01, magrelVel);
-                       
-                       // magtangent = INPR(tangential, tangential);
-                       
-                       // Apply friction impulse.
-       if (magtangent < -ALMOST_ZERO) 
-       {
-                               
-                               // printf("friction applied: %f\n", magtangent);
-                               // TODO check original code 
-}
-                       
-
-       impulse = -2.0f * magrelVel / ( 1.0 + w1*w1 + w2*w2 + w3*w3);
-                       
-                       // printf("impulse: %f\n", impulse);
-                       
-                       // face A
-       VECADDMUL(cloth1->verts[collpair->ap1].impulse, collpair->normal, w1 * impulse); 
-       cloth1->verts[collpair->ap1].impulse_count++;
-                       
-       VECADDMUL(cloth1->verts[collpair->ap2].impulse, collpair->normal, w2 * impulse); 
-       cloth1->verts[collpair->ap2].impulse_count++;
-                       
-       VECADDMUL(cloth1->verts[collpair->ap3].impulse, collpair->normal, w3 * impulse); 
-       cloth1->verts[collpair->ap3].impulse_count++;
-                       
-                       // face B
-       VECADDMUL(cloth2->verts[collpair->bp1].impulse, collpair->normal, u1 * impulse); 
-       cloth2->verts[collpair->bp1].impulse_count++;
-                       
-       VECADDMUL(cloth2->verts[collpair->bp2].impulse, collpair->normal, u2 * impulse); 
-       cloth2->verts[collpair->bp2].impulse_count++;
-                       
-       VECADDMUL(cloth2->verts[collpair->bp3].impulse, collpair->normal, u3 * impulse); 
-       cloth2->verts[collpair->bp3].impulse_count++;
-                       
-                       
-       result = 1;     
-               
-                       // printf("magnitude_i: %f\n", magnitude_i); // negative before collision in my case
-                       
-                       // Apply the impulse and increase impulse counters.
-       
-                       
-}
-               
-       search = search->next;
-}
-       
-       
-       return result;
-       */
-       return 0;
-}
-
-
-int collisions_collision_response_moving_tris(ClothModifierData *clmd, ClothModifierData *coll_clmd)
-{
-       return 0;
-}
-
-
-int collisions_collision_response_moving_edges(ClothModifierData *clmd, ClothModifierData *coll_clmd)
-{
-       return 0;
-}
-
-void cloth_collision_static(ClothModifierData *clmd, LinkNode *collision_list)
-{
-       /*
-       CollPair *collpair = NULL;
-       Cloth *cloth1=NULL, *cloth2=NULL;
-       MFace *face1=NULL, *face2=NULL;
-       ClothVertex *verts1=NULL, *verts2=NULL;
-       double distance = 0;
-       float epsilon = clmd->coll_parms->epsilon;
-       unsigned int i = 0;
-
-       for(i = 0; i < 4; i++)
-       {
-       collpair = (CollPair *)MEM_callocN(sizeof(CollPair), "cloth coll pair");        
-               
-       cloth1 = clmd->clothObject;
-       cloth2 = coll_clmd->clothObject;
-               
-       verts1 = cloth1->verts;
-       verts2 = cloth2->verts;
-       
-       face1 = &(cloth1->mfaces[tree1->tri_index]);
-       face2 = &(cloth2->mfaces[tree2->tri_index]);
-               
-               // check all possible pairs of triangles
-       if(i == 0)
-       {
-       collpair->ap1 = face1->v1;
-       collpair->ap2 = face1->v2;
-       collpair->ap3 = face1->v3;
-                       
-       collpair->bp1 = face2->v1;
-       collpair->bp2 = face2->v2;
-       collpair->bp3 = face2->v3;
-                       
-}
-               
-       if(i == 1)
-       {
-       if(face1->v4)
-       {
-       collpair->ap1 = face1->v3;
-       collpair->ap2 = face1->v4;
-       collpair->ap3 = face1->v1;
-                               
-       collpair->bp1 = face2->v1;
-       collpair->bp2 = face2->v2;
-       collpair->bp3 = face2->v3;
-}
-       else
-       i++;
-}
-               
-       if(i == 2)
-       {
-       if(face2->v4)
-       {
-       collpair->ap1 = face1->v1;
-       collpair->ap2 = face1->v2;
-       collpair->ap3 = face1->v3;
-                               
-       collpair->bp1 = face2->v3;
-       collpair->bp2 = face2->v4;
-       collpair->bp3 = face2->v1;
-}
-       else
-       i+=2;
-}
-               
-       if(i == 3)
-       {
-       if((face1->v4)&&(face2->v4))
-       {
-       collpair->ap1 = face1->v3;
-       collpair->ap2 = face1->v4;
-       collpair->ap3 = face1->v1;
-                               
-       collpair->bp1 = face2->v3;
-       collpair->bp2 = face2->v4;
-       collpair->bp3 = face2->v1;
-}
-       else
-       i++;
-}
-       
-               
-       if(i < 4)
-       {
-                       // calc distance + normal       
-       distance = plNearestPoints(
-       verts1[collpair->ap1].txold, verts1[collpair->ap2].txold, verts1[collpair->ap3].txold, verts2[collpair->bp1].txold, verts2[collpair->bp2].txold, verts2[collpair->bp3].txold, collpair->pa,collpair->pb,collpair->vector);
-                       
-       if (distance <= (epsilon + ALMOST_ZERO))
-       {
-                               // printf("dist: %f\n", (float)distance);
-                               
-                               // collpair->face1 = tree1->tri_index;
-                               // collpair->face2 = tree2->tri_index;
-                               
-                               // VECCOPY(collpair->normal, collpair->vector);
-                               // Normalize(collpair->normal);
-                               
-                               // collpair->distance = distance;
-                               
-}
-       else
-       {
-       MEM_freeN(collpair);
-}
-}
-       else
-       {
-       MEM_freeN(collpair);
-}
-}
-       */
-}
-
-int collisions_are_edges_adjacent(ClothModifierData *clmd, ClothModifierData *coll_clmd, EdgeCollPair *edgecollpair)
-{
-       Cloth *cloth1, *cloth2;
-       ClothVertex *verts1, *verts2;
-       float temp[3];
-        /*
-       cloth1 = clmd->clothObject;
-       cloth2 = coll_clmd->clothObject;
-       
-       verts1 = cloth1->verts;
-       verts2 = cloth2->verts;
-       
-       VECSUB(temp, verts1[edgecollpair->p11].xold, verts2[edgecollpair->p21].xold);
-       if(ABS(INPR(temp, temp)) < ALMOST_ZERO)
-               return 1;
-       
-       VECSUB(temp, verts1[edgecollpair->p11].xold, verts2[edgecollpair->p22].xold);
-       if(ABS(INPR(temp, temp)) < ALMOST_ZERO)
-               return 1;
-       
-       VECSUB(temp, verts1[edgecollpair->p12].xold, verts2[edgecollpair->p21].xold);
-       if(ABS(INPR(temp, temp)) < ALMOST_ZERO)
-               return 1;
-       
-       VECSUB(temp, verts1[edgecollpair->p12].xold, verts2[edgecollpair->p22].xold);
-       if(ABS(INPR(temp, temp)) < ALMOST_ZERO)
-               return 1;
-               */
-       return 0;
-}
-
-
-void collisions_collision_moving_edges(ClothModifierData *clmd, ClothModifierData *coll_clmd, CollisionTree *tree1, CollisionTree *tree2)
-{
-       /*
-       EdgeCollPair edgecollpair;
-       Cloth *cloth1=NULL, *cloth2=NULL;
-       MFace *face1=NULL, *face2=NULL;
-       ClothVertex *verts1=NULL, *verts2=NULL;
-       double distance = 0;
-       float epsilon = clmd->coll_parms->epsilon;
-       unsigned int i = 0, j = 0, k = 0;
-       int numsolutions = 0;
-       float a[3], b[3], c[3], d[3], e[3], f[3], solution[3];
-       
-       cloth1 = clmd->clothObject;
-       cloth2 = coll_clmd->clothObject;
-       
-       verts1 = cloth1->verts;
-       verts2 = cloth2->verts;
-
-       face1 = &(cloth1->mfaces[tree1->tri_index]);
-       face2 = &(cloth2->mfaces[tree2->tri_index]);
-       
-       for( i = 0; i < 5; i++)
-       {
-       if(i == 0) 
-       {
-       edgecollpair.p11 = face1->v1;
-       edgecollpair.p12 = face1->v2;
-}
-       else if(i == 1) 
-       {
-       edgecollpair.p11 = face1->v2;
-       edgecollpair.p12 = face1->v3;
-}
-       else if(i == 2) 
-       {
-       if(face1->v4) 
-       {
-       edgecollpair.p11 = face1->v3;
-       edgecollpair.p12 = face1->v4;
-}
-       else 
-       {
-       edgecollpair.p11 = face1->v3;
-       edgecollpair.p12 = face1->v1;
-       i+=5; // get out of here after this edge pair is handled
-}
-}
-       else if(i == 3) 
-       {
-       if(face1->v4) 
-       {
-       edgecollpair.p11 = face1->v4;
-       edgecollpair.p12 = face1->v1;
-}      
-       else
-       continue;
-}
-       else
-       {
-       edgecollpair.p11 = face1->v3;
-       edgecollpair.p12 = face1->v1;
-}
-
-               
-       for( j = 0; j < 5; j++)
-       {
-       if(j == 0)
-       {
-       edgecollpair.p21 = face2->v1;
-       edgecollpair.p22 = face2->v2;
-}
-       else if(j == 1)
-       {
-       edgecollpair.p21 = face2->v2;
-       edgecollpair.p22 = face2->v3;
-}
-       else if(j == 2)
-       {
-       if(face2->v4) 
-       {
-       edgecollpair.p21 = face2->v3;
-       edgecollpair.p22 = face2->v4;
-}
-       else 
-       {
-       edgecollpair.p21 = face2->v3;
-       edgecollpair.p22 = face2->v1;
-}
-}
-       else if(j == 3)
-       {
-       if(face2->v4) 
-       {
-       edgecollpair.p21 = face2->v4;
-       edgecollpair.p22 = face2->v1;
-}
-       else
-       continue;
-}
-       else
-       {
-       edgecollpair.p21 = face2->v3;
-       edgecollpair.p22 = face2->v1;
-}
-                       
-                       
-       if(!collisions_are_edges_adjacent(clmd, coll_clmd, &edgecollpair))
-       {
-       VECSUB(a, verts1[edgecollpair.p12].xold, verts1[edgecollpair.p11].xold);
-       VECSUB(b, verts1[edgecollpair.p12].v, verts1[edgecollpair.p11].v);
-       VECSUB(c, verts1[edgecollpair.p21].xold, verts1[edgecollpair.p11].xold);
-       VECSUB(d, verts1[edgecollpair.p21].v, verts1[edgecollpair.p11].v);
-       VECSUB(e, verts2[edgecollpair.p22].xold, verts1[edgecollpair.p11].xold);
-       VECSUB(f, verts2[edgecollpair.p22].v, verts1[edgecollpair.p11].v);
-                               
-       numsolutions = collisions_get_collision_time(a, b, c, d, e, f, solution);
-                               
-       for (k = 0; k < numsolutions; k++) 
-       {                                                               
-       if ((solution[k] >= 0.0) && (solution[k] <= 1.0)) 
-       {
-       float out_collisionTime = solution[k];
-                                               
-                                               // TODO: check for collisions 
-                                               
-                                               // TODO: put into (edge) collision list
-                                               
-       printf("Moving edge found!\n");
-}
-}
-}
-}
-}      
-       */      
-}
-
-void collisions_collision_moving_tris(ClothModifierData *clmd, ClothModifierData *coll_clmd, CollisionTree *tree1, CollisionTree *tree2)
-{
-       /*
-       CollPair collpair;
-       Cloth *cloth1=NULL, *cloth2=NULL;
-       MFace *face1=NULL, *face2=NULL;
-       ClothVertex *verts1=NULL, *verts2=NULL;
-       double distance = 0;
-       float epsilon = clmd->coll_parms->epsilon;
-       unsigned int i = 0, j = 0, k = 0;
-       int numsolutions = 0;
-       float a[3], b[3], c[3], d[3], e[3], f[3], solution[3];
-
-       for(i = 0; i < 2; i++)
-       {               
-       cloth1 = clmd->clothObject;
-       cloth2 = coll_clmd->clothObject;
-               
-       verts1 = cloth1->verts;
-       verts2 = cloth2->verts;
-       
-       face1 = &(cloth1->mfaces[tree1->tri_index]);
-       face2 = &(cloth2->mfaces[tree2->tri_index]);
-               
-               // check all possible pairs of triangles
-       if(i == 0)
-       {
-       collpair.ap1 = face1->v1;
-       collpair.ap2 = face1->v2;
-       collpair.ap3 = face1->v3;
-                       
-       collpair.pointsb[0] = face2->v1;
-       collpair.pointsb[1] = face2->v2;
-       collpair.pointsb[2] = face2->v3;
-       collpair.pointsb[3] = face2->v4;
-}
-               
-       if(i == 1)
-       {
-       if(face1->v4)
-       {
-       collpair.ap1 = face1->v3;
-       collpair.ap2 = face1->v4;
-       collpair.ap3 = face1->v1;
-                               
-       collpair.pointsb[0] = face2->v1;
-       collpair.pointsb[1] = face2->v2;
-       collpair.pointsb[2] = face2->v3;
-       collpair.pointsb[3] = face2->v4;
-}
-       else
-       i++;
-}
-               
-               // calc SIPcode (?)
-               
-       if(i < 2)
-       {
-       VECSUB(a, verts1[collpair.ap2].xold, verts1[collpair.ap1].xold);
-       VECSUB(b, verts1[collpair.ap2].v, verts1[collpair.ap1].v);
-       VECSUB(c, verts1[collpair.ap3].xold, verts1[collpair.ap1].xold);
-       VECSUB(d, verts1[collpair.ap3].v, verts1[collpair.ap1].v);
-                               
-       for(j = 0; j < 4; j++)
-       {                                       
-       if((j==3) && !(face2->v4))
-       break;
-                               
-       VECSUB(e, verts2[collpair.pointsb[j]].xold, verts1[collpair.ap1].xold);
-       VECSUB(f, verts2[collpair.pointsb[j]].v, verts1[collpair.ap1].v);
-                               
-       numsolutions = collisions_get_collision_time(a, b, c, d, e, f, solution);
-                               
-       for (k = 0; k < numsolutions; k++) 
-       {                                                               
-       if ((solution[k] >= 0.0) && (solution[k] <= 1.0)) 
-       {
-       float out_collisionTime = solution[k];
-                                               
-                                               // TODO: check for collisions 
-                                               
-                                               // TODO: put into (point-face) collision list
-                                               
-       printf("Moving found!\n");
-                                               
-}
-}
-                               
-                               // TODO: check borders for collisions
-}
-                       
-}
-}
-       */
-}
-
-void collisions_collision_moving(ClothModifierData *clmd, ClothModifierData *coll_clmd, CollisionTree *tree1, CollisionTree *tree2)
-{
-       /*
-       // TODO: check for adjacent
-       collisions_collision_moving_edges(clmd, coll_clmd, tree1, tree2);
-       
-       collisions_collision_moving_tris(clmd, coll_clmd, tree1, tree2);
-       collisions_collision_moving_tris(coll_clmd, clmd, tree2, tree1);
-       */
+       if(G.rt > 0)
+               printf("implicit_set_positions\n");     
 }
 
-// cloth - object collisions
-int cloth_bvh_objcollision(ClothModifierData * clmd, float step, float prevstep, float dt)
-{
-       
-       Base *base = NULL;
-       CollisionModifierData *collmd = NULL;
-       Cloth *cloth = NULL;
-       Object *ob2 = NULL;
-       BVH *bvh1 = NULL, *bvh2 = NULL, *self_bvh;
-       LinkNode *collision_list = NULL; 
-       unsigned int i = 0, j = 0;
-       int collisions = 0, count = 0;
-       float (*current_x)[4];
-
-       if (!(((Cloth *)clmd->clothObject)->tree))
-       {
-               printf("No BVH found\n");
-               return 0;
-       }
-       
-       cloth = clmd->clothObject;
-       bvh1 = cloth->tree;
-       self_bvh = cloth->selftree;
-       
-       ////////////////////////////////////////////////////////////
-       // static collisions
-       ////////////////////////////////////////////////////////////
-       
-       // update cloth bvh
-       bvh_update_from_float4(bvh1, cloth->current_xold, cloth->numverts, cloth->current_x, 0); // 0 means STATIC, 1 means MOVING (see later in this function)
-/*
-       // check all collision objects
-       for (base = G.scene->base.first; base; base = base->next)
-       {
-               ob2 = base->object;
-               collmd = (CollisionModifierData *) modifiers_findByType (ob2, eModifierType_Collision);
-               
-               if (!collmd)
-                       continue;
-               
-               // check if there is a bounding volume hierarchy
-               if (collmd->tree) 
-               {                       
-                       bvh2 = collmd->tree;
-                       
-                       // update position + bvh of collision object
-                       collision_move_object(collmd, step, prevstep);
-                       bvh_update_from_mvert(collmd->tree, collmd->current_x, collmd->numverts, NULL, 0);
-                       
-                       // fill collision list 
-                       collisions += bvh_traverse(bvh1->root, bvh2->root, &collision_list);
-                       
-                       // call static collision response
-                       
-                       // free collision list
-                       if(collision_list)
-                       {
-                               LinkNode *search = collision_list;
-                               
-                               while(search)
-                               {
-                                       CollisionPair *coll_pair = search->link;
-                                       
-                                       MEM_freeN(coll_pair);
-                                       search = search->next;
-                               }
-                               BLI_linklist_free(collision_list,NULL);
-       
-                               collision_list = NULL;
-                       }
-               }
-       }
-       
-       //////////////////////////////////////////////
-       // update velocities + positions
-       //////////////////////////////////////////////
-       for(i = 0; i < cloth->numverts; i++)
-       {
-               VECADD(cloth->current_x[i], cloth->current_xold[i], cloth->current_v[i]);
-       }
-       //////////////////////////////////////////////
-*/     
-       /*
-       // fill collision list 
-       collisions += bvh_traverse(self_bvh->root, self_bvh->root, &collision_list);
-       
-       // call static collision response
-       
-       // free collision list
-       if(collision_list)
-       {
-               LinkNode *search = collision_list;
-               
-               while(search)
-               {
-                       float distance = 0;
-                       float mindistance = cloth->selftree->epsilon;
-                       CollisionPair *collpair = (CollisionPair *)search->link;
-                       
-                       // get distance of faces
-                       distance = plNearestPoints(
-                                       cloth->current_x[collpair->point_indexA[0]], cloth->current_x[collpair->point_indexA[1]], cloth->current_x[collpair->point_indexA[2]], cloth->current_x[collpair->point_indexB[0]], cloth->current_x[collpair->point_indexB[1]], cloth->current_x[collpair->point_indexB[2]], collpair->pa,collpair->pb,collpair->vector);
-                                       
-                       if(distance < mindistance)
-                       {
-                               ///////////////////////////////////////////
-                               // TODO: take velocity of the collision points into account!
-                               ///////////////////////////////////////////
-                               
-                               float correction = mindistance - distance;
-                               float temp[3];
-                               
-                               VECCOPY(temp, collpair->vector);
-                               Normalize(temp);
-                               VecMulf(temp, -correction*0.5);
-                               
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[0]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexA[0]], cloth->current_x[collpair->point_indexA[0]], temp); 
-                               
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[1]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexA[1]], cloth->current_x[collpair->point_indexA[1]], temp);
-                               
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexA[2]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexA[2]], cloth->current_x[collpair->point_indexA[2]], temp);
-                               
-                               
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[0]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexB[0]], cloth->current_x[collpair->point_indexB[0]], temp); 
-                               
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[1]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexB[1]], cloth->current_x[collpair->point_indexB[1]], temp);
-                               
-                               if(!((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [collpair->point_indexB[2]].goal >= SOFTGOALSNAP)))
-                                       VECSUB(cloth->current_x[collpair->point_indexB[2]], cloth->current_x[collpair->point_indexB[2]], temp);
-                                       
-                               collisions = 1;
-                               
-                       }
-                       
-               }
-               
-               search = collision_list;
-               while(search)
-               {
-                       CollisionPair *coll_pair = search->link;
-                       
-                       MEM_freeN(coll_pair);
-                       search = search->next;
-               }
-               BLI_linklist_free(collision_list,NULL);
-
-               collision_list = NULL;
-       }
-       */
-       // Test on *simple* selfcollisions
-       collisions = 1;
-       count = 0;
-       current_x = cloth->current_x; // needed for openMP
-/*
-#pragma omp parallel for private(i,j, collisions) shared(current_x)
-       for(count = 0; count < 6; count++)
-       {
-               collisions = 0;
-               
-               for(i = 0; i < cloth->numverts; i++)
-               {
-                       for(j = i + 1; j < cloth->numverts; j++)
-                       {
-                               float temp[3];
-                               float length = 0;
-                               float mindistance = cloth->selftree->epsilon;
-                                       
-                               if(clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL)
-                               {                       
-                                       if((cloth->verts [i].goal >= SOFTGOALSNAP)
-                                       && (cloth->verts [j].goal >= SOFTGOALSNAP))
-                                       {
-                                               continue;
-                                       }
-                               }
-                                       
-                                       // check for adjacent points
-                               if(BLI_edgehash_haskey ( cloth->edgehash, i, j ))
-                               {
-                                       continue;
-                               }
-                                       
-                               VECSUB(temp, current_x[i], current_x[j]);
-                                       
-                               length = Normalize(temp);
-                                       
-                               if(length < mindistance)
-                               {
-                                       float correction = mindistance - length;
-                                               
-                                       if((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [i].goal >= SOFTGOALSNAP))
-                                       {
-                                               VecMulf(temp, -correction);
-                                               VECADD(current_x[j], current_x[j], temp);
-                                       }
-                                       else if((clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_GOAL) && (cloth->verts [j].goal >= SOFTGOALSNAP))
-                                       {
-                                               VecMulf(temp, correction);
-                                               VECADD(current_x[i], current_x[i], temp);
-                                       }
-                                       else
-                                       {
-                                               VecMulf(temp, -correction*0.5);
-                                               VECADD(current_x[j], current_x[j], temp);
-                                               
-                                               VECSUB(current_x[i], current_x[i], temp);       
-                                       }
-                                       
-                                       collisions = 1;
-                               }
-                       }
-               }
-       }
-
-       
-       //////////////////////////////////////////////
-       // SELFCOLLISIONS: update velocities
-       //////////////////////////////////////////////
-       for(i = 0; i < cloth->numverts; i++)
-       {
-               VECSUB(cloth->current_v[i], cloth->current_x[i], cloth->current_xold[i]);
-       }
-       //////////////////////////////////////////////
-*/     
-       return 1;
-}