Upgrade Bullet to version 2.83.
[blender.git] / extern / bullet2 / src / BulletDynamics / Featherstone / btMultiBody.cpp
index 56a1c55d9ae80aca4f34f3581faf176a28f40f66..e49cf30d0442cd1c73aac7c905fd318f849cbc17 100644 (file)
@@ -6,7 +6,8 @@
  *   
  * COPYRIGHT:
  *   Copyright (C) Stephen Thompson, <stephen@solarflare.org.uk>, 2011-2013
- *   Portions written By Erwin Coumans: replacing Eigen math library by Bullet LinearMath and a dedicated 6x6 matrix inverse (solveImatrix)
+ *   Portions written By Erwin Coumans: connection to LCP solver, various multibody constraints, replacing Eigen math library by Bullet LinearMath and a dedicated 6x6 matrix inverse (solveImatrix)
+ *   Portions written By Jakub Stepien: support for multi-DOF constraints, introduction of spatial algebra and several other improvements
 
  This software is provided 'as-is', without any express or implied warranty.
  In no event will the authors be held liable for any damages arising from the use of this software.
 #include "btMultiBody.h"
 #include "btMultiBodyLink.h"
 #include "btMultiBodyLinkCollider.h"
-
+#include "btMultiBodyJointFeedback.h"
+#include "LinearMath/btTransformUtil.h"
+#include "LinearMath/btSerializer.h"
 // #define INCLUDE_GYRO_TERM 
 
+///todo: determine if we need these options. If so, make a proper API, otherwise delete those globals
+bool gJointFeedbackInWorldSpace = false;
+bool gJointFeedbackInJointFrame = false;
+
 namespace {
     const btScalar SLEEP_EPSILON = btScalar(0.05);  // this is a squared velocity (m^2 s^-2)
     const btScalar SLEEP_TIMEOUT = btScalar(2);     // in seconds
 }
 
-
-
-
-//
-// Various spatial helper functions
-//
-
 namespace {
     void SpatialTransform(const btMatrix3x3 &rotation_matrix,  // rotates vectors in 'from' frame to vectors in 'to' frame
                           const btVector3 &displacement,     // vector from origin of 'from' frame to origin of 'to' frame, in 'to' coordinates
@@ -59,7 +59,7 @@ namespace {
                                  btVector3 &bottom_out)
     {
         top_out = rotation_matrix.transpose() * top_in;
-        bottom_out = rotation_matrix.transpose() * (bottom_in + displacement.cross(top_in));
+        bottom_out = rotation_matrix.transpose() * (bottom_in + displacement.cross(top_in));           
     }
 
     btScalar SpatialDotProduct(const btVector3 &a_top,
@@ -69,6 +69,17 @@ namespace {
     {
         return a_bottom.dot(b_top) + a_top.dot(b_bottom);
     }
+
+       void SpatialCrossProduct(const btVector3 &a_top,
+                            const btVector3 &a_bottom,
+                            const btVector3 &b_top,
+                            const btVector3 &b_bottom,
+                                                       btVector3 &top_out,
+                                                       btVector3 &bottom_out)
+       {
+               top_out = a_top.cross(b_top);
+               bottom_out = a_bottom.cross(b_top) + a_top.cross(b_bottom);
+       }
 }
 
 
@@ -79,133 +90,331 @@ namespace {
 btMultiBody::btMultiBody(int n_links,
                      btScalar mass,
                      const btVector3 &inertia,
-                     bool fixed_base_,
-                     bool can_sleep_)
-    : base_quat(0, 0, 0, 1),
-      base_mass(mass),
-      base_inertia(inertia),
+                     bool fixedBase,
+                     bool canSleep,
+                    bool /*deprecatedUseMultiDof*/)
+    : 
+       m_baseCollider(0),
+               m_baseName(0),
+       m_basePos(0,0,0),
+       m_baseQuat(0, 0, 0, 1),
+      m_baseMass(mass),
+      m_baseInertia(inertia),
     
-               fixed_base(fixed_base_),
-               awake(true),
-               can_sleep(can_sleep_),
-               sleep_timer(0),
-               m_baseCollider(0),
+               m_fixedBase(fixedBase),
+               m_awake(true),
+               m_canSleep(canSleep),
+               m_sleepTimer(0),
+               
                m_linearDamping(0.04f),
                m_angularDamping(0.04f),
                m_useGyroTerm(true),
-               m_maxAppliedImpulse(1000.f),
-               m_hasSelfCollision(true)
+                       m_maxAppliedImpulse(1000.f),
+               m_maxCoordinateVelocity(100.f),
+                       m_hasSelfCollision(true),               
+               __posUpdated(false),
+                       m_dofCount(0),
+               m_posVarCnt(0),
+               m_useRK4(false),        
+               m_useGlobalVelocities(false),
+               m_internalNeedsJointFeedback(false)
 {
-        links.resize(n_links);
+       m_links.resize(n_links);
+       m_matrixBuf.resize(n_links + 1);
+
 
-       vector_buf.resize(2*n_links);
-    matrix_buf.resize(n_links + 1);
-       m_real_buf.resize(6 + 2*n_links);
-    base_pos.setValue(0, 0, 0);
-    base_force.setValue(0, 0, 0);
-    base_torque.setValue(0, 0, 0);
+    m_baseForce.setValue(0, 0, 0);
+    m_baseTorque.setValue(0, 0, 0);
 }
 
 btMultiBody::~btMultiBody()
 {
 }
 
+void btMultiBody::setupFixed(int i,
+                                                  btScalar mass,
+                                                  const btVector3 &inertia,
+                                                  int parent,
+                                                  const btQuaternion &rotParentToThis,
+                                                  const btVector3 &parentComToThisPivotOffset,
+                           const btVector3 &thisPivotToThisComOffset, bool /*deprecatedDisableParentCollision*/)
+{
+       
+       m_links[i].m_mass = mass;
+    m_links[i].m_inertiaLocal = inertia;
+    m_links[i].m_parent = parent;
+    m_links[i].m_zeroRotParentToThis = rotParentToThis;
+       m_links[i].m_dVector = thisPivotToThisComOffset;
+    m_links[i].m_eVector = parentComToThisPivotOffset;    
+
+       m_links[i].m_jointType = btMultibodyLink::eFixed;
+       m_links[i].m_dofCount = 0;
+       m_links[i].m_posVarCount = 0;
+
+       m_links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;    
+       
+       m_links[i].updateCacheMultiDof();
+
+       updateLinksDofOffsets();
+
+}
+
+
 void btMultiBody::setupPrismatic(int i,
                                btScalar mass,
                                const btVector3 &inertia,
                                int parent,
-                               const btQuaternion &rot_parent_to_this,
-                               const btVector3 &joint_axis,
-                               const btVector3 &r_vector_when_q_zero,
+                               const btQuaternion &rotParentToThis,
+                               const btVector3 &jointAxis,
+                               const btVector3 &parentComToThisPivotOffset,
+                                                          const btVector3 &thisPivotToThisComOffset,
                                                           bool disableParentCollision)
 {
-    links[i].mass = mass;
-    links[i].inertia = inertia;
-    links[i].parent = parent;
-    links[i].zero_rot_parent_to_this = rot_parent_to_this;
-    links[i].axis_top.setValue(0,0,0);
-    links[i].axis_bottom = joint_axis;
-    links[i].e_vector = r_vector_when_q_zero;
-    links[i].is_revolute = false;
-    links[i].cached_rot_parent_to_this = rot_parent_to_this;
-       if (disableParentCollision)
-               links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;
+       m_dofCount += 1;
+       m_posVarCnt += 1;
+       
+    m_links[i].m_mass = mass;
+    m_links[i].m_inertiaLocal = inertia;
+    m_links[i].m_parent = parent;
+    m_links[i].m_zeroRotParentToThis = rotParentToThis;
+    m_links[i].setAxisTop(0, 0., 0., 0.);
+    m_links[i].setAxisBottom(0, jointAxis);
+    m_links[i].m_eVector = parentComToThisPivotOffset;
+       m_links[i].m_dVector = thisPivotToThisComOffset;
+    m_links[i].m_cachedRotParentToThis = rotParentToThis;
+
+       m_links[i].m_jointType = btMultibodyLink::ePrismatic;
+       m_links[i].m_dofCount = 1;
+       m_links[i].m_posVarCount = 1;   
+       m_links[i].m_jointPos[0] = 0.f;
+       m_links[i].m_jointTorque[0] = 0.f;
 
-    links[i].updateCache();
+       if (disableParentCollision)
+               m_links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;
+       //
+       
+       m_links[i].updateCacheMultiDof();
+       
+       updateLinksDofOffsets();
 }
 
 void btMultiBody::setupRevolute(int i,
                               btScalar mass,
                               const btVector3 &inertia,
                               int parent,
-                              const btQuaternion &zero_rot_parent_to_this,
-                              const btVector3 &joint_axis,
-                              const btVector3 &parent_axis_position,
-                              const btVector3 &my_axis_position,
+                              const btQuaternion &rotParentToThis,
+                              const btVector3 &jointAxis,
+                              const btVector3 &parentComToThisPivotOffset,
+                              const btVector3 &thisPivotToThisComOffset,
                                                          bool disableParentCollision)
 {
-    links[i].mass = mass;
-    links[i].inertia = inertia;
-    links[i].parent = parent;
-    links[i].zero_rot_parent_to_this = zero_rot_parent_to_this;
-    links[i].axis_top = joint_axis;
-    links[i].axis_bottom = joint_axis.cross(my_axis_position);
-    links[i].d_vector = my_axis_position;
-    links[i].e_vector = parent_axis_position;
-    links[i].is_revolute = true;
+       m_dofCount += 1;
+       m_posVarCnt += 1;
+       
+    m_links[i].m_mass = mass;
+    m_links[i].m_inertiaLocal = inertia;
+    m_links[i].m_parent = parent;
+    m_links[i].m_zeroRotParentToThis = rotParentToThis;
+    m_links[i].setAxisTop(0, jointAxis);
+    m_links[i].setAxisBottom(0, jointAxis.cross(thisPivotToThisComOffset));
+    m_links[i].m_dVector = thisPivotToThisComOffset;
+    m_links[i].m_eVector = parentComToThisPivotOffset;
+
+       m_links[i].m_jointType = btMultibodyLink::eRevolute;
+       m_links[i].m_dofCount = 1;
+       m_links[i].m_posVarCount = 1;   
+       m_links[i].m_jointPos[0] = 0.f;
+       m_links[i].m_jointTorque[0] = 0.f;
+
+       if (disableParentCollision)
+               m_links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;
+    //
+       m_links[i].updateCacheMultiDof();
+       //
+       updateLinksDofOffsets();
+}
+
+
+
+void btMultiBody::setupSpherical(int i,
+                                                  btScalar mass,
+                                                  const btVector3 &inertia,
+                                                  int parent,
+                                                  const btQuaternion &rotParentToThis,
+                                                  const btVector3 &parentComToThisPivotOffset,
+                                                  const btVector3 &thisPivotToThisComOffset,
+                                                  bool disableParentCollision)
+{
+       
+       m_dofCount += 3;
+       m_posVarCnt += 4;
+
+       m_links[i].m_mass = mass;
+    m_links[i].m_inertiaLocal = inertia;
+    m_links[i].m_parent = parent;
+    m_links[i].m_zeroRotParentToThis = rotParentToThis;    
+    m_links[i].m_dVector = thisPivotToThisComOffset;
+    m_links[i].m_eVector = parentComToThisPivotOffset;    
+
+       m_links[i].m_jointType = btMultibodyLink::eSpherical;
+       m_links[i].m_dofCount = 3;
+       m_links[i].m_posVarCount = 4;
+       m_links[i].setAxisTop(0, 1.f, 0.f, 0.f);
+       m_links[i].setAxisTop(1, 0.f, 1.f, 0.f);
+       m_links[i].setAxisTop(2, 0.f, 0.f, 1.f);
+       m_links[i].setAxisBottom(0, m_links[i].getAxisTop(0).cross(thisPivotToThisComOffset));
+       m_links[i].setAxisBottom(1, m_links[i].getAxisTop(1).cross(thisPivotToThisComOffset));
+       m_links[i].setAxisBottom(2, m_links[i].getAxisTop(2).cross(thisPivotToThisComOffset));
+       m_links[i].m_jointPos[0] = m_links[i].m_jointPos[1] = m_links[i].m_jointPos[2] = 0.f; m_links[i].m_jointPos[3] = 1.f;
+       m_links[i].m_jointTorque[0] = m_links[i].m_jointTorque[1] = m_links[i].m_jointTorque[2] = 0.f;
+
+
        if (disableParentCollision)
-               links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;
-    links[i].updateCache();
+               m_links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;    
+       //
+       m_links[i].updateCacheMultiDof();       
+       //
+       updateLinksDofOffsets();
 }
 
+void btMultiBody::setupPlanar(int i,
+                                                  btScalar mass,
+                                                  const btVector3 &inertia,
+                                                  int parent,
+                                                  const btQuaternion &rotParentToThis,
+                                                  const btVector3 &rotationAxis,
+                                                  const btVector3 &parentComToThisComOffset,                                              
+                                                  bool disableParentCollision)
+{
+       
+       m_dofCount += 3;
+       m_posVarCnt += 3;
+
+       m_links[i].m_mass = mass;
+    m_links[i].m_inertiaLocal = inertia;
+    m_links[i].m_parent = parent;
+    m_links[i].m_zeroRotParentToThis = rotParentToThis;    
+       m_links[i].m_dVector.setZero();
+    m_links[i].m_eVector = parentComToThisComOffset;
+
+       //
+       static btVector3 vecNonParallelToRotAxis(1, 0, 0);
+       if(rotationAxis.normalized().dot(vecNonParallelToRotAxis) > 0.999)
+               vecNonParallelToRotAxis.setValue(0, 1, 0);
+       //
+
+       m_links[i].m_jointType = btMultibodyLink::ePlanar;
+       m_links[i].m_dofCount = 3;
+       m_links[i].m_posVarCount = 3;
+       btVector3 n=rotationAxis.normalized();
+       m_links[i].setAxisTop(0, n[0],n[1],n[2]);
+       m_links[i].setAxisTop(1,0,0,0);
+       m_links[i].setAxisTop(2,0,0,0);
+       m_links[i].setAxisBottom(0,0,0,0);
+       btVector3 cr = m_links[i].getAxisTop(0).cross(vecNonParallelToRotAxis);
+       m_links[i].setAxisBottom(1,cr[0],cr[1],cr[2]);
+       cr = m_links[i].getAxisBottom(1).cross(m_links[i].getAxisTop(0));
+       m_links[i].setAxisBottom(2,cr[0],cr[1],cr[2]);
+       m_links[i].m_jointPos[0] = m_links[i].m_jointPos[1] = m_links[i].m_jointPos[2] = 0.f;
+       m_links[i].m_jointTorque[0] = m_links[i].m_jointTorque[1] = m_links[i].m_jointTorque[2] = 0.f;
 
+       if (disableParentCollision)
+               m_links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;
+    //
+       m_links[i].updateCacheMultiDof();
+       //
+       updateLinksDofOffsets();
+}
 
+void btMultiBody::finalizeMultiDof()
+{
+       m_deltaV.resize(0);
+       m_deltaV.resize(6 + m_dofCount);
+       m_realBuf.resize(6 + m_dofCount + m_dofCount*m_dofCount + 6 + m_dofCount);                      //m_dofCount for joint-space vels + m_dofCount^2 for "D" matrices + delta-pos vector (6 base "vels" + joint "vels")
+       m_vectorBuf.resize(2 * m_dofCount);                                                                                                     //two 3-vectors (i.e. one six-vector) for each system dof       ("h" matrices)
 
+       updateLinksDofOffsets();
+}
        
 int btMultiBody::getParent(int i) const
 {
-    return links[i].parent;
+    return m_links[i].m_parent;
 }
 
 btScalar btMultiBody::getLinkMass(int i) const
 {
-    return links[i].mass;
+    return m_links[i].m_mass;
 }
 
 const btVector3 & btMultiBody::getLinkInertia(int i) const
 {
-    return links[i].inertia;
+    return m_links[i].m_inertiaLocal;
 }
 
 btScalar btMultiBody::getJointPos(int i) const
 {
-    return links[i].joint_pos;
+    return m_links[i].m_jointPos[0];
 }
 
 btScalar btMultiBody::getJointVel(int i) const
 {
-    return m_real_buf[6 + i];
+    return m_realBuf[6 + m_links[i].m_dofOffset];
 }
 
+btScalar * btMultiBody::getJointPosMultiDof(int i)
+{
+       return &m_links[i].m_jointPos[0];
+}
+
+btScalar * btMultiBody::getJointVelMultiDof(int i)
+{
+       return &m_realBuf[6 + m_links[i].m_dofOffset];
+}
+
+const btScalar * btMultiBody::getJointPosMultiDof(int i) const 
+{
+       return &m_links[i].m_jointPos[0];
+}
+
+const btScalar * btMultiBody::getJointVelMultiDof(int i) const 
+{
+       return &m_realBuf[6 + m_links[i].m_dofOffset];
+}
+
+
 void btMultiBody::setJointPos(int i, btScalar q)
 {
-    links[i].joint_pos = q;
-    links[i].updateCache();
+    m_links[i].m_jointPos[0] = q;
+    m_links[i].updateCacheMultiDof();
+}
+
+void btMultiBody::setJointPosMultiDof(int i, btScalar *q)
+{
+       for(int pos = 0; pos < m_links[i].m_posVarCount; ++pos)
+               m_links[i].m_jointPos[pos] = q[pos];
+
+    m_links[i].updateCacheMultiDof();
 }
 
 void btMultiBody::setJointVel(int i, btScalar qdot)
 {
-    m_real_buf[6 + i] = qdot;
+    m_realBuf[6 + m_links[i].m_dofOffset] = qdot;
+}
+
+void btMultiBody::setJointVelMultiDof(int i, btScalar *qdot)
+{
+       for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               m_realBuf[6 + m_links[i].m_dofOffset + dof] = qdot[dof];
 }
 
 const btVector3 & btMultiBody::getRVector(int i) const
 {
-    return links[i].cached_r_vector;
+    return m_links[i].m_cachedRVector;
 }
 
 const btQuaternion & btMultiBody::getParentToLocalRot(int i) const
 {
-    return links[i].cached_rot_parent_to_this;
+    return m_links[i].m_cachedRotParentToThis;
 }
 
 btVector3 btMultiBody::localPosToWorld(int i, const btVector3 &local_pos) const
@@ -260,20 +469,20 @@ void btMultiBody::compTreeLinkVelocities(btVector3 *omega, btVector3 *vel) const
 {
        int num_links = getNumLinks();
     // Calculates the velocities of each link (and the base) in its local frame
-    omega[0] = quatRotate(base_quat ,getBaseOmega());
-    vel[0] = quatRotate(base_quat ,getBaseVel());
+    omega[0] = quatRotate(m_baseQuat ,getBaseOmega());
+    vel[0] = quatRotate(m_baseQuat ,getBaseVel());
     
     for (int i = 0; i < num_links; ++i) {
-        const int parent = links[i].parent;
+        const int parent = m_links[i].m_parent;
 
         // transform parent vel into this frame, store in omega[i+1], vel[i+1]
-        SpatialTransform(btMatrix3x3(links[i].cached_rot_parent_to_this), links[i].cached_r_vector,
+        SpatialTransform(btMatrix3x3(m_links[i].m_cachedRotParentToThis), m_links[i].m_cachedRVector,
                          omega[parent+1], vel[parent+1],
                          omega[i+1], vel[i+1]);
 
         // now add qidot * shat_i
-        omega[i+1] += getJointVel(i) * links[i].axis_top;
-        vel[i+1] += getJointVel(i) * links[i].axis_bottom;
+        omega[i+1] += getJointVel(i) * m_links[i].getAxisTop(0);
+        vel[i+1] += getJointVel(i) * m_links[i].getAxisBottom(0);
     }
 }
 
@@ -286,12 +495,12 @@ btScalar btMultiBody::getKineticEnergy() const
     compTreeLinkVelocities(&omega[0], &vel[0]);
 
     // we will do the factor of 0.5 at the end
-    btScalar result = base_mass * vel[0].dot(vel[0]);
-    result += omega[0].dot(base_inertia * omega[0]);
+    btScalar result = m_baseMass * vel[0].dot(vel[0]);
+    result += omega[0].dot(m_baseInertia * omega[0]);
     
     for (int i = 0; i < num_links; ++i) {
-        result += links[i].mass * vel[i+1].dot(vel[i+1]);
-        result += omega[i+1].dot(links[i].inertia * omega[i+1]);
+        result += m_links[i].m_mass * vel[i+1].dot(vel[i+1]);
+        result += omega[i+1].dot(m_links[i].m_inertiaLocal * omega[i+1]);
     }
 
     return 0.5f * result;
@@ -306,27 +515,38 @@ btVector3 btMultiBody::getAngularMomentum() const
     btAlignedObjectArray<btQuaternion> rot_from_world;rot_from_world.resize(num_links+1);
     compTreeLinkVelocities(&omega[0], &vel[0]);
 
-    rot_from_world[0] = base_quat;
-    btVector3 result = quatRotate(rot_from_world[0].inverse() , (base_inertia * omega[0]));
+    rot_from_world[0] = m_baseQuat;
+    btVector3 result = quatRotate(rot_from_world[0].inverse() , (m_baseInertia * omega[0]));
 
     for (int i = 0; i < num_links; ++i) {
-        rot_from_world[i+1] = links[i].cached_rot_parent_to_this * rot_from_world[links[i].parent+1];
-        result += (quatRotate(rot_from_world[i+1].inverse() , (links[i].inertia * omega[i+1])));
+        rot_from_world[i+1] = m_links[i].m_cachedRotParentToThis * rot_from_world[m_links[i].m_parent+1];
+        result += (quatRotate(rot_from_world[i+1].inverse() , (m_links[i].m_inertiaLocal * omega[i+1])));
     }
 
     return result;
 }
 
+void btMultiBody::clearConstraintForces()
+{
+       m_baseConstraintForce.setValue(0, 0, 0);
+       m_baseConstraintTorque.setValue(0, 0, 0);
+
 
+    for (int i = 0; i < getNumLinks(); ++i) {
+        m_links[i].m_appliedConstraintForce.setValue(0, 0, 0);
+        m_links[i].m_appliedConstraintTorque.setValue(0, 0, 0);
+    }
+}
 void btMultiBody::clearForcesAndTorques()
 {
-    base_force.setValue(0, 0, 0);
-    base_torque.setValue(0, 0, 0);
+    m_baseForce.setValue(0, 0, 0);
+    m_baseTorque.setValue(0, 0, 0);
 
+       
     for (int i = 0; i < getNumLinks(); ++i) {
-        links[i].applied_force.setValue(0, 0, 0);
-        links[i].applied_torque.setValue(0, 0, 0);
-        links[i].joint_torque = 0;
+        m_links[i].m_appliedForce.setValue(0, 0, 0);
+        m_links[i].m_appliedTorque.setValue(0, 0, 0);
+               m_links[i].m_jointTorque[0] = m_links[i].m_jointTorque[1] = m_links[i].m_jointTorque[2] = m_links[i].m_jointTorque[3] = m_links[i].m_jointTorque[4] = m_links[i].m_jointTorque[5] = 0.f;
     }
 }
 
@@ -334,54 +554,81 @@ void btMultiBody::clearVelocities()
 {
        for (int i = 0; i < 6 + getNumLinks(); ++i) 
        {
-               m_real_buf[i] = 0.f;
+               m_realBuf[i] = 0.f;
        }
 }
 void btMultiBody::addLinkForce(int i, const btVector3 &f)
 {
-    links[i].applied_force += f;
+    m_links[i].m_appliedForce += f;
 }
 
 void btMultiBody::addLinkTorque(int i, const btVector3 &t)
 {
-    links[i].applied_torque += t;
+    m_links[i].m_appliedTorque += t;
 }
 
+void btMultiBody::addLinkConstraintForce(int i, const btVector3 &f)
+{
+    m_links[i].m_appliedConstraintForce += f;
+}
+
+void btMultiBody::addLinkConstraintTorque(int i, const btVector3 &t)
+{
+    m_links[i].m_appliedConstraintTorque += t;
+}
+
+
+
 void btMultiBody::addJointTorque(int i, btScalar Q)
 {
-    links[i].joint_torque += Q;
+    m_links[i].m_jointTorque[0] += Q;
+}
+
+void btMultiBody::addJointTorqueMultiDof(int i, int dof, btScalar Q)
+{
+       m_links[i].m_jointTorque[dof] += Q;
+}
+
+void btMultiBody::addJointTorqueMultiDof(int i, const btScalar *Q)
+{
+       for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               m_links[i].m_jointTorque[dof] = Q[dof];
 }
 
 const btVector3 & btMultiBody::getLinkForce(int i) const
 {
-    return links[i].applied_force;
+    return m_links[i].m_appliedForce;
 }
 
 const btVector3 & btMultiBody::getLinkTorque(int i) const
 {
-    return links[i].applied_torque;
+    return m_links[i].m_appliedTorque;
 }
 
 btScalar btMultiBody::getJointTorque(int i) const
 {
-    return links[i].joint_torque;
+    return m_links[i].m_jointTorque[0];
 }
 
+btScalar * btMultiBody::getJointTorqueMultiDof(int i)
+{
+    return &m_links[i].m_jointTorque[0];
+}
 
-inline btMatrix3x3 vecMulVecTranspose(const btVector3& v0, const btVector3& v1Transposed)
+inline btMatrix3x3 outerProduct(const btVector3& v0, const btVector3& v1)                              //renamed it from vecMulVecTranspose (http://en.wikipedia.org/wiki/Outer_product); maybe it should be moved to btVector3 like dot and cross?
 {
                btVector3 row0 = btVector3( 
-                       v0.x() * v1Transposed.x(),
-                       v0.x() * v1Transposed.y(),
-                       v0.x() * v1Transposed.z());
+                       v0.x() * v1.x(),
+                       v0.x() * v1.y(),
+                       v0.x() * v1.z());
                btVector3 row1 = btVector3( 
-                       v0.y() * v1Transposed.x(),
-                       v0.y() * v1Transposed.y(),
-                       v0.y() * v1Transposed.z());
+                       v0.y() * v1.x(),
+                       v0.y() * v1.y(),
+                       v0.y() * v1.z());
                btVector3 row2 = btVector3( 
-                       v0.z() * v1Transposed.x(),
-                       v0.z() * v1Transposed.y(),
-                       v0.z() * v1Transposed.z());
+                       v0.z() * v1.x(),
+                       v0.z() * v1.y(),
+                       v0.z() * v1.z());
 
         btMatrix3x3 m(row0[0],row0[1],row0[2],
                                                row1[0],row1[1],row1[2],
@@ -389,11 +636,14 @@ inline btMatrix3x3 vecMulVecTranspose(const btVector3& v0, const btVector3& v1Tr
                return m;
 }
 
+#define vecMulVecTranspose(v0, v1Transposed) outerProduct(v0, v1Transposed)
+//
 
-void btMultiBody::stepVelocities(btScalar dt,
+void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar dt,
                                btAlignedObjectArray<btScalar> &scratch_r,
                                btAlignedObjectArray<btVector3> &scratch_v,
-                               btAlignedObjectArray<btMatrix3x3> &scratch_m)
+                               btAlignedObjectArray<btMatrix3x3> &scratch_m,
+                               bool isConstraintPass)
 {
     // Implement Featherstone's algorithm to calculate joint accelerations (q_double_dot)
     // and the base linear & angular accelerations.
@@ -405,6 +655,12 @@ void btMultiBody::stepVelocities(btScalar dt,
     // Format is: 3 angular accelerations (in world frame), 3 linear accelerations (in world frame),
     // num_links joint acceleration values.
     
+       // We added support for multi degree of freedom (multi dof) joints.
+       // In addition we also can compute the joint reaction forces. This is performed in a second pass,
+       // so that we can include the effect of the constraint solver forces (computed in the PGS LCP solver)
+
+       m_internalNeedsJointFeedback = false;
+
        int num_links = getNumLinks();
 
     const btScalar DAMPING_K1_LINEAR = m_linearDamping;
@@ -419,261 +675,513 @@ void btMultiBody::stepVelocities(btScalar dt,
     // Temporary matrices/vectors -- use scratch space from caller
     // so that we don't have to keep reallocating every frame
 
-    scratch_r.resize(2*num_links + 6);
+    scratch_r.resize(2*m_dofCount + 6);                                //multidof? ("Y"s use it and it is used to store qdd) => 2 x m_dofCount
     scratch_v.resize(8*num_links + 6);
     scratch_m.resize(4*num_links + 4);
 
-    btScalar * r_ptr = &scratch_r[0];
-    btScalar * output = &scratch_r[num_links];  // "output" holds the q_double_dot results
+       //btScalar * r_ptr = &scratch_r[0];
+    btScalar * output = &scratch_r[m_dofCount];  // "output" holds the q_double_dot results
     btVector3 * v_ptr = &scratch_v[0];
     
-    // vhat_i  (top = angular, bottom = linear part)
-    btVector3 * vel_top_angular = v_ptr; v_ptr += num_links + 1;
-    btVector3 * vel_bottom_linear = v_ptr; v_ptr += num_links + 1;
-
-    // zhat_i^A
-    btVector3 * zero_acc_top_angular = v_ptr; v_ptr += num_links + 1;
-    btVector3 * zero_acc_bottom_linear = v_ptr; v_ptr += num_links + 1;
-
-    // chat_i  (note NOT defined for the base)
-    btVector3 * coriolis_top_angular = v_ptr; v_ptr += num_links;
-    btVector3 * coriolis_bottom_linear = v_ptr; v_ptr += num_links;
-
-    // top left, top right and bottom left blocks of Ihat_i^A.
-    // bottom right block = transpose of top left block and is not stored.
-    // Note: the top right and bottom left blocks are always symmetric matrices, but we don't make use of this fact currently.
-    btMatrix3x3 * inertia_top_left = &scratch_m[num_links + 1];
-    btMatrix3x3 * inertia_top_right = &scratch_m[2*num_links + 2];
-    btMatrix3x3 * inertia_bottom_left = &scratch_m[3*num_links + 3];
+    // vhat_i  (top = angular, bottom = linear part)   
+       btSpatialMotionVector *spatVel = (btSpatialMotionVector *)v_ptr;
+       v_ptr += num_links * 2 + 2;
+       //
+    // zhat_i^A    
+       btSpatialForceVector * zeroAccSpatFrc = (btSpatialForceVector *)v_ptr;
+       v_ptr += num_links * 2 + 2;
+       //
+    // chat_i  (note NOT defined for the base)    
+       btSpatialMotionVector * spatCoriolisAcc = (btSpatialMotionVector *)v_ptr;
+       v_ptr += num_links * 2;
+       //
+    // Ihat_i^A.    
+       btSymmetricSpatialDyad * spatInertia = (btSymmetricSpatialDyad *)&scratch_m[num_links + 1];
 
     // Cached 3x3 rotation matrices from parent frame to this frame.
-    btMatrix3x3 * rot_from_parent = &matrix_buf[0];
+    btMatrix3x3 * rot_from_parent = &m_matrixBuf[0];
     btMatrix3x3 * rot_from_world = &scratch_m[0];
 
     // hhat_i, ahat_i
-    // hhat is NOT stored for the base (but ahat is)
-    btVector3 * h_top = num_links > 0 ? &vector_buf[0] : 0;
-    btVector3 * h_bottom = num_links > 0 ? &vector_buf[num_links] : 0;
-    btVector3 * accel_top = v_ptr; v_ptr += num_links + 1;
-    btVector3 * accel_bottom = v_ptr; v_ptr += num_links + 1;
-
-    // Y_i, D_i
-    btScalar * Y = r_ptr; r_ptr += num_links;
-    btScalar * D = num_links > 0 ? &m_real_buf[6 + num_links] : 0;
+    // hhat is NOT stored for the base (but ahat is)    
+       btSpatialForceVector * h = (btSpatialForceVector *)(m_dofCount > 0 ? &m_vectorBuf[0] : 0);
+       btSpatialMotionVector * spatAcc = (btSpatialMotionVector *)v_ptr;
+       v_ptr += num_links * 2 + 2;
+       //
+    // Y_i, invD_i
+    btScalar * invD = m_dofCount > 0 ? &m_realBuf[6 + m_dofCount] : 0;
+       btScalar * Y = &scratch_r[0];
+       //
+       //aux variables 
+       static btSpatialMotionVector spatJointVel;                                      //spatial velocity due to the joint motion (i.e. without predecessors' influence)
+       static btScalar D[36];                                                                          //"D" matrix; it's dofxdof for each body so asingle 6x6 D matrix will do        
+       static btScalar invD_times_Y[6];                                                        //D^{-1} * Y [dofxdof x dofx1 = dofx1] <=> D^{-1} * u; better moved to buffers since it is recalced in calcAccelerationDeltasMultiDof; num_dof of btScalar would cover all bodies       
+       static btSpatialMotionVector result;                                                    //holds results of the SolveImatrix op; it is a spatial motion vector (accel)
+       static btScalar Y_minus_hT_a[6];                                                        //Y - h^{T} * a; it's dofx1 for each body so a single 6x1 temp is enough        
+       static btSpatialForceVector spatForceVecTemps[6];                               //6 temporary spatial force vectors
+       static btSpatialTransformationMatrix fromParent;                                //spatial transform from parent to child
+       static btSymmetricSpatialDyad dyadTemp;                                         //inertia matrix temp
+       static btSpatialTransformationMatrix fromWorld;
+       fromWorld.m_trnVec.setZero();
+       /////////////////
 
     // ptr to the joint accel part of the output
     btScalar * joint_accel = output + 6;
 
-
     // Start of the algorithm proper.
     
     // First 'upward' loop.
     // Combines CompTreeLinkVelocities and InitTreeLinks from Mirtich.
 
-    rot_from_parent[0] = btMatrix3x3(base_quat);
+    rot_from_parent[0] = btMatrix3x3(m_baseQuat);                              //m_baseQuat assumed to be alias!?
 
-    vel_top_angular[0] = rot_from_parent[0] * base_omega;
-    vel_bottom_linear[0] = rot_from_parent[0] * base_vel;
-
-    if (fixed_base) {
-        zero_acc_top_angular[0] = zero_acc_bottom_linear[0] = btVector3(0,0,0);
-    } else {
-        zero_acc_top_angular[0] = - (rot_from_parent[0] * (base_force 
-                                                   - base_mass*(DAMPING_K1_LINEAR+DAMPING_K2_LINEAR*base_vel.norm())*base_vel));
-               
-        zero_acc_bottom_linear[0] =
-            - (rot_from_parent[0] * base_torque);
+       //create the vector of spatial velocity of the base by transforming global-coor linear and angular velocities into base-local coordinates
+       spatVel[0].setVector(rot_from_parent[0] * base_omega, rot_from_parent[0] * base_vel);
 
+    if (m_fixedBase) 
+       {        
+               zeroAccSpatFrc[0].setZero();
+    }
+       else 
+       {
+               btVector3 baseForce = isConstraintPass? m_baseConstraintForce : m_baseForce;
+               btVector3 baseTorque = isConstraintPass? m_baseConstraintTorque : m_baseTorque;
+               //external forces               
+               zeroAccSpatFrc[0].setVector(-(rot_from_parent[0] * baseTorque), -(rot_from_parent[0] * baseForce));     
+
+               //adding damping terms (only)
+               btScalar linDampMult = 1., angDampMult = 1.;
+               zeroAccSpatFrc[0].addVector(angDampMult * m_baseInertia * spatVel[0].getAngular() * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR * spatVel[0].getAngular().norm()),
+                                                                  linDampMult * m_baseMass * spatVel[0].getLinear() * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR * spatVel[0].getLinear().norm()));
+
+               //
+               //p += vhat x Ihat vhat - done in a simpler way
                if (m_useGyroTerm)
-                       zero_acc_bottom_linear[0]+=vel_top_angular[0].cross( base_inertia * vel_top_angular[0] );
+                       zeroAccSpatFrc[0].addAngular(spatVel[0].getAngular().cross(m_baseInertia * spatVel[0].getAngular()));
+               //
+               zeroAccSpatFrc[0].addLinear(m_baseMass * spatVel[0].getAngular().cross(spatVel[0].getLinear()));
+    }
 
-        zero_acc_bottom_linear[0] += base_inertia * vel_top_angular[0] * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR*vel_top_angular[0].norm());
 
-    }
+       //init the spatial AB inertia (it has the simple form thanks to choosing local body frames origins at their COMs)
+       spatInertia[0].setMatrix(       btMatrix3x3(0,0,0,0,0,0,0,0,0),
+                                                               //
+                                                               btMatrix3x3(m_baseMass, 0, 0,
+                                                                                       0, m_baseMass, 0,
+                                                                                       0, 0, m_baseMass),
+                                                               //
+                                                               btMatrix3x3(m_baseInertia[0], 0, 0,
+                                                                                       0, m_baseInertia[1], 0,
+                                                                                       0, 0, m_baseInertia[2])
+                                                       );
 
+    rot_from_world[0] = rot_from_parent[0];
 
+       //
+    for (int i = 0; i < num_links; ++i) {              
+        const int parent = m_links[i].m_parent;
+        rot_from_parent[i+1] = btMatrix3x3(m_links[i].m_cachedRotParentToThis);
+        rot_from_world[i+1] = rot_from_parent[i+1] * rot_from_world[parent+1];
 
-    inertia_top_left[0] = btMatrix3x3(0,0,0,0,0,0,0,0,0);//::Zero();
-       
-       
-    inertia_top_right[0].setValue(base_mass, 0, 0,
-                            0, base_mass, 0,
-                            0, 0, base_mass);
-    inertia_bottom_left[0].setValue(base_inertia[0], 0, 0,
-                              0, base_inertia[1], 0,
-                              0, 0, base_inertia[2]);
+               fromParent.m_rotMat = rot_from_parent[i+1]; fromParent.m_trnVec = m_links[i].m_cachedRVector;
+               fromWorld.m_rotMat = rot_from_world[i+1];
+               fromParent.transform(spatVel[parent+1], spatVel[i+1]);
 
-    rot_from_world[0] = rot_from_parent[0];
+               // now set vhat_i to its true value by doing
+        // vhat_i += qidot * shat_i                    
+               if(!m_useGlobalVelocities)
+               {
+                       spatJointVel.setZero();
 
-    for (int i = 0; i < num_links; ++i) {
-        const int parent = links[i].parent;
-        rot_from_parent[i+1] = btMatrix3x3(links[i].cached_rot_parent_to_this);
-               
+                       for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)            
+                               spatJointVel += m_links[i].m_axes[dof] * getJointVelMultiDof(i)[dof];
 
-        rot_from_world[i+1] = rot_from_parent[i+1] * rot_from_world[parent+1];
-        
-        // vhat_i = i_xhat_p(i) * vhat_p(i)
-        SpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
-                         vel_top_angular[parent+1], vel_bottom_linear[parent+1],
-                         vel_top_angular[i+1], vel_bottom_linear[i+1]);
-
-        // we can now calculate chat_i
-        // remember vhat_i is really vhat_p(i) (but in current frame) at this point
-        coriolis_bottom_linear[i] = vel_top_angular[i+1].cross(vel_top_angular[i+1].cross(links[i].cached_r_vector))
-            + 2 * vel_top_angular[i+1].cross(links[i].axis_bottom) * getJointVel(i);
-        if (links[i].is_revolute) {
-            coriolis_top_angular[i] = vel_top_angular[i+1].cross(links[i].axis_top) * getJointVel(i);
-            coriolis_bottom_linear[i] += (getJointVel(i) * getJointVel(i)) * links[i].axis_top.cross(links[i].axis_bottom);
-        } else {
-            coriolis_top_angular[i] = btVector3(0,0,0);
-        }
-        
-        // now set vhat_i to its true value by doing
-        // vhat_i += qidot * shat_i
-        vel_top_angular[i+1] += getJointVel(i) * links[i].axis_top;
-        vel_bottom_linear[i+1] += getJointVel(i) * links[i].axis_bottom;
+                       // remember vhat_i is really vhat_p(i) (but in current frame) at this point     => we need to add velocity across the inboard joint
+                       spatVel[i+1] += spatJointVel;
 
-        // calculate zhat_i^A
-        zero_acc_top_angular[i+1] = - (rot_from_world[i+1] * (links[i].applied_force));
-        zero_acc_top_angular[i+1] += links[i].mass * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR*vel_bottom_linear[i+1].norm()) * vel_bottom_linear[i+1];
+                       //
+                       // vhat_i is vhat_p(i) transformed to local coors + the velocity across the i-th inboard joint
+                       //spatVel[i+1] = fromParent * spatVel[parent+1] + spatJointVel;
 
-        zero_acc_bottom_linear[i+1] =
-            - (rot_from_world[i+1] * links[i].applied_torque);
-               if (m_useGyroTerm)
+               }
+               else
                {
-                       zero_acc_bottom_linear[i+1] += vel_top_angular[i+1].cross( links[i].inertia * vel_top_angular[i+1] );
+                       fromWorld.transformRotationOnly(m_links[i].m_absFrameTotVelocity, spatVel[i+1]);
+                       fromWorld.transformRotationOnly(m_links[i].m_absFrameLocVelocity, spatJointVel);
                }
 
-        zero_acc_bottom_linear[i+1] += links[i].inertia * vel_top_angular[i+1] * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR*vel_top_angular[i+1].norm());
+               // we can now calculate chat_i          
+               spatVel[i+1].cross(spatJointVel, spatCoriolisAcc[i]);           
 
-        // calculate Ihat_i^A
-        inertia_top_left[i+1] = btMatrix3x3(0,0,0,0,0,0,0,0,0);//::Zero();
-        inertia_top_right[i+1].setValue(links[i].mass, 0, 0,
-                                  0, links[i].mass, 0,
-                                  0, 0, links[i].mass);
-        inertia_bottom_left[i+1].setValue(links[i].inertia[0], 0, 0,
-                                    0, links[i].inertia[1], 0,
-                                    0, 0, links[i].inertia[2]);
-    }
+        // calculate zhat_i^A
+               //
+               //external forces               
+               btVector3 linkAppliedForce = isConstraintPass? m_links[i].m_appliedConstraintForce : m_links[i].m_appliedForce;
+               btVector3 linkAppliedTorque =isConstraintPass ? m_links[i].m_appliedConstraintTorque : m_links[i].m_appliedTorque;
+               zeroAccSpatFrc[i+1].setVector(-(rot_from_world[i+1] * linkAppliedTorque), -(rot_from_world[i+1] * linkAppliedForce ));
+       
+#if 0  
+               {
 
+                       b3Printf("stepVelocitiesMultiDof zeroAccSpatFrc[%d] linear:%f,%f,%f, angular:%f,%f,%f",
+                       i+1,
+                       zeroAccSpatFrc[i+1].m_topVec[0],
+                       zeroAccSpatFrc[i+1].m_topVec[1],
+                       zeroAccSpatFrc[i+1].m_topVec[2],
 
+                       zeroAccSpatFrc[i+1].m_bottomVec[0],
+                       zeroAccSpatFrc[i+1].m_bottomVec[1],
+                       zeroAccSpatFrc[i+1].m_bottomVec[2]);
+               }
+#endif
+               //
+               //adding damping terms (only)
+               btScalar linDampMult = 1., angDampMult = 1.;
+               zeroAccSpatFrc[i+1].addVector(angDampMult * m_links[i].m_inertiaLocal * spatVel[i+1].getAngular() * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR * spatVel[i+1].getAngular().norm()),
+                                                                        linDampMult * m_links[i].m_mass * spatVel[i+1].getLinear() * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR * spatVel[i+1].getLinear().norm()));
+               
+        // calculate Ihat_i^A
+               //init the spatial AB inertia (it has the simple form thanks to choosing local body frames origins at their COMs)
+               spatInertia[i+1].setMatrix(     btMatrix3x3(0,0,0,0,0,0,0,0,0),
+                                                                       //
+                                                                       btMatrix3x3(m_links[i].m_mass, 0, 0,
+                                                                                               0, m_links[i].m_mass, 0,
+                                                                                               0, 0, m_links[i].m_mass),
+                                                                       //
+                                                                       btMatrix3x3(m_links[i].m_inertiaLocal[0], 0, 0,
+                                                                                               0, m_links[i].m_inertiaLocal[1], 0,
+                                                                                               0, 0, m_links[i].m_inertiaLocal[2])
+                                                               );
+               //
+               //p += vhat x Ihat vhat - done in a simpler way
+               if(m_useGyroTerm)
+                       zeroAccSpatFrc[i+1].addAngular(spatVel[i+1].getAngular().cross(m_links[i].m_inertiaLocal * spatVel[i+1].getAngular()));                 
+               //              
+               zeroAccSpatFrc[i+1].addLinear(m_links[i].m_mass * spatVel[i+1].getAngular().cross(spatVel[i+1].getLinear()));
+               //btVector3 temp = m_links[i].m_mass * spatVel[i+1].getAngular().cross(spatVel[i+1].getLinear());
+               ////clamp parent's omega
+               //btScalar parOmegaMod = temp.length();
+               //btScalar parOmegaModMax = 1000;
+               //if(parOmegaMod > parOmegaModMax)
+               //      temp *= parOmegaModMax / parOmegaMod;
+               //zeroAccSpatFrc[i+1].addLinear(temp);
+               //printf("|zeroAccSpatFrc[%d]| = %.4f\n", i+1, temp.length());
+               //temp = spatCoriolisAcc[i].getLinear();
+               //printf("|spatCoriolisAcc[%d]| = %.4f\n", i+1, temp.length());
+               
+               
+
+               //printf("w[%d] = [%.4f %.4f %.4f]\n", i, vel_top_angular[i+1].x(), vel_top_angular[i+1].y(), vel_top_angular[i+1].z());
+               //printf("v[%d] = [%.4f %.4f %.4f]\n", i, vel_bottom_linear[i+1].x(), vel_bottom_linear[i+1].y(), vel_bottom_linear[i+1].z());          
+               //printf("c[%d] = [%.4f %.4f %.4f]\n", i, coriolis_bottom_linear[i].x(), coriolis_bottom_linear[i].y(), coriolis_bottom_linear[i].z());
+    }
+       
     // 'Downward' loop.
     // (part of TreeForwardDynamics in Mirtich.)
-    for (int i = num_links - 1; i >= 0; --i) {
-
-        h_top[i] = inertia_top_left[i+1] * links[i].axis_top + inertia_top_right[i+1] * links[i].axis_bottom;
-        h_bottom[i] = inertia_bottom_left[i+1] * links[i].axis_top + inertia_top_left[i+1].transpose() * links[i].axis_bottom;
-               btScalar val = SpatialDotProduct(links[i].axis_top, links[i].axis_bottom, h_top[i], h_bottom[i]);
-        D[i] = val;
-               Y[i] = links[i].joint_torque
-            - SpatialDotProduct(links[i].axis_top, links[i].axis_bottom, zero_acc_top_angular[i+1], zero_acc_bottom_linear[i+1])
-            - SpatialDotProduct(h_top[i], h_bottom[i], coriolis_top_angular[i], coriolis_bottom_linear[i]);
+    for (int i = num_links - 1; i >= 0; --i)
+       {
+               const int parent = m_links[i].m_parent;
+               fromParent.m_rotMat = rot_from_parent[i+1]; fromParent.m_trnVec = m_links[i].m_cachedRVector;
 
-        const int parent = links[i].parent;
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof];
+                       //
+                       hDof = spatInertia[i+1] * m_links[i].m_axes[dof];
+                       //
+                       Y[m_links[i].m_dofOffset + dof] = m_links[i].m_jointTorque[dof]
+                       - m_links[i].m_axes[dof].dot(zeroAccSpatFrc[i+1])
+                       - spatCoriolisAcc[i].dot(hDof)
+                       ;
+               }
 
-        
-        // Ip += pXi * (Ii - hi hi' / Di) * iXp
-        const btScalar one_over_di = 1.0f / D[i];
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       btScalar *D_row = &D[dof * m_links[i].m_dofCount];                      
+                       for(int dof2 = 0; dof2 < m_links[i].m_dofCount; ++dof2)
+                       {
+                               btSpatialForceVector &hDof2 = h[m_links[i].m_dofOffset + dof2];
+                               D_row[dof2] = m_links[i].m_axes[dof].dot(hDof2);
+                       }
+               }
 
-               
+        btScalar *invDi = &invD[m_links[i].m_dofOffset*m_links[i].m_dofOffset];
+               switch(m_links[i].m_jointType)
+               {
+                       case btMultibodyLink::ePrismatic:
+                       case btMultibodyLink::eRevolute:
+                       {
+                               invDi[0] = 1.0f / D[0];
+                               break;
+                       }
+                       case btMultibodyLink::eSpherical:
+                       case btMultibodyLink::ePlanar:
+                       {
+                               static btMatrix3x3 D3x3; D3x3.setValue(D[0], D[1], D[2], D[3], D[4], D[5], D[6], D[7], D[8]);
+                               static btMatrix3x3 invD3x3; invD3x3 = D3x3.inverse();
+
+                               //unroll the loop?
+                               for(int row = 0; row < 3; ++row)
+                               {
+                                       for(int col = 0; col < 3; ++col)
+                                       {                                               
+                                               invDi[row * 3 + col] = invD3x3[row][col];
+                                       }
+                               }
+
+                               break;
+                       }
+                       default:
+                       {
+                       
+                       }
+               }
 
+               //determine h*D^{-1}
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       spatForceVecTemps[dof].setZero();
+
+                       for(int dof2 = 0; dof2 < m_links[i].m_dofCount; ++dof2)
+                       {                               
+                               btSpatialForceVector &hDof2 = h[m_links[i].m_dofOffset + dof2];
+                               //                              
+                               spatForceVecTemps[dof] += hDof2 * invDi[dof2 * m_links[i].m_dofCount + dof];
+                       }
+               }
 
-               const btMatrix3x3 TL = inertia_top_left[i+1]   - vecMulVecTranspose(one_over_di * h_top[i] , h_bottom[i]);
-        const btMatrix3x3 TR = inertia_top_right[i+1]  - vecMulVecTranspose(one_over_di * h_top[i] , h_top[i]);
-        const btMatrix3x3 BL = inertia_bottom_left[i+1]- vecMulVecTranspose(one_over_di * h_bottom[i] , h_bottom[i]);
+               dyadTemp = spatInertia[i+1];
 
+               //determine (h*D^{-1}) * h^{T}
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {                       
+                       btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof];
+                       //
+                       dyadTemp -= symmetricSpatialOuterProduct(hDof, spatForceVecTemps[dof]);
+               }
 
-        btMatrix3x3 r_cross;
-        r_cross.setValue(
-            0, -links[i].cached_r_vector[2], links[i].cached_r_vector[1],
-            links[i].cached_r_vector[2], 0, -links[i].cached_r_vector[0],
-            -links[i].cached_r_vector[1], links[i].cached_r_vector[0], 0);
+               fromParent.transformInverse(dyadTemp, spatInertia[parent+1], btSpatialTransformationMatrix::Add);
         
-        inertia_top_left[parent+1] += rot_from_parent[i+1].transpose() * ( TL - TR * r_cross ) * rot_from_parent[i+1];
-        inertia_top_right[parent+1] += rot_from_parent[i+1].transpose() * TR * rot_from_parent[i+1];
-        inertia_bottom_left[parent+1] += rot_from_parent[i+1].transpose() *
-            (r_cross * (TL - TR * r_cross) + BL - TL.transpose() * r_cross) * rot_from_parent[i+1];
-        
-        
-        // Zp += pXi * (Zi + Ii*ci + hi*Yi/Di)
-        btVector3 in_top, in_bottom, out_top, out_bottom;
-        const btScalar Y_over_D = Y[i] * one_over_di;
-        in_top = zero_acc_top_angular[i+1]
-            + inertia_top_left[i+1] * coriolis_top_angular[i]
-            + inertia_top_right[i+1] * coriolis_bottom_linear[i]
-            + Y_over_D * h_top[i];
-        in_bottom = zero_acc_bottom_linear[i+1]
-            + inertia_bottom_left[i+1] * coriolis_top_angular[i]
-            + inertia_top_left[i+1].transpose() * coriolis_bottom_linear[i]
-            + Y_over_D * h_bottom[i];
-        InverseSpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
-                                in_top, in_bottom, out_top, out_bottom);
-        zero_acc_top_angular[parent+1] += out_top;
-        zero_acc_bottom_linear[parent+1] += out_bottom;
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       invD_times_Y[dof] = 0.f;
+
+                       for(int dof2 = 0; dof2 < m_links[i].m_dofCount; ++dof2)
+                       {
+                               invD_times_Y[dof] += invDi[dof * m_links[i].m_dofCount + dof2] * Y[m_links[i].m_dofOffset + dof2];                              
+                       }       
+               }
+               
+               spatForceVecTemps[0] = zeroAccSpatFrc[i+1] + spatInertia[i+1] * spatCoriolisAcc[i];             
+
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {                               
+                       btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof];
+                       //
+                       spatForceVecTemps[0] += hDof * invD_times_Y[dof];                       
+               }
+               
+               fromParent.transformInverse(spatForceVecTemps[0], spatForceVecTemps[1]);
+               
+               zeroAccSpatFrc[parent+1] += spatForceVecTemps[1];
     }
 
 
     // Second 'upward' loop
     // (part of TreeForwardDynamics in Mirtich)
 
-    if (fixed_base) 
+    if (m_fixedBase) 
        {
-        accel_top[0] = accel_bottom[0] = btVector3(0,0,0);
+        spatAcc[0].setZero();
     } 
        else 
        {
         if (num_links > 0) 
                {
-            //Matrix<btScalar, 6, 6> Imatrix;
-            //Imatrix.block<3,3>(0,0) = inertia_top_left[0];
-            //Imatrix.block<3,3>(3,0) = inertia_bottom_left[0];
-            //Imatrix.block<3,3>(0,3) = inertia_top_right[0];
-            //Imatrix.block<3,3>(3,3) = inertia_top_left[0].transpose();
-            //cached_imatrix_lu.reset(new Eigen::LU<Matrix<btScalar, 6, 6> >(Imatrix));  // TODO: Avoid memory allocation here?
-
-                       cached_inertia_top_left = inertia_top_left[0];
-                       cached_inertia_top_right = inertia_top_right[0];
-                       cached_inertia_lower_left = inertia_bottom_left[0];
-                       cached_inertia_lower_right= inertia_top_left[0].transpose();
-
-        }
-               btVector3 rhs_top (zero_acc_top_angular[0][0], zero_acc_top_angular[0][1], zero_acc_top_angular[0][2]);
-               btVector3 rhs_bot (zero_acc_bottom_linear[0][0], zero_acc_bottom_linear[0][1], zero_acc_bottom_linear[0][2]);
-        float result[6];
-
-               solveImatrix(rhs_top, rhs_bot, result);
-//             printf("result=%f,%f,%f,%f,%f,%f\n",result[0],result[0],result[0],result[0],result[0],result[0]);
-        for (int i = 0; i < 3; ++i) {
-            accel_top[0][i] = -result[i];
-            accel_bottom[0][i] = -result[i+3];
-        }
+                       m_cachedInertiaTopLeft = spatInertia[0].m_topLeftMat;
+                       m_cachedInertiaTopRight = spatInertia[0].m_topRightMat;
+                       m_cachedInertiaLowerLeft = spatInertia[0].m_bottomLeftMat;
+                       m_cachedInertiaLowerRight= spatInertia[0].m_topLeftMat.transpose();
 
+        }              
+               
+               solveImatrix(zeroAccSpatFrc[0], result);
+               spatAcc[0] = -result;
     }
+       
+       
+    // now do the loop over the m_links
+    for (int i = 0; i < num_links; ++i) 
+       {
+               //      qdd = D^{-1} * (Y - h^{T}*apar) = (S^{T}*I*S)^{-1} * (tau - S^{T}*I*cor - S^{T}*zeroAccFrc - S^{T}*I*apar)
+               //      a = apar + cor + Sqdd
+               //or
+               //      qdd = D^{-1} * (Y - h^{T}*(apar+cor))
+               //      a = apar + Sqdd
+
+        const int parent = m_links[i].m_parent;
+               fromParent.m_rotMat = rot_from_parent[i+1]; fromParent.m_trnVec = m_links[i].m_cachedRVector;
+
+               fromParent.transform(spatAcc[parent+1], spatAcc[i+1]);
+               
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof];
+                       //                      
+                       Y_minus_hT_a[dof] = Y[m_links[i].m_dofOffset + dof] - spatAcc[i+1].dot(hDof);                   
+               }
+
+               btScalar *invDi = &invD[m_links[i].m_dofOffset*m_links[i].m_dofOffset];
+               //D^{-1} * (Y - h^{T}*apar)
+               mulMatrix(invDi, Y_minus_hT_a, m_links[i].m_dofCount, m_links[i].m_dofCount, m_links[i].m_dofCount, 1, &joint_accel[m_links[i].m_dofOffset]);
+
+               spatAcc[i+1] += spatCoriolisAcc[i];             
+
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)            
+                       spatAcc[i+1] += m_links[i].m_axes[dof] * joint_accel[m_links[i].m_dofOffset + dof];
+
+               if (m_links[i].m_jointFeedback)
+               {
+                       m_internalNeedsJointFeedback = true;
+
+                       btVector3 angularBotVec = (spatInertia[i+1]*spatAcc[i+1]+zeroAccSpatFrc[i+1]).m_bottomVec;
+                       btVector3 linearTopVec = (spatInertia[i+1]*spatAcc[i+1]+zeroAccSpatFrc[i+1]).m_topVec;
+
+                       if (gJointFeedbackInJointFrame)
+                       {
+                               //shift the reaction forces to the joint frame
+                               //linear (force) component is the same
+                               //shift the angular (torque, moment) component using the relative position,  m_links[i].m_dVector
+                                angularBotVec = angularBotVec - linearTopVec.cross(m_links[i].m_dVector);
+                       }
+                       
+
+                       if (gJointFeedbackInWorldSpace)
+                       {
+                               if (isConstraintPass)
+                               {
+ m_links[i].m_jointFeedback->m_reactionForces.m_bottomVec += m_links[i].m_cachedWorldTransform.getBasis()*angularBotVec;
+                                        m_links[i].m_jointFeedback->m_reactionForces.m_topVec += m_links[i].m_cachedWorldTransform.getBasis()*linearTopVec;
+                               } else
+                               {
+                                       m_links[i].m_jointFeedback->m_reactionForces.m_bottomVec = m_links[i].m_cachedWorldTransform.getBasis()*angularBotVec;
+                                       m_links[i].m_jointFeedback->m_reactionForces.m_topVec = m_links[i].m_cachedWorldTransform.getBasis()*linearTopVec;
+                               }
+                       } else
+                       {
+                               if (isConstraintPass)
+                               {
+                                         m_links[i].m_jointFeedback->m_reactionForces.m_bottomVec += angularBotVec;                        
+                                m_links[i].m_jointFeedback->m_reactionForces.m_topVec += linearTopVec;
+
+                               }
+                               else
+                               {
+                               m_links[i].m_jointFeedback->m_reactionForces.m_bottomVec = angularBotVec;
+                               m_links[i].m_jointFeedback->m_reactionForces.m_topVec = linearTopVec;
+                               }               
+                       }       
+       }
 
-    // now do the loop over the links
-    for (int i = 0; i < num_links; ++i) {
-        const int parent = links[i].parent;
-        SpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
-                         accel_top[parent+1], accel_bottom[parent+1],
-                         accel_top[i+1], accel_bottom[i+1]);
-        joint_accel[i] = (Y[i] - SpatialDotProduct(h_top[i], h_bottom[i], accel_top[i+1], accel_bottom[i+1])) / D[i];
-        accel_top[i+1] += coriolis_top_angular[i] + joint_accel[i] * links[i].axis_top;
-        accel_bottom[i+1] += coriolis_bottom_linear[i] + joint_accel[i] * links[i].axis_bottom;
     }
 
     // transform base accelerations back to the world frame.
-    btVector3 omegadot_out = rot_from_parent[0].transpose() * accel_top[0];
+    btVector3 omegadot_out = rot_from_parent[0].transpose() * spatAcc[0].getAngular();
        output[0] = omegadot_out[0];
        output[1] = omegadot_out[1];
        output[2] = omegadot_out[2];
 
-    btVector3 vdot_out = rot_from_parent[0].transpose() * accel_bottom[0];
+    btVector3 vdot_out = rot_from_parent[0].transpose() * (spatAcc[0].getLinear() + spatVel[0].getAngular().cross(spatVel[0].getLinear()));
        output[3] = vdot_out[0];
        output[4] = vdot_out[1];
        output[5] = vdot_out[2];
+
+       /////////////////
+       //printf("q = [");
+       //printf("%.6f, %.6f, %.6f, %.6f, %.6f, %.6f, %.6f ", m_baseQuat.x(), m_baseQuat.y(), m_baseQuat.z(), m_baseQuat.w(), m_basePos.x(), m_basePos.y(), m_basePos.z());
+       //for(int link = 0; link < getNumLinks(); ++link)
+       //      for(int dof = 0; dof < m_links[link].m_dofCount; ++dof)
+       //              printf("%.6f ", m_links[link].m_jointPos[dof]);
+       //printf("]\n");
+       ////
+       //printf("qd = [");
+       //for(int dof = 0; dof < getNumDofs() + 6; ++dof)
+       //      printf("%.6f ", m_realBuf[dof]);
+       //printf("]\n");
+       //printf("qdd = [");
+       //for(int dof = 0; dof < getNumDofs() + 6; ++dof)
+       //      printf("%.6f ", output[dof]);
+       //printf("]\n");
+       /////////////////
+
     // Final step: add the accelerations (times dt) to the velocities.
-    applyDeltaVee(output, dt);
 
+       if (!isConstraintPass)
+       {
+       if(dt > 0.)
+               applyDeltaVeeMultiDof(output, dt);
+
+       }
+       /////
+       //btScalar angularThres = 1;
+       //btScalar maxAngVel = 0.;              
+       //bool scaleDown = 1.;
+       //for(int link = 0; link < m_links.size(); ++link)
+       //{             
+       //      if(spatVel[link+1].getAngular().length() > maxAngVel)
+       //      {
+       //              maxAngVel = spatVel[link+1].getAngular().length();
+       //              scaleDown = angularThres / spatVel[link+1].getAngular().length();
+       //              break;
+       //      }               
+       //}
+
+       //if(scaleDown != 1.)
+       //{
+       //      for(int link = 0; link < m_links.size(); ++link)
+       //      {
+       //              if(m_links[link].m_jointType == btMultibodyLink::eRevolute || m_links[link].m_jointType == btMultibodyLink::eSpherical)
+       //              {
+       //                      for(int dof = 0; dof < m_links[link].m_dofCount; ++dof)
+       //                              getJointVelMultiDof(link)[dof] *= scaleDown;
+       //              }
+       //      }
+       //}
+       /////
+
+       /////////////////////
+       if(m_useGlobalVelocities)
+       {
+               for (int i = 0; i < num_links; ++i) 
+               {
+                       const int parent = m_links[i].m_parent;
+                       //rot_from_parent[i+1] = btMatrix3x3(m_links[i].m_cachedRotParentToThis);    /// <- done
+                       //rot_from_world[i+1] = rot_from_parent[i+1] * rot_from_world[parent+1];                /// <- done
+               
+                       fromParent.m_rotMat = rot_from_parent[i+1]; fromParent.m_trnVec = m_links[i].m_cachedRVector;
+                       fromWorld.m_rotMat = rot_from_world[i+1];                       
+        
+                       // vhat_i = i_xhat_p(i) * vhat_p(i)             
+                       fromParent.transform(spatVel[parent+1], spatVel[i+1]);
+                       //nice alternative below (using operator *) but it generates temps
+                       /////////////////////////////////////////////////////////////
+
+                       // now set vhat_i to its true value by doing
+                       // vhat_i += qidot * shat_i                     
+                       spatJointVel.setZero();
+
+                       for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)            
+                               spatJointVel += m_links[i].m_axes[dof] * getJointVelMultiDof(i)[dof];
+               
+                       // remember vhat_i is really vhat_p(i) (but in current frame) at this point     => we need to add velocity across the inboard joint
+                       spatVel[i+1] += spatJointVel;
+
+
+                       fromWorld.transformInverseRotationOnly(spatVel[i+1], m_links[i].m_absFrameTotVelocity);
+                       fromWorld.transformInverseRotationOnly(spatJointVel, m_links[i].m_absFrameLocVelocity);
+               }
+       }
        
 }
 
@@ -685,24 +1193,24 @@ void btMultiBody::solveImatrix(const btVector3& rhs_top, const btVector3& rhs_bo
        ///solve I * x = rhs, so the result = invI * rhs
     if (num_links == 0) 
        {
-               // in the case of 0 links (i.e. a plain rigid body, not a multibody) rhs * invI is easier
-        result[0] = rhs_bot[0] / base_inertia[0];
-        result[1] = rhs_bot[1] / base_inertia[1];
-        result[2] = rhs_bot[2] / base_inertia[2];
-        result[3] = rhs_top[0] / base_mass;
-        result[4] = rhs_top[1] / base_mass;
-        result[5] = rhs_top[2] / base_mass;
+               // in the case of 0 m_links (i.e. a plain rigid body, not a multibody) rhs * invI is easier
+        result[0] = rhs_bot[0] / m_baseInertia[0];
+        result[1] = rhs_bot[1] / m_baseInertia[1];
+        result[2] = rhs_bot[2] / m_baseInertia[2];
+        result[3] = rhs_top[0] / m_baseMass;
+        result[4] = rhs_top[1] / m_baseMass;
+        result[5] = rhs_top[2] / m_baseMass;
     } else 
        {
                /// Special routine for calculating the inverse of a spatial inertia matrix
                ///the 6x6 matrix is stored as 4 blocks of 3x3 matrices
-               btMatrix3x3 Binv = cached_inertia_top_right.inverse()*-1.f;
-               btMatrix3x3 tmp = cached_inertia_lower_right * Binv;
-               btMatrix3x3 invIupper_right = (tmp * cached_inertia_top_left + cached_inertia_lower_left).inverse();
-               tmp = invIupper_right * cached_inertia_lower_right;
+               btMatrix3x3 Binv = m_cachedInertiaTopRight.inverse()*-1.f;
+               btMatrix3x3 tmp = m_cachedInertiaLowerRight * Binv;
+               btMatrix3x3 invIupper_right = (tmp * m_cachedInertiaTopLeft + m_cachedInertiaLowerLeft).inverse();
+               tmp = invIupper_right * m_cachedInertiaLowerRight;
                btMatrix3x3 invI_upper_left = (tmp * Binv);
                btMatrix3x3 invI_lower_right = (invI_upper_left).transpose();
-               tmp = cached_inertia_top_left  * invI_upper_left;
+               tmp = m_cachedInertiaTopLeft  * invI_upper_left;
                tmp[0][0]-= 1.0;
                tmp[1][1]-= 1.0;
                tmp[2][2]-= 1.0;
@@ -727,171 +1235,363 @@ void btMultiBody::solveImatrix(const btVector3& rhs_top, const btVector3& rhs_bo
                
     }
 }
+void btMultiBody::solveImatrix(const btSpatialForceVector &rhs, btSpatialMotionVector &result) const
+{
+       int num_links = getNumLinks();
+       ///solve I * x = rhs, so the result = invI * rhs
+    if (num_links == 0) 
+       {
+               // in the case of 0 m_links (i.e. a plain rigid body, not a multibody) rhs * invI is easier
+               result.setAngular(rhs.getAngular() / m_baseInertia);
+               result.setLinear(rhs.getLinear() / m_baseMass);         
+    } else 
+       {
+               /// Special routine for calculating the inverse of a spatial inertia matrix
+               ///the 6x6 matrix is stored as 4 blocks of 3x3 matrices
+               btMatrix3x3 Binv = m_cachedInertiaTopRight.inverse()*-1.f;
+               btMatrix3x3 tmp = m_cachedInertiaLowerRight * Binv;
+               btMatrix3x3 invIupper_right = (tmp * m_cachedInertiaTopLeft + m_cachedInertiaLowerLeft).inverse();
+               tmp = invIupper_right * m_cachedInertiaLowerRight;
+               btMatrix3x3 invI_upper_left = (tmp * Binv);
+               btMatrix3x3 invI_lower_right = (invI_upper_left).transpose();
+               tmp = m_cachedInertiaTopLeft  * invI_upper_left;
+               tmp[0][0]-= 1.0;
+               tmp[1][1]-= 1.0;
+               tmp[2][2]-= 1.0;
+               btMatrix3x3 invI_lower_left = (Binv * tmp);
+
+               //multiply result = invI * rhs
+               {
+                 btVector3 vtop = invI_upper_left*rhs.getLinear();
+                 btVector3 tmp;
+                 tmp = invIupper_right * rhs.getAngular();
+                 vtop += tmp;
+                 btVector3 vbot = invI_lower_left*rhs.getLinear();
+                 tmp = invI_lower_right * rhs.getAngular();
+                 vbot += tmp;
+                 result.setVector(vtop, vbot);           
+               }
+               
+    }
+}
 
+void btMultiBody::mulMatrix(btScalar *pA, btScalar *pB, int rowsA, int colsA, int rowsB, int colsB, btScalar *pC) const
+{
+       for (int row = 0; row < rowsA; row++)
+       {
+               for (int col = 0; col < colsB; col++)
+               {
+                       pC[row * colsB + col] = 0.f;
+                       for (int inner = 0; inner < rowsB; inner++)
+                       {
+                               pC[row * colsB + col] += pA[row * colsA + inner] * pB[col + inner * colsB];
+                       }
+               }
+       }
+}
 
-void btMultiBody::calcAccelerationDeltas(const btScalar *force, btScalar *output,
+void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar *output,
                                        btAlignedObjectArray<btScalar> &scratch_r, btAlignedObjectArray<btVector3> &scratch_v) const
 {
     // Temporary matrices/vectors -- use scratch space from caller
     // so that we don't have to keep reallocating every frame
-       int num_links = getNumLinks();
-    scratch_r.resize(num_links);
-    scratch_v.resize(4*num_links + 4);
 
-    btScalar * r_ptr = num_links == 0 ? 0 : &scratch_r[0];
+       
+       int num_links = getNumLinks();  
+    scratch_r.resize(m_dofCount);
+    scratch_v.resize(4*num_links + 4);     
+
+    btScalar * r_ptr = m_dofCount ? &scratch_r[0] : 0;
     btVector3 * v_ptr = &scratch_v[0];
-    
+
     // zhat_i^A (scratch space)
-    btVector3 * zero_acc_top_angular = v_ptr; v_ptr += num_links + 1;
-    btVector3 * zero_acc_bottom_linear = v_ptr; v_ptr += num_links + 1;
+    btSpatialForceVector * zeroAccSpatFrc = (btSpatialForceVector *)v_ptr;
+       v_ptr += num_links * 2 + 2;
 
     // rot_from_parent (cached from calcAccelerations)
-    const btMatrix3x3 * rot_from_parent = &matrix_buf[0];
+    const btMatrix3x3 * rot_from_parent = &m_matrixBuf[0];
 
     // hhat (cached), accel (scratch)
-    const btVector3 * h_top = num_links > 0 ? &vector_buf[0] : 0;
-    const btVector3 * h_bottom = num_links > 0 ? &vector_buf[num_links] : 0;
-    btVector3 * accel_top = v_ptr; v_ptr += num_links + 1;
-    btVector3 * accel_bottom = v_ptr; v_ptr += num_links + 1;
-
-    // Y_i (scratch), D_i (cached)
-    btScalar * Y = r_ptr; r_ptr += num_links;
-    const btScalar * D = num_links > 0 ? &m_real_buf[6 + num_links] : 0;
+    // hhat is NOT stored for the base (but ahat is) 
+       const btSpatialForceVector * h = (btSpatialForceVector *)(m_dofCount > 0 ? &m_vectorBuf[0] : 0);
+       btSpatialMotionVector * spatAcc = (btSpatialMotionVector *)v_ptr;
+       v_ptr += num_links * 2 + 2;
+
+    // Y_i (scratch), invD_i (cached)
+    const btScalar * invD = m_dofCount > 0 ? &m_realBuf[6 + m_dofCount] : 0;
+       btScalar * Y = r_ptr; 
+       ////////////////
+       //aux variables
+       static btScalar invD_times_Y[6];                                                        //D^{-1} * Y [dofxdof x dofx1 = dofx1] <=> D^{-1} * u; better moved to buffers since it is recalced in calcAccelerationDeltasMultiDof; num_dof of btScalar would cover all bodies
+       static btSpatialMotionVector result;                                                    //holds results of the SolveImatrix op; it is a spatial motion vector (accel)
+       static btScalar Y_minus_hT_a[6];                                                        //Y - h^{T} * a; it's dofx1 for each body so a single 6x1 temp is enough        
+       static btSpatialForceVector spatForceVecTemps[6];                               //6 temporary spatial force vectors
+       static btSpatialTransformationMatrix fromParent;        
+       /////////////////
 
-    btAssert(num_links == 0 || r_ptr - &scratch_r[0] == scratch_r.size());
-    btAssert(v_ptr - &scratch_v[0] == scratch_v.size());
-
-
-    
     // First 'upward' loop.
     // Combines CompTreeLinkVelocities and InitTreeLinks from Mirtich.
-
-    btVector3 input_force(force[3],force[4],force[5]);
-    btVector3 input_torque(force[0],force[1],force[2]);
-    
-    // Fill in zero_acc
+       
+       // Fill in zero_acc
     // -- set to force/torque on the base, zero otherwise
-    if (fixed_base) 
+    if (m_fixedBase) 
        {
-        zero_acc_top_angular[0] = zero_acc_bottom_linear[0] = btVector3(0,0,0);
+        zeroAccSpatFrc[0].setZero();
     } else 
-       {
-        zero_acc_top_angular[0] = - (rot_from_parent[0] * input_force);
-        zero_acc_bottom_linear[0] =  - (rot_from_parent[0] * input_torque);
+       {       
+               //test forces
+               fromParent.m_rotMat = rot_from_parent[0];
+               fromParent.transformRotationOnly(btSpatialForceVector(-force[0],-force[1],-force[2], -force[3],-force[4],-force[5]), zeroAccSpatFrc[0]);
     }
     for (int i = 0; i < num_links; ++i) 
        {
-        zero_acc_top_angular[i+1] = zero_acc_bottom_linear[i+1] = btVector3(0,0,0);
-    }
+               zeroAccSpatFrc[i+1].setZero();
+    }    
 
-    // 'Downward' loop.
-    for (int i = num_links - 1; i >= 0; --i) 
+       // 'Downward' loop.
+    // (part of TreeForwardDynamics in Mirtich.)
+    for (int i = num_links - 1; i >= 0; --i)
        {
+               const int parent = m_links[i].m_parent;
+               fromParent.m_rotMat = rot_from_parent[i+1]; fromParent.m_trnVec = m_links[i].m_cachedRVector;
 
-        Y[i] = - SpatialDotProduct(links[i].axis_top, links[i].axis_bottom, zero_acc_top_angular[i+1], zero_acc_bottom_linear[i+1]);
-        Y[i] += force[6 + i];  // add joint torque
-        
-        const int parent = links[i].parent;
-        
-        // Zp += pXi * (Zi + hi*Yi/Di)
-        btVector3 in_top, in_bottom, out_top, out_bottom;
-        const btScalar Y_over_D = Y[i] / D[i];
-        in_top = zero_acc_top_angular[i+1] + Y_over_D * h_top[i];
-        in_bottom = zero_acc_bottom_linear[i+1] + Y_over_D * h_bottom[i];
-        InverseSpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
-                                in_top, in_bottom, out_top, out_bottom);
-        zero_acc_top_angular[parent+1] += out_top;
-        zero_acc_bottom_linear[parent+1] += out_bottom;
-    }
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       Y[m_links[i].m_dofOffset + dof] = force[6 + m_links[i].m_dofOffset + dof]
+                                                                                       - m_links[i].m_axes[dof].dot(zeroAccSpatFrc[i+1])
+                                                                                       ;
+               }
 
-    // ptr to the joint accel part of the output
+               btVector3 in_top, in_bottom, out_top, out_bottom;
+               const btScalar *invDi = &invD[m_links[i].m_dofOffset*m_links[i].m_dofOffset];
+               
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       invD_times_Y[dof] = 0.f;
+
+                       for(int dof2 = 0; dof2 < m_links[i].m_dofCount; ++dof2)
+                       {
+                               invD_times_Y[dof] += invDi[dof * m_links[i].m_dofCount + dof2] * Y[m_links[i].m_dofOffset + dof2];                              
+                       }       
+               }
+               
+                // Zp += pXi * (Zi + hi*Yi/Di)
+               spatForceVecTemps[0] = zeroAccSpatFrc[i+1];
+
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       const btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof];
+                       //
+                       spatForceVecTemps[0] += hDof * invD_times_Y[dof];               
+               }
+               
+
+               fromParent.transformInverse(spatForceVecTemps[0], spatForceVecTemps[1]);
+                       
+               zeroAccSpatFrc[parent+1] += spatForceVecTemps[1];
+    }
+       
+       // ptr to the joint accel part of the output
     btScalar * joint_accel = output + 6;
 
+
     // Second 'upward' loop
-    if (fixed_base) 
+    // (part of TreeForwardDynamics in Mirtich)
+
+    if (m_fixedBase) 
        {
-        accel_top[0] = accel_bottom[0] = btVector3(0,0,0);
-    } else 
+        spatAcc[0].setZero();
+    } 
+       else 
+       {
+               solveImatrix(zeroAccSpatFrc[0], result);
+               spatAcc[0] = -result;
+
+    }
+       
+    // now do the loop over the m_links
+    for (int i = 0; i < num_links; ++i)
        {
-               btVector3 rhs_top (zero_acc_top_angular[0][0], zero_acc_top_angular[0][1], zero_acc_top_angular[0][2]);
-               btVector3 rhs_bot (zero_acc_bottom_linear[0][0], zero_acc_bottom_linear[0][1], zero_acc_bottom_linear[0][2]);
+        const int parent = m_links[i].m_parent;
+               fromParent.m_rotMat = rot_from_parent[i+1]; fromParent.m_trnVec = m_links[i].m_cachedRVector;
+
+               fromParent.transform(spatAcc[parent+1], spatAcc[i+1]);
                
-               float result[6];
-        solveImatrix(rhs_top,rhs_bot, result);
-       //      printf("result=%f,%f,%f,%f,%f,%f\n",result[0],result[0],result[0],result[0],result[0],result[0]);
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)
+               {
+                       const btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof];
+                       //                      
+                       Y_minus_hT_a[dof] = Y[m_links[i].m_dofOffset + dof] - spatAcc[i+1].dot(hDof);
+               }
 
-        for (int i = 0; i < 3; ++i) {
-            accel_top[0][i] = -result[i];
-            accel_bottom[0][i] = -result[i+3];
-        }
+               const btScalar *invDi = &invD[m_links[i].m_dofOffset*m_links[i].m_dofOffset];
+               mulMatrix(const_cast<btScalar*>(invDi), Y_minus_hT_a, m_links[i].m_dofCount, m_links[i].m_dofCount, m_links[i].m_dofCount, 1, &joint_accel[m_links[i].m_dofOffset]);
 
-    }
-    
-    // now do the loop over the links
-    for (int i = 0; i < num_links; ++i) {
-        const int parent = links[i].parent;
-        SpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
-                         accel_top[parent+1], accel_bottom[parent+1],
-                         accel_top[i+1], accel_bottom[i+1]);
-        joint_accel[i] = (Y[i] - SpatialDotProduct(h_top[i], h_bottom[i], accel_top[i+1], accel_bottom[i+1])) / D[i];
-        accel_top[i+1] += joint_accel[i] * links[i].axis_top;
-        accel_bottom[i+1] += joint_accel[i] * links[i].axis_bottom;
+               for(int dof = 0; dof < m_links[i].m_dofCount; ++dof)            
+                       spatAcc[i+1] += m_links[i].m_axes[dof] * joint_accel[m_links[i].m_dofOffset + dof];      
     }
 
     // transform base accelerations back to the world frame.
     btVector3 omegadot_out;
-    omegadot_out = rot_from_parent[0].transpose() * accel_top[0];
+    omegadot_out = rot_from_parent[0].transpose() * spatAcc[0].getAngular();
        output[0] = omegadot_out[0];
        output[1] = omegadot_out[1];
        output[2] = omegadot_out[2];
 
     btVector3 vdot_out;
-    vdot_out = rot_from_parent[0].transpose() * accel_bottom[0];
-
+    vdot_out = rot_from_parent[0].transpose() * spatAcc[0].getLinear();
        output[3] = vdot_out[0];
        output[4] = vdot_out[1];
        output[5] = vdot_out[2];
+
+       /////////////////
+       //printf("delta = [");
+       //for(int dof = 0; dof < getNumDofs() + 6; ++dof)
+       //      printf("%.2f ", output[dof]);
+       //printf("]\n");
+       /////////////////
 }
 
-void btMultiBody::stepPositions(btScalar dt)
-{
+
+
+
+void btMultiBody::stepPositionsMultiDof(btScalar dt, btScalar *pq, btScalar *pqd)
+{      
        int num_links = getNumLinks();
     // step position by adding dt * velocity
-       btVector3 v = getBaseVel();
-    base_pos += dt * v;
-
-    // "exponential map" method for the rotation
-    btVector3 base_omega = getBaseOmega();
-    const btScalar omega_norm = base_omega.norm();
-    const btScalar omega_times_dt = omega_norm * dt;
-    const btScalar SMALL_ROTATION_ANGLE = 0.02f; // Theoretically this should be ~ pow(FLT_EPSILON,0.25) which is ~ 0.0156
-    if (fabs(omega_times_dt) < SMALL_ROTATION_ANGLE) 
+       //btVector3 v = getBaseVel();   
+    //m_basePos += dt * v;
+       //
+       btScalar *pBasePos = (pq ? &pq[4] : m_basePos);
+       btScalar *pBaseVel = (pqd ? &pqd[3] : &m_realBuf[3]);                   //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety)
+       //      
+       pBasePos[0] += dt * pBaseVel[0];
+       pBasePos[1] += dt * pBaseVel[1];
+       pBasePos[2] += dt * pBaseVel[2];
+
+       ///////////////////////////////
+       //local functor for quaternion integration (to avoid error prone redundancy)
+       struct
        {
-        const btScalar xsq = omega_times_dt * omega_times_dt;     // |omega|^2 * dt^2
-        const btScalar sin_term = dt * (xsq / 48.0f - 0.5f);      // -sin(0.5*dt*|omega|) / |omega|
-        const btScalar cos_term = 1.0f - xsq / 8.0f;              // cos(0.5*dt*|omega|)
-        base_quat = base_quat * btQuaternion(sin_term * base_omega[0],sin_term * base_omega[1],sin_term * base_omega[2],cos_term);
-    } else 
+               //"exponential map" based on btTransformUtil::integrateTransform(..)
+               void operator() (const btVector3 &omega, btQuaternion &quat, bool baseBody, btScalar dt)
+               {
+                       //baseBody      =>      quat is alias and omega is global coor
+                       //!baseBody     =>      quat is alibi and omega is local coor   
+                       
+                       btVector3 axis;
+                       btVector3 angvel;
+
+                       if(!baseBody)                   
+                               angvel = quatRotate(quat, omega);                               //if quat is not m_baseQuat, it is alibi => ok                  
+                       else
+                               angvel = omega;
+               
+                       btScalar fAngle = angvel.length();              
+                       //limit the angular motion
+                       if (fAngle * dt > ANGULAR_MOTION_THRESHOLD)
+                       {
+                               fAngle = btScalar(0.5)*SIMD_HALF_PI / dt;
+                       }
+
+                       if ( fAngle < btScalar(0.001) )
+                       {
+                               // use Taylor's expansions of sync function
+                               axis   = angvel*( btScalar(0.5)*dt-(dt*dt*dt)*(btScalar(0.020833333333))*fAngle*fAngle );
+                       }
+                       else
+                       {
+                               // sync(fAngle) = sin(c*fAngle)/t
+                               axis   = angvel*( btSin(btScalar(0.5)*fAngle*dt)/fAngle );
+                       }
+                       
+                       if(!baseBody)                           
+                               quat = btQuaternion(axis.x(),axis.y(),axis.z(),btCos( fAngle*dt*btScalar(0.5) )) * quat;                        
+                       else                    
+                               quat = quat * btQuaternion(-axis.x(),-axis.y(),-axis.z(),btCos( fAngle*dt*btScalar(0.5) ));
+                               //equivalent to: quat = (btQuaternion(axis.x(),axis.y(),axis.z(),btCos( fAngle*dt*btScalar(0.5) )) * quat.inverse()).inverse();                 
+               
+                       quat.normalize();
+               }
+       } pQuatUpdateFun;
+       ///////////////////////////////
+
+       //pQuatUpdateFun(getBaseOmega(), m_baseQuat, true, dt);
+       //      
+       btScalar *pBaseQuat = pq ? pq : m_baseQuat;     
+       btScalar *pBaseOmega = pqd ? pqd : &m_realBuf[0];               //note: the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety)
+       //
+       static btQuaternion baseQuat; baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]);
+       static btVector3 baseOmega; baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]);
+       pQuatUpdateFun(baseOmega, baseQuat, true, dt);
+       pBaseQuat[0] = baseQuat.x();
+       pBaseQuat[1] = baseQuat.y();
+       pBaseQuat[2] = baseQuat.z();
+       pBaseQuat[3] = baseQuat.w();
+
+
+       //printf("pBaseOmega = %.4f %.4f %.4f\n", pBaseOmega->x(), pBaseOmega->y(), pBaseOmega->z());
+       //printf("pBaseVel = %.4f %.4f %.4f\n", pBaseVel->x(), pBaseVel->y(), pBaseVel->z());
+       //printf("baseQuat = %.4f %.4f %.4f %.4f\n", pBaseQuat->x(), pBaseQuat->y(), pBaseQuat->z(), pBaseQuat->w());
+
+       if(pq)          
+               pq += 7;
+       if(pqd)
+               pqd += 6;
+
+       // Finally we can update m_jointPos for each of the m_links
+    for (int i = 0; i < num_links; ++i) 
        {
-        base_quat = base_quat * btQuaternion(base_omega / omega_norm,-omega_times_dt);
-    }
+               btScalar *pJointPos = (pq ? pq : &m_links[i].m_jointPos[0]);            
+               btScalar *pJointVel = (pqd ? pqd : getJointVelMultiDof(i));
 
-    // Make sure the quaternion represents a valid rotation.
-    // (Not strictly necessary, but helps prevent any round-off errors from building up.)
-    base_quat.normalize();
+               switch(m_links[i].m_jointType)
+               {
+                       case btMultibodyLink::ePrismatic:
+                       case btMultibodyLink::eRevolute:
+                       {
+                               btScalar jointVel = pJointVel[0];       
+                               pJointPos[0] += dt * jointVel;
+                               break;
+                       }
+                       case btMultibodyLink::eSpherical:
+                       {
+                               static btVector3 jointVel; jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]);
+                               static btQuaternion jointOri; jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]);
+                               pQuatUpdateFun(jointVel, jointOri, false, dt);
+                               pJointPos[0] = jointOri.x(); pJointPos[1] = jointOri.y(); pJointPos[2] = jointOri.z(); pJointPos[3] = jointOri.w();
+                               break;
+                       }
+                       case btMultibodyLink::ePlanar:
+                       {
+                               pJointPos[0] += dt * getJointVelMultiDof(i)[0];
+
+                               btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2);
+                               btVector3 no_q0_coors_qd1qd2 = quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2);
+                               pJointPos[1] += m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt;
+                               pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt;
+
+                               break;
+                       }
+                       default:
+                       {
+                       }
 
-    // Finally we can update joint_pos for each of the links
-    for (int i = 0; i < num_links; ++i) 
-       {
-               float jointVel = getJointVel(i);
-        links[i].joint_pos += dt * jointVel;
-        links[i].updateCache();
+               }
+
+               m_links[i].updateCacheMultiDof(pq);
+
+               if(pq)          
+                       pq += m_links[i].m_posVarCount;
+               if(pqd)
+                       pqd += m_links[i].m_dofCount;
     }
 }
 
-void btMultiBody::fillContactJacobian(int link,
+void btMultiBody::fillConstraintJacobianMultiDof(int link,
                                     const btVector3 &contact_point,
-                                    const btVector3 &normal,
+                                                                       const btVector3 &normal_ang,
+                                    const btVector3 &normal_lin,
                                     btScalar *jac,
                                     btAlignedObjectArray<btScalar> &scratch_r,
                                     btAlignedObjectArray<btVector3> &scratch_v,
@@ -899,46 +1599,53 @@ void btMultiBody::fillContactJacobian(int link,
 {
     // temporary space
        int num_links = getNumLinks();
-    scratch_v.resize(2*num_links + 2);
+       int m_dofCount = getNumDofs();
+    scratch_v.resize(3*num_links + 3);                 //(num_links + base) offsets + (num_links + base) normals_lin + (num_links + base) normals_ang
     scratch_m.resize(num_links + 1);
 
     btVector3 * v_ptr = &scratch_v[0];
-    btVector3 * p_minus_com = v_ptr; v_ptr += num_links + 1;
-    btVector3 * n_local = v_ptr; v_ptr += num_links + 1;
+    btVector3 * p_minus_com_local = v_ptr; v_ptr += num_links + 1;
+    btVector3 * n_local_lin = v_ptr; v_ptr += num_links + 1;
+       btVector3 * n_local_ang = v_ptr; v_ptr += num_links + 1;
     btAssert(v_ptr - &scratch_v[0] == scratch_v.size());
 
-    scratch_r.resize(num_links);
-    btScalar * results = num_links > 0 ? &scratch_r[0] : 0;
+    scratch_r.resize(m_dofCount);
+    btScalar * results = m_dofCount > 0 ? &scratch_r[0] : 0;
 
     btMatrix3x3 * rot_from_world = &scratch_m[0];
 
-    const btVector3 p_minus_com_world = contact_point - base_pos;
+    const btVector3 p_minus_com_world = contact_point - m_basePos;
+       const btVector3 &normal_lin_world = normal_lin;                                                 //convenience
+       const btVector3 &normal_ang_world = normal_ang;
 
-    rot_from_world[0] = btMatrix3x3(base_quat);
-
-    p_minus_com[0] = rot_from_world[0] * p_minus_com_world;
-    n_local[0] = rot_from_world[0] * normal;
+    rot_from_world[0] = btMatrix3x3(m_baseQuat);    
     
     // omega coeffients first.
-    btVector3 omega_coeffs;
-    omega_coeffs = p_minus_com_world.cross(normal);
-       jac[0] = omega_coeffs[0];
-       jac[1] = omega_coeffs[1];
-       jac[2] = omega_coeffs[2];
+    btVector3 omega_coeffs_world;
+    omega_coeffs_world = p_minus_com_world.cross(normal_lin_world);
+       jac[0] = omega_coeffs_world[0] + normal_ang_world[0];
+       jac[1] = omega_coeffs_world[1] + normal_ang_world[1];
+       jac[2] = omega_coeffs_world[2] + normal_ang_world[2];
     // then v coefficients
-    jac[3] = normal[0];
-    jac[4] = normal[1];
-    jac[5] = normal[2];
+    jac[3] = normal_lin_world[0];
+    jac[4] = normal_lin_world[1];
+    jac[5] = normal_lin_world[2];
+
+       //create link-local versions of p_minus_com and normal
+       p_minus_com_local[0] = rot_from_world[0] * p_minus_com_world;
+    n_local_lin[0] = rot_from_world[0] * normal_lin_world;
+       n_local_ang[0] = rot_from_world[0] * normal_ang_world;
 
     // Set remaining jac values to zero for now.
-    for (int i = 6; i < 6 + num_links; ++i) {
+    for (int i = 6; i < 6 + m_dofCount; ++i) 
+       {
         jac[i] = 0;
     }
 
     // Qdot coefficients, if necessary.
     if (num_links > 0 && link > -1) {
 
-        // TODO: speed this up -- don't calculate for links we don't need.
+        // TODO: speed this up -- don't calculate for m_links we don't need.
         // (Also, we are making 3 separate calls to this function, for the normal & the 2 friction directions,
         // which is resulting in repeated work being done...)
 
@@ -946,64 +1653,292 @@ void btMultiBody::fillContactJacobian(int link,
         for (int i = 0; i < num_links; ++i) {
 
             // transform to local frame
-            const int parent = links[i].parent;
-            const btMatrix3x3 mtx(links[i].cached_rot_parent_to_this);
+            const int parent = m_links[i].m_parent;
+            const btMatrix3x3 mtx(m_links[i].m_cachedRotParentToThis);
             rot_from_world[i+1] = mtx * rot_from_world[parent+1];
 
-            n_local[i+1] = mtx * n_local[parent+1];
-            p_minus_com[i+1] = mtx * p_minus_com[parent+1] - links[i].cached_r_vector;
-
-            // calculate the jacobian entry
-            if (links[i].is_revolute) {
-                results[i] = n_local[i+1].dot( links[i].axis_top.cross(p_minus_com[i+1]) + links[i].axis_bottom );
-            } else {
-                results[i] = n_local[i+1].dot( links[i].axis_bottom );
-            }
+            n_local_lin[i+1] = mtx * n_local_lin[parent+1];
+                       n_local_ang[i+1] = mtx * n_local_ang[parent+1];
+            p_minus_com_local[i+1] = mtx * p_minus_com_local[parent+1] - m_links[i].m_cachedRVector;
+
+                       // calculate the jacobian entry
+                       switch(m_links[i].m_jointType)
+                       {
+                               case btMultibodyLink::eRevolute:
+                               {
+                                       results[m_links[i].m_dofOffset] = n_local_lin[i+1].dot(m_links[i].getAxisTop(0).cross(p_minus_com_local[i+1]) + m_links[i].getAxisBottom(0));
+                                       results[m_links[i].m_dofOffset] += n_local_ang[i+1].dot(m_links[i].getAxisTop(0));
+                                       break;
+                               }
+                               case btMultibodyLink::ePrismatic:
+                               {
+                                       results[m_links[i].m_dofOffset] = n_local_lin[i+1].dot(m_links[i].getAxisBottom(0));
+                                       break;
+                               }
+                               case btMultibodyLink::eSpherical:
+                               {
+                                       results[m_links[i].m_dofOffset + 0] = n_local_lin[i+1].dot(m_links[i].getAxisTop(0).cross(p_minus_com_local[i+1]) + m_links[i].getAxisBottom(0));
+                                       results[m_links[i].m_dofOffset + 1] = n_local_lin[i+1].dot(m_links[i].getAxisTop(1).cross(p_minus_com_local[i+1]) + m_links[i].getAxisBottom(1));
+                                       results[m_links[i].m_dofOffset + 2] = n_local_lin[i+1].dot(m_links[i].getAxisTop(2).cross(p_minus_com_local[i+1]) + m_links[i].getAxisBottom(2));
+                                                                               
+                                       results[m_links[i].m_dofOffset + 0] += n_local_ang[i+1].dot(m_links[i].getAxisTop(0));
+                                       results[m_links[i].m_dofOffset + 1] += n_local_ang[i+1].dot(m_links[i].getAxisTop(1));
+                                       results[m_links[i].m_dofOffset + 2] += n_local_ang[i+1].dot(m_links[i].getAxisTop(2));
+
+                                       break;
+                               }
+                               case btMultibodyLink::ePlanar:
+                               {
+                                       results[m_links[i].m_dofOffset + 0] = n_local_lin[i+1].dot(m_links[i].getAxisTop(0).cross(p_minus_com_local[i+1]));// + m_links[i].getAxisBottom(0));
+                                       results[m_links[i].m_dofOffset + 1] = n_local_lin[i+1].dot(m_links[i].getAxisBottom(1));
+                                       results[m_links[i].m_dofOffset + 2] = n_local_lin[i+1].dot(m_links[i].getAxisBottom(2));
+
+                                       break;
+                               }
+                               default:
+                               {
+                               }
+                       }
+            
         }
 
         // Now copy through to output.
-        while (link != -1) {
-            jac[6 + link] = results[link];
-            link = links[link].parent;
+               //printf("jac[%d] = ", link);
+        while (link != -1) 
+               {
+                       for(int dof = 0; dof < m_links[link].m_dofCount; ++dof)
+                       {
+                               jac[6 + m_links[link].m_dofOffset + dof] = results[m_links[link].m_dofOffset + dof];
+                               //printf("%.2f\t", jac[6 + m_links[link].m_dofOffset + dof]);
+                       }
+            
+                       link = m_links[link].m_parent;
         }
+               //printf("]\n");
     }
 }
 
+
 void btMultiBody::wakeUp()
 {
-    awake = true;
+    m_awake = true;
 }
 
 void btMultiBody::goToSleep()
 {
-    awake = false;
+    m_awake = false;
 }
 
 void btMultiBody::checkMotionAndSleepIfRequired(btScalar timestep)
 {
-       int num_links = getNumLinks();
        extern bool gDisableDeactivation;
-    if (!can_sleep || gDisableDeactivation) 
+    if (!m_canSleep || gDisableDeactivation) 
        {
-               awake = true;
-               sleep_timer = 0;
+               m_awake = true;
+               m_sleepTimer = 0;
                return;
        }
 
     // motion is computed as omega^2 + v^2 + (sum of squares of joint velocities)
     btScalar motion = 0;
-    for (int i = 0; i < 6 + num_links; ++i) {
-        motion += m_real_buf[i] * m_real_buf[i];
-    }
+       {
+               for (int i = 0; i < 6 + m_dofCount; ++i)                
+                       motion += m_realBuf[i] * m_realBuf[i];
+       }
+       
 
     if (motion < SLEEP_EPSILON) {
-        sleep_timer += timestep;
-        if (sleep_timer > SLEEP_TIMEOUT) {
+        m_sleepTimer += timestep;
+        if (m_sleepTimer > SLEEP_TIMEOUT) {
             goToSleep();
         }
     } else {
-        sleep_timer = 0;
-               if (!awake)
+        m_sleepTimer = 0;
+               if (!m_awake)
                        wakeUp();
     }
 }
+
+
+void   btMultiBody::forwardKinematics(btAlignedObjectArray<btQuaternion>& world_to_local,btAlignedObjectArray<btVector3>& local_origin)
+{
+       
+       int num_links = getNumLinks();
+
+       // Cached 3x3 rotation matrices from parent frame to this frame.
+       btMatrix3x3* rot_from_parent =(btMatrix3x3 *) &m_matrixBuf[0];
+
+       rot_from_parent[0] = btMatrix3x3(m_baseQuat);                           //m_baseQuat assumed to be alias!?
+       
+       for (int i = 0; i < num_links; ++i) 
+       {
+               rot_from_parent[i+1] = btMatrix3x3(m_links[i].m_cachedRotParentToThis);
+       }
+               
+       int nLinks = getNumLinks();
+       ///base + num m_links
+       world_to_local.resize(nLinks+1);
+       local_origin.resize(nLinks+1);
+
+       world_to_local[0] = getWorldToBaseRot();
+       local_origin[0] = getBasePos();
+       
+       for (int k=0;k<getNumLinks();k++)
+       {
+               const int parent = getParent(k);
+               world_to_local[k+1] = getParentToLocalRot(k) * world_to_local[parent+1];
+               local_origin[k+1] = local_origin[parent+1] + (quatRotate(world_to_local[k+1].inverse() , getRVector(k)));
+       }
+
+       for (int link=0;link<getNumLinks();link++)
+       {
+               int index = link+1;
+
+               btVector3 posr = local_origin[index];
+               btScalar quat[4]={-world_to_local[index].x(),-world_to_local[index].y(),-world_to_local[index].z(),world_to_local[index].w()};
+               btTransform tr;
+               tr.setIdentity();
+               tr.setOrigin(posr);
+               tr.setRotation(btQuaternion(quat[0],quat[1],quat[2],quat[3]));
+               getLink(link).m_cachedWorldTransform = tr;
+                       
+       }
+
+}
+
+void   btMultiBody::updateCollisionObjectWorldTransforms(btAlignedObjectArray<btQuaternion>& world_to_local,btAlignedObjectArray<btVector3>& local_origin)
+{
+       world_to_local.resize(getNumLinks()+1);
+       local_origin.resize(getNumLinks()+1);
+       
+       world_to_local[0] = getWorldToBaseRot();
+       local_origin[0] = getBasePos();
+       
+       if (getBaseCollider())
+       {
+               btVector3 posr = local_origin[0];
+               //      float pos[4]={posr.x(),posr.y(),posr.z(),1};
+               btScalar quat[4]={-world_to_local[0].x(),-world_to_local[0].y(),-world_to_local[0].z(),world_to_local[0].w()};
+               btTransform tr;
+               tr.setIdentity();
+               tr.setOrigin(posr);
+               tr.setRotation(btQuaternion(quat[0],quat[1],quat[2],quat[3]));
+               
+               getBaseCollider()->setWorldTransform(tr);
+               
+       }
+       
+       for (int k=0;k<getNumLinks();k++)
+       {
+               const int parent = getParent(k);
+               world_to_local[k+1] = getParentToLocalRot(k) * world_to_local[parent+1];
+               local_origin[k+1] = local_origin[parent+1] + (quatRotate(world_to_local[k+1].inverse() , getRVector(k)));
+       }
+       
+       
+       for (int m=0;m<getNumLinks();m++)
+       {
+               btMultiBodyLinkCollider* col = getLink(m).m_collider;
+               if (col)
+               {
+                       int link = col->m_link;
+                       btAssert(link == m);
+                       
+                       int index = link+1;
+                       
+                       btVector3 posr = local_origin[index];
+                       //                      float pos[4]={posr.x(),posr.y(),posr.z(),1};
+                       btScalar quat[4]={-world_to_local[index].x(),-world_to_local[index].y(),-world_to_local[index].z(),world_to_local[index].w()};
+                       btTransform tr;
+                       tr.setIdentity();
+                       tr.setOrigin(posr);
+                       tr.setRotation(btQuaternion(quat[0],quat[1],quat[2],quat[3]));
+                       
+                       col->setWorldTransform(tr);
+               }
+       }
+}
+
+int    btMultiBody::calculateSerializeBufferSize()     const
+{
+       int sz = sizeof(btMultiBodyData);
+       return sz;
+}
+
+       ///fills the dataBuffer and returns the struct name (and 0 on failure)
+const char*    btMultiBody::serialize(void* dataBuffer, class btSerializer* serializer) const
+{
+               btMultiBodyData* mbd = (btMultiBodyData*) dataBuffer;
+               getBaseWorldTransform().serialize(mbd->m_baseWorldTransform);
+               mbd->m_baseMass = this->getBaseMass();
+               getBaseInertia().serialize(mbd->m_baseInertia);
+               {
+                       char* name = (char*) serializer->findNameForPointer(m_baseName);
+                       mbd->m_baseName = (char*)serializer->getUniquePointer(name);
+                       if (mbd->m_baseName)
+                       {
+                               serializer->serializeName(name);
+                       }
+               }
+               mbd->m_numLinks = this->getNumLinks();
+               if (mbd->m_numLinks)
+               {
+                       int sz = sizeof(btMultiBodyLinkData);
+                       int numElem = mbd->m_numLinks;
+                       btChunk* chunk = serializer->allocate(sz,numElem);
+                       btMultiBodyLinkData* memPtr = (btMultiBodyLinkData*)chunk->m_oldPtr;
+                       for (int i=0;i<numElem;i++,memPtr++)
+                       {
+
+                               memPtr->m_jointType = getLink(i).m_jointType;
+                               memPtr->m_dofCount = getLink(i).m_dofCount;
+                               memPtr->m_posVarCount = getLink(i).m_posVarCount;
+                               
+                               getLink(i).m_inertiaLocal.serialize(memPtr->m_linkInertia);
+                               memPtr->m_linkMass = getLink(i).m_mass;
+                               memPtr->m_parentIndex = getLink(i).m_parent;
+                               getLink(i).m_eVector.serialize(memPtr->m_parentComToThisComOffset);
+                               getLink(i).m_dVector.serialize(memPtr->m_thisPivotToThisComOffset);
+                               getLink(i).m_zeroRotParentToThis.serialize(memPtr->m_zeroRotParentToThis);
+                               btAssert(memPtr->m_dofCount<=3);
+                               for (int dof = 0;dof<getLink(i).m_dofCount;dof++)
+                               {
+                                       getLink(i).getAxisBottom(dof).serialize(memPtr->m_jointAxisBottom[dof]);
+                                       getLink(i).getAxisTop(dof).serialize(memPtr->m_jointAxisTop[dof]);
+                                       
+                                       memPtr->m_jointTorque[dof] = getLink(i).m_jointTorque[dof];
+                                       memPtr->m_jointVel[dof] = getJointVelMultiDof(i)[dof];
+
+                               }
+                               int numPosVar = getLink(i).m_posVarCount;
+                               for (int posvar = 0; posvar < numPosVar;posvar++)
+                               {
+                                       memPtr->m_jointPos[posvar] = getLink(i).m_jointPos[posvar];
+                               }
+                               
+                               
+                               {
+                                       char* name = (char*) serializer->findNameForPointer(m_links[i].m_linkName);
+                                       memPtr->m_linkName = (char*)serializer->getUniquePointer(name);
+                                       if (memPtr->m_linkName)
+                                       {
+                                               serializer->serializeName(name);
+                                       }
+                               }
+                               {
+                                       char* name = (char*) serializer->findNameForPointer(m_links[i].m_jointName);
+                                       memPtr->m_jointName = (char*)serializer->getUniquePointer(name);
+                                       if (memPtr->m_jointName)
+                                       {
+                                               serializer->serializeName(name);
+                                       }
+                               }
+                               memPtr->m_linkCollider = (btCollisionObjectData*)serializer->getUniquePointer(getLink(i).m_collider);
+
+                       }
+                       serializer->finalizeChunk(chunk,btMultiBodyLinkDataName,BT_ARRAY_CODE,(void*) &m_links[0]);
+               }
+               mbd->m_links = mbd->m_numLinks? (btMultiBodyLinkData*) serializer->getUniquePointer((void*)&m_links[0]):0;
+
+               return btMultiBodyDataName;
+}