Sculpt: svn merge https://svn.blender.org/svnroot/bf-blender/trunk/blender -r24330...
author Brecht Van Lommel <brechtvanlommel@pandora.be>
Wed, 11 Nov 2009 10:44:46 +0000 (10:44 +0000)
committer Brecht Van Lommel <brechtvanlommel@pandora.be>
Wed, 11 Nov 2009 10:44:46 +0000 (10:44 +0000)
27 files changed:
1  2 
intern/guardedalloc/intern/mallocn.c
release/scripts/ui/space_view3d.py
release/scripts/ui/space_view3d_toolbar.py
source/blender/blenkernel/intern/brush.c
source/blender/blenkernel/intern/cdderivedmesh.c
source/blender/blenkernel/intern/constraint.c
source/blender/blenkernel/intern/modifier.c
source/blender/blenkernel/intern/object.c
source/blender/blenkernel/intern/particle.c
source/blender/blenkernel/intern/subsurf_ccg.c
source/blender/blenlib/intern/pbvh.c
source/blender/editors/object/object_modifier.c
source/blender/editors/physics/particle_edit.c
source/blender/editors/sculpt_paint/paint_image.c
source/blender/editors/sculpt_paint/paint_stroke.c
source/blender/editors/sculpt_paint/paint_utils.c
source/blender/editors/sculpt_paint/paint_vertex.c
source/blender/editors/sculpt_paint/sculpt.c
source/blender/editors/space_view3d/drawobject.c
source/blender/editors/space_view3d/view3d_draw.c
source/blender/editors/space_view3d/view3d_edit.c
source/blender/editors/space_view3d/view3d_header.c
source/blender/editors/space_view3d/view3d_select.c
source/blender/editors/space_view3d/view3d_view.c
source/blender/gpu/intern/gpu_buffers.c
source/blender/makesdna/DNA_scene_types.h
source/blender/render/intern/source/convertblender.c

Simple merge
Simple merge
@@@ -393,63 -360,7 +393,63 @@@ static void cdDM_drawLooseEdges(Derived
        }
  }
  
 -static void cdDM_drawFacesSolid(DerivedMesh *dm, int (*setMaterial)(int, void *attribs))
 +static int nodes_drawn = 0; /* incremented once for every node drawn by draw_partial_cb */
 +static int is_partial = 0; /* only read by the disabled debug code in draw_partial_cb, as far as this hunk shows */
 +/* XXX: Just a temporary replacement for the real drawing code */
 +static void draw_partial_cb(PBVHNode *node, void *data)
 +{
 +      /* XXX: Just some quick code to show leaf nodes in different colors */
 +      /*float col[3]; int i;
 +      if(is_partial) {
 +              col[0] = (rand() / (float)RAND_MAX); col[1] = col[2] = 0.6;
 +      }
 +      else {
 +              srand((long long)data_v);
 +              for(i = 0; i < 3; ++i)
 +                      col[i] = (rand() / (float)RAND_MAX) * 0.3 + 0.7;
 +      }
 +      glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, col);
 +
 +      glColor3f(1, 0, 0);*/
 +      GPU_draw_buffers(BLI_pbvh_node_get_draw_buffers(node));
 +      ++nodes_drawn;
 +}
 +
 +/* Adapted from:
 +   http://www.gamedev.net/community/forums/topic.asp?topic_id=512123
 +   Returns true if the AABB is at least partially within the frustum
 +   (ok, not a real frustum), false otherwise.
 +
 +   'data' is read as an array of four planes of four floats each: a
 +   direction in [0..2] and an offset in [3]. The box comes from
 +   BLI_pbvh_node_get_BB. For every plane one box corner (vmin) is picked
 +   per axis from the sign of the plane direction, and the node is rejected
 +   as soon as dot(plane, vmin) + offset is positive.
 +   NOTE(review): vmax is filled in as well but never read below.
 +*/
 +int planes_contain_AABB(PBVHNode *node, void *data)
 +{
 +      float (*planes)[4] = data;
 +      int i, axis;
 +      float vmin[3], vmax[3], bb_min[3], bb_max[3];
 +
 +      BLI_pbvh_node_get_BB(node, bb_min, bb_max);
 +
 +      for(i = 0; i < 4; ++i) { 
 +              for(axis = 0; axis < 3; ++axis) {
 +                      if(planes[i][axis] > 0) { 
 +                              vmin[axis] = bb_min[axis];
 +                              vmax[axis] = bb_max[axis];
 +                      }
 +                      else {
 +                              vmin[axis] = bb_max[axis];
 +                              vmax[axis] = bb_min[axis];
 +                      }
 +              }
 +              
-               if(Inpf(planes[i], vmin) + planes[i][3] > 0)
++              if(dot_v3v3(planes[i], vmin) + planes[i][3] > 0)
 +                      return 0; /* the chosen corner is on the positive side of this plane */
 +      } 
 +
 +      return 1;
 +}
 +
 +static void cdDM_drawFacesSolid(DerivedMesh *dm,
 +                              float (*partial_redraw_planes)[4],
 +                              int (*setMaterial)(int, void *attribs))
  {
        CDDerivedMesh *cddm = (CDDerivedMesh*) dm;
        MVert *mvert = cddm->mvert;
@@@ -1783,12 -1791,10 +1783,12 @@@ static void give_parvert(Object *par, i
                                float co[3];
  
                                /* get the average of all verts with (original index == nr) */
 -                              for(i = 0; i < numVerts; ++i, ++index) {
 -                                      if(*index == nr) {
 +                              for(i = 0; i < numVerts; ++i) {
 +                                      vindex= (index)? *index: i;
 +
 +                                      if(vindex == nr) {
                                                dm->getVertCo(dm, i, co);
-                                               VecAddf(vec, vec, co);
+                                               add_v3_v3v3(vec, vec, co);
                                                count++;
                                        }
                                }
index 4cf278d,0000000..c2f0705
mode 100644,000000..100644
--- /dev/null
@@@ -1,990 -1,0 +1,990 @@@
- #include "BLI_arithb.h"
 +/**
 + * $Id$
 + *
 + * ***** BEGIN GPL LICENSE BLOCK *****
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program; if not, write to the Free Software Foundation,
 + * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 + *
 + * ***** END GPL LICENSE BLOCK *****
 + */
 +
 +#include <float.h>
 +#include <stdlib.h>
 +#include <string.h>
 +
 +#include "MEM_guardedalloc.h"
 +
 +#include "DNA_meshdata_types.h"
 +
-                                       CalcNormFloat4(bvh->verts[f->v1].co, bvh->verts[f->v2].co,
-                                                                  bvh->verts[f->v3].co, bvh->verts[f->v4].co, fn);
++#include "BLI_math.h"
 +#include "BLI_ghash.h"
 +#include "BLI_pbvh.h"
 +
 +#include "BKE_mesh.h"
 +#include "BKE_utildefines.h"
 +
 +#include "gpu_buffers.h"
 +
 +#define LEAF_LIMIT 10000 /* build_sub turns a node into a leaf once it holds at most this many faces */
 +
 +//#define PERFCNTRS  (when enabled, struct PBVH gains the perf_modified counter)
 +
 +/* Bitmap: one bit per index, packed eight to a char */
 +typedef char* BLI_bitmap;
 +
 +BLI_bitmap BLI_bitmap_new(int tot)
 +{
 +      return MEM_callocN((tot >> 3) + 1, "BLI bitmap");
 +}
 +
 +int BLI_bitmap_get(BLI_bitmap b, int index)
 +{
 +      return b[index >> 3] & (1 << (index & 7));
 +}
 +
 +void BLI_bitmap_set(BLI_bitmap b, int index)
 +{
 +      b[index >> 3] |= (1 << (index & 7));
 +}
 +
 +void BLI_bitmap_clear(BLI_bitmap b, int index)
 +{
 +      b[index >> 3] &= ~(1 << (index & 7));
 +}
 +
 +/* Axis-aligned bounding box */
 +typedef struct {
 +      float bmin[3], bmax[3]; /* minimum and maximum corner, indexed by axis */
 +} BB;
 +
 +/* Axis-aligned bounding box with centroid.
 +   Starts with the same bmin/bmax members as BB: the build code casts
 +   BBC pointers to BB pointers to reuse the BB_* helpers, so keep the
 +   leading members in this order. */
 +typedef struct {
 +      float bmin[3], bmax[3], bcentroid[3]; /* bcentroid is filled in by BBC_update_centroid */
 +} BBC;
 +
 +/* One node of the tree; leaf nodes carry PBVH_Leaf in 'flag' */
 +struct PBVHNode {
 +      /* Opaque handle for drawing code */
 +      void *draw_buffers;
 +
 +      /* Leaf only, allocated by build_leaf_node: indices into the PBVH
 +         vertex array, the uniq_verts unique vertices first, followed by
 +         the face_verts remaining ones */
 +      int *vert_indices;
 +
 +      /* Voxel bounds */
 +      BB vb;
 +      BB orig_vb; /* copy of vb, taken at build time and on PBVH_UpdateOriginalBB */
 +
 +      /* For internal nodes: index in the PBVH node array of the first
 +         child, the second child is stored right after it */
 +      int children_offset;
 +
 +      /* Pointer into bvh face_indices */
 +      int *face_indices;
 +      /* Leaf only: four entries per face, each an index into vert_indices */
 +      int *face_vert_indices;
 +
 +      unsigned short totface; /* number of entries in face_indices */
 +      unsigned short uniq_verts, face_verts; /* see vert_indices */
 +
 +      char flag; /* PBVH_Leaf and the PBVH_Update* bits */
 +};
 +
 +/* The tree itself: a flat node array plus the face ordering it indexes */
 +struct PBVH {
 +      PBVHNode *nodes; /* node 0 is the root (see pbvh_iter_begin) */
 +      int node_mem_count, totnode; /* allocated and used number of nodes */
 +
 +      int *face_indices; /* face indices, reordered while the tree is built */
 +      int totface;
 +      int totvert;
 +
 +      /* Mesh data */
 +      MVert *verts;
 +      MFace *faces;
 +
 +      /* Only used during BVH build and update,
 +         don't need to remain valid after */
 +      BLI_bitmap vert_bitmap;
 +
 +#ifdef PERFCNTRS
 +      int perf_modified;
 +#endif
 +};
 +
 +#define STACK_FIXED_DEPTH     100 /* size of the traversal stack embedded in PBVHIter */
 +
 +/* Entry of the traversal stack used by the iterator */
 +typedef struct PBVHStack {
 +      PBVHNode *node;
 +      int revisiting; /* non-zero once the children of the node were pushed */
 +} PBVHStack;
 +
 +/* State of a tree traversal, see pbvh_iter_begin/next/end */
 +typedef struct PBVHIter {
 +      PBVH *bvh;
 +      BLI_pbvh_SearchCallback scb; /* optional filter, a zero result skips the node */
 +      void *search_data; /* passed on to scb */
 +
 +      PBVHStack *stack; /* points to stackfixed until the stack has to grow */
 +      int stacksize; /* number of entries in use */
 +
 +      PBVHStack stackfixed[STACK_FIXED_DEPTH];
 +      int stackspace; /* number of entries 'stack' can hold */
 +} PBVHIter;
 +
 +static void BB_reset(BB *bb)
 +{
 +      bb->bmin[0] = bb->bmin[1] = bb->bmin[2] = FLT_MAX;
 +      bb->bmax[0] = bb->bmax[1] = bb->bmax[2] = -FLT_MAX;
 +}
 +
 +/* Expand the bounding box to include a new coordinate */
 +static void BB_expand(BB *bb, float co[3])
 +{
 +      int i;
 +      for(i = 0; i < 3; ++i) {
 +              bb->bmin[i] = MIN2(bb->bmin[i], co[i]);
 +              bb->bmax[i] = MAX2(bb->bmax[i], co[i]);
 +      }
 +}
 +
 +/* Expand the bounding box to include another bounding box */
 +static void BB_expand_with_bb(BB *bb, BB *bb2)
 +{
 +      int i;
 +      for(i = 0; i < 3; ++i) {
 +              bb->bmin[i] = MIN2(bb->bmin[i], bb2->bmin[i]);
 +              bb->bmax[i] = MAX2(bb->bmax[i], bb2->bmax[i]);
 +      }
 +}
 +
 +/* Return 0, 1, or 2 to indicate the widest axis of the bounding box */
 +static int BB_widest_axis(BB *bb)
 +{
 +      float dim[3];
 +      int i;
 +
 +      for(i = 0; i < 3; ++i)
 +              dim[i] = bb->bmax[i] - bb->bmin[i];
 +
 +      if(dim[0] > dim[1]) {
 +              if(dim[0] > dim[2])
 +                      return 0;
 +              else
 +                      return 2;
 +      }
 +      else {
 +              if(dim[1] > dim[2])
 +                      return 1;
 +              else
 +                      return 2;
 +      }
 +}
 +
 +static void BBC_update_centroid(BBC *bbc)
 +{
 +      int i;
 +      for(i = 0; i < 3; ++i)
 +              bbc->bcentroid[i] = (bbc->bmin[i] + bbc->bmax[i]) * 0.5f;
 +}
 +
 +/* Not recursive */
 +static void update_node_vb(PBVH *bvh, PBVHNode *node)
 +{
 +      BB vb;
 +
 +      BB_reset(&vb);
 +      
 +      if(node->flag & PBVH_Leaf) {
 +              int i, totvert= node->uniq_verts + node->face_verts;
 +
 +              for(i = 0; i < totvert; ++i) {
 +                      float *co= bvh->verts[node->vert_indices[i]].co;
 +                      BB_expand(&vb, co);
 +              }
 +      }
 +      else {
 +              BB_expand_with_bb(&vb,
 +                                &bvh->nodes[node->children_offset].vb);
 +              BB_expand_with_bb(&vb,
 +                                &bvh->nodes[node->children_offset + 1].vb);
 +      }
 +
 +      node->vb= vb;
 +}
 +
 +/* Adapted from BLI_kdopbvh.c */
 +/* Returns the index of the first element on the right of the partition */
 +static int partition_indices(int *face_indices, int lo, int hi, int axis,
 +                           float mid, BBC *prim_bbc)
 +{
 +      int i=lo, j=hi;
 +      for(;;) {
 +              for(; prim_bbc[face_indices[i]].bcentroid[axis] < mid; i++);
 +              for(; mid < prim_bbc[face_indices[j]].bcentroid[axis]; j--);
 +              
 +              if(!(i < j))
 +                      return i;
 +              
 +              SWAP(int, face_indices[i], face_indices[j]);
 +              i++;
 +      }
 +}
 +
 +void check_partitioning(int *face_indices, int lo, int hi, int axis,
 +                             float mid, BBC *prim_bbc, int index_of_2nd_partition)
 +{
 +      int i;
 +      for(i = lo; i <= hi; ++i) {
 +              const float c = prim_bbc[face_indices[i]].bcentroid[axis];
 +
 +              if((i < index_of_2nd_partition && c > mid) ||
 +                 (i > index_of_2nd_partition && c < mid)) {
 +                      printf("fail\n");
 +              }
 +      }
 +}
 +
 +static void grow_nodes(PBVH *bvh, int totnode)
 +{
 +      if(totnode > bvh->node_mem_count) {
 +              PBVHNode *prev = bvh->nodes;
 +              bvh->node_mem_count *= 1.33;
 +              if(bvh->node_mem_count < totnode)
 +                      bvh->node_mem_count = totnode;
 +              bvh->nodes = MEM_callocN(sizeof(PBVHNode) * bvh->node_mem_count,
 +                                       "bvh nodes");
 +              memcpy(bvh->nodes, prev, bvh->totnode * sizeof(PBVHNode));
 +              MEM_freeN(prev);
 +      }
 +
 +      bvh->totnode = totnode;
 +}
 +
 +/* Add a vertex to the map, with a positive value for unique vertices and
 +   a negative value for additional vertices */
 +static int map_insert_vert(PBVH *bvh, GHash *map,
 +                          unsigned short *face_verts,
 +                          unsigned short *uniq_verts, int vertex)
 +{
 +      void *value, *key = SET_INT_IN_POINTER(vertex);
 +
 +      if(!BLI_ghash_haskey(map, key)) {
 +              if(BLI_bitmap_get(bvh->vert_bitmap, vertex)) {
 +                      value = SET_INT_IN_POINTER(-(*face_verts) - 1);
 +                      ++(*face_verts);
 +              }
 +              else {
 +                      BLI_bitmap_set(bvh->vert_bitmap, vertex);
 +                      value = SET_INT_IN_POINTER(*uniq_verts);
 +                      ++(*uniq_verts);
 +              }
 +              
 +              BLI_ghash_insert(map, key, value);
 +              return GET_INT_FROM_POINTER(value);
 +      }
 +      else
 +              return GET_INT_FROM_POINTER(BLI_ghash_lookup(map, key));
 +}
 +
 +/* Find vertices used by the faces in this node and update the draw buffers */
 +static void build_leaf_node(PBVH *bvh, PBVHNode *node)
 +{
 +      GHashIterator *iter;
 +      GHash *map;
 +      int i, j, totface;
 +
 +      map = BLI_ghash_new(BLI_ghashutil_inthash, BLI_ghashutil_intcmp);
 +      
 +      node->uniq_verts = node->face_verts = 0;
 +      totface= node->totface;
 +
 +      node->face_vert_indices = MEM_callocN(sizeof(int) *
 +                                       4*totface, "bvh node face vert indices");
 +
 +      for(i = 0; i < totface; ++i) {
 +              MFace *f = bvh->faces + node->face_indices[i];
 +              int sides = f->v4 ? 4 : 3;
 +
 +              for(j = 0; j < sides; ++j) {
 +                      node->face_vert_indices[i*4 + j]= 
 +                              map_insert_vert(bvh, map, &node->face_verts,
 +                                              &node->uniq_verts, (&f->v1)[j]);
 +              }
 +      }
 +
 +      node->vert_indices = MEM_callocN(sizeof(int) *
 +                                       (node->uniq_verts + node->face_verts),
 +                                       "bvh node vert indices");
 +
 +      /* Build the vertex list, unique verts first */
 +      for(iter = BLI_ghashIterator_new(map), i = 0;
 +          !BLI_ghashIterator_isDone(iter);
 +          BLI_ghashIterator_step(iter), ++i) {
 +              void *value = BLI_ghashIterator_getValue(iter);
 +              int ndx = GET_INT_FROM_POINTER(value);
 +
 +              if(ndx < 0)
 +                      ndx = -ndx + node->uniq_verts - 1;
 +
 +              node->vert_indices[ndx] =
 +                      GET_INT_FROM_POINTER(BLI_ghashIterator_getKey(iter));
 +      }
 +
 +      for(i = 0; i < totface*4; ++i)
 +              if(node->face_vert_indices[i] < 0)
 +                      node->face_vert_indices[i]= -node->face_vert_indices[i] + node->uniq_verts - 1;
 +
 +      node->draw_buffers =
 +              GPU_build_buffers(map, bvh->verts, bvh->faces,
 +                                node->face_indices,
 +                                node->totface, node->vert_indices,
 +                                node->uniq_verts,
 +                                node->uniq_verts + node->face_verts);
 +
 +      BLI_ghash_free(map, NULL, NULL);
 +}
 +
 +/* Recursively build a node in the tree
 +
 +   vb is the voxel box around all of the primitives contained in
 +   this node.
 +
 +   cb is the bounding box around all the centroids of the primitives
 +   contained in this node; when NULL it is computed here
 +
 +   offset and count indicate a range in the array of primitive indices
 +
 +   Nodes are always addressed through node_index: grow_nodes may
 +   reallocate the node array while children are added.
 +*/
 +
 +void build_sub(PBVH *bvh, int node_index, BB *cb, BBC *prim_bbc,
 +             int offset, int count)
 +{
 +      int i, axis, end;
 +      BB cb_backing;
 +
 +      /* Decide whether this is a leaf or not */
 +      if(count <= LEAF_LIMIT) {
 +              bvh->nodes[node_index].flag |= PBVH_Leaf;
 +
 +              bvh->nodes[node_index].face_indices = bvh->face_indices + offset;
 +              bvh->nodes[node_index].totface = count;
 +
 +              /* Still need vb for searches */
 +              BB_reset(&bvh->nodes[node_index].vb);
 +              for(i = offset + count - 1; i >= offset; --i) {
 +                      BB_expand_with_bb(&bvh->nodes[node_index].vb,
 +                                        (BB*)(prim_bbc +
 +                                              bvh->face_indices[i]));
 +              }
 +              
 +              build_leaf_node(bvh, bvh->nodes + node_index);
 +              bvh->nodes[node_index].orig_vb= bvh->nodes[node_index].vb;
 +
 +              /* Done with this subtree */
 +              return;
 +      }
 +      else {
 +              BB_reset(&bvh->nodes[node_index].vb);
 +              /* the two children take the next two free slots of the node array */
 +              bvh->nodes[node_index].children_offset = bvh->totnode;
 +              grow_nodes(bvh, bvh->totnode + 2);
 +
 +              if(!cb) {
 +                      cb = &cb_backing;
 +                      BB_reset(cb);
 +                      for(i = offset + count - 1; i >= offset; --i)
 +                              BB_expand(cb, prim_bbc[bvh->face_indices[i]].bcentroid);
 +              }
 +      }
 +
 +      /* split along the widest axis of the centroid bounds, at their midpoint */
 +      axis = BB_widest_axis(cb);
 +
 +      for(i = offset + count - 1; i >= offset; --i) {
 +              BB_expand_with_bb(&bvh->nodes[node_index].vb,
 +                                (BB*)(prim_bbc + bvh->face_indices[i]));
 +      }
 +
 +      bvh->nodes[node_index].orig_vb= bvh->nodes[node_index].vb;
 +
 +      end = partition_indices(bvh->face_indices, offset, offset + count - 1,
 +                              axis,
 +                              (cb->bmax[axis] + cb->bmin[axis]) * 0.5f,
 +                              prim_bbc);
 +      /* debug verification of the partition, prints on failure */
 +      check_partitioning(bvh->face_indices, offset, offset + count - 1,
 +                         axis,
 +                         (cb->bmax[axis] + cb->bmin[axis]) * 0.5f,
 +                         prim_bbc, end);
 +
 +      build_sub(bvh, bvh->nodes[node_index].children_offset, NULL,
 +                prim_bbc, offset, end - offset);
 +      build_sub(bvh, bvh->nodes[node_index].children_offset + 1, NULL,
 +                prim_bbc, end, offset + count - end);
 +}
 +
 +/* Do a full rebuild */
 +void BLI_pbvh_build(PBVH *bvh, MFace *faces, MVert *verts, int totface, int totvert)
 +{
 +      BBC *prim_bbc = NULL;
 +      BB cb;
 +      int i, j;
 +
 +      if(totface != bvh->totface) {
 +              bvh->totface = totface;
 +              if(bvh->nodes) MEM_freeN(bvh->nodes);
 +              if(bvh->face_indices) MEM_freeN(bvh->face_indices);
 +              bvh->face_indices = MEM_callocN(sizeof(int) * totface,
 +                                              "bvh face indices");
 +              for(i = 0; i < totface; ++i)
 +                      bvh->face_indices[i] = i;
 +              bvh->totnode = 0;
 +              if(bvh->node_mem_count < 100) {
 +                      bvh->node_mem_count = 100;
 +                      bvh->nodes = MEM_callocN(sizeof(PBVHNode) *
 +                                               bvh->node_mem_count,
 +                                               "bvh initial nodes");
 +              }
 +      }
 +
 +      bvh->faces = faces;
 +      bvh->verts = verts;
 +      bvh->vert_bitmap = BLI_bitmap_new(totvert);
 +      bvh->totvert= totvert;
 +
 +      BB_reset(&cb);
 +
 +      /* For each face, store the AABB and the AABB centroid */
 +      prim_bbc = MEM_mallocN(sizeof(BBC) * totface, "prim_bbc");
 +
 +      for(i = 0; i < totface; ++i) {
 +              MFace *f = faces + i;
 +              const int sides = f->v4 ? 4 : 3;
 +              BBC *bbc = prim_bbc + i;
 +
 +              BB_reset((BB*)bbc);
 +
 +              for(j = 0; j < sides; ++j)
 +                      BB_expand((BB*)bbc, verts[(&f->v1)[j]].co);
 +
 +              BBC_update_centroid(bbc);
 +
 +              BB_expand(&cb, bbc->bcentroid);
 +      }
 +
 +      bvh->totnode = 1;
 +      build_sub(bvh, 0, &cb, prim_bbc, 0, totface);
 +
 +      MEM_freeN(prim_bbc);
 +      MEM_freeN(bvh->vert_bitmap);
 +}
 +
 +PBVH *BLI_pbvh_new(void)
 +{
 +      PBVH *bvh = MEM_callocN(sizeof(PBVH), "pbvh");
 +
 +      return bvh;
 +}
 +
 +void BLI_pbvh_free(PBVH *bvh)
 +{
 +      int i;
 +
 +      for(i = 0; i < bvh->totnode; ++i) {
 +              if(bvh->nodes[i].flag & PBVH_Leaf) {
 +                      GPU_free_buffers(bvh->nodes[i].draw_buffers);
 +                      MEM_freeN(bvh->nodes[i].vert_indices);
 +                      MEM_freeN(bvh->nodes[i].face_vert_indices);
 +              }
 +      }
 +
 +      MEM_freeN(bvh->nodes);
 +      MEM_freeN(bvh->face_indices);
 +      MEM_freeN(bvh);
 +}
 +
 +void BLI_pbvh_set_source(PBVH *bvh, MVert *mvert, MFace *mface)
 +{
 +      bvh->verts = mvert;
 +      bvh->faces = mface;
 +}
 +
 +static void do_hit_callback(PBVH *bvh, PBVHNode *node,
 +                          BLI_pbvh_HitCallback cb, void *data)
 +{
 +      if(cb)
 +              cb(node, data);
 +}
 +
 +static void pbvh_iter_begin(PBVHIter *iter, PBVH *bvh, BLI_pbvh_SearchCallback scb, void *search_data)
 +{
 +      iter->bvh= bvh;
 +      iter->scb= scb;
 +      iter->search_data= search_data;
 +
 +      iter->stack= iter->stackfixed;
 +      iter->stackspace= STACK_FIXED_DEPTH;
 +
 +      iter->stack[0].node= bvh->nodes;
 +      iter->stack[0].revisiting= 0;
 +      iter->stacksize= 1;
 +}
 +
 +static void pbvh_iter_end(PBVHIter *iter)
 +{
 +      if(iter->stackspace > STACK_FIXED_DEPTH)
 +              MEM_freeN(iter->stack);
 +}
 +
 +static void pbvh_stack_push(PBVHIter *iter, PBVHNode *node, int revisiting)
 +{
 +      if(iter->stacksize == iter->stackspace) {
 +              PBVHStack *newstack;
 +
 +              iter->stackspace *= 2;
 +              newstack= MEM_callocN(sizeof(PBVHStack)*iter->stackspace, "PBVHStack");
 +              memcpy(newstack, iter->stack, sizeof(PBVHStack)*iter->stacksize);
 +
 +              if(iter->stackspace > STACK_FIXED_DEPTH)
 +                      MEM_freeN(iter->stack);
 +              iter->stack= newstack;
 +      }
 +
 +      iter->stack[iter->stacksize].node= node;
 +      iter->stack[iter->stacksize].revisiting= revisiting;
 +      iter->stacksize++;
 +}
 +
 +static PBVHNode *pbvh_iter_next(PBVHIter *iter)
 +{
 +      PBVHNode *node;
 +      int revisiting;
 +      void *search_data;
 +
 +      /* purpose here is to traverse tree, visiting child nodes before their
 +         parents, this order is necessary for e.g. computing bounding boxes */
 +
 +      while(iter->stacksize) {
 +              /* pop node */
 +              iter->stacksize--;
 +              node= iter->stack[iter->stacksize].node;
 +              revisiting= iter->stack[iter->stacksize].revisiting;
 +
 +              /* revisiting node already checked */
 +              if(revisiting)
 +                      return node;
 +
 +              /* check search callback */
 +              search_data= iter->search_data;
 +
 +              if(iter->scb && !iter->scb(node, search_data))
 +                      continue; /* don't traverse, outside of search zone */
 +
 +              if(node->flag & PBVH_Leaf) {
 +                      /* immediately hit leaf node */
 +                      return node;
 +              }
 +              else {
 +                      /* come back later when children are done */
 +                      pbvh_stack_push(iter, node, 1);
 +
 +                      /* push two child nodes on the stack */
 +                      pbvh_stack_push(iter, iter->bvh->nodes+node->children_offset+1, 0);
 +                      pbvh_stack_push(iter, iter->bvh->nodes+node->children_offset, 0);
 +              }
 +      }
 +
 +      return NULL;
 +}
 +
 +void BLI_pbvh_search_gather(PBVH *bvh,
 +      BLI_pbvh_SearchCallback scb, void *search_data,
 +      PBVHNode ***r_array, int *r_tot)
 +{
 +      PBVHIter iter;
 +      PBVHNode **array= NULL, **newarray, *node;
 +      int tot= 0, space= 0;
 +
 +      pbvh_iter_begin(&iter, bvh, scb, search_data);
 +
 +      while((node=pbvh_iter_next(&iter))) {
 +              if(node->flag & PBVH_Leaf) {
 +                      if(tot == space) {
 +                              /* resize array if needed */
 +                              space= (tot == 0)? 32: space*2;
 +                              newarray= MEM_callocN(sizeof(PBVHNode)*space, "PBVHNodeSearch");
 +
 +                              if(array) {
 +                                      memcpy(newarray, array, sizeof(PBVHNode)*tot);
 +                                      MEM_freeN(array);
 +                              }
 +
 +                              array= newarray;
 +                      }
 +
 +                      array[tot]= node;
 +                      tot++;
 +              }
 +      }
 +
 +      pbvh_iter_end(&iter);
 +
 +      *r_array= array;
 +      *r_tot= tot;
 +}
 +
 +void BLI_pbvh_search_callback(PBVH *bvh,
 +      BLI_pbvh_SearchCallback scb, void *search_data,
 +      BLI_pbvh_HitCallback hcb, void *hit_data)
 +{
 +      PBVHIter iter;
 +      PBVHNode *node;
 +
 +      pbvh_iter_begin(&iter, bvh, scb, search_data);
 +
 +      while((node=pbvh_iter_next(&iter)))
 +              if(node->flag & PBVH_Leaf)
 +                      do_hit_callback(bvh, node, hcb, hit_data);
 +
 +      pbvh_iter_end(&iter);
 +}
 +
 +static int update_search_cb(PBVHNode *node, void *data_v)
 +{
 +      int flag= GET_INT_FROM_POINTER(data_v);
 +
 +      if(node->flag & PBVH_Leaf)
 +              return (node->flag & flag);
 +      
 +      return 1;
 +}
 +
 +/* Recompute vertex normals for the given nodes.
 +
 +   First pass: for every node flagged PBVH_UpdateNormals, compute each face
 +   normal, add it to a temporary per-vertex sum for the face vertices marked
 +   ME_VERT_PBVH_UPDATE, and store it in face_nors when that is not NULL.
 +   Second pass: normalize the sums of the marked unique vertices of those
 +   nodes into MVert.no as shorts, then clear the vertex and node flags. */
 +static void pbvh_update_normals(PBVH *bvh, PBVHNode **nodes,
 +      int totnode, float (*face_nors)[3])
 +{
 +      float (*vnor)[3];
 +      int n;
 +
 +      /* could be per node to save some memory, but also means
 +         we have to store for each vertex which node it is in */
 +      vnor= MEM_callocN(sizeof(float)*3*bvh->totvert, "bvh temp vnors");
 +
 +      /* subtle assumptions:
 +         - We know that for all edited vertices, the nodes with faces
 +           adjacent to these vertices have been marked with PBVH_UpdateNormals.
 +               This is true because if the vertex is inside the brush radius, the
 +               bounding box of its adjacent faces will be as well.
 +         - However this is only true for the vertices that have actually been
 +           edited, not for all vertices in the nodes marked for update, so we
 +               can only update vertices marked with ME_VERT_PBVH_UPDATE.
 +      */
 +
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n = 0; n < totnode; n++) {
 +              PBVHNode *node= nodes[n];
 +
 +              if((node->flag & PBVH_UpdateNormals)) {
 +                      int i, j, totface, *faces;
 +
 +                      faces= node->face_indices;
 +                      totface= node->totface;
 +
 +                      for(i = 0; i < totface; ++i) {
 +                              MFace *f= bvh->faces + faces[i];
 +                              float fn[3];
 +                              unsigned int *fv = &f->v1;
 +                              int sides= (f->v4)? 4: 3;
 +
 +                              if(f->v4)
-                                       CalcNormFloat(bvh->verts[f->v1].co, bvh->verts[f->v2].co,
-                                                                 bvh->verts[f->v3].co, fn);
++                                      normal_quad_v3(fn, bvh->verts[f->v1].co, bvh->verts[f->v2].co,
++                                                                 bvh->verts[f->v3].co, bvh->verts[f->v4].co);
 +                              else
-                                       VECCOPY(face_nors[faces[i]], fn);
++                                      normal_tri_v3(fn, bvh->verts[f->v1].co, bvh->verts[f->v2].co,
++                                                                bvh->verts[f->v3].co);
 +
 +                              for(j = 0; j < sides; ++j) {
 +                                      int v= fv[j];
 +
 +                                      if(bvh->verts[v].flag & ME_VERT_PBVH_UPDATE) {
 +                                              /* this seems like it could be very slow but profile
 +                                                 does not show this, so just leave it for now? */
 +                                              #pragma omp atomic
 +                                              vnor[v][0] += fn[0];
 +                                              #pragma omp atomic
 +                                              vnor[v][1] += fn[1];
 +                                              #pragma omp atomic
 +                                              vnor[v][2] += fn[2];
 +                                      }
 +                              }
 +
 +                              if(face_nors)
-                                       VECCOPY(no, vnor[v]);
-                                       Normalize(no);
++                                      copy_v3_v3(face_nors[faces[i]], fn);
 +                      }
 +              }
 +      }
 +
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n = 0; n < totnode; n++) {
 +              PBVHNode *node= nodes[n];
 +
 +              if(node->flag & PBVH_UpdateNormals) {
 +                      int i, *verts, totvert;
 +
 +                      verts= node->vert_indices;
 +                      totvert= node->uniq_verts; /* only the verts unique to this node are written here */
 +
 +                      for(i = 0; i < totvert; ++i) {
 +                              const int v = verts[i];
 +                              MVert *mvert= &bvh->verts[v];
 +
 +                              if(mvert->flag & ME_VERT_PBVH_UPDATE) {
 +                                      float no[3];
 +
-       VecCopyf(bb_min, bb.bmin);
-       VecCopyf(bb_max, bb.bmax);
++                                      copy_v3_v3(no, vnor[v]);
++                                      normalize_v3(no);
 +                                      
 +                                      mvert->no[0] = (short)(no[0]*32767.0f);
 +                                      mvert->no[1] = (short)(no[1]*32767.0f);
 +                                      mvert->no[2] = (short)(no[2]*32767.0f);
 +                                      
 +                                      mvert->flag &= ~ME_VERT_PBVH_UPDATE;
 +                              }
 +                      }
 +
 +                      node->flag &= ~PBVH_UpdateNormals;
 +              }
 +      }
 +
 +      MEM_freeN(vnor);
 +}
 +
 +static void pbvh_update_BB_redraw(PBVH *bvh, PBVHNode **nodes,
 +      int totnode, int flag)
 +{
 +      int n;
 +
 +      /* update BB, redraw flag */
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n = 0; n < totnode; n++) {
 +              PBVHNode *node= nodes[n];
 +
 +              if((flag & PBVH_UpdateBB) && (node->flag & PBVH_UpdateBB))
 +                      /* don't clear flag yet, leave it for flushing later */
 +                      update_node_vb(bvh, node);
 +
 +              if((flag & PBVH_UpdateOriginalBB) && (node->flag & PBVH_UpdateOriginalBB))
 +                      node->orig_vb= node->vb;
 +
 +              if((flag & PBVH_UpdateRedraw) && (node->flag & PBVH_UpdateRedraw))
 +                      node->flag &= ~PBVH_UpdateRedraw;
 +      }
 +}
 +
 +static void pbvh_update_draw_buffers(PBVH *bvh, PBVHNode **nodes, int totnode)
 +{
 +      PBVHNode *node;
 +      int n;
 +
 +      /* can't be done in parallel with OpenGL */
 +      for(n = 0; n < totnode; n++) {
 +              node= nodes[n];
 +
 +              if(node->flag & PBVH_UpdateDrawBuffers) {
 +                      GPU_update_buffers(node->draw_buffers,
 +                                         bvh->verts,
 +                                         node->vert_indices,
 +                                         node->uniq_verts +
 +                                         node->face_verts);
 +
 +                      node->flag &= ~PBVH_UpdateDrawBuffers;
 +              }
 +      }
 +}
 +
 +static int pbvh_flush_bb(PBVH *bvh, PBVHNode *node, int flag)
 +{
 +      int update= 0;
 +
 +      /* difficult to multithread well, so we just recurse single threaded */
 +      if(node->flag & PBVH_Leaf) {
 +              if(flag & PBVH_UpdateBB) {
 +                      update |= (node->flag & PBVH_UpdateBB);
 +                      node->flag &= ~PBVH_UpdateBB;
 +              }
 +
 +              if(flag & PBVH_UpdateOriginalBB) {
 +                      update |= (node->flag & PBVH_UpdateOriginalBB);
 +                      node->flag &= ~PBVH_UpdateOriginalBB;
 +              }
 +
 +              return update;
 +      }
 +      else {
 +              update |= pbvh_flush_bb(bvh, bvh->nodes + node->children_offset, flag);
 +              update |= pbvh_flush_bb(bvh, bvh->nodes + node->children_offset + 1, flag);
 +
 +              if(update & PBVH_UpdateBB)
 +                      update_node_vb(bvh, node);
 +              if(update & PBVH_UpdateOriginalBB)
 +                      node->orig_vb= node->vb;
 +      }
 +
 +      return update;
 +}
 +
 +void BLI_pbvh_update(PBVH *bvh, int flag, float (*face_nors)[3])
 +{
 +      PBVHNode **nodes;
 +      int totnode;
 +
 +      BLI_pbvh_search_gather(bvh, update_search_cb, SET_INT_IN_POINTER(flag),
 +              &nodes, &totnode);
 +
 +      if(flag & PBVH_UpdateNormals)
 +              pbvh_update_normals(bvh, nodes, totnode, face_nors);
 +
 +      if(flag & (PBVH_UpdateBB|PBVH_UpdateOriginalBB|PBVH_UpdateRedraw))
 +              pbvh_update_BB_redraw(bvh, nodes, totnode, flag);
 +
 +      if(flag & PBVH_UpdateDrawBuffers)
 +              pbvh_update_draw_buffers(bvh, nodes, totnode);
 +
 +      if(flag & (PBVH_UpdateBB|PBVH_UpdateOriginalBB))
 +              pbvh_flush_bb(bvh, bvh->nodes, flag);
 +
 +      if(nodes) MEM_freeN(nodes);
 +}
 +
 +void BLI_pbvh_redraw_BB(PBVH *bvh, float bb_min[3], float bb_max[3])
 +{
 +      PBVHIter iter;
 +      PBVHNode *node;
 +      BB bb;
 +
 +      BB_reset(&bb);
 +
 +      pbvh_iter_begin(&iter, bvh, NULL, NULL);
 +
 +      while((node=pbvh_iter_next(&iter)))
 +              if(node->flag & PBVH_UpdateRedraw)
 +                      BB_expand_with_bb(&bb, &node->vb);
 +
 +      pbvh_iter_end(&iter);
 +
-       VecCopyf(bb_min, node->vb.bmin);
-       VecCopyf(bb_max, node->vb.bmax);
++      copy_v3_v3(bb_min, bb.bmin);
++      copy_v3_v3(bb_max, bb.bmax);
 +}
 +
 +/***************************** Node Access ***********************************/
 +
 +void BLI_pbvh_node_mark_update(PBVHNode *node)
 +{
 +      node->flag |= PBVH_UpdateNormals|PBVH_UpdateBB|PBVH_UpdateOriginalBB|PBVH_UpdateDrawBuffers|PBVH_UpdateRedraw;
 +}
 +
 +void BLI_pbvh_node_get_verts(PBVHNode *node, int **vert_indices, int *totvert, int *allvert)
 +{
 +      if(vert_indices) *vert_indices= node->vert_indices;
 +      if(totvert) *totvert= node->uniq_verts;
 +      if(allvert) *allvert= node->uniq_verts + node->face_verts;
 +}
 +
 +void BLI_pbvh_node_get_faces(PBVHNode *node, int **face_indices, int **face_vert_indices, int *totface)
 +{
 +      if(face_indices) *face_indices= node->face_indices;
 +      if(face_vert_indices) *face_vert_indices= node->face_vert_indices;
 +      if(totface) *totface= node->totface;
 +}
 +
 +void *BLI_pbvh_node_get_draw_buffers(PBVHNode *node)
 +{
 +      return node->draw_buffers;
 +}
 +
 +void BLI_pbvh_node_get_BB(PBVHNode *node, float bb_min[3], float bb_max[3])
 +{
-       VecCopyf(bb_min, node->orig_vb.bmin);
-       VecCopyf(bb_max, node->orig_vb.bmax);
++      copy_v3_v3(bb_min, node->vb.bmin);
++      copy_v3_v3(bb_max, node->vb.bmax);
 +}
 +
 +void BLI_pbvh_node_get_original_BB(PBVHNode *node, float bb_min[3], float bb_max[3])
 +{
-       VecCopyf(bbox[0], bb_min);
-       VecCopyf(bbox[1], bb_max);
++      copy_v3_v3(bb_min, node->orig_vb.bmin);
++      copy_v3_v3(bb_max, node->orig_vb.bmax);
 +}
 +
 +/********************************* Raycast ***********************************/
 +
 +typedef struct {
 +      /* Ray */
 +      float start[3];
 +      int sign[3];
 +      float inv_dir[3];
 +      int original;
 +} RaycastData;
 +
 +/* Adapted from here: http://www.gamedev.net/community/forums/topic.asp?topic_id=459973 */
 +static int ray_aabb_intersect(PBVHNode *node, void *data_v)
 +{
 +      RaycastData *ray = data_v;
 +      float bb_min[3], bb_max[3], bbox[2][3];
 +      float tmin, tmax, tymin, tymax, tzmin, tzmax;
 +
 +      if(ray->original)
 +              BLI_pbvh_node_get_original_BB(node, bb_min, bb_max);
 +      else
 +              BLI_pbvh_node_get_BB(node, bb_min, bb_max);
 +
-       VecCopyf(rcd.start, ray_start);
++      copy_v3_v3(bbox[0], bb_min);
++      copy_v3_v3(bbox[1], bb_max);
 +
 +      tmin = (bbox[ray->sign[0]][0] - ray->start[0]) * ray->inv_dir[0];
 +      tmax = (bbox[1-ray->sign[0]][0] - ray->start[0]) * ray->inv_dir[0];
 +
 +      tymin = (bbox[ray->sign[1]][1] - ray->start[1]) * ray->inv_dir[1];
 +      tymax = (bbox[1-ray->sign[1]][1] - ray->start[1]) * ray->inv_dir[1];
 +
 +      if((tmin > tymax) || (tymin > tmax))
 +              return 0;
 +      if(tymin > tmin)
 +              tmin = tymin;
 +      if(tymax < tmax)
 +              tmax = tymax;
 +
 +      tzmin = (bbox[ray->sign[2]][2] - ray->start[2]) * ray->inv_dir[2];
 +      tzmax = (bbox[1-ray->sign[2]][2] - ray->start[2]) * ray->inv_dir[2];
 +
 +      if((tmin > tzmax) || (tzmin > tmax))
 +              return 0;
 +
 +      return 1;
 +
 +      /* XXX: Not sure about this? 
 +         if(tzmin > tmin)
 +         tmin = tzmin;
 +         if(tzmax < tmax)
 +         tmax = tzmax;
 +         return ((tmin < t1) && (tmax > t0));
 +      */
 +
 +}
 +
 +void BLI_pbvh_raycast(PBVH *bvh, BLI_pbvh_HitCallback cb, void *data,
 +                    float ray_start[3], float ray_normal[3], int original)
 +{
 +      RaycastData rcd;
 +
++      copy_v3_v3(rcd.start, ray_start);
 +      rcd.inv_dir[0] = 1.0f / ray_normal[0];
 +      rcd.inv_dir[1] = 1.0f / ray_normal[1];
 +      rcd.inv_dir[2] = 1.0f / ray_normal[2];
 +      rcd.sign[0] = rcd.inv_dir[0] < 0;
 +      rcd.sign[1] = rcd.inv_dir[1] < 0;
 +      rcd.sign[2] = rcd.inv_dir[2] < 0;
 +      rcd.original = original;
 +
 +      BLI_pbvh_search_callback(bvh, ray_aabb_intersect, &rcd, cb, data);
 +}
 +
  
  #include "MEM_guardedalloc.h"
  
- #include "BLI_arithb.h"
+ #include "BLI_math.h"
  #include "BLI_blenlib.h"
  #include "BLI_dynstr.h"
 +#include "BLI_ghash.h"
 +#include "BLI_pbvh.h"
 +#include "BLI_threads.h"
  
  #include "DNA_armature_types.h"
  #include "DNA_brush_types.h"
@@@ -184,340 -197,470 +184,340 @@@ static void projectf(bglMats *mats, con
        p[0]= f[0];
        p[1]= f[1];
  }
 +*/
  
 -/* ===== Sculpting =====
 - *
 - */
 +/*** BVH Tree ***/
  
 -/* Return modified brush strength. Includes the direction of the brush, positive
 -   values pull vertices, negative values push. Uses tablet pressure and a
 -   special multiplier found experimentally to scale the strength factor. */
 -static float brush_strength(Sculpt *sd, StrokeCache *cache)
 +/* Get a screen-space rectangle of the modified area */
 +int sculpt_get_redraw_rect(ARegion *ar, RegionView3D *rv3d,
 +                          Object *ob, rcti *rect)
  {
 -      Brush *brush = paint_brush(&sd->paint);
 -      /* Primary strength input; square it to make lower values more sensitive */
 -      float alpha = brush->alpha * brush->alpha;
 +      float bb_min[3], bb_max[3], pmat[4][4];
 +      int i, j, k;
  
 -      float dir= brush->flag & BRUSH_DIR_IN ? -1 : 1;
 -      float pressure= 1;
 -      float flip= cache->flip ? -1:1;
 +      view3d_get_object_project_mat(rv3d, ob, pmat);
  
 -      if(brush->flag & BRUSH_ALPHA_PRESSURE)
 -              pressure *= cache->pressure;
 -      
 -      switch(brush->sculpt_tool){
 -      case SCULPT_TOOL_DRAW:
 -      case SCULPT_TOOL_INFLATE:
 -      case SCULPT_TOOL_CLAY:
 -      case SCULPT_TOOL_FLATTEN:
 -      case SCULPT_TOOL_LAYER:
 -              return alpha * dir * pressure * flip; /*XXX: not sure why? was multiplied by G.vd->grid */;
 -      case SCULPT_TOOL_SMOOTH:
 -              return alpha * 4 * pressure;
 -      case SCULPT_TOOL_PINCH:
 -              return alpha / 2 * dir * pressure * flip;
 -      case SCULPT_TOOL_GRAB:
 -              return 1;
 -      default:
 +      BLI_pbvh_redraw_BB(ob->sculpt->tree, bb_min, bb_max);
 +
 +      rect->xmin = rect->ymin = INT_MAX;
 +      rect->xmax = rect->ymax = INT_MIN;
 +
 +      if(bb_min[0] > bb_max[0] || bb_min[1] > bb_max[1] || bb_min[2] > bb_max[2])
                return 0;
 +
 +      for(i = 0; i < 2; ++i) {
 +              for(j = 0; j < 2; ++j) {
 +                      for(k = 0; k < 2; ++k) {
 +                              float vec[3], proj[2];
 +                              vec[0] = i ? bb_min[0] : bb_max[0];
 +                              vec[1] = j ? bb_min[1] : bb_max[1];
 +                              vec[2] = k ? bb_min[2] : bb_max[2];
 +                              view3d_project_float(ar, vec, proj, pmat);
 +                              rect->xmin = MIN2(rect->xmin, proj[0]);
 +                              rect->xmax = MAX2(rect->xmax, proj[0]);
 +                              rect->ymin = MIN2(rect->ymin, proj[1]);
 +                              rect->ymax = MAX2(rect->ymax, proj[1]);
 +                      }
 +              }
        }
 +      
 +      return rect->xmin < rect->xmax && rect->ymin < rect->ymax;
  }
  
 -/* Handles clipping against a mirror modifier and SCULPT_LOCK axis flags */
 -static void sculpt_clip(Sculpt *sd, SculptSession *ss, float *co, const float val[3])
 +void sculpt_get_redraw_planes(float planes[4][4], ARegion *ar,
 +                            RegionView3D *rv3d, Object *ob)
  {
 +      BoundBox *bb = MEM_callocN(sizeof(BoundBox), "sculpt boundbox");
 +      bglMats mats;
        int i;
 +      rcti rect;
 +
 +      view3d_get_transformation(ar, rv3d, ob, &mats);
 +      sculpt_get_redraw_rect(ar, rv3d,ob, &rect);
 +
 +#if 1
 +      /* use some extra space just in case */
 +      rect.xmin -= 2;
 +      rect.xmax += 2;
 +      rect.ymin -= 2;
 +      rect.ymax += 2;
 +#else
 +      /* it was doing this before, allows redrawing a smaller
 +         part of the screen but also gives artifacts .. */
 +      rect.xmin += 2;
 +      rect.xmax -= 2;
 +      rect.ymin += 2;
 +      rect.ymax -= 2;
 +#endif
  
 -      for(i=0; i<3; ++i) {
 -              if(sd->flags & (SCULPT_LOCK_X << i))
 -                      continue;
 +      view3d_calculate_clipping(bb, planes, &mats, &rect);
  
 -              if((ss->cache->flag & (CLIP_X << i)) && (fabs(co[i]) <= ss->cache->clip_tolerance[i]))
 -                      co[i]= 0.0f;
 -              else
 -                      co[i]= val[i];
 -      }               
 -}
 +      for(i = 0; i < 16; ++i)
 +              ((float*)planes)[i] = -((float*)planes)[i];
  
 -static void add_norm_if(float view_vec[3], float out[3], float out_flip[3], const short no[3])
 -{
 -      float fno[3] = {no[0], no[1], no[2]};
 -
 -      normalize_v3(fno);
 +      MEM_freeN(bb);
  
 -      if((dot_v3v3(view_vec, fno)) > 0) {
 -              add_v3_v3v3(out, out, fno);
 -      } else {
 -              add_v3_v3v3(out_flip, out_flip, fno); /* out_flip is used when out is {0,0,0} */
 -      }
 +      /* clear redraw flag from nodes */
 +      BLI_pbvh_update(ob->sculpt->tree, PBVH_UpdateRedraw, NULL);
  }
  
 -/* Currently only for the draw brush; finds average normal for all active
 -   vertices */
 -static void calc_area_normal(Sculpt *sd, SculptSession *ss, float out[3], const ListBase* active_verts)
 -{
 -      Brush *brush = paint_brush(&sd->paint);
 -      StrokeCache *cache = ss->cache;
 -      ActiveData *node = active_verts->first;
 -      const int view = 0; /* XXX: should probably be a flag, not number: brush_type==SCULPT_TOOL_DRAW ? sculptmode_brush()->view : 0; */
 -      float out_flip[3];
 -      float *out_dir = cache->view_normal_symmetry;
 -      
 -      out[0]=out[1]=out[2] = out_flip[0]=out_flip[1]=out_flip[2] = 0;
 +/************************** Undo *************************/
  
 -      if(brush->flag & BRUSH_ANCHORED) {
 -              for(; node; node = node->next)
 -                      add_norm_if(out_dir, out, out_flip, cache->orig_norms[node->Index]);
 -      }
 -      else {
 -              for(; node; node = node->next)
 -                      add_norm_if(out_dir, out, out_flip, ss->mvert[node->Index].no);
 -      }
 +typedef struct SculptUndoNode {
 +      struct SculptUndoNode *next, *prev;
  
 -      if (out[0]==0.0 && out[1]==0.0 && out[2]==0.0) {
 -              VECCOPY(out, out_flip);
 -      }
 -      
 -      normalize_v3(out);
 +      char idname[MAX_ID_NAME];       /* name instead of pointer*/
 +      int maxvert;                            /* to verify if totvert is still the same */
 +      void *node;                                     /* only during push, not valid afterwards! */
  
 -      if(out_dir) {
 -              out[0] = out_dir[0] * view + out[0] * (10-view);
 -              out[1] = out_dir[1] * view + out[1] * (10-view);
 -              out[2] = out_dir[2] * view + out[2] * (10-view);
 -      }
 -      
 -      normalize_v3(out);
 -}
 +      float (*co)[3];
 +      short (*no)[3];
 +      int *index;
 +      int totvert;
 +} SculptUndoNode;
  
 -static void do_draw_brush(Sculpt *sd, SculptSession *ss, const ListBase* active_verts)
 +static void update_cb(PBVHNode *node, void *data)
  {
 -      float area_normal[3];
 -      ActiveData *node= active_verts->first;
 -      float* buffer;
 -
 -      calc_area_normal(sd, ss, area_normal, active_verts);
 -      
 -      buffer = ss->drawobject!=0?(float *)GPU_buffer_lock( ss->drawobject->vertices ):0;
 -
 -      while(node){
 -              float *co= ss->mvert[node->Index].co;
 +      BLI_pbvh_node_mark_update(node);
 +}
  
 -              const float val[3]= {co[0]+area_normal[0]*ss->cache->radius*node->Fade*ss->cache->scale[0],
 -                                   co[1]+area_normal[1]*ss->cache->radius*node->Fade*ss->cache->scale[1],
 -                                   co[2]+area_normal[2]*ss->cache->radius*node->Fade*ss->cache->scale[2]};
 +static void sculpt_undo_restore(bContext *C, ListBase *lb)
 +{
 +      Object *ob = CTX_data_active_object(C);
 +      SculptSession *ss = ob->sculpt;
 +      SculptUndoNode *unode;
 +      MVert *mvert;
 +      MultiresModifierData *mmd;
 +      int *index;
 +      int i, totvert, update= 0;
  
 -              if( buffer != 0 ) {
 -                      IndexLink *cur = &ss->drawobject->indices[node->Index];
 -                      while( cur != 0 && cur->element != -1 ) {
 -                              sculpt_clip(sd, ss, &buffer[cur->element*3], val);
 -                              cur = cur->next;
 -                      }
 -              }
 +      sculpt_update_mesh_elements(C, 0);
  
 -              sculpt_clip(sd, ss, co, val);
 +      for(unode=lb->first; unode; unode=unode->next) {
 +              if(!(strcmp(unode->idname, ob->id.name)==0))
 +                      continue;
 +              if(ss->totvert != unode->maxvert)
 +                      continue;
  
 -              node= node->next;
 -      }
 -      if( buffer != 0 )
 -              GPU_buffer_unlock( ss->drawobject->vertices );
 -}
 +              index= unode->index;
 +              totvert= unode->totvert;
 +              mvert= ss->mvert;
  
 -/* For the smooth brush, uses the neighboring vertices around vert to calculate
 -   a smoothed location for vert. Skips corner vertices (used by only one
 -   polygon.) */
 -static void neighbor_average(SculptSession *ss, float avg[3], const int vert)
 -{
 -      int i, skip= -1, total=0;
 -      IndexNode *node= ss->fmap[vert].first;
 -      char ncount= BLI_countlist(&ss->fmap[vert]);
 -      MFace *f;
 -
 -      avg[0] = avg[1] = avg[2] = 0;
 -              
 -      /* Don't modify corner vertices */
 -      if(ncount==1) {
 -              copy_v3_v3(avg, ss->mvert[vert].co);
 -              return;
 -      }
 +              for(i=0; i<totvert; i++) {
 +                      float tmp[3];
  
-                       VECCOPY(tmp, mvert[index[i]].co);
-                       VECCOPY(mvert[index[i]].co, unode->co[i])
-                       VECCOPY(unode->co[i], tmp);
 -      while(node){
 -              f= &ss->mface[node->index];
 -              
 -              if(f->v4) {
 -                      skip= (f->v1==vert?2:
 -                             f->v2==vert?3:
 -                             f->v3==vert?0:
 -                             f->v4==vert?1:-1);
 -              }
++                      copy_v3_v3(tmp, mvert[index[i]].co);
++                      copy_v3_v3(mvert[index[i]].co, unode->co[i]);
++                      copy_v3_v3(unode->co[i], tmp);
  
 -              for(i=0; i<(f->v4?4:3); ++i) {
 -                      if(i != skip && (ncount!=2 || BLI_countlist(&ss->fmap[(&f->v1)[i]]) <= 2)) {
 -                              add_v3_v3v3(avg, avg, ss->mvert[(&f->v1)[i]].co);
 -                              ++total;
 -                      }
 +                      mvert[index[i]].flag |= ME_VERT_PBVH_UPDATE;
                }
  
 -              node= node->next;
 +              update= 1;
        }
  
 -      if(total>0)
 -              mul_v3_fl(avg, 1.0f / total);
 -      else
 -              copy_v3_v3(avg, ss->mvert[vert].co);
 -}
 -
 -static void do_smooth_brush(Sculpt *s, SculptSession *ss, const ListBase* active_verts)
 -{
 -      ActiveData *node= active_verts->first;
 -      float *buffer = ss->drawobject!=0?(float *)GPU_buffer_lock( ss->drawobject->vertices ):0;
 -      int i;
 -      
 -      for(i = 0; i < 2; ++i) {
 -              while(node){
 -                      float *co= ss->mvert[node->Index].co;
 -                      float avg[3], val[3];
 -                      
 -                      neighbor_average(ss, avg, node->Index);
 -                      val[0] = co[0]+(avg[0]-co[0])*node->Fade;
 -                      val[1] = co[1]+(avg[1]-co[1])*node->Fade;
 -                      val[2] = co[2]+(avg[2]-co[2])*node->Fade;
 -                      
 -                      sculpt_clip(s, ss, co, val);                    
 -                      if( buffer != 0 ) {                             
 -                              IndexLink *cur = &ss->drawobject->indices[node->Index]; 
 -                              while( cur != 0 && cur->element != -1 ) {
 -                                      sculpt_clip(s, ss, &buffer[cur->element*3], val);
 -                                      cur = cur->next;
 -                              }
 -                      }
 -                      node= node->next;
 +      if(update) {
 +              /* we update all nodes still, should be more clever, but also
 +                 needs to work correctly when exiting/entering sculpt mode and
 +                 the nodes get recreated, though in that case it could do all */
 +              BLI_pbvh_search_callback(ss->tree, NULL, NULL, update_cb, NULL);
 +              BLI_pbvh_update(ss->tree, PBVH_UpdateBB|PBVH_UpdateOriginalBB|PBVH_UpdateRedraw, NULL);
 +
 +              /* not really convinced this is correct .. */
 +              if((mmd=sculpt_multires_active(ob))) {
 +                      mmd->undo_verts = ss->mvert;
 +                      mmd->undo_verts_tot = ss->totvert;
 +                      mmd->undo_signal = !!mmd->undo_verts;
 +
 +                      multires_force_update(ob);
 +                      DAG_id_flush_update(&ob->id, OB_RECALC_DATA);
                }
        }
 -      if( buffer != 0 )
 -              GPU_buffer_unlock( ss->drawobject->vertices );
  }
  
 -static void do_pinch_brush(Sculpt *s, SculptSession *ss, const ListBase* active_verts)
 +static void sculpt_undo_free(ListBase *lb)
  {
 -      ActiveData *node= active_verts->first;
 -      float *buffer = ss->drawobject!=0?(float *)GPU_buffer_lock( ss->drawobject->vertices ):0;
 -
 -      while(node) {
 -              float *co= ss->mvert[node->Index].co;
 -              const float val[3]= {co[0]+(ss->cache->location[0]-co[0])*node->Fade,
 -                                   co[1]+(ss->cache->location[1]-co[1])*node->Fade,
 -                                   co[2]+(ss->cache->location[2]-co[2])*node->Fade};
 -
 -              if( buffer != 0 ) {
 -                      IndexLink *cur = &ss->drawobject->indices[node->Index];
 -                      while( cur != 0 && cur->element != -1 ) {
 -                              sculpt_clip(s, ss, &buffer[cur->element*3], val);
 -                              cur = cur->next;
 -                      }
 -              }
 -
 -              sculpt_clip(s, ss, co, val);
 -              node= node->next;
 +      SculptUndoNode *unode;
 +
 +      for(unode=lb->first; unode; unode=unode->next) {
 +              if(unode->co)
 +                      MEM_freeN(unode->co);
 +              if(unode->no)
 +                      MEM_freeN(unode->no);
 +              if(unode->index)
 +                      MEM_freeN(unode->index);
        }
 -      if( buffer != 0 )
 -              GPU_buffer_unlock( ss->drawobject->vertices );
  }
  
 -static void do_grab_brush(Sculpt *sd, SculptSession *ss)
 +static SculptUndoNode *sculpt_undo_get_node(SculptSession *ss, PBVHNode *node)
  {
 -      ActiveData *node= ss->cache->grab_active_verts[ss->cache->symmetry].first;
 -      float add[3];
 -      float grab_delta[3];
 -      float *buffer = ss->drawobject!=0?(float *)GPU_buffer_lock( ss->drawobject->vertices ):0;
 -      
 -      copy_v3_v3(grab_delta, ss->cache->grab_delta_symmetry);
 -      
 -      while(node) {
 -              float *co= ss->mvert[node->Index].co;
 -              
 -              copy_v3_v3(add, grab_delta);
 -              mul_v3_fl(add, node->Fade);
 -              add_v3_v3v3(add, add, co);
 -
 -              if( buffer != 0 ) {
 -                      IndexLink *cur = &ss->drawobject->indices[node->Index];
 -                      while( cur != 0 && cur->element != -1 ) {
 -                              sculpt_clip(sd, ss, &buffer[cur->element*3], add);
 -                              cur = cur->next;
 -                      }
 -              }
 +      ListBase *lb= undo_paint_push_get_list(UNDO_PAINT_MESH);
 +      SculptUndoNode *unode;
  
 -              sculpt_clip(sd, ss, co, add);
 +      if(!lb)
 +              return NULL;
  
 -              node= node->next;
 -      }
 -      if( buffer != 0 )
 -              GPU_buffer_unlock( ss->drawobject->vertices );
 -      
 +      for(unode=lb->first; unode; unode=unode->next)
 +              if(unode->node == node)
 +                      return unode;
 +
 +      return NULL;
  }
  
 -static void do_layer_brush(Sculpt *sd, SculptSession *ss, const ListBase *active_verts)
 +static SculptUndoNode *sculpt_undo_push_node(SculptSession *ss, PBVHNode *node)
  {
 -      float area_normal[3];
 -      ActiveData *node= active_verts->first;
 -      float *buffer;
 -      float lim= ss->cache->radius / 4;
 -
 -      if(ss->cache->flip)
 -              lim = -lim;
 +      ListBase *lb= undo_paint_push_get_list(UNDO_PAINT_MESH);
 +      Object *ob= ss->ob;
 +      SculptUndoNode *unode;
 +      int i, totvert, allvert, *verts;
  
 -      calc_area_normal(sd, ss, area_normal, active_verts);
 +      BLI_pbvh_node_get_verts(node, &verts, &totvert, &allvert);
  
 -      buffer = ss->drawobject!=0?(float *)GPU_buffer_lock( ss->drawobject->vertices ):0;
 -      while(node){
 -              float *disp= &ss->layer_disps[node->Index];
 -              float *co= ss->mvert[node->Index].co;
 -              float val[3];
 -              
 -              *disp+= node->Fade;
 -              
 -              /* Don't let the displacement go past the limit */
 -              if((lim < 0 && *disp < lim) || (lim > 0 && *disp > lim))
 -                      *disp = lim;
 -              
 -              val[0] = ss->mesh_co_orig[node->Index][0]+area_normal[0] * *disp*ss->cache->scale[0];
 -              val[1] = ss->mesh_co_orig[node->Index][1]+area_normal[1] * *disp*ss->cache->scale[1];
 -              val[2] = ss->mesh_co_orig[node->Index][2]+area_normal[2] * *disp*ss->cache->scale[2];
 -
 -              if( buffer != 0 ) {
 -                      IndexLink *cur = &ss->drawobject->indices[node->Index];
 -                      while( cur != 0 && cur->element != -1 ) {
 -                              sculpt_clip(sd, ss, &buffer[cur->element*3], val);
 -                              cur = cur->next;
 -                      }
 -              }
 +      /* list is manipulated by multiple threads, so we lock */
 +      BLI_lock_thread(LOCK_CUSTOM1);
  
 -              sculpt_clip(sd, ss, co, val);
 +      if((unode= sculpt_undo_get_node(ss, node))) {
 +              BLI_unlock_thread(LOCK_CUSTOM1);
 +              return unode;
 +      }
  
 -              node= node->next;
 +      unode= MEM_callocN(sizeof(SculptUndoNode), "SculptUndoNode");
 +      strcpy(unode->idname, ob->id.name);
 +      unode->node= node;
 +
 +      unode->totvert= totvert;
 +      unode->maxvert= ss->totvert;
 +      /* we will use this while sculpting, is mapalloc slow to access then? */
 +      unode->co= MEM_mapallocN(sizeof(float)*3*allvert, "SculptUndoNode.co");
 +      unode->no= MEM_mapallocN(sizeof(short)*3*allvert, "SculptUndoNode.no");
 +      unode->index= MEM_mapallocN(sizeof(int)*allvert, "SculptUndoNode.index");
 +      undo_paint_push_count_alloc(UNDO_PAINT_MESH, (sizeof(float)*3 + sizeof(short)*3 + sizeof(int))*allvert);
 +      BLI_addtail(lb, unode);
 +
 +      BLI_unlock_thread(LOCK_CUSTOM1);
 +
 +      /* copy threaded, hopefully this is the performance critical part */
 +      memcpy(unode->index, verts, sizeof(int)*allvert);
 +      for(i=0; i<allvert; i++) {
-               VECCOPY(unode->co[i], ss->mvert[verts[i]].co)
-               VECCOPY(unode->no[i], ss->mvert[verts[i]].no)
++              copy_v3_v3(unode->co[i], ss->mvert[verts[i]].co);
++              VECCOPY(unode->no[i], ss->mvert[verts[i]].no);
        }
 -      if( buffer != 0 )
 -              GPU_buffer_unlock( ss->drawobject->vertices );
 +      
 +      return unode;
  }
  
 -static void do_inflate_brush(Sculpt *s, SculptSession *ss, const ListBase *active_verts)
 +static void sculpt_undo_push_begin(SculptSession *ss, char *name)
  {
 -      ActiveData *node= active_verts->first;
 -      float add[3];
 -      float *buffer = ss->drawobject!=0?(float *)GPU_buffer_lock( ss->drawobject->vertices ):0;
 -
 -      while(node) {
 -              float *co= ss->mvert[node->Index].co;
 -              short *no= ss->mvert[node->Index].no;
 -
 -              add[0]= no[0]/ 32767.0f;
 -              add[1]= no[1]/ 32767.0f;
 -              add[2]= no[2]/ 32767.0f;
 -              mul_v3_fl(add, node->Fade * ss->cache->radius);
 -              add[0]*= ss->cache->scale[0];
 -              add[1]*= ss->cache->scale[1];
 -              add[2]*= ss->cache->scale[2];
 -              add_v3_v3v3(add, add, co);
 -              
 -              if( buffer != 0 ) {
 -                      IndexLink *cur = &ss->drawobject->indices[node->Index];
 -                      while( cur != 0 && cur->element != -1 ) {
 -                              sculpt_clip(s, ss, &buffer[cur->element*3], add);
 -                              cur = cur->next;
 -                      }
 -              }
 -
 -              sculpt_clip(s, ss, co, add);
 -
 -              node= node->next;
 -      }
 -      if( buffer != 0 )
 -              GPU_buffer_unlock( ss->drawobject->vertices );
 +      undo_paint_push_begin(UNDO_PAINT_MESH, name,
 +              sculpt_undo_restore, sculpt_undo_free);
  }
  
 -static void calc_flatten_center(SculptSession *ss, ActiveData *node, float co[3])
 +static void sculpt_undo_push_end(SculptSession *ss)
  {
 -      ActiveData *outer[FLATTEN_SAMPLE_SIZE];
 -      int i;
 -      
 -      for(i = 0; i < FLATTEN_SAMPLE_SIZE; ++i)
 -              outer[i] = node;
 -              
 -      for(; node; node = node->next) {
 -              for(i = 0; i < FLATTEN_SAMPLE_SIZE; ++i) {
 -                      if(node->dist > outer[i]->dist) {
 -                              outer[i] = node;
 -                              break;
 -                      }
 +      ListBase *lb= undo_paint_push_get_list(UNDO_PAINT_MESH);
 +      SculptUndoNode *unode;
 +
 +      /* we don't need normals in the undo stack */
 +      for(unode=lb->first; unode; unode=unode->next) {
 +              if(unode->no) {
 +                      MEM_freeN(unode->no);
 +                      unode->no= NULL;
                }
        }
 -      
 -      co[0] = co[1] = co[2] = 0.0f;
 -      for(i = 0; i < FLATTEN_SAMPLE_SIZE; ++i)
 -              add_v3_v3v3(co, co, ss->mvert[outer[i]->Index].co);
 -      mul_v3_fl(co, 1.0f / FLATTEN_SAMPLE_SIZE);
 +
 +      undo_paint_push_end(UNDO_PAINT_MESH);
  }
  
 -/* Projects a point onto a plane along the plane's normal */
 -static void point_plane_project(float intr[3], float co[3], float plane_normal[3], float plane_center[3])
 -{
 -      float p1[3], sub1[3], sub2[3];
 +/************************ Looping Over Verts in a BVH Node *******************/
  
 -      /* Find the intersection between squash-plane and vertex (along the area normal) */
 -      sub_v3_v3v3(p1, co, plane_normal);
 -      sub_v3_v3v3(sub1, plane_center, p1);
 -      sub_v3_v3v3(sub2, co, p1);
 -      sub_v3_v3v3(intr, co, p1);
 -      mul_v3_fl(intr, dot_v3v3(plane_normal, sub1) / dot_v3v3(plane_normal, sub2));
 -      add_v3_v3v3(intr, intr, p1);
 -}
 +typedef struct SculptVertexData {
 +      float radius_squared;
 +      float location[3];
  
 -static int plane_point_side(float co[3], float plane_normal[3], float plane_center[3], int flip)
 -{
 -      float delta[3];
 -      float d;
 +      MVert *mvert;
 +      int *verts;
 +      float (*origvert)[3];
 +      int i, index, totvert;
  
 -      sub_v3_v3v3(delta, co, plane_center);
 -      d = dot_v3v3(plane_normal, delta);
 +      float *co;
 +      float *origco;
 +      short *no;
 +      float dist;
 +} SculptVertexData;
  
 -      if(flip)
 -              d = -d;
 +static void sculpt_node_verts_init(Sculpt *sd, SculptSession *ss,
 +      PBVHNode *node, float (*origvert)[3], SculptVertexData *vd)
 +{
 +      vd->radius_squared= ss->cache->radius*ss->cache->radius;
-       VecCopyf(vd->location, ss->cache->location);
++      copy_v3_v3(vd->location, ss->cache->location);
  
 -      return d <= 0.0f;
 +      vd->mvert= ss->mvert;
 +      vd->origvert= origvert;
 +      vd->i= -1;
 +      BLI_pbvh_node_get_verts(node, &vd->verts, &vd->totvert, NULL);
  }
  
 -static void do_flatten_clay_brush(Sculpt *sd, SculptSession *ss, const ListBase *active_verts, int clay)
 +static int sculpt_node_verts_next(SculptVertexData *vd)
  {
 -      ActiveData *node= active_verts->first;
 -      /* area_normal and cntr define the plane towards which vertices are squashed */
 -      float area_normal[3];
 -      float cntr[3], cntr2[3], bstr = 0;
 -      int flip = 0;
 -      float *buffer;
 -      calc_area_normal(sd, ss, area_normal, active_verts);
 -      calc_flatten_center(ss, node, cntr);
 +      vd->i++;
  
 -      if(clay) {
 -              bstr= brush_strength(sd, ss->cache);
 -              /* Limit clay application to here */
 -              cntr2[0]=cntr[0]+area_normal[0]*bstr*ss->cache->scale[0];
 -              cntr2[1]=cntr[1]+area_normal[1]*bstr*ss->cache->scale[1];
 -              cntr2[2]=cntr[2]+area_normal[2]*bstr*ss->cache->scale[2];
 -              flip = bstr < 0;
 -      }
 +      while(vd->i < vd->totvert) {
 +              float delta[3], dsq;
  
 -      buffer = ss->drawobject!=0?(float *)GPU_buffer_lock( ss->drawobject->vertices ):0;
 +              vd->index= vd->verts[vd->i];
 +              vd->co= vd->mvert[vd->index].co;
 +              vd->origco= (vd->origvert)? vd->origvert[vd->i]: vd->co;
 +              vd->no= vd->mvert[vd->index].no;
-               VECSUB(delta, vd->origco, vd->location);
++              sub_v3_v3v3(delta, vd->origco, vd->location);
 +              dsq = INPR(delta, delta);
  
 -      while(node){
 -              float *co= ss->mvert[node->Index].co;
 -              float intr[3], val[3];
 -              
 -              if(!clay || plane_point_side(co, area_normal, cntr2, flip)) {
 -                      /* Find the intersection between squash-plane and vertex (along the area normal) */             
 -                      point_plane_project(intr, co, area_normal, cntr);
 +              if(dsq < vd->radius_squared) {
 +                      vd->dist = sqrt(dsq);
 +                      return 1;
 +              }
  
 -                      sub_v3_v3v3(val, intr, co);
 +              vd->i++;
 +      }
 +      
 +      return 0;
 +}
  
 -                      if(clay) {
 -                              if(bstr > FLT_EPSILON)
 -                                      mul_v3_fl(val, node->Fade / bstr);
 -                              else
 -                                      mul_v3_fl(val, node->Fade);
 -                              /* Clay displacement */
 -                              val[0]+=area_normal[0] * ss->cache->scale[0]*node->Fade;
 -                              val[1]+=area_normal[1] * ss->cache->scale[1]*node->Fade;
 -                              val[2]+=area_normal[2] * ss->cache->scale[2]*node->Fade;
 -                      }
 -                      else
 -                              mul_v3_fl(val, fabs(node->Fade));
 +/* ===== Sculpting =====
 + *
 + */
  
 -                      add_v3_v3v3(val, val, co);
 +/* Return modified brush strength. Includes the direction of the brush, positive
 +   values pull vertices, negative values push. Uses tablet pressure and a
 +   special multiplier found experimentally to scale the strength factor. */
 +static float brush_strength(Sculpt *sd, StrokeCache *cache)
 +{
 +      Brush *brush = paint_brush(&sd->paint);
 +      /* Primary strength input; square it to make lower values more sensitive */
 +      float alpha = brush->alpha * brush->alpha;
  
 -                      if( buffer != 0 ) {
 -                              IndexLink *cur = &ss->drawobject->indices[node->Index];
 -                              while( cur != 0 && cur->element != -1 ) {
 -                                      sculpt_clip(sd, ss, &buffer[cur->element*3], val);
 -                                      cur = cur->next;
 -                              }
 -                      }                       
 -                      sculpt_clip(sd, ss, co, val);
 +      float dir= brush->flag & BRUSH_DIR_IN ? -1 : 1;
 +      float pressure= 1;
 +      float flip= cache->flip ? -1:1;
  
 -              }
 -              
 -              node= node->next;
 +      if(brush->flag & BRUSH_ALPHA_PRESSURE)
 +              pressure *= cache->pressure;
 +      
 +      switch(brush->sculpt_tool){
 +      case SCULPT_TOOL_DRAW:
 +      case SCULPT_TOOL_INFLATE:
 +      case SCULPT_TOOL_CLAY:
 +      case SCULPT_TOOL_FLATTEN:
 +      case SCULPT_TOOL_LAYER:
 +              return alpha * dir * pressure * flip; /*XXX: not sure why? was multiplied by G.vd->grid */;
 +      case SCULPT_TOOL_SMOOTH:
 +              return alpha * 4 * pressure;
 +      case SCULPT_TOOL_PINCH:
 +              return alpha / 2 * dir * pressure * flip;
 +      case SCULPT_TOOL_GRAB:
 +              return 1;
 +      default:
 +              return 0;
        }
 -      if( buffer != 0 )
 -              GPU_buffer_unlock( ss->drawobject->vertices );
  }
  
  /* Uses symm to selectively flip any axis of a coordinate. */
@@@ -613,621 -757,149 +613,621 @@@ static float tex_strength(SculptSessio
                        const int sx= (const int)tex->size[0];
                        const int sy= (const int)tex->size[1];
                        
 -                      float fx= point_2d[0];
 -                      float fy= point_2d[1];
 +                      float fx= point_2d[0];
 +                      float fy= point_2d[1];
 +                      
 +                      float angle= atan2(fy, fx) - rot;
 +                      float flen= sqrtf(fx*fx + fy*fy);
 +                      
 +                      if(rot<0.001 && rot>-0.001) {
 +                              px= point_2d[0];
 +                              py= point_2d[1];
 +                      } else {
 +                              px= flen * cos(angle) + 2000;
 +                              py= flen * sin(angle) + 2000;
 +                      }
 +                      if(sx != 1)
 +                              px %= sx-1;
 +                      if(sy != 1)
 +                              py %= sy-1;
 +                      avg= get_texcache_pixel_bilinear(ss, ss->texcache_side*px/sx, ss->texcache_side*py/sy);
 +              }
 +              else if(tex->brush_map_mode == MTEX_MAP_MODE_FIXED) {
 +                      float fx= (point_2d[0] - ss->cache->mouse[0]) / bsize;
 +                      float fy= (point_2d[1] - ss->cache->mouse[1]) / bsize;
 +
 +                      float angle= atan2(fy, fx) - rot;
 +                      float flen= sqrtf(fx*fx + fy*fy);
 +                      
 +                      fx = flen * cos(angle) + 0.5;
 +                      fy = flen * sin(angle) + 0.5;
 +
 +                      avg= get_texcache_pixel_bilinear(ss, fx * ss->texcache_side, fy * ss->texcache_side);
 +              }
 +      }
 +
 +      avg*= brush_curve_strength(br, len, ss->cache->radius); /* Falloff curve */
 +
 +      return avg;
 +}
 +
 +typedef struct {
 +      Sculpt *sd;
 +      SculptSession *ss;
 +      float radius_squared;
 +      ListBase *active_verts;
 +      float area_normal[3];
 +} SculptSearchSphereData;
 +
 +/* Test AABB against sphere */
 +static int sculpt_search_sphere_cb(PBVHNode *node, void *data_v)
 +{
 +      SculptSearchSphereData *data = data_v;
 +      float *center = data->ss->cache->location, nearest[3];
 +      float t[3], bb_min[3], bb_max[3];
 +      int i;
 +
 +      //BLI_pbvh_node_get_original_BB(node, bb_min, bb_max);
 +      BLI_pbvh_node_get_BB(node, bb_min, bb_max);
 +
 +      for(i = 0; i < 3; ++i) {
 +              if(bb_min[i] > center[i])
 +                      nearest[i] = bb_min[i];
 +              else if(bb_max[i] < center[i])
 +                      nearest[i] = bb_max[i];
 +              else
 +                      nearest[i] = center[i]; 
 +      }
 +      
-       VecSubf(t, center, nearest);
++      sub_v3_v3v3(t, center, nearest);
 +
 +      return t[0] * t[0] + t[1] * t[1] + t[2] * t[2] < data->radius_squared;
 +}
 +
 +/* Handles clipping against a mirror modifier and SCULPT_LOCK axis flags */
 +static void sculpt_clip(Sculpt *sd, SculptSession *ss, float *co, const float val[3])
 +{
 +      int i;
 +
 +      for(i=0; i<3; ++i) {
 +              if(sd->flags & (SCULPT_LOCK_X << i))
 +                      continue;
 +
 +              if((ss->cache->flag & (CLIP_X << i)) && (fabs(co[i]) <= ss->cache->clip_tolerance[i]))
 +                      co[i]= 0.0f;
 +              else
 +                      co[i]= val[i];
 +      }               
 +}
 +
 +static void add_norm_if(float view_vec[3], float out[3], float out_flip[3], const short no[3])
 +{
 +      float fno[3] = {no[0], no[1], no[2]};
 +
-       Normalize(fno);
++      normalize_v3(fno);
 +
-       if((Inpf(view_vec, fno)) > 0) {
-               VecAddf(out, out, fno);
++      if((dot_v3v3(view_vec, fno)) > 0) {
++              add_v3_v3v3(out, out, fno);
 +      } else {
-               VecAddf(out_flip, out_flip, fno); /* out_flip is used when out is {0,0,0} */
++              add_v3_v3v3(out_flip, out_flip, fno); /* out_flip is used when out is {0,0,0} */
 +      }
 +}
 +
 +/* For draw/layer/flatten; finds average normal for all active vertices */
 +static void calc_area_normal(Sculpt *sd, SculptSession *ss, float area_normal[3], PBVHNode **nodes, int totnode)
 +{
 +      StrokeCache *cache = ss->cache;
 +      const int view = 0; /* XXX: should probably be a flag, not number: brush_type==SCULPT_TOOL_DRAW ? sculptmode_brush()->view : 0; */
 +      float out[3] = {0.0f, 0.0f, 0.0f};
 +      float out_flip[3] = {0.0f, 0.0f, 0.0f};
 +      float out_dir[3];
 +      int n;
 +
-       VecCopyf(out_dir, cache->view_normal_symmetry);
++      copy_v3_v3(out_dir, cache->view_normal_symmetry);
 +
 +      /* threaded loop over nodes */
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n=0; n<totnode; n++) {
 +              SculptVertexData vd;
 +              SculptUndoNode *unode;
 +              float nout[3] = {0.0f, 0.0f, 0.0f};
 +              float nout_flip[3] = {0.0f, 0.0f, 0.0f};
 +              
 +              // XXX push instead of get for thread safety in draw
 +              // brush .. lame, but also not harmful really
 +              unode= sculpt_undo_push_node(ss, nodes[n]);
 +              sculpt_node_verts_init(sd, ss, nodes[n], NULL, &vd);
 +
 +              if(unode && ss->cache->original) {
 +                      while(sculpt_node_verts_next(&vd))
 +                              add_norm_if(out_dir, nout, nout_flip, unode->no[vd.i]);
 +              }
 +              else {
 +                      while(sculpt_node_verts_next(&vd))
 +                              add_norm_if(out_dir, nout, nout_flip, ss->mvert[vd.index].no);
 +              }
 +
 +              {
 +                      /* we sum per node and add together later for threads */
 +                      #pragma omp critical
-                       VecAddf(out, out, nout);
-                       VecAddf(out_flip, out_flip, nout_flip);
++                      add_v3_v3v3(out, out, nout);
++                      add_v3_v3v3(out_flip, out_flip, nout_flip);
 +              }
 +      }
 +
 +      if (out[0]==0.0 && out[1]==0.0 && out[2]==0.0) {
-               VECCOPY(out, out_flip);
++              copy_v3_v3(out, out_flip);
 +      }
 +      
-       Normalize(out);
++      normalize_v3(out);
 +
 +      out[0] = out_dir[0] * view + out[0] * (10-view);
 +      out[1] = out_dir[1] * view + out[1] * (10-view);
 +      out[2] = out_dir[2] * view + out[2] * (10-view);
 +      
-       Normalize(out);
-       VecCopyf(area_normal, out);
++      normalize_v3(out);
++      copy_v3_v3(area_normal, out);
 +}
 +
 +static void do_draw_brush(Sculpt *sd, SculptSession *ss, PBVHNode **nodes, int totnode)
 +{
 +      Brush *brush = paint_brush(&sd->paint);
 +      float offset[3], area_normal[3];
 +      float bstrength= ss->cache->bstrength;
 +      int n;
 +
 +      /* area normal */
 +      calc_area_normal(sd, ss, area_normal, nodes, totnode);
 +
 +      /* offset with as much as possible factored in already */
 +      offset[0]= area_normal[0]*ss->cache->radius*ss->cache->scale[0]*bstrength;
 +      offset[1]= area_normal[1]*ss->cache->radius*ss->cache->scale[1]*bstrength;
 +      offset[2]= area_normal[2]*ss->cache->radius*ss->cache->scale[2]*bstrength;
 +
 +      /* threaded loop over nodes */
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n=0; n<totnode; n++) {
 +              SculptVertexData vd;
 +              
 +              sculpt_undo_push_node(ss, nodes[n]);
 +              sculpt_node_verts_init(sd, ss, nodes[n], NULL, &vd);
 +
 +              while(sculpt_node_verts_next(&vd)) {
 +                      /* offset vertex */
 +                      const float fade = tex_strength(ss, brush, vd.co, vd.dist);
 +                      const float val[3]= {vd.co[0] + offset[0]*fade,
 +                                                               vd.co[1] + offset[1]*fade,
 +                                                               vd.co[2] + offset[2]*fade};
 +
 +                      sculpt_clip(sd, ss, vd.co, val);
 +                      ss->mvert[vd.index].flag |= ME_VERT_PBVH_UPDATE;
 +              }
 +
 +              BLI_pbvh_node_mark_update(nodes[n]);
 +      }
 +}
 +
 +/* For the smooth brush, uses the neighboring vertices around vert to calculate
 +   a smoothed location for vert. Skips corner vertices (used by only one
 +   polygon.) */
 +static void neighbor_average(SculptSession *ss, float avg[3], const int vert)
 +{
 +      int i, skip= -1, total=0;
 +      IndexNode *node= ss->fmap[vert].first;
 +      char ncount= BLI_countlist(&ss->fmap[vert]);
 +      MFace *f;
 +
 +      avg[0] = avg[1] = avg[2] = 0;
 +              
 +      /* Don't modify corner vertices */
 +      if(ncount==1) {
-               VecCopyf(avg, ss->mvert[vert].co);
++              copy_v3_v3(avg, ss->mvert[vert].co);
 +              return;
 +      }
 +
 +      while(node){
 +              f= &ss->mface[node->index];
 +              
 +              if(f->v4) {
 +                      skip= (f->v1==vert?2:
 +                             f->v2==vert?3:
 +                             f->v3==vert?0:
 +                             f->v4==vert?1:-1);
 +              }
 +
 +              for(i=0; i<(f->v4?4:3); ++i) {
 +                      if(i != skip && (ncount!=2 || BLI_countlist(&ss->fmap[(&f->v1)[i]]) <= 2)) {
-                               VecAddf(avg, avg, ss->mvert[(&f->v1)[i]].co);
++                              add_v3_v3v3(avg, avg, ss->mvert[(&f->v1)[i]].co);
 +                              ++total;
 +                      }
 +              }
 +
 +              node= node->next;
 +      }
 +
 +      if(total>0)
-               VecMulf(avg, 1.0f / total);
++              mul_v3_fl(avg, 1.0f / total);
 +      else
-               VecCopyf(avg, ss->mvert[vert].co);
++              copy_v3_v3(avg, ss->mvert[vert].co);
 +}
 +
 +static void do_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode **nodes, int totnode)
 +{
 +      Brush *brush = paint_brush(&sd->paint);
 +      float bstrength= ss->cache->bstrength;
 +      int iteration, n;
 +
 +      for(iteration = 0; iteration < 2; ++iteration) {
 +              #pragma omp parallel for private(n) schedule(static)
 +              for(n=0; n<totnode; n++) {
 +                      SculptVertexData vd;
 +                      
 +                      sculpt_undo_push_node(ss, nodes[n]);
 +                      sculpt_node_verts_init(sd, ss, nodes[n], NULL, &vd);
 +
 +                      while(sculpt_node_verts_next(&vd)) {
 +                              const float fade = tex_strength(ss, brush, vd.co, vd.dist)*bstrength;
 +                              float avg[3], val[3];
 +                              
 +                              neighbor_average(ss, avg, vd.index);
 +                              val[0] = vd.co[0]+(avg[0]-vd.co[0])*fade;
 +                              val[1] = vd.co[1]+(avg[1]-vd.co[1])*fade;
 +                              val[2] = vd.co[2]+(avg[2]-vd.co[2])*fade;
 +                              
 +                              sculpt_clip(sd, ss, vd.co, val);                        
 +                              ss->mvert[vd.index].flag |= ME_VERT_PBVH_UPDATE;
 +                      }
 +
 +                      BLI_pbvh_node_mark_update(nodes[n]);
 +              }
 +      }
 +}
 +
 +static void do_pinch_brush(Sculpt *sd, SculptSession *ss, PBVHNode **nodes, int totnode)
 +{
 +      Brush *brush = paint_brush(&sd->paint);
 +      float bstrength= ss->cache->bstrength;
 +      int n;
 +
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n=0; n<totnode; n++) {
 +              SculptVertexData vd;
 +              
 +              sculpt_undo_push_node(ss, nodes[n]);
 +              sculpt_node_verts_init(sd, ss, nodes[n], NULL, &vd);
 +
 +              while(sculpt_node_verts_next(&vd)) {
 +                      const float fade = tex_strength(ss, brush, vd.co, vd.dist)*bstrength;
 +                      const float val[3]= {vd.co[0]+(vd.location[0]-vd.co[0])*fade,
 +                                                               vd.co[1]+(vd.location[1]-vd.co[1])*fade,
 +                                                               vd.co[2]+(vd.location[2]-vd.co[2])*fade};
 +                      
 +                      sculpt_clip(sd, ss, vd.co, val);                        
 +                      ss->mvert[vd.index].flag |= ME_VERT_PBVH_UPDATE;
 +              }
 +
 +              BLI_pbvh_node_mark_update(nodes[n]);
 +      }
 +}
 +
 +static void do_grab_brush(Sculpt *sd, SculptSession *ss, PBVHNode **nodes, int totnode)
 +{
 +      Brush *brush = paint_brush(&sd->paint);
 +      float bstrength= ss->cache->bstrength;
 +      float grab_delta[3];
 +      int n;
 +      
-       VecCopyf(grab_delta, ss->cache->grab_delta_symmetry);
++      copy_v3_v3(grab_delta, ss->cache->grab_delta_symmetry);
 +
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n=0; n<totnode; n++) {
 +              SculptVertexData vd;
 +              SculptUndoNode *unode;
 +              float (*origco)[3];
 +              
 +              unode= sculpt_undo_push_node(ss, nodes[n]);
 +              origco= unode->co;
 +              sculpt_node_verts_init(sd, ss, nodes[n], origco, &vd);
 +
 +              while(sculpt_node_verts_next(&vd)) {
 +                      const float fade = tex_strength(ss, brush, origco[vd.i], vd.dist)*bstrength;
 +                      const float add[3]= {vd.co[0]+fade*grab_delta[0],
 +                                                               vd.co[1]+fade*grab_delta[1],
 +                                                               vd.co[2]+fade*grab_delta[2]};
 +
 +                      sculpt_clip(sd, ss, vd.co, add);                        
 +                      ss->mvert[vd.index].flag |= ME_VERT_PBVH_UPDATE;
 +              }
 +
 +              BLI_pbvh_node_mark_update(nodes[n]);
 +      }
 +}
 +
 +static void do_layer_brush(Sculpt *sd, SculptSession *ss, PBVHNode **nodes, int totnode)
 +{
 +      Brush *brush = paint_brush(&sd->paint);
 +      float bstrength= ss->cache->bstrength;
 +      float area_normal[3], offset[3];
 +      float lim= ss->cache->radius / 4;
 +      int n;
 +
 +      if(ss->cache->flip)
 +              lim = -lim;
 +
 +      calc_area_normal(sd, ss, area_normal, nodes, totnode);
 +
 +      offset[0]= ss->cache->scale[0]*area_normal[0];
 +      offset[1]= ss->cache->scale[1]*area_normal[1];
 +      offset[2]= ss->cache->scale[2]*area_normal[2];
 +
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n=0; n<totnode; n++) {
 +              SculptVertexData vd;
 +              SculptUndoNode *unode;
 +              float (*origco)[3];
 +              
 +              unode= sculpt_undo_push_node(ss, nodes[n]);
 +              origco= unode->co;
 +              sculpt_node_verts_init(sd, ss, nodes[n], NULL, &vd);
 +
 +              while(sculpt_node_verts_next(&vd)) {
 +                      const float fade = tex_strength(ss, brush, vd.co, vd.dist)*bstrength;
 +                      float *disp= &ss->layer_disps[vd.index];
 +                      float val[3];
 +                      
 +                      *disp+= fade;
                        
 -                      float angle= atan2(fy, fx) - rot;
 -                      float flen= sqrtf(fx*fx + fy*fy);
 +                      /* Don't let the displacement go past the limit */
 +                      if((lim < 0 && *disp < lim) || (lim > 0 && *disp > lim))
 +                              *disp = lim;
                        
 -                      if(rot<0.001 && rot>-0.001) {
 -                              px= point_2d[0];
 -                              py= point_2d[1];
 -                      } else {
 -                              px= flen * cos(angle) + 2000;
 -                              py= flen * sin(angle) + 2000;
 +                      if(ss->layer_co && (brush->flag & BRUSH_PERSISTENT)) {
 +                              /* persistent base */
 +                              val[0] = ss->layer_co[vd.index][0] + (*disp)*offset[0];
 +                              val[1] = ss->layer_co[vd.index][1] + (*disp)*offset[1];
 +                              val[2] = ss->layer_co[vd.index][2] + (*disp)*offset[2];
                        }
 -                      if(sx != 1)
 -                              px %= sx-1;
 -                      if(sy != 1)
 -                              py %= sy-1;
 -                      avg= get_texcache_pixel_bilinear(ss, ss->texcache_side*px/sx, ss->texcache_side*py/sy);
 +                      else {
 +                              val[0] = origco[vd.i][0] + (*disp)*offset[0];
 +                              val[1] = origco[vd.i][1] + (*disp)*offset[1];
 +                              val[2] = origco[vd.i][2] + (*disp)*offset[2];
 +                      }
 +
 +                      sculpt_clip(sd, ss, vd.co, val);
 +                      ss->mvert[vd.index].flag |= ME_VERT_PBVH_UPDATE;
                }
 -              else if(tex->brush_map_mode == MTEX_MAP_MODE_FIXED) {
 -                      float fx= (point_2d[0] - ss->cache->mouse[0]) / bsize;
 -                      float fy= (point_2d[1] - ss->cache->mouse[1]) / bsize;
  
 -                      float angle= atan2(fy, fx) - rot;
 -                      float flen= sqrtf(fx*fx + fy*fy);
 +              BLI_pbvh_node_mark_update(nodes[n]);
 +      }
 +}
 +
 +static void do_inflate_brush(Sculpt *sd, SculptSession *ss, PBVHNode **nodes, int totnode)
 +{
 +      Brush *brush = paint_brush(&sd->paint);
 +      float bstrength= ss->cache->bstrength;
 +      int n;
 +
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n=0; n<totnode; n++) {
 +              SculptVertexData vd;
 +              
 +              sculpt_undo_push_node(ss, nodes[n]);
 +              sculpt_node_verts_init(sd, ss, nodes[n], NULL, &vd);
 +
 +              while(sculpt_node_verts_next(&vd)) {
 +                      const float fade = tex_strength(ss, brush, vd.co, vd.dist)*bstrength;
 +                      float add[3];
                        
 -                      fx = flen * cos(angle) + 0.5;
 -                      fy = flen * sin(angle) + 0.5;
 +                      add[0]= vd.no[0]/32767.0f;
 +                      add[1]= vd.no[1]/32767.0f;
 +                      add[2]= vd.no[2]/32767.0f;
-                       VecMulf(add, fade * ss->cache->radius);
++                      mul_v3_fl(add, fade * ss->cache->radius);
 +                      add[0]*= ss->cache->scale[0];
 +                      add[1]*= ss->cache->scale[1];
 +                      add[2]*= ss->cache->scale[2];
-                       VecAddf(add, add, vd.co);
++                      add_v3_v3v3(add, add, vd.co);
 +                      
 +                      sculpt_clip(sd, ss, vd.co, add);
 +                      ss->mvert[vd.index].flag |= ME_VERT_PBVH_UPDATE;
 +              }
  
 -                      avg= get_texcache_pixel_bilinear(ss, fx * ss->texcache_side, fy * ss->texcache_side);
 +              BLI_pbvh_node_mark_update(nodes[n]);
 +      }
 +}
 +
 +static void calc_flatten_center(Sculpt *sd, SculptSession *ss, PBVHNode **nodes, int totnode, float co[3])
 +{
 +      float outer_dist[FLATTEN_SAMPLE_SIZE];
 +      int outer_index[FLATTEN_SAMPLE_SIZE];
 +      int i, n;
 +      
 +      for(i = 0; i < FLATTEN_SAMPLE_SIZE; ++i) {
 +              outer_index[i] = 0;
 +              outer_dist[i]= -1.0f;
 +      }
 +              
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n=0; n<totnode; n++) {
 +              SculptVertexData vd;
 +              
 +              sculpt_node_verts_init(sd, ss, nodes[n], NULL, &vd);
 +
 +              while(sculpt_node_verts_next(&vd)) {
 +                      for(i = 0; i < FLATTEN_SAMPLE_SIZE; ++i) {
 +                              if(vd.dist > outer_dist[i]) {
 +                                      outer_index[i] = vd.index;
 +                                      break;
 +                              }
 +                      }
                }
 +
 +              BLI_pbvh_node_mark_update(nodes[n]);
        }
-               VecAddf(co, co, ss->mvert[outer_index[i]].co);
-       VecMulf(co, 1.0f / FLATTEN_SAMPLE_SIZE);
 +      
 +      co[0] = co[1] = co[2] = 0.0f;
 +      for(i = 0; i < FLATTEN_SAMPLE_SIZE; ++i)
++              add_v3_v3v3(co, co, ss->mvert[outer_index[i]].co);
++      mul_v3_fl(co, 1.0f / FLATTEN_SAMPLE_SIZE);
 +}
  
 -      avg*= brush_curve_strength(br, len, ss->cache->radius); /* Falloff curve */
 +/* Projects a point onto a plane along the plane's normal */
 +static void point_plane_project(float intr[3], float co[3], float plane_normal[3], float plane_center[3])
 +{
 +      float p1[3], sub1[3], sub2[3];
  
 -      return avg;
 +      /* Find the intersection between squash-plane and vertex (along the area normal) */
-       VecSubf(p1, co, plane_normal);
-       VecSubf(sub1, plane_center, p1);
-       VecSubf(sub2, co, p1);
-       VecSubf(intr, co, p1);
-       VecMulf(intr, Inpf(plane_normal, sub1) / Inpf(plane_normal, sub2));
-       VecAddf(intr, intr, p1);
++      sub_v3_v3v3(p1, co, plane_normal);
++      sub_v3_v3v3(sub1, plane_center, p1);
++      sub_v3_v3v3(sub2, co, p1);
++      sub_v3_v3v3(intr, co, p1);
++      mul_v3_fl(intr, dot_v3v3(plane_normal, sub1) / dot_v3v3(plane_normal, sub2));
++      add_v3_v3v3(intr, intr, p1);
 +}
 +
 +static int plane_point_side(float co[3], float plane_normal[3], float plane_center[3], int flip)
 +{
 +      float delta[3];
 +      float d;
 +
-       VecSubf(delta, co, plane_center);
-       d = Inpf(plane_normal, delta);
++      sub_v3_v3v3(delta, co, plane_center);
++      d = dot_v3v3(plane_normal, delta);
 +
 +      if(flip)
 +              d = -d;
 +
 +      return d <= 0.0f;
  }
  
 -/* Mark area around the brush as damaged. projverts are marked if they are
 -   inside the area and the damaged rectangle in 2D screen coordinates is 
 -   added to damaged_rects. */
 -static void sculpt_add_damaged_rect(SculptSession *ss)
 +static void do_flatten_clay_brush(Sculpt *sd, SculptSession *ss, PBVHNode **nodes, int totnode, int clay)
  {
 -      short p[2];
 -      RectNode *rn= MEM_mallocN(sizeof(RectNode),"RectNode");
 -      const float radius = MAX2(ss->cache->pixel_radius, ss->cache->previous_pixel_radius);
 -      unsigned i;
 +      /* area_normal and cntr define the plane towards which vertices are squashed */
 +      Brush *brush = paint_brush(&sd->paint);
 +      float bstrength= ss->cache->bstrength;
 +      float area_normal[3];
 +      float cntr[3], cntr2[3], bstr = 0;
 +      int n, flip = 0;
 +
 +      calc_area_normal(sd, ss, area_normal, nodes, totnode);
 +      calc_flatten_center(sd, ss, nodes, totnode, cntr);
 +
 +      if(clay) {
 +              bstr= brush_strength(sd, ss->cache);
 +              /* Limit clay application to here */
 +              cntr2[0]=cntr[0]+area_normal[0]*bstr*ss->cache->scale[0];
 +              cntr2[1]=cntr[1]+area_normal[1]*bstr*ss->cache->scale[1];
 +              cntr2[2]=cntr[2]+area_normal[2]*bstr*ss->cache->scale[2];
 +              flip = bstr < 0;
 +      }
 +
 +      #pragma omp parallel for private(n) schedule(static)
 +      for(n=0; n<totnode; n++) {
 +              SculptVertexData vd;
 +              
 +              sculpt_undo_push_node(ss, nodes[n]);
 +              sculpt_node_verts_init(sd, ss, nodes[n], NULL, &vd);
 +
 +              while(sculpt_node_verts_next(&vd)) {
 +                      float intr[3], val[3];
 +                      
 +                      if(!clay || plane_point_side(vd.co, area_normal, cntr2, flip)) {
 +                              const float fade = tex_strength(ss, brush, vd.co, vd.dist)*bstrength;
 +
 +                              /* Find the intersection between squash-plane and vertex (along the area normal) */             
 +                              point_plane_project(intr, vd.co, area_normal, cntr);
 +
-                               VecSubf(val, intr, vd.co);
++                              sub_v3_v3v3(val, intr, vd.co);
  
 -      /* Find center */
 -      project(ss->cache->mats, ss->cache->location, p);
 -      rn->r.xmin= p[0] - radius;
 -      rn->r.ymin= p[1] - radius;
 -      rn->r.xmax= p[0] + radius;
 -      rn->r.ymax= p[1] + radius;
 +                              if(clay) {
 +                                      if(bstr > FLT_EPSILON)
-                                               VecMulf(val, fade / bstr);
++                                              mul_v3_fl(val, fade / bstr);
 +                                      else
-                                               VecMulf(val, fade);
++                                              mul_v3_fl(val, fade);
 +                                      /* Clay displacement */
 +                                      val[0]+=area_normal[0] * ss->cache->scale[0]*fade;
 +                                      val[1]+=area_normal[1] * ss->cache->scale[1]*fade;
 +                                      val[2]+=area_normal[2] * ss->cache->scale[2]*fade;
 +                              }
 +                              else
-                                       VecMulf(val, fabs(fade));
++                                      mul_v3_fl(val, fabs(fade));
  
-                               VecAddf(val, val, vd.co);
 -      BLI_addtail(&ss->damaged_rects, rn);
++                              add_v3_v3v3(val, val, vd.co);
  
 -      /* Update insides */
 -      for(i=0; i<ss->totvert; ++i) {
 -              if(!ss->projverts[i].inside) {
 -                      if(ss->projverts[i].co[0] > rn->r.xmin && ss->projverts[i].co[1] > rn->r.ymin &&
 -                         ss->projverts[i].co[0] < rn->r.xmax && ss->projverts[i].co[1] < rn->r.ymax) {
 -                              ss->projverts[i].inside= 1;
 +                              sculpt_clip(sd, ss, vd.co, val);
 +                              ss->mvert[vd.index].flag |= ME_VERT_PBVH_UPDATE;
                        }
                }
 -              // XXX: remember to fix this!
 -              // temporary pass
 -              ss->projverts[i].inside = 1;
 +
 +              BLI_pbvh_node_mark_update(nodes[n]);
        }
  }
  
  static void do_brush_action(Sculpt *sd, SculptSession *ss, StrokeCache *cache)
  {
 +      SculptSearchSphereData data;
        Brush *brush = paint_brush(&sd->paint);
 -      float av_dist;
 -      ListBase active_verts={0,0};
 -      ListBase *grab_active_verts = &ss->cache->grab_active_verts[ss->cache->symmetry];
 -      ActiveData *adata= 0;
 -      float *vert;
 -      Mesh *me= NULL; /*XXX: get_mesh(OBACT); */
 -      const float bstrength= brush_strength(sd, cache);
 -      KeyBlock *keyblock= NULL; /*XXX: ob_get_keyblock(OBACT); */
 -      Brush *b = brush;
 -      int i;
 +      //KeyBlock *keyblock= NULL; /*XXX: ob_get_keyblock(OBACT); */
 +      PBVHNode **nodes= NULL;
 +      int totnode;
  
 -      sculpt_add_damaged_rect(ss);
 -
 -      /* Build a list of all vertices that are potentially within the brush's
 -         area of influence. Only do this once for the grab brush. */
 -      if((b->sculpt_tool != SCULPT_TOOL_GRAB) || cache->first_time) {
 -              for(i=0; i<ss->totvert; ++i) {
 -                      /* Projverts.inside provides a rough bounding box */
 -                      if(ss->multires || ss->projverts[i].inside) {
 -                              //vert= ss->vertexcosnos ? &ss->vertexcosnos[i*6] : a->verts[i].co;
 -                              vert= ss->mvert[i].co;
 -                              av_dist= len_v3v3(ss->cache->location, vert);
 -                              if(av_dist < cache->radius) {
 -                                      adata= (ActiveData*)MEM_mallocN(sizeof(ActiveData), "ActiveData");
 -
 -                                      adata->Index = i;
 -                                      /* Fade is used to store the final strength at which the brush
 -                                         should modify a particular vertex. */
 -                                      adata->Fade= tex_strength(sd, ss, vert, av_dist) * bstrength;
 -                                      adata->dist = av_dist;
 -
 -                                      if(b->sculpt_tool == SCULPT_TOOL_GRAB && cache->first_time)
 -                                              BLI_addtail(grab_active_verts, adata);
 -                                      else
 -                                              BLI_addtail(&active_verts, adata);
 -                              }
 -                      }
 +      data.ss = ss;
 +      data.sd = sd;
 +      data.radius_squared = ss->cache->radius * ss->cache->radius;
 +
 +      /* Build a list of all nodes that are potentially within the brush's
 +         area of influence */
 +      if(brush->sculpt_tool == SCULPT_TOOL_GRAB) {
 +              if(cache->first_time) {
 +                      /* For the grab tool we store these nodes once in the beginning
 +                         and then reuse them. */
 +                      BLI_pbvh_search_gather(ss->tree, sculpt_search_sphere_cb, &data,
 +                              &nodes, &totnode);
 +                      
 +                      ss->cache->grab_active_nodes[ss->cache->symmetry]= nodes;
 +                      ss->cache->grab_active_totnode[ss->cache->symmetry]= totnode;
-                       VecCopyf(ss->cache->grab_active_location[ss->cache->symmetry], ss->cache->location);
++                      copy_v3_v3(ss->cache->grab_active_location[ss->cache->symmetry], ss->cache->location);
 +              }
 +              else {
 +                      nodes= ss->cache->grab_active_nodes[ss->cache->symmetry];
 +                      totnode= ss->cache->grab_active_totnode[ss->cache->symmetry];
-                       VecCopyf(ss->cache->location, ss->cache->grab_active_location[ss->cache->symmetry]);
++                      copy_v3_v3(ss->cache->location, ss->cache->grab_active_location[ss->cache->symmetry]);
                }
        }
 +      else {
 +              BLI_pbvh_search_gather(ss->tree, sculpt_search_sphere_cb, &data,
 +                      &nodes, &totnode);
 +      }
  
        /* Only act if some verts are inside the brush area */
 -      if(active_verts.first || (b->sculpt_tool == SCULPT_TOOL_GRAB && grab_active_verts->first)) {
 +      if(totnode) {
                /* Apply one type of brush action */
 -              switch(b->sculpt_tool){
 +              switch(brush->sculpt_tool){
                case SCULPT_TOOL_DRAW:
 -                      do_draw_brush(sd, ss, &active_verts);
 +                      do_draw_brush(sd, ss, nodes, totnode);
                        break;
                case SCULPT_TOOL_SMOOTH:
 -                      do_smooth_brush(sd, ss, &active_verts);
 +                      do_smooth_brush(sd, ss, nodes, totnode);
                        break;
                case SCULPT_TOOL_PINCH:
 -                      do_pinch_brush(sd, ss, &active_verts);
 +                      do_pinch_brush(sd, ss, nodes, totnode);
                        break;
                case SCULPT_TOOL_INFLATE:
 -                      do_inflate_brush(sd, ss, &active_verts);
 +                      do_inflate_brush(sd, ss, nodes, totnode);
                        break;
                case SCULPT_TOOL_GRAB:
 -                      do_grab_brush(sd, ss);
 +                      do_grab_brush(sd, ss, nodes, totnode);
                        break;
                case SCULPT_TOOL_LAYER:
 -                      do_layer_brush(sd, ss, &active_verts);
 +                      do_layer_brush(sd, ss, nodes, totnode);
                        break;
                case SCULPT_TOOL_FLATTEN:
 -                      do_flatten_clay_brush(sd, ss, &active_verts, 0);
 +                      do_flatten_clay_brush(sd, ss, nodes, totnode, 0);
                        break;
                case SCULPT_TOOL_CLAY:
 -                      do_flatten_clay_brush(sd, ss, &active_verts, 1);
 +                      do_flatten_clay_brush(sd, ss, nodes, totnode, 1);
 +                      break;
                }
        
 +#if 0
                /* Copy the modified vertices from mesh to the active key */
                if(keyblock && !ss->multires) {
                        float *co= keyblock->data;
@@@ -1272,10 -940,9 +1272,10 @@@ static void do_symmetrical_brush_action
        const char symm = sd->flags & 7;
        int i;
  
-       VecCopyf(cache->location, cache->true_location);
-       VecCopyf(cache->grab_delta_symmetry, cache->grab_delta);
+       copy_v3_v3(cache->location, cache->true_location);
+       copy_v3_v3(cache->grab_delta_symmetry, cache->grab_delta);
        cache->symmetry = 0;
 +      cache->bstrength = brush_strength(sd, cache);
        do_brush_action(sd, ss, cache);
  
        for(i = 1; i <= symm; ++i) {
@@@ -1454,13 -1251,15 +1454,13 @@@ static void SCULPT_OT_radial_control(wm
  /**** Operator for applying a stroke (various attributes including mouse path)
        using the current brush. ****/
  
 -static float unproject_brush_radius(SculptSession *ss, float offset)
 +static float unproject_brush_radius(ViewContext *vc, float center[3], float offset)
  {
 -      float brush_edge[3];
 -
 -      /* In anchored mode, brush size changes with mouse loc, otherwise it's fixed using the brush radius */
 -      view3d_unproject(ss->cache->mats, brush_edge, ss->cache->initial_mouse[0] + offset,
 -                ss->cache->initial_mouse[1], ss->cache->depth);
 +      float delta[3]; /* Receives the vector window_to_3d_delta() produces for a
 +                         window-space offset of (offset, 0), after initgrabz() has
 +                         been called with center. The function returns its length.
 +                         NOTE(review): presumably this is the 3D size, at the depth
 +                         of center, of a screen-space radius of offset pixels --
 +                         confirm against initgrabz/window_to_3d_delta. */
  
 -      return len_v3v3(ss->cache->true_location, brush_edge);
 +      initgrabz(vc->rv3d, center[0], center[1], center[2]);
 +      window_to_3d_delta(vc->ar, delta, offset, 0);
-               return VecLength(delta);
++      return len_v3(delta);
  }
  
  static void sculpt_cache_free(StrokeCache *cache)
@@@ -1497,44 -1297,49 +1497,44 @@@ static void sculpt_update_cache_invaria
  
        /* Truly temporary data that isn't stored in properties */
  
 -      cache->mats = MEM_callocN(sizeof(bglMats), "sculpt bglMats");
 -      view3d_get_transformation(vc, vc->obact, cache->mats);
 -
 -      sculpt_update_mesh_elements(C);
 -
 -      /* Initialize layer brush displacements */
 -      if(brush->sculpt_tool == SCULPT_TOOL_LAYER &&
 -         (!ss->layer_disps || !(brush->flag & BRUSH_PERSISTENT))) {
 -              if(ss->layer_disps)
 -                      MEM_freeN(ss->layer_disps);
 -              ss->layer_disps = MEM_callocN(sizeof(float) * ss->totvert, "layer brush displacements");
 -      }
 +      cache->vc = vc;
 +      cache->brush = brush;
  
 -      /* Make copies of the mesh vertex locations and normals for some tools */
 -      if(brush->sculpt_tool == SCULPT_TOOL_LAYER || (brush->flag & BRUSH_ANCHORED)) {
 -              if(brush->sculpt_tool != SCULPT_TOOL_LAYER ||
 -                 !ss->mesh_co_orig || !(brush->flag & BRUSH_PERSISTENT)) {
 -                      if(!ss->mesh_co_orig)
 -                              ss->mesh_co_orig= MEM_mallocN(sizeof(float) * 3 * ss->totvert,
 +      cache->mats = MEM_callocN(sizeof(bglMats), "sculpt bglMats");
 +      view3d_get_transformation(vc->ar, vc->rv3d, vc->obact, cache->mats);
 +
 +      /* Initialize layer brush displacements and persistent coords */
 +      if(brush->sculpt_tool == SCULPT_TOOL_LAYER) {
 +              if(!ss->layer_disps || !(brush->flag & BRUSH_PERSISTENT)) {
 +                      if(ss->layer_disps)
 +                              MEM_freeN(ss->layer_disps);
 +                      ss->layer_disps = MEM_callocN(sizeof(float) * ss->totvert, "layer brush displacements");
 +              }
 +              if(!ss->layer_co && (brush->flag & BRUSH_PERSISTENT)) {
 +                      if(!ss->layer_co)
 +                              ss->layer_co= MEM_mallocN(sizeof(float) * 3 * ss->totvert,
                                                                       "sculpt mesh vertices copy");
                        for(i = 0; i < ss->totvert; ++i)
-                               VecCopyf(ss->layer_co[i], ss->mvert[i].co);
 -                              copy_v3_v3(ss->mesh_co_orig[i], ss->mvert[i].co);
++                              copy_v3_v3(ss->layer_co[i], ss->mvert[i].co);
                }
 +      }
  
 -              if(brush->flag & BRUSH_ANCHORED) {
 -                      cache->orig_norms= MEM_mallocN(sizeof(short) * 3 * ss->totvert, "Sculpt orig norm");
 -                      for(i = 0; i < ss->totvert; ++i) {
 -                              cache->orig_norms[i][0] = ss->mvert[i].no[0];
 -                              cache->orig_norms[i][1] = ss->mvert[i].no[1];
 -                              cache->orig_norms[i][2] = ss->mvert[i].no[2];
 -                      }
 -
 -                      if(ss->face_normals) {
 -                              float *fn = ss->face_normals;
 -                              cache->face_norms= MEM_mallocN(sizeof(float) * 3 * ss->totface, "Sculpt face norms");
 -                              for(i = 0; i < ss->totface; ++i, fn += 3)
 -                                      copy_v3_v3(cache->face_norms[i], fn);
 -                      }
 +      /* Make copies of the mesh vertex locations and normals for some tools */
 +      if(brush->flag & BRUSH_ANCHORED) {
 +              if(ss->face_normals) {
 +                      float *fn = ss->face_normals;
 +                      cache->face_norms= MEM_mallocN(sizeof(float) * 3 * ss->totface, "Sculpt face norms");
 +                      for(i = 0; i < ss->totface; ++i, fn += 3)
-                               VecCopyf(cache->face_norms[i], fn);
++                              copy_v3_v3(cache->face_norms[i], fn);
                }
 +
 +              cache->original = 1;
        }
  
 -      view3d_unproject(cache->mats, cache->true_location, cache->initial_mouse[0], cache->initial_mouse[1], cache->depth);
 -      cache->initial_radius = unproject_brush_radius(ss, brush->size);
 +      if(ELEM3(brush->sculpt_tool, SCULPT_TOOL_DRAW, SCULPT_TOOL_LAYER, SCULPT_TOOL_INFLATE))
 +              if(!(brush->flag & BRUSH_ACCUMULATE))
 +                      cache->original = 1;
 +
        cache->rotation = 0;
        cache->first_time = 1;
  }
@@@ -1597,138 -1398,13 +1597,138 @@@ static void sculpt_update_cache_variant
  
        /* Find the grab delta */
        if(brush->sculpt_tool == SCULPT_TOOL_GRAB) {
 -              view3d_unproject(cache->mats, grab_location, cache->mouse[0], cache->mouse[1], cache->depth);
 +              // XXX: view3d_unproject(cache->mats, grab_location, cache->mouse[0], cache->mouse[1], cache->depth);
 +              initgrabz(cache->vc->rv3d, cache->true_location[0], cache->true_location[1], cache->true_location[2]);
 +              window_to_3d_delta(cache->vc->ar, grab_location, cache->mouse[0], cache->mouse[1]);
 +
                if(!cache->first_time)
-                       VecSubf(cache->grab_delta, grab_location, cache->old_grab_location);
-               VecCopyf(cache->old_grab_location, grab_location);
+                       sub_v3_v3v3(cache->grab_delta, grab_location, cache->old_grab_location);
+               copy_v3_v3(cache->old_grab_location, grab_location);
        }
  }
  
-               if(!RayIntersectsTriangle(ray_start, ray_normal, t0, t1, t2,
 +/* XXX: Code largely copied from bvhutils.c, should be unified */
 +/* Intersects a ray with a face given as a triangle (t0, t1, t2) or, when t3
 +   is non-NULL, as a quad that is tested as the two triangles (t0, t1, t2)
 +   and (t0, t2, t3).
 +   *fdist holds the closest distance found so far; it is lowered whenever a
 +   triangle is hit at a distance that is >= 0 and smaller than *fdist.
 +   Returns 1 if a better intersection has been found */
 +static int ray_face_intersection(float ray_start[3], float ray_normal[3],
 +                               float *t0, float *t1, float *t2, float *t3,
 +                               float *fdist)
 +{
 +      int hit = 0;
 +
 +      do
 +      {       
 +              float dist = FLT_MAX;
 +                      
-       VecCopyf(out, ray_normal);
-       VecMulf(out, srd.dist);
-       VecAddf(out, out, ray_start);
++              if(!isect_ray_tri_v3(ray_start, ray_normal, t0, t1, t2,
 +                                       &dist, NULL))
 +                      dist = FLT_MAX;
 +
 +              if(dist >= 0 && dist < *fdist) {
 +                      hit = 1;
 +                      *fdist = dist;
 +              }
 +              /* shift to the second triangle (t0, t2, t3); t2 becomes NULL once
 +                 there is no further triangle, which ends the loop */
 +              t1 = t2;
 +              t2 = t3;
 +              t3 = NULL;
 +
 +      } while(t2);
 +
 +      return hit;
 +}
 +
 +typedef struct { /* state shared between sculpt_stroke_get_location and sculpt_raycast_cb */
 +      SculptSession *ss;
 +      float *ray_start, *ray_normal; /* ray passed to ray_face_intersection */
 +      int hit; /* face index of the closest hit so far; set to -1 before the raycast */
 +      float dist; /* closest hit distance so far; set to FLT_MAX before the raycast */
 +      int original; /* non-zero: prefer the undo node coordinates over the current mesh */
 +} SculptRaycastData;
 +/* Per-node callback handed to BLI_pbvh_raycast by sculpt_stroke_get_location.
 +   data_v is a SculptRaycastData. Every face of the node is tested with
 +   ray_face_intersection; whenever a face gives a closer hit, srd->dist is
 +   lowered (by ray_face_intersection) and srd->hit is set to that face's index.
 +   If srd->original is set, a stroke cache exists and sculpt_undo_get_node
 +   returns a node, the coordinates stored in that undo node are used (looked
 +   up through face_verts, four entries per face); otherwise the current
 +   mvert coordinates are used. */
 +void sculpt_raycast_cb(PBVHNode *node, void *data_v)
 +{
 +      SculptRaycastData *srd = data_v;
 +      MVert *vert = srd->ss->mvert;
 +      int i, totface, *faces, *face_verts;
 +
 +      if(srd->original && srd->ss->cache) {
 +              SculptUndoNode *unode;
 +
 +              unode= sculpt_undo_get_node(srd->ss, node);
 +
 +              if(unode) {
 +                      /* intersect with coordinates from before we started stroke */
 +                      BLI_pbvh_node_get_faces(node, &faces, &face_verts, &totface);
 +
 +                      for(i = 0; i < totface; ++i) {
 +                              MFace *f = srd->ss->mface + faces[i];
 +                              /*if(face_verts[i*4 + 0] >= unode->totvert) abort();
 +                              if(face_verts[i*4 + 1] >= unode->totvert) abort();
 +                              if(face_verts[i*4 + 2] >= unode->totvert) abort();
 +                              if(f->v4 && face_verts[i*4 + 3] >= unode->totvert) abort();*/
 +
 +                              if(ray_face_intersection(srd->ray_start, srd->ray_normal,
 +                                                       unode->co[face_verts[i*4+0]],
 +                                                       unode->co[face_verts[i*4+1]],
 +                                                       unode->co[face_verts[i*4+2]],
 +                                                       f->v4? unode->co[face_verts[i*4+3]]: NULL,
 +                                                       &srd->dist)) {
 +                                      srd->hit = faces[i];
 +                              }
 +                      }
 +
 +                      return;
 +              }
 +      }
 +
 +      /* intersect with current coordinates */
 +      BLI_pbvh_node_get_faces(node, &faces, NULL, &totface);
 +
 +      for(i = 0; i < totface; ++i) {
 +              MFace *f = srd->ss->mface + faces[i];
 +              if(ray_face_intersection(srd->ray_start, srd->ray_normal,
 +                                       vert[f->v1].co,
 +                                       vert[f->v2].co,
 +                                       vert[f->v3].co,
 +                                       f->v4 ? vert[f->v4].co : NULL,
 +                                       &srd->dist)) {
 +                      srd->hit = faces[i];
 +              }
 +      }
 +}
 +
 +/* Do a raycast in the tree to find the 3d brush location
 +   (This allows us to ignore the GL depth buffer)
 +   Returns 0 if the ray doesn't hit the mesh, non-zero otherwise
 +
 +   mouse is made region-relative by subtracting the region's winrct minimum
 +   before the view ray is built. If a stroke cache exists, its 'original'
 +   flag is forwarded to the raycast and to the callback.
 +   out is always written as ray_start + ray_normal * srd.dist; when nothing
 +   is hit srd.dist is still FLT_MAX, so out is only meaningful when the
 +   return value is non-zero.
 + */
 +int sculpt_stroke_get_location(bContext *C, struct PaintStroke *stroke, float out[3], float mouse[2])
 +{
 +      ViewContext *vc = paint_stroke_view_context(stroke);
 +      SculptSession *ss= vc->obact->sculpt;
 +      StrokeCache *cache= ss->cache;
 +      float ray_start[3], ray_normal[3];
 +      float mval[2] = {mouse[0] - vc->ar->winrct.xmin,
 +                       mouse[1] - vc->ar->winrct.ymin};
 +      SculptRaycastData srd;
 +
 +      viewray(vc->ar, vc->v3d, mval, ray_start, ray_normal);
 +
 +      srd.ss = vc->obact->sculpt;
 +      srd.ray_start = ray_start;
 +      srd.ray_normal = ray_normal;
 +      srd.dist = FLT_MAX;
 +      srd.hit = -1;
 +      srd.original = (cache)? cache->original: 0;
 +      BLI_pbvh_raycast(ss->tree, sculpt_raycast_cb, &srd,
 +                   ray_start, ray_normal, srd.original);
 +      
++      copy_v3_v3(out, ray_normal);
++      mul_v3_fl(out, srd.dist);
++      add_v3_v3v3(out, out, ray_start);
 +
 +      return srd.hit != -1;
 +}
 +
  /* Initialize stroke operator properties */
  static void sculpt_brush_stroke_init_properties(bContext *C, wmOperator *op, wmEvent *event, SculptSession *ss)
  {
@@@ -1789,23 -1470,27 +1789,23 @@@ static void sculpt_restore_mesh(Sculpt 
        int i;
  
        /* Restore the mesh before continuing with anchored stroke */
 -      if((brush->flag & BRUSH_ANCHORED) && ss->mesh_co_orig) {
 -
 -              if(ss->drawobject)
 -                      buffer= (float *)GPU_buffer_lock(ss->drawobject->normals);
 -
 -              for(i = 0; i < ss->totvert; ++i) {
 -                      copy_v3_v3(ss->mvert[i].co, ss->mesh_co_orig[i]);
 -                      ss->mvert[i].no[0] = cache->orig_norms[i][0];
 -                      ss->mvert[i].no[1] = cache->orig_norms[i][1];
 -                      ss->mvert[i].no[2] = cache->orig_norms[i][2];
 -                      if( buffer != 0 ) {
 -                              IndexLink *cur = &ss->drawobject->indices[i];
 -                              while( cur != 0 && cur->element != -1 ) {
 -                                      VECCOPY(&buffer[cur->element*3],cache->orig_norms[i]);
 -                                      cur = cur->next;
 -                              }
 +      if(brush->flag & BRUSH_ANCHORED) {
 +              ListBase *lb= undo_paint_push_get_list(UNDO_PAINT_MESH);
 +              SculptUndoNode *unode;
 +
 +              /* this could benefit from multithreading... */
 +
 +              for(unode = lb->first; unode; unode = unode->next) {
 +                      float (*co)[3]= unode->co;
 +                      short (*no)[3]= unode->no;
 +                      int *index= unode->index;
 +                      int totvert= unode->totvert;
 +
 +                      for(i = 0; i < totvert; ++i) {
-                               VECCOPY(ss->mvert[index[i]].co, co[i]);
++                              copy_v3_v3(ss->mvert[index[i]].co, co[i]);
 +                              VECCOPY(ss->mvert[index[i]].no, no[i]);
                        }
                }
 -              if( buffer != 0 )
 -                      GPU_buffer_unlock( ss->drawobject->normals );
  
                if(ss->face_normals) {
                        float *fn = ss->face_normals;
@@@ -129,7 -129,7 +129,7 @@@ void view3d_get_transformation(ARegion 
        float cpy[4][4];
        int i, j;
  
-       Mat4MulMat4(cpy, ob->obmat, rv3d->viewmat);
 -      mul_m4_m4m4(cpy, ob->obmat, vc->rv3d->viewmat);
++      mul_m4_m4m4(cpy, ob->obmat, rv3d->viewmat);
  
        for(i = 0; i < 4; ++i) {
                for(j = 0; j < 4; ++j) {
@@@ -481,45 -481,6 +481,44 @@@ void VIEW3D_OT_setobjectascamera(wmOper
  }
  /* ********************************** */
  
-               CalcNormFloat(bb->vec[val], bb->vec[val==3?0:val+1], bb->vec[val+4],
-                             planes[val]);
 +void view3d_calculate_clipping(BoundBox *bb, float planes[4][4], bglMats *mats, rcti *rect)
 +{
 +      double xs, ys, p[3];
 +      short val;
 +      /* Fills bb with eight points: the four corners of rect unprojected at
 +         window depth 0.0 (bb->vec[0..3]) and at depth 1.0 (bb->vec[4..7]).
 +         Then writes four plane equations into planes, one per side of that
 +         volume, each passing through bb->vec[val].
 +         Note that mats is modified in place: modelview[0] and modelview[5]
 +         are snapped to 0 when tiny, and viewport[0..1] are set to 0. */
 +      /* near zero floating point values can give issues with gluUnProject
 +              in side view on some implementations */
 +      if(fabs(mats->modelview[0]) < 1e-6) mats->modelview[0]= 0.0;
 +      if(fabs(mats->modelview[5]) < 1e-6) mats->modelview[5]= 0.0;
 +
 +      /* Set up viewport so that gluUnProject will give correct values */
 +      mats->viewport[0] = 0;
 +      mats->viewport[1] = 0;
 +
 +      /* four clipping planes and bounding volume */
 +      /* first do the bounding volume */
 +      for(val=0; val<4; val++) {
 +              xs= (val==0||val==3)?rect->xmin:rect->xmax;
 +              ys= (val==0||val==1)?rect->ymin:rect->ymax;
 +
 +              gluUnProject(xs, ys, 0.0, mats->modelview, mats->projection, mats->viewport, &p[0], &p[1], &p[2]);
 +              VECCOPY(bb->vec[val], p);
 +
 +              gluUnProject(xs, ys, 1.0, mats->modelview, mats->projection, mats->viewport, &p[0], &p[1], &p[2]);
 +              VECCOPY(bb->vec[4+val], p);
 +      }
 +
 +      /* then plane equations */
 +      for(val=0; val<4; val++) {
 +
++              normal_tri_v3(planes[val], bb->vec[val], bb->vec[val==3?0:val+1], bb->vec[val+4]);
 +              /* d term: minus the dot product of the normal with bb->vec[val] */
 +              planes[val][3]= - planes[val][0]*bb->vec[val][0]
 +                      - planes[val][1]*bb->vec[val][1]
 +                      - planes[val][2]*bb->vec[val][2];
 +      }
 +}
 +
  /* create intersection coordinates in view Z direction at mouse coordinates */
  void viewline(ARegion *ar, View3D *v3d, float mval[2], float ray_start[3], float ray_end[3])
  {
                vec[2]= -1.0f;
                vec[3]= 1.0f;
                
-               Mat4MulVec4fl(rv3d->persinv, vec);
-               VecMulf(vec, 1.0f / vec[3]);
+               mul_m4_v4(rv3d->persinv, vec);
+               mul_v3_fl(vec, 1.0f / vec[3]);
                
--              VECCOPY(ray_start, rv3d->viewinv[3]);
--              VECSUB(vec, vec, ray_start);
-               Normalize(vec);
++              copy_v3_v3(ray_start, rv3d->viewinv[3]);
++              sub_v3_v3v3(vec, vec, ray_start);
+               normalize_v3(vec);
                
                VECADDFAC(ray_start, rv3d->viewinv[3], vec, v3d->near);
                VECADDFAC(ray_end, rv3d->viewinv[3], vec, v3d->far);
@@@ -36,8 -36,7 +36,8 @@@
  
  #include "MEM_guardedalloc.h"
  
- #include "BLI_arithb.h"
+ #include "BLI_math.h"
 +#include "BLI_ghash.h"
  
  #include "DNA_meshdata_types.h"
  
@@@ -377,131 -376,6 +377,131 @@@ void GPU_drawobject_free( DerivedMesh *
        dm->drawObject = 0;
  }
  
-               VecCopyf(out->co, v->co);
 +/* Convenience struct for building the VBO.
 +   TODO: check that (lack-of) padding is OK,
 +   also check performance of short vs float for normals
 +
 +   GPU_draw_buffers uses sizeof(VertexBufferFormat) as the stride and a
 +   byte offset of 12 for the normals, i.e. 'no' is expected to directly
 +   follow the three floats of 'co'. */
 +typedef struct {
 +      float co[3];
 +      short no[3];
 +      
 +      char pad[14]; /* 12 (co) + 6 (no) + 14 = 32 bytes, assuming 4-byte float and 2-byte short */
 +} VertexBufferFormat;
 +
 +typedef struct { /* GL buffers of one node, created by GPU_build_buffers */
 +      unsigned int vert_buf, tri_buf; /* buffer names from glGenBuffersARB: vertex data, triangle indices */
 +      unsigned short tot_tri; /* triangle count; GPU_draw_buffers draws tot_tri * 3 indices */
 +} GPU_Buffers;
 +
 +void GPU_update_buffers(void *buffers_v, MVert *mvert,
 +                      int *vert_indices, int totvert)
 +{
 +      GPU_Buffers *buffers = buffers_v;
 +      VertexBufferFormat *vert_data;
 +      int i;
 +
 +      /* Build VBO */
 +      glBindBufferARB(GL_ARRAY_BUFFER_ARB, buffers->vert_buf);
 +      glBufferDataARB(GL_ARRAY_BUFFER_ARB,
 +                   sizeof(VertexBufferFormat) * totvert,
 +                   NULL, GL_STATIC_DRAW_ARB);
 +      vert_data = glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
 +
 +      for(i = 0; i < totvert; ++i) {
 +              MVert *v = mvert + vert_indices[i];
 +              VertexBufferFormat *out = vert_data + i;
 +
++              copy_v3_v3(out->co, v->co);
 +              memcpy(out->no, v->no, sizeof(short) * 3);
 +      }
 +      glUnmapBufferARB(GL_ARRAY_BUFFER_ARB);
 +
 +      //printf("node updated %p\n", buffers_v);
 +}
 +
 +void *GPU_build_buffers(GHash *map, MVert *mvert, MFace *mface,
 +                      int *face_indices, int totface,
 +                      int *vert_indices, int tot_uniq_verts,
 +                      int totvert)
 +{
 +      GPU_Buffers *buffers;
 +      unsigned short *tri_data;
 +      int i, j, k, tottri;
 +
 +      buffers = MEM_callocN(sizeof(GPU_Buffers), "GPU_Buffers");
 +
 +      /* Count the number of triangles */
 +      for(i = 0, tottri = 0; i < totface; ++i)
 +              tottri += mface[face_indices[i]].v4 ? 2 : 1;
 +
 +      /* Generate index buffer object */
 +      glGenBuffersARB(1, &buffers->tri_buf);
 +      glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, buffers->tri_buf);
 +      glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB,
 +                   sizeof(unsigned short) * tottri * 3, NULL, GL_STATIC_DRAW_ARB);
 +
 +      /* Fill the triangle buffer */
 +      tri_data = glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
 +      for(i = 0; i < totface; ++i) {
 +              MFace *f = mface + face_indices[i];
 +              int v[3] = {f->v1, f->v2, f->v3};
 +
 +              for(j = 0; j < (f->v4 ? 2 : 1); ++j) {
 +                      for(k = 0; k < 3; ++k) {
 +                              void *value, *key = SET_INT_IN_POINTER(v[k]);
 +                              int vbo_index;
 +
 +                              value = BLI_ghash_lookup(map, key);
 +                              vbo_index = GET_INT_FROM_POINTER(value);
 +
 +                              if(vbo_index < 0) {
 +                                      vbo_index = -vbo_index +
 +                                              tot_uniq_verts - 1;
 +                              }
 +
 +                              *tri_data = vbo_index;
 +                              ++tri_data;
 +                      }
 +                      v[0] = f->v4;
 +                      v[1] = f->v1;
 +                      v[2] = f->v3;
 +              }
 +      }
 +      glUnmapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB);
 +
 +      /* Build VBO */
 +      glGenBuffersARB(1, &buffers->vert_buf);
 +      GPU_update_buffers(buffers, mvert, vert_indices, totvert);
 +
 +      buffers->tot_tri = tottri;
 +
 +      return buffers;
 +}
 +/* Draws the triangles of one node from its GPU_Buffers (buffers_v).
 +   Binds the vertex and index buffers, sets the vertex pointer to offset 0
 +   and the normal pointer to byte offset 12 with sizeof(VertexBufferFormat)
 +   as stride, then draws tot_tri * 3 unsigned short indices.
 +   No client state is enabled and no buffer is unbound in this function. */
 +void GPU_draw_buffers(void *buffers_v)
 +{
 +      GPU_Buffers *buffers = buffers_v;
 +
 +      glBindBufferARB(GL_ARRAY_BUFFER_ARB, buffers->vert_buf);
 +      glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, buffers->tri_buf);
 +
 +      glVertexPointer(3, GL_FLOAT, sizeof(VertexBufferFormat), 0);
 +      glNormalPointer(GL_SHORT, sizeof(VertexBufferFormat), (void*)12);
 +
 +      glDrawElements(GL_TRIANGLES, buffers->tot_tri * 3, GL_UNSIGNED_SHORT, 0);
 +}
 +/* Deletes both GL buffers of a GPU_Buffers and frees the struct itself.
 +   Does nothing when buffers_v is NULL. */
 +void GPU_free_buffers(void *buffers_v)
 +{
 +      if(buffers_v) {
 +              GPU_Buffers *buffers = buffers_v;
 +              
 +              glDeleteBuffersARB(1, &buffers->vert_buf);
 +              glDeleteBuffersARB(1, &buffers->tri_buf);
 +
 +              MEM_freeN(buffers);
 +      }
 +}
 +
  GPUBuffer *GPU_buffer_setup( DerivedMesh *dm, GPUDrawObject *object, int size, GLenum target, void *user, void (*copy_f)(DerivedMesh *, float *, int *, int *, void *) )
  {
        GPUBuffer *buffer;