Depsgraph: Split debug flags
[blender-staging.git] / source / blender / blenkernel / intern / particle_system.c
index 71656240369239c9141abe824cddc74acb903680..2a1e0f559d7990d65c105ef063253fa490e51d04 100644 (file)
 #include <math.h>
 #include <string.h>
 
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
 #include "MEM_guardedalloc.h"
 
 #include "DNA_anim_types.h"
@@ -52,7 +48,7 @@
 #include "DNA_mesh_types.h"
 #include "DNA_meshdata_types.h"
 #include "DNA_modifier_types.h"
-#include "DNA_object_force.h"
+#include "DNA_object_force_types.h"
 #include "DNA_object_types.h"
 #include "DNA_curve_types.h"
 #include "DNA_scene_types.h"
 #include "BLI_utildefines.h"
 #include "BLI_edgehash.h"
 #include "BLI_rand.h"
-#include "BLI_jitter.h"
+#include "BLI_jitter_2d.h"
 #include "BLI_math.h"
 #include "BLI_blenlib.h"
 #include "BLI_kdtree.h"
 #include "BLI_kdopbvh.h"
 #include "BLI_sort.h"
+#include "BLI_task.h"
 #include "BLI_threads.h"
 #include "BLI_linklist.h"
 
@@ -75,7 +72,9 @@
 #include "BKE_boids.h"
 #include "BKE_cdderivedmesh.h"
 #include "BKE_collision.h"
+#include "BKE_colortools.h"
 #include "BKE_effect.h"
+#include "BKE_library_query.h"
 #include "BKE_particle.h"
 #include "BKE_global.h"
 
@@ -89,6 +88,7 @@
 #include "BKE_modifier.h"
 #include "BKE_scene.h"
 #include "BKE_bvhutils.h"
+#include "BKE_depsgraph.h"
 
 #include "PIL_time.h"
 
@@ -96,7 +96,7 @@
 
 /* fluid sim particle import */
 #ifdef WITH_MOD_FLUID
-#include "DNA_object_fluidsim.h"
+#include "DNA_object_fluidsim_types.h"
 #include "LBM_fluidsim.h"
 #include <zlib.h>
 #include <string.h>
@@ -230,7 +230,7 @@ static void realloc_particles(ParticleSimulationData *sim, int new_totpart)
                                newboids= MEM_callocN(totpart*sizeof(BoidParticle), "boid particles");
 
                                if (newboids == NULL) {
-                                        /* allocation error! */
+                                       /* allocation error! */
                                        if (newpars)
                                                MEM_freeN(newpars);
                                        return;
@@ -284,7 +284,7 @@ static void realloc_particles(ParticleSimulationData *sim, int new_totpart)
        }
 }
 
-static int get_psys_child_number(struct Scene *scene, ParticleSystem *psys)
+int psys_get_child_number(Scene *scene, ParticleSystem *psys)
 {
        int nbr;
 
@@ -296,40 +296,19 @@ static int get_psys_child_number(struct Scene *scene, ParticleSystem *psys)
        else
                nbr= psys->part->child_nbr;
 
-       return get_render_child_particle_number(&scene->r, nbr);
-}
-
-static int get_psys_tot_child(struct Scene *scene, ParticleSystem *psys)
-{
-       return psys->totpart*get_psys_child_number(scene, psys);
+       return get_render_child_particle_number(&scene->r, nbr, psys->renderdata != NULL);
 }
 
-static void alloc_child_particles(ParticleSystem *psys, int tot)
+int psys_get_tot_child(Scene *scene, ParticleSystem *psys)
 {
-       if (psys->child) {
-               /* only re-allocate if we have to */
-               if (psys->part->childtype && psys->totchild == tot) {
-                       memset(psys->child, 0, tot*sizeof(ChildParticle));
-                       return;
-               }
-
-               MEM_freeN(psys->child);
-               psys->child=NULL;
-               psys->totchild=0;
-       }
-
-       if (psys->part->childtype) {
-               psys->totchild= tot;
-               if (psys->totchild)
-                       psys->child= MEM_callocN(psys->totchild*sizeof(ChildParticle), "child_particles");
-       }
+       return psys->totpart*psys_get_child_number(scene, psys); /* psys->totpart multiplied by the child number reported by psys_get_child_number() for this scene/system */
 }
 
 /************************************************/
 /*                     Distribution                                            */
 /************************************************/
 
-void psys_calc_dmcache(Object *ob, DerivedMesh *dm, ParticleSystem *psys)
+void psys_calc_dmcache(Object *ob, DerivedMesh *dm_final, DerivedMesh *dm_deformed, ParticleSystem *psys)
 {
        /* use for building derived mesh mapping info:
         *
@@ -342,13 +321,13 @@ void psys_calc_dmcache(Object *ob, DerivedMesh *dm, ParticleSystem *psys)
        PARTICLE_P;
        
        /* CACHE LOCATIONS */
-       if (!dm->deformedOnly) {
+       if (!dm_final->deformedOnly) {
                /* Will use later to speed up subsurf/derivedmesh */
                LinkNode *node, *nodedmelem, **nodearray;
                int totdmelem, totelem, i, *origindex, *origindex_poly = NULL;
 
                if (psys->part->from == PART_FROM_VERT) {
-                       totdmelem= dm->getNumVerts(dm);
+                       totdmelem= dm_final->getNumVerts(dm_final);
 
                        if (use_modifier_stack) {
                                totelem= totdmelem;
@@ -356,11 +335,11 @@ void psys_calc_dmcache(Object *ob, DerivedMesh *dm, ParticleSystem *psys)
                        }
                        else {
                                totelem= me->totvert;
-                               origindex= dm->getVertDataArray(dm, CD_ORIGINDEX);
+                               origindex= dm_final->getVertDataArray(dm_final, CD_ORIGINDEX);
                        }
                }
                else { /* FROM_FACE/FROM_VOLUME */
-                       totdmelem= dm->getNumTessFaces(dm);
+                       totdmelem= dm_final->getNumTessFaces(dm_final);
 
                        if (use_modifier_stack) {
                                totelem= totdmelem;
@@ -368,20 +347,20 @@ void psys_calc_dmcache(Object *ob, DerivedMesh *dm, ParticleSystem *psys)
                                origindex_poly= NULL;
                        }
                        else {
-                               totelem= me->totpoly;
-                               origindex= dm->getTessFaceDataArray(dm, CD_ORIGINDEX);
+                               totelem = dm_deformed->getNumTessFaces(dm_deformed);
+                               origindex = dm_final->getTessFaceDataArray(dm_final, CD_ORIGINDEX);
 
                                /* for face lookups we need the poly origindex too */
-                               origindex_poly= dm->getPolyDataArray(dm, CD_ORIGINDEX);
+                               origindex_poly= dm_final->getPolyDataArray(dm_final, CD_ORIGINDEX);
                                if (origindex_poly == NULL) {
                                        origindex= NULL;
                                }
                        }
                }
-       
+
                nodedmelem= MEM_callocN(sizeof(LinkNode)*totdmelem, "psys node elems");
                nodearray= MEM_callocN(sizeof(LinkNode *)*totelem, "psys node array");
-               
+
                for (i=0, node=nodedmelem; i<totdmelem; i++, node++) {
                        int origindex_final;
                        node->link = SET_INT_IN_POINTER(i);
@@ -410,7 +389,7 @@ void psys_calc_dmcache(Object *ob, DerivedMesh *dm, ParticleSystem *psys)
                                }
                        }
                }
-               
+
                /* cache the verts/faces! */
                LOOP_PARTICLES {
                        if (pa->num < 0) {
@@ -432,9 +411,7 @@ void psys_calc_dmcache(Object *ob, DerivedMesh *dm, ParticleSystem *psys)
                                                pa->num_dmcache = DMCACHE_NOTFOUND;
                                }
                                else { /* FROM_FACE/FROM_VOLUME */
-                                       /* Note that sometimes the pa->num is over the nodearray size, this is bad, maybe there is a better place to fix this,
-                                        * but for now passing NULL is OK. every face will be searched for the particle so its slower - Campbell */
-                                       pa->num_dmcache= psys_particle_dm_face_lookup(ob, dm, pa->num, pa->fuv, pa->num < totelem ? nodearray[pa->num] : NULL);
+                                       pa->num_dmcache = psys_particle_dm_face_lookup(dm_final, dm_deformed, pa->num, pa->fuv, nodearray);
                                }
                        }
                }
@@ -447,1058 +424,66 @@ void psys_calc_dmcache(Object *ob, DerivedMesh *dm, ParticleSystem *psys)
                 * should know to use the num or num_dmcache, set the num_dmcache to
                 * an invalid value, just in case */
                
-               LOOP_PARTICLES
+               LOOP_PARTICLES {
                        pa->num_dmcache = DMCACHE_NOTFOUND;
-       }
-}
-
-static void distribute_simple_children(Scene *scene, Object *ob, DerivedMesh *finaldm, ParticleSystem *psys)
-{
-       ChildParticle *cpa = NULL;
-       int i, p;
-       int child_nbr= get_psys_child_number(scene, psys);
-       int totpart= get_psys_tot_child(scene, psys);
-
-       alloc_child_particles(psys, totpart);
-
-       cpa = psys->child;
-       for (i=0; i<child_nbr; i++) {
-               for (p=0; p<psys->totpart; p++,cpa++) {
-                       float length=2.0;
-                       cpa->parent=p;
-                                       
-                       /* create even spherical distribution inside unit sphere */
-                       while (length>=1.0f) {
-                               cpa->fuv[0]=2.0f*BLI_frand()-1.0f;
-                               cpa->fuv[1]=2.0f*BLI_frand()-1.0f;
-                               cpa->fuv[2]=2.0f*BLI_frand()-1.0f;
-                               length=len_v3(cpa->fuv);
-                       }
-
-                       cpa->num=-1;
-               }
-       }
-       /* dmcache must be updated for parent particles if children from faces is used */
-       psys_calc_dmcache(ob, finaldm, psys);
-}
-static void distribute_grid(DerivedMesh *dm, ParticleSystem *psys)
-{
-       ParticleData *pa=NULL;
-       float min[3], max[3], delta[3], d;
-       MVert *mv, *mvert = dm->getVertDataArray(dm,0);
-       int totvert=dm->getNumVerts(dm), from=psys->part->from;
-       int i, j, k, p, res=psys->part->grid_res, size[3], axis;
-
-       /* find bounding box of dm */
-       if (totvert > 0) {
-               mv=mvert;
-               copy_v3_v3(min, mv->co);
-               copy_v3_v3(max, mv->co);
-               mv++;
-               for (i = 1; i < totvert; i++, mv++) {
-                       minmax_v3v3_v3(min, max, mv->co);
-               }
-       }
-       else {
-               zero_v3(min);
-               zero_v3(max);
-       }
-
-       sub_v3_v3v3(delta, max, min);
-
-       /* determine major axis */
-       axis = axis_dominant_v3_single(delta);
-        
-       d = delta[axis]/(float)res;
-
-       size[axis] = res;
-       size[(axis+1)%3] = (int)ceil(delta[(axis+1)%3]/d);
-       size[(axis+2)%3] = (int)ceil(delta[(axis+2)%3]/d);
-
-       /* float errors grrr.. */
-       size[(axis+1)%3] = MIN2(size[(axis+1)%3],res);
-       size[(axis+2)%3] = MIN2(size[(axis+2)%3],res);
-
-       size[0] = MAX2(size[0], 1);
-       size[1] = MAX2(size[1], 1);
-       size[2] = MAX2(size[2], 1);
-
-       /* no full offset for flat/thin objects */
-       min[0]+= d < delta[0] ? d/2.f : delta[0]/2.f;
-       min[1]+= d < delta[1] ? d/2.f : delta[1]/2.f;
-       min[2]+= d < delta[2] ? d/2.f : delta[2]/2.f;
-
-       for (i=0,p=0,pa=psys->particles; i<res; i++) {
-               for (j=0; j<res; j++) {
-                       for (k=0; k<res; k++,p++,pa++) {
-                               pa->fuv[0] = min[0] + (float)i*d;
-                               pa->fuv[1] = min[1] + (float)j*d;
-                               pa->fuv[2] = min[2] + (float)k*d;
-                               pa->flag |= PARS_UNEXIST;
-                               pa->hair_index = 0; /* abused in volume calculation */
-                       }
-               }
-       }
-
-       /* enable particles near verts/edges/faces/inside surface */
-       if (from==PART_FROM_VERT) {
-               float vec[3];
-
-               pa=psys->particles;
-
-               min[0] -= d/2.0f;
-               min[1] -= d/2.0f;
-               min[2] -= d/2.0f;
-
-               for (i=0,mv=mvert; i<totvert; i++,mv++) {
-                       sub_v3_v3v3(vec,mv->co,min);
-                       vec[0]/=delta[0];
-                       vec[1]/=delta[1];
-                       vec[2]/=delta[2];
-                       pa[((int)(vec[0] * (size[0] - 1))  * res +
-                           (int)(vec[1] * (size[1] - 1))) * res +
-                           (int)(vec[2] * (size[2] - 1))].flag &= ~PARS_UNEXIST;
-               }
-       }
-       else if (ELEM(from,PART_FROM_FACE,PART_FROM_VOLUME)) {
-               float co1[3], co2[3];
-
-               MFace *mface= NULL, *mface_array;
-               float v1[3], v2[3], v3[3], v4[4], lambda;
-               int a, a1, a2, a0mul, a1mul, a2mul, totface;
-               int amax= from==PART_FROM_FACE ? 3 : 1;
-
-               totface=dm->getNumTessFaces(dm);
-               mface=mface_array=dm->getTessFaceDataArray(dm,CD_MFACE);
-               
-               for (a=0; a<amax; a++) {
-                       if (a==0) { a0mul=res*res; a1mul=res; a2mul=1; }
-                       else if (a==1) { a0mul=res; a1mul=1; a2mul=res*res; }
-                       else { a0mul=1; a1mul=res*res; a2mul=res; }
-
-                       for (a1=0; a1<size[(a+1)%3]; a1++) {
-                               for (a2=0; a2<size[(a+2)%3]; a2++) {
-                                       mface= mface_array;
-
-                                       pa = psys->particles + a1*a1mul + a2*a2mul;
-                                       copy_v3_v3(co1, pa->fuv);
-                                       co1[a] -= d < delta[a] ? d/2.f : delta[a]/2.f;
-                                       copy_v3_v3(co2, co1);
-                                       co2[a] += delta[a] + 0.001f*d;
-                                       co1[a] -= 0.001f*d;
-                                       
-                                       /* lets intersect the faces */
-                                       for (i=0; i<totface; i++,mface++) {
-                                               copy_v3_v3(v1, mvert[mface->v1].co);
-                                               copy_v3_v3(v2, mvert[mface->v2].co);
-                                               copy_v3_v3(v3, mvert[mface->v3].co);
-
-                                               if (isect_axial_line_tri_v3(a, co1, co2, v2, v3, v1, &lambda)) {
-                                                       if (from==PART_FROM_FACE)
-                                                               (pa+(int)(lambda*size[a])*a0mul)->flag &= ~PARS_UNEXIST;
-                                                       else /* store number of intersections */
-                                                               (pa+(int)(lambda*size[a])*a0mul)->hair_index++;
-                                               }
-                                               else if (mface->v4) {
-                                                       copy_v3_v3(v4, mvert[mface->v4].co);
-
-                                                       if (isect_axial_line_tri_v3(a, co1, co2, v4, v1, v3, &lambda)) {
-                                                               if (from==PART_FROM_FACE)
-                                                                       (pa+(int)(lambda*size[a])*a0mul)->flag &= ~PARS_UNEXIST;
-                                                               else
-                                                                       (pa+(int)(lambda*size[a])*a0mul)->hair_index++;
-                                                       }
-                                               }
-                                       }
-
-                                       if (from==PART_FROM_VOLUME) {
-                                               int in=pa->hair_index%2;
-                                               if (in) pa->hair_index++;
-                                               for (i=0; i<size[0]; i++) {
-                                                       if (in || (pa+i*a0mul)->hair_index%2)
-                                                               (pa+i*a0mul)->flag &= ~PARS_UNEXIST;
-                                                       /* odd intersections == in->out / out->in */
-                                                       /* even intersections -> in stays same */
-                                                       in=(in + (pa+i*a0mul)->hair_index) % 2;
-                                               }
-                                       }
-                               }
-                       }
-               }
-       }
-
-       if (psys->part->flag & PART_GRID_HEXAGONAL) {
-               for (i=0,p=0,pa=psys->particles; i<res; i++) {
-                       for (j=0; j<res; j++) {
-                               for (k=0; k<res; k++,p++,pa++) {
-                                       if (j%2)
-                                               pa->fuv[0] += d/2.f;
-
-                                       if (k%2) {
-                                               pa->fuv[0] += d/2.f;
-                                               pa->fuv[1] += d/2.f;
-                                       }
-                               }
-                       }
-               }
-       }
-
-       if (psys->part->flag & PART_GRID_INVERT) {
-               for (i=0; i<size[0]; i++) {
-                       for (j=0; j<size[1]; j++) {
-                               pa=psys->particles + res*(i*res + j);
-                               for (k=0; k<size[2]; k++, pa++) {
-                                       pa->flag ^= PARS_UNEXIST;
-                               }
-                       }
-               }
-       }
-
-       if (psys->part->grid_rand > 0.f) {
-               float rfac = d * psys->part->grid_rand;
-               for (p=0,pa=psys->particles; p<psys->totpart; p++,pa++) {
-                       if (pa->flag & PARS_UNEXIST)
-                               continue;
-
-                       pa->fuv[0] += rfac * (psys_frand(psys, p + 31) - 0.5f);
-                       pa->fuv[1] += rfac * (psys_frand(psys, p + 32) - 0.5f);
-                       pa->fuv[2] += rfac * (psys_frand(psys, p + 33) - 0.5f);
-               }
-       }
-}
-
-/* modified copy from rayshade.c */
-static void hammersley_create(float *out, int n, int seed, float amount)
-{
-       RNG *rng;
-       double p, t, offs[2];
-       int k, kk;
-
-       rng = BLI_rng_new(31415926 + n + seed);
-       offs[0] = BLI_rng_get_double(rng) + (double)amount;
-       offs[1] = BLI_rng_get_double(rng) + (double)amount;
-       BLI_rng_free(rng);
-
-       for (k = 0; k < n; k++) {
-               t = 0;
-               for (p = 0.5, kk = k; kk; p *= 0.5, kk >>= 1)
-                       if (kk & 1) /* kk mod 2 = 1 */
-                               t += p;
-
-               out[2*k + 0] = fmod((double)k/(double)n + offs[0], 1.0);
-               out[2*k + 1] = fmod(t + offs[1], 1.0);
-       }
-}
-
-/* almost exact copy of BLI_jitter_init */
-static void init_mv_jit(float *jit, int num, int seed2, float amount)
-{
-       RNG *rng;
-       float *jit2, x, rad1, rad2, rad3;
-       int i, num2;
-
-       if (num==0) return;
-
-       rad1= (float)(1.0f/sqrtf((float)num));
-       rad2= (float)(1.0f/((float)num));
-       rad3= (float)sqrtf((float)num)/((float)num);
-
-       rng = BLI_rng_new(31415926 + num + seed2);
-       x= 0;
-               num2 = 2 * num;
-       for (i=0; i<num2; i+=2) {
-       
-               jit[i] = x + amount*rad1*(0.5f - BLI_rng_get_float(rng));
-               jit[i+1] = i/(2.0f*num) + amount*rad1*(0.5f - BLI_rng_get_float(rng));
-               
-               jit[i]-= (float)floor(jit[i]);
-               jit[i+1]-= (float)floor(jit[i+1]);
-               
-               x+= rad3;
-               x -= (float)floor(x);
-       }
-
-       jit2= MEM_mallocN(12 + 2*sizeof(float)*num, "initjit");
-
-       for (i=0 ; i<4 ; i++) {
-               BLI_jitterate1((float (*)[2])jit, (float (*)[2])jit2, num, rad1);
-               BLI_jitterate1((float (*)[2])jit, (float (*)[2])jit2, num, rad1);
-               BLI_jitterate2((float (*)[2])jit, (float (*)[2])jit2, num, rad2);
-       }
-       MEM_freeN(jit2);
-       BLI_rng_free(rng);
-}
-
-static void psys_uv_to_w(float u, float v, int quad, float *w)
-{
-       float vert[4][3], co[3];
-
-       if (!quad) {
-               if (u+v > 1.0f)
-                       v= 1.0f-v;
-               else
-                       u= 1.0f-u;
-       }
-
-       vert[0][0] = 0.0f; vert[0][1] = 0.0f; vert[0][2] = 0.0f;
-       vert[1][0] = 1.0f; vert[1][1] = 0.0f; vert[1][2] = 0.0f;
-       vert[2][0] = 1.0f; vert[2][1] = 1.0f; vert[2][2] = 0.0f;
-
-       co[0] = u;
-       co[1] = v;
-       co[2] = 0.0f;
-
-       if (quad) {
-               vert[3][0] = 0.0f; vert[3][1] = 1.0f; vert[3][2] = 0.0f;
-               interp_weights_poly_v3( w,vert, 4, co);
-       }
-       else {
-               interp_weights_poly_v3( w,vert, 3, co);
-               w[3] = 0.0f;
-       }
-}
-
-/* Find the index in "sum" array before "value" is crossed. */
-static int distribute_binary_search(float *sum, int n, float value)
-{
-       int mid, low=0, high=n;
-
-       if (value == 0.f)
-               return 0;
-
-       while (low <= high) {
-               mid= (low + high)/2;
-               
-               if (sum[mid] < value && value <= sum[mid+1])
-                       return mid;
-               
-               if (sum[mid] >= value)
-                       high= mid - 1;
-               else if (sum[mid] < value)
-                       low= mid + 1;
-               else
-                       return mid;
-       }
-
-       return low;
-}
-
-/* the max number if calls to rng_* funcs within psys_thread_distribute_particle
- * be sure to keep up to date if this changes */
-#define PSYS_RND_DIST_SKIP 2
-
-/* note: this function must be thread safe, for from == PART_FROM_CHILD */
-#define ONLY_WORKING_WITH_PA_VERTS 0
-static void distribute_threads_exec(ParticleThread *thread, ParticleData *pa, ChildParticle *cpa, int p)
-{
-       ParticleThreadContext *ctx= thread->ctx;
-       Object *ob= ctx->sim.ob;
-       DerivedMesh *dm= ctx->dm;
-       float *v1, *v2, *v3, *v4, nor[3], orco1[3], co1[3], co2[3], nor1[3];
-       float cur_d, min_d, randu, randv;
-       int from= ctx->from;
-       int cfrom= ctx->cfrom;
-       int distr= ctx->distr;
-       int i, intersect, tot;
-       int rng_skip_tot= PSYS_RND_DIST_SKIP; /* count how many rng_* calls wont need skipping */
-
-       if (from == PART_FROM_VERT) {
-               /* TODO_PARTICLE - use original index */
-               pa->num= ctx->index[p];
-               pa->fuv[0] = 1.0f;
-               pa->fuv[1] = pa->fuv[2] = pa->fuv[3] = 0.0;
-
-#if ONLY_WORKING_WITH_PA_VERTS
-               if (ctx->tree) {
-                       KDTreeNearest ptn[3];
-                       int w, maxw;
-
-                       psys_particle_on_dm(ctx->dm,from,pa->num,pa->num_dmcache,pa->fuv,pa->foffset,co1,0,0,0,orco1,0);
-                       BKE_mesh_orco_verts_transform((Mesh*)ob->data, &orco1, 1, 1);
-                       maxw = BLI_kdtree_find_nearest_n(ctx->tree,orco1,ptn,3);
-
-                       for (w=0; w<maxw; w++) {
-                               pa->verts[w]=ptn->num;
-                       }
-               }
-#endif
-       }
-       else if (from == PART_FROM_FACE || from == PART_FROM_VOLUME) {
-               MFace *mface;
-
-               pa->num = i = ctx->index[p];
-               mface = dm->getTessFaceData(dm,i,CD_MFACE);
-               
-               switch (distr) {
-               case PART_DISTR_JIT:
-                       if (ctx->jitlevel == 1) {
-                               if (mface->v4)
-                                       psys_uv_to_w(0.5f, 0.5f, mface->v4, pa->fuv);
-                               else
-                                       psys_uv_to_w(1.0f / 3.0f, 1.0f / 3.0f, mface->v4, pa->fuv);
-                       }
-                       else {
-                               ctx->jitoff[i] = fmod(ctx->jitoff[i],(float)ctx->jitlevel);
-                               if (!isnan(ctx->jitoff[i])) {
-                                       psys_uv_to_w(ctx->jit[2*(int)ctx->jitoff[i]], ctx->jit[2*(int)ctx->jitoff[i]+1], mface->v4, pa->fuv);
-                                       ctx->jitoff[i]++;
-                               }
-                       }
-                       break;
-               case PART_DISTR_RAND:
-                       randu= BLI_rng_get_float(thread->rng);
-                       randv= BLI_rng_get_float(thread->rng);
-                       rng_skip_tot -= 2;
-
-                       psys_uv_to_w(randu, randv, mface->v4, pa->fuv);
-                       break;
-               }
-               pa->foffset= 0.0f;
-               
-               /* experimental */
-               if (from==PART_FROM_VOLUME) {
-                       MVert *mvert=dm->getVertDataArray(dm,CD_MVERT);
-
-                       tot=dm->getNumTessFaces(dm);
-
-                       psys_interpolate_face(mvert,mface,0,0,pa->fuv,co1,nor,0,0,0,0);
-
-                       normalize_v3(nor);
-                       mul_v3_fl(nor,-100.0);
-
-                       add_v3_v3v3(co2,co1,nor);
-
-                       min_d=2.0;
-                       intersect=0;
-
-                       for (i=0,mface=dm->getTessFaceDataArray(dm,CD_MFACE); i<tot; i++,mface++) {
-                               if (i==pa->num) continue;
-
-                               v1=mvert[mface->v1].co;
-                               v2=mvert[mface->v2].co;
-                               v3=mvert[mface->v3].co;
-
-                               if (isect_line_tri_v3(co1, co2, v2, v3, v1, &cur_d, 0)) {
-                                       if (cur_d<min_d) {
-                                               min_d=cur_d;
-                                               pa->foffset=cur_d*50.0f; /* to the middle of volume */
-                                               intersect=1;
-                                       }
-                               }
-                               if (mface->v4) {
-                                       v4=mvert[mface->v4].co;
-
-                                       if (isect_line_tri_v3(co1, co2, v4, v1, v3, &cur_d, 0)) {
-                                               if (cur_d<min_d) {
-                                                       min_d=cur_d;
-                                                       pa->foffset=cur_d*50.0f; /* to the middle of volume */
-                                                       intersect=1;
-                                               }
-                                       }
-                               }
-                       }
-                       if (intersect==0)
-                               pa->foffset=0.0;
-                       else {
-                               switch (distr) {
-                                       case PART_DISTR_JIT:
-                                               pa->foffset *= ctx->jit[p % (2 * ctx->jitlevel)];
-                                               break;
-                                       case PART_DISTR_RAND:
-                                               pa->foffset *= BLI_frand();
-                                               break;
-                               }
-                       }
-               }
-       }
-       else if (from == PART_FROM_CHILD) {
-               MFace *mf;
-
-               if (ctx->index[p] < 0) {
-                       cpa->num=0;
-                       cpa->fuv[0]=cpa->fuv[1]=cpa->fuv[2]=cpa->fuv[3]=0.0f;
-                       cpa->pa[0]=cpa->pa[1]=cpa->pa[2]=cpa->pa[3]=0;
-                       return;
-               }
-
-               mf= dm->getTessFaceData(dm, ctx->index[p], CD_MFACE);
-
-               randu= BLI_rng_get_float(thread->rng);
-               randv= BLI_rng_get_float(thread->rng);
-               rng_skip_tot -= 2;
-
-               psys_uv_to_w(randu, randv, mf->v4, cpa->fuv);
-
-               cpa->num = ctx->index[p];
-
-               if (ctx->tree) {
-                       KDTreeNearest ptn[10];
-                       int w,maxw;//, do_seams;
-                       float maxd /*, mind,dd */, totw= 0.0f;
-                       int parent[10];
-                       float pweight[10];
-
-                       psys_particle_on_dm(dm,cfrom,cpa->num,DMCACHE_ISCHILD,cpa->fuv,cpa->foffset,co1,nor1,NULL,NULL,orco1,NULL);
-                       BKE_mesh_orco_verts_transform((Mesh*)ob->data, &orco1, 1, 1);
-                       maxw = BLI_kdtree_find_nearest_n(ctx->tree,orco1,ptn,3);
-
-                       maxd=ptn[maxw-1].dist;
-                       /* mind=ptn[0].dist; */ /* UNUSED */
-                       
-                       /* the weights here could be done better */
-                       for (w=0; w<maxw; w++) {
-                               parent[w]=ptn[w].index;
-                               pweight[w]=(float)pow(2.0,(double)(-6.0f*ptn[w].dist/maxd));
-                       }
-                       for (;w<10; w++) {
-                               parent[w]=-1;
-                               pweight[w]=0.0f;
-                       }
-
-                       for (w=0,i=0; w<maxw && i<4; w++) {
-                               if (parent[w]>=0) {
-                                       cpa->pa[i]=parent[w];
-                                       cpa->w[i]=pweight[w];
-                                       totw+=pweight[w];
-                                       i++;
-                               }
-                       }
-                       for (;i<4; i++) {
-                               cpa->pa[i]=-1;
-                               cpa->w[i]=0.0f;
-                       }
-
-                       if (totw>0.0f) for (w=0; w<4; w++)
-                               cpa->w[w]/=totw;
-
-                       cpa->parent=cpa->pa[0];
                }
        }
-
-       if (rng_skip_tot > 0) /* should never be below zero */
-               BLI_rng_skip(thread->rng, rng_skip_tot);
 }
 
-static void *distribute_threads_exec_cb(void *data)
+/* threaded child particle distribution and path caching: psys_thread_context_init() zeroes ctx, then fills in ctx->sim, ctx->dm and ctx->ma from sim */
+void psys_thread_context_init(ParticleThreadContext *ctx, ParticleSimulationData *sim)
 {
-       ParticleThread *thread= (ParticleThread*)data;
-       ParticleSystem *psys= thread->ctx->sim.psys;
-       ParticleData *pa;
-       ChildParticle *cpa;
-       int p, totpart;
-
-       if (thread->ctx->from == PART_FROM_CHILD) {
-               totpart= psys->totchild;
-               cpa= psys->child;
-
-               for (p=0; p<totpart; p++, cpa++) {
-                       if (thread->ctx->skip) /* simplification skip */
-                               BLI_rng_skip(thread->rng, PSYS_RND_DIST_SKIP * thread->ctx->skip[p]);
-
-                       if ((p+thread->num) % thread->tot == 0)
-                               distribute_threads_exec(thread, NULL, cpa, p);
-                       else /* thread skip */
-                               BLI_rng_skip(thread->rng, PSYS_RND_DIST_SKIP);
-               }
-       }
-       else {
-               totpart= psys->totpart;
-               pa= psys->particles + thread->num;
-               for (p=thread->num; p<totpart; p+=thread->tot, pa+=thread->tot)
-                       distribute_threads_exec(thread, pa, NULL, p);
-       }
-
-       return 0;
+       memset(ctx, 0, sizeof(ParticleThreadContext)); /* every field not assigned below starts out as zero bytes */
+       ctx->sim = *sim; /* by-value struct copy of the caller's simulation data */
+       ctx->dm = ctx->sim.psmd->dm_final; /* dereferences psmd, so sim->psmd must be non-NULL on entry */
+       ctx->ma = give_current_material(sim->ob, sim->psys->part->omat); /* material of sim->ob selected by part->omat (presumably a material slot number -- confirm) */
 }
 
-static int distribute_compare_orig_index(const void *p1, const void *p2, void *user_data)
-{
-       int *orig_index = (int *) user_data;
-       int index1 = orig_index[*(const int *)p1];
-       int index2 = orig_index[*(const int *)p2];
-
-       if (index1 < index2)
-               return -1;
-       else if (index1 == index2) {
-               /* this pointer comparison appears to make qsort stable for glibc,
-                * and apparently on solaris too, makes the renders reproducible */
-               if (p1 < p2)
-                       return -1;
-               else if (p1 == p2)
-                       return 0;
-               else
-                       return 1;
-       }
-       else
-               return 1;
-}
+#define MAX_PARTICLES_PER_TASK 256 /* XXX arbitrary - maybe use at least number of points instead for better balancing? */
 
-static void distribute_invalid(Scene *scene, ParticleSystem *psys, int from)
+BLI_INLINE int ceil_ii(int a, int b)
 {
-       if (from == PART_FROM_CHILD) {
-               ChildParticle *cpa;
-               int p, totchild = get_psys_tot_child(scene, psys);
-
-               if (psys->child && totchild) {
-                       for (p=0,cpa=psys->child; p<totchild; p++,cpa++) {
-                               cpa->fuv[0]=cpa->fuv[1]=cpa->fuv[2]=cpa->fuv[3] = 0.0;
-                               cpa->foffset= 0.0f;
-                               cpa->parent=0;
-                               cpa->pa[0]=cpa->pa[1]=cpa->pa[2]=cpa->pa[3]=0;
-                               cpa->num= -1;
-                       }
-               }
-       }
-       else {
-               PARTICLE_P;
-               LOOP_PARTICLES {
-                       pa->fuv[0] = pa->fuv[1] = pa->fuv[2] = pa->fuv[3] = 0.0;
-                       pa->foffset= 0.0f;
-                       pa->num= -1;
-               }
-       }
+       return (a + b - 1) / b;
 }
 
-/* Creates a distribution of coordinates on a DerivedMesh      */
-/* This is to denote functionality that does not yet work with mesh - only derived mesh */
-static int distribute_threads_init_data(ParticleThread *threads, Scene *scene, DerivedMesh *finaldm, int from)
+void psys_tasks_create(ParticleThreadContext *ctx, int startpart, int endpart, ParticleTask **r_tasks, int *r_numtasks)
 {
-       ParticleThreadContext *ctx= threads[0].ctx;
-       Object *ob= ctx->sim.ob;
-       ParticleSystem *psys= ctx->sim.psys;
-       ParticleData *pa=0, *tpars= 0;
-       ParticleSettings *part;
-       ParticleSeam *seams= 0;
-       KDTree *tree=0;
-       DerivedMesh *dm= NULL;
-       float *jit= NULL;
-       int i, seed, p=0, totthread= threads[0].tot;
-       int cfrom=0;
-       int totelem=0, totpart, *particle_element=0, children=0, totseam=0;
-       int jitlevel= 1, distr;
-       float *element_weight=NULL,*element_sum=NULL,*jitter_offset=NULL, *vweight=NULL;
-       float cur, maxweight=0.0, tweight, totweight, inv_totweight, co[3], nor[3], orco[3];
-       
-       if (ELEM(NULL, ob, psys, psys->part))
-               return 0;
-
-       part=psys->part;
-       totpart=psys->totpart;
-       if (totpart==0)
-               return 0;
-
-       if (!finaldm->deformedOnly && !finaldm->getTessFaceDataArray(finaldm, CD_ORIGINDEX)) {
-               printf("Can't create particles with the current modifier stack, disable destructive modifiers\n");
-// XXX         error("Can't paint with the current modifier stack, disable destructive modifiers");
-               return 0;
-       }
-
-       /* First handle special cases */
-       if (from == PART_FROM_CHILD) {
-               /* Simple children */
-               if (part->childtype != PART_CHILD_FACES) {
-                       BLI_srandom(31415926 + psys->seed + psys->child_seed);
-                       distribute_simple_children(scene, ob, finaldm, psys);
-                       return 0;
-               }
-       }
-       else {
-               /* Grid distribution */
-               if (part->distr==PART_DISTR_GRID && from != PART_FROM_VERT) {
-                       BLI_srandom(31415926 + psys->seed);
-                       dm= CDDM_from_mesh((Mesh*)ob->data);
-                       DM_ensure_tessface(dm);
-                       distribute_grid(dm,psys);
-                       dm->release(dm);
-                       return 0;
-               }
-       }
+       ParticleTask *tasks;
+       int numtasks = ceil_ii((endpart - startpart), MAX_PARTICLES_PER_TASK);
+       float particles_per_task = (float)(endpart - startpart) / (float)numtasks, p, pnext;
+       int i;
        
-       /* Create trees and original coordinates if needed */
-       if (from == PART_FROM_CHILD) {
-               distr=PART_DISTR_RAND;
-               BLI_srandom(31415926 + psys->seed + psys->child_seed);
-               dm= finaldm;
-
-               /* BMESH ONLY */
-               DM_ensure_tessface(dm);
-
-               children=1;
-
-               tree=BLI_kdtree_new(totpart);
-
-               for (p=0,pa=psys->particles; p<totpart; p++,pa++) {
-                       psys_particle_on_dm(dm,part->from,pa->num,pa->num_dmcache,pa->fuv,pa->foffset,co,nor,0,0,orco,NULL);
-                       BKE_mesh_orco_verts_transform((Mesh*)ob->data, &orco, 1, 1);
-                       BLI_kdtree_insert(tree, p, orco);
-               }
-
-               BLI_kdtree_balance(tree);
-
-               totpart = get_psys_tot_child(scene, psys);
-               cfrom = from = PART_FROM_FACE;
-       }
-       else {
-               distr = part->distr;
-               BLI_srandom(31415926 + psys->seed);
-               
-               if (psys->part->use_modifier_stack)
-                       dm = finaldm;
-               else
-                       dm= CDDM_from_mesh((Mesh*)ob->data);
-
-               /* BMESH ONLY, for verts we don't care about tessfaces */
-               if (from != PART_FROM_VERT) {
-                       DM_ensure_tessface(dm);
-               }
-
-               /* we need orco for consistent distributions */
-               if (!CustomData_has_layer(&dm->vertData, CD_ORCO))
-                       DM_add_vert_layer(dm, CD_ORCO, CD_ASSIGN, BKE_mesh_orco_verts_get(ob));
-
-               if (from == PART_FROM_VERT) {
-                       MVert *mv= dm->getVertDataArray(dm, CD_MVERT);
-                       float (*orcodata)[3] = dm->getVertDataArray(dm, CD_ORCO);
-                       int totvert = dm->getNumVerts(dm);
-
-                       tree=BLI_kdtree_new(totvert);
-
-                       for (p=0; p<totvert; p++) {
-                               if (orcodata) {
-                                       copy_v3_v3(co,orcodata[p]);
-                                       BKE_mesh_orco_verts_transform((Mesh*)ob->data, &co, 1, 1);
-                               }
-                               else
-                                       copy_v3_v3(co,mv[p].co);
-                               BLI_kdtree_insert(tree, p, co);
-                       }
-
-                       BLI_kdtree_balance(tree);
-               }
-       }
-
-       /* Get total number of emission elements and allocate needed arrays */
-       totelem = (from == PART_FROM_VERT) ? dm->getNumVerts(dm) : dm->getNumTessFaces(dm);
-
-       if (totelem == 0) {
-               distribute_invalid(scene, psys, children ? PART_FROM_CHILD : 0);
-
-               if (G.debug & G_DEBUG)
-                       fprintf(stderr,"Particle distribution error: Nothing to emit from!\n");
-
-               if (dm != finaldm) dm->release(dm);
-
-               BLI_kdtree_free(tree);
-
-               return 0;
-       }
-
-       element_weight  = MEM_callocN(sizeof(float)*totelem, "particle_distribution_weights");
-       particle_element= MEM_callocN(sizeof(int)*totpart, "particle_distribution_indexes");
-       element_sum             = MEM_callocN(sizeof(float)*(totelem+1), "particle_distribution_sum");
-       jitter_offset   = MEM_callocN(sizeof(float)*totelem, "particle_distribution_jitoff");
-
-       /* Calculate weights from face areas */
-       if ((part->flag&PART_EDISTR || children) && from != PART_FROM_VERT) {
-               MVert *v1, *v2, *v3, *v4;
-               float totarea=0.f, co1[3], co2[3], co3[3], co4[3];
-               float (*orcodata)[3];
-               
-               orcodata= dm->getVertDataArray(dm, CD_ORCO);
-
-               for (i=0; i<totelem; i++) {
-                       MFace *mf=dm->getTessFaceData(dm,i,CD_MFACE);
-
-                       if (orcodata) {
-                               copy_v3_v3(co1, orcodata[mf->v1]);
-                               copy_v3_v3(co2, orcodata[mf->v2]);
-                               copy_v3_v3(co3, orcodata[mf->v3]);
-                               BKE_mesh_orco_verts_transform((Mesh*)ob->data, &co1, 1, 1);
-                               BKE_mesh_orco_verts_transform((Mesh*)ob->data, &co2, 1, 1);
-                               BKE_mesh_orco_verts_transform((Mesh*)ob->data, &co3, 1, 1);
-                               if (mf->v4) {
-                                       copy_v3_v3(co4, orcodata[mf->v4]);
-                                       BKE_mesh_orco_verts_transform((Mesh*)ob->data, &co4, 1, 1);
-                               }
-                       }
-                       else {
-                               v1= (MVert*)dm->getVertData(dm,mf->v1,CD_MVERT);
-                               v2= (MVert*)dm->getVertData(dm,mf->v2,CD_MVERT);
-                               v3= (MVert*)dm->getVertData(dm,mf->v3,CD_MVERT);
-                               copy_v3_v3(co1, v1->co);
-                               copy_v3_v3(co2, v2->co);
-                               copy_v3_v3(co3, v3->co);
-                               if (mf->v4) {
-                                       v4= (MVert*)dm->getVertData(dm,mf->v4,CD_MVERT);
-                                       copy_v3_v3(co4, v4->co);
-                               }
-                       }
-
-                       cur = mf->v4 ? area_quad_v3(co1, co2, co3, co4) : area_tri_v3(co1, co2, co3);
-                       
-                       if (cur > maxweight)
-                               maxweight = cur;
-
-                       element_weight[i] = cur;
-                       totarea += cur;
-               }
-
-               for (i=0; i<totelem; i++)
-                       element_weight[i] /= totarea;
-
-               maxweight /= totarea;
-       }
-       else {
-               float min=1.0f/(float)(MIN2(totelem,totpart));
-               for (i=0; i<totelem; i++)
-                       element_weight[i]=min;
-               maxweight=min;
-       }
-
-       /* Calculate weights from vgroup */
-       vweight = psys_cache_vgroup(dm,psys,PSYS_VG_DENSITY);
-
-       if (vweight) {
-               if (from==PART_FROM_VERT) {
-                       for (i=0;i<totelem; i++)
-                               element_weight[i]*=vweight[i];
-               }
-               else { /* PART_FROM_FACE / PART_FROM_VOLUME */
-                       for (i=0;i<totelem; i++) {
-                               MFace *mf=dm->getTessFaceData(dm,i,CD_MFACE);
-                               tweight = vweight[mf->v1] + vweight[mf->v2] + vweight[mf->v3];
-                               
-                               if (mf->v4) {
-                                       tweight += vweight[mf->v4];
-                                       tweight /= 4.0f;
-                               }
-                               else {
-                                       tweight /= 3.0f;
-                               }
-
-                               element_weight[i]*=tweight;
-                       }
-               }
-               MEM_freeN(vweight);
-       }
-
-       /* Calculate total weight of all elements */
-       totweight= 0.0f;
-       for (i=0;i<totelem; i++)
-               totweight += element_weight[i];
-
-       inv_totweight = (totweight > 0.f ? 1.f/totweight : 0.f);
-
-       /* Calculate cumulative weights */
-       element_sum[0] = 0.0f;
-       for (i=0; i<totelem; i++)
-               element_sum[i+1] = element_sum[i] + element_weight[i] * inv_totweight;
+       tasks = MEM_callocN(sizeof(ParticleTask) * numtasks, "ParticleThread");
+       *r_numtasks = numtasks;
+       *r_tasks = tasks;
        
-       /* Finally assign elements to particles */
-       if ((part->flag&PART_TRAND) || (part->simplify_flag&PART_SIMPLIFY_ENABLE)) {
-               float pos;
-
-               for (p=0; p<totpart; p++) {
-                       /* In theory element_sum[totelem] should be 1.0, but due to float errors this is not necessarily always true, so scale pos accordingly. */
-                       pos= BLI_frand() * element_sum[totelem];
-                       particle_element[p] = distribute_binary_search(element_sum, totelem, pos);
-                       particle_element[p] = MIN2(totelem-1, particle_element[p]);
-                       jitter_offset[particle_element[p]] = pos;
-               }
-       }
-       else {
-               double step, pos;
-               
-               step= (totpart < 2) ? 0.5 : 1.0/(double)totpart;
-               pos= 1e-6; /* tiny offset to avoid zero weight face */
-               i= 0;
-
-               for (p=0; p<totpart; p++, pos+=step) {
-                       while ((i < totelem) && (pos > (double)element_sum[i + 1]))
-                               i++;
-
-                       particle_element[p] = MIN2(totelem-1, i);
-
-                       /* avoid zero weight face */
-                       if (p == totpart-1 && element_weight[particle_element[p]] == 0.0f)
-                               particle_element[p] = particle_element[p-1];
-
-                       jitter_offset[particle_element[p]] = pos;
-               }
-       }
-
-       MEM_freeN(element_sum);
-
-       /* For hair, sort by origindex (allows optimization's in rendering), */
-       /* however with virtual parents the children need to be in random order. */
-       if (part->type == PART_HAIR && !(part->childtype==PART_CHILD_FACES && part->parents!=0.0f)) {
-               int *orig_index = NULL;
-
-               if (from == PART_FROM_VERT) {
-                       if (dm->numVertData)
-                               orig_index = dm->getVertDataArray(dm, CD_ORIGINDEX);
-               }
-               else {
-                       if (dm->numTessFaceData)
-                               orig_index = dm->getTessFaceDataArray(dm, CD_ORIGINDEX);
-               }
-
-               if (orig_index) {
-                       BLI_qsort_r(particle_element, totpart, sizeof(int), distribute_compare_orig_index, orig_index);
-               }
-       }
-
-       /* Create jittering if needed */
-       if (distr==PART_DISTR_JIT && ELEM(from,PART_FROM_FACE,PART_FROM_VOLUME)) {
-               jitlevel= part->userjit;
-               
-               if (jitlevel == 0) {
-                       jitlevel= totpart/totelem;
-                       if (part->flag & PART_EDISTR) jitlevel*= 2;     /* looks better in general, not very scietific */
-                       if (jitlevel<3) jitlevel= 3;
-               }
+       p = (float)startpart;
+       for (i = 0; i < numtasks; i++, p = pnext) {
+               pnext = p + particles_per_task;
                
-               jit= MEM_callocN((2+ jitlevel*2)*sizeof(float), "jit");
-
-               /* for small amounts of particles we use regular jitter since it looks
-                * a bit better, for larger amounts we switch to hammersley sequence 
-                * because it is much faster */
-               if (jitlevel < 25)
-                       init_mv_jit(jit, jitlevel, psys->seed, part->jitfac);
-               else
-                       hammersley_create(jit, jitlevel+1, psys->seed, part->jitfac);
-               BLI_array_randomize(jit, 2*sizeof(float), jitlevel, psys->seed); /* for custom jit or even distribution */
-       }
-
-       /* Setup things for threaded distribution */
-       ctx->tree= tree;
-       ctx->seams= seams;
-       ctx->totseam= totseam;
-       ctx->sim.psys= psys;
-       ctx->index= particle_element;
-       ctx->jit= jit;
-       ctx->jitlevel= jitlevel;
-       ctx->jitoff= jitter_offset;
-       ctx->weight= element_weight;
-       ctx->maxweight= maxweight;
-       ctx->from= (children) ? PART_FROM_CHILD : from;
-       ctx->cfrom= cfrom;
-       ctx->distr= distr;
-       ctx->dm= dm;
-       ctx->tpars= tpars;
-
-       if (children) {
-               totpart= psys_render_simplify_distribution(ctx, totpart);
-               alloc_child_particles(psys, totpart);
-       }
-
-       if (!children || psys->totchild < 10000)
-               totthread= 1;
-       
-       seed= 31415926 + ctx->sim.psys->seed;
-       for (i=0; i<totthread; i++) {
-               threads[i].rng= BLI_rng_new(seed);
-               threads[i].tot= totthread;
-       }
-
-       return 1;
-}
-
-static void distribute_particles_on_dm(ParticleSimulationData *sim, int from)
-{
-       DerivedMesh *finaldm = sim->psmd->dm;
-       ListBase threads;
-       ParticleThread *pthreads;
-       ParticleThreadContext *ctx;
-       int i, totthread;
-
-       pthreads= psys_threads_create(sim);
-
-       if (!distribute_threads_init_data(pthreads, sim->scene, finaldm, from)) {
-               psys_threads_free(pthreads);
-               return;
-       }
-
-       totthread= pthreads[0].tot;
-       if (totthread > 1) {
-               BLI_init_threads(&threads, distribute_threads_exec_cb, totthread);
-
-               for (i=0; i<totthread; i++)
-                       BLI_insert_thread(&threads, &pthreads[i]);
-
-               BLI_end_threads(&threads);
-       }
-       else
-               distribute_threads_exec_cb(&pthreads[0]);
-
-       psys_calc_dmcache(sim->ob, finaldm, sim->psys);
-
-       ctx= pthreads[0].ctx;
-       if (ctx->dm != finaldm)
-               ctx->dm->release(ctx->dm);
-
-       psys_threads_free(pthreads);
-}
-
-/* ready for future use, to emit particles without geometry */
-static void distribute_particles_on_shape(ParticleSimulationData *sim, int UNUSED(from))
-{
-       distribute_invalid(sim->scene, sim->psys, 0);
-
-       fprintf(stderr,"Shape emission not yet possible!\n");
-}
-
-static void distribute_particles(ParticleSimulationData *sim, int from)
-{
-       PARTICLE_PSMD;
-       int distr_error=0;
-
-       if (psmd) {
-               if (psmd->dm)
-                       distribute_particles_on_dm(sim, from);
-               else
-                       distr_error=1;
-       }
-       else
-               distribute_particles_on_shape(sim, from);
-
-       if (distr_error) {
-               distribute_invalid(sim->scene, sim->psys, from);
-
-               fprintf(stderr,"Particle distribution error!\n");
+               tasks[i].ctx = ctx;
+               tasks[i].begin = (int)p;
+               tasks[i].end = min_ii((int)pnext, endpart);
        }
 }
 
-/* threaded child particle distribution and path caching */
-ParticleThread *psys_threads_create(ParticleSimulationData *sim)
+void psys_tasks_free(ParticleTask *tasks, int numtasks)
 {
-       ParticleThread *threads;
-       ParticleThreadContext *ctx;
-       int i, totthread = BKE_scene_num_threads(sim->scene);
+       int i;
        
-       threads= MEM_callocN(sizeof(ParticleThread)*totthread, "ParticleThread");
-       ctx= MEM_callocN(sizeof(ParticleThreadContext), "ParticleThreadContext");
-
-       ctx->sim = *sim;
-       ctx->dm= ctx->sim.psmd->dm;
-       ctx->ma= give_current_material(sim->ob, sim->psys->part->omat);
-
-       memset(threads, 0, sizeof(ParticleThread)*totthread);
-
-       for (i=0; i<totthread; i++) {
-               threads[i].ctx= ctx;
-               threads[i].num= i;
-               threads[i].tot= totthread;
+       /* threads */
+       for (i = 0; i < numtasks; ++i) {
+               if (tasks[i].rng)
+                       BLI_rng_free(tasks[i].rng);
+               if (tasks[i].rng_path)
+                       BLI_rng_free(tasks[i].rng_path);
        }
 
-       return threads;
+       MEM_freeN(tasks);
 }
 
-void psys_threads_free(ParticleThread *threads)
+void psys_thread_context_free(ParticleThreadContext *ctx)
 {
-       ParticleThreadContext *ctx= threads[0].ctx;
-       int i, totthread= threads[0].tot;
-
        /* path caching */
        if (ctx->vg_length)
                MEM_freeN(ctx->vg_length);
@@ -1512,6 +497,8 @@ void psys_threads_free(ParticleThread *threads)
                MEM_freeN(ctx->vg_rough2);
        if (ctx->vg_roughe)
                MEM_freeN(ctx->vg_roughe);
+       if (ctx->vg_twist)
+               MEM_freeN(ctx->vg_twist);
 
        if (ctx->sim.psys->lattice_deform_data) {
                end_latt_deform(ctx->sim.psys->lattice_deform_data);
@@ -1528,16 +515,15 @@ void psys_threads_free(ParticleThread *threads)
        //if (ctx->vertpart) MEM_freeN(ctx->vertpart);
        BLI_kdtree_free(ctx->tree);
 
-       /* threads */
-       for (i=0; i<totthread; i++) {
-               if (threads[i].rng)
-                       BLI_rng_free(threads[i].rng);
-               if (threads[i].rng_path)
-                       BLI_rng_free(threads[i].rng_path);
+       if (ctx->clumpcurve != NULL) {
+               curvemapping_free(ctx->clumpcurve);
+       }
+       if (ctx->roughcurve != NULL) {
+               curvemapping_free(ctx->roughcurve);
+       }
+       if (ctx->twistcurve != NULL) {
+               curvemapping_free(ctx->twistcurve);
        }
-
-       MEM_freeN(ctx);
-       MEM_freeN(threads);
 }
 
 static void initialize_particle_texture(ParticleSimulationData *sim, ParticleData *pa, int p)
@@ -1578,7 +564,32 @@ void initialize_particle(ParticleSimulationData *sim, ParticleData *pa)
        /* usage other than straight after distribute has to handle this index by itself - jahka*/
        //pa->num_dmcache = DMCACHE_NOTFOUND; /* assume we don't have a derived mesh face */
 }
+
 static void initialize_all_particles(ParticleSimulationData *sim)
+{
+       ParticleSystem *psys = sim->psys;
+       ParticleSettings *part = psys->part;
+       /* Grid distribution sets UNEXIST flag, need to take care of
+        * it here because later this flag is being reset.
+        *
+        * We can't do it for any distribution, because it'll then
+        * conflict with texture influence, which does not free
+        * unexisting particles and only sets flag.
+        *
+        * It's not so bad, because only grid distribution sets
+        * UNEXIST flag.
+        */
+       const bool emit_from_volume_grid = (part->distr == PART_DISTR_GRID) &&
+                                          (!ELEM(part->from, PART_FROM_VERT, PART_FROM_CHILD));
+       PARTICLE_P;
+       LOOP_PARTICLES {
+               if (!(emit_from_volume_grid && (pa->flag & PARS_UNEXIST) != 0)) {
+                       initialize_particle(sim, pa);
+               }
+       }
+}
+
+static void free_unexisting_particles(ParticleSimulationData *sim)
 {
        ParticleSystem *psys = sim->psys;
        PARTICLE_P;
@@ -1586,14 +597,11 @@ static void initialize_all_particles(ParticleSimulationData *sim)
        psys->totunexist = 0;
 
        LOOP_PARTICLES {
-               if ((pa->flag & PARS_UNEXIST)==0)
-                       initialize_particle(sim, pa);
-
-               if (pa->flag & PARS_UNEXIST)
+               if (pa->flag & PARS_UNEXIST) {
                        psys->totunexist++;
+               }
        }
 
-       /* Free unexisting particles. */
        if (psys->totpart && psys->totunexist == psys->totpart) {
                if (psys->particles->boid)
                        MEM_freeN(psys->particles->boid);
@@ -1625,8 +633,9 @@ static void initialize_all_particles(ParticleSimulationData *sim)
                if (psys->particles->boid) {
                        BoidParticle *newboids = MEM_callocN(psys->totpart * sizeof(BoidParticle), "boid particles");
 
-                       LOOP_PARTICLES
+                       LOOP_PARTICLES {
                                pa->boid = newboids++;
+                       }
 
                }
        }
@@ -1893,7 +902,7 @@ void psys_get_birth_coords(ParticleSimulationData *sim, ParticleData *pa, Partic
                                float q_imat[4];
 
                                mat4_to_quat(q_obmat, ob->obmat);
-                               invert_qt_qt(q_imat, q_obmat);
+                               invert_qt_qt_normalized(q_imat, q_obmat);
 
 
                                if (part->rotmode != PART_ROT_NOR_TAN) {
@@ -1996,7 +1005,7 @@ void reset_particle(ParticleSimulationData *sim, ParticleData *pa, float dtime,
        part=psys->part;
        
        /* get precise emitter matrix if particle is born */
-       if (part->type!=PART_HAIR && dtime > 0.f && pa->time < cfra && pa->time >= sim->psys->cfra) {
+       if (part->type != PART_HAIR && dtime > 0.f && pa->time < cfra && pa->time >= sim->psys->cfra) {
                evaluate_emitter_anim(sim->scene, sim->ob, pa->time);
 
                psys->flag |= PSYS_OB_ANIM_RESTORE;
@@ -2179,7 +1188,7 @@ static void set_keyed_keys(ParticleSimulationData *sim)
                                key->time = pa->time;
                }
 
-               if (psys->flag & PSYS_KEYED_TIMING && pt->duration!=0.0f)
+               if (psys->flag & PSYS_KEYED_TIMING && pt->duration != 0.0f)
                        k++;
 
                ksim.psys->flag |= keyed_flag;
@@ -2214,8 +1223,8 @@ void psys_get_pointcache_start_end(Scene *scene, ParticleSystem *psys, int *sfra
 {
        ParticleSettings *part = psys->part;
 
-       *sfra = MAX2(1, (int)part->sta);
-       *efra = MIN2((int)(part->end + part->lifetime + 1.0f), scene->r.efra);
+       *sfra = max_ii(1, (int)part->sta);
+       *efra = min_ii((int)(part->end + part->lifetime + 1.0f), max_ii(scene->r.pefra, scene->r.efra));
 }
 
 /************************************************/
@@ -2580,13 +1589,15 @@ static void sph_evaluate_func(BVHTree *tree, ParticleSystem **psys, float co[3],
                }
        }
 }
-static void sph_density_accum_cb(void *userdata, int index, float squared_dist)
+static void sph_density_accum_cb(void *userdata, int index, const float co[3], float squared_dist)
 {
        SPHRangeData *pfr = (SPHRangeData *)userdata;
        ParticleData *npa = pfr->npsys->particles + index;
        float q;
        float dist;
 
+       UNUSED_VARS(co);
+
        if (npa == pfr->pa || squared_dist < FLT_EPSILON)
                return;
 
@@ -2745,7 +1756,6 @@ static void sph_force_cb(void *sphdata_v, ParticleKey *state, float *force, floa
                                        temp_spring.delete_flag = 0;
 
                                        /* sph_spring_add is not thread-safe. - z0r */
-#pragma omp critical
                                        sph_spring_add(psys[0], &temp_spring);
                                }
                        }
@@ -2764,7 +1774,7 @@ static void sph_force_cb(void *sphdata_v, ParticleKey *state, float *force, floa
        sphdata->pass++;
 }
 
-static void sphclassical_density_accum_cb(void *userdata, int index, float UNUSED(squared_dist))
+static void sphclassical_density_accum_cb(void *userdata, int index, const float co[3], float UNUSED(squared_dist))
 {
        SPHRangeData *pfr = (SPHRangeData *)userdata;
        ParticleData *npa = pfr->npsys->particles + index;
@@ -2776,7 +1786,7 @@ static void sphclassical_density_accum_cb(void *userdata, int index, float UNUSE
        /* Exclude particles that are more than 2h away. Can't use squared_dist here
         * because it is not accurate enough. Use current state, i.e. the output of
         * basic_integrate() - z0r */
-       sub_v3_v3v3(vec, npa->state.co, pfr->pa->state.co);
+       sub_v3_v3v3(vec, npa->state.co, co);
        rij = len_v3(vec);
        rij_h = rij / pfr->h;
        if (rij_h > 2.0f)
@@ -2795,7 +1805,7 @@ static void sphclassical_density_accum_cb(void *userdata, int index, float UNUSE
        pfr->data[1] += q / npa->sphdensity;
 }
 
-static void sphclassical_neighbour_accum_cb(void *userdata, int index, float UNUSED(squared_dist))
+static void sphclassical_neighbour_accum_cb(void *userdata, int index, const float co[3], float UNUSED(squared_dist))
 {
        SPHRangeData *pfr = (SPHRangeData *)userdata;
        ParticleData *npa = pfr->npsys->particles + index;
@@ -2808,7 +1818,7 @@ static void sphclassical_neighbour_accum_cb(void *userdata, int index, float UNU
        /* Exclude particles that are more than 2h away. Can't use squared_dist here
         * because it is not accurate enough. Use current state, i.e. the output of
         * basic_integrate() - z0r */
-       sub_v3_v3v3(vec, npa->state.co, pfr->pa->state.co);
+       sub_v3_v3v3(vec, npa->state.co, co);
        rij = len_v3(vec);
        rij_h = rij / pfr->h;
        if (rij_h > 2.0f)
@@ -2938,7 +1948,7 @@ static void sphclassical_calc_dens(ParticleData *pa, float UNUSED(dfra), SPHData
        pfr.mass = sphdata->mass;
 
        sph_evaluate_func( NULL, psys, pa->state.co, &pfr, interaction_radius, sphclassical_density_accum_cb);
-       pa->sphdensity = MIN2(MAX2(data[0], fluid->rest_density * 0.9f), fluid->rest_density * 1.1f);
+       pa->sphdensity = min_ff(max_ff(data[0], fluid->rest_density * 0.9f), fluid->rest_density * 1.1f);
 }
 
 void psys_sph_init(ParticleSimulationData *sim, SPHData *sphdata)
@@ -3110,7 +2120,7 @@ static void basic_integrate(ParticleSimulationData *sim, int p, float dfra, floa
        tkey.time=pa->state.time;
 
        if (part->type != PART_HAIR) {
-               if (do_guides(sim->psys->effectors, &tkey, p, time)) {
+               if (do_guides(sim->psys->part, sim->psys->effectors, &tkey, p, time)) {
                        copy_v3_v3(pa->state.co,tkey.co);
                        /* guides don't produce valid velocity */
                        sub_v3_v3v3(pa->state.vel, tkey.co, pa->prev_state.co);
@@ -3175,10 +2185,9 @@ static void basic_rotate(ParticleSettings *part, ParticleData *pa, float dfra, f
  * The algorithm is roughly:
  *  1. Use a BVH tree to search for faces that a particle may collide with.
  *  2. Use Newton's method to find the exact time at which the collision occurs.
- *     http://en.wikipedia.org/wiki/Newton's_method
+ *     https://en.wikipedia.org/wiki/Newton's_method
  *
  ************************************************/
-#define COLLISION_MAX_COLLISIONS       10
 #define COLLISION_MIN_RADIUS 0.001f
 #define COLLISION_MIN_DISTANCE 0.0001f
 #define COLLISION_ZERO 0.00001f
@@ -3315,21 +2324,21 @@ static void collision_point_on_surface(float p[3], ParticleCollisionElement *pce
                }
                case 3:
                {
-                               float p0[3], e1[3], e2[3], nor[3];
+                       float p0[3], e1[3], e2[3], nor[3];
 
-                               sub_v3_v3v3(e1, pce->x1, pce->x0);
-                               sub_v3_v3v3(e2, pce->x2, pce->x0);
-                               sub_v3_v3v3(p0, p, pce->x0);
+                       sub_v3_v3v3(e1, pce->x1, pce->x0);
+                       sub_v3_v3v3(e2, pce->x2, pce->x0);
+                       sub_v3_v3v3(p0, p, pce->x0);
 
-                               cross_v3_v3v3(nor, e1, e2);
-                               normalize_v3(nor);
+                       cross_v3_v3v3(nor, e1, e2);
+                       normalize_v3(nor);
 
-                               if (pce->inv_nor == 1)
-                                       negate_v3(nor);
+                       if (pce->inv_nor == 1)
+                               negate_v3(nor);
 
-                               madd_v3_v3v3fl(co, pce->x0, nor, col->radius);
-                               madd_v3_v3fl(co, e1, pce->uv[0]);
-                               madd_v3_v3fl(co, e2, pce->uv[1]);
+                       madd_v3_v3v3fl(co, pce->x0, nor, col->radius);
+                       madd_v3_v3fl(co, e1, pce->uv[0]);
+                       madd_v3_v3fl(co, e2, pce->uv[1]);
                        break;
                }
        }
@@ -3477,10 +2486,6 @@ static int collision_sphere_to_edges(ParticleCollision *col, float radius, Parti
        int i;
 
        for (i=0; i<3; i++) {
-               /* in case of a quad, no need to check "edge" that goes through face twice */
-               if ((pce->x[3] && i==2))
-                       continue;
-
                cur = edge+i;
                cur->x[0] = pce->x[i]; cur->x[1] = pce->x[(i+1)%3];
                cur->v[0] = pce->v[i]; cur->v[1] = pce->v[(i+1)%3];
@@ -3524,10 +2529,6 @@ static int collision_sphere_to_verts(ParticleCollision *col, float radius, Parti
        int i;
 
        for (i=0; i<3; i++) {
-               /* in case of quad, only check one vert the first time */
-               if (pce->x[3] && i != 1)
-                       continue;
-
                cur = vert+i;
                cur->x[0] = pce->x[i];
                cur->v[0] = pce->v[i];
@@ -3555,58 +2556,42 @@ void BKE_psys_collision_neartest_cb(void *userdata, int index, const BVHTreeRay
 {
        ParticleCollision *col = (ParticleCollision *) userdata;
        ParticleCollisionElement pce;
-       MFace *face = col->md->mfaces + index;
+       const MVertTri *vt = &col->md->tri[index];
        MVert *x = col->md->x;
        MVert *v = col->md->current_v;
        float t = hit->dist/col->original_ray_length;
        int collision = 0;
 
-       pce.x[0] = x[face->v1].co;
-       pce.x[1] = x[face->v2].co;
-       pce.x[2] = x[face->v3].co;
-       pce.x[3] = face->v4 ? x[face->v4].co : NULL;
+       pce.x[0] = x[vt->tri[0]].co;
+       pce.x[1] = x[vt->tri[1]].co;
+       pce.x[2] = x[vt->tri[2]].co;
 
-       pce.v[0] = v[face->v1].co;
-       pce.v[1] = v[face->v2].co;
-       pce.v[2] = v[face->v3].co;
-       pce.v[3] = face->v4 ? v[face->v4].co : NULL;
+       pce.v[0] = v[vt->tri[0]].co;
+       pce.v[1] = v[vt->tri[1]].co;
+       pce.v[2] = v[vt->tri[2]].co;
 
        pce.tot = 3;
        pce.inside = 0;
        pce.index = index;
 
-       /* don't collide with same face again */
-       if (col->hit == col->current && col->pce.index == index && col->pce.tot == 3)
-               return;
-
-       do {
-               collision = collision_sphere_to_tri(col, ray->radius, &pce, &t);
-               if (col->pce.inside == 0) {
-                       collision += collision_sphere_to_edges(col, ray->radius, &pce, &t);
-                       collision += collision_sphere_to_verts(col, ray->radius, &pce, &t);
-               }
-
-               if (collision) {
-                       hit->dist = col->original_ray_length * t;
-                       hit->index = index;
-                               
-                       collision_point_velocity(&col->pce);
-
-                       col->hit = col->current;
-               }
+       collision = collision_sphere_to_tri(col, ray->radius, &pce, &t);
+       if (col->pce.inside == 0) {
+               collision += collision_sphere_to_edges(col, ray->radius, &pce, &t);
+               collision += collision_sphere_to_verts(col, ray->radius, &pce, &t);
+       }
 
-               pce.x[1] = pce.x[2];
-               pce.x[2] = pce.x[3];
-               pce.x[3] = NULL;
+       if (collision) {
+               hit->dist = col->original_ray_length * t;
+               hit->index = index;
 
-               pce.v[1] = pce.v[2];
-               pce.v[2] = pce.v[3];
-               pce.v[3] = NULL;
+               collision_point_velocity(&col->pce);
 
-       } while (pce.x[2]);
+               col->hit = col->current;
+       }
 }
 static int collision_detect(ParticleData *pa, ParticleCollision *col, BVHTreeRayHit *hit, ListBase *colliders)
 {
+       const int raycast_flag = BVH_RAYCAST_DEFAULT & ~(BVH_RAYCAST_WATERTIGHT);
        ColliderCache *coll;
        float ray_dir[3];
 
@@ -3615,7 +2600,7 @@ static int collision_detect(ParticleData *pa, ParticleCollision *col, BVHTreeRay
 
        sub_v3_v3v3(ray_dir, col->co2, col->co1);
        hit->index = -1;
-       hit->dist = col->original_ray_length = len_v3(ray_dir);
+       hit->dist = col->original_ray_length = normalize_v3(ray_dir);
        col->pce.inside = 0;
 
        /* even if particle is stationary we want to check for moving colliders */
@@ -3624,8 +2609,17 @@ static int collision_detect(ParticleData *pa, ParticleCollision *col, BVHTreeRay
                hit->dist = col->original_ray_length = 0.000001f;
 
        for (coll = colliders->first; coll; coll=coll->next) {
-               /* for boids: don't check with current ground object */
-               if (coll->ob == col->skip)
+               /* for boids: don't check with current ground object; also skip if permeated */
+               bool skip = false;
+
+               for (int i = 0; i < col->skip_count; i++) {
+                       if (coll->ob == col->skip[i]) {
+                               skip = true;
+                               break;
+                       }
+               }
+
+               if (skip)
                        continue;
 
                /* particles should not collide with emitter at birth */
@@ -3637,8 +2631,11 @@ static int collision_detect(ParticleData *pa, ParticleCollision *col, BVHTreeRay
                col->fac1 = (col->old_cfra - coll->collmd->time_x) / (coll->collmd->time_xnew - coll->collmd->time_x);
                col->fac2 = (col->cfra - coll->collmd->time_x) / (coll->collmd->time_xnew - coll->collmd->time_x);
 
-               if (col->md && col->md->bvhtree)
-                       BLI_bvhtree_ray_cast(col->md->bvhtree, col->co1, ray_dir, col->radius, hit, BKE_psys_collision_neartest_cb, col);
+               if (col->md && col->md->bvhtree) {
+                       BLI_bvhtree_ray_cast_ex(
+                               col->md->bvhtree, col->co1, ray_dir, col->radius, hit,
+                               BKE_psys_collision_neartest_cb, col, raycast_flag);
+               }
        }
 
        return hit->index >= 0;
@@ -3758,7 +2755,7 @@ static int collision_response(ParticleData *pa, ParticleCollision *col, BVHTreeR
                if (through==0 && ((vc_dot>0.0f && v0_dot>0.0f && vc_dot>v0_dot) || (vc_dot<0.0f && v0_dot<0.0f && vc_dot<v0_dot)))
                        mul_v3_v3fl(v0_nor, pce->nor, vc_dot);
                else if (v0_dot > 0.f)
-                       mul_v3_v3fl(v0_nor, pce->nor, vc_dot + (through ? -1.0f : 1.0f) * v0_dot);
+                       mul_v3_v3fl(v0_nor, pce->nor, vc_dot + v0_dot);
                else
                        mul_v3_v3fl(v0_nor, pce->nor, vc_dot + (through ? 1.0f : -1.0f) * v0_dot);
 
@@ -3813,8 +2810,10 @@ static int collision_response(ParticleData *pa, ParticleCollision *col, BVHTreeR
                col->f = f;
        }
 
-       col->prev = col->hit;
-       col->prev_index = hit->index;
+       /* if permeability random roll succeeded, disable collider for this sim step */
+       if (through) {
+               col->skip[col->skip_count++] = col->hit;
+       }
 
        return 1;
 }
@@ -3875,16 +2874,16 @@ static void collision_check(ParticleSimulationData *sim, int p, float dfra, floa
        if (part->phystype == PART_PHYS_BOIDS && part->boids->options & BOID_ALLOW_LAND) {
                col.boid = 1;
                col.boid_z = pa->state.co[2];
-               col.skip = pa->boid->ground;
+               col.skip[col.skip_count++] = pa->boid->ground;
        }
 
        /* 10 iterations to catch multiple collisions */
-       while (collision_count < COLLISION_MAX_COLLISIONS) {
+       while (collision_count < PARTICLE_COLLISION_MAX_COLLISIONS) {
                if (collision_detect(pa, &col, &hit, sim->colliders)) {
                        
                        collision_count++;
 
-                       if (collision_count == COLLISION_MAX_COLLISIONS)
+                       if (collision_count == PARTICLE_COLLISION_MAX_COLLISIONS)
                                collision_fail(pa, &col);
                        else if (collision_response(pa, &col, &hit, part->flag & PART_DIE_ON_COL, part->flag & PART_ROT_DYN)==0)
                                return;
@@ -3897,7 +2896,7 @@ static void collision_check(ParticleSimulationData *sim, int p, float dfra, floa
 /*                     Hair                                                            */
 /************************************************/
 /* check if path cache or children need updating and do it if needed */
-static void psys_update_path_cache(ParticleSimulationData *sim, float cfra)
+static void psys_update_path_cache(ParticleSimulationData *sim, float cfra, const bool use_render_params)
 {
        ParticleSystem *psys = sim->psys;
        ParticleSettings *part = psys->part;
@@ -3905,7 +2904,7 @@ static void psys_update_path_cache(ParticleSimulationData *sim, float cfra)
        Base *base;
        int distr=0, alloc=0, skip=0;
 
-       if ((psys->part->childtype && psys->totchild != get_psys_tot_child(sim->scene, psys)) || psys->recalc&PSYS_RECALC_RESET)
+       if ((psys->part->childtype && psys->totchild != psys_get_tot_child(sim->scene, psys)) || psys->recalc&PSYS_RECALC_RESET)
                alloc=1;
 
        if (alloc || psys->recalc&PSYS_RECALC_CHILD || (psys->vgroup[PSYS_VG_DENSITY] && (sim->ob && sim->ob->mode & OB_MODE_WEIGHT_PAINT)))
@@ -3915,13 +2914,13 @@ static void psys_update_path_cache(ParticleSimulationData *sim, float cfra)
                if (alloc)
                        realloc_particles(sim, sim->psys->totpart);
 
-               if (get_psys_tot_child(sim->scene, psys)) {
+               if (psys_get_tot_child(sim->scene, psys)) {
                        /* don't generate children while computing the hair keys */
                        if (!(psys->part->type == PART_HAIR) || (psys->flag & PSYS_HAIR_DONE)) {
                                distribute_particles(sim, PART_FROM_CHILD);
 
                                if (part->childtype==PART_CHILD_FACES && part->parents != 0.0f)
-                                       psys_find_parents(sim);
+                                       psys_find_parents(sim, use_render_params);
                        }
                }
                else
@@ -3959,7 +2958,7 @@ static void psys_update_path_cache(ParticleSimulationData *sim, float cfra)
        }
 
        if (!skip) {
-               psys_cache_paths(sim, cfra);
+               psys_cache_paths(sim, cfra, use_render_params);
 
                /* for render, child particle paths are computed on the fly */
                if (part->childtype) {
@@ -3969,135 +2968,249 @@ static void psys_update_path_cache(ParticleSimulationData *sim, float cfra)
                                skip = 1;
 
                        if (!skip)
-                               psys_cache_child_paths(sim, cfra, 0);
+                               psys_cache_child_paths(sim, cfra, 0, use_render_params);
                }
        }
        else if (psys->pathcache)
                psys_free_path_cache(psys, NULL);
 }
 
-static void do_hair_dynamics(ParticleSimulationData *sim)
+static bool psys_hair_use_simulation(ParticleData *pa, float max_length)
 {
-       ParticleSystem *psys = sim->psys;
-       DerivedMesh *dm = psys->hair_in_dm;
-       MVert *mvert = NULL;
-       MEdge *medge = NULL;
-       MDeformVert *dvert = NULL;
+       /* Minimum segment length relative to average length.
+        * Hairs with segments below this length will be excluded from the simulation,
+        * because otherwise the solver will become unstable.
+        * The hair system should always make sure the hair segments have reasonable length ratios,
+        * but this can happen in old files when e.g. cutting hair.
+        */
+       const float min_length = 0.1f * max_length;
+       
        HairKey *key;
-       PARTICLE_P;
-       int totpoint = 0;
-       int totedge;
        int k;
-       float hairmat[4][4];
-       float (*deformedVerts)[3];
-
-       if (!psys->clmd) {
-               psys->clmd = (ClothModifierData*)modifier_new(eModifierType_Cloth);
-               psys->clmd->sim_parms->goalspring = 0.0f;
-               psys->clmd->sim_parms->vel_damping = 1.0f;
-               psys->clmd->sim_parms->flags |= CLOTH_SIMSETTINGS_FLAG_GOAL|CLOTH_SIMSETTINGS_FLAG_NO_SPRING_COMPRESS;
-               psys->clmd->coll_parms->flags &= ~CLOTH_COLLSETTINGS_FLAG_SELF;
-               psys->clmd->coll_parms->flags |= CLOTH_COLLSETTINGS_FLAG_POINTS;
+       
+       if (pa->totkey < 2)
+               return false;
+       
+       for (k=1, key=pa->hair+1; k<pa->totkey; k++,key++) {
+               float length = len_v3v3(key->co, (key-1)->co);
+               if (length < min_length)
+                       return false;
        }
+       
+       return true;
+}
 
-       /* create a dm from hair vertices */
-       LOOP_PARTICLES
-               totpoint += pa->totkey;
-
-       totedge = totpoint;
-       totpoint += psys->totpart;
-
-       if (dm && (totpoint != dm->getNumVerts(dm) || totedge != dm->getNumEdges(dm))) {
-               dm->release(dm);
-               dm = psys->hair_in_dm = NULL;
+static MDeformVert *hair_set_pinning(MDeformVert *dvert, float weight)
+{
+       if (dvert) {
+               if (!dvert->totweight) {
+                       dvert->dw = MEM_callocN(sizeof(MDeformWeight), "deformWeight");
+                       dvert->totweight = 1;
+               }
+               
+               dvert->dw->weight = weight;
+               dvert++;
        }
+       return dvert;
+}
 
+static void hair_create_input_dm(ParticleSimulationData *sim, int totpoint, int totedge, DerivedMesh **r_dm, ClothHairData **r_hairdata)
+{
+       ParticleSystem *psys = sim->psys;
+       ParticleSettings *part = psys->part;
+       DerivedMesh *dm;
+       ClothHairData *hairdata;
+       MVert *mvert;
+       MEdge *medge;
+       MDeformVert *dvert;
+       HairKey *key;
+       PARTICLE_P;
+       int k, hair_index;
+       float hairmat[4][4];
+       float max_length;
+       float hair_radius;
+       
+       dm = *r_dm;
        if (!dm) {
-               dm = psys->hair_in_dm = CDDM_new(totpoint, totedge, 0, 0, 0);
+               *r_dm = dm = CDDM_new(totpoint, totedge, 0, 0, 0);
                DM_add_vert_layer(dm, CD_MDEFORMVERT, CD_CALLOC, NULL);
        }
-
        mvert = CDDM_get_verts(dm);
        medge = CDDM_get_edges(dm);
        dvert = DM_get_vert_data_layer(dm, CD_MDEFORMVERT);
-
+       
+       hairdata = *r_hairdata;
+       if (!hairdata) {
+               *r_hairdata = hairdata = MEM_mallocN(sizeof(ClothHairData) * totpoint, "hair data");
+       }
+       
+       /* calculate maximum segment length */
+       max_length = 0.0f;
+       LOOP_PARTICLES {
+               if (!(pa->flag & PARS_UNEXIST)) {
+                       for (k=1, key=pa->hair+1; k<pa->totkey; k++,key++) {
+                               float length = len_v3v3(key->co, (key-1)->co);
+                               if (max_length < length)
+                                       max_length = length;
+                       }
+               }
+       }
+       
        psys->clmd->sim_parms->vgroup_mass = 1;
-
+       
+       /* XXX placeholder for more flexible future hair settings */
+       hair_radius = part->size;
+       
        /* make vgroup for pin roots etc.. */
-       psys->particles->hair_index = 1;
+       hair_index = 1;
        LOOP_PARTICLES {
-               if (p)
-                       pa->hair_index = (pa-1)->hair_index + (pa-1)->totkey + 1;
+               if (!(pa->flag & PARS_UNEXIST)) {
+                       float root_mat[4][4];
+                       float bending_stiffness;
+                       bool use_hair;
 
-               psys_mat_hair_to_object(sim->ob, sim->psmd->dm, psys->part->from, pa, hairmat);
+                       pa->hair_index = hair_index;
+                       use_hair = psys_hair_use_simulation(pa, max_length);
 
-               for (k=0, key=pa->hair; k<pa->totkey; k++,key++) {
-                       
-                       /* create fake root before actual root to resist bending */
-                       if (k==0) {
-                               float temp[3];
-                               sub_v3_v3v3(temp, key->co, (key+1)->co);
-                               copy_v3_v3(mvert->co, key->co);
-                               add_v3_v3v3(mvert->co, mvert->co, temp);
-                               mul_m4_v3(hairmat, mvert->co);
-                               mvert++;
+                       psys_mat_hair_to_object(sim->ob, sim->psmd->dm_final, psys->part->from, pa, hairmat);
+                       mul_m4_m4m4(root_mat, sim->ob->obmat, hairmat);
+                       normalize_m4(root_mat);
 
-                               medge->v1 = pa->hair_index - 1;
-                               medge->v2 = pa->hair_index;
-                               medge++;
+                       bending_stiffness = CLAMPIS(1.0f - part->bending_random * psys_frand(psys, p + 666), 0.0f, 1.0f);
 
-                               if (dvert) {
-                                       if (!dvert->totweight) {
-                                               dvert->dw = MEM_callocN(sizeof(MDeformWeight), "deformWeight");
-                                               dvert->totweight = 1;
-                                       }
+                       for (k=0, key=pa->hair; k<pa->totkey; k++,key++) {
+                               ClothHairData *hair;
+                               float *co, *co_next;
+
+                               co = key->co;
+                               co_next = (key+1)->co;
 
-                                       dvert->dw->weight = 1.0f;
-                                       dvert++;
+                               /* create fake root before actual root to resist bending */
+                               if (k==0) {
+                                       hair = &psys->clmd->hairdata[pa->hair_index - 1];
+                                       copy_v3_v3(hair->loc, root_mat[3]);
+                                       copy_m3_m4(hair->rot, root_mat);
+
+                                       hair->radius = hair_radius;
+                                       hair->bending_stiffness = bending_stiffness;
+
+                                       add_v3_v3v3(mvert->co, co, co);
+                                       sub_v3_v3(mvert->co, co_next);
+                                       mul_m4_v3(hairmat, mvert->co);
+
+                                       medge->v1 = pa->hair_index - 1;
+                                       medge->v2 = pa->hair_index;
+
+                                       dvert = hair_set_pinning(dvert, 1.0f);
+
+                                       mvert++;
+                                       medge++;
                                }
-                       }
 
-                       copy_v3_v3(mvert->co, key->co);
-                       mul_m4_v3(hairmat, mvert->co);
-                       mvert++;
-                       
-                       if (k) {
-                               medge->v1 = pa->hair_index + k - 1;
-                               medge->v2 = pa->hair_index + k;
-                               medge++;
-                       }
+                               /* store root transform in cloth data */
+                               hair = &psys->clmd->hairdata[pa->hair_index + k];
+                               copy_v3_v3(hair->loc, root_mat[3]);
+                               copy_m3_m4(hair->rot, root_mat);
 
-                       if (dvert) {
-                               if (!dvert->totweight) {
-                                       dvert->dw = MEM_callocN(sizeof(MDeformWeight), "deformWeight");
-                                       dvert->totweight = 1;
+                               hair->radius = hair_radius;
+                               hair->bending_stiffness = bending_stiffness;
+
+                               copy_v3_v3(mvert->co, co);
+                               mul_m4_v3(hairmat, mvert->co);
+
+                               if (k) {
+                                       medge->v1 = pa->hair_index + k - 1;
+                                       medge->v2 = pa->hair_index + k;
                                }
-                               /* roots should be 1.0, the rest can be anything from 0.0 to 1.0 */
-                               dvert->dw->weight = key->weight;
-                               dvert++;
+
+                               /* roots and disabled hairs should be 1.0, the rest can be anything from 0.0 to 1.0 */
+                               if (use_hair)
+                                       dvert = hair_set_pinning(dvert, key->weight);
+                               else
+                                       dvert = hair_set_pinning(dvert, 1.0f);
+
+                               mvert++;
+                               if (k)
+                                       medge++;
                        }
+
+                       hair_index += pa->totkey + 1;
                }
        }
+}
 
+static void do_hair_dynamics(ParticleSimulationData *sim)
+{
+       ParticleSystem *psys = sim->psys;
+       PARTICLE_P;
+       EffectorWeights *clmd_effweights;
+       int totpoint;
+       int totedge;
+       float (*deformedVerts)[3];
+       bool realloc_roots;
+       
+       if (!psys->clmd) {
+               psys->clmd = (ClothModifierData*)modifier_new(eModifierType_Cloth);
+               psys->clmd->sim_parms->goalspring = 0.0f;
+               psys->clmd->sim_parms->vel_damping = 1.0f;
+               psys->clmd->sim_parms->flags |= CLOTH_SIMSETTINGS_FLAG_GOAL|CLOTH_SIMSETTINGS_FLAG_NO_SPRING_COMPRESS;
+               psys->clmd->coll_parms->flags &= ~CLOTH_COLLSETTINGS_FLAG_SELF;
+       }
+       
+       /* count simulated points */
+       totpoint = 0;
+       totedge = 0;
+       LOOP_PARTICLES {
+               if (!(pa->flag & PARS_UNEXIST)) {
+                       /* "out" dm contains all hairs */
+                       totedge += pa->totkey;
+                       totpoint += pa->totkey + 1; /* +1 for virtual root point */
+               }
+       }
+       
+       realloc_roots = false; /* whether hair root info array has to be reallocated */
+       if (psys->hair_in_dm) {
+               DerivedMesh *dm = psys->hair_in_dm;
+               if (totpoint != dm->getNumVerts(dm) || totedge != dm->getNumEdges(dm)) {
+                       dm->release(dm);
+                       psys->hair_in_dm = NULL;
+                       realloc_roots = true;
+               }
+       }
+       
+       if (!psys->hair_in_dm || !psys->clmd->hairdata || realloc_roots) {
+               if (psys->clmd->hairdata) {
+                       MEM_freeN(psys->clmd->hairdata);
+                       psys->clmd->hairdata = NULL;
+               }
+       }
+       
+       hair_create_input_dm(sim, totpoint, totedge, &psys->hair_in_dm, &psys->clmd->hairdata);
+       
        if (psys->hair_out_dm)
                psys->hair_out_dm->release(psys->hair_out_dm);
-
+       
        psys->clmd->point_cache = psys->pointcache;
+       /* for hair sim we replace the internal cloth effector weights temporarily
+        * to use the particle settings
+        */
+       clmd_effweights = psys->clmd->sim_parms->effector_weights;
        psys->clmd->sim_parms->effector_weights = psys->part->effector_weights;
-
-       deformedVerts = MEM_mallocN(sizeof(*deformedVerts) * dm->getNumVerts(dm), "do_hair_dynamics vertexCos");
-       psys->hair_out_dm = CDDM_copy(dm);
+       
+       deformedVerts = MEM_mallocN(sizeof(*deformedVerts) * psys->hair_in_dm->getNumVerts(psys->hair_in_dm), "do_hair_dynamics vertexCos");
+       psys->hair_out_dm = CDDM_copy(psys->hair_in_dm);
        psys->hair_out_dm->getVertCos(psys->hair_out_dm, deformedVerts);
-
-       clothModifier_do(psys->clmd, sim->scene, sim->ob, dm, deformedVerts);
-
+       
+       clothModifier_do(psys->clmd, sim->scene, sim->ob, psys->hair_in_dm, deformedVerts);
+       
        CDDM_apply_vert_coords(psys->hair_out_dm, deformedVerts);
-
+       
        MEM_freeN(deformedVerts);
-
-       psys->clmd->sim_parms->effector_weights = NULL;
+       
+       /* restore cloth effector weights */
+       psys->clmd->sim_parms->effector_weights = clmd_effweights;
 }
-static void hair_step(ParticleSimulationData *sim, float cfra)
+static void hair_step(ParticleSimulationData *sim, float cfra, const bool use_render_params)
 {
        ParticleSystem *psys = sim->psys;
        ParticleSettings *part = psys->part;
@@ -4117,7 +3230,7 @@ static void hair_step(ParticleSimulationData *sim, float cfra)
 
        if (psys->recalc & PSYS_RECALC_RESET) {
                /* need this for changing subsurf levels */
-               psys_calc_dmcache(sim->ob, sim->psmd->dm, psys);
+               psys_calc_dmcache(sim->ob, sim->psmd->dm_final, sim->psmd->dm_deformed, psys);
 
                if (psys->clmd)
                        cloth_free_modifier(psys->clmd);
@@ -4129,7 +3242,7 @@ static void hair_step(ParticleSimulationData *sim, float cfra)
 
        /* following lines were removed r29079 but cause bug [#22811], see report for details */
        psys_update_effectors(sim);
-       psys_update_path_cache(sim, cfra);
+       psys_update_path_cache(sim, cfra, use_render_params);
 
        psys->flag |= PSYS_HAIR_UPDATED;
 }
@@ -4164,7 +3277,7 @@ static void save_hair(ParticleSimulationData *sim, float UNUSED(cfra))
 
                if (pa->totkey) {
                        sub_v3_v3(key->co, root->co);
-                       psys_vec_rot_to_face(sim->psmd->dm, pa, key->co);
+                       psys_vec_rot_to_face(sim->psmd->dm_final, pa, key->co);
                }
 
                key->time = pa->state.time;
@@ -4195,22 +3308,27 @@ static const float TIMESTEP_EXPANSION_TOLERANCE = 1.5f;
  * step, after the velocity has been updated. element_size defines the scale of
  * the simulation, and is typically the distance to neighboring particles. */
 static void update_courant_num(ParticleSimulationData *sim, ParticleData *pa,
-                               float dtime, SPHData *sphdata)
+                               float dtime, SPHData *sphdata, SpinLock *spin)
 {
        float relative_vel[3];
-       float speed;
 
        sub_v3_v3v3(relative_vel, pa->prev_state.vel, sphdata->flow);
-       speed = len_v3(relative_vel);
-       if (sim->courant_num < speed * dtime / sphdata->element_size)
-               sim->courant_num = speed * dtime / sphdata->element_size;
+
+       const float courant_num = len_v3(relative_vel) * dtime / sphdata->element_size;
+       if (sim->courant_num < courant_num) {
+               BLI_spin_lock(spin);
+               if (sim->courant_num < courant_num) {
+                       sim->courant_num = courant_num;
+               }
+               BLI_spin_unlock(spin);
+       }
 }
 static float get_base_time_step(ParticleSettings *part)
 {
        return 1.0f / (float) (part->subframes + 1);
 }
 /* Update time step size to suit current conditions. */
-static float update_timestep(ParticleSystem *psys, ParticleSimulationData *sim, float t_frac)
+static void update_timestep(ParticleSystem *psys, ParticleSimulationData *sim)
 {
        float dt_target;
        if (sim->courant_num == 0.0f)
@@ -4230,7 +3348,10 @@ static float update_timestep(ParticleSystem *psys, ParticleSimulationData *sim,
                psys->dt_frac = interpf(dt_target, psys->dt_frac, TIMESTEP_EXPANSION_FACTOR);
        else
                psys->dt_frac = dt_target;
+}
 
+static float sync_timestep(ParticleSystem *psys, float t_frac)
+{
        /* Sync with frame end if it's close. */
        if (t_frac == 1.0f)
                return psys->dt_frac;
@@ -4243,6 +3364,124 @@ static float update_timestep(ParticleSystem *psys, ParticleSimulationData *sim,
 /************************************************/
 /*                     System Core                                                     */
 /************************************************/
+
+typedef struct DynamicStepSolverTaskData {
+       ParticleSimulationData *sim;
+
+       float cfra;
+       float timestep;
+       float dtime;
+
+       SpinLock spin;
+} DynamicStepSolverTaskData;
+
+static void dynamics_step_sph_ddr_task_cb_ex(
+        void *__restrict userdata,
+        const int p,
+        const ParallelRangeTLS *__restrict tls)
+{
+       DynamicStepSolverTaskData *data = userdata;
+       ParticleSimulationData *sim = data->sim;
+       ParticleSystem *psys = sim->psys;
+       ParticleSettings *part = psys->part;
+
+       SPHData *sphdata = tls->userdata_chunk;
+
+       ParticleData *pa;
+
+       if ((pa = psys->particles + p)->state.time <= 0.0f) {
+               return;
+       }
+
+       /* do global forces & effectors */
+       basic_integrate(sim, p, pa->state.time, data->cfra);
+
+       /* actual fluids calculations */
+       sph_integrate(sim, pa, pa->state.time, sphdata);
+
+       if (sim->colliders)
+               collision_check(sim, p, pa->state.time, data->cfra);
+
+       /* SPH particles are not physical particles, just interpolation
+        * particles, so rotation has no direct physical meaning for them. */
+       basic_rotate(part, pa, pa->state.time, data->timestep);
+
+       if (part->time_flag & PART_TIME_AUTOSF) {
+               update_courant_num(sim, pa, data->dtime, sphdata, &data->spin);
+       }
+}
+
+static void dynamics_step_sph_classical_basic_integrate_task_cb_ex(
+        void *__restrict userdata, 
+        const int p,
+        const ParallelRangeTLS *__restrict UNUSED(tls))
+{
+       DynamicStepSolverTaskData *data = userdata;
+       ParticleSimulationData *sim = data->sim;
+       ParticleSystem *psys = sim->psys;
+
+       ParticleData *pa;
+
+       if ((pa = psys->particles + p)->state.time <= 0.0f) {
+               return;
+       }
+
+       basic_integrate(sim, p, pa->state.time, data->cfra);
+}
+
+static void dynamics_step_sph_classical_calc_density_task_cb_ex(
+        void *__restrict userdata,
+        const int p,
+        const ParallelRangeTLS *__restrict tls)
+{
+       DynamicStepSolverTaskData *data = userdata;
+       ParticleSimulationData *sim = data->sim;
+       ParticleSystem *psys = sim->psys;
+
+       SPHData *sphdata = tls->userdata_chunk;
+
+       ParticleData *pa;
+
+       if ((pa = psys->particles + p)->state.time <= 0.0f) {
+               return;
+       }
+
+       sphclassical_calc_dens(pa, pa->state.time, sphdata);
+}
+
+static void dynamics_step_sph_classical_integrate_task_cb_ex(
+        void *__restrict userdata,
+        const int p,
+        const ParallelRangeTLS *__restrict tls)
+{
+       DynamicStepSolverTaskData *data = userdata;
+       ParticleSimulationData *sim = data->sim;
+       ParticleSystem *psys = sim->psys;
+       ParticleSettings *part = psys->part;
+
+       SPHData *sphdata = tls->userdata_chunk;
+
+       ParticleData *pa;
+
+       if ((pa = psys->particles + p)->state.time <= 0.0f) {
+               return;
+       }
+
+       /* actual fluids calculations */
+       sph_integrate(sim, pa, pa->state.time, sphdata);
+
+       if (sim->colliders)
+               collision_check(sim, p, pa->state.time, data->cfra);
+
+       /* SPH particles are not physical particles, just interpolation
+        * particles, so rotation has no direct physical meaning for them. */
+       basic_rotate(part, pa, pa->state.time, data->timestep);
+
+       if (part->time_flag & PART_TIME_AUTOSF) {
+               update_courant_num(sim, pa, data->dtime, sphdata, &data->spin);
+       }
+}
+
 /* unbaked particles are calculated dynamically */
 static void dynamics_step(ParticleSimulationData *sim, float cfra)
 {
@@ -4282,7 +3521,7 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra)
        psys_update_effectors(sim);
 
        if (part->type != PART_HAIR)
-               sim->colliders = get_collider_cache(sim->scene, sim->ob, NULL);
+               sim->colliders = get_collider_cache(sim->scene, sim->ob, part->collision_group);
 
        /* initialize physics type specific stuff */
        switch (part->phystype) {
@@ -4301,8 +3540,10 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra)
                        boids_precalc_rules(part, cfra);
 
                        for (; pt; pt=pt->next) {
-                               if (pt->ob)
-                                       psys_update_particle_tree(BLI_findlink(&pt->ob->particlesystem, pt->psys-1), cfra);
+                               ParticleSystem *psys_target = psys_get_target_system(sim->ob, pt);
+                               if (psys_target && psys_target != psys) {
+                                       psys_update_particle_tree(psys_target, cfra);
+                               }
                        }
                        break;
                }
@@ -4397,34 +3638,30 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra)
                case PART_PHYS_FLUID:
                {
                        SPHData sphdata;
-                       ParticleSettings *part = sim->psys->part;
                        psys_sph_init(sim, &sphdata);
 
+                       DynamicStepSolverTaskData task_data = {
+                           .sim = sim, .cfra = cfra, .timestep = timestep, .dtime = dtime,
+                       };
+
+                       BLI_spin_init(&task_data.spin);
+
                        if (part->fluid->solver == SPH_SOLVER_DDR) {
                                /* Apply SPH forces using double-density relaxation algorithm
                                 * (Clavat et. al.) */
-#pragma omp parallel for firstprivate (sphdata) private (pa) schedule(dynamic,5)
-                               LOOP_DYNAMIC_PARTICLES {
-                                       /* do global forces & effectors */
-                                       basic_integrate(sim, p, pa->state.time, cfra);
-
-                                       /* actual fluids calculations */
-                                       sph_integrate(sim, pa, pa->state.time, &sphdata);
 
-                                       if (sim->colliders)
-                                               collision_check(sim, p, pa->state.time, cfra);
-
-                                       /* SPH particles are not physical particles, just interpolation
-                                        * particles,  thus rotation has not a direct sense for them */
-                                       basic_rotate(part, pa, pa->state.time, timestep);
-
-#pragma omp critical
-                                       if (part->time_flag & PART_TIME_AUTOSF)
-                                               update_courant_num(sim, pa, dtime, &sphdata);
-                               }
+                               ParallelRangeSettings settings;
+                               BLI_parallel_range_settings_defaults(&settings);
+                               settings.use_threading = (psys->totpart > 100);
+                               settings.userdata_chunk = &sphdata;
+                               settings.userdata_chunk_size = sizeof(sphdata);
+                               BLI_task_parallel_range(
+                                       0, psys->totpart,
+                                       &task_data,
+                                       dynamics_step_sph_ddr_task_cb_ex,
+                                       &settings);
 
                                sph_springs_modify(psys, timestep);
-
                        }
                        else {
                                /* SPH_SOLVER_CLASSICAL */
@@ -4432,36 +3669,50 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra)
                                 * and Monaghan). Note that, unlike double-density relaxation,
                                 * this algorithm is separated into distinct loops. */
 
-#pragma omp parallel for private (pa) schedule(dynamic,5)
-                               LOOP_DYNAMIC_PARTICLES {
-                                       basic_integrate(sim, p, pa->state.time, cfra);
+                               {
+                                       ParallelRangeSettings settings;
+                                       BLI_parallel_range_settings_defaults(&settings);
+                                       settings.use_threading = (psys->totpart > 100);
+                                       BLI_task_parallel_range(
+                                               0, psys->totpart,
+                                               &task_data,
+                                               dynamics_step_sph_classical_basic_integrate_task_cb_ex,
+                                               &settings);
                                }
 
                                /* calculate summation density */
-#pragma omp parallel for firstprivate (sphdata) private (pa) schedule(dynamic,5)
-                               LOOP_DYNAMIC_PARTICLES {
-                                       sphclassical_calc_dens(pa, pa->state.time, &sphdata);
+                               /* Note that we could avoid copying sphdata for each thread here (it's only read here),
+                                * but doubt this would gain us anything except confusion... */
+                               {
+                               ParallelRangeSettings settings;
+                               BLI_parallel_range_settings_defaults(&settings);
+                               settings.use_threading = (psys->totpart > 100);
+                               settings.userdata_chunk = &sphdata;
+                               settings.userdata_chunk_size = sizeof(sphdata);
+                                       BLI_task_parallel_range(
+                                               0, psys->totpart,
+                                               &task_data,
+                                               dynamics_step_sph_classical_calc_density_task_cb_ex,
+                                               &settings);
                                }
 
                                /* do global forces & effectors */
-#pragma omp parallel for firstprivate (sphdata) private (pa) schedule(dynamic,5)
-                               LOOP_DYNAMIC_PARTICLES {
-                                       /* actual fluids calculations */
-                                       sph_integrate(sim, pa, pa->state.time, &sphdata);
-
-                                       if (sim->colliders)
-                                               collision_check(sim, p, pa->state.time, cfra);
-                               
-                                       /* SPH particles are not physical particles, just interpolation
-                                        * particles,  thus rotation has not a direct sense for them */
-                                       basic_rotate(part, pa, pa->state.time, timestep);
-
-#pragma omp critical
-                                       if (part->time_flag & PART_TIME_AUTOSF)
-                                               update_courant_num(sim, pa, dtime, &sphdata);
+                               {
+                               ParallelRangeSettings settings;
+                               BLI_parallel_range_settings_defaults(&settings);
+                               settings.use_threading = (psys->totpart > 100);
+                               settings.userdata_chunk = &sphdata;
+                               settings.userdata_chunk_size = sizeof(sphdata);
+                                       BLI_task_parallel_range(
+                                               0, psys->totpart,
+                                               &task_data,
+                                               dynamics_step_sph_classical_integrate_task_cb_ex,
+                                               &settings);
                                }
                        }
 
+                       BLI_spin_end(&task_data.spin);
+
                        psys_sph_finalise(&sphdata);
                        break;
                }
@@ -4486,7 +3737,7 @@ static void update_children(ParticleSimulationData *sim)
        /* don't generate children while growing hair - waste of time */
                psys_free_children(sim->psys);
        else if (sim->psys->part->childtype) {
-               if (sim->psys->totchild != get_psys_tot_child(sim->scene, sim->psys))
+               if (sim->psys->totchild != psys_get_tot_child(sim->scene, sim->psys))
                        distribute_particles(sim, PART_FROM_CHILD);
                else {
                        /* Children are up to date, nothing to do. */
@@ -4541,7 +3792,7 @@ static void cached_step(ParticleSimulationData *sim, float cfra)
        }
 }
 
-static void particles_fluid_step(ParticleSimulationData *sim, int UNUSED(cfra))
+static void particles_fluid_step(ParticleSimulationData *sim, int UNUSED(cfra), const bool use_render_params)
 {      
        ParticleSystem *psys = sim->psys;
        if (psys->particles) {
@@ -4584,7 +3835,7 @@ static void particles_fluid_step(ParticleSimulationData *sim, int UNUSED(cfra))
                        }
        
                        gzread(gzf, &totpart, sizeof(totpart));
-                       totpart = (G.is_rendering)?totpart:(part->disp*totpart) / 100;
+                       totpart = (use_render_params) ? totpart:(part->disp*totpart) / 100;
                        
                        part->totpart= totpart;
                        part->sta=part->end = 1.0f;
@@ -4645,6 +3896,8 @@ static void particles_fluid_step(ParticleSimulationData *sim, int UNUSED(cfra))
                        
                } // fluid sim particles done
        }
+#else
+       UNUSED_VARS(use_render_params);
 #endif // WITH_MOD_FLUID
 }
 
@@ -4666,7 +3919,7 @@ static int emit_particles(ParticleSimulationData *sim, PTCacheID *pid, float UNU
  * 2. Check cache (if used) and return if frame is cached
  * 3. Do dynamics
  * 4. Save to cache */
-static void system_step(ParticleSimulationData *sim, float cfra)
+static void system_step(ParticleSimulationData *sim, float cfra, const bool use_render_params)
 {
        ParticleSystem *psys = sim->psys;
        ParticleSettings *part = psys->part;
@@ -4689,8 +3942,8 @@ static void system_step(ParticleSimulationData *sim, float cfra)
                
                BKE_ptcache_id_time(pid, sim->scene, 0.0f, &startframe, &endframe, NULL);
 
-               /* clear everythin on start frame */
-               if (cfra == startframe) {
+               /* clear everything on start frame, or when psys needs full reset! */
+               if ((cfra == startframe) || (psys->recalc & PSYS_RECALC_RESET)) {
                        BKE_ptcache_id_reset(sim->scene, pid, PTCACHE_RESET_OUTDATED);
                        BKE_ptcache_validate(cache, startframe);
                        cache->flag &= ~PTCACHE_REDO_NEEDED;
@@ -4706,6 +3959,7 @@ static void system_step(ParticleSimulationData *sim, float cfra)
                initialize_all_particles(sim);
                /* reset only just created particles (on startframe all particles are recreated) */
                reset_all_particles(sim, 0.0, cfra, oldtotpart);
+               free_unexisting_particles(sim);
 
                if (psys->fluid_springs) {
                        MEM_freeN(psys->fluid_springs);
@@ -4722,12 +3976,12 @@ static void system_step(ParticleSimulationData *sim, float cfra)
 
 /* 2. try to read from the cache */
        if (pid) {
-               int cache_result = BKE_ptcache_read(pid, cache_cfra);
+               int cache_result = BKE_ptcache_read(pid, cache_cfra, true);
 
                if (ELEM(cache_result, PTCACHE_READ_EXACT, PTCACHE_READ_INTERPOLATED)) {
                        cached_step(sim, cfra);
                        update_children(sim);
-                       psys_update_path_cache(sim, cfra);
+                       psys_update_path_cache(sim, cfra, use_render_params);
 
                        BKE_ptcache_validate(cache, (int)cache_cfra);
 
@@ -4797,7 +4051,9 @@ static void system_step(ParticleSimulationData *sim, float cfra)
                                printf("%f,%f,%f,%f\n", cfra+dframe+t_frac - 1.f, t_frac, dt_frac, sim->courant_num);
 #endif
                                if (part->time_flag & PART_TIME_AUTOSF)
-                                       dt_frac = update_timestep(psys, sim, t_frac);
+                                       update_timestep(psys, sim);
+                               /* Even without AUTOSF dt_frac may not add up to 1.0 due to float precision. */
+                               dt_frac = sync_timestep(psys, t_frac);
                        }
                }
        }
@@ -4945,7 +4201,7 @@ static int hair_needs_recalc(ParticleSystem *psys)
 
 /* main particle update call, checks that things are ok on the large scale and
  * then advances in to actual particle calculations depending on particle type */
-void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
+void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys, const bool use_render_params)
 {
        ParticleSimulationData sim= {0};
        ParticleSettings *part = psys->part;
@@ -4954,7 +4210,7 @@ void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
        /* drawdata is outdated after ANY change */
        if (psys->pdd) psys->pdd->flag &= ~PARTICLE_DRAW_DATA_UPDATED;
 
-       if (!psys_check_enabled(ob, psys))
+       if (!psys_check_enabled(ob, psys, use_render_params))
                return;
 
        cfra= BKE_scene_frame_get(scene);
@@ -4972,11 +4228,11 @@ void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
                        return;
        }
 
-       if (!sim.psmd->dm)
+       if (!sim.psmd->dm_final)
                return;
 
        if (part->from != PART_FROM_VERT) {
-               DM_ensure_tessface(sim.psmd->dm);
+               DM_ensure_tessface(sim.psmd->dm_final);
        }
 
        /* execute drivers only, as animation has already been done */
@@ -5023,7 +4279,7 @@ void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
                                        hcfra=100.0f*(float)i/(float)psys->part->hair_step;
                                        if ((part->flag & PART_HAIR_REGROW)==0)
                                                BKE_animsys_evaluate_animdata(scene, &part->id, part->adt, hcfra, ADT_RECALC_ANIM);
-                                       system_step(&sim, hcfra);
+                                       system_step(&sim, hcfra, use_render_params);
                                        psys->cfra = hcfra;
                                        psys->recalc = 0;
                                        save_hair(&sim, hcfra);
@@ -5036,12 +4292,12 @@ void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
                                psys->flag |= PSYS_HAIR_DONE;
 
                        if (psys->flag & PSYS_HAIR_DONE)
-                               hair_step(&sim, cfra);
+                               hair_step(&sim, cfra, use_render_params);
                        break;
                }
                case PART_FLUID:
                {
-                       particles_fluid_step(&sim, (int)cfra);
+                       particles_fluid_step(&sim, (int)cfra, use_render_params);
                        break;
                }
                default:
@@ -5052,6 +4308,7 @@ void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
                                {
                                        PARTICLE_P;
                                        float disp = psys_get_current_display_percentage(psys);
+                                       bool free_unexisting = false;
 
                                        /* Particles without dynamics haven't been reset yet because they don't use pointcache */
                                        if (psys->recalc & PSYS_RECALC_RESET)
@@ -5061,6 +4318,7 @@ void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
                                                free_keyed_keys(psys);
                                                distribute_particles(&sim, part->from);
                                                initialize_all_particles(&sim);
+                                               free_unexisting = true;
 
                                                /* flag for possible explode modifiers after this system */
                                                sim.psmd->flag |= eParticleSystemFlag_Pars;
@@ -5079,17 +4337,21 @@ void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
                                                        pa->flag &= ~PARS_NO_DISP;
                                        }
 
+                                       /* free unexisting after resetting particles */
+                                       if (free_unexisting)
+                                               free_unexisting_particles(&sim);
+
                                        if (part->phystype == PART_PHYS_KEYED) {
                                                psys_count_keyed_targets(&sim);
                                                set_keyed_keys(&sim);
-                                               psys_update_path_cache(&sim,(int)cfra);
+                                               psys_update_path_cache(&sim, (int)cfra, use_render_params);
                                        }
                                        break;
                                }
                                default:
                                {
                                        /* the main dynamic particle system step */
-                                       system_step(&sim, cfra);
+                                       system_step(&sim, cfra, use_render_params);
                                        break;
                                }
                        }
@@ -5111,3 +4373,40 @@ void particle_system_update(Scene *scene, Object *ob, ParticleSystem *psys)
                invert_m4_m4(psys->imat, ob->obmat);
 }
 
+/* ID looper */
+
+/* Calls func once for each ID pointer referenced by psys: its settings, its
+ * target and parent objects, the object of every particle target and, for
+ * boid systems, the ground pointer of every particle's boid data.
+ *
+ * psys:     particle system whose ID pointers are visited.
+ * func:     callback receiving psys, the address of the ID pointer, userdata
+ *           and the IDWALK_CB_* flags for that pointer.
+ * userdata: passed through to func unchanged. */
+void BKE_particlesystem_id_loop(ParticleSystem *psys, ParticleSystemIDFunc func, void *userdata)
+{
+       ParticleTarget *target;
+       ParticleData *particles;
+       int index;
+
+       func(psys, (ID **)&psys->part, userdata, IDWALK_CB_USER | IDWALK_CB_NEVER_NULL);
+       func(psys, (ID **)&psys->target_ob, userdata, IDWALK_CB_NOP);
+       func(psys, (ID **)&psys->parent, userdata, IDWALK_CB_NOP);
+
+       target = psys->targets.first;
+       while (target) {
+               func(psys, (ID **)&target->ob, userdata, IDWALK_CB_NOP);
+               target = target->next;
+       }
+
+       /* psys->part should never be NULL, yet it can be as an exception during deletion.
+        * See ID_REMAP_SKIP/FORCE/FLAG_NEVER_NULL_USAGE in BKE_library_remap. */
+       if (!psys->part || psys->part->phystype != PART_PHYS_BOIDS) {
+               return;
+       }
+
+       /* Boid particles each carry one more ID pointer. */
+       particles = psys->particles;
+       for (index = 0; index < psys->totpart; index++) {
+               func(psys, (ID **)&particles[index].boid->ground, userdata, IDWALK_CB_NOP);
+       }
+}
+
+/* **** Depsgraph evaluation **** */
+
+/* Resets the point caches of ob with PTCACHE_RESET_DEPSGRAPH. When the
+ * G_DEBUG_DEPSGRAPH_EVAL debug flag is set, the function name and the
+ * object's ID name are printed first. The evaluation context is unused. */
+void BKE_particle_system_eval_init(EvaluationContext *UNUSED(eval_ctx),
+                                   Scene *scene,
+                                   Object *ob)
+{
+       const bool do_debug_print = (G.debug & G_DEBUG_DEPSGRAPH_EVAL) != 0;
+
+       if (do_debug_print) {
+               printf("%s on %s\n", __func__, ob->id.name);
+       }
+
+       BKE_ptcache_object_reset(scene, ob, PTCACHE_RESET_DEPSGRAPH);
+}