Fluids: improve multithreaded CPU usage.
author: Brecht Van Lommel <brechtvanlommel@gmail.com>
Sun, 2 Oct 2016 13:43:02 +0000 (15:43 +0200)
committer: Brecht Van Lommel <brechtvanlommel@gmail.com>
Sun, 2 Oct 2016 14:38:14 +0000 (16:38 +0200)
Fixes for clang-omp: use fewer shared variables and fix some cases of threads
writing to the same memory location. Issue found by Jens Verwiebe, who reports
a 30% speedup on a 16-core CPU when using this with a recent clang-omp version.

intern/elbeem/intern/solver_class.h
intern/elbeem/intern/solver_main.cpp

index 593fea1b998b657db28d44accc6741b714e8573f..2b2e214458da8479d677f870d941fd8d836e20ee 100644 (file)
@@ -332,7 +332,7 @@ class LbmFsgrSolver :
                void debugMarkCellCall(int level, int vi,int vj,int vk);
                
                // loop over grid, stream&collide update
-               void mainLoop(int lev);
+               void mainLoop(const int lev);
                // change time step size
                void adaptTimestep();
                //! init mObjectSpeeds for current parametrization
index 55a8d3eb4aadb267ebfc269f92190c7c0c87dbb6..a338bb77b4c076c74562223418bcf7de6d181c89 100644 (file)
@@ -355,7 +355,7 @@ void LbmFsgrSolver::fineAdvance()
 //! fine step function
 /*****************************************************************************/
 void 
-LbmFsgrSolver::mainLoop(int lev)
+LbmFsgrSolver::mainLoop(const int lev)
 {
        // loops over _only inner_ cells  -----------------------------------------------------------------------------------
        
@@ -376,13 +376,16 @@ LbmFsgrSolver::mainLoop(int lev)
   // main loop region
        const bool doReduce = true;
        const int gridLoopBound=1;
+       const int gDebugLevel = ::gDebugLevel;
+       int calcNumInvIfCells = 0;
+       LbmFloat calcInitialMass = 0;
        GRID_REGION_INIT();
 #if PARALLEL==1
-#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
+#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
   reduction(+: \
          calcCurrentMass,calcCurrentVolume, \
                calcCellsFilled,calcCellsEmptied, \
-               calcNumUsedCells )
+               calcNumUsedCells,calcNumInvIfCells,calcInitialMass)
        GRID_REGION_START();
 #else // PARALLEL==1
        GRID_REGION_START();
@@ -468,7 +471,7 @@ LbmFsgrSolver::mainLoop(int lev)
                                calcCurrentMass += iniRho; 
                                calcCurrentVolume += 1.0; 
                                calcNumUsedCells++;
-                               mInitialMass += iniRho;
+                               calcInitialMass += iniRho;
                                // dont treat cell until next step
                                continue;
                        } 
@@ -479,7 +482,7 @@ LbmFsgrSolver::mainLoop(int lev)
                        if(isnotValid) {
                                // remove fluid cells, shouldnt be here anyway
                                LbmFloat fluidRho = m[0]; FORDF1 { fluidRho += m[l]; }
-                               mInitialMass -= fluidRho;
+                               calcInitialMass -= fluidRho;
                                const LbmFloat iniRho = 0.0;
                                RAC(tcel, dMass) = RAC(tcel, dFfrac) = iniRho;
                                RAC(tcel, dFlux) = FLUX_INIT;
@@ -608,8 +611,8 @@ LbmFsgrSolver::mainLoop(int lev)
                // read distribution funtions of adjacent cells = stream step
                DEFAULT_STREAM;
 
-               if((nbored & CFFluid)==0) { newFlag |= CFNoNbFluid; mNumInvIfCells++; }
-               if((nbored & CFEmpty)==0) { newFlag |= CFNoNbEmpty; mNumInvIfCells++; }
+               if((nbored & CFFluid)==0) { newFlag |= CFNoNbFluid; calcNumInvIfCells++; }
+               if((nbored & CFEmpty)==0) { newFlag |= CFNoNbEmpty; calcNumInvIfCells++; }
 
                // calculate mass exchange for interface cells 
                LbmFloat myfrac = RAC(ccel,dFfrac);
@@ -809,7 +812,7 @@ LbmFsgrSolver::mainLoop(int lev)
                        // fill if cells in inflow region
                        if(myfrac<0.5) { 
                                mass += 0.25; 
-                               mInitialMass += 0.25;
+                               calcInitialMass += 0.25;
                        }
                        const int OId = oldFlag>>24;
                        const LbmVec vel(mObjectSpeeds[OId]);
@@ -1013,7 +1016,7 @@ LbmFsgrSolver::mainLoop(int lev)
                if( (mass) <= (rho * (   -FSGR_MAGICNR)) ) { ifemptied = 1; }
 
                if(oldFlag & (CFMbndOutflow)) {
-                       mInitialMass -= mass;
+                       calcInitialMass -= mass;
                        mass = myfrac = 0.0;
                        iffilled = 0; ifemptied = 1;
                }
@@ -1105,6 +1108,8 @@ LbmFsgrSolver::mainLoop(int lev)
        mNumFilledCells  = calcCellsFilled;
        mNumEmptiedCells = calcCellsEmptied;
        mNumUsedCells = calcNumUsedCells;
+       mNumInvIfCells += calcNumInvIfCells;
+       mInitialMass += calcInitialMass;
 }
 
 
@@ -1115,13 +1120,14 @@ LbmFsgrSolver::preinitGrids()
        const int lev = mMaxRefine;
        const bool doReduce = false;
        const int gridLoopBound=0;
+       const int gDebugLevel = ::gDebugLevel;
 
        // preinit both grids
        for(int s=0; s<2; s++) {
        
                GRID_REGION_INIT();
 #if PARALLEL==1
-#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
+#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
   reduction(+: \
          calcCurrentMass,calcCurrentVolume, \
                calcCellsFilled,calcCellsEmptied, \
@@ -1155,10 +1161,11 @@ LbmFsgrSolver::standingFluidPreinit()
        const int lev = mMaxRefine;
        const bool doReduce = false;
        const int gridLoopBound=1;
+       const int gDebugLevel = ::gDebugLevel;
 
        GRID_REGION_INIT();
 #if PARALLEL==1
-#pragma omp parallel default(shared) num_threads(mNumOMPThreads) \
+#pragma omp parallel default(none) num_threads(mNumOMPThreads) \
   reduction(+: \
          calcCurrentMass,calcCurrentVolume, \
                calcCellsFilled,calcCellsEmptied, \