Merging r58475 through r58700 from trunk into soc-2013-depsgraph_mt
[blender.git] / source / blender / editors / sculpt_paint / paint_image_proj.c
index bf897913b6c752b64ddd18bcf91ce4e23eee644f..db55dc271f1f8d6a64d8e3a6c81b84b4d92dbfce 100644 (file)
 #  include "BLI_winstuff.h"
 #endif
 
-#include "BLI_math.h"
 #include "BLI_blenlib.h"
 #include "BLI_linklist.h"
+#include "BLI_math.h"
+#include "BLI_math_color_blend.h"
 #include "BLI_memarena.h"
 #include "BLI_threads.h"
 #include "BLI_utildefines.h"
@@ -74,7 +75,7 @@
 #include "BKE_scene.h"
 #include "BKE_colortools.h"
 
-#include "BKE_tessmesh.h"
+#include "BKE_editmesh.h"
 
 #include "BIF_gl.h"
 #include "BIF_glutil.h"
@@ -88,6 +89,8 @@
 #include "ED_view3d.h"
 #include "ED_mesh.h"
 
+#include "GPU_extensions.h"
+
 #include "WM_api.h"
 #include "WM_types.h"
 
@@ -108,26 +111,6 @@ BLI_INLINE unsigned char f_to_char(const float val)
        return FTOCHAR(val);
 }
 
-#define IMAPAINT_FLOAT_RGBA_TO_CHAR(c, f)  {                                  \
-       (c)[0] = f_to_char((f)[0]);                                               \
-       (c)[1] = f_to_char((f)[1]);                                               \
-       (c)[2] = f_to_char((f)[2]);                                               \
-       (c)[3] = f_to_char((f)[3]);                                               \
-} (void)0
-
-#define IMAPAINT_CHAR_RGBA_TO_FLOAT(f, c)  {                                  \
-       (f)[0] = IMAPAINT_CHAR_TO_FLOAT((c)[0]);                                  \
-       (f)[1] = IMAPAINT_CHAR_TO_FLOAT((c)[1]);                                  \
-       (f)[2] = IMAPAINT_CHAR_TO_FLOAT((c)[2]);                                  \
-       (f)[3] = IMAPAINT_CHAR_TO_FLOAT((c)[3]);                                  \
-} (void)0
-
-#define IMAPAINT_FLOAT_RGB_TO_CHAR(c, f)  {                                   \
-       (c)[0] = f_to_char((f)[0]);                                               \
-       (c)[1] = f_to_char((f)[1]);                                               \
-       (c)[2] = f_to_char((f)[2]);                                               \
-} (void)0
-
 /* ProjectionPaint defines */
 
 /* approx the number of buckets to have under the brush,
@@ -262,10 +245,12 @@ typedef struct ProjPaintState {
        float normal_angle_inner;
        float normal_angle_range;       /* difference between normal_angle and normal_angle_inner, for easy access */
 
-       short is_ortho;
+       bool do_face_sel;               /* quick access to (me->editflag & ME_EDIT_PAINT_FACE_SEL) */
+       bool is_ortho;
        bool do_masking;              /* use masking during painting. Some operations such as airbrush may disable */
        bool is_texbrush;              /* only to avoid running  */
-       bool is_maskbrush;
+       bool is_maskbrush;            /* mask brush is applied before masking */
+       bool is_maskbrush_tiled;      /* mask brush is applied after masking */
 #ifndef PROJ_DEBUG_NOSEAMBLEED
        float seam_bleed_px;
 #endif
@@ -281,12 +266,14 @@ typedef struct ProjPaintState {
        Image *reproject_image;
        ImBuf *reproject_ibuf;
 
-
        /* threads */
        int thread_tot;
        int bucketMin[2];
        int bucketMax[2];
        int context_bucket_x, context_bucket_y; /* must lock threads while accessing these */
+
+       /* redraw */
+       bool need_redraw;
 } ProjPaintState;
 
 typedef union pixelPointer {
@@ -307,8 +294,8 @@ typedef struct ProjPixel {
        /* Only used when the airbrush is disabled.
         * Store the max mask value to avoid painting over an area with a lower opacity
         * with an advantage that we can avoid touching the pixel at all, if the
-        * new mask value is lower then mask_max */
-       unsigned short mask_max;
+        * new mask value is lower than mask_accum */
+       unsigned short mask_accum;
 
        /* for various reasons we may want to mask out painting onto this pixel */
        unsigned short mask;
@@ -513,7 +500,7 @@ static int project_paint_PickFace(const ProjPaintState *ps, const float pt[2], f
 }
 
 /* Converts a uv coord into a pixel location wrapping if the uv is outside 0-1 range */
-static void uvco_to_wrapped_pxco(float uv[2], int ibuf_x, int ibuf_y, float *x, float *y)
+static void uvco_to_wrapped_pxco(const float uv[2], int ibuf_x, int ibuf_y, float *x, float *y)
 {
        /* use */
        *x = (float)fmodf(uv[0], 1.0f);
@@ -568,7 +555,7 @@ static int project_paint_PickColor(const ProjPaintState *ps, const float pt[2],
                        else {
                                float rgba_tmp_f[4];
                                bilinear_interpolation_color_wrap(ibuf, NULL, rgba_tmp_f, x, y);
-                               IMAPAINT_FLOAT_RGBA_TO_CHAR(rgba, rgba_tmp_f);
+                               premul_float_to_straight_uchar(rgba, rgba_tmp_f);
                        }
                }
                else {
@@ -578,7 +565,7 @@ static int project_paint_PickColor(const ProjPaintState *ps, const float pt[2],
                        else {
                                unsigned char rgba_tmp[4];
                                bilinear_interpolation_color_wrap(ibuf, rgba_tmp, NULL, x, y);
-                               IMAPAINT_CHAR_RGBA_TO_FLOAT(rgba_fp, rgba_tmp);
+                               straight_uchar_to_premul_float(rgba_fp, rgba_tmp);
                        }
                }
        }
@@ -597,7 +584,7 @@ static int project_paint_PickColor(const ProjPaintState *ps, const float pt[2],
                if (rgba) {
                        if (ibuf->rect_float) {
                                float *rgba_tmp_fp = ibuf->rect_float + (xi + yi * ibuf->x * 4);
-                               IMAPAINT_FLOAT_RGBA_TO_CHAR(rgba, rgba_tmp_fp);
+                               premul_float_to_straight_uchar(rgba, rgba_tmp_fp);
                        }
                        else {
                                *((unsigned int *)rgba) = *(unsigned int *)(((char *)ibuf->rect) + ((xi + yi * ibuf->x) * 4));
@@ -609,8 +596,8 @@ static int project_paint_PickColor(const ProjPaintState *ps, const float pt[2],
                                copy_v4_v4(rgba_fp, (ibuf->rect_float + ((xi + yi * ibuf->x) * 4)));
                        }
                        else {
-                               char *tmp_ch = ((char *)ibuf->rect) + ((xi + yi * ibuf->x) * 4);
-                               IMAPAINT_CHAR_RGBA_TO_FLOAT(rgba_fp, tmp_ch);
+                               unsigned char *tmp_ch = ((unsigned char *)ibuf->rect) + ((xi + yi * ibuf->x) * 4);
+                               straight_uchar_to_premul_float(rgba_fp, tmp_ch);
                        }
                }
        }
@@ -1135,6 +1122,44 @@ static void screen_px_from_persp(
        interp_v3_v3v3v3(pixelScreenCo, v1co, v2co, v3co, w);
 }
 
+
+/* Same as screen_px_from_persp, except the plain (ortho) weights from barycentric_weights_v2()
+ * are handed back in 'w'. They are used to interpolate uvs correctly in the cloned uv layer. */
+static void screen_px_from_persp_ortho_weights(
+        float uv[2],
+        float v1co[4], float v2co[4], float v3co[4],  /* screenspace coords */
+        float uv1co[2], float uv2co[2], float uv3co[2],
+        float pixelScreenCo[4],  /* out: interpolated from v1co/v2co/v3co using the re-weighted weights */
+        float w[3])  /* out: weights before re-weighting (1/3 each when wtot is not positive) */
+{
+       float w_int[3];  /* 'w' scaled by each vert's 4th coord, then divided by their sum */
+       float wtot_inv, wtot;
+       barycentric_weights_v2(uv1co, uv2co, uv3co, uv, w);
+
+       /* re-weight from the 4th coord of each screen vert */
+       w_int[0] = w[0] * v1co[3];
+       w_int[1] = w[1] * v2co[3];
+       w_int[2] = w[2] * v3co[3];
+
+       wtot = w_int[0] + w_int[1] + w_int[2];
+
+       if (wtot > 0.0f) {
+               wtot_inv = 1.0f / wtot;
+               w_int[0] *= wtot_inv;
+               w_int[1] *= wtot_inv;
+               w_int[2] *= wtot_inv;
+       }
+       else {
+               w[0] = w[1] = w[2] =
+               w_int[0] = w_int[1] = w_int[2] = 1.0f / 3.0f;  /* dummy values for zero area face */
+       }
+       /* done re-weighting, note that 'w' itself is only overwritten in the fallback above */
+
+       /* do interpolation based on projected weight */
+       interp_v3_v3v3v3(pixelScreenCo, v1co, v2co, v3co, w_int);
+}
+
+
 static void project_face_pixel(const MTFace *tf_other, ImBuf *ibuf_other, const float w[3],
                                int side, unsigned char rgba_ub[4], float rgba_f[4])
 {
@@ -1190,10 +1215,10 @@ static float project_paint_uvpixel_mask(
                        project_face_pixel(tf_other, ibuf_other, w, side, rgba_ub, rgba_f);
 
                        if (ibuf_other->rect_float) { /* from float to float */
-                               mask = ((rgba_f[0] + rgba_f[1] + rgba_f[2]) / 3.0f) * rgba_f[3];
+                               mask = ((rgba_f[0] + rgba_f[1] + rgba_f[2]) * (1.0f / 3.0f)) * rgba_f[3];
                        }
                        else { /* from char to float */
-                               mask = ((rgba_ub[0] + rgba_ub[1] + rgba_ub[2]) / (256 * 3.0f)) * (rgba_ub[3] / 256.0f);
+                               mask = ((rgba_ub[0] + rgba_ub[1] + rgba_ub[2]) * (1.0f / (255.0f * 3.0f))) * (rgba_ub[3] * (1.0f / 255.0f));
                        }
 
                        BKE_image_release_ibuf(other_tpage, ibuf_other, NULL);
@@ -1337,14 +1362,16 @@ static ProjPixel *project_paint_uvpixel_init(
 
        if (ibuf->rect_float) {
                projPixel->pixel.f_pt = ibuf->rect_float + ((x_px + y_px * ibuf->x) * 4);
-               projPixel->origColor.f[0] = projPixel->newColor.f[0] = projPixel->pixel.f_pt[0];
-               projPixel->origColor.f[1] = projPixel->newColor.f[1] = projPixel->pixel.f_pt[1];
-               projPixel->origColor.f[2] = projPixel->newColor.f[2] = projPixel->pixel.f_pt[2];
-               projPixel->origColor.f[3] = projPixel->newColor.f[3] = projPixel->pixel.f_pt[3];
+               projPixel->origColor.f[0] = projPixel->pixel.f_pt[0];
+               projPixel->origColor.f[1] = projPixel->pixel.f_pt[1];
+               projPixel->origColor.f[2] = projPixel->pixel.f_pt[2];
+               projPixel->origColor.f[3] = projPixel->pixel.f_pt[3];
+               zero_v4(projPixel->newColor.f);
        }
        else {
                projPixel->pixel.ch_pt = ((unsigned char *)ibuf->rect + ((x_px + y_px * ibuf->x) * 4));
-               projPixel->origColor.uint = projPixel->newColor.uint = *projPixel->pixel.uint_pt;
+               projPixel->origColor.uint = *projPixel->pixel.uint_pt;
+               projPixel->newColor.uint = 0;
        }
 
        /* screenspace unclamped, we could keep its z and w values but don't need them at the moment */
@@ -1358,7 +1385,7 @@ static ProjPixel *project_paint_uvpixel_init(
        projPixel->y_px = y_px;
 
        projPixel->mask = (unsigned short)(mask * 65535);
-       projPixel->mask_max = 0;
+       projPixel->mask_accum = 0;
 
        /* which bounding box cell are we in?, needed for undo */
        projPixel->bb_cell_index = ((int)(((float)x_px / (float)ibuf->x) * PROJ_BOUNDBOX_DIV)) +
@@ -1600,9 +1627,9 @@ static int line_clip_rect2f(
 static void scale_quad(float insetCos[4][3], float *origCos[4], const float inset)
 {
        float cent[3];
-       cent[0] = (origCos[0][0] + origCos[1][0] + origCos[2][0] + origCos[3][0]) / 4.0f;
-       cent[1] = (origCos[0][1] + origCos[1][1] + origCos[2][1] + origCos[3][1]) / 4.0f;
-       cent[2] = (origCos[0][2] + origCos[1][2] + origCos[2][2] + origCos[3][2]) / 4.0f;
+       cent[0] = (origCos[0][0] + origCos[1][0] + origCos[2][0] + origCos[3][0]) * (1.0f / 4.0f);
+       cent[1] = (origCos[0][1] + origCos[1][1] + origCos[2][1] + origCos[3][1]) * (1.0f / 4.0f);
+       cent[2] = (origCos[0][2] + origCos[1][2] + origCos[2][2] + origCos[3][2]) * (1.0f / 4.0f);
 
        sub_v3_v3v3(insetCos[0], origCos[0], cent);
        sub_v3_v3v3(insetCos[1], origCos[1], cent);
@@ -1624,9 +1651,9 @@ static void scale_quad(float insetCos[4][3], float *origCos[4], const float inse
 static void scale_tri(float insetCos[4][3], float *origCos[4], const float inset)
 {
        float cent[3];
-       cent[0] = (origCos[0][0] + origCos[1][0] + origCos[2][0]) / 3.0f;
-       cent[1] = (origCos[0][1] + origCos[1][1] + origCos[2][1]) / 3.0f;
-       cent[2] = (origCos[0][2] + origCos[1][2] + origCos[2][2]) / 3.0f;
+       cent[0] = (origCos[0][0] + origCos[1][0] + origCos[2][0]) * (1.0f / 3.0f);
+       cent[1] = (origCos[0][1] + origCos[1][1] + origCos[2][1]) * (1.0f / 3.0f);
+       cent[2] = (origCos[0][2] + origCos[1][2] + origCos[2][2]) * (1.0f / 3.0f);
 
        sub_v3_v3v3(insetCos[0], origCos[0], cent);
        sub_v3_v3v3(insetCos[1], origCos[1], cent);
@@ -2180,6 +2207,7 @@ static void project_paint_face_init(const ProjPaintState *ps, const int thread_i
        float uv_clip[8][2];
        int uv_clip_tot;
        const short is_ortho = ps->is_ortho;
+       const short is_clone_other = ((ps->brush->imagepaint_tool == PAINT_TOOL_CLONE) && ps->dm_mtface_clone);
        const short do_backfacecull = ps->do_backfacecull;
        const short do_clip = ps->rv3d ? ps->rv3d->rflag & RV3D_CLIPPING : 0;
 
@@ -2191,8 +2219,8 @@ static void project_paint_face_init(const ProjPaintState *ps, const int thread_i
        /* Use tf_uv_pxoffset instead of tf->uv so we can offset the UV half a pixel
         * this is done so we can avoid offsetting all the pixels by 0.5 which causes
         * problems when wrapping negative coords */
-       xhalfpx = (0.5f + (PROJ_GEOM_TOLERANCE / 3.0f)) / ibuf_xf;
-       yhalfpx = (0.5f + (PROJ_GEOM_TOLERANCE / 4.0f)) / ibuf_yf;
+       xhalfpx = (0.5f + (PROJ_GEOM_TOLERANCE * (1.0f / 3.0f))) / ibuf_xf;
+       yhalfpx = (0.5f + (PROJ_GEOM_TOLERANCE * (1.0f / 4.0f))) / ibuf_yf;
 
        /* Note about (PROJ_GEOM_TOLERANCE/x) above...
         * Needed to add this offset since UV coords are often quads aligned to pixels.
@@ -2203,8 +2231,6 @@ static void project_paint_face_init(const ProjPaintState *ps, const int thread_i
         * but since the first thing most people try is painting onto a quad- better make it work.
         */
 
-
-
        tf_uv_pxoffset[0][0] = tf->uv[0][0] - xhalfpx;
        tf_uv_pxoffset[0][1] = tf->uv[0][1] - yhalfpx;
 
@@ -2289,7 +2315,8 @@ static void project_paint_face_init(const ProjPaintState *ps, const int thread_i
                                                has_x_isect = has_isect = 1;
 
                                                if (is_ortho) screen_px_from_ortho(uv, v1coSS, v2coSS, v3coSS, uv1co, uv2co, uv3co, pixelScreenCo, w);
-                                               else          screen_px_from_persp(uv, v1coSS, v2coSS, v3coSS, uv1co, uv2co, uv3co, pixelScreenCo, w);
+                                               else if (is_clone_other) screen_px_from_persp_ortho_weights(uv, v1coSS, v2coSS, v3coSS, uv1co, uv2co, uv3co, pixelScreenCo, w);
+                                               else screen_px_from_persp(uv, v1coSS, v2coSS, v3coSS, uv1co, uv2co, uv3co, pixelScreenCo, w);
 
                                                /* a pity we need to get the worldspace pixel location here */
                                                if (do_clip || do_3d_mapping) {
@@ -2475,8 +2502,8 @@ static void project_paint_face_init(const ProjPaintState *ps, const int thread_i
                                                                                if (!is_ortho) {
                                                                                        pixelScreenCo[3] = 1.0f;
                                                                                        mul_m4_v4((float(*)[4])ps->projectMat, pixelScreenCo); /* cast because of const */
-                                                                                       pixelScreenCo[0] = (float)(ps->winx / 2.0f) + (ps->winx / 2.0f) * pixelScreenCo[0] / pixelScreenCo[3];
-                                                                                       pixelScreenCo[1] = (float)(ps->winy / 2.0f) + (ps->winy / 2.0f) * pixelScreenCo[1] / pixelScreenCo[3];
+                                                                                       pixelScreenCo[0] = (float)(ps->winx * 0.5f) + (ps->winx * 0.5f) * pixelScreenCo[0] / pixelScreenCo[3];
+                                                                                       pixelScreenCo[1] = (float)(ps->winy * 0.5f) + (ps->winy * 0.5f) * pixelScreenCo[1] / pixelScreenCo[3];
                                                                                        pixelScreenCo[2] = pixelScreenCo[2] / pixelScreenCo[3]; /* Use the depth for bucket point occlusion */
                                                                                }
 
@@ -2783,22 +2810,32 @@ static void project_paint_begin(ProjPaintState *ps)
        Image *tpage_last = NULL, *tpage;
 
        /* Face vars */
+       MPoly *mpoly_orig;
        MFace *mf;
        MTFace *tf;
 
        int a, i; /* generic looping vars */
        int image_index = -1, face_index;
+
+       /* double lookup */
+       const int *index_mf_to_mpoly = NULL;
+       const int *index_mp_to_orig  = NULL;
+
        MVert *mv;
 
        MemArena *arena; /* at the moment this is just ps->arena_mt[0], but use this to show were not multithreading */
 
        const int diameter = 2 * BKE_brush_size_get(ps->scene, ps->brush);
 
+       bool reset_threads = false;
+
        /* ---- end defines ---- */
 
        if (ps->source == PROJ_SRC_VIEW)
                ED_view3d_clipping_local(ps->rv3d, ps->ob->obmat);  /* faster clipping lookups */
 
+       ps->do_face_sel = ((((Mesh *)ps->ob->data)->editflag & ME_EDIT_PAINT_FACE_SEL) != 0);
+
        /* paint onto the derived mesh */
 
        /* Workaround for subsurf selection, try the display mesh first */
@@ -2807,12 +2844,17 @@ static void project_paint_begin(ProjPaintState *ps)
                ps->dm = mesh_create_derived_render(ps->scene, ps->ob, ps->scene->customdata_mask | CD_MASK_MTFACE);
                ps->dm_release = TRUE;
        }
-       else if (ps->ob->derivedFinal && CustomData_has_layer(&ps->ob->derivedFinal->faceData, CD_MTFACE)) {
+       else if (ps->ob->derivedFinal &&
+                CustomData_has_layer(&ps->ob->derivedFinal->faceData, CD_MTFACE) &&
+                (ps->do_face_sel == false || CustomData_has_layer(&ps->ob->derivedFinal->polyData, CD_ORIGINDEX)))
+       {
                ps->dm = ps->ob->derivedFinal;
                ps->dm_release = FALSE;
        }
        else {
-               ps->dm = mesh_get_derived_final(ps->scene, ps->ob, ps->scene->customdata_mask | CD_MASK_MTFACE);
+               ps->dm = mesh_get_derived_final(
+                            ps->scene, ps->ob,  /* face-select looks up CD_ORIGINDEX later: request it by mask bit, not layer type */
+                            ps->scene->customdata_mask | CD_MASK_MTFACE | (ps->do_face_sel ? CD_MASK_ORIGINDEX : 0));
                ps->dm_release = TRUE;
        }
 
@@ -2832,6 +2874,20 @@ static void project_paint_begin(ProjPaintState *ps)
        ps->dm_totvert = ps->dm->getNumVerts(ps->dm);
        ps->dm_totface = ps->dm->getNumTessFaces(ps->dm);
 
+       if (ps->do_face_sel) {
+               index_mf_to_mpoly = ps->dm->getTessFaceDataArray(ps->dm, CD_ORIGINDEX);
+               index_mp_to_orig  = ps->dm->getPolyDataArray(ps->dm, CD_ORIGINDEX);
+               if (index_mf_to_mpoly == NULL) {
+                       index_mp_to_orig = NULL;
+               }
+               else {
+                       mpoly_orig = ((Mesh *)ps->ob->data)->mpoly;
+               }
+       }
+       else {
+               mpoly_orig = NULL;
+       }
+
        /* use clone mtface? */
 
 
@@ -2839,20 +2895,19 @@ static void project_paint_begin(ProjPaintState *ps)
         * this avoids re-generating the derived mesh just to get the new index */
        if (ps->do_layer_clone) {
                //int layer_num = CustomData_get_clone_layer(&ps->dm->faceData, CD_MTFACE);
-               int layer_num = CustomData_get_clone_layer(&((Mesh *)ps->ob->data)->fdata, CD_MTFACE);
+               int layer_num = CustomData_get_clone_layer(&((Mesh *)ps->ob->data)->pdata, CD_MTEXPOLY);
                if (layer_num != -1)
                        ps->dm_mtface_clone = CustomData_get_layer_n(&ps->dm->faceData, CD_MTFACE, layer_num);
 
                if (ps->dm_mtface_clone == NULL || ps->dm_mtface_clone == ps->dm_mtface) {
                        ps->do_layer_clone = FALSE;
                        ps->dm_mtface_clone = NULL;
-                       printf("ACK!\n");
                }
        }
 
        if (ps->do_layer_stencil) {
                //int layer_num = CustomData_get_stencil_layer(&ps->dm->faceData, CD_MTFACE);
-               int layer_num = CustomData_get_stencil_layer(&((Mesh *)ps->ob->data)->fdata, CD_MTFACE);
+               int layer_num = CustomData_get_stencil_layer(&((Mesh *)ps->ob->data)->pdata, CD_MTEXPOLY);
                if (layer_num != -1)
                        ps->dm_mtface_stencil = CustomData_get_layer_n(&ps->dm->faceData, CD_MTFACE, layer_num);
 
@@ -2942,8 +2997,8 @@ static void project_paint_begin(ProjPaintState *ps)
                        }
 
                        /* same as #ED_view3d_ob_project_mat_get */
-                       mult_m4_m4m4(vmat, viewmat, ps->ob->obmat);
-                       mult_m4_m4m4(ps->projectMat, winmat, vmat);
+                       mul_m4_m4m4(vmat, viewmat, ps->ob->obmat);
+                       mul_m4_m4m4(ps->projectMat, winmat, vmat);
                }
 
 
@@ -2974,8 +3029,8 @@ static void project_paint_begin(ProjPaintState *ps)
                        mul_v3_m4v3(projScreenCo, ps->projectMat, mv->co);
 
                        /* screen space, not clamped */
-                       projScreenCo[0] = (float)(ps->winx / 2.0f) + (ps->winx / 2.0f) * projScreenCo[0];
-                       projScreenCo[1] = (float)(ps->winy / 2.0f) + (ps->winy / 2.0f) * projScreenCo[1];
+                       projScreenCo[0] = (float)(ps->winx * 0.5f) + (ps->winx * 0.5f) * projScreenCo[0];
+                       projScreenCo[1] = (float)(ps->winy * 0.5f) + (ps->winy * 0.5f) * projScreenCo[1];
                        minmax_v2v2_v2(ps->screenMin, ps->screenMax, projScreenCo);
                }
        }
@@ -2988,8 +3043,8 @@ static void project_paint_begin(ProjPaintState *ps)
 
                        if (projScreenCo[3] > ps->clipsta) {
                                /* screen space, not clamped */
-                               projScreenCo[0] = (float)(ps->winx / 2.0f) + (ps->winx / 2.0f) * projScreenCo[0] / projScreenCo[3];
-                               projScreenCo[1] = (float)(ps->winy / 2.0f) + (ps->winy / 2.0f) * projScreenCo[1] / projScreenCo[3];
+                               projScreenCo[0] = (float)(ps->winx * 0.5f) + (ps->winx * 0.5f) * projScreenCo[0] / projScreenCo[3];
+                               projScreenCo[1] = (float)(ps->winy * 0.5f) + (ps->winy * 0.5f) * projScreenCo[1] / projScreenCo[3];
                                projScreenCo[2] = projScreenCo[2] / projScreenCo[3]; /* Use the depth for bucket point occlusion */
                                minmax_v2v2_v2(ps->screenMin, ps->screenMax, projScreenCo);
                        }
@@ -3039,6 +3094,10 @@ static void project_paint_begin(ProjPaintState *ps)
 
        /* printf("\tscreenspace bucket division x:%d y:%d\n", ps->buckets_x, ps->buckets_y); */
 
+       if (ps->buckets_x > PROJ_BUCKET_RECT_MAX || ps->buckets_y > PROJ_BUCKET_RECT_MAX) {
+               reset_threads = true;
+       }
+
        /* really high values could cause problems since it has to allocate a few
         * (ps->buckets_x*ps->buckets_y) sized arrays  */
        CLAMP(ps->buckets_x, PROJ_BUCKET_RECT_MIN, PROJ_BUCKET_RECT_MAX);
@@ -3062,12 +3121,13 @@ static void project_paint_begin(ProjPaintState *ps)
         * threads is being able to fill in multiple buckets at once.
         * Only use threads for bigger brushes. */
 
-       if (ps->scene->r.mode & R_FIXED_THREADS) {
-               ps->thread_tot = ps->scene->r.threads;
-       }
-       else {
-               ps->thread_tot = BLI_system_thread_count();
-       }
+       ps->thread_tot = BKE_scene_num_threads(ps->scene);
+
+       /* workaround for #35057: disable threading when the brush diameter is too small for the
+        * optimum number of buckets (the bucket count exceeded PROJ_BUCKET_RECT_MAX, see above) */
+       if (reset_threads)
+               ps->thread_tot = 1;
+
        for (a = 0; a < ps->thread_tot; a++) {
                ps->arena_mt[a] = BLI_memarena_new(1 << 16, "project paint arena");
        }
@@ -3097,8 +3157,8 @@ static void project_paint_begin(ProjPaintState *ps)
                }
        }
 
-
        for (face_index = 0, tf = ps->dm_mtface, mf = ps->dm_mface; face_index < ps->dm_totface; mf++, tf++, face_index++) {
+               bool is_face_sel;
 
 #ifndef PROJ_DEBUG_NOSEAMBLEED
                /* add face user if we have bleed enabled, set the UV seam flags later */
@@ -3113,10 +3173,23 @@ static void project_paint_begin(ProjPaintState *ps)
                }
 #endif
 
-               tpage = project_paint_face_image(ps, ps->dm_mtface, face_index);
-
-               if (tpage && ((((Mesh *)ps->ob->data)->editflag & ME_EDIT_PAINT_FACE_SEL) == 0 || mf->flag & ME_FACE_SEL)) {
+               if (ps->do_face_sel) {
+                       int orig_index;
+                       if (index_mp_to_orig && ((orig_index = DM_origindex_mface_mpoly(index_mf_to_mpoly, index_mp_to_orig,
+                                                                                       face_index))) != ORIGINDEX_NONE)
+                       {
+                               MPoly *mp = &mpoly_orig[orig_index];
+                               is_face_sel = ((mp->flag & ME_FACE_SEL) != 0);
+                       }
+                       else {
+                               is_face_sel = ((mf->flag & ME_FACE_SEL) != 0);
+                       }
+               }
+               else {
+                       is_face_sel = true;
+               }
 
+               if (is_face_sel && (tpage = project_paint_face_image(ps, ps->dm_mtface, face_index))) {
                        float *v1coSS, *v2coSS, *v3coSS, *v4coSS = NULL;
 
                        v1coSS = ps->screenCoords[mf->v1];
@@ -3225,7 +3298,7 @@ static void project_paint_begin(ProjPaintState *ps)
        BLI_linklist_free(image_LinkList, NULL);
 }
 
-static void paint_proj_begin_clone(ProjPaintState *ps, const int mouse[2])
+static void paint_proj_begin_clone(ProjPaintState *ps, const float mouse[2])
 {
        /* setup clone offset */
        if (ps->tool == PAINT_TOOL_CLONE) {
@@ -3235,8 +3308,8 @@ static void paint_proj_begin_clone(ProjPaintState *ps, const int mouse[2])
 
                projCo[3] = 1.0f;
                mul_m4_v4(ps->projectMat, projCo);
-               ps->cloneOffset[0] = mouse[0] - ((float)(ps->winx / 2.0f) + (ps->winx / 2.0f) * projCo[0] / projCo[3]);
-               ps->cloneOffset[1] = mouse[1] - ((float)(ps->winy / 2.0f) + (ps->winy / 2.0f) * projCo[1] / projCo[3]);
+               ps->cloneOffset[0] = mouse[0] - ((float)(ps->winx * 0.5f) + (ps->winx * 0.5f) * projCo[0] / projCo[3]);
+               ps->cloneOffset[1] = mouse[1] - ((float)(ps->winy * 0.5f) + (ps->winy * 0.5f) * projCo[1] / projCo[3]);
        }
 }
 
@@ -3531,72 +3604,42 @@ typedef struct ProjectHandle {
        struct ImagePool *pool;
 } ProjectHandle;
 
-static void blend_color_mix(unsigned char cp[4], const unsigned char cp1[4], const unsigned char cp2[4], const int fac)
+static void do_projectpaint_clone(ProjPaintState *ps, ProjPixel *projPixel, float mask)
 {
-       /* this and other blending modes previously used >>8 instead of /255. both
-        * are not equivalent (>>8 is /256), and the former results in rounding
-        * errors that can turn colors black fast after repeated blending */
-       const int mfac = 255 - fac;
-
-       cp[0] = (mfac * cp1[0] + fac * cp2[0]) / 255;
-       cp[1] = (mfac * cp1[1] + fac * cp2[1]) / 255;
-       cp[2] = (mfac * cp1[2] + fac * cp2[2]) / 255;
-       cp[3] = (mfac * cp1[3] + fac * cp2[3]) / 255;
-}
+       const unsigned char *clone_pt = ((ProjPixelClone *)projPixel)->clonepx.ch;
 
-static void blend_color_mix_float(float cp[4], const float cp1[4], const float cp2[4], const float fac)
-{
-       const float mfac = 1.0f - fac;
-       cp[0] = mfac * cp1[0] + fac * cp2[0];
-       cp[1] = mfac * cp1[1] + fac * cp2[1];
-       cp[2] = mfac * cp1[2] + fac * cp2[2];
-       cp[3] = mfac * cp1[3] + fac * cp2[3];
-}
+       if (clone_pt[3]) {
+               unsigned char clone_rgba[4];
 
-static void blend_color_mix_accum(unsigned char cp[4], const unsigned char cp1[4], const unsigned char cp2[4], const int fac)
-{
-       /* this and other blending modes previously used >>8 instead of /255. both
-        * are not equivalent (>>8 is /256), and the former results in rounding
-        * errors that can turn colors black fast after repeated blending */
-       const int mfac = 255 - fac;
-       const int alpha = cp1[3] + ((fac * cp2[3]) / 255);
-
-       cp[0] = (mfac * cp1[0] + fac * cp2[0]) / 255;
-       cp[1] = (mfac * cp1[1] + fac * cp2[1]) / 255;
-       cp[2] = (mfac * cp1[2] + fac * cp2[2]) / 255;
-       cp[3] = alpha > 255 ? 255 : alpha;
-}
-static void blend_color_mix_accum_float(float cp[4], const float cp1[4], const unsigned char cp2[4], const float fac)
-{
-       const float mfac = 1.0f - fac;
-       const float alpha = cp1[3] + (fac * (cp2[3] / 255.0f));
-
-       cp[0] = (mfac * cp1[0] + (fac * (cp2[0] / 255.0f)));
-       cp[1] = (mfac * cp1[1] + (fac * (cp2[1] / 255.0f)));
-       cp[2] = (mfac * cp1[2] + (fac * (cp2[2] / 255.0f)));
-       cp[3] = alpha > 1.0f ? 1.0f : alpha;
-}
+               clone_rgba[0] = clone_pt[0];  /* rgb bytes are copied unchanged */
+               clone_rgba[1] = clone_pt[1];
+               clone_rgba[2] = clone_pt[2];
+               clone_rgba[3] = (unsigned char)(clone_pt[3] * mask);  /* only the 4th (alpha) byte is scaled by mask */
 
-
-static void do_projectpaint_clone(ProjPaintState *ps, ProjPixel *projPixel, float alpha, float mask)
-{
-       if (ps->do_masking && mask < 1.0f) {
-               projPixel->newColor.uint = IMB_blend_color(projPixel->newColor.uint, ((ProjPixelClone *)projPixel)->clonepx.uint, (int)(alpha * 255), ps->blend);
-               blend_color_mix(projPixel->pixel.ch_pt,  projPixel->origColor.ch, projPixel->newColor.ch, (int)(mask * 255));
-       }
-       else {
-               *projPixel->pixel.uint_pt = IMB_blend_color(*projPixel->pixel.uint_pt, ((ProjPixelClone *)projPixel)->clonepx.uint, (int)(alpha * mask * 255), ps->blend);
+               if (ps->do_masking) {
+                       IMB_blend_color_byte(projPixel->pixel.ch_pt, projPixel->origColor.ch, clone_rgba, ps->blend);
+               }
+               else {
+                       IMB_blend_color_byte(projPixel->pixel.ch_pt, projPixel->pixel.ch_pt, clone_rgba, ps->blend);
+               }
        }
 }
 
-static void do_projectpaint_clone_f(ProjPaintState *ps, ProjPixel *projPixel, float alpha, float mask)
+static void do_projectpaint_clone_f(ProjPaintState *ps, ProjPixel *projPixel, float mask)
 {
-       if (ps->do_masking && mask < 1.0f) {
-               IMB_blend_color_float(projPixel->newColor.f, projPixel->newColor.f, ((ProjPixelClone *)projPixel)->clonepx.f, alpha, ps->blend);
-               blend_color_mix_float(projPixel->pixel.f_pt,  projPixel->origColor.f, projPixel->newColor.f, mask);
-       }
-       else {
-               IMB_blend_color_float(projPixel->pixel.f_pt, projPixel->pixel.f_pt, ((ProjPixelClone *)projPixel)->clonepx.f, alpha * mask, ps->blend);
+       const float *clone_pt = ((ProjPixelClone *)projPixel)->clonepx.f;
+
+       if (clone_pt[3]) {  /* nothing is written when the clone pixel's 4th (alpha) component is zero */
+               float clone_rgba[4];
+
+               mul_v4_v4fl(clone_rgba, clone_pt, mask);  /* unlike the byte version, the whole vector is passed with mask */
+
+               if (ps->do_masking) {  /* with masking, blend onto the stored original color instead of the current pixel */
+                       IMB_blend_color_float(projPixel->pixel.f_pt, projPixel->origColor.f, clone_rgba, ps->blend);
+               }
+               else {
+                       IMB_blend_color_float(projPixel->pixel.f_pt, projPixel->pixel.f_pt, clone_rgba, ps->blend);
+               }
        }
 }
 
@@ -3606,19 +3649,19 @@ static void do_projectpaint_clone_f(ProjPaintState *ps, ProjPixel *projPixel, fl
  * accumulation of color greater then 'projPixel->mask' however in the case of smear its not
  * really that important to be correct as it is with clone and painting
  */
-static void do_projectpaint_smear(ProjPaintState *ps, ProjPixel *projPixel, float alpha, float mask,
+static void do_projectpaint_smear(ProjPaintState *ps, ProjPixel *projPixel, float mask,
                                   MemArena *smearArena, LinkNode **smearPixels, const float co[2])
 {
        unsigned char rgba_ub[4];
 
        if (project_paint_PickColor(ps, co, NULL, rgba_ub, 1) == 0)
                return;
-       /* ((ProjPixelClone *)projPixel)->clonepx.uint = IMB_blend_color(*projPixel->pixel.uint_pt, *((unsigned int *)rgba_ub), (int)(alpha*mask*255), ps->blend); */
-       blend_color_mix(((ProjPixelClone *)projPixel)->clonepx.ch, projPixel->pixel.ch_pt, rgba_ub, (int)(alpha * mask * 255));
+       /* mix the current pixel with the color picked at co, weighted by mask; presumably the result lands in clonepx (first argument), and the pixel is then queued on smearPixels */
+       blend_color_interpolate_byte(((ProjPixelClone *)projPixel)->clonepx.ch, projPixel->pixel.ch_pt, rgba_ub, mask);
        BLI_linklist_prepend_arena(smearPixels, (void *)projPixel, smearArena);
 }
 
-static void do_projectpaint_smear_f(ProjPaintState *ps, ProjPixel *projPixel, float alpha, float mask,
+static void do_projectpaint_smear_f(ProjPaintState *ps, ProjPixel *projPixel, float mask,
                                     MemArena *smearArena, LinkNode **smearPixels_f, const float co[2])
 {
        float rgba[4];
@@ -3626,8 +3669,7 @@ static void do_projectpaint_smear_f(ProjPaintState *ps, ProjPixel *projPixel, fl
        if (project_paint_PickColor(ps, co, rgba, NULL, 1) == 0)
                return;
 
-       /* (ProjPixelClone *)projPixel)->clonepx.uint = IMB_blend_color(*((unsigned int *)rgba_smear), *((unsigned int *)rgba_ub), (int)(alpha*mask*255), ps->blend); */
-       blend_color_mix_float(((ProjPixelClone *)projPixel)->clonepx.f, projPixel->pixel.f_pt, rgba, alpha * mask);
+       blend_color_interpolate_float(((ProjPixelClone *)projPixel)->clonepx.f, projPixel->pixel.f_pt, rgba, mask); /* float variant of the smear mix: sources are (current pixel, picked color), weighted by mask; presumably stored in clonepx */
        BLI_linklist_prepend_arena(smearPixels_f, (void *)projPixel, smearArena);
 }
 
@@ -3640,7 +3682,7 @@ static float inv_pow2(float f)
        return 1.0f - f;
 }
 
-static void do_projectpaint_soften_f(ProjPaintState *ps, ProjPixel *projPixel, float alpha, float mask,
+static void do_projectpaint_soften_f(ProjPaintState *ps, ProjPixel *projPixel, float mask,
                                      MemArena *softenArena, LinkNode **softenPixels)
 {
        unsigned int accum_tot = 0;
@@ -3648,9 +3690,8 @@ static void do_projectpaint_soften_f(ProjPaintState *ps, ProjPixel *projPixel, f
 
        float *rgba = projPixel->newColor.f;
 
-       /* sigh, alpha values tend to need to be a _lot_ stronger with blur */
+       /* sigh, mask values tend to need to be a _lot_ stronger with blur, so mask is remapped through inv_pow2() */
        mask  = inv_pow2(mask);
-       alpha = inv_pow2(alpha);
 
        /* rather then painting, accumulate surrounding colors */
        zero_v4(rgba);
@@ -3667,13 +3708,12 @@ static void do_projectpaint_soften_f(ProjPaintState *ps, ProjPixel *projPixel, f
 
        if (LIKELY(accum_tot != 0)) {
                mul_v4_fl(rgba, 1.0f / (float)accum_tot);
-               blend_color_mix_float(rgba, projPixel->pixel.f_pt, rgba, alpha);
-               if (mask < 1.0f) blend_color_mix_float(rgba, projPixel->origColor.f, rgba, mask);
+               blend_color_interpolate_float(rgba, rgba, projPixel->pixel.f_pt, mask); /* NOTE(review): sources are passed as (averaged rgba, current pixel), the opposite order from do_projectpaint_smear_f (current pixel, picked color); confirm which source mask = 1 selects */
                BLI_linklist_prepend_arena(softenPixels, (void *)projPixel, softenArena);
        }
 }
 
-static void do_projectpaint_soften(ProjPaintState *ps, ProjPixel *projPixel, float alpha, float mask,
+static void do_projectpaint_soften(ProjPaintState *ps, ProjPixel *projPixel, float mask,
                                    MemArena *softenArena, LinkNode **softenPixels)
 {
        unsigned int accum_tot = 0;
@@ -3681,9 +3721,8 @@ static void do_projectpaint_soften(ProjPaintState *ps, ProjPixel *projPixel, flo
 
        float rgba[4];  /* convert to byte after */
 
-       /* sigh, alpha values tend to need to be a _lot_ stronger with blur */
+       /* sigh, mask values tend to need to be a _lot_ stronger with blur, so mask is remapped through inv_pow2() */
        mask  = inv_pow2(mask);
-       alpha = inv_pow2(alpha);
 
        /* rather then painting, accumulate surrounding colors */
        zero_v4(rgba);
@@ -3702,67 +3741,56 @@ static void do_projectpaint_soften(ProjPaintState *ps, ProjPixel *projPixel, flo
                unsigned char *rgba_ub = projPixel->newColor.ch;
 
                mul_v4_fl(rgba, 1.0f / (float)accum_tot);
-               IMAPAINT_FLOAT_RGBA_TO_CHAR(rgba_ub, rgba);
+               premul_float_to_straight_uchar(rgba_ub, rgba); /* per the helper name: premultiplied float average to straight-alpha bytes, written into newColor.ch (verify) */
 
-               blend_color_mix(rgba_ub, projPixel->pixel.ch_pt, rgba_ub, (int)(alpha * 255));
-               if (mask != 1.0f) blend_color_mix(rgba_ub, projPixel->origColor.ch, rgba_ub, (int)(mask * 255));
+               blend_color_interpolate_byte(rgba_ub, rgba_ub, projPixel->pixel.ch_pt, mask); /* NOTE(review): sources are passed as (averaged color, current pixel), the opposite order from do_projectpaint_smear (current pixel, picked color); confirm which source mask = 1 selects */
                BLI_linklist_prepend_arena(softenPixels, (void *)projPixel, softenArena);
        }
 }
 
-BLI_INLINE void rgba_float_to_uchar__mul_v3(unsigned char rgba_ub[4], const float rgba[4], const float rgb[3])
-{
-       rgba_ub[0] = f_to_char(rgba[0] * rgb[0]);
-       rgba_ub[1] = f_to_char(rgba[1] * rgb[1]);
-       rgba_ub[2] = f_to_char(rgba[2] * rgb[2]);
-       rgba_ub[3] = f_to_char(rgba[3]);
-}
-
-static void do_projectpaint_draw(ProjPaintState *ps, ProjPixel *projPixel, const float rgba[4], float alpha, float mask)
+static void do_projectpaint_draw(ProjPaintState *ps, ProjPixel *projPixel, const float texrgb[3], float mask)
 {
+       float rgb[3]; /* working color: the brush color, multiplied by texrgb for texture brushes */
        unsigned char rgba_ub[4];
 
+       copy_v3_v3(rgb, ps->brush->rgb);
+       /* texrgb is only read when ps->is_texbrush is set */
        if (ps->is_texbrush) {
-               rgba_float_to_uchar__mul_v3(rgba_ub, rgba, ps->brush->rgb);
-       }
-       else {
-               IMAPAINT_FLOAT_RGB_TO_CHAR(rgba_ub, ps->brush->rgb);
-               rgba_ub[3] = 255;
+               /* XXX actually should convert texrgb from linear to srgb here */
+               mul_v3_v3(rgb, texrgb);
        }
 
-       if (ps->do_masking && mask < 1.0f) {
-               projPixel->newColor.uint = IMB_blend_color(projPixel->newColor.uint, *((unsigned int *)rgba_ub), (int)(alpha * 255), ps->blend);
-               blend_color_mix(projPixel->pixel.ch_pt,  projPixel->origColor.ch, projPixel->newColor.ch, (int)(mask * 255));
+       rgb_float_to_uchar(rgba_ub, rgb);
+       rgba_ub[3] = FTOCHAR(mask); /* mask is converted to a byte and stored as the fourth channel of the painted color */
+       /* with do_masking the blend starts from the stored origColor, otherwise from the current pixel value */
+       if (ps->do_masking) {
+               IMB_blend_color_byte(projPixel->pixel.ch_pt, projPixel->origColor.ch, rgba_ub, ps->blend);
        }
        else {
-               *projPixel->pixel.uint_pt = IMB_blend_color(*projPixel->pixel.uint_pt, *((unsigned int *)rgba_ub), (int)(alpha * mask * 255), ps->blend);
+               IMB_blend_color_byte(projPixel->pixel.ch_pt, projPixel->pixel.ch_pt, rgba_ub, ps->blend);
        }
 }
 
-static void do_projectpaint_draw_f(ProjPaintState *ps, ProjPixel *projPixel, float rgba[4], float alpha, float mask)
+static void do_projectpaint_draw_f(ProjPaintState *ps, ProjPixel *projPixel, const float texrgb[3], float mask)
 {
-       if (ps->is_texbrush) {
-               /* rgba already holds a texture result here from higher level function */
-               float rgba_br[3];
-               srgb_to_linearrgb_v3_v3(rgba_br, ps->brush->rgb);
-               mul_v3_v3(rgba, rgba_br);
-       }
-       else {
-               srgb_to_linearrgb_v3_v3(rgba, ps->brush->rgb);
-               rgba[3] = 1.0;
-       }
+       float rgba[4];
+       /* start from the brush color, converted from sRGB to linear per the helper name; texrgb is only read for texture brushes */
+       srgb_to_linearrgb_v3_v3(rgba, ps->brush->rgb);
 
-       if (ps->do_masking && mask < 1.0f) {
-               IMB_blend_color_float(projPixel->newColor.f, projPixel->newColor.f, rgba, alpha, ps->blend);
-               blend_color_mix_float(projPixel->pixel.f_pt,  projPixel->origColor.f, projPixel->newColor.f, mask);
+       if (ps->is_texbrush)
+               mul_v3_v3(rgba, texrgb);
+       /* rgb is scaled by mask and mask is stored as the fourth channel; presumably a premultiplied color (confirm against IMB_blend_color_float) */
+       mul_v3_fl(rgba, mask);
+       rgba[3] = mask;
+       /* with do_masking the blend starts from the stored origColor, otherwise from the current pixel value */
+       if (ps->do_masking) {
+               IMB_blend_color_float(projPixel->pixel.f_pt, projPixel->origColor.f, rgba, ps->blend);
        }
        else {
-               IMB_blend_color_float(projPixel->pixel.f_pt, projPixel->pixel.f_pt, rgba, alpha * mask, ps->blend);
+               IMB_blend_color_float(projPixel->pixel.f_pt, projPixel->pixel.f_pt, rgba, ps->blend);
        }
 }
 
-
-
 /* run this for single and multithreaded painting */
 static void *do_projectpaint_thread(void *ph_v)
 {
@@ -3783,7 +3811,7 @@ static void *do_projectpaint_thread(void *ph_v)
        ProjPaintImage *last_projIma = NULL;
        ImagePaintPartialRedraw *last_partial_redraw_cell;
 
-       float rgba[4], alpha, dist_nosqrt, dist;
+       float dist_nosqrt, dist;
 
        float falloff;
        int bucket_index;
@@ -3794,10 +3822,10 @@ static void *do_projectpaint_thread(void *ph_v)
        /* for smear only */
        float pos_ofs[2] = {0};
        float co[2];
-       float mask = 1.0f; /* airbrush wont use mask */
        unsigned short mask_short;
-       const float radius = (float)BKE_brush_size_get(ps->scene, brush);
-       const float radius_squared = radius * radius; /* avoid a square root with every dist comparison */
+       const float brush_alpha = BKE_brush_alpha_get(ps->scene, brush);
+       const float brush_radius = (float)BKE_brush_size_get(ps->scene, brush);
+       const float brush_radius_sq = brush_radius * brush_radius; /* avoid a square root with every dist comparison */
 
        short lock_alpha = ELEM(brush->blend, IMB_BLEND_ERASE_ALPHA, IMB_BLEND_ADD_ALPHA) ? 0 : brush->flag & BRUSH_LOCK_ALPHA;
 
@@ -3851,9 +3879,15 @@ static void *do_projectpaint_thread(void *ph_v)
                                        bicubic_interpolation_color(ps->reproject_ibuf, projPixel->newColor.ch, NULL,
                                                                    projPixel->projCoSS[0], projPixel->projCoSS[1]);
                                        if (projPixel->newColor.ch[3]) {
-                                               mask = ((float)projPixel->mask) / 65535.0f;
-                                               blend_color_mix_accum_float(projPixel->pixel.f_pt,  projPixel->origColor.f,
-                                                                           projPixel->newColor.ch, (mask * (projPixel->newColor.ch[3] / 255.0f)));
+                                               float newColor_f[4];
+                                               float mask = ((float)projPixel->mask) * (1.0f / 65535.0f);
+
+                                               straight_uchar_to_premul_float(newColor_f, projPixel->newColor.ch);
+                                               IMB_colormanagement_colorspace_to_scene_linear_v4(newColor_f, TRUE, ps->reproject_ibuf->rect_colorspace);
+                                               mul_v4_v4fl(newColor_f, newColor_f, mask);
+
+                                               blend_color_mix_float(projPixel->pixel.f_pt,  projPixel->origColor.f,
+                                                                     newColor_f);
                                        }
                                }
                                else {
@@ -3861,9 +3895,11 @@ static void *do_projectpaint_thread(void *ph_v)
                                        bicubic_interpolation_color(ps->reproject_ibuf, projPixel->newColor.ch, NULL,
                                                                    projPixel->projCoSS[0], projPixel->projCoSS[1]);
                                        if (projPixel->newColor.ch[3]) {
-                                               mask = ((float)projPixel->mask) / 65535.0f;
-                                               blend_color_mix_accum(projPixel->pixel.ch_pt,  projPixel->origColor.ch,
-                                                                     projPixel->newColor.ch, (int)(mask * projPixel->newColor.ch[3]));
+                                               float mask = ((float)projPixel->mask) * (1.0f / 65535.0f);
+                                               projPixel->newColor.ch[3] *= mask;
+
+                                               blend_color_mix_byte(projPixel->pixel.ch_pt,  projPixel->origColor.ch,
+                                                                    projPixel->newColor.ch);
                                        }
                                }
                        }
@@ -3878,62 +3914,81 @@ static void *do_projectpaint_thread(void *ph_v)
                                dist_nosqrt = len_squared_v2v2(projPixel->projCoSS, pos);
 
                                /*if (dist < radius) {*/ /* correct but uses a sqrtf */
-                               if (dist_nosqrt <= radius_squared) {
-                                       float samplecos[3];
+                               if (dist_nosqrt <= brush_radius_sq) {
                                        dist = sqrtf(dist_nosqrt);
 
-                                       falloff = BKE_brush_curve_strength_clamp(ps->brush, dist, radius);
+                                       falloff = BKE_brush_curve_strength_clamp(ps->brush, dist, brush_radius);
 
-                                       if (ps->is_texbrush) {
-                                               MTex *mtex = &brush->mtex;
-                                               /* taking 3d copy to account for 3D mapping too. It gets concatenated during sampling */
-                                               if (mtex->brush_map_mode == MTEX_MAP_MODE_3D) {
-                                                       copy_v3_v3(samplecos, projPixel->worldCoSS);
+                                       if (falloff > 0.0f) {
+                                               float texrgb[3];
+                                               float mask = falloff;
+
+                                               if (ps->do_masking) {
+                                                       /* masking to keep brush contribution to a pixel limited. note we do not do
+                                                        * a simple max(mask, mask_accum), as this is very sensitive to spacing and
+                                                        * gives poor results for strokes crossing themselves.
+                                                        * 
+                                                        * Instead we use a formula that adds up but approaches brush_alpha slowly
+                                                        * and never exceeds it, which gives nice smooth results. */
+                                                       float mask_accum = projPixel->mask_accum;
+
+                                                       if (ps->is_maskbrush) {
+                                                               float texmask = BKE_brush_sample_masktex(ps->scene, ps->brush, projPixel->projCoSS, thread_index, pool);
+                                                               CLAMP(texmask, 0.0f, 1.0f);
+                                                               mask = mask_accum + (brush_alpha * texmask * 65535.0f - mask_accum) * mask;
+                                                       }
+                                                       else {
+                                                               mask = mask_accum + (brush_alpha * 65535.0f - mask_accum) * mask;
+                                                       }
+                                                       mask_short = (unsigned short)mask;
+
+                                                       if (mask_short > projPixel->mask_accum) {
+                                                               projPixel->mask_accum = mask_short;
+                                                               mask = mask_short * (1.0f / 65535.0f);
+                                                       }
+                                                       else {
+                                                               /* Go onto the next pixel */
+                                                               continue;
+                                                       }
                                                }
                                                else {
-                                                       copy_v2_v2(samplecos, projPixel->projCoSS);
-                                                       samplecos[2] = 0.0f;
+                                                       mask *= brush_alpha;
+                                                       if (ps->is_maskbrush) {
+                                                               float texmask = BKE_brush_sample_masktex(ps->scene, ps->brush, projPixel->projCoSS, thread_index, pool);
+                                                               CLAMP(texmask, 0.0f, 1.0f);
+                                                               mask *= texmask;
+                                                       }
                                                }
-                                       }
 
-                                       if (falloff > 0.0f) {
                                                if (ps->is_texbrush) {
+                                                       MTex *mtex = &brush->mtex;
+                                                       float samplecos[3];
+                                                       float texrgba[4];
+
+                                                       /* taking 3d copy to account for 3D mapping too. It gets concatenated during sampling */
+                                                       if (mtex->brush_map_mode == MTEX_MAP_MODE_3D) {
+                                                               copy_v3_v3(samplecos, projPixel->worldCoSS);
+                                                       }
+                                                       else {
+                                                               copy_v2_v2(samplecos, projPixel->projCoSS);
+                                                               samplecos[2] = 0.0f;
+                                                       }
+
                                                        /* note, for clone and smear, we only use the alpha, could be a special function */
-                                                       BKE_brush_sample_tex_3D(ps->scene, brush, samplecos, rgba, thread_index, pool);
-                                                       alpha = rgba[3];
-                                               }
-                                               else {
-                                                       alpha = 1.0f;
-                                               }
+                                                       BKE_brush_sample_tex_3D(ps->scene, brush, samplecos, texrgba, thread_index, pool);
 
-                                               if (ps->is_maskbrush) {
-                                                       alpha *= BKE_brush_sample_masktex(ps->scene, ps->brush, projPixel->projCoSS, thread_index, pool);
+                                                       copy_v3_v3(texrgb, texrgba);
+                                                       mask *= texrgba[3];
                                                }
 
-                                               if (!ps->do_masking) {
-                                                       /* for an aurbrush there is no real mask, so just multiply the alpha by it */
-                                                       alpha *= falloff * BKE_brush_alpha_get(ps->scene, brush);
-                                                       mask = ((float)projPixel->mask) / 65535.0f;
+                                               if (ps->is_maskbrush_tiled) {
+                                                       mask *= BKE_brush_sample_masktex(ps->scene, ps->brush, projPixel->projCoSS, thread_index, pool);
                                                }
-                                               else {
-                                                       /* This brush dosnt accumulate so add some curve to the brushes falloff */
-                                                       falloff = 1.0f - falloff;
-                                                       falloff = 1.0f - (falloff * falloff);
-
-                                                       mask_short = (unsigned short)(projPixel->mask * (BKE_brush_alpha_get(ps->scene, brush) * falloff));
-                                                       if (mask_short > projPixel->mask_max) {
-                                                               mask = ((float)mask_short) / 65535.0f;
-                                                               projPixel->mask_max = mask_short;
-                                                       }
-                                                       else {
-                                                               /*mask = ((float)projPixel->mask_max)/65535.0f;*/
 
-                                                               /* Go onto the next pixel */
-                                                               continue;
-                                                       }
-                                               }
+                                               /* extra mask for normal, layer stencil, .. */
+                                               mask *= ((float)projPixel->mask) * (1.0f / 65535.0f);
 
-                                               if (alpha > 0.0f) {
+                                               if (mask > 0.0f) {
 
                                                        /* copy of code above */
                                                        if (last_index != projPixel->image_index) {
@@ -3952,33 +4007,25 @@ static void *do_projectpaint_thread(void *ph_v)
                                                        last_partial_redraw_cell->x2 = max_ii(last_partial_redraw_cell->x2, (int)projPixel->x_px + 1);
                                                        last_partial_redraw_cell->y2 = max_ii(last_partial_redraw_cell->y2, (int)projPixel->y_px + 1);
 
-
+                                                       /* texrgb is not used for clone, smear or soften */
                                                        switch (tool) {
                                                                case PAINT_TOOL_CLONE:
-                                                                       if (is_floatbuf) {
-                                                                               if (((ProjPixelClone *)projPixel)->clonepx.f[3]) {
-                                                                                       do_projectpaint_clone_f(ps, projPixel, alpha, mask); /* rgba isn't used for cloning, only alpha */
-                                                                               }
-                                                                       }
-                                                                       else {
-                                                                               if (((ProjPixelClone *)projPixel)->clonepx.ch[3]) {
-                                                                                       do_projectpaint_clone(ps, projPixel, alpha, mask); /* rgba isn't used for cloning, only alpha */
-                                                                               }
-                                                                       }
+                                                                       if (is_floatbuf) do_projectpaint_clone_f(ps, projPixel, mask);
+                                                                       else             do_projectpaint_clone(ps, projPixel, mask);
                                                                        break;
                                                                case PAINT_TOOL_SMEAR:
                                                                        sub_v2_v2v2(co, projPixel->projCoSS, pos_ofs);
 
-                                                                       if (is_floatbuf) do_projectpaint_smear_f(ps, projPixel, alpha, mask, smearArena, &smearPixels_f, co);
-                                                                       else do_projectpaint_smear(ps, projPixel, alpha, mask, smearArena, &smearPixels, co);
+                                                                       if (is_floatbuf) do_projectpaint_smear_f(ps, projPixel, mask, smearArena, &smearPixels_f, co);
+                                                                       else             do_projectpaint_smear(ps, projPixel, mask, smearArena, &smearPixels, co);
                                                                        break;
                                                                case PAINT_TOOL_SOFTEN:
-                                                                       if (is_floatbuf) do_projectpaint_soften_f(ps, projPixel, alpha, mask, softenArena, &softenPixels_f);
-                                                                       else do_projectpaint_soften(ps, projPixel, alpha, mask, softenArena, &softenPixels);
+                                                                       if (is_floatbuf) do_projectpaint_soften_f(ps, projPixel, mask, softenArena, &softenPixels_f);
+                                                                       else             do_projectpaint_soften(ps, projPixel, mask, softenArena, &softenPixels);
                                                                        break;
                                                                default:
-                                                                       if (is_floatbuf) do_projectpaint_draw_f(ps, projPixel, rgba, alpha, mask);
-                                                                       else do_projectpaint_draw(ps, projPixel, rgba, alpha, mask);
+                                                                       if (is_floatbuf) do_projectpaint_draw_f(ps, projPixel, texrgb, mask);
+                                                                       else             do_projectpaint_draw(ps, projPixel, texrgb, mask);
                                                                        break;
                                                        }
                                                }
@@ -4103,43 +4150,36 @@ static int project_paint_op(void *state, const float lastpos[2], const float pos
 }
 
 
-int paint_proj_stroke(bContext *C, void *pps, const int prevmval_i[2], const int mval_i[2])
+void paint_proj_stroke(bContext *C, void *pps, const float prev_pos[2], const float pos[2])
 {
        ProjPaintState *ps = pps;
-       int a, redraw;
-       float pos[2], prev_pos[2];
-
-       pos[0] = (float)(mval_i[0]);
-       pos[1] = (float)(mval_i[1]);
-
-       prev_pos[0] = (float)(prevmval_i[0]);
-       prev_pos[1] = (float)(prevmval_i[1]);
+       int a;
 
        /* clone gets special treatment here to avoid going through image initialization */
        if (ps->tool == PAINT_TOOL_CLONE && ps->mode == BRUSH_STROKE_INVERT) {
                Scene *scene = ps->scene;
                View3D *v3d = ps->v3d;
                float *cursor = give_cursor(scene, v3d);
+               int mval_i[2] = {(int)pos[0], (int)pos[1]}; /* float position truncated to int for the ED_view3d_autodist call below */
 
                view3d_operator_needs_opengl(C);
 
                if (!ED_view3d_autodist(scene, ps->ar, v3d, mval_i, cursor, false))
-                       return 0;
+                       return;
 
                ED_region_tag_redraw(ps->ar);
 
-               return 0;
+               return;
        }
 
-       for (a = 0; a < ps->image_tot; a++)
-               partial_redraw_array_init(ps->projImages[a].partRedrawRect);
-
-       redraw = project_paint_op(ps, prev_pos, pos) ? 1 : 0;
-
-       if (project_image_refresh_tagged(ps))
-               return redraw;
+       /* continue adding to existing partial redraw rects until redraw: the rects are only re-initialized while no redraw is pending */
+       if (!ps->need_redraw) {
+               for (a = 0; a < ps->image_tot; a++)
+                       partial_redraw_array_init(ps->projImages[a].partRedrawRect);
+       }
 
-       return 0;
+       if (project_paint_op(ps, prev_pos, pos))
+               ps->need_redraw = true; /* the image refresh is no longer done here; paint_proj_redraw() reads and clears this flag */
 }
 
 
@@ -4151,24 +4191,35 @@ static void project_state_init(bContext *C, Object *ob, ProjPaintState *ps, int
 
        /* brush */
        ps->mode = mode;
-       ps->brush = paint_brush(&settings->imapaint.paint);
+       ps->brush = BKE_paint_brush(&settings->imapaint.paint);
        if (ps->brush) {
                Brush *brush = ps->brush;
                ps->tool = brush->imagepaint_tool;
                ps->blend = brush->blend;
 
                /* disable for 3d mapping also because painting on mirrored mesh can create "stripes" */
-               ps->do_masking = (brush->flag & BRUSH_AIRBRUSH || (brush->mtex.tex &&
-                                !ELEM(brush->mtex.brush_map_mode, MTEX_MAP_MODE_TILED, MTEX_MAP_MODE_STENCIL)))
+               ps->do_masking = (brush->flag & BRUSH_AIRBRUSH ||
+                                 (brush->imagepaint_tool == PAINT_TOOL_SMEAR) ||
+                                 (brush->mtex.tex && !ELEM3(brush->mtex.brush_map_mode, MTEX_MAP_MODE_TILED, MTEX_MAP_MODE_STENCIL, MTEX_MAP_MODE_3D)))
                                 ? false : true;
                ps->is_texbrush = (brush->mtex.tex && brush->imagepaint_tool == PAINT_TOOL_DRAW) ? true : false;
-               ps->is_maskbrush = (brush->mask_mtex.tex) ? true : false;
+               ps->is_maskbrush = false;
+               ps->is_maskbrush_tiled = false;
+               if (brush->mask_mtex.tex) {
+                       if (ELEM(brush->mask_mtex.brush_map_mode, MTEX_MAP_MODE_STENCIL, MTEX_MAP_MODE_TILED)) {
+                               ps->is_maskbrush_tiled = true;
+                       }
+                       else {
+                               ps->is_maskbrush = true;
+                       }
+               }
        }
        else {
                /* brush may be NULL*/
                ps->do_masking = false;
                ps->is_texbrush = false;
                ps->is_maskbrush = false;
+               ps->is_maskbrush_tiled = false;
        }
 
        /* sizeof(ProjPixel), since we alloc this a _lot_ */
@@ -4218,7 +4269,7 @@ static void project_state_init(bContext *C, Object *ob, ProjPaintState *ps, int
        return;
 }
 
-void *paint_proj_new_stroke(bContext *C, Object *ob, const int mouse[2], int mode)
+void *paint_proj_new_stroke(bContext *C, Object *ob, const float mouse[2], int mode)
 {
        ProjPaintState *ps = MEM_callocN(sizeof(ProjPaintState), "ProjectionPaintState");
        project_state_init(C, ob, ps, mode);
@@ -4259,6 +4310,28 @@ void *paint_proj_new_stroke(bContext *C, Object *ob, const int mouse[2], int mod
        return ps;
 }
 
+void paint_proj_redraw(const bContext *C, void *pps, bool final)
+{
+       ProjPaintState *ps = pps;
+       /* refresh tagged images when a stroke step has set need_redraw; if nothing is pending and this is not the final call, return without notifying */
+       if (ps->need_redraw) {
+               project_image_refresh_tagged(ps);
+
+               ps->need_redraw = false;
+       }
+       else if (!final) {
+               return;
+       }
+       /* final call: send an image-edited notifier; otherwise only tag the context region for redraw */
+       if (final) {
+               /* compositor listener deals with updating */
+               WM_event_add_notifier(C, NC_IMAGE | NA_EDITED, NULL);
+       }
+       else {
+               ED_region_tag_redraw(CTX_wm_region(C));
+       }
+}
+
 void paint_proj_stroke_done(void *pps)
 {
        ProjPaintState *ps = pps;
@@ -4331,6 +4404,7 @@ static int texture_paint_camera_project_exec(bContext *C, wmOperator *op)
        /* override */
        ps.is_texbrush = false;
        ps.is_maskbrush = false;
+       ps.is_maskbrush_tiled = false;
        ps.do_masking = false;
        orig_brush_size = BKE_brush_size_get(scene, ps.brush);
        BKE_brush_size_set(scene, ps.brush, 32); /* cover the whole image */
@@ -4411,7 +4485,7 @@ static int texture_paint_image_from_view_exec(bContext *C, wmOperator *op)
 
        RNA_string_get(op->ptr, "filepath", filename);
 
-       glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxsize);
+       maxsize = GPU_max_texture_size();
 
        if (w > maxsize) w = maxsize;
        if (h > maxsize) h = maxsize;