Optimization for speed regression in mipmap generation
author Sergey Sharybin <sergey.vfx@gmail.com>
Fri, 4 Jan 2013 17:28:58 +0000 (17:28 +0000)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Fri, 4 Jan 2013 17:28:58 +0000 (17:28 +0000)
The regression was caused by the alpha premul cleanup commit; the reason
for the slowdown was the uchar <-> float conversion, which is slow.

Replaced with uchar <-> int conversion, which seems to be accurate
enough and mostly eliminates the slowdown.

The slowdown was easy to notice when a movie clip is used as the 3D viewport
background and undistortion is enabled. In this case every frame
will re-calculate mipmaps.

It's still a bit slower than mipmap generation before the cleanup
commit, but couldn't think of an extra boost here atm.

source/blender/blenlib/BLI_math_color.h
source/blender/blenlib/intern/math_color_inline.c
source/blender/imbuf/intern/scaling.c

index 3831ec3cbb4110f3a7054822d50b174de6d05fc5..1af6985deaaab73905be4e23ac34773d53831338 100644 (file)
@@ -106,6 +106,8 @@ MINLINE void premul_to_straight_v4(float straight[4], const float premul[4]);
 MINLINE void straight_to_premul_v4(float straight[4], const float premul[4]);
 MINLINE void straight_uchar_to_premul_float(float result[4], const unsigned char color[4]);
 MINLINE void premul_float_to_straight_uchar(unsigned char *result, const float color[4]);
+MINLINE void straight_uchar_to_premul_int(int result[4], const unsigned char color[4]);
+MINLINE void premul_int_to_straight_uchar(unsigned char *result, const int color[4]);
 
 /************************** Other *************************/
 
index b8eeca50db665f7501ac84df6c40b31cc0072410..f8bb6b81a1d7f197e6083986ca03b5798161ed9e 100644 (file)
@@ -326,4 +326,32 @@ MINLINE void premul_float_to_straight_uchar(unsigned char *result, const float c
        }
 }
 
+/* Premultiply a straight-alpha RGBA byte color, writing integer channels to result. */
+MINLINE void straight_uchar_to_premul_int(int result[4], const unsigned char color[4])
+{
+       const int a = color[3];
+       int i;
+
+       /* Scale R, G, B by alpha / 255 using truncating integer division. */
+       for (i = 0; i < 3; i++) {
+               result[i] = (color[i] * a) / 255;
+       }
+
+       /* Alpha itself is passed through unchanged. */
+       result[3] = a;
+}
+
+/**
+ * Convert a premultiplied-alpha RGBA color held in ints to straight-alpha bytes.
+ *
+ * result: output, 4 bytes written in R, G, B, A order.
+ * color:  input, premultiplied R, G, B followed by alpha.
+ *
+ * Values are stored to unsigned char without clamping, so the caller is
+ * presumably expected to pass channels in 0..alpha and alpha in 0..255
+ * (NOTE(review): confirm against callers).
+ */
+MINLINE void premul_int_to_straight_uchar(unsigned char *result, const int color[4])
+{
+       if (color[3] == 0 || color[3] == 255) {
+               /* Alpha 0 would divide by zero and alpha 255 is a no-op: copy as-is. */
+               result[0] = color[0];
+               result[1] = color[1];
+               result[2] = color[2];
+               result[3] = color[3];
+       }
+       else {
+               int alpha = color[3];
+
+               /* Undo the premultiplication; every channel goes to its own slot
+                * (previously all three were written to result[0]). */
+               result[0] = color[0] * 255 / alpha;
+               result[1] = color[1] * 255 / alpha;
+               result[2] = color[2] * 255 / alpha;
+               result[3] = alpha;
+       }
+}
+
 #endif /* __MATH_COLOR_INLINE_C__ */
index 75d1f4c412db013898207e7eea602447fc856fc3..8d3cd648fc1e2edebe07a693ec91cd3357785bc2 100644 (file)
@@ -311,19 +311,19 @@ void imb_onehalf_no_alloc(struct ImBuf *ibuf2, struct ImBuf *ibuf1)
                for (y = ibuf2->y; y > 0; y--) {
                        cp2 = cp1 + (ibuf1->x << 2);
                        for (x = ibuf2->x; x > 0; x--) {
-                               float p1f[8], p2f[8], destf[4];
+                               int p1i[8], p2i[8], desti[4];
 
-                               straight_uchar_to_premul_float(p1f, cp1);
-                               straight_uchar_to_premul_float(p2f, cp2);
-                               straight_uchar_to_premul_float(p1f + 4, cp1 + 4);
-                               straight_uchar_to_premul_float(p2f + 4, cp2 + 4);
+                               straight_uchar_to_premul_int(p1i, cp1);
+                               straight_uchar_to_premul_int(p2i, cp2);
+                               straight_uchar_to_premul_int(p1i + 4, cp1 + 4);
+                               straight_uchar_to_premul_int(p2i + 4, cp2 + 4);
 
-                               destf[0] = 0.25f * (p1f[0] + p2f[0] + p1f[4] + p2f[4]);
-                               destf[1] = 0.25f * (p1f[1] + p2f[1] + p1f[5] + p2f[5]);
-                               destf[2] = 0.25f * (p1f[2] + p2f[2] + p1f[6] + p2f[6]);
-                               destf[3] = 0.25f * (p1f[3] + p2f[3] + p1f[7] + p2f[7]);
+                               desti[0] = (p1i[0] + p2i[0] + p1i[4] + p2i[4]) >> 2;
+                               desti[1] = (p1i[1] + p2i[1] + p1i[5] + p2i[5]) >> 2;
+                               desti[2] = (p1i[2] + p2i[2] + p1i[6] + p2i[6]) >> 2;
+                               desti[3] = (p1i[3] + p2i[3] + p1i[7] + p2i[7]) >> 2;
 
-                               premul_float_to_straight_uchar(dest, destf);
+                               premul_int_to_straight_uchar(dest, desti);
 
                                cp1 += 8;
                                cp2 += 8;