Make scopes update multi-threaded
authorSergey Sharybin <sergey.vfx@gmail.com>
Tue, 19 May 2015 15:18:26 +0000 (20:18 +0500)
committerSergey Sharybin <sergey.vfx@gmail.com>
Thu, 11 Jun 2015 21:37:49 +0000 (23:37 +0200)
This commits makes scopes evaluation multithreaded by using OpenMP
for the outer loop of pixel processor. it also makes all the changes
needed for keeping performance as high as possible by keeping data
local to thread for as long as it's possible.

Reviewers: campbellbarton

Differential Revision: https://developer.blender.org/D1334

source/blender/blenkernel/intern/colortools.c

index 96488d021ce2b1a7b47f72181cac7fea13d52ad8..0af809e79f8bd0ddffccb26deeebd4ffbaf851a2 100644 (file)
@@ -43,6 +43,7 @@
 #include "BLI_blenlib.h"
 #include "BLI_math.h"
 #include "BLI_utildefines.h"
+#include "BLI_threads.h"
 
 #include "BKE_colortools.h"
 #include "BKE_curve.h"
 #include "IMB_colormanagement.h"
 #include "IMB_imbuf_types.h"
 
+#ifdef _OPENMP
+#  include <omp.h>
+#endif
+
 /* ********************************* color curve ********************* */
 
 /* ***************** operations on full struct ************* */
@@ -1029,11 +1034,25 @@ void BKE_histogram_update_sample_line(Histogram *hist, ImBuf *ibuf, const ColorM
 void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *view_settings,
                    const ColorManagedDisplaySettings *display_settings)
 {
+#ifdef _OPENMP
+       const int num_threads = BLI_system_thread_count();
+#else
+       const int num_threads = 1;
+#endif
        int a, y;
        unsigned int nl, na, nr, ng, nb;
        double divl, diva, divr, divg, divb;
        unsigned char *display_buffer;
-       unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a;
+       unsigned int bin_lum[256] = {0},
+                    bin_r[256] = {0},
+                    bin_g[256] = {0},
+                    bin_b[256] = {0},
+                    bin_a[256] = {0};
+       unsigned int bin_lum_t[BLENDER_MAX_THREADS][256] = {{0}},
+                    bin_r_t[BLENDER_MAX_THREADS][256] = {{0}},
+                    bin_g_t[BLENDER_MAX_THREADS][256] = {{0}},
+                    bin_b_t[BLENDER_MAX_THREADS][256] = {{0}},
+                    bin_a_t[BLENDER_MAX_THREADS][256] = {{0}};
        int ycc_mode = -1;
        const bool is_float = (ibuf->rect_float != NULL);
        void *cache_handle = NULL;
@@ -1068,13 +1087,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
                        break;
        }
 
-       /* temp table to count pix value for histogram */
-       bin_r     = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-       bin_g     = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-       bin_b     = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-       bin_a = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-       bin_lum   = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-
        /* convert to number of lines with logarithmic scale */
        scopes->sample_lines = (scopes->accuracy * 0.01f) * (scopes->accuracy * 0.01f) * ibuf->y;
        
@@ -1114,9 +1126,19 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
                                                                             &cache_handle);
        }
 
+       /* Keep number of threads in sync with the merge parts below. */
+#pragma omp parallel for private(y) schedule(static) num_threads(num_threads) if(ibuf->y > 256)
        for (y = 0; y < ibuf->y; y++) {
+#ifdef _OPENMP
+               const int thread_idx = omp_get_thread_num();
+#else
+               const int thread_idx = 0;
+#endif
                const float *rf = NULL;
-               unsigned char *rc = NULL;
+               const unsigned char *rc = NULL;
+               const bool do_sample_line = (y % rows_per_sample_line) == 0;
+               float min[3] = { FLT_MAX,  FLT_MAX,  FLT_MAX},
+                     max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
                int x, c;
                if (is_float)
                        rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;
@@ -1140,27 +1162,27 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
                        /* check for min max */
                        if (ycc_mode == -1) {
                                for (c = 0; c < 3; c++) {
-                                       if (rgba[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = rgba[c];
-                                       if (rgba[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = rgba[c];
+                                       if (rgba[c] < min[c]) min[c] = rgba[c];
+                                       if (rgba[c] > max[c]) max[c] = rgba[c];
                                }
                        }
                        else {
                                rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
                                for (c = 0; c < 3; c++) {
                                        ycc[c] *= INV_255;
-                                       if (ycc[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = ycc[c];
-                                       if (ycc[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = ycc[c];
+                                       if (ycc[c] < min[c]) min[c] = ycc[c];
+                                       if (ycc[c] > max[c]) max[c] = ycc[c];
                                }
                        }
                        /* increment count for histo*/
-                       bin_lum[get_bin_float(luma)] += 1;
-                       bin_r[get_bin_float(rgba[0])] += 1;
-                       bin_g[get_bin_float(rgba[1])] += 1;
-                       bin_b[get_bin_float(rgba[2])] += 1;
-                       bin_a[get_bin_float(rgba[3])] += 1;
+                       bin_lum_t[thread_idx][get_bin_float(luma)] += 1;
+                       bin_r_t[thread_idx][get_bin_float(rgba[0])] += 1;
+                       bin_g_t[thread_idx][get_bin_float(rgba[1])] += 1;
+                       bin_b_t[thread_idx][get_bin_float(rgba[2])] += 1;
+                       bin_a_t[thread_idx][get_bin_float(rgba[3])] += 1;
 
                        /* save sample if needed */
-                       if (y % rows_per_sample_line == 0) {
+                       if (do_sample_line) {
                                const float fx = (float)x / (float)ibuf->x;
                                const int savedlines = y / rows_per_sample_line;
                                const int idx = 2 * (ibuf->x * savedlines + x);
@@ -1170,16 +1192,46 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
                        rf += ibuf->channels;
                        rc += ibuf->channels;
                }
+#pragma omp critical
+               {
+                       for (c = 0; c < 3; c++) {
+                               if (min[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = min[c];
+                               if (max[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = max[c];
+                       }
+               }
+       }
+
+#ifdef _OPENMP
+       if (ibuf->y > 256) {
+               for (a = 0; a < num_threads; a++) {
+                       int b;
+                       for (b = 0; b < 256; b++) {
+                               bin_lum[b] += bin_lum_t[a][b];
+                               bin_r[b] += bin_r_t[a][b];
+                               bin_g[b] += bin_g_t[a][b];
+                               bin_b[b] += bin_b_t[a][b];
+                               bin_a[b] += bin_a_t[a][b];
+                       }
+               }
+       }
+       else
+#endif
+       {
+               memcpy(bin_lum, bin_lum_t[0], sizeof(bin_lum));
+               memcpy(bin_r, bin_r_t[0], sizeof(bin_r));
+               memcpy(bin_g, bin_g_t[0], sizeof(bin_g));
+               memcpy(bin_b, bin_b_t[0], sizeof(bin_b));
+               memcpy(bin_a, bin_a_t[0], sizeof(bin_a));
        }
 
        /* test for nicer distribution even - non standard, leave it out for a while */
 #if 0
-       for (x = 0; x < 256; x++) {
-               bin_lum[x] = sqrt (bin_lum[x]);
-               bin_r[x] = sqrt(bin_r[x]);
-               bin_g[x] = sqrt(bin_g[x]);
-               bin_b[x] = sqrt(bin_b[x]);
-               bin_a[x] = sqrt(bin_a[x]);
+       for (a = 0; a < 256; a++) {
+               bin_lum[a] = sqrt (bin_lum[a]);
+               bin_r[a] = sqrt(bin_r[a]);
+               bin_g[a] = sqrt(bin_g[a]);
+               bin_b[a] = sqrt(bin_b[a]);
+               bin_a[a] = sqrt(bin_a[a]);
        }
 #endif
        
@@ -1205,11 +1257,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
                scopes->hist.data_b[a] = bin_b[a] * divb;
                scopes->hist.data_a[a] = bin_a[a] * diva;
        }
-       MEM_freeN(bin_lum);
-       MEM_freeN(bin_r);
-       MEM_freeN(bin_g);
-       MEM_freeN(bin_b);
-       MEM_freeN(bin_a);
 
        if (cm_processor)
                IMB_colormanagement_processor_free(cm_processor);