Compositor:
author Jeroen Bakker <j.bakker@atmind.nl>
Wed, 11 Jul 2012 20:51:00 +0000 (20:51 +0000)
committer Jeroen Bakker <j.bakker@atmind.nl>
Wed, 11 Jul 2012 20:51:00 +0000 (20:51 +0000)
re-optimized the Defocus node.
 * localized MemoryBuffers
 * removed read(x,y) calls
 * shuffled some lines in the execute pixel
 * added a readNoCheck function to the MemoryBuffer (only use this when
you are certain you are reading a pixel inside the MemoryBuffer).

source/blender/compositor/intern/COM_MemoryBuffer.h
source/blender/compositor/operations/COM_OpenCLKernels.cl
source/blender/compositor/operations/COM_OpenCLKernels.cl.h
source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp
source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h

index 51a45efc05180cfc760268a425060e89cb205392..eed0c796cd8dbf017221518a9ee57fc10dc4fc3d 100644 (file)
@@ -140,6 +140,13 @@ public:
                }
        }
 
+       inline void readNoCheck(float result[4], int x, int y)  { // Copies the pixel at (x, y) into result with no bounds check; the caller must guarantee (x, y) lies inside this buffer.
+               const int dx = x - this->m_rect.xmin; // column relative to the buffer rect's left edge
+               const int dy = y - this->m_rect.ymin; // row relative to the buffer rect's ymin edge
+               const int offset = (this->m_chunkWidth * dy + dx) * COM_NUMBER_OF_CHANNELS; // float index: row-major, m_chunkWidth pixels per row, COM_NUMBER_OF_CHANNELS floats per pixel
+               copy_v4_v4(result, &this->m_buffer[offset]); // an out-of-range (x, y) would read outside m_buffer here, since nothing above validates it
+       }
+       
        void writePixel(int x, int y, const float color[4]);
        void addPixel(int x, int y, const float color[4]);
        inline void readCubic(float result[4], float x, float y)
index 41838e41fbaaac3272baa3c02585c8d8ef9893f5..cbbb4d0b3f270e17324a76dc0d6b89eecbaa2934 100644 (file)
@@ -101,16 +101,16 @@ __kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2
                float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
                color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);
 
-               for (int ny = miny; ny < maxy; ny += step) {
-                       for (int nx = minx; nx < maxx; nx += step) {
-                               if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) {
-                                       inputCoordinate.s0 = nx - offsetInput.s0;
-                                       inputCoordinate.s1 = ny - offsetInput.s1;
-                                       tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
-                                       if (size > threshold && tempSize > threshold) {
-                                               float dx = nx - realCoordinate.s0;
-                                               float dy = ny - realCoordinate.s1;
-                                               if (dx != 0 || dy != 0) {
+               if (size > threshold) {
+                       for (int ny = miny; ny < maxy; ny += step) {
+                               inputCoordinate.s1 = ny - offsetInput.s1;
+                               float dy = ny - realCoordinate.s1;
+                               for (int nx = minx; nx < maxx; nx += step) {
+                                       float dx = nx - realCoordinate.s0;
+                                       if (dx != 0 || dy != 0) {
+                                               inputCoordinate.s0 = nx - offsetInput.s0;
+                                               tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
+                                               if (tempSize > threshold) {
                                                        if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {
                                                                float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};
                                                                bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);
@@ -121,8 +121,8 @@ __kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2
                                                }
                                        }
                                }
-                       }
-               } 
+                       } 
+               }
        }
 
        color = color_accum * (1.0f / multiplier_accum);
index d57aa1366de7492ed392d23b17cc03e8ee820de5..cc18039c5b1dd449a0153d430eec3f657a392c50 100644 (file)
@@ -103,16 +103,16 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/*\n" \
 "              float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
 "              color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);\n" \
 "\n" \
-"              for (int ny = miny; ny < maxy; ny += step) {\n" \
-"                      for (int nx = minx; nx < maxx; nx += step) {\n" \
-"                              if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) {\n" \
-"                                      inputCoordinate.s0 = nx - offsetInput.s0;\n" \
-"                                      inputCoordinate.s1 = ny - offsetInput.s1;\n" \
-"                                      tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
-"                                      if (size > threshold && tempSize > threshold) {\n" \
-"                                              float dx = nx - realCoordinate.s0;\n" \
-"                                              float dy = ny - realCoordinate.s1;\n" \
-"                                              if (dx != 0 || dy != 0) {\n" \
+"              if (size > threshold) {\n" \
+"                      for (int ny = miny; ny < maxy; ny += step) {\n" \
+"                              inputCoordinate.s1 = ny - offsetInput.s1;\n" \
+"                              float dy = ny - realCoordinate.s1;\n" \
+"                              for (int nx = minx; nx < maxx; nx += step) {\n" \
+"                                      float dx = nx - realCoordinate.s0;\n" \
+"                                      if (dx != 0 || dy != 0) {\n" \
+"                                              inputCoordinate.s0 = nx - offsetInput.s0;\n" \
+"                                              tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
+"                                              if (tempSize > threshold) {\n" \
 "                                                      if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {\n" \
 "                                                              float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};\n" \
 "                                                              bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);\n" \
index 61538fde2589aa56eb2dbc565927dee3746e3f15..5d17526185b098a87b7a9aaac75afa87ac65ecb2 100644 (file)
@@ -62,8 +62,29 @@ void VariableSizeBokehBlurOperation::initExecution()
        QualityStepHelper::initExecution(COM_QH_INCREASE);
 }
 
+void *VariableSizeBokehBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers) // Builds the per-tile data: a heap-allocated array of three pointers, one per input of this operation.
+{
+       MemoryBuffer** result = new MemoryBuffer*[3]; // only the array is allocated here; deinitializeTileData releases it with delete[]
+       result[0] = (MemoryBuffer*)this->m_inputProgram->initializeTileData(rect, memoryBuffers); // slot 0: image input (executePixel reads it as inputProgramBuffer); assumes the void* result is a MemoryBuffer* - TODO confirm
+       result[1] = (MemoryBuffer*)this->m_inputBokehProgram->initializeTileData(rect, memoryBuffers); // slot 1: bokeh input (executePixel reads it as inputBokehBuffer)
+       result[2] = (MemoryBuffer*)this->m_inputSizeProgram->initializeTileData(rect, memoryBuffers); // slot 2: size input (executePixel reads it as inputSizeBuffer)
+       return result; // returned as void*; executePixel casts it back to MemoryBuffer**
+}
+
+void VariableSizeBokehBlurOperation::deinitializeTileData(rcti *rect, MemoryBuffer **memoryBuffers, void *data) // Frees the pointer array passed as data; rect and memoryBuffers are unused here.
+{
+       MemoryBuffer** result = (MemoryBuffer**)data; // NOTE(review): assumes data is the array allocated by initializeTileData - confirm the caller passes it back unchanged
+       delete[] result; // releases only the array of pointers; the MemoryBuffer objects it points to are not deleted here
+}
+
 void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, MemoryBuffer *inputBuffers[], void *data)
 {
+       MemoryBuffer** buffers = (MemoryBuffer**)data;
+       MemoryBuffer* inputProgramBuffer = buffers[0];
+       MemoryBuffer* inputBokehBuffer = buffers[1];
+       MemoryBuffer* inputSizeBuffer = buffers[2];
+       float* inputSizeFloatBuffer = inputSizeBuffer->getBuffer();
+       float* inputProgramFloatBuffer = inputProgramBuffer->getBuffer();
        float readColor[4];
        float bokeh[4];
        float tempSize[4];
@@ -84,32 +105,37 @@ void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, Me
        int maxy = MIN2(y + this->m_maxBlur, m_height);
 #endif
        {
-               this->m_inputSizeProgram->read(tempSize, x, y, COM_PS_NEAREST, inputBuffers);
-               this->m_inputProgram->read(readColor, x, y, COM_PS_NEAREST, inputBuffers);
+               inputSizeBuffer->readNoCheck(tempSize, x, y);
+               inputProgramBuffer->readNoCheck(readColor, x, y);
+
                add_v4_v4(color_accum, readColor);
                add_v4_fl(multiplier_accum, 1.0f);
                float sizeCenter = tempSize[0];
                
-               for (int ny = miny; ny < maxy; ny += QualityStepHelper::getStep()) {
-                       for (int nx = minx; nx < maxx; nx += QualityStepHelper::getStep()) {
-                               if (nx >= 0 && nx < this->getWidth() && ny >= 0 && ny < getHeight()) {
-                                       this->m_inputSizeProgram->read(tempSize, nx, ny, COM_PS_NEAREST, inputBuffers);
-                                       float size = tempSize[0];
-                                       float fsize = fabsf(size);
-                                       if (sizeCenter > this->m_threshold && size > this->m_threshold) {
-                                               float dx = nx - x;
-                                               float dy = ny - y;
-                                               if (nx == x && ny == y) {
-                                               }
-                                               else if (fsize > fabsf(dx) && fsize > fabsf(dy)) {
-                                                       float u = (256 + (dx/size) * 256);
-                                                       float v = (256 + (dy/size) * 256);
-                                                       this->m_inputBokehProgram->read(bokeh, u, v, COM_PS_NEAREST, inputBuffers);
-                                                       this->m_inputProgram->read(readColor, nx, ny, COM_PS_NEAREST, inputBuffers);
-                                                       madd_v4_v4v4(color_accum, bokeh, readColor);
-                                                       add_v4_v4(multiplier_accum, bokeh);
+               const int addXStep = QualityStepHelper::getStep()*COM_NUMBER_OF_CHANNELS;
+               
+               if (sizeCenter > this->m_threshold) {
+                       for (int ny = miny; ny < maxy; ny += QualityStepHelper::getStep()) {
+                               float dy = ny - y;
+                               int offsetNy = ny * inputSizeBuffer->getWidth() * COM_NUMBER_OF_CHANNELS;
+                               int offsetNxNy = offsetNy + (minx*COM_NUMBER_OF_CHANNELS);
+                               for (int nx = minx; nx < maxx; nx += QualityStepHelper::getStep()) {
+                                       if (nx != x || ny != y) 
+                                       {
+                                               float size = inputSizeFloatBuffer[offsetNxNy];
+                                               if (size > this->m_threshold) {
+                                                       float fsize = fabsf(size);
+                                                       float dx = nx - x;
+                                                       if (fsize > fabsf(dx) && fsize > fabsf(dy)) {
+                                                               float u = (256.0f + (dx/size) * 256.0f);
+                                                               float v = (256.0f + (dy/size) * 256.0f);
+                                                               inputBokehBuffer->readNoCheck(bokeh, u, v);
+                                                               madd_v4_v4v4(color_accum, bokeh, &inputProgramFloatBuffer[offsetNxNy]);
+                                                               add_v4_v4(multiplier_accum, bokeh);
+                                                       }
                                                }
                                        }
+                                       offsetNxNy += addXStep;
                                }
                        }
                }
index 6c9196c3eab57736ca1b48dfd87660bdd4ed2902..0ecfb5a542c1e8c720e70d70182e6da499409dac 100644 (file)
@@ -50,6 +50,10 @@ public:
         */
        void initExecution();
        
+       void *initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers);
+       
+       void deinitializeTileData(rcti *rect, MemoryBuffer **memoryBuffers, void *data);
+       
        /**
         * Deinitialize the execution
         */