* Added OpenCL implementation of the Defocus node
author: Jeroen Bakker <j.bakker@atmind.nl>
Fri, 6 Jul 2012 11:31:40 +0000 (11:31 +0000)
committer: Jeroen Bakker <j.bakker@atmind.nl>
Fri, 6 Jul 2012 11:31:40 +0000 (11:31 +0000)
 * Always disable two phase compositing during rendering

 - At Mind -

source/blender/compositor/intern/COM_compositor.cpp
source/blender/compositor/operations/COM_OpenCLKernels.cl
source/blender/compositor/operations/COM_OpenCLKernels.cl.h
source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp
source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h

index 9e48334bccac29a7fa1757a4db9ba0f3412d5722..ab64f8f7bf18f93113c816c6c9943885fe5ded78 100644 (file)
@@ -57,7 +57,7 @@ void COM_execute(RenderData *rd, bNodeTree *editingtree, int rendering)
        /* set progress bar to 0% and status to init compositing*/
        editingtree->progress(editingtree->prh, 0.0);
 
-       bool twopass = (editingtree->flag&NTREE_TWO_PASS) > 0 || rendering;
+       bool twopass = (editingtree->flag&NTREE_TWO_PASS) > 0 && !rendering;
        /* initialize execution system */
        if (twopass) {
                ExecutionSystem *system = new ExecutionSystem(rd, editingtree, rendering, twopass);
index 0f8e543de7fa2ffa24064118c767410dd39d9512..ce1979153600b071524840b3018ad6bfc88183c1 100644 (file)
@@ -51,6 +51,68 @@ __kernel void bokehBlurKernel(__read_only image2d_t boundingBox, __read_only ima
        write_imagef(output, coords, color);
 }
 
+//KERNEL --- DEFOCUS /VARIABLESIZEBOKEHBLUR --- one work-item per output pixel: bokeh-weighted average of neighbours within maxBlur, visited every 'step' pixels
+__kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2d_t bokehImage, 
+                                        __read_only image2d_t inputDepth,  __read_only image2d_t inputSize,
+                                       __write_only image2d_t output, int2 offsetInput, int2 offsetOutput, // offsetInput is subtracted for every input image read below
+                                       int step, int maxBlur, float threshold, int2 dimension, int2 offset) // offset is added to the global id; dimension caps the search window
+{
+       float4 color = {1.0f, 0.0f, 0.0f, 1.0f}; // placeholder only; always overwritten before the write at the end
+       int2 coords = {get_global_id(0), get_global_id(1)};
+       coords += offset;
+       const int2 realCoordinate = coords + offsetOutput; // coordinate used for the window bounds and (minus offsetInput) for input reads
+
+       float4 readColor;
+       float4 bokeh;
+       float tempSize;
+       float tempDepth;
+       float4 multiplier_accum = {1.0f, 1.0f, 1.0f, 1.0f}; // weight 1 per channel for this pixel's own color, which seeds color_accum below
+       float4 color_accum;
+       
+       int minx = max(realCoordinate.s0 - maxBlur, 0); // search window: maxBlur pixels either side, clamped to 0..dimension
+       int miny = max(realCoordinate.s1 - maxBlur, 0);
+       int maxx = min(realCoordinate.s0 + maxBlur, dimension.s0); // exclusive upper bound (loops use '<')
+       int maxy = min(realCoordinate.s1 + maxBlur, dimension.s1);
+       
+       {
+               int2 inputCoordinate = realCoordinate - offsetInput;
+               float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0; // first channel of inputSize at this pixel
+               float depth = read_imagef(inputDepth, SAMPLER_NEAREST, inputCoordinate).s0 + threshold; // this pixel's depth plus threshold: cutoff for accepting neighbours
+               color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate); // start from this pixel's own color
+
+               for (int ny = miny; ny < maxy; ny += step) { // only every step-th row/column of the window is visited
+                       for (int nx = minx; nx < maxx; nx += step) {
+                               if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) { // always true given the clamped bounds above; kept as a guard
+                                       inputCoordinate.s0 = nx - offsetInput.s0;
+                                       inputCoordinate.s1 = ny - offsetInput.s1;
+                                       tempDepth = read_imagef(inputDepth, SAMPLER_NEAREST, inputCoordinate).s0;
+                                       if (tempDepth < depth) { // neighbour contributes only if its depth is below this pixel's depth + threshold
+                                               tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
+                                               
+                                               if ((size > threshold && tempSize > threshold) || tempSize <= threshold) { // rejects a neighbour whose size exceeds threshold when this pixel's size does not
+                                                       float dx = nx - realCoordinate.s0;
+                                                       float dy = ny - realCoordinate.s1;
+                                                       if (dx != 0 || dy != 0) { // skip the pixel itself; it is already in the accumulators
+                                                               if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) { // neighbour's size must cover the offset on both axes; also makes tempSize > 0 for the divisions below
+                                                                       float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize}; // maps offset to bokeh coords around (256,256); presumably assumes a 512x512 bokeh image -- confirm
+                                                                       bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);
+                                                                       readColor = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);
+                                                                       color_accum += bokeh*readColor; // per-channel weighted sum
+                                                                       multiplier_accum += bokeh; // per-channel sum of weights
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               } 
+       }
+
+       color = color_accum * (1.0f / multiplier_accum); // normalise per channel by the accumulated weights
+       write_imagef(output, coords, color); // written at coords (global id + offset), not at realCoordinate
+}
+
+
 // KERNEL --- DILATE ---
 __kernel void dilateKernel(__read_only image2d_t inputImage,  __write_only image2d_t output,
                            int2 offsetInput, int2 offsetOutput, int scope, int distanceSquared, int2 dimension, 
index e064b7511cb152b76cea0a2d269a9fc06be1a4ba..ca66ab858028ae03aa8fe8c8828df8a7430f39fc 100644 (file)
@@ -16,7 +16,7 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope
 "      coords += offset;\n" \
 "      float tempBoundingBox;\n" \
 "      float4 color = {0.0f,0.0f,0.0f,0.0f};\n" \
-"      float4 multiplier = {0.0f,0.0f,0.0f,0.0f};\n" \
+"      float4 multiplyer = {0.0f,0.0f,0.0f,0.0f};\n" \
 "      float4 bokeh;\n" \
 "      const float radius2 = radius*2.0f;\n" \
 "      const int2 realCoordinate = coords + offsetOutput;\n" \
@@ -40,10 +40,10 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope
 "                              uv.x = ((realCoordinate.x-nx)/radius2)*bokehImageDim.x+bokehImageCenter.x;\n" \
 "                              bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);\n" \
 "                              color += bokeh * read_imagef(inputImage, SAMPLER_NEAREST, inputXy);\n" \
-"                              multiplier += bokeh;\n" \
+"                              multiplyer += bokeh;\n" \
 "                      }\n" \
 "              }\n" \
-"              color /= multiplier;\n" \
+"              color /= multiplyer;\n" \
 "\n" \
 "      } else {\n" \
 "              int2 imageCoordinates = realCoordinate - offsetInput;\n" \
@@ -53,6 +53,68 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope
 "      write_imagef(output, coords, color);\n" \
 "}\n" \
 "\n" \
+"//KERNEL --- DEFOCUS /VARIABLESIZEBOKEHBLUR ---\n" \
+"__kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2d_t bokehImage,\n" \
+"                                       __read_only image2d_t inputDepth,  __read_only image2d_t inputSize,\n" \
+"                                      __write_only image2d_t output, int2 offsetInput, int2 offsetOutput,\n" \
+"                                      int step, int maxBlur, float threshold, int2 dimension, int2 offset)\n" \
+"{\n" \
+"      float4 color = {1.0f, 0.0f, 0.0f, 1.0f};\n" \
+"      int2 coords = {get_global_id(0), get_global_id(1)};\n" \
+"      coords += offset;\n" \
+"      const int2 realCoordinate = coords + offsetOutput;\n" \
+"\n" \
+"      float4 readColor;\n" \
+"      float4 bokeh;\n" \
+"      float tempSize;\n" \
+"      float tempDepth;\n" \
+"      float4 multiplier_accum = {1.0f, 1.0f, 1.0f, 1.0f};\n" \
+"      float4 color_accum;\n" \
+"\n" \
+"      int minx = max(realCoordinate.s0 - maxBlur, 0);\n" \
+"      int miny = max(realCoordinate.s1 - maxBlur, 0);\n" \
+"      int maxx = min(realCoordinate.s0 + maxBlur, dimension.s0);\n" \
+"      int maxy = min(realCoordinate.s1 + maxBlur, dimension.s1);\n" \
+"\n" \
+"      {\n" \
+"              int2 inputCoordinate = realCoordinate - offsetInput;\n" \
+"              float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
+"              float depth = read_imagef(inputDepth, SAMPLER_NEAREST, inputCoordinate).s0 + threshold;\n" \
+"              color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);\n" \
+"\n" \
+"              for (int ny = miny; ny < maxy; ny += step) {\n" \
+"                      for (int nx = minx; nx < maxx; nx += step) {\n" \
+"                              if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) {\n" \
+"                                      inputCoordinate.s0 = nx - offsetInput.s0;\n" \
+"                                      inputCoordinate.s1 = ny - offsetInput.s1;\n" \
+"                                      tempDepth = read_imagef(inputDepth, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
+"                                      if (tempDepth < depth) {\n" \
+"                                              tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
+"\n" \
+"                                              if ((size > threshold && tempSize > threshold) || tempSize <= threshold) {\n" \
+"                                                      float dx = nx - realCoordinate.s0;\n" \
+"                                                      float dy = ny - realCoordinate.s1;\n" \
+"                                                      if (dx != 0 || dy != 0) {\n" \
+"                                                              if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {\n" \
+"                                                                      float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};\n" \
+"                                                                      bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);\n" \
+"                                                                      readColor = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);\n" \
+"                                                                      color_accum += bokeh*readColor;\n" \
+"                                                                      multiplier_accum += bokeh;\n" \
+"                                                              }\n" \
+"                                                      }\n" \
+"                                              }\n" \
+"                                      }\n" \
+"                              }\n" \
+"                      }\n" \
+"              }\n" \
+"      }\n" \
+"\n" \
+"      color = color_accum * (1.0f / multiplier_accum);\n" \
+"      write_imagef(output, coords, color);\n" \
+"}\n" \
+"\n" \
+"\n" \
 "// KERNEL --- DILATE ---\n" \
 "__kernel void dilateKernel(__read_only image2d_t inputImage,  __write_only image2d_t output,\n" \
 "                           int2 offsetInput, int2 offsetOutput, int scope, int distanceSquared, int2 dimension,\n" \
@@ -70,9 +132,9 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope
 "      int2 inputXy;\n" \
 "\n" \
 "      for (ny = minXY.y, inputXy.y = ny - offsetInput.y ; ny < maxXY.y ; ny ++, inputXy.y++) {\n" \
+"              const float deltaY = (realCoordinate.y - ny);\n" \
 "              for (nx = minXY.x, inputXy.x = nx - offsetInput.x; nx < maxXY.x ; nx ++, inputXy.x++) {\n" \
 "                      const float deltaX = (realCoordinate.x - nx);\n" \
-"                      const float deltaY = (realCoordinate.y - ny);\n" \
 "                      const float measuredDistance = deltaX*deltaX+deltaY*deltaY;\n" \
 "                      if (measuredDistance <= distanceSquared) {\n" \
 "                              value = max(value, read_imagef(inputImage, SAMPLER_NEAREST, inputXy).s0);\n" \
index 1368476e9b436e6b397bdaed7add2e237a8882dc..7ddcb78b61f335770905eae66e6e0b2eef078b4e 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "COM_VariableSizeBokehBlurOperation.h"
 #include "BLI_math.h"
+#include "COM_OpenCLDevice.h"
 
 extern "C" {
        #include "RE_pipeline.h"
@@ -38,6 +39,7 @@ VariableSizeBokehBlurOperation::VariableSizeBokehBlurOperation() : NodeOperation
 #endif
        this->addOutputSocket(COM_DT_COLOR);
        this->setComplex(true);
+       this->setOpenCL(true);
 
        this->m_inputProgram = NULL;
        this->m_inputBokehProgram = NULL;
@@ -128,6 +130,33 @@ void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, Me
 
 }
 
+static cl_kernel defocusKernel = 0; /* kernel handle cached at file scope, created on the first executeOpenCL call. NOTE(review): shared by every caller and not released in this block -- confirm this is safe with several devices/threads */
+void VariableSizeBokehBlurOperation::executeOpenCL(OpenCLDevice* device, /* binds this operation's inputs and settings to "defocusKernel" and enqueues it */
+                                       MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, 
+                                       MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, 
+                                       list<cl_kernel> *clKernelsToCleanUp) /* clKernelsToCleanUp is not used in this body */
+{
+       if (!defocusKernel) {
+               defocusKernel = device->COM_clCreateKernel("defocusKernel", NULL); /* NULL is passed instead of clKernelsToCleanUp */
+       }
+       cl_int step = this->getStep(); /* settings are copied into cl_* typed locals so the sizeof() given to clSetKernelArg matches the value passed */
+       cl_int maxBlur = this->m_maxBlur;
+       cl_float threshold = this->m_threshold;
+       
+       device->COM_clAttachMemoryBufferToKernelParameter(defocusKernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram); /* arg 0: color input */
+       device->COM_clAttachMemoryBufferToKernelParameter(defocusKernel, 1,  -1, clMemToCleanUp, inputMemoryBuffers, this->m_inputBokehProgram); /* arg 1: bokeh input */
+       device->COM_clAttachMemoryBufferToKernelParameter(defocusKernel, 2,  5, clMemToCleanUp, inputMemoryBuffers, this->m_inputDepthProgram); /* arg 2: depth input; the 5 is presumably the kernel arg index that receives this buffer's offset (-1 = none) -- confirm in OpenCLDevice */
+       device->COM_clAttachMemoryBufferToKernelParameter(defocusKernel, 3,  -1, clMemToCleanUp, inputMemoryBuffers, this->m_inputSizeProgram); /* arg 3: size input */
+       device->COM_clAttachOutputMemoryBufferToKernelParameter(defocusKernel, 4, clOutputBuffer); /* arg 4: output image */
+       device->COM_clAttachMemoryBufferOffsetToKernelParameter(defocusKernel, 6, outputMemoryBuffer); /* arg 6: presumably the output buffer offset -- confirm */
+       clSetKernelArg(defocusKernel, 7, sizeof(cl_int), &step); /* args 7-9: scalar settings; NOTE(review): clSetKernelArg return codes are ignored */
+       clSetKernelArg(defocusKernel, 8, sizeof(cl_int), &maxBlur);
+       clSetKernelArg(defocusKernel, 9, sizeof(cl_float), &threshold);
+       device->COM_clAttachSizeToKernelParameter(defocusKernel, 10, this); /* arg 10: presumably this operation's dimensions -- confirm */
+       
+       device->COM_clEnqueueRange(defocusKernel, outputMemoryBuffer, 11, this); /* launches the kernel; 11 is presumably the index of the per-launch offset argument -- confirm */
+}
+
 void VariableSizeBokehBlurOperation::deinitExecution()
 {
        this->m_inputProgram = NULL;
index 4bf597ff831613d3f131138d939958c3f8e25932..8e5589fafec7a541a5f0c0ce1a95f97acccbb297 100644 (file)
@@ -62,7 +62,7 @@ public:
 
        void setThreshold(float threshold) { this->m_threshold = threshold; }
 
-
+       void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp);
 };
 
 #ifdef COM_DEFOCUS_SEARCH