Refactoring of tiles opencl implementation:
authorMonique Dewanchand <m.dewanchand@atmind.nl>
Wed, 20 Jun 2012 20:05:21 +0000 (20:05 +0000)
committerMonique Dewanchand <m.dewanchand@atmind.nl>
Wed, 20 Jun 2012 20:05:21 +0000 (20:05 +0000)
- Moved methods from NodeOperation to OpenCLDevice
- Added check on Nvidia for local size

14 files changed:
source/blender/compositor/intern/COM_Device.h
source/blender/compositor/intern/COM_Node.h
source/blender/compositor/intern/COM_NodeOperation.cpp
source/blender/compositor/intern/COM_NodeOperation.h
source/blender/compositor/intern/COM_OpenCLDevice.cpp
source/blender/compositor/intern/COM_OpenCLDevice.h
source/blender/compositor/intern/COM_WorkPackage.h
source/blender/compositor/intern/COM_WorkScheduler.cpp
source/blender/compositor/operations/COM_BokehBlurOperation.cpp
source/blender/compositor/operations/COM_BokehBlurOperation.h
source/blender/compositor/operations/COM_DilateErodeOperation.cpp
source/blender/compositor/operations/COM_DilateErodeOperation.h
source/blender/compositor/operations/COM_WriteBufferOperation.cpp
source/blender/compositor/operations/COM_WriteBufferOperation.h

index 08fdb5bb578d14e6351e921980fcf8fb45eafc2c..2a86382a191a33e3a29ae6a9cd74ba042f79b9b5 100644 (file)
 #ifndef _COM_Device_h
 #define _COM_Device_h
 
-#include "COM_ExecutionSystem.h"
 #include "COM_WorkPackage.h"
-#include "COM_NodeOperation.h"
-#include "BLI_rect.h"
-#include "COM_MemoryBuffer.h"
 
 /**
  * @brief Abstract class for device implementations to be used by the Compositor.
index 12baa26cd6eeab80f14eaf0dcb61f8d593cfe447..090b14554407ec83e11815cd7ffec3f569d61f6b 100644 (file)
@@ -29,6 +29,7 @@
 #include "COM_CompositorContext.h"
 #include "DNA_node_types.h"
 #include "BKE_text.h"
+#include "COM_ExecutionSystem.h"
 #include <vector>
 #include <string>
 
index b39b1758051de3e32112bfec85c423c6c15bec41..33989fa5787c22408a915926468da78f6d72c817 100644 (file)
@@ -140,118 +140,3 @@ bool NodeOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOper
                return false;
        }
 }
-
-cl_mem NodeOperation::COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
-{
-       cl_int error;
-       MemoryBuffer *result = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers);
-
-       const cl_image_format imageFormat = {
-               CL_RGBA,
-               CL_FLOAT
-       };
-
-       cl_mem clBuffer = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(),
-                                         result->getHeight(), 0, result->getBuffer(), &error);
-       
-       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
-       if (error == CL_SUCCESS) cleanup->push_back(clBuffer);
-
-       error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer);
-       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
-       
-       COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result);
-       return clBuffer;
-}
-       
-void NodeOperation::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer) 
-{
-       if (offsetIndex != -1) {
-               cl_int error;
-               rcti *rect = memoryBuffer->getRect();
-               cl_int2 offset = {rect->xmin, rect->ymin};
-
-               error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
-               if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
-       }
-}
-
-void NodeOperation::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex) 
-{
-       if (offsetIndex != -1) {
-               cl_int error;
-               cl_int2 offset = {this->getWidth(), this->getHeight()};
-
-               error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
-               if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
-       }
-}
-
-void NodeOperation::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer) 
-{
-       cl_int error;
-       error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
-       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
-}
-
-void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
-{
-       cl_int error;
-       const size_t size[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()};
-       
-       error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
-       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
-}
-
-void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex)
-{
-       cl_int error;
-       const int width = outputMemoryBuffer->getWidth();
-       const int height = outputMemoryBuffer->getHeight();
-       int offsetx;
-       int offsety;
-       const int localSize = 32;
-       size_t size[2];
-       cl_int2 offset;
-       
-       bool breaked = false;
-       for (offsety = 0; offsety < height && (!breaked); offsety += localSize) {
-               offset[1] = offsety;
-               if (offsety + localSize < height) {
-                       size[1] = localSize;
-               }
-               else {
-                       size[1] = height - offsety;
-               }
-               for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
-                       if (offsetx + localSize < width) {
-                               size[0] = localSize;
-                       }
-                       else {
-                               size[0] = width - offsetx;
-                       }
-                       offset[0] = offsetx;
-
-                       error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
-                       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
-                       error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
-                       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
-                       clFlush(queue);
-                       if (isBreaked()) {
-                               breaked = false;
-                       }
-               }
-       }
-}
-
-cl_kernel NodeOperation::COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp) 
-{
-       cl_int error;
-       cl_kernel kernel = clCreateKernel(program, kernelname, &error);
-       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
-       else {
-               if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel);
-       }
-       return kernel;
-       
-}
index 3073157271291929cdd53f740df3022b9d5f407f..f96b994685a4fa78c4c16f3cb5f3ee5d6220f88f 100644 (file)
@@ -22,9 +22,7 @@
 
 #ifndef _COM_NodeOperation_h
 #define _COM_NodeOperation_h
-
-class NodeOperation;
-
+class OpenCLDevice;
 #include "COM_Node.h"
 #include <string>
 #include <sstream>
@@ -150,7 +148,7 @@ public:
         * @param memoryBuffers all input MemoryBuffer's needed
         * @param outputBuffer the outputbuffer to write to
         */
-       virtual void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, 
+       virtual void executeOpenCLRegion(OpenCLDevice* device, rcti *rect,
                                         unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer) {}
 
        /**
@@ -165,7 +163,7 @@ public:
         * @param clMemToCleanUp all created cl_mem references must be added to this list. Framework will clean this after execution
         * @param clKernelsToCleanUp all created cl_kernel references must be added to this list. Framework will clean this after execution
         */
-       virtual void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp) {}
+       virtual void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp) {}
        virtual void deinitExecution();
 
        bool isResolutionSet() {
@@ -272,15 +270,6 @@ protected:
         * @brief set if this NodeOperation can be scheduled on a OpenCLDevice
         */
        void setOpenCL(bool openCL) { this->openCL = openCL; }
-
-       static cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader);
-       static void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers);
-       static void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer);
-       void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex);
-       static void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer);
-       void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex);
-       cl_kernel COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp);
-
 };
 
 #endif
index 9d0058040985fff4f7fde724d52a3ca24bed00a9..c9d27b8543c3a32895331af8eda0a5beaebd3a9c 100644 (file)
 #include "COM_OpenCLDevice.h"
 #include "COM_WorkScheduler.h"
 
+typedef enum COM_VendorID  {NVIDIA=0x10DE, AMD=0x1002} COM_VendorID;
 
-OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program)
+OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId)
 {
        this->device = device;
        this->context = context;
        this->program = program;
        this->queue = NULL;
+       this->vendorID = vendorId;
 }
 
 bool OpenCLDevice::initialize()
@@ -56,10 +58,126 @@ void OpenCLDevice::execute(WorkPackage *work)
        MemoryBuffer **inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber);
        MemoryBuffer *outputBuffer = executionGroup->allocateOutputBuffer(chunkNumber, &rect);
 
-       executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this->context, this->program, this->queue, &rect, 
+       executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this, &rect,
                                                                      chunkNumber, inputBuffers, outputBuffer);
 
        delete outputBuffer;
        
        executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers);
 }
+
+cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
+{
+       cl_int error;
+       MemoryBuffer *result = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers);
+
+       const cl_image_format imageFormat = {
+               CL_RGBA,
+               CL_FLOAT
+       };
+
+       cl_mem clBuffer = clCreateImage2D(this->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(),
+                                                                         result->getHeight(), 0, result->getBuffer(), &error);
+
+       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
+       if (error == CL_SUCCESS) cleanup->push_back(clBuffer);
+
+       error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer);
+       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
+
+       COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result);
+       return clBuffer;
+}
+
+void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer)
+{
+       if (offsetIndex != -1) {
+               cl_int error;
+               rcti *rect = memoryBuffer->getRect();
+               cl_int2 offset = {rect->xmin, rect->ymin};
+
+               error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
+               if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
+       }
+}
+
+void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation)
+{
+       if (offsetIndex != -1) {
+               cl_int error;
+               cl_int2 offset = {operation->getWidth(), operation->getHeight()};
+
+               error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
+               if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
+       }
+}
+
+void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
+{
+       cl_int error;
+       error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
+       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
+}
+
+void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
+{
+       cl_int error;
+       const size_t size[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()};
+
+       error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
+       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
+}
+
+void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation)
+{
+       cl_int error;
+       const int width = outputMemoryBuffer->getWidth();
+       const int height = outputMemoryBuffer->getHeight();
+       int offsetx;
+       int offsety;
+       int localSize = 1024;
+       size_t size[2];
+       cl_int2 offset;
+
+       if (this->vendorID == NVIDIA){localSize = 32;}
+       bool breaked = false;
+       for (offsety = 0; offsety < height && (!breaked); offsety += localSize) {
+               offset[1] = offsety;
+               if (offsety + localSize < height) {
+                       size[1] = localSize;
+               }
+               else {
+                       size[1] = height - offsety;
+               }
+               for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
+                       if (offsetx + localSize < width) {
+                               size[0] = localSize;
+                       }
+                       else {
+                               size[0] = width - offsetx;
+                       }
+                       offset[0] = offsetx;
+
+                       error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
+                       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
+                       error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
+                       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
+                       clFlush(this->queue);
+                       if (operation->isBreaked()) {
+                               breaked = false;
+                       }
+               }
+       }
+}
+
+cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp)
+{
+       cl_int error;
+       cl_kernel kernel = clCreateKernel(this->program, kernelname, &error);
+       if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
+       else {
+               if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel);
+       }
+       return kernel;
+
+}
index 83ce8cec811023097939b8571fbb3c553c72009d..d132f330651ad828726ffc2365b1bce3d22aa275 100644 (file)
@@ -29,7 +29,6 @@ class OpenCLDevice;
 #include "OCL_opencl.h"
 #include "COM_WorkScheduler.h"
 
-
 /**
  * @brief device representing an GPU OpenCL device.
  * an instance of this class represents a single cl_device
@@ -55,13 +54,21 @@ private:
         * @brief opencl command queue
         */
        cl_command_queue queue;
+
+       /**
+        * @brief opencl vendor ID
+        */
+       cl_int vendorID;
+
 public:
        /**
         * @brief constructor with opencl device
         * @param context
         * @param device
+        * @param program
+        * @param vendorID
         */
-       OpenCLDevice(cl_context context, cl_device_id device, cl_program program);
+       OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId);
        
        
        /**
@@ -83,6 +90,18 @@ public:
         * @param work the WorkPackage to execute
         */
        void execute(WorkPackage *work);
+
+       cl_context getContext(){return this->context;}
+
+       cl_command_queue getQueue(){return this->queue;}
+
+       cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader);
+       void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers);
+       void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer);
+       void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation);
+       void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer);
+       void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation);
+       cl_kernel COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp);
 };
 
 #endif
index 18d83cc151c068f3d417c3e6cd35c3b21d29772a..fed87186d2022db137f34d6fd2a276c6e04f68e1 100644 (file)
@@ -24,7 +24,7 @@ class WorkPackage;
 
 #ifndef _COM_WorkPackage_h_
 #define _COM_WorkPackage_h_
-
+class ExecutionGroup;
 #include "COM_ExecutionGroup.h"
 
 /**
index a410c28f47d075b8081b6ff6f7892a9f10b3a3ce..12c0f28ec9b94384f9a924c1a74a8a438b293aee 100644 (file)
@@ -257,7 +257,10 @@ void WorkScheduler::initialize()
                                unsigned int indexDevices;
                                for (indexDevices = 0; indexDevices < totalNumberOfDevices; indexDevices++) {
                                        cl_device_id device = cldevices[indexDevices];
-                                       OpenCLDevice *clDevice = new OpenCLDevice(context, device, program);
+                                       cl_int vendorID = 0;
+                                       cl_int error = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL);
+                                       if (error!= CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
+                                       OpenCLDevice *clDevice = new OpenCLDevice(context, device, program, vendorID);
                                        clDevice->initialize(),
                                            gpudevices.push_back(clDevice);
                                        if (G.f & G_DEBUG) {
index e2fce5047913ee4a5ab1358a537608007a45e044..9fe5abcb07515d809e37a85a140e2441a44bebf8 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "COM_BokehBlurOperation.h"
 #include "BLI_math.h"
+#include "COM_OpenCLDevice.h"
 
 extern "C" {
        #include "RE_pipeline.h"
@@ -160,25 +161,25 @@ bool BokehBlurOperation::determineDependingAreaOfInterest(rcti *input, ReadBuffe
 }
 
 static cl_kernel kernel = 0;
-void BokehBlurOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, 
+void BokehBlurOperation::executeOpenCL(OpenCLDevice* device,
                                        MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, 
                                        MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, 
                                        list<cl_kernel> *clKernelsToCleanUp) 
 {
        if (!kernel) {
-               kernel = COM_clCreateKernel(program, "bokehBlurKernel", NULL);
+               kernel = device->COM_clCreateKernel("bokehBlurKernel", NULL);
        }
        cl_int radius = this->getWidth() * this->size / 100.0f;
        cl_int step = this->getStep();
        
-       COM_clAttachMemoryBufferToKernelParameter(context, kernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBoundingBoxReader);
-       COM_clAttachMemoryBufferToKernelParameter(context, kernel, 1,  4, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
-       COM_clAttachMemoryBufferToKernelParameter(context, kernel, 2,  -1, clMemToCleanUp, inputMemoryBuffers, this->inputBokehProgram);
-       COM_clAttachOutputMemoryBufferToKernelParameter(kernel, 3, clOutputBuffer);
-       COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, 5, outputMemoryBuffer);
+       device->COM_clAttachMemoryBufferToKernelParameter(kernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->inputBoundingBoxReader);
+       device->COM_clAttachMemoryBufferToKernelParameter(kernel, 1,  4, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
+       device->COM_clAttachMemoryBufferToKernelParameter(kernel, 2,  -1, clMemToCleanUp, inputMemoryBuffers, this->inputBokehProgram);
+       device->COM_clAttachOutputMemoryBufferToKernelParameter(kernel, 3, clOutputBuffer);
+       device->COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, 5, outputMemoryBuffer);
        clSetKernelArg(kernel, 6, sizeof(cl_int), &radius);
        clSetKernelArg(kernel, 7, sizeof(cl_int), &step);
-       COM_clAttachSizeToKernelParameter(kernel, 8);
+       device->COM_clAttachSizeToKernelParameter(kernel, 8, this);
        
-       COM_clEnqueueRange(queue, kernel, outputMemoryBuffer, 9);       
+       device->COM_clEnqueueRange(kernel, outputMemoryBuffer, 9, this);
 }
index 3ec61c5ce01f53ee9ca6b717663979bb716c7e0d..853855d5c34fcfc843b23dd8e2ae47225ff4b935 100644 (file)
@@ -57,6 +57,6 @@ public:
 
        void setSize(float size) { this->size = size; }
        
-       void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp);
+       void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp);
 };
 #endif
index 306a2d96985eeb0a0faa109e1cbe9a2f7c6e1abe..80d1c6444eb7c94f842399616eeaa674a9d24c19 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "COM_DilateErodeOperation.h"
 #include "BLI_math.h"
+#include "COM_OpenCLDevice.h"
 
 // DilateErode Distance Threshold
 DilateErodeThresholdOperation::DilateErodeThresholdOperation() : NodeOperation()
@@ -234,24 +235,24 @@ bool DilateDistanceOperation::determineDependingAreaOfInterest(rcti *input, Read
 }
 
 static cl_kernel dilateKernel = 0;
-void DilateDistanceOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, 
+void DilateDistanceOperation::executeOpenCL(OpenCLDevice* device,
                                             MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
                                             MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
                                             list<cl_kernel> *clKernelsToCleanUp)
 {
        if (!dilateKernel) {
-               dilateKernel = COM_clCreateKernel(program, "dilateKernel", NULL);
+               dilateKernel = device->COM_clCreateKernel("dilateKernel", NULL);
        }
        cl_int distanceSquared = this->distance * this->distance;
        cl_int scope = this->scope;
        
-       COM_clAttachMemoryBufferToKernelParameter(context, dilateKernel, 0,  2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
-       COM_clAttachOutputMemoryBufferToKernelParameter(dilateKernel, 1, clOutputBuffer);
-       COM_clAttachMemoryBufferOffsetToKernelParameter(dilateKernel, 3, outputMemoryBuffer);
+       device->COM_clAttachMemoryBufferToKernelParameter(dilateKernel, 0,  2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
+       device->COM_clAttachOutputMemoryBufferToKernelParameter(dilateKernel, 1, clOutputBuffer);
+       device->COM_clAttachMemoryBufferOffsetToKernelParameter(dilateKernel, 3, outputMemoryBuffer);
        clSetKernelArg(dilateKernel, 4, sizeof(cl_int), &scope);
        clSetKernelArg(dilateKernel, 5, sizeof(cl_int), &distanceSquared);
-       COM_clAttachSizeToKernelParameter(dilateKernel, 6);
-       COM_clEnqueueRange(queue, dilateKernel, outputMemoryBuffer, 7);
+       device->COM_clAttachSizeToKernelParameter(dilateKernel, 6, this);
+       device->COM_clEnqueueRange(dilateKernel, outputMemoryBuffer, 7, this);
 }
 
 // Erode Distance
@@ -293,24 +294,24 @@ void ErodeDistanceOperation::executePixel(float *color, int x, int y, MemoryBuff
 }
 
 static cl_kernel erodeKernel = 0;
-void ErodeDistanceOperation::executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, 
+void ErodeDistanceOperation::executeOpenCL(OpenCLDevice* device,
                                            MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
                                            MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
                                            list<cl_kernel> *clKernelsToCleanUp)
 {
        if (!erodeKernel) {
-               erodeKernel = COM_clCreateKernel(program, "erodeKernel", NULL);
+               erodeKernel = device->COM_clCreateKernel("erodeKernel", NULL);
        }
        cl_int distanceSquared = this->distance * this->distance;
        cl_int scope = this->scope;
        
-       COM_clAttachMemoryBufferToKernelParameter(context, erodeKernel, 0,  2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
-       COM_clAttachOutputMemoryBufferToKernelParameter(erodeKernel, 1, clOutputBuffer);
-       COM_clAttachMemoryBufferOffsetToKernelParameter(erodeKernel, 3, outputMemoryBuffer);
+       device->COM_clAttachMemoryBufferToKernelParameter(erodeKernel, 0,  2, clMemToCleanUp, inputMemoryBuffers, this->inputProgram);
+       device->COM_clAttachOutputMemoryBufferToKernelParameter(erodeKernel, 1, clOutputBuffer);
+       device->COM_clAttachMemoryBufferOffsetToKernelParameter(erodeKernel, 3, outputMemoryBuffer);
        clSetKernelArg(erodeKernel, 4, sizeof(cl_int), &scope);
        clSetKernelArg(erodeKernel, 5, sizeof(cl_int), &distanceSquared);
-       COM_clAttachSizeToKernelParameter(erodeKernel, 6);
-       COM_clEnqueueRange(queue, erodeKernel, outputMemoryBuffer, 7);
+       device->COM_clAttachSizeToKernelParameter(erodeKernel, 6, this);
+       device->COM_clEnqueueRange(erodeKernel, outputMemoryBuffer, 7, this);
 }
 
 // Dilate step
index b11356129b451f427df551bfff38cb042ca6dd5f..4d0bf9de0ec57a9ca5618fafccce81e9dad96d0a 100644 (file)
@@ -99,7 +99,7 @@ public:
        void setDistance(float distance) { this->distance = distance; }
        bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
        
-       void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, 
+       void executeOpenCL(OpenCLDevice* device,
                           MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, 
                           MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
                           list<cl_kernel> *clKernelsToCleanUp);
@@ -113,7 +113,7 @@ public:
         */
        void executePixel(float *color, int x, int y, MemoryBuffer * inputBuffers[], void *data);
 
-       void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, 
+       void executeOpenCL(OpenCLDevice* device,
                           MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, 
                           MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, 
                           list<cl_kernel> *clKernelsToCleanUp);
index 4fff3fdcc31790f0c7236e5ba32546a2e3a8aa83..356ba452185023ea627943361ad78aff12940efb 100644 (file)
@@ -23,6 +23,7 @@
 #include "COM_WriteBufferOperation.h"
 #include "COM_defines.h"
 #include <stdio.h>
+#include "COM_OpenCLDevice.h"
 
 WriteBufferOperation::WriteBufferOperation() : NodeOperation()
 {
@@ -110,7 +111,7 @@ void WriteBufferOperation::executeRegion(rcti *rect, unsigned int tileNumber, Me
        memoryBuffer->setCreatedState();
 }
 
-void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, unsigned int chunkNumber, MemoryBuffer **inputMemoryBuffers, MemoryBuffer *outputBuffer)
+void WriteBufferOperation::executeOpenCLRegion(OpenCLDevice* device, rcti *rect, unsigned int chunkNumber, MemoryBuffer **inputMemoryBuffers, MemoryBuffer *outputBuffer)
 {
        float *outputFloatBuffer = outputBuffer->getBuffer();
        cl_int error;
@@ -131,7 +132,7 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
                CL_FLOAT
        };
 
-       cl_mem clOutputBuffer = clCreateImage2D(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, outputBufferWidth, outputBufferHeight, 0, outputFloatBuffer, &error);
+       cl_mem clOutputBuffer = clCreateImage2D(device->getContext(), CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, outputBufferWidth, outputBufferHeight, 0, outputFloatBuffer, &error);
        if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
        
        // STEP 2
@@ -139,7 +140,7 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
        clMemToCleanUp->push_back(clOutputBuffer);
        list<cl_kernel> *clKernelsToCleanUp = new list<cl_kernel>();
 
-       this->input->executeOpenCL(context, program, queue, outputBuffer, clOutputBuffer, inputMemoryBuffers, clMemToCleanUp, clKernelsToCleanUp);
+       this->input->executeOpenCL(device, outputBuffer, clOutputBuffer, inputMemoryBuffers, clMemToCleanUp, clKernelsToCleanUp);
 
        // STEP 3
 
@@ -149,9 +150,9 @@ void WriteBufferOperation::executeOpenCLRegion(cl_context context, cl_program pr
 //     clFlush(queue);
 //     clFinish(queue);
 
-       error = clEnqueueBarrier(queue);
+       error = clEnqueueBarrier(device->getQueue());
        if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
-       error = clEnqueueReadImage(queue, clOutputBuffer, CL_TRUE, origin, region, 0, 0, outputFloatBuffer, 0, NULL, NULL);
+       error = clEnqueueReadImage(device->getQueue(), clOutputBuffer, CL_TRUE, origin, region, 0, 0, outputFloatBuffer, 0, NULL, NULL);
        if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
        
        this->getMemoryProxy()->getBuffer()->copyContentFrom(outputBuffer);
index 321eed7240a997f9478d9f3a8d7037e038056004..ccc20584186074bb6f17ca86223bd96abc46829c 100644 (file)
@@ -44,7 +44,7 @@ public:
        void executeRegion(rcti *rect, unsigned int tileNumber, MemoryBuffer **memoryBuffers);
        void initExecution();
        void deinitExecution();
-       void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer);
+       void executeOpenCLRegion(OpenCLDevice* device, rcti *rect, unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer);
        void readResolutionFromInputSocket();
 
 };