Merging r50470 through r50477 from trunk into soc-2011-tomato
[blender.git] / source / blender / compositor / intern / COM_OpenCLDevice.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * Contributor: 
19  *              Jeroen Bakker 
20  *              Monique Dewanchand
21  */
22
23 #include "COM_OpenCLDevice.h"
24 #include "COM_WorkScheduler.h"
25
/**
 * Vendor identifiers that OpenCLDevice::m_vendorID is compared against.
 * NOTE(review): the values look like PCI vendor IDs, presumably as reported
 * by the OpenCL device query; confirm against the code that creates devices.
 */
enum COM_VendorID {
        NVIDIA = 0x10DE,
        AMD = 0x1002
};
27
28 OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId)
29 {
30         this->m_device = device;
31         this->m_context = context;
32         this->m_program = program;
33         this->m_queue = NULL;
34         this->m_vendorID = vendorId;
35 }
36
37 bool OpenCLDevice::initialize()
38 {
39         cl_int error;
40         this->m_queue = clCreateCommandQueue(this->m_context, this->m_device, 0, &error);
41         return false;
42 }
43
44 void OpenCLDevice::deinitialize()
45 {
46         if (this->m_queue) {
47                 clReleaseCommandQueue(this->m_queue);
48         }
49 }
50
51 void OpenCLDevice::execute(WorkPackage *work)
52 {
53         const unsigned int chunkNumber = work->getChunkNumber();
54         ExecutionGroup *executionGroup = work->getExecutionGroup();
55         rcti rect;
56
57         executionGroup->determineChunkRect(&rect, chunkNumber);
58         MemoryBuffer **inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber);
59         MemoryBuffer *outputBuffer = executionGroup->allocateOutputBuffer(chunkNumber, &rect);
60
61         executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this, &rect,
62                                                                       chunkNumber, inputBuffers, outputBuffer);
63
64         delete outputBuffer;
65         
66         executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers);
67 }
68 cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
69 {
70         return COM_clAttachMemoryBufferToKernelParameter(kernel, parameterIndex, offsetIndex, cleanup, inputMemoryBuffers, (ReadBufferOperation *)reader);
71 }
72
73 cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, ReadBufferOperation *reader)
74 {
75         cl_int error;
76         
77         MemoryBuffer *result = (MemoryBuffer *)reader->getInputMemoryBuffer(inputMemoryBuffers);
78
79         const cl_image_format imageFormat = {
80                 CL_RGBA,
81                 CL_FLOAT
82         };
83
84         cl_mem clBuffer = clCreateImage2D(this->m_context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(),
85                                           result->getHeight(), 0, result->getBuffer(), &error);
86
87         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
88         if (error == CL_SUCCESS) cleanup->push_back(clBuffer);
89
90         error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer);
91         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
92
93         COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result);
94         return clBuffer;
95 }
96
97 void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer)
98 {
99         if (offsetIndex != -1) {
100                 cl_int error;
101                 rcti *rect = memoryBuffer->getRect();
102                 cl_int2 offset = {rect->xmin, rect->ymin};
103
104                 error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
105                 if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
106         }
107 }
108
109 void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation *operation)
110 {
111         if (offsetIndex != -1) {
112                 cl_int error;
113                 cl_int2 offset = {(cl_int)operation->getWidth(), (cl_int)operation->getHeight()};
114
115                 error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
116                 if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
117         }
118 }
119
120 void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
121 {
122         cl_int error;
123         error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
124         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
125 }
126
127 void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
128 {
129         cl_int error;
130         const size_t size[] = {(size_t)outputMemoryBuffer->getWidth(), (size_t)outputMemoryBuffer->getHeight()};
131
132         error = clEnqueueNDRangeKernel(this->m_queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
133         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
134 }
135
136 void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation *operation)
137 {
138         cl_int error;
139         const int width = outputMemoryBuffer->getWidth();
140         const int height = outputMemoryBuffer->getHeight();
141         int offsetx;
142         int offsety;
143         int localSize = 1024;
144         size_t size[2];
145         cl_int2 offset;
146
147         if (this->m_vendorID == NVIDIA) {
148                 localSize = 32;
149         }
150
151         bool breaked = false;
152         for (offsety = 0; offsety < height && (!breaked); offsety += localSize) {
153                 offset[1] = offsety;
154                 if (offsety + localSize < height) {
155                         size[1] = localSize;
156                 }
157                 else {
158                         size[1] = height - offsety;
159                 }
160
161                 for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
162                         if (offsetx + localSize < width) {
163                                 size[0] = localSize;
164                         }
165                         else {
166                                 size[0] = width - offsetx;
167                         }
168                         offset[0] = offsetx;
169
170                         error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
171                         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
172                         error = clEnqueueNDRangeKernel(this->m_queue, kernel, 2, NULL, size, 0, 0, 0, NULL);
173                         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
174                         clFlush(this->m_queue);
175                         if (operation->isBreaked()) {
176                                 breaked = false;
177                         }
178                 }
179         }
180 }
181
182 cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp)
183 {
184         cl_int error;
185         cl_kernel kernel = clCreateKernel(this->m_program, kernelname, &error);
186         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
187         else {
188                 if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel);
189         }
190         return kernel;
191
192 }