Refactoring of tiles opencl implementation:
[blender.git] / source / blender / compositor / intern / COM_WorkScheduler.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * Contributor: 
19  *              Jeroen Bakker 
20  *              Monique Dewanchand
21  */
22
23 #include <list>
24 #include <stdio.h>
25
26 #include "BKE_global.h"
27
28 #include "COM_WorkScheduler.h"
29 #include "COM_CPUDevice.h"
30 #include "COM_OpenCLDevice.h"
31 #include "COM_OpenCLKernels.cl.h"
32 #include "OCL_opencl.h"
33
34 #include "PIL_time.h"
35 #include "BLI_threads.h"
36
37 #if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD
38 #warning COM_CURRENT_THREADING_MODEL COM_TM_NOTHREAD is activated. Use only for debugging.
39 #elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
40 #else
41 #error COM_CURRENT_THREADING_MODEL No threading model selected
42 #endif
43
44
45 /// @brief list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
46 static vector<CPUDevice *> cpudevices;
47
48 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
49 /// @brief list of all thread for every CPUDevice in cpudevices a thread exists
50 static ListBase cputhreads;
51 /// @brief all scheduled work for the cpu
52 static ThreadQueue *cpuqueue;
53 static ThreadQueue *gpuqueue;
54 #ifdef COM_OPENCL_ENABLED
55 static cl_context context;
56 static cl_program program;
57 /// @brief list of all OpenCLDevices. for every OpenCL GPU device an instance of OpenCLDevice is created
58 static vector<OpenCLDevice *> gpudevices;
59 /// @brief list of all thread for every GPUDevice in cpudevices a thread exists
60 static ListBase gputhreads;
61 /// @brief all scheduled work for the gpu
62 #ifdef COM_OPENCL_ENABLED
63 static bool openclActive = false;
64 #endif
65 #endif
66 #endif
67
68
69 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
70 void *WorkScheduler::thread_execute_cpu(void *data)
71 {
72         Device *device = (Device *)data;
73         WorkPackage *work;
74         
75         while ((work = (WorkPackage *)BLI_thread_queue_pop(cpuqueue))) {
76                 device->execute(work);
77                 delete work;
78         }
79         
80         return NULL;
81 }
82
83 void *WorkScheduler::thread_execute_gpu(void *data)
84 {
85         Device *device = (Device *)data;
86         WorkPackage *work;
87         
88         while ((work = (WorkPackage *)BLI_thread_queue_pop(gpuqueue))) {
89                 device->execute(work);
90                 delete work;
91         }
92         
93         return NULL;
94 }
95 #endif
96
97
98
99 void WorkScheduler::schedule(ExecutionGroup *group, int chunkNumber)
100 {
101         WorkPackage *package = new WorkPackage(group, chunkNumber);
102 #if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD
103         CPUDevice device;
104         device.execute(package);
105         delete package;
106 #elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
107 #ifdef COM_OPENCL_ENABLED
108         if (group->isOpenCL() && openclActive) {
109                 BLI_thread_queue_push(gpuqueue, package);
110         }
111         else {
112                 BLI_thread_queue_push(cpuqueue, package);
113         }
114 #else
115         BLI_thread_queue_push(cpuqueue, package);
116 #endif
117 #endif
118 }
119
120 void WorkScheduler::start(CompositorContext &context)
121 {
122 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
123         unsigned int index;
124         cpuqueue = BLI_thread_queue_init();
125         BLI_init_threads(&cputhreads, thread_execute_cpu, cpudevices.size());
126         for (index = 0; index < cpudevices.size(); index++) {
127                 Device *device = cpudevices[index];
128                 BLI_insert_thread(&cputhreads, device);
129         }
130 #ifdef COM_OPENCL_ENABLED
131         if (context.getHasActiveOpenCLDevices()) {
132                 gpuqueue = BLI_thread_queue_init();
133                 BLI_init_threads(&gputhreads, thread_execute_gpu, gpudevices.size());
134                 for (index = 0; index < gpudevices.size(); index++) {
135                         Device *device = gpudevices[index];
136                         BLI_insert_thread(&gputhreads, device);
137                 }
138                 openclActive = true;
139         }
140         else {
141                 openclActive = false;
142         }
143 #endif
144 #endif
145 }
146 void WorkScheduler::finish()
147 {
148 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
149 #ifdef COM_OPENCL_ENABLED
150         if (openclActive) {
151                 BLI_thread_queue_wait_finish(gpuqueue);
152                 BLI_thread_queue_wait_finish(cpuqueue);
153         }
154         else {
155                 BLI_thread_queue_wait_finish(cpuqueue);
156         }
157 #else
158         BLI_thread_queue_wait_finish(cpuqueue);
159 #endif
160 #endif
161 }
162 void WorkScheduler::stop()
163 {
164 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
165         BLI_thread_queue_nowait(cpuqueue);
166         BLI_end_threads(&cputhreads);
167         BLI_thread_queue_free(cpuqueue);
168         cpuqueue = NULL;
169 #ifdef COM_OPENCL_ENABLED
170         if (openclActive) {
171                 BLI_thread_queue_nowait(gpuqueue);
172                 BLI_end_threads(&gputhreads);
173                 BLI_thread_queue_free(gpuqueue);
174                 gpuqueue = NULL;
175         }
176 #endif
177 #endif
178 }
179
180 bool WorkScheduler::hasGPUDevices()
181 {
182 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
183 #ifdef COM_OPENCL_ENABLED
184         return gpudevices.size() > 0;
185 #else
186         return 0;
187 #endif
188 #else
189         return 0;
190 #endif
191 }
192
/**
 * @brief error callback that is registered when the OpenCL context is created
 *
 * Only the error text is reported, on stdout. The other arguments are ignored.
 *
 * @param errinfo the error text to print
 * @param private_info unused
 * @param cb unused
 * @param user_data unused
 */
extern void clContextError(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
	(void)private_info;
	(void)cb;
	(void)user_data;
	fprintf(stdout, "OPENCL error: %s\n", errinfo);
}
197
198 void WorkScheduler::initialize()
199 {
200 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
201         int numberOfCPUThreads = BLI_system_thread_count();
202
203         for (int index = 0; index < numberOfCPUThreads; index++) {
204                 CPUDevice *device = new CPUDevice();
205                 device->initialize();
206                 cpudevices.push_back(device);
207         }
208 #ifdef COM_OPENCL_ENABLED
209         context = NULL;
210         program = NULL;
211         if (clCreateContextFromType) {
212                 cl_uint numberOfPlatforms = 0;
213                 cl_int error;
214                 error = clGetPlatformIDs(0, 0, &numberOfPlatforms);
215                 if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
216                 if (G.f & G_DEBUG) printf("%d number of platforms\n", numberOfPlatforms);
217                 cl_platform_id *platforms = new cl_platform_id[numberOfPlatforms];
218                 error = clGetPlatformIDs(numberOfPlatforms, platforms, 0);
219                 unsigned int indexPlatform;
220                 cl_uint totalNumberOfDevices = 0;
221                 for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) {
222                         cl_platform_id platform = platforms[indexPlatform];
223                         cl_uint numberOfDevices;
224                         clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, 0, &numberOfDevices);
225                         totalNumberOfDevices += numberOfDevices;
226                 }
227
228                 cl_device_id *cldevices = new cl_device_id[totalNumberOfDevices];
229                 unsigned int numberOfDevicesReceived = 0;
230                 for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) {
231                         cl_platform_id platform = platforms[indexPlatform];
232                         cl_uint numberOfDevices;
233                         clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, 0, &numberOfDevices);
234                         clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numberOfDevices, cldevices + numberOfDevicesReceived * sizeof(cl_device_id), 0);
235                         numberOfDevicesReceived += numberOfDevices;
236                 }
237                 if (totalNumberOfDevices > 0) {
238                         context = clCreateContext(NULL, totalNumberOfDevices, cldevices, clContextError, NULL, &error);
239                         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
240                         program = clCreateProgramWithSource(context, 1, &clkernelstoh_COM_OpenCLKernels_cl, 0, &error);
241                         error = clBuildProgram(program, totalNumberOfDevices, cldevices, 0, 0, 0);
242                         if (error != CL_SUCCESS) { 
243                                 cl_int error2;
244                                 size_t ret_val_size = 0;
245                                 printf("CLERROR[%d]: %s\n", error, clewErrorString(error));     
246                                 error2 = clGetProgramBuildInfo(program, cldevices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
247                                 if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
248                                 char *build_log =  new char[ret_val_size + 1];
249                                 error2 = clGetProgramBuildInfo(program, cldevices[0], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
250                                 if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
251                                 build_log[ret_val_size] = '\0';
252                                 printf("%s", build_log);
253                                 delete build_log;
254                                 
255                         }
256                         else {
257                                 unsigned int indexDevices;
258                                 for (indexDevices = 0; indexDevices < totalNumberOfDevices; indexDevices++) {
259                                         cl_device_id device = cldevices[indexDevices];
260                                         cl_int vendorID = 0;
261                                         cl_int error = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL);
262                                         if (error!= CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
263                                         OpenCLDevice *clDevice = new OpenCLDevice(context, device, program, vendorID);
264                                         clDevice->initialize(),
265                                             gpudevices.push_back(clDevice);
266                                         if (G.f & G_DEBUG) {
267                                                 char resultString[32];
268                                                 error = clGetDeviceInfo(device, CL_DEVICE_NAME, 32, resultString, 0);
269                                                 printf("OPENCL_DEVICE: %s, ", resultString);
270                                                 error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 32, resultString, 0);
271                                                 printf("%s\n", resultString);
272                                         }
273                                 }
274                         }
275                 }
276                 delete[] cldevices;
277                 delete[] platforms;
278         }
279 #endif
280 #endif
281 }
282
283 void WorkScheduler::deinitialize()
284 {
285 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
286         Device *device;
287         while (cpudevices.size() > 0) {
288                 device = cpudevices.back();
289                 cpudevices.pop_back();
290                 device->deinitialize();
291                 delete device;
292         }
293 #ifdef COM_OPENCL_ENABLED
294         while (gpudevices.size() > 0) {
295                 device = gpudevices.back();
296                 gpudevices.pop_back();
297                 device->deinitialize();
298                 delete device;
299         }
300         if (program) {
301                 clReleaseProgram(program);
302                 program = NULL;
303         }
304         if (context) {
305                 clReleaseContext(context);
306                 context = NULL;
307         }
308 #endif
309 #endif
310 }