f732a40e7682eb51330e8a3596536e9961739532
[blender.git] / source / blender / compositor / intern / COM_WorkScheduler.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * Contributor: 
19  *              Jeroen Bakker 
20  *              Monique Dewanchand
21  */
22
23 #include <list>
24 #include <stdio.h>
25
26 #include "BKE_global.h"
27
28 #include "COM_WorkScheduler.h"
29 #include "COM_CPUDevice.h"
30 #include "COM_OpenCLDevice.h"
31 #include "COM_OpenCLKernels.cl.h"
32 #include "OCL_opencl.h"
33 #include "COM_WriteBufferOperation.h"
34
35 #include "MEM_guardedalloc.h"
36
37 #include "PIL_time.h"
38 #include "BLI_threads.h"
39
40 #if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD
41 #  ifndef DEBUG  /* test this so we dont get warnings in debug builds */
42 #    warning COM_CURRENT_THREADING_MODEL COM_TM_NOTHREAD is activated. Use only for debugging.
43 #  endif
44 #elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
45    /* do nothing - default */
46 #else
47 #  error COM_CURRENT_THREADING_MODEL No threading model selected
48 #endif
49
50
51 /// @brief list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
52 static vector<CPUDevice *> g_cpudevices;
53
54 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
55 /// @brief list of all thread for every CPUDevice in cpudevices a thread exists
56 static ListBase g_cputhreads;
57 static bool g_cpuInitialized = false;
58 /// @brief all scheduled work for the cpu
59 static ThreadQueue *g_cpuqueue;
60 static ThreadQueue *g_gpuqueue;
61 #ifdef COM_OPENCL_ENABLED
62 static cl_context g_context;
63 static cl_program g_program;
64 /// @brief list of all OpenCLDevices. for every OpenCL GPU device an instance of OpenCLDevice is created
65 static vector<OpenCLDevice *> g_gpudevices;
66 /// @brief list of all thread for every GPUDevice in cpudevices a thread exists
67 static ListBase g_gputhreads;
68 /// @brief all scheduled work for the gpu
69 #ifdef COM_OPENCL_ENABLED
70 static bool g_openclActive = false;
71 static bool g_openclInitialized = false;
72 #endif
73 #endif
74 #endif
75
/// @brief number of node pointers each highlight buffer can hold
#define MAX_HIGHLIGHT 8
/// @brief set by WorkScheduler::initialize() and cleared by WorkScheduler::deinitialize();
///        while false the highlight functions and the HIGHLIGHT macro do nothing
static bool g_highlightInitialized = false;
extern "C" {
/// @brief next free slot in g_highlightedNodes; reset to 0 by COM_startReadHighlights()
int g_highlightIndex;
/// @brief buffer of MAX_HIGHLIGHT node pointers that the HIGHLIGHT macro writes into
void **g_highlightedNodes;
/// @brief buffer searched by COM_isHighlightedbNode(); it is the previous write buffer,
///        handed over by COM_startReadHighlights()
void **g_highlightedNodesRead;
82
/**
 * Record the editor node behind a work package in the highlight write buffer.
 *
 * Only complex execution groups whose output operation is a WriteBufferOperation are
 * considered. The bNode of that operation's input is stored in g_highlightedNodes
 * (its `original` node when that pointer is set), as long as highlighting is initialized
 * and fewer than MAX_HIGHLIGHT entries were recorded since the last buffer swap.
 *
 * The body is wrapped in do { } while (0) so that `HIGHLIGHT(x);` is a single statement
 * in every context (for example an un-braced if/else), and the argument is parenthesised
 * so any pointer expression can be passed.
 *
 * NOTE(review): this is expanded inside the worker thread loops and the bounds check and
 * increment of g_highlightIndex are not synchronized -- confirm whether that is acceptable.
 */
#define HIGHLIGHT(wp) \
do { \
        ExecutionGroup *group = (wp)->getExecutionGroup(); \
        if (group->isComplex()) { \
                NodeOperation *operation = group->getOutputNodeOperation(); \
                if (operation->isWriteBufferOperation()) { \
                        WriteBufferOperation *writeOperation = (WriteBufferOperation *)operation; \
                        NodeOperation *complexOperation = writeOperation->getInput(); \
                        bNode *node = complexOperation->getbNode(); \
                        if (node) { \
                                if (node->original) { \
                                        node = node->original; \
                                } \
                                if (g_highlightInitialized && g_highlightedNodes) { \
                                        if (g_highlightIndex < MAX_HIGHLIGHT) { \
                                                g_highlightedNodes[g_highlightIndex++] = node; \
                                        } \
                                } \
                        } \
                } \
        } \
} while (0)
105
106 void COM_startReadHighlights()
107 {
108         if (!g_highlightInitialized)
109         {
110                 return;
111         }
112         
113         if (g_highlightedNodesRead) 
114         {
115                 MEM_freeN(g_highlightedNodesRead);
116         }
117         
118         g_highlightedNodesRead = g_highlightedNodes;
119         g_highlightedNodes = (void **)MEM_callocN(sizeof(void *) * MAX_HIGHLIGHT, __func__);
120         g_highlightIndex = 0;
121 }
122
123 int COM_isHighlightedbNode(bNode *bnode)
124 {
125         if (!g_highlightInitialized) {
126                 return false;
127         }
128         
129         if (!g_highlightedNodesRead) {
130                 return false;
131         }
132
133         for (int i = 0; i < MAX_HIGHLIGHT; i++) {
134                 void *p = g_highlightedNodesRead[i];
135                 if (!p) return false;
136                 if (p == bnode) return true;
137         }
138         return false;
139 }
140 } // end extern "C"
141
142 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
143 void *WorkScheduler::thread_execute_cpu(void *data)
144 {
145         Device *device = (Device *)data;
146         WorkPackage *work;
147         
148         while ((work = (WorkPackage *)BLI_thread_queue_pop(g_cpuqueue))) {
149                 HIGHLIGHT(work);
150                 device->execute(work);
151                 delete work;
152         }
153         
154         return NULL;
155 }
156
157 void *WorkScheduler::thread_execute_gpu(void *data)
158 {
159         Device *device = (Device *)data;
160         WorkPackage *work;
161         
162         while ((work = (WorkPackage *)BLI_thread_queue_pop(g_gpuqueue))) {
163                 HIGHLIGHT(work);
164                 device->execute(work);
165                 delete work;
166         }
167         
168         return NULL;
169 }
170 #endif
171
172
173
174 void WorkScheduler::schedule(ExecutionGroup *group, int chunkNumber)
175 {
176         WorkPackage *package = new WorkPackage(group, chunkNumber);
177 #if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD
178         CPUDevice device;
179         device.execute(package);
180         delete package;
181 #elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
182 #ifdef COM_OPENCL_ENABLED
183         if (group->isOpenCL() && g_openclActive) {
184                 BLI_thread_queue_push(g_gpuqueue, package);
185         }
186         else {
187                 BLI_thread_queue_push(g_cpuqueue, package);
188         }
189 #else
190         BLI_thread_queue_push(cpuqueue, package);
191 #endif
192 #endif
193 }
194
195 void WorkScheduler::start(CompositorContext &context)
196 {
197 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
198         unsigned int index;
199         g_cpuqueue = BLI_thread_queue_init();
200         BLI_init_threads(&g_cputhreads, thread_execute_cpu, g_cpudevices.size());
201         for (index = 0; index < g_cpudevices.size(); index++) {
202                 Device *device = g_cpudevices[index];
203                 BLI_insert_thread(&g_cputhreads, device);
204         }
205 #ifdef COM_OPENCL_ENABLED
206         if (context.getHasActiveOpenCLDevices()) {
207                 g_gpuqueue = BLI_thread_queue_init();
208                 BLI_init_threads(&g_gputhreads, thread_execute_gpu, g_gpudevices.size());
209                 for (index = 0; index < g_gpudevices.size(); index++) {
210                         Device *device = g_gpudevices[index];
211                         BLI_insert_thread(&g_gputhreads, device);
212                 }
213                 g_openclActive = true;
214         }
215         else {
216                 g_openclActive = false;
217         }
218 #endif
219 #endif
220 }
221 void WorkScheduler::finish()
222 {
223 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
224 #ifdef COM_OPENCL_ENABLED
225         if (g_openclActive) {
226                 BLI_thread_queue_wait_finish(g_gpuqueue);
227                 BLI_thread_queue_wait_finish(g_cpuqueue);
228         }
229         else {
230                 BLI_thread_queue_wait_finish(g_cpuqueue);
231         }
232 #else
233         BLI_thread_queue_wait_finish(cpuqueue);
234 #endif
235 #endif
236 }
237 void WorkScheduler::stop()
238 {
239 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
240         BLI_thread_queue_nowait(g_cpuqueue);
241         BLI_end_threads(&g_cputhreads);
242         BLI_thread_queue_free(g_cpuqueue);
243         g_cpuqueue = NULL;
244 #ifdef COM_OPENCL_ENABLED
245         if (g_openclActive) {
246                 BLI_thread_queue_nowait(g_gpuqueue);
247                 BLI_end_threads(&g_gputhreads);
248                 BLI_thread_queue_free(g_gpuqueue);
249                 g_gpuqueue = NULL;
250         }
251 #endif
252 #endif
253 }
254
255 bool WorkScheduler::hasGPUDevices()
256 {
257 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
258 #ifdef COM_OPENCL_ENABLED
259         return g_gpudevices.size() > 0;
260 #else
261         return 0;
262 #endif
263 #else
264         return 0;
265 #endif
266 }
267
/**
 * Error callback passed to clCreateContext(); prints the reported message to stdout.
 *
 * @param errinfo      error text to print
 * @param private_info not used
 * @param cb           not used
 * @param user_data    not used
 */
extern void clContextError(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
        /* only the message text is reported */
        (void)private_info;
        (void)cb;
        (void)user_data;
        fprintf(stdout, "OPENCL error: %s\n", errinfo);
}
272
273 void WorkScheduler::initialize(bool use_opencl)
274 {
275         /* initialize highlighting */
276         if (!g_highlightInitialized) {
277                 if (g_highlightedNodesRead) MEM_freeN(g_highlightedNodesRead);
278                 if (g_highlightedNodes)     MEM_freeN(g_highlightedNodes);
279
280                 g_highlightedNodesRead = NULL;
281                 g_highlightedNodes = NULL;
282
283                 COM_startReadHighlights();
284
285                 g_highlightInitialized = true;
286         }
287
288 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
289         /* initialize CPU threads */
290         if (!g_cpuInitialized) {
291                 int numberOfCPUThreads = BLI_system_thread_count();
292
293                 for (int index = 0; index < numberOfCPUThreads; index++) {
294                         CPUDevice *device = new CPUDevice();
295                         device->initialize();
296                         g_cpudevices.push_back(device);
297                 }
298
299                 g_cpuInitialized = true;
300         }
301
302 #ifdef COM_OPENCL_ENABLED
303         /* deinitialize OpenCL GPU's */
304         if (use_opencl && !g_openclInitialized) {
305                 g_context = NULL;
306                 g_program = NULL;
307
308                 OCL_init(); /* this will check and skip if already initialized */
309
310                 if (clCreateContextFromType) {
311                         cl_uint numberOfPlatforms = 0;
312                         cl_int error;
313                         error = clGetPlatformIDs(0, 0, &numberOfPlatforms);
314                         if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
315                         if (G.f & G_DEBUG) printf("%d number of platforms\n", numberOfPlatforms);
316                         cl_platform_id *platforms = (cl_platform_id *)MEM_mallocN(sizeof(cl_platform_id) * numberOfPlatforms, __func__);
317                         error = clGetPlatformIDs(numberOfPlatforms, platforms, 0);
318                         unsigned int indexPlatform;
319                         for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) {
320                                 cl_platform_id platform = platforms[indexPlatform];
321                                 cl_uint numberOfDevices = 0;
322                                 clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, 0, &numberOfDevices);
323                                 if (numberOfDevices <= 0)
324                                         continue;
325
326                                 cl_device_id *cldevices = (cl_device_id *)MEM_mallocN(sizeof(cl_device_id) * numberOfDevices, __func__);
327                                 clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numberOfDevices, cldevices, 0);
328
329                                 g_context = clCreateContext(NULL, numberOfDevices, cldevices, clContextError, NULL, &error);
330                                 if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error));  }
331                                 const char *cl_str[2] = {datatoc_COM_OpenCLKernels_cl, NULL};
332                                 g_program = clCreateProgramWithSource(g_context, 1, cl_str, 0, &error);
333                                 error = clBuildProgram(g_program, numberOfDevices, cldevices, 0, 0, 0);
334                                 if (error != CL_SUCCESS) { 
335                                         cl_int error2;
336                                         size_t ret_val_size = 0;
337                                         printf("CLERROR[%d]: %s\n", error, clewErrorString(error));     
338                                         error2 = clGetProgramBuildInfo(g_program, cldevices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
339                                         if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
340                                         char *build_log = (char *)MEM_mallocN(sizeof(char) * ret_val_size + 1, __func__);
341                                         error2 = clGetProgramBuildInfo(g_program, cldevices[0], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
342                                         if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
343                                         build_log[ret_val_size] = '\0';
344                                         printf("%s", build_log);
345                                         MEM_freeN(build_log);
346                                 }
347                                 else {
348                                         unsigned int indexDevices;
349                                         for (indexDevices = 0; indexDevices < numberOfDevices; indexDevices++) {
350                                                 cl_device_id device = cldevices[indexDevices];
351                                                 cl_int vendorID = 0;
352                                                 cl_int error2 = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL);
353                                                 if (error2 != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error2, clewErrorString(error2)); }
354                                                 OpenCLDevice *clDevice = new OpenCLDevice(g_context, device, g_program, vendorID);
355                                                 clDevice->initialize();
356                                                 g_gpudevices.push_back(clDevice);
357                                         }
358                                 }
359                                 MEM_freeN(cldevices);
360                         }
361                         MEM_freeN(platforms);
362                 }
363
364                 g_openclInitialized = true;
365         }
366 #endif
367 #endif
368 }
369
370 void WorkScheduler::deinitialize()
371 {
372 #if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
373         /* deinitialize CPU threads */
374         if (g_cpuInitialized) {
375                 Device *device;
376                 while (g_cpudevices.size() > 0) {
377                         device = g_cpudevices.back();
378                         g_cpudevices.pop_back();
379                         device->deinitialize();
380                         delete device;
381                 }
382
383                 g_cpuInitialized = false;
384         }
385
386 #ifdef COM_OPENCL_ENABLED
387         /* deinitialize OpenCL GPU's */
388         if (g_openclInitialized) {
389                 Device *device;
390                 while (g_gpudevices.size() > 0) {
391                         device = g_gpudevices.back();
392                         g_gpudevices.pop_back();
393                         device->deinitialize();
394                         delete device;
395                 }
396                 if (g_program) {
397                         clReleaseProgram(g_program);
398                         g_program = NULL;
399                 }
400                 if (g_context) {
401                         clReleaseContext(g_context);
402                         g_context = NULL;
403                 }
404
405                 g_openclInitialized = false;
406         }
407 #endif
408 #endif
409
410         /* deinitialize highlighting */
411         if (g_highlightInitialized) {
412                 g_highlightInitialized = false;
413                 if (g_highlightedNodes) {
414                         MEM_freeN(g_highlightedNodes);
415                         g_highlightedNodes = NULL;
416                 }
417
418                 if (g_highlightedNodesRead) {
419                         MEM_freeN(g_highlightedNodesRead);
420                         g_highlightedNodesRead = NULL;
421                 }
422         }
423 }
424