== SoC Bullet - Bullet Upgrade to 2.76 ==
[blender.git] / extern / bullet2 / BulletMultiThreaded / MiniCL.cpp
1 /*
2    Copyright (C) 2010 Sony Computer Entertainment Inc.
3    All rights reserved.
4
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose, 
8 including commercial applications, and to alter it and redistribute it freely, 
9 subject to the following restrictions:
10
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
14
15 */
16
17
18 #include "MiniCL/cl.h"
19 #define __PHYSICS_COMMON_H__ 1
20 #ifdef _WIN32
21 #include "BulletMultiThreaded/Win32ThreadSupport.h"
22 #endif
23
24 #include "BulletMultiThreaded/SequentialThreadSupport.h"
25 #include "MiniCLTaskScheduler.h"
26 #include "MiniCLTask/MiniCLTask.h"
27 #include "LinearMath/btMinMax.h"
28
29 //#define DEBUG_MINICL_KERNELS 1
30
31
32
33
34 CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
35         cl_device_id            device ,
36         cl_device_info          param_name ,
37         size_t                  param_value_size ,
38         void *                  param_value ,
39         size_t *                /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
40 {
41
42         switch (param_name)
43         {
44         case CL_DEVICE_NAME:
45                 {
46                         char deviceName[] = "CPU";
47                         unsigned int nameLen = strlen(deviceName)+1;
48                         assert(param_value_size>strlen(deviceName));
49                         if (nameLen < param_value_size)
50                         {
51                                 const char* cpuName = "CPU";
52                                 sprintf((char*)param_value,"%s",cpuName);
53                         } else
54                         {
55                                 printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size);
56                         }
57                         break;
58                 }
59         case CL_DEVICE_TYPE:
60                 {
61                         if (param_value_size>=sizeof(cl_device_type))
62                         {
63                                 cl_device_type* deviceType = (cl_device_type*)param_value;
64                                 *deviceType = CL_DEVICE_TYPE_CPU;
65                         } else
66                         {
67                                 printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type));
68                         }
69                         break;
70                 }
71         case CL_DEVICE_MAX_COMPUTE_UNITS:
72                 {
73                         if (param_value_size>=sizeof(cl_uint))
74                         {
75                                 cl_uint* numUnits = (cl_uint*)param_value;
76                                 *numUnits= 4;
77                         } else
78                         {
79                                 printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
80                         }
81
82                         break;
83                 }
84         case CL_DEVICE_MAX_WORK_ITEM_SIZES:
85                 {
86                         size_t workitem_size[3];
87
88                         if (param_value_size>=sizeof(workitem_size))
89                         {
90                                 size_t* workItemSize = (size_t*)param_value;
91                                 workItemSize[0] = 64;
92                                 workItemSize[1] = 24;
93                                 workItemSize[2] = 16;
94                         } else
95                         {
96                                 printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
97                         }
98                         break;
99                 }
100         case CL_DEVICE_MAX_CLOCK_FREQUENCY:
101                 {
102                          cl_uint* clock_frequency = (cl_uint*)param_value;
103                          *clock_frequency = 3*1024;
104                         break;
105                 }
106         default:
107                 {
108                         printf("error: unsupported param_name:%d\n",param_name);
109                 }
110         }
111
112
113         return 0;
114 }
115
116 CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
117 {
118         return 0;
119 }
120
121
122
123 CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
124 {
125         return 0;
126 }
127
128 CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
129 {
130         return 0;
131 }
132
133 CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0
134 {
135         return 0;
136 }
137
138
139 // Enqueued Commands APIs
140 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue     command_queue ,
141                     cl_mem               buffer ,
142                     cl_bool             /* blocking_read */,
143                     size_t               offset ,
144                     size_t               cb , 
145                     void *               ptr ,
146                     cl_uint             /* num_events_in_wait_list */,
147                     const cl_event *    /* event_wait_list */,
148                     cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
149 {
150         MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
151
152         ///wait for all work items to be completed
153         scheduler->flush();
154
155         memcpy(ptr,(char*)buffer + offset,cb);
156         return 0;
157 }
158
159
160 CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program            /* program */,
161                       cl_device_id          /* device */,
162                       cl_program_build_info /* param_name */,
163                       size_t                /* param_value_size */,
164                       void *                /* param_value */,
165                       size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
166 {
167
168         return 0;
169 }
170
171
172 // Program Object APIs
173 CL_API_ENTRY cl_program
174 clCreateProgramWithSource(cl_context         context ,
175                           cl_uint           /* count */,
176                           const char **     /* strings */,
177                           const size_t *    /* lengths */,
178                           cl_int *          errcode_ret ) CL_API_SUFFIX__VERSION_1_0
179 {
180         *errcode_ret = CL_SUCCESS;
181         return (cl_program)context;
182 }
183
184 CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue     command_queue ,
185                     cl_mem               buffer ,
186                     cl_bool             /* blocking_read */,
187                     size_t              offset,
188                     size_t               cb , 
189                     const void *         ptr ,
190                     cl_uint             /* num_events_in_wait_list */,
191                     const cl_event *    /* event_wait_list */,
192                     cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
193 {
194         MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
195
196         ///wait for all work items to be completed
197         scheduler->flush();
198
199         memcpy((char*)buffer + offset, ptr,cb);
200         return 0;
201 }
202
203 CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue  command_queue)
204 {
205         MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
206         ///wait for all work items to be completed
207         scheduler->flush();
208         return 0;
209 }
210
211
212 CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
213                        cl_kernel         clKernel ,
214                        cl_uint           work_dim ,
215                        const size_t *   /* global_work_offset */,
216                        const size_t *    global_work_size ,
217                        const size_t *   /* local_work_size */,
218                        cl_uint          /* num_events_in_wait_list */,
219                        const cl_event * /* event_wait_list */,
220                        cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0
221 {
222
223         
224         MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
225         for (unsigned int ii=0;ii<work_dim;ii++)
226         {
227                 int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
228                 int numWorkItems = global_work_size[ii];
229
230 //              //at minimum 64 work items per task
231 //              int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
232                 int numWorkItemsPerTask = numWorkItems / maxTask;
233                 if (!numWorkItemsPerTask) numWorkItemsPerTask = 1;
234
235                 for (int t=0;t<numWorkItems;)
236                 {
237                         //Performance Hint: tweak this number during benchmarking
238                         int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
239                         kernel->m_scheduler->issueTask(t, endIndex, kernel);
240                         t = endIndex;
241                 }
242         }
243 /*
244
245         void* bla = 0;
246
247         scheduler->issueTask(bla,2,3);
248         scheduler->flush();
249
250         */
251
252         return 0;
253 }
254
#define LOCAL_BUF_SIZE 32768 // arena capacity, counted in 16-byte units
// Backing store: 4 ints (16 bytes, assuming 4-byte int) per unit, plus spare ints for alignment.
static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16];
static int* spLocalBufCurr = NULL;
static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call
/// Hands out a 16-byte aligned chunk from a static arena that is recycled
/// from the start once it is full.
///
/// Nothing is ever freed: when a request does not fit into what is left, the
/// arena is rewound and earlier chunks are handed out again.
///
/// @param size  requested size in bytes
/// @return pointer to the chunk, or NULL when size is negative or larger than
///         the whole arena (LOCAL_BUF_SIZE * 16 bytes)
static void* localBufMalloc(int size)
{
	int size16;
	void* ret;

	// a request that can never fit must not run past the end of the arena
	if (size < 0 || size > LOCAL_BUF_SIZE * 16)
	{
		return NULL;
	}

	size16 = (size + 15) >> 4; // in 16-byte units
	if ((spLocalBufCurr == NULL) || ((sLocalBufUsed + size16) > LOCAL_BUF_SIZE))
	{ // reset
		spLocalBufCurr = sLocalMemBuf;
		// size_t instead of long: long is narrower than a pointer on some 64-bit ABIs
		while ((size_t)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes
		sLocalBufUsed = 0;
	}
	ret = spLocalBufCurr;
	spLocalBufCurr += size16 * 4;
	// account in the same 16-byte units the capacity check above uses
	sLocalBufUsed += size16;
	return ret;
}
273
274
275
276 CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel    clKernel ,
277                cl_uint      arg_index ,
278                size_t       arg_size ,
279                const void *  arg_value ) CL_API_SUFFIX__VERSION_1_0
280 {
281         MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
282         btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
283         if (arg_index>MINI_CL_MAX_ARG)
284         {
285                 printf("error: clSetKernelArg arg_index (%d) exceeds %d\n",arg_index,MINI_CL_MAX_ARG);
286         } else
287         {
288 //              if (arg_size>=MINICL_MAX_ARGLENGTH)
289                 if (arg_size != MINICL_MAX_ARGLENGTH)
290                 {
291                         printf("error: clSetKernelArg argdata too large: %d (maximum is %d)\n",arg_size,MINICL_MAX_ARGLENGTH);
292                 } 
293                 else
294                 {
295                         if(arg_value == NULL)
296                         {       // this is only for __local memory qualifier
297                                 void* ptr = localBufMalloc(arg_size);
298                                 kernel->m_argData[arg_index] = ptr;
299                         }
300                         else
301                         {
302                                 memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
303                         }
304                         kernel->m_argSizes[arg_index] = arg_size;
305                         if(arg_index >= kernel->m_numArgs)
306                         {
307                                 kernel->m_numArgs = arg_index + 1;
308                                 kernel->updateLauncher();
309                         }
310                 }
311         }
312         return 0;
313 }
314
315 // Kernel Object APIs
316 CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program       program ,
317                const char *     kernel_name ,
318                cl_int *         errcode_ret ) CL_API_SUFFIX__VERSION_1_0
319 {
320         MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
321         MiniCLKernel* kernel = new MiniCLKernel();
322         int nameLen = strlen(kernel_name);
323         if(nameLen >= MINI_CL_MAX_KERNEL_NAME)
324         {
325                 *errcode_ret = CL_INVALID_KERNEL_NAME;
326                 return NULL;
327         }
328         strcpy(kernel->m_name, kernel_name);
329         kernel->m_numArgs = 0;
330
331         //kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name);
332         //if (kernel->m_kernelProgramCommandId>=0)
333         //{
334         //      *errcode_ret = CL_SUCCESS;
335         //} else
336         //{
337         //      *errcode_ret = CL_INVALID_KERNEL_NAME;
338         //}
339         kernel->m_scheduler = scheduler;
340         if(kernel->registerSelf() == NULL)
341         {
342                 *errcode_ret = CL_INVALID_KERNEL_NAME;
343                 return NULL;
344         }
345         else
346         {
347                 *errcode_ret = CL_SUCCESS;
348         }
349
350         return (cl_kernel)kernel;
351
352 }
353
354
355 CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program           /* program */,
356                cl_uint              /* num_devices */,
357                const cl_device_id * /* device_list */,
358                const char *         /* options */, 
359                void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
360                void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0
361 {
362         return CL_SUCCESS;
363 }
364
365 CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context                     context ,
366                           cl_uint                        /* num_devices */,
367                           const cl_device_id *           /* device_list */,
368                           const size_t *                 /* lengths */,
369                           const unsigned char **         /* binaries */,
370                           cl_int *                       /* binary_status */,
371                           cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
372 {
373         return (cl_program)context;
374 }
375
376
377 // Memory Object APIs
378 CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context   /* context */,
379                cl_mem_flags flags ,
380                size_t       size,
381                void *       host_ptr ,
382                cl_int *     errcode_ret ) CL_API_SUFFIX__VERSION_1_0
383 {
384         cl_mem buf = (cl_mem)malloc(size);
385         if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
386         {
387                 memcpy(buf,host_ptr,size);
388         }
389         *errcode_ret = 0;
390         return buf;
391 }
392
393 // Command Queue APIs
394 CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context                      context , 
395                      cl_device_id                   /* device */, 
396                      cl_command_queue_properties    /* properties */,
397                      cl_int *                        errcode_ret ) CL_API_SUFFIX__VERSION_1_0
398 {
399         *errcode_ret = 0;
400         return (cl_command_queue) context;
401 }
402
403 extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context         /* context */, 
404                  cl_context_info    param_name , 
405                  size_t             param_value_size , 
406                  void *             param_value, 
407                  size_t *           param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
408 {
409
410         switch (param_name)
411         {
412         case CL_CONTEXT_DEVICES:
413                 {
414                         if (!param_value_size)
415                         {
416                                 *param_value_size_ret = 13;
417                         } else
418                         {
419                                 const char* testName = "MiniCL_Test.";
420                                 sprintf((char*)param_value,"%s",testName);
421                         }
422                         break;
423                 };
424         default:
425                 {
426                         printf("unsupported\n");
427                 }
428         }
429         
430         return 0;
431 }
432
433 CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
434                         cl_device_type          /* device_type */,
435                         void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
436                         void *                  /* user_data */,
437                         cl_int *                 errcode_ret ) CL_API_SUFFIX__VERSION_1_0
438 {
439         int maxNumOutstandingTasks = 4;
440 //      int maxNumOutstandingTasks = 2;
441 //      int maxNumOutstandingTasks = 1;
442         gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
443         const int maxNumOfThreadSupports = 8;
444         static int sUniqueThreadSupportIndex = 0;
445         static char* sUniqueThreadSupportName[maxNumOfThreadSupports] = 
446         {
447                 "MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" 
448         };
449
450 #ifdef DEBUG_MINICL_KERNELS
451         SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
452         SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
453 #else
454
455 #if _WIN32
456         btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
457         Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
458 //                                                              "MiniCL",
459                                                                 sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
460                                                                 processMiniCLTask, //processCollisionTask,
461                                                                 createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
462                                                                 maxNumOutstandingTasks));
463 #else
464         ///todo: add posix thread support for other platforms
465         SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
466         SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc);
467 #endif
468
469 #endif //DEBUG_MINICL_KERNELS
470         
471         
472         MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
473
474         *errcode_ret = 0;
475         return (cl_context)scheduler;
476 }
477
478 CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context  context ) CL_API_SUFFIX__VERSION_1_0
479 {
480
481         MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
482         
483         btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
484         delete scheduler;
485         delete threadSupport;
486         
487         return 0;
488 }
489 extern CL_API_ENTRY cl_int CL_API_CALL
490 clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
491 {
492         return CL_SUCCESS;
493 }
494
495
496 extern CL_API_ENTRY cl_int CL_API_CALL
497 clGetKernelWorkGroupInfo(cl_kernel                   kernel ,
498                          cl_device_id               /* device */,
499                          cl_kernel_work_group_info  wgi/* param_name */,
500                          size_t   sz                  /* param_value_size */,
501                          void *     ptr                /* param_value */,
502                          size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
503 {
504         if((wgi == CL_KERNEL_WORK_GROUP_SIZE)
505          &&(sz == sizeof(int))
506          &&(ptr != NULL))
507         {
508                 MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel;
509                 MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler;
510                 *((int*)ptr) = scheduler->getMaxNumOutstandingTasks();
511                 return CL_SUCCESS;
512         }
513         else
514         {
515                 return CL_INVALID_VALUE;
516         }
517 }