47d099a1f949771d90a32e383f3cba0d32f6f093
[blender-staging.git] / intern / cycles / device / opencl / opencl_util.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef WITH_OPENCL
18
19 #include "device/opencl/opencl.h"
20
21 #include "util/util_logging.h"
22 #include "util/util_md5.h"
23 #include "util/util_path.h"
24 #include "util/util_time.h"
25
26 using std::cerr;
27 using std::endl;
28
29 CCL_NAMESPACE_BEGIN
30
31 OpenCLCache::Slot::ProgramEntry::ProgramEntry()
32  : program(NULL),
33    mutex(NULL)
34 {
35 }
36
37 OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry& rhs)
38  : program(rhs.program),
39    mutex(NULL)
40 {
41 }
42
43 OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
44 {
45         delete mutex;
46 }
47
48 OpenCLCache::Slot::Slot()
49  : context_mutex(NULL),
50    context(NULL)
51 {
52 }
53
54 OpenCLCache::Slot::Slot(const Slot& rhs)
55  : context_mutex(NULL),
56    context(NULL),
57    programs(rhs.programs)
58 {
59 }
60
61 OpenCLCache::Slot::~Slot()
62 {
63         delete context_mutex;
64 }
65
66 OpenCLCache& OpenCLCache::global_instance()
67 {
68         static OpenCLCache instance;
69         return instance;
70 }
71
72 cl_context OpenCLCache::get_context(cl_platform_id platform,
73                                     cl_device_id device,
74                                     thread_scoped_lock& slot_locker)
75 {
76         assert(platform != NULL);
77
78         OpenCLCache& self = global_instance();
79
80         thread_scoped_lock cache_lock(self.cache_lock);
81
82         pair<CacheMap::iterator,bool> ins = self.cache.insert(
83                 CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
84
85         Slot &slot = ins.first->second;
86
87         /* create slot lock only while holding cache lock */
88         if(!slot.context_mutex)
89                 slot.context_mutex = new thread_mutex;
90
91         /* need to unlock cache before locking slot, to allow store to complete */
92         cache_lock.unlock();
93
94         /* lock the slot */
95         slot_locker = thread_scoped_lock(*slot.context_mutex);
96
97         /* If the thing isn't cached */
98         if(slot.context == NULL) {
99                 /* return with the caller's lock holder holding the slot lock */
100                 return NULL;
101         }
102
103         /* the item was already cached, release the slot lock */
104         slot_locker.unlock();
105
106         cl_int ciErr = clRetainContext(slot.context);
107         assert(ciErr == CL_SUCCESS);
108         (void)ciErr;
109
110         return slot.context;
111 }
112
113 cl_program OpenCLCache::get_program(cl_platform_id platform,
114                                     cl_device_id device,
115                                     ustring key,
116                                     thread_scoped_lock& slot_locker)
117 {
118         assert(platform != NULL);
119
120         OpenCLCache& self = global_instance();
121
122         thread_scoped_lock cache_lock(self.cache_lock);
123
124         pair<CacheMap::iterator,bool> ins = self.cache.insert(
125                 CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
126
127         Slot &slot = ins.first->second;
128
129         pair<Slot::EntryMap::iterator,bool> ins2 = slot.programs.insert(
130                 Slot::EntryMap::value_type(key, Slot::ProgramEntry()));
131
132         Slot::ProgramEntry &entry = ins2.first->second;
133
134         /* create slot lock only while holding cache lock */
135         if(!entry.mutex)
136                 entry.mutex = new thread_mutex;
137
138         /* need to unlock cache before locking slot, to allow store to complete */
139         cache_lock.unlock();
140
141         /* lock the slot */
142         slot_locker = thread_scoped_lock(*entry.mutex);
143
144         /* If the thing isn't cached */
145         if(entry.program == NULL) {
146                 /* return with the caller's lock holder holding the slot lock */
147                 return NULL;
148         }
149
150         /* the item was already cached, release the slot lock */
151         slot_locker.unlock();
152
153         cl_int ciErr = clRetainProgram(entry.program);
154         assert(ciErr == CL_SUCCESS);
155         (void)ciErr;
156
157         return entry.program;
158 }
159
160 void OpenCLCache::store_context(cl_platform_id platform,
161                                 cl_device_id device,
162                                 cl_context context,
163                                 thread_scoped_lock& slot_locker)
164 {
165         assert(platform != NULL);
166         assert(device != NULL);
167         assert(context != NULL);
168
169         OpenCLCache &self = global_instance();
170
171         thread_scoped_lock cache_lock(self.cache_lock);
172         CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
173         cache_lock.unlock();
174
175         Slot &slot = i->second;
176
177         /* sanity check */
178         assert(i != self.cache.end());
179         assert(slot.context == NULL);
180
181         slot.context = context;
182
183         /* unlock the slot */
184         slot_locker.unlock();
185
186         /* increment reference count in OpenCL.
187          * The caller is going to release the object when done with it. */
188         cl_int ciErr = clRetainContext(context);
189         assert(ciErr == CL_SUCCESS);
190         (void)ciErr;
191 }
192
193 void OpenCLCache::store_program(cl_platform_id platform,
194                                 cl_device_id device,
195                                 cl_program program,
196                                 ustring key,
197                                 thread_scoped_lock& slot_locker)
198 {
199         assert(platform != NULL);
200         assert(device != NULL);
201         assert(program != NULL);
202
203         OpenCLCache &self = global_instance();
204
205         thread_scoped_lock cache_lock(self.cache_lock);
206
207         CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
208         assert(i != self.cache.end());
209         Slot &slot = i->second;
210
211         Slot::EntryMap::iterator i2 = slot.programs.find(key);
212         assert(i2 != slot.programs.end());
213         Slot::ProgramEntry &entry = i2->second;
214
215         assert(entry.program == NULL);
216
217         cache_lock.unlock();
218
219         entry.program = program;
220
221         /* unlock the slot */
222         slot_locker.unlock();
223
224         /* Increment reference count in OpenCL.
225          * The caller is going to release the object when done with it.
226          */
227         cl_int ciErr = clRetainProgram(program);
228         assert(ciErr == CL_SUCCESS);
229         (void)ciErr;
230 }
231
232 string OpenCLCache::get_kernel_md5()
233 {
234         OpenCLCache &self = global_instance();
235         thread_scoped_lock lock(self.kernel_md5_lock);
236
237         if(self.kernel_md5.empty()) {
238                 self.kernel_md5 = path_files_md5_hash(path_get("source"));
239         }
240         return self.kernel_md5;
241 }
242
243 OpenCLDeviceBase::OpenCLProgram::OpenCLProgram(OpenCLDeviceBase *device,
244                                                const string& program_name,
245                                                const string& kernel_file,
246                                                const string& kernel_build_options,
247                                                bool use_stdout)
248  : device(device),
249    program_name(program_name),
250    kernel_file(kernel_file),
251    kernel_build_options(kernel_build_options),
252    use_stdout(use_stdout)
253 {
254         loaded = false;
255         program = NULL;
256 }
257
258 OpenCLDeviceBase::OpenCLProgram::~OpenCLProgram()
259 {
260         release();
261 }
262
263 void OpenCLDeviceBase::OpenCLProgram::release()
264 {
265         for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
266                 if(kernel->second) {
267                         clReleaseKernel(kernel->second);
268                         kernel->second = NULL;
269                 }
270         }
271         if(program) {
272                 clReleaseProgram(program);
273                 program = NULL;
274         }
275 }
276
277 void OpenCLDeviceBase::OpenCLProgram::add_log(const string& msg, bool debug)
278 {
279         if(!use_stdout) {
280                 log += msg + "\n";
281         }
282         else if(!debug) {
283                 printf("%s\n", msg.c_str());
284                 fflush(stdout);
285         }
286         else {
287                 VLOG(2) << msg;
288         }
289 }
290
291 void OpenCLDeviceBase::OpenCLProgram::add_error(const string& msg)
292 {
293         if(use_stdout) {
294                 fprintf(stderr, "%s\n", msg.c_str());
295         }
296         if(error_msg == "") {
297                 error_msg += "\n";
298         }
299         error_msg += msg;
300 }
301
302 void OpenCLDeviceBase::OpenCLProgram::add_kernel(ustring name)
303 {
304         if(!kernels.count(name)) {
305                 kernels[name] = NULL;
306         }
307 }
308
309 bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
310 {
311         string build_options;
312         build_options = device->kernel_build_options(debug_src) + kernel_build_options;
313
314         VLOG(1) << "Build options passed to clBuildProgram: '"
315                 << build_options << "'.";
316         cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
317
318         /* show warnings even if build is successful */
319         size_t ret_val_size = 0;
320
321         clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
322
323         if(ciErr != CL_SUCCESS) {
324                 add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) + ", errors in console.");
325         }
326
327         if(ret_val_size > 1) {
328                 vector<char> build_log(ret_val_size + 1);
329                 clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
330
331                 build_log[ret_val_size] = '\0';
332                 /* Skip meaningless empty output from the NVidia compiler. */
333                 if(!(ret_val_size == 2 && build_log[0] == '\n')) {
334                         add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]), ciErr == CL_SUCCESS);
335                 }
336         }
337
338         return (ciErr == CL_SUCCESS);
339 }
340
341 bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
342 {
343         string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
344         /* We compile kernels consisting of many files. unfortunately OpenCL
345          * kernel caches do not seem to recognize changes in included files.
346          * so we force recompile on changes by adding the md5 hash of all files.
347          */
348         source = path_source_replace_includes(source, path_get("source"));
349         source += "\n// " + util_md5_string(source) + "\n";
350
351         if(debug_src) {
352                 path_write_text(*debug_src, source);
353         }
354
355         size_t source_len = source.size();
356         const char *source_str = source.c_str();
357         cl_int ciErr;
358
359         program = clCreateProgramWithSource(device->cxContext,
360                                             1,
361                                             &source_str,
362                                             &source_len,
363                                             &ciErr);
364
365         if(ciErr != CL_SUCCESS) {
366                 add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
367                 return false;
368         }
369
370         double starttime = time_dt();
371         add_log(string("Compiling OpenCL program ") + program_name.c_str(), false);
372         add_log(string("Build flags: ") + kernel_build_options, true);
373
374         if(!build_kernel(debug_src))
375                 return false;
376
377         add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false);
378
379         return true;
380 }
381
382 bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
383                                                   const string *debug_src)
384 {
385         /* read binary into memory */
386         vector<uint8_t> binary;
387
388         if(!path_read_binary(clbin, binary)) {
389                 add_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
390                 return false;
391         }
392
393         /* create program */
394         cl_int status, ciErr;
395         size_t size = binary.size();
396         const uint8_t *bytes = &binary[0];
397
398         program = clCreateProgramWithBinary(device->cxContext, 1, &device->cdDevice,
399                 &size, &bytes, &status, &ciErr);
400
401         if(status != CL_SUCCESS || ciErr != CL_SUCCESS) {
402                 add_error(string("OpenCL failed create program from cached binary ") + clbin + ": "
403                                  + clewErrorString(status) + " " + clewErrorString(ciErr));
404                 return false;
405         }
406
407         if(!build_kernel(debug_src))
408                 return false;
409
410         return true;
411 }
412
413 bool OpenCLDeviceBase::OpenCLProgram::save_binary(const string& clbin)
414 {
415         size_t size = 0;
416         clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
417
418         if(!size)
419                 return false;
420
421         vector<uint8_t> binary(size);
422         uint8_t *bytes = &binary[0];
423
424         clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
425
426         return path_write_binary(clbin, binary);
427 }
428
429 void OpenCLDeviceBase::OpenCLProgram::load()
430 {
431         assert(device);
432
433         loaded = false;
434
435         string device_md5 = device->device_md5_hash(kernel_build_options);
436
437         /* Try to use cached kernel. */
438         thread_scoped_lock cache_locker;
439         ustring cache_key(program_name + device_md5);
440         program = device->load_cached_kernel(cache_key,
441                                              cache_locker);
442
443         if(!program) {
444                 add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
445
446                 /* need to create source to get md5 */
447                 string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
448                 source = path_source_replace_includes(source, path_get("source"));
449
450                 string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
451                 basename = path_cache_get(path_join("kernels", basename));
452                 string clbin = basename + ".clbin";
453
454                 /* path to preprocessed source for debugging */
455                 string clsrc, *debug_src = NULL;
456
457                 if(OpenCLInfo::use_debug()) {
458                         clsrc = basename + ".cl";
459                         debug_src = &clsrc;
460                 }
461
462                 /* If binary kernel exists already, try use it. */
463                 if(path_exists(clbin) && load_binary(clbin)) {
464                         /* Kernel loaded from binary, nothing to do. */
465                         add_log(string("Loaded program from ") + clbin + ".", true);
466                 }
467                 else {
468                         add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
469
470                         /* If does not exist or loading binary failed, compile kernel. */
471                         if(!compile_kernel(debug_src)) {
472                                 return;
473                         }
474
475                         /* Save binary for reuse. */
476                         if(!save_binary(clbin)) {
477                                 add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
478                         }
479                 }
480
481                 /* Cache the program. */
482                 device->store_cached_kernel(program,
483                                             cache_key,
484                                             cache_locker);
485         }
486         else {
487                 add_log(string("Found cached OpenCL program ") + program_name + ".", true);
488         }
489
490         for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
491                 assert(kernel->second == NULL);
492                 cl_int ciErr;
493                 string name = "kernel_ocl_" + kernel->first.string();
494                 kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
495                 if(device->opencl_error(ciErr)) {
496                         add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " + clewErrorString(ciErr));
497                         return;
498                 }
499         }
500
501         loaded = true;
502 }
503
504 void OpenCLDeviceBase::OpenCLProgram::report_error()
505 {
506         /* If loaded is true, there was no error. */
507         if(loaded) return;
508         /* if use_stdout is true, the error was already reported. */
509         if(use_stdout) return;
510
511         cerr << error_msg << endl;
512         if(!compile_output.empty()) {
513                 cerr << "OpenCL kernel build output for " << program_name << ":" << endl;
514                 cerr << compile_output << endl;
515         }
516 }
517
518 cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()()
519 {
520         assert(kernels.size() == 1);
521         return kernels.begin()->second;
522 }
523
524 cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()(ustring name)
525 {
526         assert(kernels.count(name));
527         return kernels[name];
528 }
529
530 cl_device_type OpenCLInfo::device_type()
531 {
532         switch(DebugFlags().opencl.device_type)
533         {
534                 case DebugFlags::OpenCL::DEVICE_NONE:
535                         return 0;
536                 case DebugFlags::OpenCL::DEVICE_ALL:
537                         return CL_DEVICE_TYPE_ALL;
538                 case DebugFlags::OpenCL::DEVICE_DEFAULT:
539                         return CL_DEVICE_TYPE_DEFAULT;
540                 case DebugFlags::OpenCL::DEVICE_CPU:
541                         return CL_DEVICE_TYPE_CPU;
542                 case DebugFlags::OpenCL::DEVICE_GPU:
543                         return CL_DEVICE_TYPE_GPU;
544                 case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
545                         return CL_DEVICE_TYPE_ACCELERATOR;
546                 default:
547                         return CL_DEVICE_TYPE_ALL;
548         }
549 }
550
551 bool OpenCLInfo::use_debug()
552 {
553         return DebugFlags().opencl.debug;
554 }
555
556 bool OpenCLInfo::use_single_program()
557 {
558         return DebugFlags().opencl.single_program;
559 }
560
561 bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
562 {
563         /* keep this in sync with kernel_types.h! */
564         if(platform == "NVIDIA CUDA")
565                 return true;
566         else if(platform == "Apple")
567                 return true;
568         else if(platform == "AMD Accelerated Parallel Processing")
569                 return true;
570         else if(platform == "Intel(R) OpenCL")
571                 return true;
572         /* Make sure officially unsupported OpenCL platforms
573          * does not set up to use advanced shading.
574          */
575         return false;
576 }
577
578 bool OpenCLInfo::kernel_use_split(const string& platform_name,
579                                   const cl_device_type device_type)
580 {
581         if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) {
582                 VLOG(1) << "Forcing split kernel to use.";
583                 return true;
584         }
585         if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) {
586                 VLOG(1) << "Forcing mega kernel to use.";
587                 return false;
588         }
589         /* TODO(sergey): Replace string lookups with more enum-like API,
590          * similar to device/vendor checks blender's gpu.
591          */
592         if(platform_name == "AMD Accelerated Parallel Processing" &&
593            device_type == CL_DEVICE_TYPE_GPU)
594         {
595                 return true;
596         }
597         return false;
598 }
599
600 bool OpenCLInfo::device_supported(const string& platform_name,
601                                   const cl_device_id device_id)
602 {
603         cl_device_type device_type;
604         if(!get_device_type(device_id, &device_type)) {
605                 return false;
606         }
607         string device_name;
608         if(!get_device_name(device_id, &device_name)) {
609                 return false;
610         }
611
612         int driver_major = 0;
613         int driver_minor = 0;
614         if(!get_driver_version(device_id, &driver_major, &driver_minor)) {
615                 return false;
616         }
617         VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
618
619         /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
620          * (aka, it will not be on Intel framework). This isn't supported
621          * and needs an explicit blacklist.
622          */
623         if(strstr(device_name.c_str(), "Iris")) {
624                 return false;
625         }
626         if(platform_name == "AMD Accelerated Parallel Processing" &&
627            device_type == CL_DEVICE_TYPE_GPU)
628         {
629                 if(driver_major < 2236) {
630                         VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported.";
631                         return false;
632                 }
633                 const char *blacklist[] = {
634                         /* GCN 1 */
635                         "Tahiti", "Pitcairn", "Capeverde", "Oland",
636                         NULL
637                 };
638                 for(int i = 0; blacklist[i] != NULL; i++) {
639                         if(device_name == blacklist[i]) {
640                                 VLOG(1) << "AMD device " << device_name << " not supported";
641                                 return false;
642                         }
643                 }
644                 return true;
645         }
646         if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
647                 return true;
648         }
649         return false;
650 }
651
652 bool OpenCLInfo::platform_version_check(cl_platform_id platform,
653                                         string *error)
654 {
655         const int req_major = 1, req_minor = 1;
656         int major, minor;
657         char version[256];
658         clGetPlatformInfo(platform,
659                           CL_PLATFORM_VERSION,
660                           sizeof(version),
661                           &version,
662                           NULL);
663         if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
664                 if(error != NULL) {
665                         *error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
666                 }
667                 return false;
668         }
669         if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
670                 if(error != NULL) {
671                         *error = string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
672                 }
673                 return false;
674         }
675         if(error != NULL) {
676                 *error = "";
677         }
678         return true;
679 }
680
681 bool OpenCLInfo::device_version_check(cl_device_id device,
682                                       string *error)
683 {
684         const int req_major = 1, req_minor = 1;
685         int major, minor;
686         char version[256];
687         clGetDeviceInfo(device,
688                         CL_DEVICE_OPENCL_C_VERSION,
689                         sizeof(version),
690                         &version,
691                         NULL);
692         if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
693                 if(error != NULL) {
694                         *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
695                 }
696                 return false;
697         }
698         if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
699                 if(error != NULL) {
700                         *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
701                 }
702                 return false;
703         }
704         if(error != NULL) {
705                 *error = "";
706         }
707         return true;
708 }
709
710 string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id device_id)
711 {
712         if(platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
713                 /* Use cl_amd_device_topology extension. */
714                 cl_char topology[24];
715                 if(clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS && topology[0] == 1) {
716                         return string_printf("%02x:%02x.%01x",
717                                              (unsigned int)topology[21],
718                                              (unsigned int)topology[22],
719                                              (unsigned int)topology[23]);
720                 }
721         }
722         else if(platform_name == "NVIDIA CUDA") {
723                 /* Use two undocumented options of the cl_nv_device_attribute_query extension. */
724                 cl_int bus_id, slot_id;
725                 if(clGetDeviceInfo(device_id, 0x4008, sizeof(cl_int), &bus_id,  NULL) == CL_SUCCESS &&
726                    clGetDeviceInfo(device_id, 0x4009, sizeof(cl_int), &slot_id, NULL) == CL_SUCCESS) {
727                         return string_printf("%02x:%02x.%01x",
728                                              (unsigned int)(bus_id),
729                                              (unsigned int)(slot_id >> 3),
730                                              (unsigned int)(slot_id & 0x7));
731                 }
732         }
733         /* No general way to get a hardware ID from OpenCL => give up. */
734         return "";
735 }
736
737 void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
738                                     bool force_all)
739 {
740         const bool force_all_platforms = force_all ||
741                 (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT);
742         const cl_device_type device_type = OpenCLInfo::device_type();
743         static bool first_time = true;
744 #define FIRST_VLOG(severity) if(first_time) VLOG(severity)
745
746         usable_devices->clear();
747
748         if(device_type == 0) {
749                 FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
750                 first_time = false;
751                 return;
752         }
753
754         cl_int error;
755         vector<cl_device_id> device_ids;
756         vector<cl_platform_id> platform_ids;
757
758         /* Get platforms. */
759         if(!get_platforms(&platform_ids, &error)) {
760                 FIRST_VLOG(2) << "Error fetching platforms:"
761                               << string(clewErrorString(error));
762                 first_time = false;
763                 return;
764         }
765         if(platform_ids.size() == 0) {
766                 FIRST_VLOG(2) << "No OpenCL platforms were found.";
767                 first_time = false;
768                 return;
769         }
770         /* Devices are numbered consecutively across platforms. */
771         for(int platform = 0; platform < platform_ids.size(); platform++) {
772                 cl_platform_id platform_id = platform_ids[platform];
773                 string platform_name;
774                 if(!get_platform_name(platform_id, &platform_name)) {
775                         FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
776                         continue;
777                 }
778                 FIRST_VLOG(2) << "Enumerating devices for platform "
779                               << platform_name << ".";
780                 if(!platform_version_check(platform_id)) {
781                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
782                                       << " due to too old compiler version.";
783                         continue;
784                 }
785                 if(!get_platform_devices(platform_id,
786                                          device_type,
787                                          &device_ids,
788                                          &error))
789                 {
790                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
791                                       << ", failed to fetch of devices: "
792                                       << string(clewErrorString(error));
793                         continue;
794                 }
795                 if(device_ids.size() == 0) {
796                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
797                                       << ", it has no devices.";
798                         continue;
799                 }
800                 for(int num = 0; num < device_ids.size(); num++) {
801                         const cl_device_id device_id = device_ids[num];
802                         string device_name;
803                         if(!get_device_name(device_id, &device_name, &error)) {
804                                 FIRST_VLOG(2) << "Failed to fetch device name: "
805                                               << string(clewErrorString(error))
806                                               << ", ignoring.";
807                                 continue;
808                         }
809                         if(!device_version_check(device_id)) {
810                                 FIRST_VLOG(2) << "Ignoring device " << device_name
811                                               << " due to old compiler version.";
812                                 continue;
813                         }
814                         if(force_all_platforms ||
815                            device_supported(platform_name, device_id))
816                         {
817                                 cl_device_type device_type;
818                                 if(!get_device_type(device_id, &device_type, &error)) {
819                                         FIRST_VLOG(2) << "Ignoring device " << device_name
820                                                       << ", failed to fetch device type:"
821                                                       << string(clewErrorString(error));
822                                         continue;
823                                 }
824                                 string readable_device_name =
825                                         get_readable_device_name(device_id);
826                                 if(readable_device_name != device_name) {
827                                         FIRST_VLOG(2) << "Using more readable device name: "
828                                                       << readable_device_name;
829                                 }
830                                 FIRST_VLOG(2) << "Adding new device "
831                                               << readable_device_name << ".";
832                                 string hardware_id = get_hardware_id(platform_name, device_id);
833                                 usable_devices->push_back(OpenCLPlatformDevice(
834                                         platform_id,
835                                         platform_name,
836                                         device_id,
837                                         device_type,
838                                         readable_device_name,
839                                         hardware_id));
840                         }
841                         else {
842                                 FIRST_VLOG(2) << "Ignoring device " << device_name
843                                               << ", not officially supported yet.";
844                         }
845                 }
846         }
847         first_time = false;
848 }
849
850 bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids,
851                                cl_int *error)
852 {
853         /* Reset from possible previous state. */
854         platform_ids->resize(0);
855         cl_uint num_platforms;
856         if(!get_num_platforms(&num_platforms, error)) {
857                 return false;
858         }
859         /* Get actual platforms. */
860         cl_int err;
861         platform_ids->resize(num_platforms);
862         if((err = clGetPlatformIDs(num_platforms,
863                                    &platform_ids->at(0),
864                                    NULL)) != CL_SUCCESS) {
865                 if(error != NULL) {
866                         *error = err;
867                 }
868                 return false;
869         }
870         if(error != NULL) {
871                 *error = CL_SUCCESS;
872         }
873         return true;
874 }
875
876 vector<cl_platform_id> OpenCLInfo::get_platforms()
877 {
878         vector<cl_platform_id> platform_ids;
879         get_platforms(&platform_ids);
880         return platform_ids;
881 }
882
883 bool OpenCLInfo::get_num_platforms(cl_uint *num_platforms, cl_int *error)
884 {
885         cl_int err;
886         if((err = clGetPlatformIDs(0, NULL, num_platforms)) != CL_SUCCESS) {
887                 if(error != NULL) {
888                         *error = err;
889                 }
890                 *num_platforms = 0;
891                 return false;
892         }
893         if(error != NULL) {
894                 *error = CL_SUCCESS;
895         }
896         return true;
897 }
898
899 cl_uint OpenCLInfo::get_num_platforms()
900 {
901         cl_uint num_platforms;
902         if(!get_num_platforms(&num_platforms)) {
903                 return 0;
904         }
905         return num_platforms;
906 }
907
908 bool OpenCLInfo::get_platform_name(cl_platform_id platform_id,
909                                    string *platform_name)
910 {
911         char buffer[256];
912         if(clGetPlatformInfo(platform_id,
913                              CL_PLATFORM_NAME,
914                              sizeof(buffer),
915                              &buffer,
916                              NULL) != CL_SUCCESS)
917         {
918                 *platform_name = "";
919                 return false;
920         }
921         *platform_name = buffer;
922         return true;
923 }
924
925 string OpenCLInfo::get_platform_name(cl_platform_id platform_id)
926 {
927         string platform_name;
928         if(!get_platform_name(platform_id, &platform_name)) {
929                 return "";
930         }
931         return platform_name;
932 }
933
934 bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
935                                           cl_device_type device_type,
936                                           cl_uint *num_devices,
937                                           cl_int *error)
938 {
939         cl_int err;
940         if((err = clGetDeviceIDs(platform_id,
941                                  device_type,
942                                  0,
943                                  NULL,
944                                  num_devices)) != CL_SUCCESS)
945         {
946                 if(error != NULL) {
947                         *error = err;
948                 }
949                 *num_devices = 0;
950                 return false;
951         }
952         if(error != NULL) {
953                 *error = CL_SUCCESS;
954         }
955         return true;
956 }
957
958 cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
959                                              cl_device_type device_type)
960 {
961         cl_uint num_devices;
962         if(!get_num_platform_devices(platform_id,
963                                      device_type,
964                                      &num_devices))
965         {
966                 return 0;
967         }
968         return num_devices;
969 }
970
971 bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
972                                       cl_device_type device_type,
973                                       vector<cl_device_id> *device_ids,
974                                       cl_int* error)
975 {
976         /* Reset from possible previous state. */
977         device_ids->resize(0);
978         /* Get number of devices to pre-allocate memory. */
979         cl_uint num_devices;
980         if(!get_num_platform_devices(platform_id,
981                                      device_type,
982                                      &num_devices,
983                                      error))
984         {
985                 return false;
986         }
987         /* Get actual device list. */
988         device_ids->resize(num_devices);
989         cl_int err;
990         if((err = clGetDeviceIDs(platform_id,
991                                  device_type,
992                                  num_devices,
993                                  &device_ids->at(0),
994                                  NULL)) != CL_SUCCESS)
995         {
996                 if(error != NULL) {
997                         *error = err;
998                 }
999                 return false;
1000         }
1001         if(error != NULL) {
1002                 *error = CL_SUCCESS;
1003         }
1004         return true;
1005 }
1006
1007 vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
1008                                                       cl_device_type device_type)
1009 {
1010         vector<cl_device_id> devices;
1011         get_platform_devices(platform_id, device_type, &devices);
1012         return devices;
1013 }
1014
1015 bool OpenCLInfo::get_device_name(cl_device_id device_id,
1016                                  string *device_name,
1017                                  cl_int* error)
1018 {
1019         char buffer[1024];
1020         cl_int err;
1021         if((err = clGetDeviceInfo(device_id,
1022                                   CL_DEVICE_NAME,
1023                                   sizeof(buffer),
1024                                   &buffer,
1025                                   NULL)) != CL_SUCCESS)
1026         {
1027                 if(error != NULL) {
1028                         *error = err;
1029                 }
1030                 *device_name = "";
1031                 return false;
1032         }
1033         if(error != NULL) {
1034                 *error = CL_SUCCESS;
1035         }
1036         *device_name = buffer;
1037         return true;
1038 }
1039
1040 string OpenCLInfo::get_device_name(cl_device_id device_id)
1041 {
1042         string device_name;
1043         if(!get_device_name(device_id, &device_name)) {
1044                 return "";
1045         }
1046         return device_name;
1047 }
1048
1049 bool OpenCLInfo::get_device_type(cl_device_id device_id,
1050                                  cl_device_type *device_type,
1051                                  cl_int* error)
1052 {
1053         cl_int err;
1054         if((err = clGetDeviceInfo(device_id,
1055                                   CL_DEVICE_TYPE,
1056                                   sizeof(cl_device_type),
1057                                   device_type,
1058                                   NULL)) != CL_SUCCESS)
1059         {
1060                 if(error != NULL) {
1061                         *error = err;
1062                 }
1063                 *device_type = 0;
1064                 return false;
1065         }
1066         if(error != NULL) {
1067                 *error = CL_SUCCESS;
1068         }
1069         return true;
1070 }
1071
1072 cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
1073 {
1074         cl_device_type device_type;
1075         if(!get_device_type(device_id, &device_type)) {
1076                 return 0;
1077         }
1078         return device_type;
1079 }
1080
1081 string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
1082 {
1083         string name = "";
1084         char board_name[1024];
1085         size_t length = 0;
1086         if(clGetDeviceInfo(device_id,
1087                            CL_DEVICE_BOARD_NAME_AMD,
1088                            sizeof(board_name),
1089                            &board_name,
1090                            &length) == CL_SUCCESS)
1091         {
1092                 if(length != 0 && board_name[0] != '\0') {
1093                         name = board_name;
1094                 }
1095         }
1096
1097         /* Fallback to standard device name API. */
1098         if(name.empty()) {
1099                 name = get_device_name(device_id);
1100         }
1101
1102         /* Distinguish from our native CPU device. */
1103         if(get_device_type(device_id) & CL_DEVICE_TYPE_CPU) {
1104                 name += " (OpenCL)";
1105         }
1106
1107         return name;
1108 }
1109
1110 bool OpenCLInfo::get_driver_version(cl_device_id device_id,
1111                                     int *major,
1112                                     int *minor,
1113                                     cl_int* error)
1114 {
1115         char buffer[1024];
1116         cl_int err;
1117         if((err = clGetDeviceInfo(device_id,
1118                                   CL_DRIVER_VERSION,
1119                                   sizeof(buffer),
1120                                   &buffer,
1121                                   NULL)) != CL_SUCCESS)
1122         {
1123                 if(error != NULL) {
1124                         *error = err;
1125                 }
1126                 return false;
1127         }
1128         if(error != NULL) {
1129                 *error = CL_SUCCESS;
1130         }
1131         if(sscanf(buffer, "%d.%d", major, minor) < 2) {
1132                 VLOG(1) << string_printf("OpenCL: failed to parse driver version string (%s).", buffer);
1133                 return false;
1134         }
1135         return true;
1136 }
1137
1138 int OpenCLInfo::mem_address_alignment(cl_device_id device_id)
1139 {
1140         int base_align_bytes;
1141         if(clGetDeviceInfo(device_id,
1142                            CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE,
1143                            sizeof(int),
1144                            &base_align_bytes,
1145                            NULL) == CL_SUCCESS)
1146         {
1147                 return base_align_bytes;
1148         }
1149         return 1;
1150 }
1151
1152 CCL_NAMESPACE_END
1153
1154 #endif