f43aa5f350a46fae4e417da4e54707e6e99d2fca
[blender.git] / intern / cycles / device / opencl / opencl_util.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef WITH_OPENCL
18
19 #include "device/opencl/opencl.h"
20
21 #include "util/util_debug.h"
22 #include "util/util_logging.h"
23 #include "util/util_md5.h"
24 #include "util/util_path.h"
25 #include "util/util_time.h"
26
27 using std::cerr;
28 using std::endl;
29
30 CCL_NAMESPACE_BEGIN
31
32 OpenCLCache::Slot::ProgramEntry::ProgramEntry()
33  : program(NULL),
34    mutex(NULL)
35 {
36 }
37
38 OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry& rhs)
39  : program(rhs.program),
40    mutex(NULL)
41 {
42 }
43
44 OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
45 {
46         delete mutex;
47 }
48
49 OpenCLCache::Slot::Slot()
50  : context_mutex(NULL),
51    context(NULL)
52 {
53 }
54
55 OpenCLCache::Slot::Slot(const Slot& rhs)
56  : context_mutex(NULL),
57    context(NULL),
58    programs(rhs.programs)
59 {
60 }
61
62 OpenCLCache::Slot::~Slot()
63 {
64         delete context_mutex;
65 }
66
67 OpenCLCache& OpenCLCache::global_instance()
68 {
69         static OpenCLCache instance;
70         return instance;
71 }
72
73 cl_context OpenCLCache::get_context(cl_platform_id platform,
74                                     cl_device_id device,
75                                     thread_scoped_lock& slot_locker)
76 {
77         assert(platform != NULL);
78
79         OpenCLCache& self = global_instance();
80
81         thread_scoped_lock cache_lock(self.cache_lock);
82
83         pair<CacheMap::iterator,bool> ins = self.cache.insert(
84                 CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
85
86         Slot &slot = ins.first->second;
87
88         /* create slot lock only while holding cache lock */
89         if(!slot.context_mutex)
90                 slot.context_mutex = new thread_mutex;
91
92         /* need to unlock cache before locking slot, to allow store to complete */
93         cache_lock.unlock();
94
95         /* lock the slot */
96         slot_locker = thread_scoped_lock(*slot.context_mutex);
97
98         /* If the thing isn't cached */
99         if(slot.context == NULL) {
100                 /* return with the caller's lock holder holding the slot lock */
101                 return NULL;
102         }
103
104         /* the item was already cached, release the slot lock */
105         slot_locker.unlock();
106
107         cl_int ciErr = clRetainContext(slot.context);
108         assert(ciErr == CL_SUCCESS);
109         (void) ciErr;
110
111         return slot.context;
112 }
113
114 cl_program OpenCLCache::get_program(cl_platform_id platform,
115                                     cl_device_id device,
116                                     ustring key,
117                                     thread_scoped_lock& slot_locker)
118 {
119         assert(platform != NULL);
120
121         OpenCLCache& self = global_instance();
122
123         thread_scoped_lock cache_lock(self.cache_lock);
124
125         pair<CacheMap::iterator,bool> ins = self.cache.insert(
126                 CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
127
128         Slot &slot = ins.first->second;
129
130         pair<Slot::EntryMap::iterator,bool> ins2 = slot.programs.insert(
131                 Slot::EntryMap::value_type(key, Slot::ProgramEntry()));
132
133         Slot::ProgramEntry &entry = ins2.first->second;
134
135         /* create slot lock only while holding cache lock */
136         if(!entry.mutex)
137                 entry.mutex = new thread_mutex;
138
139         /* need to unlock cache before locking slot, to allow store to complete */
140         cache_lock.unlock();
141
142         /* lock the slot */
143         slot_locker = thread_scoped_lock(*entry.mutex);
144
145         /* If the thing isn't cached */
146         if(entry.program == NULL) {
147                 /* return with the caller's lock holder holding the slot lock */
148                 return NULL;
149         }
150
151         /* the item was already cached, release the slot lock */
152         slot_locker.unlock();
153
154         cl_int ciErr = clRetainProgram(entry.program);
155         assert(ciErr == CL_SUCCESS);
156         (void) ciErr;
157
158         return entry.program;
159 }
160
161 void OpenCLCache::store_context(cl_platform_id platform,
162                                 cl_device_id device,
163                                 cl_context context,
164                                 thread_scoped_lock& slot_locker)
165 {
166         assert(platform != NULL);
167         assert(device != NULL);
168         assert(context != NULL);
169
170         OpenCLCache &self = global_instance();
171
172         thread_scoped_lock cache_lock(self.cache_lock);
173         CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
174         cache_lock.unlock();
175
176         Slot &slot = i->second;
177
178         /* sanity check */
179         assert(i != self.cache.end());
180         assert(slot.context == NULL);
181
182         slot.context = context;
183
184         /* unlock the slot */
185         slot_locker.unlock();
186
187         /* increment reference count in OpenCL.
188          * The caller is going to release the object when done with it. */
189         cl_int ciErr = clRetainContext(context);
190         assert(ciErr == CL_SUCCESS);
191         (void) ciErr;
192 }
193
194 void OpenCLCache::store_program(cl_platform_id platform,
195                                 cl_device_id device,
196                                 cl_program program,
197                                 ustring key,
198                                 thread_scoped_lock& slot_locker)
199 {
200         assert(platform != NULL);
201         assert(device != NULL);
202         assert(program != NULL);
203
204         OpenCLCache &self = global_instance();
205
206         thread_scoped_lock cache_lock(self.cache_lock);
207
208         CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
209         assert(i != self.cache.end());
210         Slot &slot = i->second;
211
212         Slot::EntryMap::iterator i2 = slot.programs.find(key);
213         assert(i2 != slot.programs.end());
214         Slot::ProgramEntry &entry = i2->second;
215
216         assert(entry.program == NULL);
217
218         cache_lock.unlock();
219
220         entry.program = program;
221
222         /* unlock the slot */
223         slot_locker.unlock();
224
225         /* Increment reference count in OpenCL.
226          * The caller is going to release the object when done with it.
227          */
228         cl_int ciErr = clRetainProgram(program);
229         assert(ciErr == CL_SUCCESS);
230         (void) ciErr;
231 }
232
233 string OpenCLCache::get_kernel_md5()
234 {
235         OpenCLCache &self = global_instance();
236         thread_scoped_lock lock(self.kernel_md5_lock);
237
238         if(self.kernel_md5.empty()) {
239                 self.kernel_md5 = path_files_md5_hash(path_get("source"));
240         }
241         return self.kernel_md5;
242 }
243
244 OpenCLDeviceBase::OpenCLProgram::OpenCLProgram(OpenCLDeviceBase *device,
245                                                const string& program_name,
246                                                const string& kernel_file,
247                                                const string& kernel_build_options,
248                                                bool use_stdout)
249  : device(device),
250    program_name(program_name),
251    kernel_file(kernel_file),
252    kernel_build_options(kernel_build_options),
253    use_stdout(use_stdout)
254 {
255         loaded = false;
256         program = NULL;
257 }
258
259 OpenCLDeviceBase::OpenCLProgram::~OpenCLProgram()
260 {
261         release();
262 }
263
264 void OpenCLDeviceBase::OpenCLProgram::release()
265 {
266         for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
267                 if(kernel->second) {
268                         clReleaseKernel(kernel->second);
269                         kernel->second = NULL;
270                 }
271         }
272         if(program) {
273                 clReleaseProgram(program);
274                 program = NULL;
275         }
276 }
277
278 void OpenCLDeviceBase::OpenCLProgram::add_log(const string& msg, bool debug)
279 {
280         if(!use_stdout) {
281                 log += msg + "\n";
282         }
283         else if(!debug) {
284                 printf("%s\n", msg.c_str());
285                 fflush(stdout);
286         }
287         else {
288                 VLOG(2) << msg;
289         }
290 }
291
292 void OpenCLDeviceBase::OpenCLProgram::add_error(const string& msg)
293 {
294         if(use_stdout) {
295                 fprintf(stderr, "%s\n", msg.c_str());
296         }
297         if(error_msg == "") {
298                 error_msg += "\n";
299         }
300         error_msg += msg;
301 }
302
303 void OpenCLDeviceBase::OpenCLProgram::add_kernel(ustring name)
304 {
305         if(!kernels.count(name)) {
306                 kernels[name] = NULL;
307         }
308 }
309
310 bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
311 {
312         string build_options;
313         build_options = device->kernel_build_options(debug_src) + kernel_build_options;
314
315         VLOG(1) << "Build options passed to clBuildProgram: '"
316                 << build_options << "'.";
317         cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
318
319         /* show warnings even if build is successful */
320         size_t ret_val_size = 0;
321
322         clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
323
324         if(ciErr != CL_SUCCESS) {
325                 add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) + ", errors in console.");
326         }
327
328         if(ret_val_size > 1) {
329                 vector<char> build_log(ret_val_size + 1);
330                 clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
331
332                 build_log[ret_val_size] = '\0';
333                 /* Skip meaningless empty output from the NVidia compiler. */
334                 if(!(ret_val_size == 2 && build_log[0] == '\n')) {
335                         add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]), ciErr == CL_SUCCESS);
336                 }
337         }
338
339         return (ciErr == CL_SUCCESS);
340 }
341
342 bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
343 {
344         string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
345         /* We compile kernels consisting of many files. unfortunately OpenCL
346          * kernel caches do not seem to recognize changes in included files.
347          * so we force recompile on changes by adding the md5 hash of all files.
348          */
349         source = path_source_replace_includes(source, path_get("source"));
350         source += "\n// " + util_md5_string(source) + "\n";
351
352         if(debug_src) {
353                 path_write_text(*debug_src, source);
354         }
355
356         size_t source_len = source.size();
357         const char *source_str = source.c_str();
358         cl_int ciErr;
359
360         program = clCreateProgramWithSource(device->cxContext,
361                                             1,
362                                             &source_str,
363                                             &source_len,
364                                             &ciErr);
365
366         if(ciErr != CL_SUCCESS) {
367                 add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
368                 return false;
369         }
370
371         double starttime = time_dt();
372         add_log(string("Compiling OpenCL program ") + program_name.c_str(), false);
373         add_log(string("Build flags: ") + kernel_build_options, true);
374
375         if(!build_kernel(debug_src))
376                 return false;
377
378         add_log(string("Kernel compilation of ") + program_name + " finished in " + string_printf("%.2lfs.\n", time_dt() - starttime), false);
379
380         return true;
381 }
382
383 bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
384                                                   const string *debug_src)
385 {
386         /* read binary into memory */
387         vector<uint8_t> binary;
388
389         if(!path_read_binary(clbin, binary)) {
390                 add_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
391                 return false;
392         }
393
394         /* create program */
395         cl_int status, ciErr;
396         size_t size = binary.size();
397         const uint8_t *bytes = &binary[0];
398
399         program = clCreateProgramWithBinary(device->cxContext, 1, &device->cdDevice,
400                 &size, &bytes, &status, &ciErr);
401
402         if(status != CL_SUCCESS || ciErr != CL_SUCCESS) {
403                 add_error(string("OpenCL failed create program from cached binary ") + clbin + ": "
404                                  + clewErrorString(status) + " " + clewErrorString(ciErr));
405                 return false;
406         }
407
408         if(!build_kernel(debug_src))
409                 return false;
410
411         return true;
412 }
413
414 bool OpenCLDeviceBase::OpenCLProgram::save_binary(const string& clbin)
415 {
416         size_t size = 0;
417         clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
418
419         if(!size)
420                 return false;
421
422         vector<uint8_t> binary(size);
423         uint8_t *bytes = &binary[0];
424
425         clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
426
427         return path_write_binary(clbin, binary);
428 }
429
430 void OpenCLDeviceBase::OpenCLProgram::load()
431 {
432         assert(device);
433
434         loaded = false;
435
436         string device_md5 = device->device_md5_hash(kernel_build_options);
437
438         /* Try to use cached kernel. */
439         thread_scoped_lock cache_locker;
440         ustring cache_key(program_name + device_md5);
441         program = device->load_cached_kernel(cache_key,
442                                              cache_locker);
443
444         if(!program) {
445                 add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
446
447                 /* need to create source to get md5 */
448                 string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
449                 source = path_source_replace_includes(source, path_get("source"));
450
451                 string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
452                 basename = path_cache_get(path_join("kernels", basename));
453                 string clbin = basename + ".clbin";
454
455                 /* path to preprocessed source for debugging */
456                 string clsrc, *debug_src = NULL;
457
458                 if(OpenCLInfo::use_debug()) {
459                         clsrc = basename + ".cl";
460                         debug_src = &clsrc;
461                 }
462
463                 /* If binary kernel exists already, try use it. */
464                 if(path_exists(clbin) && load_binary(clbin)) {
465                         /* Kernel loaded from binary, nothing to do. */
466                         add_log(string("Loaded program from ") + clbin + ".", true);
467                 }
468                 else {
469                         add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
470
471                         /* If does not exist or loading binary failed, compile kernel. */
472                         if(!compile_kernel(debug_src)) {
473                                 return;
474                         }
475
476                         /* Save binary for reuse. */
477                         if(!save_binary(clbin)) {
478                                 add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
479                         }
480                 }
481
482                 /* Cache the program. */
483                 device->store_cached_kernel(program,
484                                             cache_key,
485                                             cache_locker);
486         }
487         else {
488                 add_log(string("Found cached OpenCL program ") + program_name + ".", true);
489         }
490
491         for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
492                 assert(kernel->second == NULL);
493                 cl_int ciErr;
494                 string name = "kernel_ocl_" + kernel->first.string();
495                 kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
496                 if(device->opencl_error(ciErr)) {
497                         add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " + clewErrorString(ciErr));
498                         return;
499                 }
500         }
501
502         loaded = true;
503 }
504
505 void OpenCLDeviceBase::OpenCLProgram::report_error()
506 {
507         /* If loaded is true, there was no error. */
508         if(loaded) return;
509         /* if use_stdout is true, the error was already reported. */
510         if(use_stdout) return;
511
512         cerr << error_msg << endl;
513         if(!compile_output.empty()) {
514                 cerr << "OpenCL kernel build output for " << program_name << ":" << endl;
515                 cerr << compile_output << endl;
516         }
517 }
518
519 cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()()
520 {
521         assert(kernels.size() == 1);
522         return kernels.begin()->second;
523 }
524
525 cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()(ustring name)
526 {
527         assert(kernels.count(name));
528         return kernels[name];
529 }
530
531 cl_device_type OpenCLInfo::device_type()
532 {
533         switch(DebugFlags().opencl.device_type)
534         {
535                 case DebugFlags::OpenCL::DEVICE_NONE:
536                         return 0;
537                 case DebugFlags::OpenCL::DEVICE_ALL:
538                         return CL_DEVICE_TYPE_ALL;
539                 case DebugFlags::OpenCL::DEVICE_DEFAULT:
540                         return CL_DEVICE_TYPE_DEFAULT;
541                 case DebugFlags::OpenCL::DEVICE_CPU:
542                         return CL_DEVICE_TYPE_CPU;
543                 case DebugFlags::OpenCL::DEVICE_GPU:
544                         return CL_DEVICE_TYPE_GPU;
545                 case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
546                         return CL_DEVICE_TYPE_ACCELERATOR;
547                 default:
548                         return CL_DEVICE_TYPE_ALL;
549         }
550 }
551
552 bool OpenCLInfo::use_debug()
553 {
554         return DebugFlags().opencl.debug;
555 }
556
557 bool OpenCLInfo::use_single_program()
558 {
559         return DebugFlags().opencl.single_program;
560 }
561
562 bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
563 {
564         /* keep this in sync with kernel_types.h! */
565         if(platform == "NVIDIA CUDA")
566                 return true;
567         else if(platform == "Apple")
568                 return true;
569         else if(platform == "AMD Accelerated Parallel Processing")
570                 return true;
571         else if(platform == "Intel(R) OpenCL")
572                 return true;
573         /* Make sure officially unsupported OpenCL platforms
574          * does not set up to use advanced shading.
575          */
576         return false;
577 }
578
579 bool OpenCLInfo::kernel_use_split(const string& platform_name,
580                                   const cl_device_type device_type)
581 {
582         if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) {
583                 VLOG(1) << "Forcing split kernel to use.";
584                 return true;
585         }
586         if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) {
587                 VLOG(1) << "Forcing mega kernel to use.";
588                 return false;
589         }
590         /* TODO(sergey): Replace string lookups with more enum-like API,
591          * similar to device/vendor checks blender's gpu.
592          */
593         if(platform_name == "AMD Accelerated Parallel Processing" &&
594            device_type == CL_DEVICE_TYPE_GPU)
595         {
596                 return true;
597         }
598         return false;
599 }
600
601 bool OpenCLInfo::device_supported(const string& platform_name,
602                                   const cl_device_id device_id)
603 {
604         cl_device_type device_type;
605         if(!get_device_type(device_id, &device_type)) {
606                 return false;
607         }
608         string device_name;
609         if(!get_device_name(device_id, &device_name)) {
610                 return false;
611         }
612
613         int driver_major = 0;
614         int driver_minor = 0;
615         if(!get_driver_version(device_id, &driver_major, &driver_minor)) {
616                 return false;
617         }
618         VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
619
620         /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
621          * (aka, it will not be on Intel framework). This isn't supported
622          * and needs an explicit blacklist.
623          */
624         if(strstr(device_name.c_str(), "Iris")) {
625                 return false;
626         }
627         if(platform_name == "AMD Accelerated Parallel Processing" &&
628            device_type == CL_DEVICE_TYPE_GPU)
629         {
630                 if(driver_major < 2236) {
631                         VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported.";
632                         return false;
633                 }
634                 const char *blacklist[] = {
635                         /* GCN 1 */
636                         "Tahiti", "Pitcairn", "Capeverde", "Oland", "Hainan",
637                         NULL
638                 };
639                 for(int i = 0; blacklist[i] != NULL; i++) {
640                         if(device_name == blacklist[i]) {
641                                 VLOG(1) << "AMD device " << device_name << " not supported";
642                                 return false;
643                         }
644                 }
645                 return true;
646         }
647         if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
648                 return false;
649         }
650         return false;
651 }
652
653 bool OpenCLInfo::platform_version_check(cl_platform_id platform,
654                                         string *error)
655 {
656         const int req_major = 1, req_minor = 1;
657         int major, minor;
658         char version[256];
659         clGetPlatformInfo(platform,
660                           CL_PLATFORM_VERSION,
661                           sizeof(version),
662                           &version,
663                           NULL);
664         if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
665                 if(error != NULL) {
666                         *error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
667                 }
668                 return false;
669         }
670         if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
671                 if(error != NULL) {
672                         *error = string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
673                 }
674                 return false;
675         }
676         if(error != NULL) {
677                 *error = "";
678         }
679         return true;
680 }
681
682 bool OpenCLInfo::device_version_check(cl_device_id device,
683                                       string *error)
684 {
685         const int req_major = 1, req_minor = 1;
686         int major, minor;
687         char version[256];
688         clGetDeviceInfo(device,
689                         CL_DEVICE_OPENCL_C_VERSION,
690                         sizeof(version),
691                         &version,
692                         NULL);
693         if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
694                 if(error != NULL) {
695                         *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
696                 }
697                 return false;
698         }
699         if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
700                 if(error != NULL) {
701                         *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
702                 }
703                 return false;
704         }
705         if(error != NULL) {
706                 *error = "";
707         }
708         return true;
709 }
710
711 string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id device_id)
712 {
713         if(platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
714                 /* Use cl_amd_device_topology extension. */
715                 cl_char topology[24];
716                 if(clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS && topology[0] == 1) {
717                         return string_printf("%02x:%02x.%01x",
718                                              (unsigned int)topology[21],
719                                              (unsigned int)topology[22],
720                                              (unsigned int)topology[23]);
721                 }
722         }
723         else if(platform_name == "NVIDIA CUDA") {
724                 /* Use two undocumented options of the cl_nv_device_attribute_query extension. */
725                 cl_int bus_id, slot_id;
726                 if(clGetDeviceInfo(device_id, 0x4008, sizeof(cl_int), &bus_id,  NULL) == CL_SUCCESS &&
727                    clGetDeviceInfo(device_id, 0x4009, sizeof(cl_int), &slot_id, NULL) == CL_SUCCESS) {
728                         return string_printf("%02x:%02x.%01x",
729                                              (unsigned int)(bus_id),
730                                              (unsigned int)(slot_id >> 3),
731                                              (unsigned int)(slot_id & 0x7));
732                 }
733         }
734         /* No general way to get a hardware ID from OpenCL => give up. */
735         return "";
736 }
737
738 void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
739                                     bool force_all)
740 {
741         const bool force_all_platforms = force_all ||
742                 (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT);
743         const cl_device_type device_type = OpenCLInfo::device_type();
744         static bool first_time = true;
745 #define FIRST_VLOG(severity) if(first_time) VLOG(severity)
746
747         usable_devices->clear();
748
749         if(device_type == 0) {
750                 FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
751                 first_time = false;
752                 return;
753         }
754
755         cl_int error;
756         vector<cl_device_id> device_ids;
757         vector<cl_platform_id> platform_ids;
758
759         /* Get platforms. */
760         if(!get_platforms(&platform_ids, &error)) {
761                 FIRST_VLOG(2) << "Error fetching platforms:"
762                               << string(clewErrorString(error));
763                 first_time = false;
764                 return;
765         }
766         if(platform_ids.size() == 0) {
767                 FIRST_VLOG(2) << "No OpenCL platforms were found.";
768                 first_time = false;
769                 return;
770         }
771         /* Devices are numbered consecutively across platforms. */
772         for(int platform = 0; platform < platform_ids.size(); platform++) {
773                 cl_platform_id platform_id = platform_ids[platform];
774                 string platform_name;
775                 if(!get_platform_name(platform_id, &platform_name)) {
776                         FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
777                         continue;
778                 }
779                 FIRST_VLOG(2) << "Enumerating devices for platform "
780                               << platform_name << ".";
781                 if(!platform_version_check(platform_id)) {
782                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
783                                       << " due to too old compiler version.";
784                         continue;
785                 }
786                 if(!get_platform_devices(platform_id,
787                                          device_type,
788                                          &device_ids,
789                                          &error))
790                 {
791                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
792                                       << ", failed to fetch of devices: "
793                                       << string(clewErrorString(error));
794                         continue;
795                 }
796                 if(device_ids.size() == 0) {
797                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
798                                       << ", it has no devices.";
799                         continue;
800                 }
801                 for(int num = 0; num < device_ids.size(); num++) {
802                         const cl_device_id device_id = device_ids[num];
803                         string device_name;
804                         if(!get_device_name(device_id, &device_name, &error)) {
805                                 FIRST_VLOG(2) << "Failed to fetch device name: "
806                                               << string(clewErrorString(error))
807                                               << ", ignoring.";
808                                 continue;
809                         }
810                         if(!device_version_check(device_id)) {
811                                 FIRST_VLOG(2) << "Ignoring device " << device_name
812                                               << " due to old compiler version.";
813                                 continue;
814                         }
815                         if(force_all_platforms ||
816                            device_supported(platform_name, device_id))
817                         {
818                                 cl_device_type device_type;
819                                 if(!get_device_type(device_id, &device_type, &error)) {
820                                         FIRST_VLOG(2) << "Ignoring device " << device_name
821                                                       << ", failed to fetch device type:"
822                                                       << string(clewErrorString(error));
823                                         continue;
824                                 }
825                                 string readable_device_name =
826                                         get_readable_device_name(device_id);
827                                 if(readable_device_name != device_name) {
828                                         FIRST_VLOG(2) << "Using more readable device name: "
829                                                       << readable_device_name;
830                                 }
831                                 FIRST_VLOG(2) << "Adding new device "
832                                               << readable_device_name << ".";
833                                 string hardware_id = get_hardware_id(platform_name, device_id);
834                                 string device_extensions = get_device_extensions(device_id);
835                                 usable_devices->push_back(OpenCLPlatformDevice(
836                                         platform_id,
837                                         platform_name,
838                                         device_id,
839                                         device_type,
840                                         readable_device_name,
841                                         hardware_id,
842                                         device_extensions));
843                         }
844                         else {
845                                 FIRST_VLOG(2) << "Ignoring device " << device_name
846                                               << ", not officially supported yet.";
847                         }
848                 }
849         }
850         first_time = false;
851 }
852
853 bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids,
854                                cl_int *error)
855 {
856         /* Reset from possible previous state. */
857         platform_ids->resize(0);
858         cl_uint num_platforms;
859         if(!get_num_platforms(&num_platforms, error)) {
860                 return false;
861         }
862         /* Get actual platforms. */
863         cl_int err;
864         platform_ids->resize(num_platforms);
865         if((err = clGetPlatformIDs(num_platforms,
866                                    &platform_ids->at(0),
867                                    NULL)) != CL_SUCCESS) {
868                 if(error != NULL) {
869                         *error = err;
870                 }
871                 return false;
872         }
873         if(error != NULL) {
874                 *error = CL_SUCCESS;
875         }
876         return true;
877 }
878
879 vector<cl_platform_id> OpenCLInfo::get_platforms()
880 {
881         vector<cl_platform_id> platform_ids;
882         get_platforms(&platform_ids);
883         return platform_ids;
884 }
885
886 bool OpenCLInfo::get_num_platforms(cl_uint *num_platforms, cl_int *error)
887 {
888         cl_int err;
889         if((err = clGetPlatformIDs(0, NULL, num_platforms)) != CL_SUCCESS) {
890                 if(error != NULL) {
891                         *error = err;
892                 }
893                 *num_platforms = 0;
894                 return false;
895         }
896         if(error != NULL) {
897                 *error = CL_SUCCESS;
898         }
899         return true;
900 }
901
902 cl_uint OpenCLInfo::get_num_platforms()
903 {
904         cl_uint num_platforms;
905         if(!get_num_platforms(&num_platforms)) {
906                 return 0;
907         }
908         return num_platforms;
909 }
910
911 bool OpenCLInfo::get_platform_name(cl_platform_id platform_id,
912                                    string *platform_name)
913 {
914         char buffer[256];
915         if(clGetPlatformInfo(platform_id,
916                              CL_PLATFORM_NAME,
917                              sizeof(buffer),
918                              &buffer,
919                              NULL) != CL_SUCCESS)
920         {
921                 *platform_name = "";
922                 return false;
923         }
924         *platform_name = buffer;
925         return true;
926 }
927
928 string OpenCLInfo::get_platform_name(cl_platform_id platform_id)
929 {
930         string platform_name;
931         if(!get_platform_name(platform_id, &platform_name)) {
932                 return "";
933         }
934         return platform_name;
935 }
936
937 bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
938                                           cl_device_type device_type,
939                                           cl_uint *num_devices,
940                                           cl_int *error)
941 {
942         cl_int err;
943         if((err = clGetDeviceIDs(platform_id,
944                                  device_type,
945                                  0,
946                                  NULL,
947                                  num_devices)) != CL_SUCCESS)
948         {
949                 if(error != NULL) {
950                         *error = err;
951                 }
952                 *num_devices = 0;
953                 return false;
954         }
955         if(error != NULL) {
956                 *error = CL_SUCCESS;
957         }
958         return true;
959 }
960
961 cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
962                                              cl_device_type device_type)
963 {
964         cl_uint num_devices;
965         if(!get_num_platform_devices(platform_id,
966                                      device_type,
967                                      &num_devices))
968         {
969                 return 0;
970         }
971         return num_devices;
972 }
973
974 bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
975                                       cl_device_type device_type,
976                                       vector<cl_device_id> *device_ids,
977                                       cl_int* error)
978 {
979         /* Reset from possible previous state. */
980         device_ids->resize(0);
981         /* Get number of devices to pre-allocate memory. */
982         cl_uint num_devices;
983         if(!get_num_platform_devices(platform_id,
984                                      device_type,
985                                      &num_devices,
986                                      error))
987         {
988                 return false;
989         }
990         /* Get actual device list. */
991         device_ids->resize(num_devices);
992         cl_int err;
993         if((err = clGetDeviceIDs(platform_id,
994                                  device_type,
995                                  num_devices,
996                                  &device_ids->at(0),
997                                  NULL)) != CL_SUCCESS)
998         {
999                 if(error != NULL) {
1000                         *error = err;
1001                 }
1002                 return false;
1003         }
1004         if(error != NULL) {
1005                 *error = CL_SUCCESS;
1006         }
1007         return true;
1008 }
1009
1010 vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
1011                                                       cl_device_type device_type)
1012 {
1013         vector<cl_device_id> devices;
1014         get_platform_devices(platform_id, device_type, &devices);
1015         return devices;
1016 }
1017
1018 bool OpenCLInfo::get_device_name(cl_device_id device_id,
1019                                  string *device_name,
1020                                  cl_int* error)
1021 {
1022         char buffer[1024];
1023         cl_int err;
1024         if((err = clGetDeviceInfo(device_id,
1025                                   CL_DEVICE_NAME,
1026                                   sizeof(buffer),
1027                                   &buffer,
1028                                   NULL)) != CL_SUCCESS)
1029         {
1030                 if(error != NULL) {
1031                         *error = err;
1032                 }
1033                 *device_name = "";
1034                 return false;
1035         }
1036         if(error != NULL) {
1037                 *error = CL_SUCCESS;
1038         }
1039         *device_name = buffer;
1040         return true;
1041 }
1042
1043 string OpenCLInfo::get_device_name(cl_device_id device_id)
1044 {
1045         string device_name;
1046         if(!get_device_name(device_id, &device_name)) {
1047                 return "";
1048         }
1049         return device_name;
1050 }
1051
1052 bool OpenCLInfo::get_device_extensions(cl_device_id device_id,
1053         string *device_extensions,
1054         cl_int* error)
1055 {
1056         char buffer[1024];
1057         cl_int err;
1058         if((err = clGetDeviceInfo(device_id,
1059                 CL_DEVICE_EXTENSIONS,
1060                 sizeof(buffer),
1061                 &buffer,
1062                 NULL)) != CL_SUCCESS)
1063         {
1064                 if(error != NULL) {
1065                         *error = err;
1066                 }
1067                 *device_extensions = "";
1068                 return false;
1069         }
1070         if(error != NULL) {
1071                 *error = CL_SUCCESS;
1072         }
1073         *device_extensions = buffer;
1074         return true;
1075 }
1076
1077 string OpenCLInfo::get_device_extensions(cl_device_id device_id)
1078 {
1079         string device_extensions;
1080         if(!get_device_extensions(device_id, &device_extensions)) {
1081                 return "";
1082         }
1083         return device_extensions;
1084 }
1085
1086 bool OpenCLInfo::get_device_type(cl_device_id device_id,
1087                                  cl_device_type *device_type,
1088                                  cl_int* error)
1089 {
1090         cl_int err;
1091         if((err = clGetDeviceInfo(device_id,
1092                                   CL_DEVICE_TYPE,
1093                                   sizeof(cl_device_type),
1094                                   device_type,
1095                                   NULL)) != CL_SUCCESS)
1096         {
1097                 if(error != NULL) {
1098                         *error = err;
1099                 }
1100                 *device_type = 0;
1101                 return false;
1102         }
1103         if(error != NULL) {
1104                 *error = CL_SUCCESS;
1105         }
1106         return true;
1107 }
1108
1109 cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
1110 {
1111         cl_device_type device_type;
1112         if(!get_device_type(device_id, &device_type)) {
1113                 return 0;
1114         }
1115         return device_type;
1116 }
1117
1118 string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
1119 {
1120         string name = "";
1121         char board_name[1024];
1122         size_t length = 0;
1123         if(clGetDeviceInfo(device_id,
1124                            CL_DEVICE_BOARD_NAME_AMD,
1125                            sizeof(board_name),
1126                            &board_name,
1127                            &length) == CL_SUCCESS)
1128         {
1129                 if(length != 0 && board_name[0] != '\0') {
1130                         name = board_name;
1131                 }
1132         }
1133
1134         /* Fallback to standard device name API. */
1135         if(name.empty()) {
1136                 name = get_device_name(device_id);
1137         }
1138
1139         /* Special exception for AMD Vega, need to be able to tell
1140          * Vega 56 from 64 apart.
1141          */
1142         if(name == "Radeon RX Vega") {
1143                 cl_int max_compute_units = 0;
1144                 if(clGetDeviceInfo(device_id,
1145                                    CL_DEVICE_MAX_COMPUTE_UNITS,
1146                                    sizeof(max_compute_units),
1147                                    &max_compute_units,
1148                                    NULL) == CL_SUCCESS)
1149                 {
1150                         name += " " + to_string(max_compute_units);
1151                 }
1152         }
1153
1154         /* Distinguish from our native CPU device. */
1155         if(get_device_type(device_id) & CL_DEVICE_TYPE_CPU) {
1156                 name += " (OpenCL)";
1157         }
1158
1159         return name;
1160 }
1161
1162 bool OpenCLInfo::get_driver_version(cl_device_id device_id,
1163                                     int *major,
1164                                     int *minor,
1165                                     cl_int* error)
1166 {
1167         char buffer[1024];
1168         cl_int err;
1169         if((err = clGetDeviceInfo(device_id,
1170                                   CL_DRIVER_VERSION,
1171                                   sizeof(buffer),
1172                                   &buffer,
1173                                   NULL)) != CL_SUCCESS)
1174         {
1175                 if(error != NULL) {
1176                         *error = err;
1177                 }
1178                 return false;
1179         }
1180         if(error != NULL) {
1181                 *error = CL_SUCCESS;
1182         }
1183         if(sscanf(buffer, "%d.%d", major, minor) < 2) {
1184                 VLOG(1) << string_printf("OpenCL: failed to parse driver version string (%s).", buffer);
1185                 return false;
1186         }
1187         return true;
1188 }
1189
1190 int OpenCLInfo::mem_sub_ptr_alignment(cl_device_id device_id)
1191 {
1192         int base_align_bits;
1193         if(clGetDeviceInfo(device_id,
1194                            CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1195                            sizeof(int),
1196                            &base_align_bits,
1197                            NULL) == CL_SUCCESS)
1198         {
1199                 return base_align_bits/8;
1200         }
1201         return 1;
1202 }
1203
1204 CCL_NAMESPACE_END
1205
1206 #endif