Cycles: Support multithreaded compilation of kernels
[blender.git] / intern / cycles / device / opencl / opencl_util.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef WITH_OPENCL
18
19 #include "device/opencl/opencl.h"
20 #include "device/device_intern.h"
21
22 #include "util/util_debug.h"
23 #include "util/util_logging.h"
24 #include "util/util_md5.h"
25 #include "util/util_path.h"
26 #include "util/util_time.h"
27 #include "util/util_system.h"
28
29 using std::cerr;
30 using std::endl;
31
32 CCL_NAMESPACE_BEGIN
33
34 OpenCLCache::Slot::ProgramEntry::ProgramEntry()
35  : program(NULL),
36    mutex(NULL)
37 {
38 }
39
40 OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry& rhs)
41  : program(rhs.program),
42    mutex(NULL)
43 {
44 }
45
46 OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
47 {
48         delete mutex;
49 }
50
51 OpenCLCache::Slot::Slot()
52  : context_mutex(NULL),
53    context(NULL)
54 {
55 }
56
57 OpenCLCache::Slot::Slot(const Slot& rhs)
58  : context_mutex(NULL),
59    context(NULL),
60    programs(rhs.programs)
61 {
62 }
63
64 OpenCLCache::Slot::~Slot()
65 {
66         delete context_mutex;
67 }
68
69 OpenCLCache& OpenCLCache::global_instance()
70 {
71         static OpenCLCache instance;
72         return instance;
73 }
74
75 cl_context OpenCLCache::get_context(cl_platform_id platform,
76                                     cl_device_id device,
77                                     thread_scoped_lock& slot_locker)
78 {
79         assert(platform != NULL);
80
81         OpenCLCache& self = global_instance();
82
83         thread_scoped_lock cache_lock(self.cache_lock);
84
85         pair<CacheMap::iterator,bool> ins = self.cache.insert(
86                 CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
87
88         Slot &slot = ins.first->second;
89
90         /* create slot lock only while holding cache lock */
91         if(!slot.context_mutex)
92                 slot.context_mutex = new thread_mutex;
93
94         /* need to unlock cache before locking slot, to allow store to complete */
95         cache_lock.unlock();
96
97         /* lock the slot */
98         slot_locker = thread_scoped_lock(*slot.context_mutex);
99
100         /* If the thing isn't cached */
101         if(slot.context == NULL) {
102                 /* return with the caller's lock holder holding the slot lock */
103                 return NULL;
104         }
105
106         /* the item was already cached, release the slot lock */
107         slot_locker.unlock();
108
109         cl_int ciErr = clRetainContext(slot.context);
110         assert(ciErr == CL_SUCCESS);
111         (void) ciErr;
112
113         return slot.context;
114 }
115
116 cl_program OpenCLCache::get_program(cl_platform_id platform,
117                                     cl_device_id device,
118                                     ustring key,
119                                     thread_scoped_lock& slot_locker)
120 {
121         assert(platform != NULL);
122
123         OpenCLCache& self = global_instance();
124
125         thread_scoped_lock cache_lock(self.cache_lock);
126
127         pair<CacheMap::iterator,bool> ins = self.cache.insert(
128                 CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
129
130         Slot &slot = ins.first->second;
131
132         pair<Slot::EntryMap::iterator,bool> ins2 = slot.programs.insert(
133                 Slot::EntryMap::value_type(key, Slot::ProgramEntry()));
134
135         Slot::ProgramEntry &entry = ins2.first->second;
136
137         /* create slot lock only while holding cache lock */
138         if(!entry.mutex)
139                 entry.mutex = new thread_mutex;
140
141         /* need to unlock cache before locking slot, to allow store to complete */
142         cache_lock.unlock();
143
144         /* lock the slot */
145         slot_locker = thread_scoped_lock(*entry.mutex);
146
147         /* If the thing isn't cached */
148         if(entry.program == NULL) {
149                 /* return with the caller's lock holder holding the slot lock */
150                 return NULL;
151         }
152
153         /* the item was already cached, release the slot lock */
154         slot_locker.unlock();
155
156         cl_int ciErr = clRetainProgram(entry.program);
157         assert(ciErr == CL_SUCCESS);
158         (void) ciErr;
159
160         return entry.program;
161 }
162
163 void OpenCLCache::store_context(cl_platform_id platform,
164                                 cl_device_id device,
165                                 cl_context context,
166                                 thread_scoped_lock& slot_locker)
167 {
168         assert(platform != NULL);
169         assert(device != NULL);
170         assert(context != NULL);
171
172         OpenCLCache &self = global_instance();
173
174         thread_scoped_lock cache_lock(self.cache_lock);
175         CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
176         cache_lock.unlock();
177
178         Slot &slot = i->second;
179
180         /* sanity check */
181         assert(i != self.cache.end());
182         assert(slot.context == NULL);
183
184         slot.context = context;
185
186         /* unlock the slot */
187         slot_locker.unlock();
188
189         /* increment reference count in OpenCL.
190          * The caller is going to release the object when done with it. */
191         cl_int ciErr = clRetainContext(context);
192         assert(ciErr == CL_SUCCESS);
193         (void) ciErr;
194 }
195
196 void OpenCLCache::store_program(cl_platform_id platform,
197                                 cl_device_id device,
198                                 cl_program program,
199                                 ustring key,
200                                 thread_scoped_lock& slot_locker)
201 {
202         assert(platform != NULL);
203         assert(device != NULL);
204         assert(program != NULL);
205
206         OpenCLCache &self = global_instance();
207
208         thread_scoped_lock cache_lock(self.cache_lock);
209
210         CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
211         assert(i != self.cache.end());
212         Slot &slot = i->second;
213
214         Slot::EntryMap::iterator i2 = slot.programs.find(key);
215         assert(i2 != slot.programs.end());
216         Slot::ProgramEntry &entry = i2->second;
217
218         assert(entry.program == NULL);
219
220         cache_lock.unlock();
221
222         entry.program = program;
223
224         /* unlock the slot */
225         slot_locker.unlock();
226
227         /* Increment reference count in OpenCL.
228          * The caller is going to release the object when done with it.
229          */
230         cl_int ciErr = clRetainProgram(program);
231         assert(ciErr == CL_SUCCESS);
232         (void) ciErr;
233 }
234
235 string OpenCLCache::get_kernel_md5()
236 {
237         OpenCLCache &self = global_instance();
238         thread_scoped_lock lock(self.kernel_md5_lock);
239
240         if(self.kernel_md5.empty()) {
241                 self.kernel_md5 = path_files_md5_hash(path_get("source"));
242         }
243         return self.kernel_md5;
244 }
245
246 OpenCLDeviceBase::OpenCLProgram::OpenCLProgram(OpenCLDeviceBase *device,
247                                                const string& program_name,
248                                                const string& kernel_file,
249                                                const string& kernel_build_options,
250                                                bool use_stdout)
251  : device(device),
252    program_name(program_name),
253    kernel_file(kernel_file),
254    kernel_build_options(kernel_build_options),
255    use_stdout(use_stdout)
256 {
257         loaded = false;
258         program = NULL;
259 }
260
261 OpenCLDeviceBase::OpenCLProgram::~OpenCLProgram()
262 {
263         release();
264 }
265
266 void OpenCLDeviceBase::OpenCLProgram::release()
267 {
268         for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
269                 if(kernel->second) {
270                         clReleaseKernel(kernel->second);
271                         kernel->second = NULL;
272                 }
273         }
274         if(program) {
275                 clReleaseProgram(program);
276                 program = NULL;
277         }
278 }
279
280 void OpenCLDeviceBase::OpenCLProgram::add_log(const string& msg, bool debug)
281 {
282         if(!use_stdout) {
283                 log += msg + "\n";
284         }
285         else if(!debug) {
286                 printf("%s\n", msg.c_str());
287                 fflush(stdout);
288         }
289         else {
290                 VLOG(2) << msg;
291         }
292 }
293
294 void OpenCLDeviceBase::OpenCLProgram::add_error(const string& msg)
295 {
296         if(use_stdout) {
297                 fprintf(stderr, "%s\n", msg.c_str());
298         }
299         if(error_msg == "") {
300                 error_msg += "\n";
301         }
302         error_msg += msg;
303 }
304
305 void OpenCLDeviceBase::OpenCLProgram::add_kernel(ustring name)
306 {
307         if(!kernels.count(name)) {
308                 kernels[name] = NULL;
309         }
310 }
311
312 bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src)
313 {
314         string build_options;
315         build_options = device->kernel_build_options(debug_src) + kernel_build_options;
316
317         VLOG(1) << "Build options passed to clBuildProgram: '"
318                 << build_options << "'.";
319         cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
320
321         /* show warnings even if build is successful */
322         size_t ret_val_size = 0;
323
324         clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
325
326         if(ciErr != CL_SUCCESS) {
327                 add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) + ", errors in console.");
328         }
329
330         if(ret_val_size > 1) {
331                 vector<char> build_log(ret_val_size + 1);
332                 clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
333
334                 build_log[ret_val_size] = '\0';
335                 /* Skip meaningless empty output from the NVidia compiler. */
336                 if(!(ret_val_size == 2 && build_log[0] == '\n')) {
337                         add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]), ciErr == CL_SUCCESS);
338                 }
339         }
340
341         return (ciErr == CL_SUCCESS);
342 }
343
344 bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src)
345 {
346         string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
347         /* We compile kernels consisting of many files. unfortunately OpenCL
348          * kernel caches do not seem to recognize changes in included files.
349          * so we force recompile on changes by adding the md5 hash of all files.
350          */
351         source = path_source_replace_includes(source, path_get("source"));
352         source += "\n// " + util_md5_string(source) + "\n";
353
354         if(debug_src) {
355                 path_write_text(*debug_src, source);
356         }
357
358         size_t source_len = source.size();
359         const char *source_str = source.c_str();
360         cl_int ciErr;
361
362         program = clCreateProgramWithSource(device->cxContext,
363                                             1,
364                                             &source_str,
365                                             &source_len,
366                                             &ciErr);
367
368         if(ciErr != CL_SUCCESS) {
369                 add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
370                 return false;
371         }
372
373         double starttime = time_dt();
374         add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
375         add_log(string("Build flags: ") + kernel_build_options, true);
376
377         if(!build_kernel(debug_src))
378                 return false;
379
380         double elapsed = time_dt() - starttime;
381         add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
382
383         return true;
384 }
385
386 bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin)
387 {
388         vector<string> args;
389         args.push_back("--background");
390         args.push_back("--factory-startup");
391         args.push_back("--python-expr");
392
393         args.push_back(
394                 string_printf(
395                         "import _cycles; _cycles.opencl_compile('%s', '%d', '%s', '%s', '%s', '%s', '%s')",
396                         (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? "true" : "false",
397                         device->device_num,
398                         device->device_name.c_str(),
399                         device->platform_name.c_str(),
400                         (device->kernel_build_options(NULL) + kernel_build_options).c_str(),
401                         kernel_file.c_str(),
402                         clbin.c_str()));
403
404         double starttime = time_dt();
405         add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
406         add_log(string("Build flags: ") + kernel_build_options, true);
407         if(!system_call_self(args) || !path_exists(clbin)) {
408                 return false;
409         }
410
411         double elapsed = time_dt() - starttime;
412         add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
413
414         return load_binary(clbin);
415 }
416
417 /* Compile opencl kernel. This method is called from the _cycles Python
418  * module compile kernels. Parameters must match function above. */
419 bool device_opencl_compile_kernel(const vector<string>& parameters)
420 {
421         bool force_all_platforms = parameters[0] == "true";
422         int device_platform_id = std::stoi(parameters[1]);
423         const string& device_name = parameters[2];
424         const string& platform_name = parameters[3];
425         const string& build_options = parameters[4];
426         const string& kernel_file = parameters[5];
427         const string& binary_path = parameters[6];
428
429         if(clewInit() != CLEW_SUCCESS) {
430                 return false;
431         }
432
433         vector<OpenCLPlatformDevice> usable_devices;
434         OpenCLInfo::get_usable_devices(&usable_devices, force_all_platforms);
435         if(device_platform_id >= usable_devices.size()) {
436                 return false;
437         }
438
439         OpenCLPlatformDevice& platform_device = usable_devices[device_platform_id];
440         if(platform_device.platform_name != platform_name ||
441            platform_device.device_name != device_name)
442         {
443                 return false;
444         }
445
446         cl_platform_id platform = platform_device.platform_id;
447         cl_device_id device = platform_device.device_id;
448         const cl_context_properties context_props[] = {
449                 CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
450                 0, 0
451         };
452
453         cl_int err;
454         cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
455         if(err != CL_SUCCESS) {
456                 return false;
457         }
458
459         string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\" // " + path_files_md5_hash(path_get("kernel")) + "\n";
460         source = path_source_replace_includes(source, path_get("source"));
461         size_t source_len = source.size();
462         const char *source_str = source.c_str();
463         cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
464         bool result = false;
465
466         if(err == CL_SUCCESS) {
467                 err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
468
469                 if(err == CL_SUCCESS) {
470                         size_t size = 0;
471                         clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
472                         if(size > 0) {
473                                 vector<uint8_t> binary(size);
474                                 uint8_t *bytes = &binary[0];
475                                 clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
476                                 result = path_write_binary(binary_path, binary);
477                         }
478                 }
479                 clReleaseProgram(program);
480         }
481
482         clReleaseContext(context);
483
484         return result;
485 }
486
487 bool OpenCLDeviceBase::OpenCLProgram::load_binary(const string& clbin,
488                                                   const string *debug_src)
489 {
490         /* read binary into memory */
491         vector<uint8_t> binary;
492
493         if(!path_read_binary(clbin, binary)) {
494                 add_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
495                 return false;
496         }
497
498         /* create program */
499         cl_int status, ciErr;
500         size_t size = binary.size();
501         const uint8_t *bytes = &binary[0];
502
503         program = clCreateProgramWithBinary(device->cxContext, 1, &device->cdDevice,
504                 &size, &bytes, &status, &ciErr);
505
506         if(status != CL_SUCCESS || ciErr != CL_SUCCESS) {
507                 add_error(string("OpenCL failed create program from cached binary ") + clbin + ": "
508                                  + clewErrorString(status) + " " + clewErrorString(ciErr));
509                 return false;
510         }
511
512         if(!build_kernel(debug_src))
513                 return false;
514
515         return true;
516 }
517
518 bool OpenCLDeviceBase::OpenCLProgram::save_binary(const string& clbin)
519 {
520         size_t size = 0;
521         clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
522
523         if(!size)
524                 return false;
525
526         vector<uint8_t> binary(size);
527         uint8_t *bytes = &binary[0];
528
529         clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
530
531         return path_write_binary(clbin, binary);
532 }
533
534 void OpenCLDeviceBase::OpenCLProgram::load()
535 {
536         assert(device);
537
538         loaded = false;
539
540         string device_md5 = device->device_md5_hash(kernel_build_options);
541
542         /* Try to use cached kernel. */
543         thread_scoped_lock cache_locker;
544         ustring cache_key(program_name + device_md5);
545         program = device->load_cached_kernel(cache_key,
546                                              cache_locker);
547
548         if(!program) {
549                 add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
550
551                 /* need to create source to get md5 */
552                 string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
553                 source = path_source_replace_includes(source, path_get("source"));
554
555                 string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
556                 basename = path_cache_get(path_join("kernels", basename));
557                 string clbin = basename + ".clbin";
558
559                 /* path to preprocessed source for debugging */
560                 string clsrc, *debug_src = NULL;
561
562                 if(OpenCLInfo::use_debug()) {
563                         clsrc = basename + ".cl";
564                         debug_src = &clsrc;
565                 }
566
567                 /* If binary kernel exists already, try use it. */
568                 if(path_exists(clbin) && load_binary(clbin)) {
569                         /* Kernel loaded from binary, nothing to do. */
570                         add_log(string("Loaded program from ") + clbin + ".", true);
571                 }
572                 else {
573                         add_log(string("Kernel file ") + clbin + " either doesn't exist or failed to be loaded by driver.", true);
574                         if(!path_exists(clbin)) {
575                                 if(compile_separate(clbin)) {
576                                         add_log(string("Built and loaded program from ") + clbin + ".", true);
577                                         loaded = true;
578                                 }
579                                 else {
580                                         add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
581
582                                         /* If does not exist or loading binary failed, compile kernel. */
583                                         if(!compile_kernel(debug_src)) {
584                                                 return;
585                                         }
586
587                                         /* Save binary for reuse. */
588                                         if(!save_binary(clbin)) {
589                                                 add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
590                                         }
591                                 }
592                         }
593                         else {
594                                 add_log(string("Kernel file ") + clbin + "exists, but failed to be loaded by driver.", true);
595                                 /* Fall back to compiling. */
596                                 if(!compile_kernel(debug_src)) {
597                                         return;
598                                 }
599                         }
600                 }
601
602                 /* Cache the program. */
603                 device->store_cached_kernel(program,
604                                             cache_key,
605                                             cache_locker);
606         }
607         else {
608                 add_log(string("Found cached OpenCL program ") + program_name + ".", true);
609         }
610
611         for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
612                 assert(kernel->second == NULL);
613                 cl_int ciErr;
614                 string name = "kernel_ocl_" + kernel->first.string();
615                 kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
616                 if(device->opencl_error(ciErr)) {
617                         add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " + clewErrorString(ciErr));
618                         return;
619                 }
620         }
621
622         loaded = true;
623 }
624
625 void OpenCLDeviceBase::OpenCLProgram::report_error()
626 {
627         /* If loaded is true, there was no error. */
628         if(loaded) return;
629         /* if use_stdout is true, the error was already reported. */
630         if(use_stdout) return;
631
632         cerr << error_msg << endl;
633         if(!compile_output.empty()) {
634                 cerr << "OpenCL kernel build output for " << program_name << ":" << endl;
635                 cerr << compile_output << endl;
636         }
637 }
638
639 cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()()
640 {
641         assert(kernels.size() == 1);
642         return kernels.begin()->second;
643 }
644
645 cl_kernel OpenCLDeviceBase::OpenCLProgram::operator()(ustring name)
646 {
647         assert(kernels.count(name));
648         return kernels[name];
649 }
650
651 cl_device_type OpenCLInfo::device_type()
652 {
653         switch(DebugFlags().opencl.device_type)
654         {
655                 case DebugFlags::OpenCL::DEVICE_NONE:
656                         return 0;
657                 case DebugFlags::OpenCL::DEVICE_ALL:
658                         return CL_DEVICE_TYPE_ALL;
659                 case DebugFlags::OpenCL::DEVICE_DEFAULT:
660                         return CL_DEVICE_TYPE_DEFAULT;
661                 case DebugFlags::OpenCL::DEVICE_CPU:
662                         return CL_DEVICE_TYPE_CPU;
663                 case DebugFlags::OpenCL::DEVICE_GPU:
664                         return CL_DEVICE_TYPE_GPU;
665                 case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
666                         return CL_DEVICE_TYPE_ACCELERATOR;
667                 default:
668                         return CL_DEVICE_TYPE_ALL;
669         }
670 }
671
672 bool OpenCLInfo::use_debug()
673 {
674         return DebugFlags().opencl.debug;
675 }
676
677 bool OpenCLInfo::use_single_program()
678 {
679         return DebugFlags().opencl.single_program;
680 }
681
682 bool OpenCLInfo::kernel_use_advanced_shading(const string& platform)
683 {
684         /* keep this in sync with kernel_types.h! */
685         if(platform == "NVIDIA CUDA")
686                 return true;
687         else if(platform == "Apple")
688                 return true;
689         else if(platform == "AMD Accelerated Parallel Processing")
690                 return true;
691         else if(platform == "Intel(R) OpenCL")
692                 return true;
693         /* Make sure officially unsupported OpenCL platforms
694          * does not set up to use advanced shading.
695          */
696         return false;
697 }
698
699 bool OpenCLInfo::kernel_use_split(const string& platform_name,
700                                   const cl_device_type device_type)
701 {
702         if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) {
703                 VLOG(1) << "Forcing split kernel to use.";
704                 return true;
705         }
706         if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) {
707                 VLOG(1) << "Forcing mega kernel to use.";
708                 return false;
709         }
710         /* TODO(sergey): Replace string lookups with more enum-like API,
711          * similar to device/vendor checks blender's gpu.
712          */
713         if(platform_name == "AMD Accelerated Parallel Processing" &&
714            device_type == CL_DEVICE_TYPE_GPU)
715         {
716                 return true;
717         }
718         return false;
719 }
720
721 bool OpenCLInfo::device_supported(const string& platform_name,
722                                   const cl_device_id device_id)
723 {
724         cl_device_type device_type;
725         if(!get_device_type(device_id, &device_type)) {
726                 return false;
727         }
728         string device_name;
729         if(!get_device_name(device_id, &device_name)) {
730                 return false;
731         }
732
733         int driver_major = 0;
734         int driver_minor = 0;
735         if(!get_driver_version(device_id, &driver_major, &driver_minor)) {
736                 return false;
737         }
738         VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
739
740         /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
741          * (aka, it will not be on Intel framework). This isn't supported
742          * and needs an explicit blacklist.
743          */
744         if(strstr(device_name.c_str(), "Iris")) {
745                 return false;
746         }
747         if(platform_name == "AMD Accelerated Parallel Processing" &&
748            device_type == CL_DEVICE_TYPE_GPU)
749         {
750                 if(driver_major < 2236) {
751                         VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported.";
752                         return false;
753                 }
754                 const char *blacklist[] = {
755                         /* GCN 1 */
756                         "Tahiti", "Pitcairn", "Capeverde", "Oland", "Hainan",
757                         NULL
758                 };
759                 for(int i = 0; blacklist[i] != NULL; i++) {
760                         if(device_name == blacklist[i]) {
761                                 VLOG(1) << "AMD device " << device_name << " not supported";
762                                 return false;
763                         }
764                 }
765                 return true;
766         }
767         if(platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
768                 return false;
769         }
770         return false;
771 }
772
773 bool OpenCLInfo::platform_version_check(cl_platform_id platform,
774                                         string *error)
775 {
776         const int req_major = 1, req_minor = 1;
777         int major, minor;
778         char version[256];
779         clGetPlatformInfo(platform,
780                           CL_PLATFORM_VERSION,
781                           sizeof(version),
782                           &version,
783                           NULL);
784         if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
785                 if(error != NULL) {
786                         *error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
787                 }
788                 return false;
789         }
790         if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
791                 if(error != NULL) {
792                         *error = string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
793                 }
794                 return false;
795         }
796         if(error != NULL) {
797                 *error = "";
798         }
799         return true;
800 }
801
802 bool OpenCLInfo::device_version_check(cl_device_id device,
803                                       string *error)
804 {
805         const int req_major = 1, req_minor = 1;
806         int major, minor;
807         char version[256];
808         clGetDeviceInfo(device,
809                         CL_DEVICE_OPENCL_C_VERSION,
810                         sizeof(version),
811                         &version,
812                         NULL);
813         if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
814                 if(error != NULL) {
815                         *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
816                 }
817                 return false;
818         }
819         if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
820                 if(error != NULL) {
821                         *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
822                 }
823                 return false;
824         }
825         if(error != NULL) {
826                 *error = "";
827         }
828         return true;
829 }
830
831 string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id device_id)
832 {
833         if(platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
834                 /* Use cl_amd_device_topology extension. */
835                 cl_char topology[24];
836                 if(clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS && topology[0] == 1) {
837                         return string_printf("%02x:%02x.%01x",
838                                              (unsigned int)topology[21],
839                                              (unsigned int)topology[22],
840                                              (unsigned int)topology[23]);
841                 }
842         }
843         else if(platform_name == "NVIDIA CUDA") {
844                 /* Use two undocumented options of the cl_nv_device_attribute_query extension. */
845                 cl_int bus_id, slot_id;
846                 if(clGetDeviceInfo(device_id, 0x4008, sizeof(cl_int), &bus_id,  NULL) == CL_SUCCESS &&
847                    clGetDeviceInfo(device_id, 0x4009, sizeof(cl_int), &slot_id, NULL) == CL_SUCCESS) {
848                         return string_printf("%02x:%02x.%01x",
849                                              (unsigned int)(bus_id),
850                                              (unsigned int)(slot_id >> 3),
851                                              (unsigned int)(slot_id & 0x7));
852                 }
853         }
854         /* No general way to get a hardware ID from OpenCL => give up. */
855         return "";
856 }
857
858 void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
859                                     bool force_all)
860 {
861         const bool force_all_platforms = force_all ||
862                 (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT);
863         const cl_device_type device_type = OpenCLInfo::device_type();
864         static bool first_time = true;
865 #define FIRST_VLOG(severity) if(first_time) VLOG(severity)
866
867         usable_devices->clear();
868
869         if(device_type == 0) {
870                 FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
871                 first_time = false;
872                 return;
873         }
874
875         cl_int error;
876         vector<cl_device_id> device_ids;
877         vector<cl_platform_id> platform_ids;
878
879         /* Get platforms. */
880         if(!get_platforms(&platform_ids, &error)) {
881                 FIRST_VLOG(2) << "Error fetching platforms:"
882                               << string(clewErrorString(error));
883                 first_time = false;
884                 return;
885         }
886         if(platform_ids.size() == 0) {
887                 FIRST_VLOG(2) << "No OpenCL platforms were found.";
888                 first_time = false;
889                 return;
890         }
891         /* Devices are numbered consecutively across platforms. */
892         for(int platform = 0; platform < platform_ids.size(); platform++) {
893                 cl_platform_id platform_id = platform_ids[platform];
894                 string platform_name;
895                 if(!get_platform_name(platform_id, &platform_name)) {
896                         FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
897                         continue;
898                 }
899                 FIRST_VLOG(2) << "Enumerating devices for platform "
900                               << platform_name << ".";
901                 if(!platform_version_check(platform_id)) {
902                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
903                                       << " due to too old compiler version.";
904                         continue;
905                 }
906                 if(!get_platform_devices(platform_id,
907                                          device_type,
908                                          &device_ids,
909                                          &error))
910                 {
911                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
912                                       << ", failed to fetch of devices: "
913                                       << string(clewErrorString(error));
914                         continue;
915                 }
916                 if(device_ids.size() == 0) {
917                         FIRST_VLOG(2) << "Ignoring platform " << platform_name
918                                       << ", it has no devices.";
919                         continue;
920                 }
921                 for(int num = 0; num < device_ids.size(); num++) {
922                         const cl_device_id device_id = device_ids[num];
923                         string device_name;
924                         if(!get_device_name(device_id, &device_name, &error)) {
925                                 FIRST_VLOG(2) << "Failed to fetch device name: "
926                                               << string(clewErrorString(error))
927                                               << ", ignoring.";
928                                 continue;
929                         }
930                         if(!device_version_check(device_id)) {
931                                 FIRST_VLOG(2) << "Ignoring device " << device_name
932                                               << " due to old compiler version.";
933                                 continue;
934                         }
935                         if(force_all_platforms ||
936                            device_supported(platform_name, device_id))
937                         {
938                                 cl_device_type device_type;
939                                 if(!get_device_type(device_id, &device_type, &error)) {
940                                         FIRST_VLOG(2) << "Ignoring device " << device_name
941                                                       << ", failed to fetch device type:"
942                                                       << string(clewErrorString(error));
943                                         continue;
944                                 }
945                                 string readable_device_name =
946                                         get_readable_device_name(device_id);
947                                 if(readable_device_name != device_name) {
948                                         FIRST_VLOG(2) << "Using more readable device name: "
949                                                       << readable_device_name;
950                                 }
951                                 FIRST_VLOG(2) << "Adding new device "
952                                               << readable_device_name << ".";
953                                 string hardware_id = get_hardware_id(platform_name, device_id);
954                                 string device_extensions = get_device_extensions(device_id);
955                                 usable_devices->push_back(OpenCLPlatformDevice(
956                                         platform_id,
957                                         platform_name,
958                                         device_id,
959                                         device_type,
960                                         readable_device_name,
961                                         hardware_id,
962                                         device_extensions));
963                         }
964                         else {
965                                 FIRST_VLOG(2) << "Ignoring device " << device_name
966                                               << ", not officially supported yet.";
967                         }
968                 }
969         }
970         first_time = false;
971 }
972
973 bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids,
974                                cl_int *error)
975 {
976         /* Reset from possible previous state. */
977         platform_ids->resize(0);
978         cl_uint num_platforms;
979         if(!get_num_platforms(&num_platforms, error)) {
980                 return false;
981         }
982         /* Get actual platforms. */
983         cl_int err;
984         platform_ids->resize(num_platforms);
985         if((err = clGetPlatformIDs(num_platforms,
986                                    &platform_ids->at(0),
987                                    NULL)) != CL_SUCCESS) {
988                 if(error != NULL) {
989                         *error = err;
990                 }
991                 return false;
992         }
993         if(error != NULL) {
994                 *error = CL_SUCCESS;
995         }
996         return true;
997 }
998
999 vector<cl_platform_id> OpenCLInfo::get_platforms()
1000 {
1001         vector<cl_platform_id> platform_ids;
1002         get_platforms(&platform_ids);
1003         return platform_ids;
1004 }
1005
1006 bool OpenCLInfo::get_num_platforms(cl_uint *num_platforms, cl_int *error)
1007 {
1008         cl_int err;
1009         if((err = clGetPlatformIDs(0, NULL, num_platforms)) != CL_SUCCESS) {
1010                 if(error != NULL) {
1011                         *error = err;
1012                 }
1013                 *num_platforms = 0;
1014                 return false;
1015         }
1016         if(error != NULL) {
1017                 *error = CL_SUCCESS;
1018         }
1019         return true;
1020 }
1021
1022 cl_uint OpenCLInfo::get_num_platforms()
1023 {
1024         cl_uint num_platforms;
1025         if(!get_num_platforms(&num_platforms)) {
1026                 return 0;
1027         }
1028         return num_platforms;
1029 }
1030
1031 bool OpenCLInfo::get_platform_name(cl_platform_id platform_id,
1032                                    string *platform_name)
1033 {
1034         char buffer[256];
1035         if(clGetPlatformInfo(platform_id,
1036                              CL_PLATFORM_NAME,
1037                              sizeof(buffer),
1038                              &buffer,
1039                              NULL) != CL_SUCCESS)
1040         {
1041                 *platform_name = "";
1042                 return false;
1043         }
1044         *platform_name = buffer;
1045         return true;
1046 }
1047
1048 string OpenCLInfo::get_platform_name(cl_platform_id platform_id)
1049 {
1050         string platform_name;
1051         if(!get_platform_name(platform_id, &platform_name)) {
1052                 return "";
1053         }
1054         return platform_name;
1055 }
1056
1057 bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
1058                                           cl_device_type device_type,
1059                                           cl_uint *num_devices,
1060                                           cl_int *error)
1061 {
1062         cl_int err;
1063         if((err = clGetDeviceIDs(platform_id,
1064                                  device_type,
1065                                  0,
1066                                  NULL,
1067                                  num_devices)) != CL_SUCCESS)
1068         {
1069                 if(error != NULL) {
1070                         *error = err;
1071                 }
1072                 *num_devices = 0;
1073                 return false;
1074         }
1075         if(error != NULL) {
1076                 *error = CL_SUCCESS;
1077         }
1078         return true;
1079 }
1080
1081 cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
1082                                              cl_device_type device_type)
1083 {
1084         cl_uint num_devices;
1085         if(!get_num_platform_devices(platform_id,
1086                                      device_type,
1087                                      &num_devices))
1088         {
1089                 return 0;
1090         }
1091         return num_devices;
1092 }
1093
1094 bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
1095                                       cl_device_type device_type,
1096                                       vector<cl_device_id> *device_ids,
1097                                       cl_int* error)
1098 {
1099         /* Reset from possible previous state. */
1100         device_ids->resize(0);
1101         /* Get number of devices to pre-allocate memory. */
1102         cl_uint num_devices;
1103         if(!get_num_platform_devices(platform_id,
1104                                      device_type,
1105                                      &num_devices,
1106                                      error))
1107         {
1108                 return false;
1109         }
1110         /* Get actual device list. */
1111         device_ids->resize(num_devices);
1112         cl_int err;
1113         if((err = clGetDeviceIDs(platform_id,
1114                                  device_type,
1115                                  num_devices,
1116                                  &device_ids->at(0),
1117                                  NULL)) != CL_SUCCESS)
1118         {
1119                 if(error != NULL) {
1120                         *error = err;
1121                 }
1122                 return false;
1123         }
1124         if(error != NULL) {
1125                 *error = CL_SUCCESS;
1126         }
1127         return true;
1128 }
1129
1130 vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
1131                                                       cl_device_type device_type)
1132 {
1133         vector<cl_device_id> devices;
1134         get_platform_devices(platform_id, device_type, &devices);
1135         return devices;
1136 }
1137
1138 bool OpenCLInfo::get_device_name(cl_device_id device_id,
1139                                  string *device_name,
1140                                  cl_int* error)
1141 {
1142         char buffer[1024];
1143         cl_int err;
1144         if((err = clGetDeviceInfo(device_id,
1145                                   CL_DEVICE_NAME,
1146                                   sizeof(buffer),
1147                                   &buffer,
1148                                   NULL)) != CL_SUCCESS)
1149         {
1150                 if(error != NULL) {
1151                         *error = err;
1152                 }
1153                 *device_name = "";
1154                 return false;
1155         }
1156         if(error != NULL) {
1157                 *error = CL_SUCCESS;
1158         }
1159         *device_name = buffer;
1160         return true;
1161 }
1162
1163 string OpenCLInfo::get_device_name(cl_device_id device_id)
1164 {
1165         string device_name;
1166         if(!get_device_name(device_id, &device_name)) {
1167                 return "";
1168         }
1169         return device_name;
1170 }
1171
1172 bool OpenCLInfo::get_device_extensions(cl_device_id device_id,
1173         string *device_extensions,
1174         cl_int* error)
1175 {
1176         char buffer[1024];
1177         cl_int err;
1178         if((err = clGetDeviceInfo(device_id,
1179                 CL_DEVICE_EXTENSIONS,
1180                 sizeof(buffer),
1181                 &buffer,
1182                 NULL)) != CL_SUCCESS)
1183         {
1184                 if(error != NULL) {
1185                         *error = err;
1186                 }
1187                 *device_extensions = "";
1188                 return false;
1189         }
1190         if(error != NULL) {
1191                 *error = CL_SUCCESS;
1192         }
1193         *device_extensions = buffer;
1194         return true;
1195 }
1196
1197 string OpenCLInfo::get_device_extensions(cl_device_id device_id)
1198 {
1199         string device_extensions;
1200         if(!get_device_extensions(device_id, &device_extensions)) {
1201                 return "";
1202         }
1203         return device_extensions;
1204 }
1205
1206 bool OpenCLInfo::get_device_type(cl_device_id device_id,
1207                                  cl_device_type *device_type,
1208                                  cl_int* error)
1209 {
1210         cl_int err;
1211         if((err = clGetDeviceInfo(device_id,
1212                                   CL_DEVICE_TYPE,
1213                                   sizeof(cl_device_type),
1214                                   device_type,
1215                                   NULL)) != CL_SUCCESS)
1216         {
1217                 if(error != NULL) {
1218                         *error = err;
1219                 }
1220                 *device_type = 0;
1221                 return false;
1222         }
1223         if(error != NULL) {
1224                 *error = CL_SUCCESS;
1225         }
1226         return true;
1227 }
1228
1229 cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
1230 {
1231         cl_device_type device_type;
1232         if(!get_device_type(device_id, &device_type)) {
1233                 return 0;
1234         }
1235         return device_type;
1236 }
1237
1238 string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
1239 {
1240         string name = "";
1241         char board_name[1024];
1242         size_t length = 0;
1243         if(clGetDeviceInfo(device_id,
1244                            CL_DEVICE_BOARD_NAME_AMD,
1245                            sizeof(board_name),
1246                            &board_name,
1247                            &length) == CL_SUCCESS)
1248         {
1249                 if(length != 0 && board_name[0] != '\0') {
1250                         name = board_name;
1251                 }
1252         }
1253
1254         /* Fallback to standard device name API. */
1255         if(name.empty()) {
1256                 name = get_device_name(device_id);
1257         }
1258
1259         /* Special exception for AMD Vega, need to be able to tell
1260          * Vega 56 from 64 apart.
1261          */
1262         if(name == "Radeon RX Vega") {
1263                 cl_int max_compute_units = 0;
1264                 if(clGetDeviceInfo(device_id,
1265                                    CL_DEVICE_MAX_COMPUTE_UNITS,
1266                                    sizeof(max_compute_units),
1267                                    &max_compute_units,
1268                                    NULL) == CL_SUCCESS)
1269                 {
1270                         name += " " + to_string(max_compute_units);
1271                 }
1272         }
1273
1274         /* Distinguish from our native CPU device. */
1275         if(get_device_type(device_id) & CL_DEVICE_TYPE_CPU) {
1276                 name += " (OpenCL)";
1277         }
1278
1279         return name;
1280 }
1281
1282 bool OpenCLInfo::get_driver_version(cl_device_id device_id,
1283                                     int *major,
1284                                     int *minor,
1285                                     cl_int* error)
1286 {
1287         char buffer[1024];
1288         cl_int err;
1289         if((err = clGetDeviceInfo(device_id,
1290                                   CL_DRIVER_VERSION,
1291                                   sizeof(buffer),
1292                                   &buffer,
1293                                   NULL)) != CL_SUCCESS)
1294         {
1295                 if(error != NULL) {
1296                         *error = err;
1297                 }
1298                 return false;
1299         }
1300         if(error != NULL) {
1301                 *error = CL_SUCCESS;
1302         }
1303         if(sscanf(buffer, "%d.%d", major, minor) < 2) {
1304                 VLOG(1) << string_printf("OpenCL: failed to parse driver version string (%s).", buffer);
1305                 return false;
1306         }
1307         return true;
1308 }
1309
1310 int OpenCLInfo::mem_sub_ptr_alignment(cl_device_id device_id)
1311 {
1312         int base_align_bits;
1313         if(clGetDeviceInfo(device_id,
1314                            CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1315                            sizeof(int),
1316                            &base_align_bits,
1317                            NULL) == CL_SUCCESS)
1318         {
1319                 return base_align_bits/8;
1320         }
1321         return 1;
1322 }
1323
1324 CCL_NAMESPACE_END
1325
1326 #endif