Update CUEW to latest version
[blender.git] / extern / cuew / src / cuew.c
1 /*
2  * Copyright 2011-2014 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
/* MSVC compatibility: map the standard names used in this file onto the
 * underscore-prefixed spellings.  snprintf is only remapped for compilers
 * reporting _MSC_VER below 1900. */
#if defined(_MSC_VER)
#  if _MSC_VER < 1900
#    define snprintf _snprintf
#  endif
#  define popen _popen
#  define pclose _pclose
#  define _CRT_SECURE_NO_WARNINGS
#endif
25
26 #include <cuew.h>
27 #include <assert.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <sys/stat.h>
31
/* Platform abstraction for run-time library loading.
 *
 * DynamicLibrary is the native module handle type, and the three
 * dynamic_library_* macros forward to the native open / close / symbol
 * lookup calls. */
#if defined(_WIN32)
#  define WIN32_LEAN_AND_MEAN
#  define VC_EXTRALEAN
#  include <windows.h>

typedef HMODULE DynamicLibrary;

#  define dynamic_library_open(path)         LoadLibraryA(path)
#  define dynamic_library_close(lib)         FreeLibrary(lib)
#  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
#else
#  include <dlfcn.h>

typedef void* DynamicLibrary;

/* RTLD_NOW: resolve all symbols of the library at load time. */
#  define dynamic_library_open(path)         dlopen(path, RTLD_NOW)
#  define dynamic_library_close(lib)         dlclose(lib)
#  define dynamic_library_find(lib, symbol)  dlsym(lib, symbol)
#endif
53
/* Symbol lookup helpers.
 *
 * `name` must be a global function pointer whose function type is spelled
 * `t<name>`; the macro looks up the symbol called `name` in `lib` and stores
 * the result (NULL when the symbol is not exported) in that pointer.
 *
 * The _CHECKED variant additionally asserts that the lookup succeeded.
 *
 * Both are wrapped in do { ... } while (0) so that each expands to exactly
 * one statement and stays correct inside an unbraced if/else.  Invoke them
 * with a trailing semicolon. */
#define _LIBRARY_FIND_CHECKED(lib, name) \
        do { \
          name = (t##name *)dynamic_library_find(lib, #name); \
          assert(name); \
        } while (0)

#define _LIBRARY_FIND(lib, name) \
        do { \
          name = (t##name *)dynamic_library_find(lib, #name); \
        } while (0)

/* Lookups bound to the CUDA driver library handle. */
#define CUDA_LIBRARY_FIND_CHECKED(name) \
        _LIBRARY_FIND_CHECKED(cuda_lib, name)
#define CUDA_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name)

/* Lookups bound to the NVRTC library handle. */
#define NVRTC_LIBRARY_FIND_CHECKED(name) \
        _LIBRARY_FIND_CHECKED(nvrtc_lib, name)
#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name)
68
69 static DynamicLibrary cuda_lib;
70 static DynamicLibrary nvrtc_lib;
71
72 /* Function definitions. */
73 tcuGetErrorString *cuGetErrorString;
74 tcuGetErrorName *cuGetErrorName;
75 tcuInit *cuInit;
76 tcuDriverGetVersion *cuDriverGetVersion;
77 tcuDeviceGet *cuDeviceGet;
78 tcuDeviceGetCount *cuDeviceGetCount;
79 tcuDeviceGetName *cuDeviceGetName;
80 tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
81 tcuDeviceGetAttribute *cuDeviceGetAttribute;
82 tcuDeviceGetProperties *cuDeviceGetProperties;
83 tcuDeviceComputeCapability *cuDeviceComputeCapability;
84 tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
85 tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
86 tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags;
87 tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
88 tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset;
89 tcuCtxCreate_v2 *cuCtxCreate_v2;
90 tcuCtxDestroy_v2 *cuCtxDestroy_v2;
91 tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
92 tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
93 tcuCtxSetCurrent *cuCtxSetCurrent;
94 tcuCtxGetCurrent *cuCtxGetCurrent;
95 tcuCtxGetDevice *cuCtxGetDevice;
96 tcuCtxGetFlags *cuCtxGetFlags;
97 tcuCtxSynchronize *cuCtxSynchronize;
98 tcuCtxSetLimit *cuCtxSetLimit;
99 tcuCtxGetLimit *cuCtxGetLimit;
100 tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
101 tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
102 tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
103 tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
104 tcuCtxGetApiVersion *cuCtxGetApiVersion;
105 tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
106 tcuCtxAttach *cuCtxAttach;
107 tcuCtxDetach *cuCtxDetach;
108 tcuModuleLoad *cuModuleLoad;
109 tcuModuleLoadData *cuModuleLoadData;
110 tcuModuleLoadDataEx *cuModuleLoadDataEx;
111 tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
112 tcuModuleUnload *cuModuleUnload;
113 tcuModuleGetFunction *cuModuleGetFunction;
114 tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
115 tcuModuleGetTexRef *cuModuleGetTexRef;
116 tcuModuleGetSurfRef *cuModuleGetSurfRef;
117 tcuLinkCreate_v2 *cuLinkCreate_v2;
118 tcuLinkAddData_v2 *cuLinkAddData_v2;
119 tcuLinkAddFile_v2 *cuLinkAddFile_v2;
120 tcuLinkComplete *cuLinkComplete;
121 tcuLinkDestroy *cuLinkDestroy;
122 tcuMemGetInfo_v2 *cuMemGetInfo_v2;
123 tcuMemAlloc_v2 *cuMemAlloc_v2;
124 tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
125 tcuMemFree_v2 *cuMemFree_v2;
126 tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
127 tcuMemAllocHost_v2 *cuMemAllocHost_v2;
128 tcuMemFreeHost *cuMemFreeHost;
129 tcuMemHostAlloc *cuMemHostAlloc;
130 tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
131 tcuMemHostGetFlags *cuMemHostGetFlags;
132 tcuMemAllocManaged *cuMemAllocManaged;
133 tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
134 tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
135 tcuIpcGetEventHandle *cuIpcGetEventHandle;
136 tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
137 tcuIpcGetMemHandle *cuIpcGetMemHandle;
138 tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
139 tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
140 tcuMemHostRegister_v2 *cuMemHostRegister_v2;
141 tcuMemHostUnregister *cuMemHostUnregister;
142 tcuMemcpy *cuMemcpy;
143 tcuMemcpyPeer *cuMemcpyPeer;
144 tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
145 tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
146 tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
147 tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
148 tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
149 tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
150 tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
151 tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
152 tcuMemcpy2D_v2 *cuMemcpy2D_v2;
153 tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
154 tcuMemcpy3D_v2 *cuMemcpy3D_v2;
155 tcuMemcpy3DPeer *cuMemcpy3DPeer;
156 tcuMemcpyAsync *cuMemcpyAsync;
157 tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
158 tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
159 tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
160 tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
161 tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
162 tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
163 tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
164 tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
165 tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
166 tcuMemsetD8_v2 *cuMemsetD8_v2;
167 tcuMemsetD16_v2 *cuMemsetD16_v2;
168 tcuMemsetD32_v2 *cuMemsetD32_v2;
169 tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
170 tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
171 tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
172 tcuMemsetD8Async *cuMemsetD8Async;
173 tcuMemsetD16Async *cuMemsetD16Async;
174 tcuMemsetD32Async *cuMemsetD32Async;
175 tcuMemsetD2D8Async *cuMemsetD2D8Async;
176 tcuMemsetD2D16Async *cuMemsetD2D16Async;
177 tcuMemsetD2D32Async *cuMemsetD2D32Async;
178 tcuArrayCreate_v2 *cuArrayCreate_v2;
179 tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
180 tcuArrayDestroy *cuArrayDestroy;
181 tcuArray3DCreate_v2 *cuArray3DCreate_v2;
182 tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
183 tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
184 tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
185 tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
186 tcuPointerGetAttribute *cuPointerGetAttribute;
187 tcuMemPrefetchAsync *cuMemPrefetchAsync;
188 tcuMemAdvise *cuMemAdvise;
189 tcuMemRangeGetAttribute *cuMemRangeGetAttribute;
190 tcuMemRangeGetAttributes *cuMemRangeGetAttributes;
191 tcuPointerSetAttribute *cuPointerSetAttribute;
192 tcuPointerGetAttributes *cuPointerGetAttributes;
193 tcuStreamCreate *cuStreamCreate;
194 tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
195 tcuStreamGetPriority *cuStreamGetPriority;
196 tcuStreamGetFlags *cuStreamGetFlags;
197 tcuStreamWaitEvent *cuStreamWaitEvent;
198 tcuStreamAddCallback *cuStreamAddCallback;
199 tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
200 tcuStreamQuery *cuStreamQuery;
201 tcuStreamSynchronize *cuStreamSynchronize;
202 tcuStreamDestroy_v2 *cuStreamDestroy_v2;
203 tcuEventCreate *cuEventCreate;
204 tcuEventRecord *cuEventRecord;
205 tcuEventQuery *cuEventQuery;
206 tcuEventSynchronize *cuEventSynchronize;
207 tcuEventDestroy_v2 *cuEventDestroy_v2;
208 tcuEventElapsedTime *cuEventElapsedTime;
209 tcuStreamWaitValue32 *cuStreamWaitValue32;
210 tcuStreamWaitValue64 *cuStreamWaitValue64;
211 tcuStreamWriteValue32 *cuStreamWriteValue32;
212 tcuStreamWriteValue64 *cuStreamWriteValue64;
213 tcuStreamBatchMemOp *cuStreamBatchMemOp;
214 tcuFuncGetAttribute *cuFuncGetAttribute;
215 tcuFuncSetAttribute *cuFuncSetAttribute;
216 tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
217 tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
218 tcuLaunchKernel *cuLaunchKernel;
219 tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel;
220 tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice;
221 tcuFuncSetBlockShape *cuFuncSetBlockShape;
222 tcuFuncSetSharedSize *cuFuncSetSharedSize;
223 tcuParamSetSize *cuParamSetSize;
224 tcuParamSeti *cuParamSeti;
225 tcuParamSetf *cuParamSetf;
226 tcuParamSetv *cuParamSetv;
227 tcuLaunch *cuLaunch;
228 tcuLaunchGrid *cuLaunchGrid;
229 tcuLaunchGridAsync *cuLaunchGridAsync;
230 tcuParamSetTexRef *cuParamSetTexRef;
231 tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
232 tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
233 tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
234 tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
235 tcuTexRefSetArray *cuTexRefSetArray;
236 tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
237 tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
238 tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
239 tcuTexRefSetFormat *cuTexRefSetFormat;
240 tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
241 tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
242 tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
243 tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
244 tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
245 tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
246 tcuTexRefSetBorderColor *cuTexRefSetBorderColor;
247 tcuTexRefSetFlags *cuTexRefSetFlags;
248 tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
249 tcuTexRefGetArray *cuTexRefGetArray;
250 tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
251 tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
252 tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
253 tcuTexRefGetFormat *cuTexRefGetFormat;
254 tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
255 tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
256 tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
257 tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
258 tcuTexRefGetBorderColor *cuTexRefGetBorderColor;
259 tcuTexRefGetFlags *cuTexRefGetFlags;
260 tcuTexRefCreate *cuTexRefCreate;
261 tcuTexRefDestroy *cuTexRefDestroy;
262 tcuSurfRefSetArray *cuSurfRefSetArray;
263 tcuSurfRefGetArray *cuSurfRefGetArray;
264 tcuTexObjectCreate *cuTexObjectCreate;
265 tcuTexObjectDestroy *cuTexObjectDestroy;
266 tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
267 tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
268 tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
269 tcuSurfObjectCreate *cuSurfObjectCreate;
270 tcuSurfObjectDestroy *cuSurfObjectDestroy;
271 tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
272 tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
273 tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
274 tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
275 tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
276 tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
277 tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
278 tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
279 tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
280 tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
281 tcuGraphicsMapResources *cuGraphicsMapResources;
282 tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
283 tcuGetExportTable *cuGetExportTable;
284
285 tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
286 tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
287 tcuGLGetDevices_v2 *cuGLGetDevices_v2;
288 tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
289 tcuGLInit *cuGLInit;
290 tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
291 tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
292 tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
293 tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
294 tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
295 tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
296 tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
297
298 tnvrtcGetErrorString *nvrtcGetErrorString;
299 tnvrtcVersion *nvrtcVersion;
300 tnvrtcCreateProgram *nvrtcCreateProgram;
301 tnvrtcDestroyProgram *nvrtcDestroyProgram;
302 tnvrtcCompileProgram *nvrtcCompileProgram;
303 tnvrtcGetPTXSize *nvrtcGetPTXSize;
304 tnvrtcGetPTX *nvrtcGetPTX;
305 tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize;
306 tnvrtcGetProgramLog *nvrtcGetProgramLog;
307 tnvrtcAddNameExpression *nvrtcAddNameExpression;
308 tnvrtcGetLoweredName *nvrtcGetLoweredName;
309
310
311 static DynamicLibrary dynamic_library_open_find(const char **paths) {
312   int i = 0;
313   while (paths[i] != NULL) {
314       DynamicLibrary lib = dynamic_library_open(paths[i]);
315       if (lib != NULL) {
316         return lib;
317       }
318       ++i;
319   }
320   return NULL;
321 }
322
323 /* Implementation function. */
324 static void cuewCudaExit(void) {
325   if (cuda_lib != NULL) {
326     /*  Ignore errors. */
327     dynamic_library_close(cuda_lib);
328     cuda_lib = NULL;
329   }
330 }
331
332 static int cuewCudaInit(void) {
333   /* Library paths. */
334 #ifdef _WIN32
335   /* Expected in c:/windows/system or similar, no path needed. */
336   const char *cuda_paths[] = {"nvcuda.dll", NULL};
337 #elif defined(__APPLE__)
338   /* Default installation path. */
339   const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
340 #else
341   const char *cuda_paths[] = {"libcuda.so", NULL};
342 #endif
343   static int initialized = 0;
344   static int result = 0;
345   int error, driver_version;
346
347   if (initialized) {
348     return result;
349   }
350
351   initialized = 1;
352
353   error = atexit(cuewCudaExit);
354   if (error) {
355     result = CUEW_ERROR_ATEXIT_FAILED;
356     return result;
357   }
358
359   /* Load library. */
360   cuda_lib = dynamic_library_open_find(cuda_paths);
361
362   if (cuda_lib == NULL) {
363     result = CUEW_ERROR_OPEN_FAILED;
364     return result;
365   }
366
367   /* Detect driver version. */
368   driver_version = 1000;
369
370   CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
371   if (cuDriverGetVersion) {
372     cuDriverGetVersion(&driver_version);
373   }
374
375   /* We require version 4.0. */
376   if (driver_version < 4000) {
377     result = CUEW_ERROR_OPEN_FAILED;
378     return result;
379   }
380   /* Fetch all function pointers. */
381   CUDA_LIBRARY_FIND(cuGetErrorString);
382   CUDA_LIBRARY_FIND(cuGetErrorName);
383   CUDA_LIBRARY_FIND(cuInit);
384   CUDA_LIBRARY_FIND(cuDriverGetVersion);
385   CUDA_LIBRARY_FIND(cuDeviceGet);
386   CUDA_LIBRARY_FIND(cuDeviceGetCount);
387   CUDA_LIBRARY_FIND(cuDeviceGetName);
388   CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2);
389   CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
390   CUDA_LIBRARY_FIND(cuDeviceGetProperties);
391   CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
392   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRetain);
393   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRelease);
394   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxSetFlags);
395   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxGetState);
396   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxReset);
397   CUDA_LIBRARY_FIND(cuCtxCreate_v2);
398   CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
399   CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
400   CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2);
401   CUDA_LIBRARY_FIND(cuCtxSetCurrent);
402   CUDA_LIBRARY_FIND(cuCtxGetCurrent);
403   CUDA_LIBRARY_FIND(cuCtxGetDevice);
404   CUDA_LIBRARY_FIND(cuCtxGetFlags);
405   CUDA_LIBRARY_FIND(cuCtxSynchronize);
406   CUDA_LIBRARY_FIND(cuCtxSetLimit);
407   CUDA_LIBRARY_FIND(cuCtxGetLimit);
408   CUDA_LIBRARY_FIND(cuCtxGetCacheConfig);
409   CUDA_LIBRARY_FIND(cuCtxSetCacheConfig);
410   CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig);
411   CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig);
412   CUDA_LIBRARY_FIND(cuCtxGetApiVersion);
413   CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange);
414   CUDA_LIBRARY_FIND(cuCtxAttach);
415   CUDA_LIBRARY_FIND(cuCtxDetach);
416   CUDA_LIBRARY_FIND(cuModuleLoad);
417   CUDA_LIBRARY_FIND(cuModuleLoadData);
418   CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
419   CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
420   CUDA_LIBRARY_FIND(cuModuleUnload);
421   CUDA_LIBRARY_FIND(cuModuleGetFunction);
422   CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
423   CUDA_LIBRARY_FIND(cuModuleGetTexRef);
424   CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
425   CUDA_LIBRARY_FIND(cuLinkCreate_v2);
426   CUDA_LIBRARY_FIND(cuLinkAddData_v2);
427   CUDA_LIBRARY_FIND(cuLinkAddFile_v2);
428   CUDA_LIBRARY_FIND(cuLinkComplete);
429   CUDA_LIBRARY_FIND(cuLinkDestroy);
430   CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
431   CUDA_LIBRARY_FIND(cuMemAlloc_v2);
432   CUDA_LIBRARY_FIND(cuMemAllocPitch_v2);
433   CUDA_LIBRARY_FIND(cuMemFree_v2);
434   CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2);
435   CUDA_LIBRARY_FIND(cuMemAllocHost_v2);
436   CUDA_LIBRARY_FIND(cuMemFreeHost);
437   CUDA_LIBRARY_FIND(cuMemHostAlloc);
438   CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2);
439   CUDA_LIBRARY_FIND(cuMemHostGetFlags);
440   CUDA_LIBRARY_FIND(cuMemAllocManaged);
441   CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId);
442   CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId);
443   CUDA_LIBRARY_FIND(cuIpcGetEventHandle);
444   CUDA_LIBRARY_FIND(cuIpcOpenEventHandle);
445   CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
446   CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
447   CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
448   CUDA_LIBRARY_FIND(cuMemHostRegister_v2);
449   CUDA_LIBRARY_FIND(cuMemHostUnregister);
450   CUDA_LIBRARY_FIND(cuMemcpy);
451   CUDA_LIBRARY_FIND(cuMemcpyPeer);
452   CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2);
453   CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2);
454   CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2);
455   CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2);
456   CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2);
457   CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2);
458   CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2);
459   CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2);
460   CUDA_LIBRARY_FIND(cuMemcpy2D_v2);
461   CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2);
462   CUDA_LIBRARY_FIND(cuMemcpy3D_v2);
463   CUDA_LIBRARY_FIND(cuMemcpy3DPeer);
464   CUDA_LIBRARY_FIND(cuMemcpyAsync);
465   CUDA_LIBRARY_FIND(cuMemcpyPeerAsync);
466   CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2);
467   CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2);
468   CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2);
469   CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2);
470   CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2);
471   CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2);
472   CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2);
473   CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync);
474   CUDA_LIBRARY_FIND(cuMemsetD8_v2);
475   CUDA_LIBRARY_FIND(cuMemsetD16_v2);
476   CUDA_LIBRARY_FIND(cuMemsetD32_v2);
477   CUDA_LIBRARY_FIND(cuMemsetD2D8_v2);
478   CUDA_LIBRARY_FIND(cuMemsetD2D16_v2);
479   CUDA_LIBRARY_FIND(cuMemsetD2D32_v2);
480   CUDA_LIBRARY_FIND(cuMemsetD8Async);
481   CUDA_LIBRARY_FIND(cuMemsetD16Async);
482   CUDA_LIBRARY_FIND(cuMemsetD32Async);
483   CUDA_LIBRARY_FIND(cuMemsetD2D8Async);
484   CUDA_LIBRARY_FIND(cuMemsetD2D16Async);
485   CUDA_LIBRARY_FIND(cuMemsetD2D32Async);
486   CUDA_LIBRARY_FIND(cuArrayCreate_v2);
487   CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2);
488   CUDA_LIBRARY_FIND(cuArrayDestroy);
489   CUDA_LIBRARY_FIND(cuArray3DCreate_v2);
490   CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2);
491   CUDA_LIBRARY_FIND(cuMipmappedArrayCreate);
492   CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel);
493   CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
494   CUDA_LIBRARY_FIND(cuPointerGetAttribute);
495   CUDA_LIBRARY_FIND(cuMemPrefetchAsync);
496   CUDA_LIBRARY_FIND(cuMemAdvise);
497   CUDA_LIBRARY_FIND(cuMemRangeGetAttribute);
498   CUDA_LIBRARY_FIND(cuMemRangeGetAttributes);
499   CUDA_LIBRARY_FIND(cuPointerSetAttribute);
500   CUDA_LIBRARY_FIND(cuPointerGetAttributes);
501   CUDA_LIBRARY_FIND(cuStreamCreate);
502   CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
503   CUDA_LIBRARY_FIND(cuStreamGetPriority);
504   CUDA_LIBRARY_FIND(cuStreamGetFlags);
505   CUDA_LIBRARY_FIND(cuStreamWaitEvent);
506   CUDA_LIBRARY_FIND(cuStreamAddCallback);
507   CUDA_LIBRARY_FIND(cuStreamAttachMemAsync);
508   CUDA_LIBRARY_FIND(cuStreamQuery);
509   CUDA_LIBRARY_FIND(cuStreamSynchronize);
510   CUDA_LIBRARY_FIND(cuStreamDestroy_v2);
511   CUDA_LIBRARY_FIND(cuEventCreate);
512   CUDA_LIBRARY_FIND(cuEventRecord);
513   CUDA_LIBRARY_FIND(cuEventQuery);
514   CUDA_LIBRARY_FIND(cuEventSynchronize);
515   CUDA_LIBRARY_FIND(cuEventDestroy_v2);
516   CUDA_LIBRARY_FIND(cuEventElapsedTime);
517   CUDA_LIBRARY_FIND(cuStreamWaitValue32);
518   CUDA_LIBRARY_FIND(cuStreamWaitValue64);
519   CUDA_LIBRARY_FIND(cuStreamWriteValue32);
520   CUDA_LIBRARY_FIND(cuStreamWriteValue64);
521   CUDA_LIBRARY_FIND(cuStreamBatchMemOp);
522   CUDA_LIBRARY_FIND(cuFuncGetAttribute);
523   CUDA_LIBRARY_FIND(cuFuncSetAttribute);
524   CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
525   CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
526   CUDA_LIBRARY_FIND(cuLaunchKernel);
527   CUDA_LIBRARY_FIND(cuLaunchCooperativeKernel);
528   CUDA_LIBRARY_FIND(cuLaunchCooperativeKernelMultiDevice);
529   CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
530   CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
531   CUDA_LIBRARY_FIND(cuParamSetSize);
532   CUDA_LIBRARY_FIND(cuParamSeti);
533   CUDA_LIBRARY_FIND(cuParamSetf);
534   CUDA_LIBRARY_FIND(cuParamSetv);
535   CUDA_LIBRARY_FIND(cuLaunch);
536   CUDA_LIBRARY_FIND(cuLaunchGrid);
537   CUDA_LIBRARY_FIND(cuLaunchGridAsync);
538   CUDA_LIBRARY_FIND(cuParamSetTexRef);
539   CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessor);
540   CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags);
541   CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSize);
542   CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSizeWithFlags);
543   CUDA_LIBRARY_FIND(cuTexRefSetArray);
544   CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
545   CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
546   CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3);
547   CUDA_LIBRARY_FIND(cuTexRefSetFormat);
548   CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
549   CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
550   CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode);
551   CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias);
552   CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp);
553   CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy);
554   CUDA_LIBRARY_FIND(cuTexRefSetBorderColor);
555   CUDA_LIBRARY_FIND(cuTexRefSetFlags);
556   CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2);
557   CUDA_LIBRARY_FIND(cuTexRefGetArray);
558   CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray);
559   CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
560   CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
561   CUDA_LIBRARY_FIND(cuTexRefGetFormat);
562   CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode);
563   CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias);
564   CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp);
565   CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy);
566   CUDA_LIBRARY_FIND(cuTexRefGetBorderColor);
567   CUDA_LIBRARY_FIND(cuTexRefGetFlags);
568   CUDA_LIBRARY_FIND(cuTexRefCreate);
569   CUDA_LIBRARY_FIND(cuTexRefDestroy);
570   CUDA_LIBRARY_FIND(cuSurfRefSetArray);
571   CUDA_LIBRARY_FIND(cuSurfRefGetArray);
572   CUDA_LIBRARY_FIND(cuTexObjectCreate);
573   CUDA_LIBRARY_FIND(cuTexObjectDestroy);
574   CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc);
575   CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc);
576   CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc);
577   CUDA_LIBRARY_FIND(cuSurfObjectCreate);
578   CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
579   CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
580   CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
581   CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
582   CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
583   CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute);
584   CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
585   CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
586   CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
587   CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
588   CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags_v2);
589   CUDA_LIBRARY_FIND(cuGraphicsMapResources);
590   CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
591   CUDA_LIBRARY_FIND(cuGetExportTable);
592
593   CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
594   CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
595   CUDA_LIBRARY_FIND(cuGLGetDevices_v2);
596   CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
597   CUDA_LIBRARY_FIND(cuGLInit);
598   CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
599   CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2);
600   CUDA_LIBRARY_FIND(cuGLUnmapBufferObject);
601   CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject);
602   CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags);
603   CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
604   CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
605
606   result = CUEW_SUCCESS;
607   return result;
608 }
609
610 static void cuewExitNvrtc(void) {
611   if (nvrtc_lib != NULL) {
612     /*  Ignore errors. */
613     dynamic_library_close(nvrtc_lib);
614     nvrtc_lib = NULL;
615   }
616 }
617
618 static int cuewNvrtcInit(void) {
619   /* Library paths. */
620 #ifdef _WIN32
621   /* Expected in c:/windows/system or similar, no path needed. */
622   const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL};
623 #elif defined(__APPLE__)
624   /* Default installation path. */
625   const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
626 #else
627   const char *nvrtc_paths[] = {"libnvrtc.so",
628 #  if defined(__x86_64__) || defined(_M_X64)
629                                "/usr/local/cuda/lib64/libnvrtc.so",
630 #else
631                                "/usr/local/cuda/lib/libnvrtc.so",
632 #endif
633                                NULL};
634 #endif
635   static int initialized = 0;
636   static int result = 0;
637   int error;
638
639   if (initialized) {
640     return result;
641   }
642
643   initialized = 1;
644
645   error = atexit(cuewExitNvrtc);
646   if (error) {
647     result = CUEW_ERROR_ATEXIT_FAILED;
648     return result;
649   }
650
651   /* Load library. */
652   nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
653
654   if (nvrtc_lib == NULL) {
655     result = CUEW_ERROR_OPEN_FAILED;
656     return result;
657   }
658
659   NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
660   NVRTC_LIBRARY_FIND(nvrtcVersion);
661   NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
662   NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
663   NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
664   NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
665   NVRTC_LIBRARY_FIND(nvrtcGetPTX);
666   NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
667   NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
668   NVRTC_LIBRARY_FIND(nvrtcAddNameExpression);
669   NVRTC_LIBRARY_FIND(nvrtcGetLoweredName);
670
671   result = CUEW_SUCCESS;
672   return result;
673 }
674
675
676 int cuewInit(cuuint32_t flags) {
677         int result = CUEW_SUCCESS;
678
679         if (flags & CUEW_INIT_CUDA) {
680                 result = cuewCudaInit();
681                 if (result != CUEW_SUCCESS) {
682                         return result;
683                 }
684         }
685
686         if (flags & CUEW_INIT_NVRTC) {
687                 result = cuewNvrtcInit();
688                 if (result != CUEW_SUCCESS) {
689                         return result;
690                 }
691         }
692
693         return result;
694 }
695
696
697 const char *cuewErrorString(CUresult result) {
698   switch (result) {
699     case CUDA_SUCCESS: return "No errors";
700     case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
701     case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
702     case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
703     case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
704     case CUDA_ERROR_PROFILER_DISABLED: return "Profiler disabled";
705     case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "Profiler not initialized";
706     case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "Profiler already started";
707     case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "Profiler already stopped";
708     case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
709     case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
710     case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
711     case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context";
712     case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "Context already current";
713     case CUDA_ERROR_MAP_FAILED: return "Map failed";
714     case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed";
715     case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped";
716     case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped";
717     case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU";
718     case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired";
719     case CUDA_ERROR_NOT_MAPPED: return "Not mapped";
720     case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array";
721     case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
722     case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
723     case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
724     case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use";
725     case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "Peer access unsupported";
726     case CUDA_ERROR_INVALID_PTX: return "Invalid ptx";
727     case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context";
728     case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
729     case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found";
730     case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
731     case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
732     case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
733     case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
734     case CUDA_ERROR_OPERATING_SYSTEM: return "Operating system";
735     case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
736     case CUDA_ERROR_NOT_FOUND: return "Not found";
737     case CUDA_ERROR_NOT_READY: return "CUDA not ready";
738     case CUDA_ERROR_ILLEGAL_ADDRESS: return "Illegal address";
739     case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
740     case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout";
741     case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
742     case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "Peer access already enabled";
743     case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "Peer access not enabled";
744     case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "Primary context active";
745     case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "Context is destroyed";
746     case CUDA_ERROR_ASSERT: return "Assert";
747     case CUDA_ERROR_TOO_MANY_PEERS: return "Too many peers";
748     case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "Host memory already registered";
749     case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "Host memory not registered";
750     case CUDA_ERROR_HARDWARE_STACK_ERROR: return "Hardware stack error";
751     case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "Illegal instruction";
752     case CUDA_ERROR_MISALIGNED_ADDRESS: return "Misaligned address";
753     case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space";
754     case CUDA_ERROR_INVALID_PC: return "Invalid pc";
755     case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
756     case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: return "Cooperative launch too large";
757     case CUDA_ERROR_NOT_PERMITTED: return "Not permitted";
758     case CUDA_ERROR_NOT_SUPPORTED: return "Not supported";
759     case CUDA_ERROR_UNKNOWN: return "Unknown error";
760     default: return "Unknown CUDA error value";
761   }
762 }
763
/* Join two path components with the platform's directory separator.
 *
 * Writes "<path1><separator><path2>" into result, where the separator is
 * '\\' on Windows and '/' everywhere else. At most maxlen bytes are written,
 * including the terminating NUL; a longer result is truncated but always
 * NUL-terminated.
 *
 * Nothing is written when result is NULL or maxlen is not positive. */
static void path_join(const char *path1,
                      const char *path2,
                      int maxlen,
                      char *result) {
#if defined(WIN32) || defined(_WIN32)
  const char separator = '\\';
#else
  const char separator = '/';
#endif
  int n;

  /* Without room for even the terminator there is nothing safe to write;
   * result[maxlen - 1] would land in front of the buffer. */
  if (result == NULL || maxlen <= 0) {
    return;
  }

  n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
  if (n >= 0 && n < maxlen) {
    result[n] = '\0';
  }
  else {
    /* Truncated output (C99 returns the untruncated length, legacy MSVC
     * _snprintf returns a negative value and may omit the terminator) or an
     * output error: force termination at the end of the buffer. */
    result[maxlen - 1] = '\0';
  }
}
781
/* Report whether something exists at the given path.
 *
 * Returns 1 when stat() succeeds on path and 0 when it fails. */
static int path_exists(const char *path) {
  struct stat info;
  return (stat(path, &info) == 0) ? 1 : 0;
}
789
/* Locate the nvcc compiler executable.
 *
 * Candidates are probed in this order:
 *   1. the directory named by the CUDA_BIN_PATH environment variable,
 *   2. a fixed list of default installation directories,
 *   3. (non-Windows only) whatever `which nvcc` reports.
 *
 * Returns a pointer to a function-local static buffer holding the joined
 * path for cases 1 and 2 (overwritten by the next call), the literal "nvcc"
 * for case 3, or NULL when no compiler was found. */
const char *cuewCompilerPath(void) {
#ifdef _WIN32
  const char *executable = "nvcc.exe";
  const char *search_dirs[] = {"C:/CUDA/bin", NULL};
#else
  const char *executable = "nvcc";
  const char *search_dirs[] = {
    "/Developer/NVIDIA/CUDA-5.0/bin",
    "/usr/local/cuda-5.0/bin",
    "/usr/local/cuda/bin",
    "/Developer/NVIDIA/CUDA-6.0/bin",
    "/usr/local/cuda-6.0/bin",
    "/Developer/NVIDIA/CUDA-5.5/bin",
    "/usr/local/cuda-5.5/bin",
    NULL};
#endif
  static char candidate[65536];
  const char *env_dir = getenv("CUDA_BIN_PATH");
  const char **dir;

  /* An explicit user override takes precedence over the built-in list. */
  if (env_dir != NULL) {
    path_join(env_dir, executable, sizeof(candidate), candidate);
    if (path_exists(candidate)) {
      return candidate;
    }
  }

  /* Walk the NULL-terminated list of default locations. */
  for (dir = search_dirs; *dir != NULL; ++dir) {
    path_join(*dir, executable, sizeof(candidate), candidate);
    if (path_exists(candidate)) {
      return candidate;
    }
  }

#ifndef _WIN32
  {
    /* Last resort: ask the shell. Any output from `which` counts as a hit,
     * in which case the bare command name is returned. */
    FILE *which = popen("which nvcc", "r");
    if (which != NULL) {
      char found[4096] = {0};
      int count = fread(found, 1, sizeof(found) - 1, which);
      found[count] = '\0';
      pclose(which);
      if (found[0] != '\0') {
        return "nvcc";
      }
    }
  }
#endif

  return NULL;
}
843
844 int cuewNvrtcVersion(void) {
845   int major, minor;
846   if (nvrtcVersion) {
847     nvrtcVersion(&major, &minor);
848     return 10 * major + minor;
849   }
850   return 0;
851 }
852
/* Determine the version of the nvcc compiler found by cuewCompilerPath().
 *
 * Runs "<compiler> --version" through popen(), collects what it prints and
 * searches for the "Cuda compilation tools, release <major>.<minor>" marker.
 *
 * Returns 10 * major + minor, or 0 when no compiler was found, the command
 * could not be built or started, or the version could not be parsed.
 * Failures other than a missing compiler are reported on stderr. */
int cuewCompilerVersion(void) {
  const char *path = cuewCompilerPath();
  const char *marker = "Cuda compilation tools, release ";
  FILE *pipe;
  int major, minor;
  int written;
  size_t used = 0;
  char *versionstr;
  char buf[128];
  char output[65536] = "\0";
  char command[65536] = "\0";

  if (path == NULL) {
    return 0;
  }

  /* Build the command in a single bounded step. The path is quoted so that
   * directories containing spaces survive the shell. Both C99 truncation
   * (result >= size) and legacy negative returns are treated as failure. */
  written = snprintf(command, sizeof(command), "\"%s\" --version", path);
  if (written < 0 || (size_t)written >= sizeof(command)) {
    fprintf(stderr, "CUDA: compiler path is too long to retrieve version\n");
    return 0;
  }

  /* get --version output */
  pipe = popen(command, "r");
  if (!pipe) {
    fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
    return 0;
  }

  /* Loop on fgets() rather than feof(): a read error sets the error
   * indicator without setting EOF, which would otherwise never terminate.
   * The pipe keeps being read once `output` is full; the excess is simply
   * dropped. */
  while (fgets(buf, sizeof(buf), pipe) != NULL) {
    size_t room = sizeof(output) - used - 1;
    size_t len = strlen(buf);
    if (len > room) {
      len = room;
    }
    memcpy(output + used, buf, len);
    used += len;
    output[used] = '\0';
  }

  pclose(pipe);

  /* parse version number */
  versionstr = strstr(output, marker);
  if (versionstr == NULL) {
    fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output);
    return 0;
  }
  versionstr += strlen(marker);

  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
    fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output);
    return 0;
  }

  return 10 * major + minor;
}
899