/* Source: blender.git, extern/cuew/src/cuew.c
 * (blob 962059bfcce98bc505545afdf9a2b3ab854a9847) */
1 /*
2  * Copyright 2011-2014 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef _MSC_VER
18 #  if _MSC_VER < 1900
19 #    define snprintf _snprintf
20 #  endif
21 #  define popen _popen
22 #  define pclose _pclose
23 #  define _CRT_SECURE_NO_WARNINGS
24 #endif
25
26 #include <cuew.h>
27 #include <assert.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <sys/stat.h>
31
32 #ifdef _WIN32
33 #  define WIN32_LEAN_AND_MEAN
34 #  define VC_EXTRALEAN
35 #  include <windows.h>
36
37 /* Utility macros. */
38
39 typedef HMODULE DynamicLibrary;
40
41 #  define dynamic_library_open(path)         LoadLibraryA(path)
42 #  define dynamic_library_close(lib)         FreeLibrary(lib)
43 #  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
44 #else
45 #  include <dlfcn.h>
46
47 typedef void* DynamicLibrary;
48
49 #  define dynamic_library_open(path)         dlopen(path, RTLD_NOW)
50 #  define dynamic_library_close(lib)         dlclose(lib)
51 #  define dynamic_library_find(lib, symbol)  dlsym(lib, symbol)
52 #endif
53
/*
 * Symbol lookup helpers.
 *
 * _LIBRARY_FIND(lib, name) looks up the symbol spelled `name` in `lib` with
 * dynamic_library_find() and stores the result in the variable `name`, cast
 * to `t<name> *`. _LIBRARY_FIND_CHECKED() does the same and then asserts that
 * the lookup did not yield NULL.
 *
 * Every macro expands to exactly one statement (do { ... } while (0)), so an
 * invocation followed by `;` is safe in any statement position, including an
 * unbraced if/else branch.
 */
#define _LIBRARY_FIND_CHECKED(lib, name) \
        do { \
          name = (t##name *)dynamic_library_find(lib, #name); \
          assert(name); \
        } while (0)

#define _LIBRARY_FIND(lib, name) \
        do { \
          name = (t##name *)dynamic_library_find(lib, #name); \
        } while (0)

/* Lookups in the CUDA driver library handle. */
#define CUDA_LIBRARY_FIND_CHECKED(name) \
        _LIBRARY_FIND_CHECKED(cuda_lib, name)
#define CUDA_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name)

/* Lookups in the NVRTC library handle. */
#define NVRTC_LIBRARY_FIND_CHECKED(name) \
        _LIBRARY_FIND_CHECKED(nvrtc_lib, name)
#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name)
68
69 static DynamicLibrary cuda_lib;
70 static DynamicLibrary nvrtc_lib;
71
72 /* Function definitions. */
73 tcuGetErrorString *cuGetErrorString;
74 tcuGetErrorName *cuGetErrorName;
75 tcuInit *cuInit;
76 tcuDriverGetVersion *cuDriverGetVersion;
77 tcuDeviceGet *cuDeviceGet;
78 tcuDeviceGetCount *cuDeviceGetCount;
79 tcuDeviceGetName *cuDeviceGetName;
80 tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
81 tcuDeviceGetAttribute *cuDeviceGetAttribute;
82 tcuDeviceGetProperties *cuDeviceGetProperties;
83 tcuDeviceComputeCapability *cuDeviceComputeCapability;
84 tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
85 tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
86 tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags;
87 tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
88 tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset;
89 tcuCtxCreate_v2 *cuCtxCreate_v2;
90 tcuCtxDestroy_v2 *cuCtxDestroy_v2;
91 tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
92 tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
93 tcuCtxSetCurrent *cuCtxSetCurrent;
94 tcuCtxGetCurrent *cuCtxGetCurrent;
95 tcuCtxGetDevice *cuCtxGetDevice;
96 tcuCtxGetFlags *cuCtxGetFlags;
97 tcuCtxSynchronize *cuCtxSynchronize;
98 tcuCtxSetLimit *cuCtxSetLimit;
99 tcuCtxGetLimit *cuCtxGetLimit;
100 tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
101 tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
102 tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
103 tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
104 tcuCtxGetApiVersion *cuCtxGetApiVersion;
105 tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
106 tcuCtxAttach *cuCtxAttach;
107 tcuCtxDetach *cuCtxDetach;
108 tcuModuleLoad *cuModuleLoad;
109 tcuModuleLoadData *cuModuleLoadData;
110 tcuModuleLoadDataEx *cuModuleLoadDataEx;
111 tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
112 tcuModuleUnload *cuModuleUnload;
113 tcuModuleGetFunction *cuModuleGetFunction;
114 tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
115 tcuModuleGetTexRef *cuModuleGetTexRef;
116 tcuModuleGetSurfRef *cuModuleGetSurfRef;
117 tcuLinkCreate_v2 *cuLinkCreate_v2;
118 tcuLinkAddData_v2 *cuLinkAddData_v2;
119 tcuLinkAddFile_v2 *cuLinkAddFile_v2;
120 tcuLinkComplete *cuLinkComplete;
121 tcuLinkDestroy *cuLinkDestroy;
122 tcuMemGetInfo_v2 *cuMemGetInfo_v2;
123 tcuMemAlloc_v2 *cuMemAlloc_v2;
124 tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
125 tcuMemFree_v2 *cuMemFree_v2;
126 tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
127 tcuMemAllocHost_v2 *cuMemAllocHost_v2;
128 tcuMemFreeHost *cuMemFreeHost;
129 tcuMemHostAlloc *cuMemHostAlloc;
130 tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
131 tcuMemHostGetFlags *cuMemHostGetFlags;
132 tcuMemAllocManaged *cuMemAllocManaged;
133 tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
134 tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
135 tcuIpcGetEventHandle *cuIpcGetEventHandle;
136 tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
137 tcuIpcGetMemHandle *cuIpcGetMemHandle;
138 tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
139 tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
140 tcuMemHostRegister_v2 *cuMemHostRegister_v2;
141 tcuMemHostUnregister *cuMemHostUnregister;
142 tcuMemcpy *cuMemcpy;
143 tcuMemcpyPeer *cuMemcpyPeer;
144 tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
145 tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
146 tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
147 tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
148 tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
149 tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
150 tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
151 tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
152 tcuMemcpy2D_v2 *cuMemcpy2D_v2;
153 tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
154 tcuMemcpy3D_v2 *cuMemcpy3D_v2;
155 tcuMemcpy3DPeer *cuMemcpy3DPeer;
156 tcuMemcpyAsync *cuMemcpyAsync;
157 tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
158 tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
159 tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
160 tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
161 tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
162 tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
163 tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
164 tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
165 tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
166 tcuMemsetD8_v2 *cuMemsetD8_v2;
167 tcuMemsetD16_v2 *cuMemsetD16_v2;
168 tcuMemsetD32_v2 *cuMemsetD32_v2;
169 tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
170 tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
171 tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
172 tcuMemsetD8Async *cuMemsetD8Async;
173 tcuMemsetD16Async *cuMemsetD16Async;
174 tcuMemsetD32Async *cuMemsetD32Async;
175 tcuMemsetD2D8Async *cuMemsetD2D8Async;
176 tcuMemsetD2D16Async *cuMemsetD2D16Async;
177 tcuMemsetD2D32Async *cuMemsetD2D32Async;
178 tcuArrayCreate_v2 *cuArrayCreate_v2;
179 tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
180 tcuArrayDestroy *cuArrayDestroy;
181 tcuArray3DCreate_v2 *cuArray3DCreate_v2;
182 tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
183 tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
184 tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
185 tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
186 tcuPointerGetAttribute *cuPointerGetAttribute;
187 tcuMemPrefetchAsync *cuMemPrefetchAsync;
188 tcuMemAdvise *cuMemAdvise;
189 tcuMemRangeGetAttribute *cuMemRangeGetAttribute;
190 tcuMemRangeGetAttributes *cuMemRangeGetAttributes;
191 tcuPointerSetAttribute *cuPointerSetAttribute;
192 tcuPointerGetAttributes *cuPointerGetAttributes;
193 tcuStreamCreate *cuStreamCreate;
194 tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
195 tcuStreamGetPriority *cuStreamGetPriority;
196 tcuStreamGetFlags *cuStreamGetFlags;
197 tcuStreamWaitEvent *cuStreamWaitEvent;
198 tcuStreamAddCallback *cuStreamAddCallback;
199 tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
200 tcuStreamQuery *cuStreamQuery;
201 tcuStreamSynchronize *cuStreamSynchronize;
202 tcuStreamDestroy_v2 *cuStreamDestroy_v2;
203 tcuEventCreate *cuEventCreate;
204 tcuEventRecord *cuEventRecord;
205 tcuEventQuery *cuEventQuery;
206 tcuEventSynchronize *cuEventSynchronize;
207 tcuEventDestroy_v2 *cuEventDestroy_v2;
208 tcuEventElapsedTime *cuEventElapsedTime;
209 tcuStreamWaitValue32 *cuStreamWaitValue32;
210 tcuStreamWriteValue32 *cuStreamWriteValue32;
211 tcuStreamBatchMemOp *cuStreamBatchMemOp;
212 tcuFuncGetAttribute *cuFuncGetAttribute;
213 tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
214 tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
215 tcuLaunchKernel *cuLaunchKernel;
216 tcuFuncSetBlockShape *cuFuncSetBlockShape;
217 tcuFuncSetSharedSize *cuFuncSetSharedSize;
218 tcuParamSetSize *cuParamSetSize;
219 tcuParamSeti *cuParamSeti;
220 tcuParamSetf *cuParamSetf;
221 tcuParamSetv *cuParamSetv;
222 tcuLaunch *cuLaunch;
223 tcuLaunchGrid *cuLaunchGrid;
224 tcuLaunchGridAsync *cuLaunchGridAsync;
225 tcuParamSetTexRef *cuParamSetTexRef;
226 tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
227 tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
228 tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
229 tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
230 tcuTexRefSetArray *cuTexRefSetArray;
231 tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
232 tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
233 tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
234 tcuTexRefSetFormat *cuTexRefSetFormat;
235 tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
236 tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
237 tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
238 tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
239 tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
240 tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
241 tcuTexRefSetBorderColor *cuTexRefSetBorderColor;
242 tcuTexRefSetFlags *cuTexRefSetFlags;
243 tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
244 tcuTexRefGetArray *cuTexRefGetArray;
245 tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
246 tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
247 tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
248 tcuTexRefGetFormat *cuTexRefGetFormat;
249 tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
250 tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
251 tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
252 tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
253 tcuTexRefGetBorderColor *cuTexRefGetBorderColor;
254 tcuTexRefGetFlags *cuTexRefGetFlags;
255 tcuTexRefCreate *cuTexRefCreate;
256 tcuTexRefDestroy *cuTexRefDestroy;
257 tcuSurfRefSetArray *cuSurfRefSetArray;
258 tcuSurfRefGetArray *cuSurfRefGetArray;
259 tcuTexObjectCreate *cuTexObjectCreate;
260 tcuTexObjectDestroy *cuTexObjectDestroy;
261 tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
262 tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
263 tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
264 tcuSurfObjectCreate *cuSurfObjectCreate;
265 tcuSurfObjectDestroy *cuSurfObjectDestroy;
266 tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
267 tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
268 tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
269 tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
270 tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
271 tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
272 tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
273 tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
274 tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
275 tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
276 tcuGraphicsMapResources *cuGraphicsMapResources;
277 tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
278 tcuGetExportTable *cuGetExportTable;
279
280 tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
281 tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
282 tcuGLGetDevices_v2 *cuGLGetDevices_v2;
283 tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
284 tcuGLInit *cuGLInit;
285 tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
286 tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
287 tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
288 tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
289 tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
290 tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
291 tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
292
293 tnvrtcGetErrorString *nvrtcGetErrorString;
294 tnvrtcVersion *nvrtcVersion;
295 tnvrtcCreateProgram *nvrtcCreateProgram;
296 tnvrtcDestroyProgram *nvrtcDestroyProgram;
297 tnvrtcCompileProgram *nvrtcCompileProgram;
298 tnvrtcGetPTXSize *nvrtcGetPTXSize;
299 tnvrtcGetPTX *nvrtcGetPTX;
300 tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize;
301 tnvrtcGetProgramLog *nvrtcGetProgramLog;
302 tnvrtcAddNameExpression *nvrtcAddNameExpression;
303 tnvrtcGetLoweredName *nvrtcGetLoweredName;
304
305
306 static DynamicLibrary dynamic_library_open_find(const char **paths) {
307   int i = 0;
308   while (paths[i] != NULL) {
309       DynamicLibrary lib = dynamic_library_open(paths[i]);
310       if (lib != NULL) {
311         return lib;
312       }
313       ++i;
314   }
315   return NULL;
316 }
317
318 static void cuewExit(void) {
319   if(cuda_lib != NULL) {
320     /*  Ignore errors. */
321     dynamic_library_close(cuda_lib);
322     cuda_lib = NULL;
323   }
324 }
325
326 /* Implementation function. */
327 int cuewInit(void) {
328   /* Library paths. */
329 #ifdef _WIN32
330   /* Expected in c:/windows/system or similar, no path needed. */
331   const char *cuda_paths[] = {"nvcuda.dll", NULL};
332   const char *nvrtc_paths[] = {"nvrtc.dll", NULL};
333 #elif defined(__APPLE__)
334   /* Default installation path. */
335   const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
336   const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
337 #else
338   const char *cuda_paths[] = {"libcuda.so", NULL};
339   const char *nvrtc_paths[] = {"libnvrtc.so",
340 #  if defined(__x86_64__) || defined(_M_X64)
341                                "/usr/local/cuda/lib64/libnvrtc.so",
342 #else
343                                "/usr/local/cuda/lib/libnvrtc.so",
344 #endif
345                                NULL};
346 #endif
347   static int initialized = 0;
348   static int result = 0;
349   int error, driver_version;
350
351   if (initialized) {
352     return result;
353   }
354
355   initialized = 1;
356
357   error = atexit(cuewExit);
358   if (error) {
359     result = CUEW_ERROR_ATEXIT_FAILED;
360     return result;
361   }
362
363   /* Load library. */
364   cuda_lib = dynamic_library_open_find(cuda_paths);
365   nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
366
367   /* CUDA library is mandatory to have, while nvrtc might be missing. */
368   if (cuda_lib == NULL) {
369     result = CUEW_ERROR_OPEN_FAILED;
370     return result;
371   }
372
373   /* Detect driver version. */
374   driver_version = 1000;
375
376   CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
377   if (cuDriverGetVersion) {
378     cuDriverGetVersion(&driver_version);
379   }
380
381   /* We require version 4.0. */
382   if (driver_version < 4000) {
383     result = CUEW_ERROR_OPEN_FAILED;
384     return result;
385   }
386   /* Fetch all function pointers. */
387   CUDA_LIBRARY_FIND(cuGetErrorString);
388   CUDA_LIBRARY_FIND(cuGetErrorName);
389   CUDA_LIBRARY_FIND(cuInit);
390   CUDA_LIBRARY_FIND(cuDriverGetVersion);
391   CUDA_LIBRARY_FIND(cuDeviceGet);
392   CUDA_LIBRARY_FIND(cuDeviceGetCount);
393   CUDA_LIBRARY_FIND(cuDeviceGetName);
394   CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2);
395   CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
396   CUDA_LIBRARY_FIND(cuDeviceGetProperties);
397   CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
398   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRetain);
399   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRelease);
400   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxSetFlags);
401   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxGetState);
402   CUDA_LIBRARY_FIND(cuDevicePrimaryCtxReset);
403   CUDA_LIBRARY_FIND(cuCtxCreate_v2);
404   CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
405   CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
406   CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2);
407   CUDA_LIBRARY_FIND(cuCtxSetCurrent);
408   CUDA_LIBRARY_FIND(cuCtxGetCurrent);
409   CUDA_LIBRARY_FIND(cuCtxGetDevice);
410   CUDA_LIBRARY_FIND(cuCtxGetFlags);
411   CUDA_LIBRARY_FIND(cuCtxSynchronize);
412   CUDA_LIBRARY_FIND(cuCtxSetLimit);
413   CUDA_LIBRARY_FIND(cuCtxGetLimit);
414   CUDA_LIBRARY_FIND(cuCtxGetCacheConfig);
415   CUDA_LIBRARY_FIND(cuCtxSetCacheConfig);
416   CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig);
417   CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig);
418   CUDA_LIBRARY_FIND(cuCtxGetApiVersion);
419   CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange);
420   CUDA_LIBRARY_FIND(cuCtxAttach);
421   CUDA_LIBRARY_FIND(cuCtxDetach);
422   CUDA_LIBRARY_FIND(cuModuleLoad);
423   CUDA_LIBRARY_FIND(cuModuleLoadData);
424   CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
425   CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
426   CUDA_LIBRARY_FIND(cuModuleUnload);
427   CUDA_LIBRARY_FIND(cuModuleGetFunction);
428   CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
429   CUDA_LIBRARY_FIND(cuModuleGetTexRef);
430   CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
431   CUDA_LIBRARY_FIND(cuLinkCreate_v2);
432   CUDA_LIBRARY_FIND(cuLinkAddData_v2);
433   CUDA_LIBRARY_FIND(cuLinkAddFile_v2);
434   CUDA_LIBRARY_FIND(cuLinkComplete);
435   CUDA_LIBRARY_FIND(cuLinkDestroy);
436   CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
437   CUDA_LIBRARY_FIND(cuMemAlloc_v2);
438   CUDA_LIBRARY_FIND(cuMemAllocPitch_v2);
439   CUDA_LIBRARY_FIND(cuMemFree_v2);
440   CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2);
441   CUDA_LIBRARY_FIND(cuMemAllocHost_v2);
442   CUDA_LIBRARY_FIND(cuMemFreeHost);
443   CUDA_LIBRARY_FIND(cuMemHostAlloc);
444   CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2);
445   CUDA_LIBRARY_FIND(cuMemHostGetFlags);
446   CUDA_LIBRARY_FIND(cuMemAllocManaged);
447   CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId);
448   CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId);
449   CUDA_LIBRARY_FIND(cuIpcGetEventHandle);
450   CUDA_LIBRARY_FIND(cuIpcOpenEventHandle);
451   CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
452   CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
453   CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
454   CUDA_LIBRARY_FIND(cuMemHostRegister_v2);
455   CUDA_LIBRARY_FIND(cuMemHostUnregister);
456   CUDA_LIBRARY_FIND(cuMemcpy);
457   CUDA_LIBRARY_FIND(cuMemcpyPeer);
458   CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2);
459   CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2);
460   CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2);
461   CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2);
462   CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2);
463   CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2);
464   CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2);
465   CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2);
466   CUDA_LIBRARY_FIND(cuMemcpy2D_v2);
467   CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2);
468   CUDA_LIBRARY_FIND(cuMemcpy3D_v2);
469   CUDA_LIBRARY_FIND(cuMemcpy3DPeer);
470   CUDA_LIBRARY_FIND(cuMemcpyAsync);
471   CUDA_LIBRARY_FIND(cuMemcpyPeerAsync);
472   CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2);
473   CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2);
474   CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2);
475   CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2);
476   CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2);
477   CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2);
478   CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2);
479   CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync);
480   CUDA_LIBRARY_FIND(cuMemsetD8_v2);
481   CUDA_LIBRARY_FIND(cuMemsetD16_v2);
482   CUDA_LIBRARY_FIND(cuMemsetD32_v2);
483   CUDA_LIBRARY_FIND(cuMemsetD2D8_v2);
484   CUDA_LIBRARY_FIND(cuMemsetD2D16_v2);
485   CUDA_LIBRARY_FIND(cuMemsetD2D32_v2);
486   CUDA_LIBRARY_FIND(cuMemsetD8Async);
487   CUDA_LIBRARY_FIND(cuMemsetD16Async);
488   CUDA_LIBRARY_FIND(cuMemsetD32Async);
489   CUDA_LIBRARY_FIND(cuMemsetD2D8Async);
490   CUDA_LIBRARY_FIND(cuMemsetD2D16Async);
491   CUDA_LIBRARY_FIND(cuMemsetD2D32Async);
492   CUDA_LIBRARY_FIND(cuArrayCreate_v2);
493   CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2);
494   CUDA_LIBRARY_FIND(cuArrayDestroy);
495   CUDA_LIBRARY_FIND(cuArray3DCreate_v2);
496   CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2);
497   CUDA_LIBRARY_FIND(cuMipmappedArrayCreate);
498   CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel);
499   CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
500   CUDA_LIBRARY_FIND(cuPointerGetAttribute);
501   CUDA_LIBRARY_FIND(cuMemPrefetchAsync);
502   CUDA_LIBRARY_FIND(cuMemAdvise);
503   CUDA_LIBRARY_FIND(cuMemRangeGetAttribute);
504   CUDA_LIBRARY_FIND(cuMemRangeGetAttributes);
505   CUDA_LIBRARY_FIND(cuPointerSetAttribute);
506   CUDA_LIBRARY_FIND(cuPointerGetAttributes);
507   CUDA_LIBRARY_FIND(cuStreamCreate);
508   CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
509   CUDA_LIBRARY_FIND(cuStreamGetPriority);
510   CUDA_LIBRARY_FIND(cuStreamGetFlags);
511   CUDA_LIBRARY_FIND(cuStreamWaitEvent);
512   CUDA_LIBRARY_FIND(cuStreamAddCallback);
513   CUDA_LIBRARY_FIND(cuStreamAttachMemAsync);
514   CUDA_LIBRARY_FIND(cuStreamQuery);
515   CUDA_LIBRARY_FIND(cuStreamSynchronize);
516   CUDA_LIBRARY_FIND(cuStreamDestroy_v2);
517   CUDA_LIBRARY_FIND(cuEventCreate);
518   CUDA_LIBRARY_FIND(cuEventRecord);
519   CUDA_LIBRARY_FIND(cuEventQuery);
520   CUDA_LIBRARY_FIND(cuEventSynchronize);
521   CUDA_LIBRARY_FIND(cuEventDestroy_v2);
522   CUDA_LIBRARY_FIND(cuEventElapsedTime);
523   CUDA_LIBRARY_FIND(cuStreamWaitValue32);
524   CUDA_LIBRARY_FIND(cuStreamWriteValue32);
525   CUDA_LIBRARY_FIND(cuStreamBatchMemOp);
526   CUDA_LIBRARY_FIND(cuFuncGetAttribute);
527   CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
528   CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
529   CUDA_LIBRARY_FIND(cuLaunchKernel);
530   CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
531   CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
532   CUDA_LIBRARY_FIND(cuParamSetSize);
533   CUDA_LIBRARY_FIND(cuParamSeti);
534   CUDA_LIBRARY_FIND(cuParamSetf);
535   CUDA_LIBRARY_FIND(cuParamSetv);
536   CUDA_LIBRARY_FIND(cuLaunch);
537   CUDA_LIBRARY_FIND(cuLaunchGrid);
538   CUDA_LIBRARY_FIND(cuLaunchGridAsync);
539   CUDA_LIBRARY_FIND(cuParamSetTexRef);
540   CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessor);
541   CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags);
542   CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSize);
543   CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSizeWithFlags);
544   CUDA_LIBRARY_FIND(cuTexRefSetArray);
545   CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
546   CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
547   CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3);
548   CUDA_LIBRARY_FIND(cuTexRefSetFormat);
549   CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
550   CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
551   CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode);
552   CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias);
553   CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp);
554   CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy);
555   CUDA_LIBRARY_FIND(cuTexRefSetBorderColor);
556   CUDA_LIBRARY_FIND(cuTexRefSetFlags);
557   CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2);
558   CUDA_LIBRARY_FIND(cuTexRefGetArray);
559   CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray);
560   CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
561   CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
562   CUDA_LIBRARY_FIND(cuTexRefGetFormat);
563   CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode);
564   CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias);
565   CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp);
566   CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy);
567   CUDA_LIBRARY_FIND(cuTexRefGetBorderColor);
568   CUDA_LIBRARY_FIND(cuTexRefGetFlags);
569   CUDA_LIBRARY_FIND(cuTexRefCreate);
570   CUDA_LIBRARY_FIND(cuTexRefDestroy);
571   CUDA_LIBRARY_FIND(cuSurfRefSetArray);
572   CUDA_LIBRARY_FIND(cuSurfRefGetArray);
573   CUDA_LIBRARY_FIND(cuTexObjectCreate);
574   CUDA_LIBRARY_FIND(cuTexObjectDestroy);
575   CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc);
576   CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc);
577   CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc);
578   CUDA_LIBRARY_FIND(cuSurfObjectCreate);
579   CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
580   CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
581   CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
582   CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute);
583   CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
584   CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
585   CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
586   CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
587   CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
588   CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
589   CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags_v2);
590   CUDA_LIBRARY_FIND(cuGraphicsMapResources);
591   CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
592   CUDA_LIBRARY_FIND(cuGetExportTable);
593
594   CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
595   CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
596   CUDA_LIBRARY_FIND(cuGLGetDevices_v2);
597   CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
598   CUDA_LIBRARY_FIND(cuGLInit);
599   CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
600   CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2);
601   CUDA_LIBRARY_FIND(cuGLUnmapBufferObject);
602   CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject);
603   CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags);
604   CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
605   CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
606
607
608   if (nvrtc_lib != NULL) {
609     NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
610     NVRTC_LIBRARY_FIND(nvrtcVersion);
611     NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
612     NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
613     NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
614     NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
615     NVRTC_LIBRARY_FIND(nvrtcGetPTX);
616     NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
617     NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
618     NVRTC_LIBRARY_FIND(nvrtcAddNameExpression);
619     NVRTC_LIBRARY_FIND(nvrtcGetLoweredName);
620   }
621
622   result = CUEW_SUCCESS;
623   return result;
624 }
625
626 const char *cuewErrorString(CUresult result) {
627   switch(result) {
628     case CUDA_SUCCESS: return "No errors";
629     case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
630     case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
631     case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
632     case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
633     case CUDA_ERROR_PROFILER_DISABLED: return "Profiler disabled";
634     case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "Profiler not initialized";
635     case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "Profiler already started";
636     case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "Profiler already stopped";
637     case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
638     case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
639     case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
640     case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context";
641     case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "Context already current";
642     case CUDA_ERROR_MAP_FAILED: return "Map failed";
643     case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed";
644     case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped";
645     case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped";
646     case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU";
647     case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired";
648     case CUDA_ERROR_NOT_MAPPED: return "Not mapped";
649     case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array";
650     case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
651     case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
652     case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
653     case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use";
654     case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "Peer access unsupported";
655     case CUDA_ERROR_INVALID_PTX: return "Invalid ptx";
656     case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context";
657     case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
658     case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
659     case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
660     case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
661     case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
662     case CUDA_ERROR_OPERATING_SYSTEM: return "Operating system";
663     case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
664     case CUDA_ERROR_NOT_FOUND: return "Not found";
665     case CUDA_ERROR_NOT_READY: return "CUDA not ready";
666     case CUDA_ERROR_ILLEGAL_ADDRESS: return "Illegal address";
667     case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
668     case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout";
669     case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
670     case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "Peer access already enabled";
671     case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "Peer access not enabled";
672     case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "Primary context active";
673     case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "Context is destroyed";
674     case CUDA_ERROR_ASSERT: return "Assert";
675     case CUDA_ERROR_TOO_MANY_PEERS: return "Too many peers";
676     case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "Host memory already registered";
677     case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "Host memory not registered";
678     case CUDA_ERROR_HARDWARE_STACK_ERROR: return "Hardware stack error";
679     case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "Illegal instruction";
680     case CUDA_ERROR_MISALIGNED_ADDRESS: return "Misaligned address";
681     case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space";
682     case CUDA_ERROR_INVALID_PC: return "Invalid pc";
683     case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
684     case CUDA_ERROR_NOT_PERMITTED: return "Not permitted";
685     case CUDA_ERROR_NOT_SUPPORTED: return "Not supported";
686     case CUDA_ERROR_UNKNOWN: return "Unknown error";
687     default: return "Unknown CUDA error value";
688   }
689 }
690
/*
 * Write "<path1><separator><path2>" into `result`, where the separator is
 * '\\' on Windows builds and '/' elsewhere.
 *
 * `maxlen` is the capacity of `result` in bytes, terminator included. Output
 * that does not fit is truncated; whenever maxlen > 0 the buffer is left
 * NUL-terminated. With a NULL buffer or a non-positive capacity nothing is
 * written at all.
 */
static void path_join(const char *path1,
                      const char *path2,
                      int maxlen,
                      char *result) {
#if defined(WIN32) || defined(_WIN32)
  const char separator = '\\';
#else
  const char separator = '/';
#endif
  int n;

  /* Without any room there is not even space for the terminator. */
  if (result == NULL || maxlen <= 0) {
    return;
  }

  n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
  if (n >= 0 && n < maxlen) {
    result[n] = '\0';
  }
  else {
    /* Error (any negative value) or truncation: some snprintf variants do
     * not terminate the buffer in that case, so do it by hand. */
    result[maxlen - 1] = '\0';
  }
}
708
/* Return 1 when stat() succeeds for `path`, 0 when it fails. */
static int path_exists(const char *path) {
  struct stat info;

  return (stat(path, &info) == 0) ? 1 : 0;
}
716
/*
 * Locate the nvcc compiler.
 *
 * Candidates are probed in this order:
 *   1. the directory named by the CUDA_BIN_PATH environment variable,
 *   2. a fixed list of default installation directories,
 *   3. (non-Windows only) the output of `which nvcc`.
 *
 * Returns the full path for cases 1 and 2, stored in a static buffer that
 * the next call overwrites; the literal "nvcc" when only `which` produced
 * output; NULL when nothing was found.
 */
const char *cuewCompilerPath(void) {
#ifdef _WIN32
  const char *nvcc_name = "nvcc.exe";
  const char *search_dirs[] = {"C:/CUDA/bin", NULL};
#else
  const char *nvcc_name = "nvcc";
  const char *search_dirs[] = {
    "/Developer/NVIDIA/CUDA-5.0/bin",
    "/usr/local/cuda-5.0/bin",
    "/usr/local/cuda/bin",
    "/Developer/NVIDIA/CUDA-6.0/bin",
    "/usr/local/cuda-6.0/bin",
    "/Developer/NVIDIA/CUDA-5.5/bin",
    "/usr/local/cuda-5.5/bin",
    NULL};
#endif
  static char found[65536];
  const char *env_dir = getenv("CUDA_BIN_PATH");
  const char **dir;

  /* An explicit override from the environment is tried first. */
  if (env_dir != NULL) {
    path_join(env_dir, nvcc_name, sizeof(found), found);
    if (path_exists(found)) {
      return found;
    }
  }

  /* Then the known installation directories, in list order. */
  for (dir = search_dirs; *dir != NULL; ++dir) {
    path_join(*dir, nvcc_name, sizeof(found), found);
    if (path_exists(found)) {
      return found;
    }
  }

#ifndef _WIN32
  {
    /* Last resort: any output from `which` is taken to mean that nvcc can
     * be started by its bare name. */
    FILE *which = popen("which nvcc", "r");
    if (which != NULL) {
      char reply[4096] = {0};
      int got = fread(reply, 1, sizeof(reply) - 1, which);
      reply[got] = '\0';
      pclose(which);

      if (reply[0] != '\0') {
        return "nvcc";
      }
    }
  }
#endif

  return NULL;
}
768
/*
 * Run the compiler found by cuewCompilerPath() with `--version` and parse
 * the "Cuda compilation tools, release <major>.<minor>" line of its output.
 *
 * Returns 10 * major + minor, or 0 when no compiler was found, the command
 * could not be built or started, or the version could not be parsed.
 * Failures other than "no compiler found" are reported on stderr.
 */
int cuewCompilerVersion(void) {
  const char *path = cuewCompilerPath();
  const char *marker = "Cuda compilation tools, release ";
  FILE *pipe;
  int major, minor, written;
  char *versionstr;
  char buf[128];
  char output[65536] = "\0";
  /* Room for the longest path cuewCompilerPath() can return plus the quotes
   * and the " --version" suffix. */
  char command[65536 + 16] = "\0";

  if (path == NULL)
    return 0;

  /* get --version output; the path is quoted so that directories containing
   * spaces still form a single command word. */
  written = snprintf(command, sizeof(command), "\"%s\" --version", path);
  if (written < 0 || (size_t)written >= sizeof(command)) {
    fprintf(stderr, "CUDA: compiler path is too long to retrieve version\n");
    return 0;
  }

  pipe = popen(command, "r");
  if (!pipe) {
    fprintf(stderr, "CUDA: failed to run compiler to retrieve version\n");
    return 0;
  }

  /* fgets() returns NULL on end of file and on read errors alike, so the
   * loop ends in both cases. */
  while (fgets(buf, sizeof(buf), pipe) != NULL) {
    strncat(output, buf, sizeof(output) - strlen(output) - 1);
  }

  pclose(pipe);

  /* parse version number */
  versionstr = strstr(output, marker);
  if (versionstr == NULL) {
    fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output);
    return 0;
  }
  versionstr += strlen(marker);

  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
    fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output);
    return 0;
  }

  return 10 * major + minor;
}
814