svn merge ^/trunk/blender -r47023:HEAD
[blender-staging.git] / intern / cycles / util / util_cuda.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include <stdlib.h>
20
21 #include "util_cuda.h"
22 #include "util_debug.h"
23 #include "util_dynlib.h"
24 #include "util_path.h"
25 #include "util_string.h"
26
27 /* function defininitions */
28
29 tcuInit *cuInit;
30 tcuDriverGetVersion *cuDriverGetVersion;
31 tcuDeviceGet *cuDeviceGet;
32 tcuDeviceGetCount *cuDeviceGetCount;
33 tcuDeviceGetName *cuDeviceGetName;
34 tcuDeviceComputeCapability *cuDeviceComputeCapability;
35 tcuDeviceTotalMem *cuDeviceTotalMem;
36 tcuDeviceGetProperties *cuDeviceGetProperties;
37 tcuDeviceGetAttribute *cuDeviceGetAttribute;
38 tcuCtxCreate *cuCtxCreate;
39 tcuCtxDestroy *cuCtxDestroy;
40 tcuCtxAttach *cuCtxAttach;
41 tcuCtxDetach *cuCtxDetach;
42 tcuCtxPushCurrent *cuCtxPushCurrent;
43 tcuCtxPopCurrent *cuCtxPopCurrent;
44 tcuCtxGetDevice *cuCtxGetDevice;
45 tcuCtxSynchronize *cuCtxSynchronize;
46 tcuModuleLoad *cuModuleLoad;
47 tcuModuleLoadData *cuModuleLoadData;
48 tcuModuleLoadDataEx *cuModuleLoadDataEx;
49 tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
50 tcuModuleUnload *cuModuleUnload;
51 tcuModuleGetFunction *cuModuleGetFunction;
52 tcuModuleGetGlobal *cuModuleGetGlobal;
53 tcuModuleGetTexRef *cuModuleGetTexRef;
54 tcuModuleGetSurfRef *cuModuleGetSurfRef;
55 tcuMemGetInfo *cuMemGetInfo;
56 tcuMemAlloc *cuMemAlloc;
57 tcuMemAllocPitch *cuMemAllocPitch;
58 tcuMemFree *cuMemFree;
59 tcuMemGetAddressRange *cuMemGetAddressRange;
60 tcuMemAllocHost *cuMemAllocHost;
61 tcuMemFreeHost *cuMemFreeHost;
62 tcuMemHostAlloc *cuMemHostAlloc;
63 tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
64 tcuMemHostGetFlags *cuMemHostGetFlags;
65 tcuMemcpyHtoD *cuMemcpyHtoD;
66 tcuMemcpyDtoH *cuMemcpyDtoH;
67 tcuMemcpyDtoD *cuMemcpyDtoD;
68 tcuMemcpyDtoA *cuMemcpyDtoA;
69 tcuMemcpyAtoD *cuMemcpyAtoD;
70 tcuMemcpyHtoA *cuMemcpyHtoA;
71 tcuMemcpyAtoH *cuMemcpyAtoH;
72 tcuMemcpyAtoA *cuMemcpyAtoA;
73 tcuMemcpy2D *cuMemcpy2D;
74 tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
75 tcuMemcpy3D *cuMemcpy3D;
76 tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
77 tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
78 tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
79 tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
80 tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
81 tcuMemcpy2DAsync *cuMemcpy2DAsync;
82 tcuMemcpy3DAsync *cuMemcpy3DAsync;
83 tcuMemsetD8 *cuMemsetD8;
84 tcuMemsetD16 *cuMemsetD16;
85 tcuMemsetD32 *cuMemsetD32;
86 tcuMemsetD2D8 *cuMemsetD2D8;
87 tcuMemsetD2D16 *cuMemsetD2D16;
88 tcuMemsetD2D32 *cuMemsetD2D32;
89 tcuFuncSetBlockShape *cuFuncSetBlockShape;
90 tcuFuncSetSharedSize *cuFuncSetSharedSize;
91 tcuFuncGetAttribute *cuFuncGetAttribute;
92 tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
93 tcuArrayCreate *cuArrayCreate;
94 tcuArrayGetDescriptor *cuArrayGetDescriptor;
95 tcuArrayDestroy *cuArrayDestroy;
96 tcuArray3DCreate *cuArray3DCreate;
97 tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
98 tcuTexRefCreate *cuTexRefCreate;
99 tcuTexRefDestroy *cuTexRefDestroy;
100 tcuTexRefSetArray *cuTexRefSetArray;
101 tcuTexRefSetAddress *cuTexRefSetAddress;
102 tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
103 tcuTexRefSetFormat *cuTexRefSetFormat;
104 tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
105 tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
106 tcuTexRefSetFlags *cuTexRefSetFlags;
107 tcuTexRefGetAddress *cuTexRefGetAddress;
108 tcuTexRefGetArray *cuTexRefGetArray;
109 tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
110 tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
111 tcuTexRefGetFormat *cuTexRefGetFormat;
112 tcuTexRefGetFlags *cuTexRefGetFlags;
113 tcuSurfRefSetArray *cuSurfRefSetArray;
114 tcuSurfRefGetArray *cuSurfRefGetArray;
115 tcuParamSetSize *cuParamSetSize;
116 tcuParamSeti *cuParamSeti;
117 tcuParamSetf *cuParamSetf;
118 tcuParamSetv *cuParamSetv;
119 tcuParamSetTexRef *cuParamSetTexRef;
120 tcuLaunch *cuLaunch;
121 tcuLaunchGrid *cuLaunchGrid;
122 tcuLaunchGridAsync *cuLaunchGridAsync;
123 tcuEventCreate *cuEventCreate;
124 tcuEventRecord *cuEventRecord;
125 tcuEventQuery *cuEventQuery;
126 tcuEventSynchronize *cuEventSynchronize;
127 tcuEventDestroy *cuEventDestroy;
128 tcuEventElapsedTime *cuEventElapsedTime;
129 tcuStreamCreate *cuStreamCreate;
130 tcuStreamQuery *cuStreamQuery;
131 tcuStreamSynchronize *cuStreamSynchronize;
132 tcuStreamDestroy *cuStreamDestroy;
133 tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
134 tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
135 tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
136 tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
137 tcuGraphicsMapResources *cuGraphicsMapResources;
138 tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
139 tcuGetExportTable *cuGetExportTable;
140 tcuCtxSetLimit *cuCtxSetLimit;
141 tcuCtxGetLimit *cuCtxGetLimit;
142 tcuGLCtxCreate *cuGLCtxCreate;
143 tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
144 tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
145 tcuCtxSetCurrent *cuCtxSetCurrent;
146
147 CCL_NAMESPACE_BEGIN
148
149 /* utility macros */
150 #define CUDA_LIBRARY_FIND_CHECKED(name) \
151         name = (t##name*)dynamic_library_find(lib, #name);
152
153 #define CUDA_LIBRARY_FIND(name) \
154         name = (t##name*)dynamic_library_find(lib, #name); \
155         assert(name);
156
157 #define CUDA_LIBRARY_FIND_V2(name) \
158         name = (t##name*)dynamic_library_find(lib, #name "_v2"); \
159         assert(name);
160
161 /* initialization function */
162
163 bool cuLibraryInit()
164 {
165         static bool initialized = false;
166         static bool result = false;
167
168         if(initialized)
169                 return result;
170         
171         initialized = true;
172
173         /* library paths */
174 #ifdef _WIN32
175         /* expected in c:/windows/system or similar, no path needed */
176         const char *path = "nvcuda.dll";
177 #elif defined(__APPLE__)
178         /* default installation path */
179         const char *path = "/usr/local/cuda/lib/libcuda.dylib";
180 #else
181         const char *path = "libcuda.so";
182 #endif
183
184         /* load library */
185         DynamicLibrary *lib = dynamic_library_open(path);
186
187         if(lib == NULL)
188                 return false;
189
190         /* detect driver version */
191         int driver_version = 1000;
192
193         CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
194         if(cuDriverGetVersion)
195                 cuDriverGetVersion(&driver_version);
196
197         /* we require version 4.0 */
198         if(driver_version < 4000)
199                 return false;
200
201         /* fetch all function pointers */
202         CUDA_LIBRARY_FIND(cuInit);
203         CUDA_LIBRARY_FIND(cuDeviceGet);
204         CUDA_LIBRARY_FIND(cuDeviceGetCount);
205         CUDA_LIBRARY_FIND(cuDeviceGetName);
206         CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
207         CUDA_LIBRARY_FIND(cuDeviceTotalMem);
208         CUDA_LIBRARY_FIND(cuDeviceGetProperties);
209         CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
210         CUDA_LIBRARY_FIND(cuCtxCreate);
211         CUDA_LIBRARY_FIND(cuCtxDestroy);
212         CUDA_LIBRARY_FIND(cuCtxAttach);
213         CUDA_LIBRARY_FIND(cuCtxDetach);
214         CUDA_LIBRARY_FIND(cuCtxPushCurrent);
215         CUDA_LIBRARY_FIND(cuCtxPopCurrent);
216         CUDA_LIBRARY_FIND(cuCtxGetDevice);
217         CUDA_LIBRARY_FIND(cuCtxSynchronize);
218         CUDA_LIBRARY_FIND(cuModuleLoad);
219         CUDA_LIBRARY_FIND(cuModuleLoadData);
220         CUDA_LIBRARY_FIND(cuModuleUnload);
221         CUDA_LIBRARY_FIND(cuModuleGetFunction);
222         CUDA_LIBRARY_FIND(cuModuleGetGlobal);
223         CUDA_LIBRARY_FIND(cuModuleGetTexRef);
224         CUDA_LIBRARY_FIND(cuMemGetInfo);
225         CUDA_LIBRARY_FIND(cuMemAlloc);
226         CUDA_LIBRARY_FIND(cuMemAllocPitch);
227         CUDA_LIBRARY_FIND(cuMemFree);
228         CUDA_LIBRARY_FIND(cuMemGetAddressRange);
229         CUDA_LIBRARY_FIND(cuMemAllocHost);
230         CUDA_LIBRARY_FIND(cuMemFreeHost);
231         CUDA_LIBRARY_FIND(cuMemHostAlloc);
232         CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer);
233         CUDA_LIBRARY_FIND(cuMemcpyHtoD);
234         CUDA_LIBRARY_FIND(cuMemcpyDtoH);
235         CUDA_LIBRARY_FIND(cuMemcpyDtoD);
236         CUDA_LIBRARY_FIND(cuMemcpyDtoA);
237         CUDA_LIBRARY_FIND(cuMemcpyAtoD);
238         CUDA_LIBRARY_FIND(cuMemcpyHtoA);
239         CUDA_LIBRARY_FIND(cuMemcpyAtoH);
240         CUDA_LIBRARY_FIND(cuMemcpyAtoA);
241         CUDA_LIBRARY_FIND(cuMemcpy2D);
242         CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned);
243         CUDA_LIBRARY_FIND(cuMemcpy3D);
244         CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync);
245         CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync);
246         CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync);
247         CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync);
248         CUDA_LIBRARY_FIND(cuMemcpy2DAsync);
249         CUDA_LIBRARY_FIND(cuMemcpy3DAsync);
250         CUDA_LIBRARY_FIND(cuMemsetD8);
251         CUDA_LIBRARY_FIND(cuMemsetD16);
252         CUDA_LIBRARY_FIND(cuMemsetD32);
253         CUDA_LIBRARY_FIND(cuMemsetD2D8);
254         CUDA_LIBRARY_FIND(cuMemsetD2D16);
255         CUDA_LIBRARY_FIND(cuMemsetD2D32);
256         CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
257         CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
258         CUDA_LIBRARY_FIND(cuFuncGetAttribute);
259         CUDA_LIBRARY_FIND(cuArrayCreate);
260         CUDA_LIBRARY_FIND(cuArrayGetDescriptor);
261         CUDA_LIBRARY_FIND(cuArrayDestroy);
262         CUDA_LIBRARY_FIND(cuArray3DCreate);
263         CUDA_LIBRARY_FIND(cuArray3DGetDescriptor);
264         CUDA_LIBRARY_FIND(cuTexRefCreate);
265         CUDA_LIBRARY_FIND(cuTexRefDestroy);
266         CUDA_LIBRARY_FIND(cuTexRefSetArray);
267         CUDA_LIBRARY_FIND(cuTexRefSetAddress);
268         CUDA_LIBRARY_FIND(cuTexRefSetAddress2D);
269         CUDA_LIBRARY_FIND(cuTexRefSetFormat);
270         CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
271         CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
272         CUDA_LIBRARY_FIND(cuTexRefSetFlags);
273         CUDA_LIBRARY_FIND(cuTexRefGetAddress);
274         CUDA_LIBRARY_FIND(cuTexRefGetArray);
275         CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
276         CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
277         CUDA_LIBRARY_FIND(cuTexRefGetFormat);
278         CUDA_LIBRARY_FIND(cuTexRefGetFlags);
279         CUDA_LIBRARY_FIND(cuParamSetSize);
280         CUDA_LIBRARY_FIND(cuParamSeti);
281         CUDA_LIBRARY_FIND(cuParamSetf);
282         CUDA_LIBRARY_FIND(cuParamSetv);
283         CUDA_LIBRARY_FIND(cuParamSetTexRef);
284         CUDA_LIBRARY_FIND(cuLaunch);
285         CUDA_LIBRARY_FIND(cuLaunchGrid);
286         CUDA_LIBRARY_FIND(cuLaunchGridAsync);
287         CUDA_LIBRARY_FIND(cuEventCreate);
288         CUDA_LIBRARY_FIND(cuEventRecord);
289         CUDA_LIBRARY_FIND(cuEventQuery);
290         CUDA_LIBRARY_FIND(cuEventSynchronize);
291         CUDA_LIBRARY_FIND(cuEventDestroy);
292         CUDA_LIBRARY_FIND(cuEventElapsedTime);
293         CUDA_LIBRARY_FIND(cuStreamCreate);
294         CUDA_LIBRARY_FIND(cuStreamQuery);
295         CUDA_LIBRARY_FIND(cuStreamSynchronize);
296         CUDA_LIBRARY_FIND(cuStreamDestroy);
297
298         /* cuda 2.1 */
299         CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
300         CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
301         CUDA_LIBRARY_FIND(cuGLCtxCreate);
302         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
303         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
304
305         /* cuda 2.3 */
306         CUDA_LIBRARY_FIND(cuMemHostGetFlags);
307         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
308         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
309
310         /* cuda 3.0 */
311         CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync);
312         CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
313         CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
314         CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
315         CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer);
316         CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
317         CUDA_LIBRARY_FIND(cuGraphicsMapResources);
318         CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
319         CUDA_LIBRARY_FIND(cuGetExportTable);
320
321         /* cuda 3.1 */
322         CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
323         CUDA_LIBRARY_FIND(cuSurfRefSetArray);
324         CUDA_LIBRARY_FIND(cuSurfRefGetArray);
325         CUDA_LIBRARY_FIND(cuCtxSetLimit);
326         CUDA_LIBRARY_FIND(cuCtxGetLimit);
327
328         /* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library
329          * has both the old ones for compatibility and new ones with _v2 postfix,
330          * we load the _v2 ones here. */
331         CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem);
332         CUDA_LIBRARY_FIND_V2(cuCtxCreate);
333         CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal);
334         CUDA_LIBRARY_FIND_V2(cuMemGetInfo);
335         CUDA_LIBRARY_FIND_V2(cuMemAlloc);
336         CUDA_LIBRARY_FIND_V2(cuMemAllocPitch);
337         CUDA_LIBRARY_FIND_V2(cuMemFree);
338         CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange);
339         CUDA_LIBRARY_FIND_V2(cuMemAllocHost);
340         CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer);
341         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD);
342         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH);
343         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD);
344         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA);
345         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD);
346         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA);
347         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH);
348         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA);
349         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync);
350         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync);
351         CUDA_LIBRARY_FIND_V2(cuMemcpy2D);
352         CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned);
353         CUDA_LIBRARY_FIND_V2(cuMemcpy3D);
354         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync);
355         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync);
356         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync);
357         CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync);
358         CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync);
359         CUDA_LIBRARY_FIND_V2(cuMemsetD8);
360         CUDA_LIBRARY_FIND_V2(cuMemsetD16);
361         CUDA_LIBRARY_FIND_V2(cuMemsetD32);
362         CUDA_LIBRARY_FIND_V2(cuMemsetD2D8);
363         CUDA_LIBRARY_FIND_V2(cuMemsetD2D16);
364         CUDA_LIBRARY_FIND_V2(cuMemsetD2D32);
365         CUDA_LIBRARY_FIND_V2(cuArrayCreate);
366         CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor);
367         CUDA_LIBRARY_FIND_V2(cuArray3DCreate);
368         CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor);
369         CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress);
370         CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D);
371         CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress);
372         CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer);
373         CUDA_LIBRARY_FIND_V2(cuGLCtxCreate);
374
375         /* cuda 4.0 */
376         CUDA_LIBRARY_FIND(cuCtxSetCurrent);
377
378 #ifndef WITH_CUDA_BINARIES
379 #ifdef _WIN32
380         return false; /* runtime build doesn't work at the moment */
381 #else
382         if(cuCompilerPath() == "")
383                 return false;
384 #endif
385 #endif
386
387         /* success */
388         result = true;
389
390         return result;
391 }
392
393 string cuCompilerPath()
394 {
395 #ifdef _WIN32
396         const char *defaultpath = "C:/CUDA/bin";
397         const char *executable = "nvcc.exe";
398 #else
399         const char *defaultpath = "/usr/local/cuda/bin";
400         const char *executable = "nvcc";
401 #endif
402
403         const char *binpath = getenv("CUDA_BIN_PATH");
404
405         string nvcc;
406
407         if(binpath)
408                 nvcc = path_join(binpath, executable);
409         else
410                 nvcc = path_join(defaultpath, executable);
411
412         if(path_exists(nvcc))
413                 return nvcc;
414
415 #ifndef _WIN32
416         if(system("which nvcc") == 0)
417                 return "nvcc";
418 #endif
419
420         return "";
421 }
422
423 CCL_NAMESPACE_END
424