Cycles: svn merge -r37040:37333 https://svn.blender.org/svnroot/bf-blender/trunk...
[blender-staging.git] / intern / cycles / util / util_cuda.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include "util_cuda.h"
20 #include "util_debug.h"
21 #include "util_dynlib.h"
22
23 /* function defininitions */
24
25 tcuInit *cuInit;
26 tcuDriverGetVersion *cuDriverGetVersion;
27 tcuDeviceGet *cuDeviceGet;
28 tcuDeviceGetCount *cuDeviceGetCount;
29 tcuDeviceGetName *cuDeviceGetName;
30 tcuDeviceComputeCapability *cuDeviceComputeCapability;
31 tcuDeviceTotalMem *cuDeviceTotalMem;
32 tcuDeviceGetProperties *cuDeviceGetProperties;
33 tcuDeviceGetAttribute *cuDeviceGetAttribute;
34 tcuCtxCreate *cuCtxCreate;
35 tcuCtxDestroy *cuCtxDestroy;
36 tcuCtxAttach *cuCtxAttach;
37 tcuCtxDetach *cuCtxDetach;
38 tcuCtxPushCurrent *cuCtxPushCurrent;
39 tcuCtxPopCurrent *cuCtxPopCurrent;
40 tcuCtxGetDevice *cuCtxGetDevice;
41 tcuCtxSynchronize *cuCtxSynchronize;
42 tcuModuleLoad *cuModuleLoad;
43 tcuModuleLoadData *cuModuleLoadData;
44 tcuModuleLoadDataEx *cuModuleLoadDataEx;
45 tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
46 tcuModuleUnload *cuModuleUnload;
47 tcuModuleGetFunction *cuModuleGetFunction;
48 tcuModuleGetGlobal *cuModuleGetGlobal;
49 tcuModuleGetTexRef *cuModuleGetTexRef;
50 tcuModuleGetSurfRef *cuModuleGetSurfRef;
51 tcuMemGetInfo *cuMemGetInfo;
52 tcuMemAlloc *cuMemAlloc;
53 tcuMemAllocPitch *cuMemAllocPitch;
54 tcuMemFree *cuMemFree;
55 tcuMemGetAddressRange *cuMemGetAddressRange;
56 tcuMemAllocHost *cuMemAllocHost;
57 tcuMemFreeHost *cuMemFreeHost;
58 tcuMemHostAlloc *cuMemHostAlloc;
59 tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
60 tcuMemHostGetFlags *cuMemHostGetFlags;
61 tcuMemcpyHtoD *cuMemcpyHtoD;
62 tcuMemcpyDtoH *cuMemcpyDtoH;
63 tcuMemcpyDtoD *cuMemcpyDtoD;
64 tcuMemcpyDtoA *cuMemcpyDtoA;
65 tcuMemcpyAtoD *cuMemcpyAtoD;
66 tcuMemcpyHtoA *cuMemcpyHtoA;
67 tcuMemcpyAtoH *cuMemcpyAtoH;
68 tcuMemcpyAtoA *cuMemcpyAtoA;
69 tcuMemcpy2D *cuMemcpy2D;
70 tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
71 tcuMemcpy3D *cuMemcpy3D;
72 tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
73 tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
74 tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
75 tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
76 tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
77 tcuMemcpy2DAsync *cuMemcpy2DAsync;
78 tcuMemcpy3DAsync *cuMemcpy3DAsync;
79 tcuMemsetD8 *cuMemsetD8;
80 tcuMemsetD16 *cuMemsetD16;
81 tcuMemsetD32 *cuMemsetD32;
82 tcuMemsetD2D8 *cuMemsetD2D8;
83 tcuMemsetD2D16 *cuMemsetD2D16;
84 tcuMemsetD2D32 *cuMemsetD2D32;
85 tcuFuncSetBlockShape *cuFuncSetBlockShape;
86 tcuFuncSetSharedSize *cuFuncSetSharedSize;
87 tcuFuncGetAttribute *cuFuncGetAttribute;
88 tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
89 tcuArrayCreate *cuArrayCreate;
90 tcuArrayGetDescriptor *cuArrayGetDescriptor;
91 tcuArrayDestroy *cuArrayDestroy;
92 tcuArray3DCreate *cuArray3DCreate;
93 tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
94 tcuTexRefCreate *cuTexRefCreate;
95 tcuTexRefDestroy *cuTexRefDestroy;
96 tcuTexRefSetArray *cuTexRefSetArray;
97 tcuTexRefSetAddress *cuTexRefSetAddress;
98 tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
99 tcuTexRefSetFormat *cuTexRefSetFormat;
100 tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
101 tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
102 tcuTexRefSetFlags *cuTexRefSetFlags;
103 tcuTexRefGetAddress *cuTexRefGetAddress;
104 tcuTexRefGetArray *cuTexRefGetArray;
105 tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
106 tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
107 tcuTexRefGetFormat *cuTexRefGetFormat;
108 tcuTexRefGetFlags *cuTexRefGetFlags;
109 tcuSurfRefSetArray *cuSurfRefSetArray;
110 tcuSurfRefGetArray *cuSurfRefGetArray;
111 tcuParamSetSize *cuParamSetSize;
112 tcuParamSeti *cuParamSeti;
113 tcuParamSetf *cuParamSetf;
114 tcuParamSetv *cuParamSetv;
115 tcuParamSetTexRef *cuParamSetTexRef;
116 tcuLaunch *cuLaunch;
117 tcuLaunchGrid *cuLaunchGrid;
118 tcuLaunchGridAsync *cuLaunchGridAsync;
119 tcuEventCreate *cuEventCreate;
120 tcuEventRecord *cuEventRecord;
121 tcuEventQuery *cuEventQuery;
122 tcuEventSynchronize *cuEventSynchronize;
123 tcuEventDestroy *cuEventDestroy;
124 tcuEventElapsedTime *cuEventElapsedTime;
125 tcuStreamCreate *cuStreamCreate;
126 tcuStreamQuery *cuStreamQuery;
127 tcuStreamSynchronize *cuStreamSynchronize;
128 tcuStreamDestroy *cuStreamDestroy;
129 tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
130 tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
131 tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
132 tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
133 tcuGraphicsMapResources *cuGraphicsMapResources;
134 tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
135 tcuGetExportTable *cuGetExportTable;
136 tcuCtxSetLimit *cuCtxSetLimit;
137 tcuCtxGetLimit *cuCtxGetLimit;
138 tcuGLCtxCreate *cuGLCtxCreate;
139 tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
140 tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
141 tcuCtxSetCurrent *cuCtxSetCurrent;
142
143 CCL_NAMESPACE_BEGIN
144
145 /* utility macros */
146
/* Resolve the symbol called `name` from the library handle `lib` (a
 * variable of that name must exist in the calling scope), store it in the
 * global function pointer `name`, and assert that the lookup succeeded.
 *
 * Wrapped in do { } while(0) so the two statements always act as a single
 * statement, also under an unbraced if/else. Use as `CUDA_LIBRARY_FIND(x);`. */
#define CUDA_LIBRARY_FIND(name) \
	do { \
		name = (t##name*)dynamic_library_find(lib, #name); \
		assert(name); \
	} while(0)

/* Same as CUDA_LIBRARY_FIND, but looks the symbol up under its name with a
 * "_v2" suffix appended, while still storing it in the pointer `name`. */
#define CUDA_LIBRARY_FIND_V2(name) \
	do { \
		name = (t##name*)dynamic_library_find(lib, #name "_v2"); \
		assert(name); \
	} while(0)
154
155 /* initialization function */
156
157 bool cuLibraryInit()
158 {
159         static bool initialized = false;
160         static bool result = false;
161
162         if(initialized)
163                 return result;
164         
165         initialized = true;
166
167         /* library paths */
168 #ifdef _WIN32
169         /* expected in c:/windows/system or similar, no path needed */
170         const char *path = "nvcuda.dll";
171 #elif defined(__APPLE__)
172         /* default installation path */
173         const char *path = "/usr/local/cuda/lib/libcuda.dylib";
174 #else
175         const char *path = "libcuda.so";
176 #endif
177
178         /* load library */
179         DynamicLibrary *lib = dynamic_library_open(path);
180
181         if(lib == NULL)
182                 return false;
183
184         /* detect driver version */
185         int driver_version = 1000;
186
187         CUDA_LIBRARY_FIND(cuDriverGetVersion);
188         if(cuDriverGetVersion)
189                 cuDriverGetVersion(&driver_version);
190
191         /* we require version 4.0 */
192         if(driver_version < 4000)
193                 return false;
194
195         /* fetch all function pointers */
196         CUDA_LIBRARY_FIND(cuInit);
197         CUDA_LIBRARY_FIND(cuDeviceGet);
198         CUDA_LIBRARY_FIND(cuDeviceGetCount);
199         CUDA_LIBRARY_FIND(cuDeviceGetName);
200         CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
201         CUDA_LIBRARY_FIND(cuDeviceTotalMem);
202         CUDA_LIBRARY_FIND(cuDeviceGetProperties);
203         CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
204         CUDA_LIBRARY_FIND(cuCtxCreate);
205         CUDA_LIBRARY_FIND(cuCtxDestroy);
206         CUDA_LIBRARY_FIND(cuCtxAttach);
207         CUDA_LIBRARY_FIND(cuCtxDetach);
208         CUDA_LIBRARY_FIND(cuCtxPushCurrent);
209         CUDA_LIBRARY_FIND(cuCtxPopCurrent);
210         CUDA_LIBRARY_FIND(cuCtxGetDevice);
211         CUDA_LIBRARY_FIND(cuCtxSynchronize);
212         CUDA_LIBRARY_FIND(cuModuleLoad);
213         CUDA_LIBRARY_FIND(cuModuleLoadData);
214         CUDA_LIBRARY_FIND(cuModuleUnload);
215         CUDA_LIBRARY_FIND(cuModuleGetFunction);
216         CUDA_LIBRARY_FIND(cuModuleGetGlobal);
217         CUDA_LIBRARY_FIND(cuModuleGetTexRef);
218         CUDA_LIBRARY_FIND(cuMemGetInfo);
219         CUDA_LIBRARY_FIND(cuMemAlloc);
220         CUDA_LIBRARY_FIND(cuMemAllocPitch);
221         CUDA_LIBRARY_FIND(cuMemFree);
222         CUDA_LIBRARY_FIND(cuMemGetAddressRange);
223         CUDA_LIBRARY_FIND(cuMemAllocHost);
224         CUDA_LIBRARY_FIND(cuMemFreeHost);
225         CUDA_LIBRARY_FIND(cuMemHostAlloc);
226         CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer);
227         CUDA_LIBRARY_FIND(cuMemcpyHtoD);
228         CUDA_LIBRARY_FIND(cuMemcpyDtoH);
229         CUDA_LIBRARY_FIND(cuMemcpyDtoD);
230         CUDA_LIBRARY_FIND(cuMemcpyDtoA);
231         CUDA_LIBRARY_FIND(cuMemcpyAtoD);
232         CUDA_LIBRARY_FIND(cuMemcpyHtoA);
233         CUDA_LIBRARY_FIND(cuMemcpyAtoH);
234         CUDA_LIBRARY_FIND(cuMemcpyAtoA);
235         CUDA_LIBRARY_FIND(cuMemcpy2D);
236         CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned);
237         CUDA_LIBRARY_FIND(cuMemcpy3D);
238         CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync);
239         CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync);
240         CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync);
241         CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync);
242         CUDA_LIBRARY_FIND(cuMemcpy2DAsync);
243         CUDA_LIBRARY_FIND(cuMemcpy3DAsync);
244         CUDA_LIBRARY_FIND(cuMemsetD8);
245         CUDA_LIBRARY_FIND(cuMemsetD16);
246         CUDA_LIBRARY_FIND(cuMemsetD32);
247         CUDA_LIBRARY_FIND(cuMemsetD2D8);
248         CUDA_LIBRARY_FIND(cuMemsetD2D16);
249         CUDA_LIBRARY_FIND(cuMemsetD2D32);
250         CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
251         CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
252         CUDA_LIBRARY_FIND(cuFuncGetAttribute);
253         CUDA_LIBRARY_FIND(cuArrayCreate);
254         CUDA_LIBRARY_FIND(cuArrayGetDescriptor);
255         CUDA_LIBRARY_FIND(cuArrayDestroy);
256         CUDA_LIBRARY_FIND(cuArray3DCreate);
257         CUDA_LIBRARY_FIND(cuArray3DGetDescriptor);
258         CUDA_LIBRARY_FIND(cuTexRefCreate);
259         CUDA_LIBRARY_FIND(cuTexRefDestroy);
260         CUDA_LIBRARY_FIND(cuTexRefSetArray);
261         CUDA_LIBRARY_FIND(cuTexRefSetAddress);
262         CUDA_LIBRARY_FIND(cuTexRefSetAddress2D);
263         CUDA_LIBRARY_FIND(cuTexRefSetFormat);
264         CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
265         CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
266         CUDA_LIBRARY_FIND(cuTexRefSetFlags);
267         CUDA_LIBRARY_FIND(cuTexRefGetAddress);
268         CUDA_LIBRARY_FIND(cuTexRefGetArray);
269         CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
270         CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
271         CUDA_LIBRARY_FIND(cuTexRefGetFormat);
272         CUDA_LIBRARY_FIND(cuTexRefGetFlags);
273         CUDA_LIBRARY_FIND(cuParamSetSize);
274         CUDA_LIBRARY_FIND(cuParamSeti);
275         CUDA_LIBRARY_FIND(cuParamSetf);
276         CUDA_LIBRARY_FIND(cuParamSetv);
277         CUDA_LIBRARY_FIND(cuParamSetTexRef);
278         CUDA_LIBRARY_FIND(cuLaunch);
279         CUDA_LIBRARY_FIND(cuLaunchGrid);
280         CUDA_LIBRARY_FIND(cuLaunchGridAsync);
281         CUDA_LIBRARY_FIND(cuEventCreate);
282         CUDA_LIBRARY_FIND(cuEventRecord);
283         CUDA_LIBRARY_FIND(cuEventQuery);
284         CUDA_LIBRARY_FIND(cuEventSynchronize);
285         CUDA_LIBRARY_FIND(cuEventDestroy);
286         CUDA_LIBRARY_FIND(cuEventElapsedTime);
287         CUDA_LIBRARY_FIND(cuStreamCreate);
288         CUDA_LIBRARY_FIND(cuStreamQuery);
289         CUDA_LIBRARY_FIND(cuStreamSynchronize);
290         CUDA_LIBRARY_FIND(cuStreamDestroy);
291
292         /* cuda 2.1 */
293         CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
294         CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
295         CUDA_LIBRARY_FIND(cuGLCtxCreate);
296         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
297         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
298
299         /* cuda 2.3 */
300         CUDA_LIBRARY_FIND(cuMemHostGetFlags);
301         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
302         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
303
304         /* cuda 3.0 */
305         CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync);
306         CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
307         CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
308         CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
309         CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer);
310         CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
311         CUDA_LIBRARY_FIND(cuGraphicsMapResources);
312         CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
313         CUDA_LIBRARY_FIND(cuGetExportTable);
314
315         /* cuda 3.1 */
316         CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
317         CUDA_LIBRARY_FIND(cuSurfRefSetArray);
318         CUDA_LIBRARY_FIND(cuSurfRefGetArray);
319         CUDA_LIBRARY_FIND(cuCtxSetLimit);
320         CUDA_LIBRARY_FIND(cuCtxGetLimit);
321
322         /* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library
323            has both the old ones for compatibility and new ones with _v2 postfix,
324            we load the _v2 ones here. */
325         CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem);
326         CUDA_LIBRARY_FIND_V2(cuCtxCreate);
327         CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal);
328         CUDA_LIBRARY_FIND_V2(cuMemGetInfo);
329         CUDA_LIBRARY_FIND_V2(cuMemAlloc);
330         CUDA_LIBRARY_FIND_V2(cuMemAllocPitch);
331         CUDA_LIBRARY_FIND_V2(cuMemFree);
332         CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange);
333         CUDA_LIBRARY_FIND_V2(cuMemAllocHost);
334         CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer);
335         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD);
336         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH);
337         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD);
338         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA);
339         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD);
340         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA);
341         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH);
342         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA);
343         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync);
344         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync);
345         CUDA_LIBRARY_FIND_V2(cuMemcpy2D);
346         CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned);
347         CUDA_LIBRARY_FIND_V2(cuMemcpy3D);
348         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync);
349         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync);
350         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync);
351         CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync);
352         CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync);
353         CUDA_LIBRARY_FIND_V2(cuMemsetD8);
354         CUDA_LIBRARY_FIND_V2(cuMemsetD16);
355         CUDA_LIBRARY_FIND_V2(cuMemsetD32);
356         CUDA_LIBRARY_FIND_V2(cuMemsetD2D8);
357         CUDA_LIBRARY_FIND_V2(cuMemsetD2D16);
358         CUDA_LIBRARY_FIND_V2(cuMemsetD2D32);
359         CUDA_LIBRARY_FIND_V2(cuArrayCreate);
360         CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor);
361         CUDA_LIBRARY_FIND_V2(cuArray3DCreate);
362         CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor);
363         CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress);
364         CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D);
365         CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress);
366         CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer);
367         CUDA_LIBRARY_FIND_V2(cuGLCtxCreate);
368
369         /* cuda 4.0 */
370         CUDA_LIBRARY_FIND(cuCtxSetCurrent);
371
372         /* success */
373         result = true;
374
375         return result;
376 }
377
378 CCL_NAMESPACE_END
379