Cycles: svn merge -r39870:r40266 https://svn.blender.org/svnroot/bf-blender/trunk...
[blender-staging.git] / intern / cycles / util / util_cuda.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include <stdlib.h>
20
21 #include "util_cuda.h"
22 #include "util_debug.h"
23 #include "util_dynlib.h"
24 #include "util_path.h"
25 #include "util_string.h"
26
27 /* function defininitions */
28
29 tcuInit *cuInit;
30 tcuDriverGetVersion *cuDriverGetVersion;
31 tcuDeviceGet *cuDeviceGet;
32 tcuDeviceGetCount *cuDeviceGetCount;
33 tcuDeviceGetName *cuDeviceGetName;
34 tcuDeviceComputeCapability *cuDeviceComputeCapability;
35 tcuDeviceTotalMem *cuDeviceTotalMem;
36 tcuDeviceGetProperties *cuDeviceGetProperties;
37 tcuDeviceGetAttribute *cuDeviceGetAttribute;
38 tcuCtxCreate *cuCtxCreate;
39 tcuCtxDestroy *cuCtxDestroy;
40 tcuCtxAttach *cuCtxAttach;
41 tcuCtxDetach *cuCtxDetach;
42 tcuCtxPushCurrent *cuCtxPushCurrent;
43 tcuCtxPopCurrent *cuCtxPopCurrent;
44 tcuCtxGetDevice *cuCtxGetDevice;
45 tcuCtxSynchronize *cuCtxSynchronize;
46 tcuModuleLoad *cuModuleLoad;
47 tcuModuleLoadData *cuModuleLoadData;
48 tcuModuleLoadDataEx *cuModuleLoadDataEx;
49 tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
50 tcuModuleUnload *cuModuleUnload;
51 tcuModuleGetFunction *cuModuleGetFunction;
52 tcuModuleGetGlobal *cuModuleGetGlobal;
53 tcuModuleGetTexRef *cuModuleGetTexRef;
54 tcuModuleGetSurfRef *cuModuleGetSurfRef;
55 tcuMemGetInfo *cuMemGetInfo;
56 tcuMemAlloc *cuMemAlloc;
57 tcuMemAllocPitch *cuMemAllocPitch;
58 tcuMemFree *cuMemFree;
59 tcuMemGetAddressRange *cuMemGetAddressRange;
60 tcuMemAllocHost *cuMemAllocHost;
61 tcuMemFreeHost *cuMemFreeHost;
62 tcuMemHostAlloc *cuMemHostAlloc;
63 tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
64 tcuMemHostGetFlags *cuMemHostGetFlags;
65 tcuMemcpyHtoD *cuMemcpyHtoD;
66 tcuMemcpyDtoH *cuMemcpyDtoH;
67 tcuMemcpyDtoD *cuMemcpyDtoD;
68 tcuMemcpyDtoA *cuMemcpyDtoA;
69 tcuMemcpyAtoD *cuMemcpyAtoD;
70 tcuMemcpyHtoA *cuMemcpyHtoA;
71 tcuMemcpyAtoH *cuMemcpyAtoH;
72 tcuMemcpyAtoA *cuMemcpyAtoA;
73 tcuMemcpy2D *cuMemcpy2D;
74 tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
75 tcuMemcpy3D *cuMemcpy3D;
76 tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
77 tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
78 tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
79 tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
80 tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
81 tcuMemcpy2DAsync *cuMemcpy2DAsync;
82 tcuMemcpy3DAsync *cuMemcpy3DAsync;
83 tcuMemsetD8 *cuMemsetD8;
84 tcuMemsetD16 *cuMemsetD16;
85 tcuMemsetD32 *cuMemsetD32;
86 tcuMemsetD2D8 *cuMemsetD2D8;
87 tcuMemsetD2D16 *cuMemsetD2D16;
88 tcuMemsetD2D32 *cuMemsetD2D32;
89 tcuFuncSetBlockShape *cuFuncSetBlockShape;
90 tcuFuncSetSharedSize *cuFuncSetSharedSize;
91 tcuFuncGetAttribute *cuFuncGetAttribute;
92 tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
93 tcuArrayCreate *cuArrayCreate;
94 tcuArrayGetDescriptor *cuArrayGetDescriptor;
95 tcuArrayDestroy *cuArrayDestroy;
96 tcuArray3DCreate *cuArray3DCreate;
97 tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
98 tcuTexRefCreate *cuTexRefCreate;
99 tcuTexRefDestroy *cuTexRefDestroy;
100 tcuTexRefSetArray *cuTexRefSetArray;
101 tcuTexRefSetAddress *cuTexRefSetAddress;
102 tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
103 tcuTexRefSetFormat *cuTexRefSetFormat;
104 tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
105 tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
106 tcuTexRefSetFlags *cuTexRefSetFlags;
107 tcuTexRefGetAddress *cuTexRefGetAddress;
108 tcuTexRefGetArray *cuTexRefGetArray;
109 tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
110 tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
111 tcuTexRefGetFormat *cuTexRefGetFormat;
112 tcuTexRefGetFlags *cuTexRefGetFlags;
113 tcuSurfRefSetArray *cuSurfRefSetArray;
114 tcuSurfRefGetArray *cuSurfRefGetArray;
115 tcuParamSetSize *cuParamSetSize;
116 tcuParamSeti *cuParamSeti;
117 tcuParamSetf *cuParamSetf;
118 tcuParamSetv *cuParamSetv;
119 tcuParamSetTexRef *cuParamSetTexRef;
120 tcuLaunch *cuLaunch;
121 tcuLaunchGrid *cuLaunchGrid;
122 tcuLaunchGridAsync *cuLaunchGridAsync;
123 tcuEventCreate *cuEventCreate;
124 tcuEventRecord *cuEventRecord;
125 tcuEventQuery *cuEventQuery;
126 tcuEventSynchronize *cuEventSynchronize;
127 tcuEventDestroy *cuEventDestroy;
128 tcuEventElapsedTime *cuEventElapsedTime;
129 tcuStreamCreate *cuStreamCreate;
130 tcuStreamQuery *cuStreamQuery;
131 tcuStreamSynchronize *cuStreamSynchronize;
132 tcuStreamDestroy *cuStreamDestroy;
133 tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
134 tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
135 tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
136 tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
137 tcuGraphicsMapResources *cuGraphicsMapResources;
138 tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
139 tcuGetExportTable *cuGetExportTable;
140 tcuCtxSetLimit *cuCtxSetLimit;
141 tcuCtxGetLimit *cuCtxGetLimit;
142 tcuGLCtxCreate *cuGLCtxCreate;
143 tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
144 tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
145 tcuCtxSetCurrent *cuCtxSetCurrent;
146
147 CCL_NAMESPACE_BEGIN
148
149 /* utility macros */
150
/* Look up the symbol called `name` in the library handle `lib` (which must be
 * in scope at the point of use) and store it in the function pointer of the
 * same name, then assert that the lookup succeeded.
 *
 * The body is wrapped in do/while(0) so the macro expands to exactly one
 * statement and stays correct under an unbraced if/else. Use it with a
 * trailing semicolon. */
#define CUDA_LIBRARY_FIND(name) \
        do { \
                name = (t##name*)dynamic_library_find(lib, #name); \
                assert(name); \
        } while(0)

/* Same as CUDA_LIBRARY_FIND, but looks up the symbol with a "_v2" postfix
 * while still storing it in the pointer called `name`. */
#define CUDA_LIBRARY_FIND_V2(name) \
        do { \
                name = (t##name*)dynamic_library_find(lib, #name "_v2"); \
                assert(name); \
        } while(0)
158
159 /* initialization function */
160
161 bool cuLibraryInit()
162 {
163         static bool initialized = false;
164         static bool result = false;
165
166         if(initialized)
167                 return result;
168         
169         initialized = true;
170
171         /* library paths */
172 #ifdef _WIN32
173         /* expected in c:/windows/system or similar, no path needed */
174         const char *path = "nvcuda.dll";
175 #elif defined(__APPLE__)
176         /* default installation path */
177         const char *path = "/usr/local/cuda/lib/libcuda.dylib";
178 #else
179         const char *path = "libcuda.so";
180 #endif
181
182         /* load library */
183         DynamicLibrary *lib = dynamic_library_open(path);
184
185         if(lib == NULL)
186                 return false;
187
188         /* detect driver version */
189         int driver_version = 1000;
190
191         CUDA_LIBRARY_FIND(cuDriverGetVersion);
192         if(cuDriverGetVersion)
193                 cuDriverGetVersion(&driver_version);
194
195         /* we require version 4.0 */
196         if(driver_version < 4000)
197                 return false;
198
199         /* fetch all function pointers */
200         CUDA_LIBRARY_FIND(cuInit);
201         CUDA_LIBRARY_FIND(cuDeviceGet);
202         CUDA_LIBRARY_FIND(cuDeviceGetCount);
203         CUDA_LIBRARY_FIND(cuDeviceGetName);
204         CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
205         CUDA_LIBRARY_FIND(cuDeviceTotalMem);
206         CUDA_LIBRARY_FIND(cuDeviceGetProperties);
207         CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
208         CUDA_LIBRARY_FIND(cuCtxCreate);
209         CUDA_LIBRARY_FIND(cuCtxDestroy);
210         CUDA_LIBRARY_FIND(cuCtxAttach);
211         CUDA_LIBRARY_FIND(cuCtxDetach);
212         CUDA_LIBRARY_FIND(cuCtxPushCurrent);
213         CUDA_LIBRARY_FIND(cuCtxPopCurrent);
214         CUDA_LIBRARY_FIND(cuCtxGetDevice);
215         CUDA_LIBRARY_FIND(cuCtxSynchronize);
216         CUDA_LIBRARY_FIND(cuModuleLoad);
217         CUDA_LIBRARY_FIND(cuModuleLoadData);
218         CUDA_LIBRARY_FIND(cuModuleUnload);
219         CUDA_LIBRARY_FIND(cuModuleGetFunction);
220         CUDA_LIBRARY_FIND(cuModuleGetGlobal);
221         CUDA_LIBRARY_FIND(cuModuleGetTexRef);
222         CUDA_LIBRARY_FIND(cuMemGetInfo);
223         CUDA_LIBRARY_FIND(cuMemAlloc);
224         CUDA_LIBRARY_FIND(cuMemAllocPitch);
225         CUDA_LIBRARY_FIND(cuMemFree);
226         CUDA_LIBRARY_FIND(cuMemGetAddressRange);
227         CUDA_LIBRARY_FIND(cuMemAllocHost);
228         CUDA_LIBRARY_FIND(cuMemFreeHost);
229         CUDA_LIBRARY_FIND(cuMemHostAlloc);
230         CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer);
231         CUDA_LIBRARY_FIND(cuMemcpyHtoD);
232         CUDA_LIBRARY_FIND(cuMemcpyDtoH);
233         CUDA_LIBRARY_FIND(cuMemcpyDtoD);
234         CUDA_LIBRARY_FIND(cuMemcpyDtoA);
235         CUDA_LIBRARY_FIND(cuMemcpyAtoD);
236         CUDA_LIBRARY_FIND(cuMemcpyHtoA);
237         CUDA_LIBRARY_FIND(cuMemcpyAtoH);
238         CUDA_LIBRARY_FIND(cuMemcpyAtoA);
239         CUDA_LIBRARY_FIND(cuMemcpy2D);
240         CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned);
241         CUDA_LIBRARY_FIND(cuMemcpy3D);
242         CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync);
243         CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync);
244         CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync);
245         CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync);
246         CUDA_LIBRARY_FIND(cuMemcpy2DAsync);
247         CUDA_LIBRARY_FIND(cuMemcpy3DAsync);
248         CUDA_LIBRARY_FIND(cuMemsetD8);
249         CUDA_LIBRARY_FIND(cuMemsetD16);
250         CUDA_LIBRARY_FIND(cuMemsetD32);
251         CUDA_LIBRARY_FIND(cuMemsetD2D8);
252         CUDA_LIBRARY_FIND(cuMemsetD2D16);
253         CUDA_LIBRARY_FIND(cuMemsetD2D32);
254         CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
255         CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
256         CUDA_LIBRARY_FIND(cuFuncGetAttribute);
257         CUDA_LIBRARY_FIND(cuArrayCreate);
258         CUDA_LIBRARY_FIND(cuArrayGetDescriptor);
259         CUDA_LIBRARY_FIND(cuArrayDestroy);
260         CUDA_LIBRARY_FIND(cuArray3DCreate);
261         CUDA_LIBRARY_FIND(cuArray3DGetDescriptor);
262         CUDA_LIBRARY_FIND(cuTexRefCreate);
263         CUDA_LIBRARY_FIND(cuTexRefDestroy);
264         CUDA_LIBRARY_FIND(cuTexRefSetArray);
265         CUDA_LIBRARY_FIND(cuTexRefSetAddress);
266         CUDA_LIBRARY_FIND(cuTexRefSetAddress2D);
267         CUDA_LIBRARY_FIND(cuTexRefSetFormat);
268         CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
269         CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
270         CUDA_LIBRARY_FIND(cuTexRefSetFlags);
271         CUDA_LIBRARY_FIND(cuTexRefGetAddress);
272         CUDA_LIBRARY_FIND(cuTexRefGetArray);
273         CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
274         CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
275         CUDA_LIBRARY_FIND(cuTexRefGetFormat);
276         CUDA_LIBRARY_FIND(cuTexRefGetFlags);
277         CUDA_LIBRARY_FIND(cuParamSetSize);
278         CUDA_LIBRARY_FIND(cuParamSeti);
279         CUDA_LIBRARY_FIND(cuParamSetf);
280         CUDA_LIBRARY_FIND(cuParamSetv);
281         CUDA_LIBRARY_FIND(cuParamSetTexRef);
282         CUDA_LIBRARY_FIND(cuLaunch);
283         CUDA_LIBRARY_FIND(cuLaunchGrid);
284         CUDA_LIBRARY_FIND(cuLaunchGridAsync);
285         CUDA_LIBRARY_FIND(cuEventCreate);
286         CUDA_LIBRARY_FIND(cuEventRecord);
287         CUDA_LIBRARY_FIND(cuEventQuery);
288         CUDA_LIBRARY_FIND(cuEventSynchronize);
289         CUDA_LIBRARY_FIND(cuEventDestroy);
290         CUDA_LIBRARY_FIND(cuEventElapsedTime);
291         CUDA_LIBRARY_FIND(cuStreamCreate);
292         CUDA_LIBRARY_FIND(cuStreamQuery);
293         CUDA_LIBRARY_FIND(cuStreamSynchronize);
294         CUDA_LIBRARY_FIND(cuStreamDestroy);
295
296         /* cuda 2.1 */
297         CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
298         CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
299         CUDA_LIBRARY_FIND(cuGLCtxCreate);
300         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
301         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
302
303         /* cuda 2.3 */
304         CUDA_LIBRARY_FIND(cuMemHostGetFlags);
305         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
306         CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
307
308         /* cuda 3.0 */
309         CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync);
310         CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
311         CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
312         CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
313         CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer);
314         CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
315         CUDA_LIBRARY_FIND(cuGraphicsMapResources);
316         CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
317         CUDA_LIBRARY_FIND(cuGetExportTable);
318
319         /* cuda 3.1 */
320         CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
321         CUDA_LIBRARY_FIND(cuSurfRefSetArray);
322         CUDA_LIBRARY_FIND(cuSurfRefGetArray);
323         CUDA_LIBRARY_FIND(cuCtxSetLimit);
324         CUDA_LIBRARY_FIND(cuCtxGetLimit);
325
326         /* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library
327            has both the old ones for compatibility and new ones with _v2 postfix,
328            we load the _v2 ones here. */
329         CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem);
330         CUDA_LIBRARY_FIND_V2(cuCtxCreate);
331         CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal);
332         CUDA_LIBRARY_FIND_V2(cuMemGetInfo);
333         CUDA_LIBRARY_FIND_V2(cuMemAlloc);
334         CUDA_LIBRARY_FIND_V2(cuMemAllocPitch);
335         CUDA_LIBRARY_FIND_V2(cuMemFree);
336         CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange);
337         CUDA_LIBRARY_FIND_V2(cuMemAllocHost);
338         CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer);
339         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD);
340         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH);
341         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD);
342         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA);
343         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD);
344         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA);
345         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH);
346         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA);
347         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync);
348         CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync);
349         CUDA_LIBRARY_FIND_V2(cuMemcpy2D);
350         CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned);
351         CUDA_LIBRARY_FIND_V2(cuMemcpy3D);
352         CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync);
353         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync);
354         CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync);
355         CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync);
356         CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync);
357         CUDA_LIBRARY_FIND_V2(cuMemsetD8);
358         CUDA_LIBRARY_FIND_V2(cuMemsetD16);
359         CUDA_LIBRARY_FIND_V2(cuMemsetD32);
360         CUDA_LIBRARY_FIND_V2(cuMemsetD2D8);
361         CUDA_LIBRARY_FIND_V2(cuMemsetD2D16);
362         CUDA_LIBRARY_FIND_V2(cuMemsetD2D32);
363         CUDA_LIBRARY_FIND_V2(cuArrayCreate);
364         CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor);
365         CUDA_LIBRARY_FIND_V2(cuArray3DCreate);
366         CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor);
367         CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress);
368         CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D);
369         CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress);
370         CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer);
371         CUDA_LIBRARY_FIND_V2(cuGLCtxCreate);
372
373         /* cuda 4.0 */
374         CUDA_LIBRARY_FIND(cuCtxSetCurrent);
375
376 #ifndef WITH_CUDA_BINARIES
377         if(cuCompilerPath() == "")
378                 return false;
379 #endif
380
381         /* success */
382         result = true;
383
384         return result;
385 }
386
387 string cuCompilerPath()
388 {
389 #ifdef _WIN32
390         const char *defaultpath = "C:/CUDA/bin";
391         const char *executable = "nvcc.exe";
392 #else
393         const char *defaultpath = "/usr/local/cuda/bin";
394         const char *executable = "nvcc";
395 #endif
396
397         const char *binpath = getenv("CUDA_BIN_PATH");
398
399         string nvcc;
400
401         if(binpath)
402                 nvcc = path_join(binpath, executable);
403         else
404                 nvcc = path_join(defaultpath, executable);
405
406         return (path_exists(nvcc))? nvcc: "";
407 }
408
409 CCL_NAMESPACE_END
410