Add BLI_strcpy_rlen; replace strcat, which was used in a misleading way.
[blender.git] / intern / cycles / util / util_cuda.h
/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
18
19 #ifndef __UTIL_CUDA_H__
20 #define __UTIL_CUDA_H__
21
22 #include <stdlib.h>
23 #include "util_opengl.h"
24 #include "util_string.h"
25
26 CCL_NAMESPACE_BEGIN
27
28 /* CUDA is linked in dynamically at runtime, so we can start the application
29  * without requiring a CUDA installation. Code adapted from the example
30  * matrixMulDynlinkJIT in the CUDA SDK. */
31
32 bool cuLibraryInit();
33 bool cuHavePrecompiledKernels();
34 string cuCompilerPath();
35
36 CCL_NAMESPACE_END
37
/* defines, structs, enums */

/* Version number of the driver API these declarations were written against.
 * The real cuda.h is not included by this header, so the value is fixed here. */
#define CUDA_VERSION 3020

/* Device memory address. It is an unsigned 64-bit integer when any of the
 * 64-bit target macros tested below is defined, and a plain unsigned int
 * otherwise. */
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(__LP64__)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif
47
/* Handle types. CUdevice is a plain int; every other handle is a pointer to
 * a struct that is only forward-declared here, so it can be passed around
 * but never dereferenced by code including this header. */
typedef int CUdevice;
typedef struct CUctx_st *CUcontext;
typedef struct CUmod_st *CUmodule;
typedef struct CUfunc_st *CUfunction;
typedef struct CUarray_st *CUarray;
typedef struct CUtexref_st *CUtexref;
typedef struct CUsurfref_st *CUsurfref;
typedef struct CUevent_st *CUevent;
typedef struct CUstream_st *CUstream;
typedef struct CUgraphicsResource_st *CUgraphicsResource;
58
/* 16-byte identifier; used in this header by tcuGetExportTable() to name
 * the export table being requested. */
typedef struct CUuuid_st {
	char bytes[16];
} CUuuid;
62
/* Context creation flags (the `flags` argument of tcuCtxCreate and friends).
 * The three SCHED values share the low two bits selected by
 * CU_CTX_SCHED_MASK; the remaining entries are independent bits.
 * CU_CTX_FLAGS_MASK (0x1f) is the union of every bit defined here. */
typedef enum CUctx_flags_enum {
	CU_CTX_SCHED_AUTO  = 0,
	CU_CTX_SCHED_SPIN  = 1,
	CU_CTX_SCHED_YIELD = 2,
	CU_CTX_SCHED_MASK  = 0x3,
	CU_CTX_BLOCKING_SYNC = 4,
	CU_CTX_MAP_HOST = 8,
	CU_CTX_LMEM_RESIZE_TO_MAX = 16,
	CU_CTX_FLAGS_MASK  = 0x1f
} CUctx_flags;
73
/* Event creation flags (the `Flags` argument of tcuEventCreate).
 * The two non-default values are distinct bits. */
typedef enum CUevent_flags_enum {
	CU_EVENT_DEFAULT        = 0,
	CU_EVENT_BLOCKING_SYNC  = 1,
	CU_EVENT_DISABLE_TIMING = 2
} CUevent_flags;
79
/* Element formats for arrays and texture references; used by the array
 * descriptors and by tcuTexRefSetFormat / tcuTexRefGetFormat in this header.
 * The numeric values are not contiguous. */
typedef enum CUarray_format_enum {
	CU_AD_FORMAT_UNSIGNED_INT8  = 0x01,
	CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
	CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
	CU_AD_FORMAT_SIGNED_INT8    = 0x08,
	CU_AD_FORMAT_SIGNED_INT16   = 0x09,
	CU_AD_FORMAT_SIGNED_INT32   = 0x0a,
	CU_AD_FORMAT_HALF           = 0x10,
	CU_AD_FORMAT_FLOAT          = 0x20
} CUarray_format;
90
/* Texture addressing modes, passed to tcuTexRefSetAddressMode and returned
 * by tcuTexRefGetAddressMode. */
typedef enum CUaddress_mode_enum {
	CU_TR_ADDRESS_MODE_WRAP   = 0,
	CU_TR_ADDRESS_MODE_CLAMP  = 1,
	CU_TR_ADDRESS_MODE_MIRROR = 2,
	CU_TR_ADDRESS_MODE_BORDER = 3
} CUaddress_mode;
97
/* Texture filtering modes, passed to tcuTexRefSetFilterMode and returned by
 * tcuTexRefGetFilterMode. */
typedef enum CUfilter_mode_enum {
	CU_TR_FILTER_MODE_POINT  = 0,
	CU_TR_FILTER_MODE_LINEAR = 1
} CUfilter_mode;
102
/* Attribute selectors for tcuDeviceGetAttribute.
 * Two values deliberately carry two names each: 8 (shared memory per block)
 * and 12 (registers per block) exist both with and without the MAX_ prefix. */
typedef enum CUdevice_attribute_enum {
	CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
	CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
	CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
	CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
	CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
	CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
	CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
	CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
	CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,  /* alias of the entry above */
	CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
	CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
	CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
	CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
	CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,  /* alias of the entry above */
	CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
	CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
	CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
	CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
	CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
	CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
	CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
	CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
	CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
	CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
	CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
	CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
	CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
	CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
	CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
} CUdevice_attribute;
142
/* Device properties record filled in through tcuDeviceGetProperties.
 * Every member is an int; the struct holds 14 ints in total. */
typedef struct CUdevprop_st {
	int maxThreadsPerBlock;
	int maxThreadsDim[3];
	int maxGridSize[3];
	int sharedMemPerBlock;
	int totalConstantMemory;
	int SIMDWidth;
	int memPitch;
	int regsPerBlock;
	int clockRate;
	int textureAlign;
} CUdevprop;
155
/* Attribute selectors for tcuFuncGetAttribute.
 * CU_FUNC_ATTRIBUTE_MAX has no explicit value and therefore evaluates to 7,
 * one past the last real attribute. */
typedef enum CUfunction_attribute_enum {
	CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
	CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
	CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
	CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
	CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
	CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
	CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
	CU_FUNC_ATTRIBUTE_MAX
} CUfunction_attribute;
166
/* Cache configuration preference, used by the tcuCtx{Get,Set}CacheConfig
 * and tcuFuncSetCacheConfig function types. */
typedef enum CUfunc_cache_enum {
	CU_FUNC_CACHE_PREFER_NONE    = 0x00,
	CU_FUNC_CACHE_PREFER_SHARED  = 0x01,
	CU_FUNC_CACHE_PREFER_L1      = 0x02
} CUfunc_cache;
172
/* Kind of memory a copy endpoint refers to; stored in the srcMemoryType /
 * dstMemoryType members of CUDA_MEMCPY2D and CUDA_MEMCPY3D. Values start
 * at 1, so a zero-initialised descriptor names no valid memory type. */
typedef enum CUmemorytype_enum {
	CU_MEMORYTYPE_HOST   = 0x01,
	CU_MEMORYTYPE_DEVICE = 0x02,
	CU_MEMORYTYPE_ARRAY  = 0x03
} CUmemorytype;
178
/* Compute mode values.
 * NOTE(review): presumably the values reported for
 * CU_DEVICE_ATTRIBUTE_COMPUTE_MODE -- confirm against the driver API docs. */
typedef enum CUcomputemode_enum {
	CU_COMPUTEMODE_DEFAULT    = 0,
	CU_COMPUTEMODE_EXCLUSIVE  = 1,
	CU_COMPUTEMODE_PROHIBITED = 2
} CUcomputemode;
184
/* Option keys for the `options` array of tcuModuleLoadDataEx.
 * Only the first entry has an explicit value; the rest count up from it,
 * so the order of the entries is significant. */
typedef enum CUjit_option_enum
{
	CU_JIT_MAX_REGISTERS = 0,
	CU_JIT_THREADS_PER_BLOCK,           /* 1 */
	CU_JIT_WALL_TIME,                   /* 2 */
	CU_JIT_INFO_LOG_BUFFER,             /* 3 */
	CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,  /* 4 */
	CU_JIT_ERROR_LOG_BUFFER,            /* 5 */
	CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, /* 6 */
	CU_JIT_OPTIMIZATION_LEVEL,          /* 7 */
	CU_JIT_TARGET_FROM_CUCONTEXT,       /* 8 */
	CU_JIT_TARGET,                      /* 9 */
	CU_JIT_FALLBACK_STRATEGY            /* 10 */
} CUjit_option;
200
/* JIT compilation targets. The enumerators are sequential indices counting
 * up from 0 (COMPUTE_10 is 0, COMPUTE_35 is 7); the numeric value is NOT
 * the compute capability encoded in the name. */
typedef enum CUjit_target_enum
{
	CU_TARGET_COMPUTE_10 = 0,
	CU_TARGET_COMPUTE_11,  /* 1 */
	CU_TARGET_COMPUTE_12,  /* 2 */
	CU_TARGET_COMPUTE_13,  /* 3 */
	CU_TARGET_COMPUTE_20,  /* 4 */
	CU_TARGET_COMPUTE_21,  /* 5 */
	CU_TARGET_COMPUTE_30,  /* 6 */
	CU_TARGET_COMPUTE_35   /* 7 */
} CUjit_target;
212
/* JIT fallback choices.
 * NOTE(review): presumably the value paired with CU_JIT_FALLBACK_STRATEGY --
 * confirm against the driver API docs. */
typedef enum CUjit_fallback_enum
{
	CU_PREFER_PTX = 0,
	CU_PREFER_BINARY  /* 1 */
} CUjit_fallback;
219
/* Flags for registering a graphics resource; this header defines only the
 * "none" value. */
typedef enum CUgraphicsRegisterFlags_enum {
	CU_GRAPHICS_REGISTER_FLAGS_NONE  = 0x00
} CUgraphicsRegisterFlags;
223
/* Flags describing how a mapped graphics resource will be accessed.
 * NOTE(review): presumably the `flags` argument of
 * tcuGraphicsResourceSetMapFlags -- confirm. */
typedef enum CUgraphicsMapResourceFlags_enum {
	CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0x00,
	CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 0x01,
	CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
} CUgraphicsMapResourceFlags;
229
/* Cube map face indices, ordered +X, -X, +Y, -Y, +Z, -Z (0 through 5). */
typedef enum CUarray_cubemap_face_enum {
	CU_CUBEMAP_FACE_POSITIVE_X  = 0x00,
	CU_CUBEMAP_FACE_NEGATIVE_X  = 0x01,
	CU_CUBEMAP_FACE_POSITIVE_Y  = 0x02,
	CU_CUBEMAP_FACE_NEGATIVE_Y  = 0x03,
	CU_CUBEMAP_FACE_POSITIVE_Z  = 0x04,
	CU_CUBEMAP_FACE_NEGATIVE_Z  = 0x05
} CUarray_cubemap_face;
238
/* Limit selectors for the tcuCtxSetLimit / tcuCtxGetLimit function types. */
typedef enum CUlimit_enum {
	CU_LIMIT_STACK_SIZE        = 0x00,
	CU_LIMIT_PRINTF_FIFO_SIZE  = 0x01,
	CU_LIMIT_MALLOC_HEAP_SIZE  = 0x02
} CUlimit;
244
/* Result code returned by every tcu* function type in this header.
 * CUDA_SUCCESS is 0; every other enumerator is a non-zero error code.
 * The codes are grouped by hundreds and are not contiguous. */
typedef enum cudaError_enum {
	CUDA_SUCCESS                              = 0,
	CUDA_ERROR_INVALID_VALUE                  = 1,
	CUDA_ERROR_OUT_OF_MEMORY                  = 2,
	CUDA_ERROR_NOT_INITIALIZED                = 3,
	CUDA_ERROR_DEINITIALIZED                  = 4,
	CUDA_ERROR_NO_DEVICE                      = 100,
	CUDA_ERROR_INVALID_DEVICE                 = 101,
	CUDA_ERROR_INVALID_IMAGE                  = 200,
	CUDA_ERROR_INVALID_CONTEXT                = 201,
	CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
	CUDA_ERROR_MAP_FAILED                     = 205,
	CUDA_ERROR_UNMAP_FAILED                   = 206,
	CUDA_ERROR_ARRAY_IS_MAPPED                = 207,
	CUDA_ERROR_ALREADY_MAPPED                 = 208,
	CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,
	CUDA_ERROR_ALREADY_ACQUIRED               = 210,
	CUDA_ERROR_NOT_MAPPED                     = 211,
	CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
	CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,
	CUDA_ERROR_ECC_UNCORRECTABLE              = 214,
	CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,
	CUDA_ERROR_INVALID_SOURCE                 = 300,
	CUDA_ERROR_FILE_NOT_FOUND                 = 301,
	CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
	CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
	CUDA_ERROR_OPERATING_SYSTEM               = 304,
	CUDA_ERROR_INVALID_HANDLE                 = 400,
	CUDA_ERROR_NOT_FOUND                      = 500,
	CUDA_ERROR_NOT_READY                      = 600,
	CUDA_ERROR_LAUNCH_FAILED                  = 700,
	CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
	CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,
	CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
	CUDA_ERROR_UNKNOWN                        = 999
} CUresult;
281
/* Single-bit flags that can be OR-ed together.
 * NOTE(review): presumably the `Flags` argument of tcuMemHostAlloc --
 * confirm against the driver API docs. */
#define CU_MEMHOSTALLOC_PORTABLE        0x01
#define CU_MEMHOSTALLOC_DEVICEMAP       0x02
#define CU_MEMHOSTALLOC_WRITECOMBINED   0x04
285
286 typedef struct CUDA_MEMCPY2D_st {
287         size_t srcXInBytes;
288         size_t srcY;
289
290         CUmemorytype srcMemoryType;
291         const void *srcHost;
292         CUdeviceptr srcDevice;
293         CUarray srcArray;
294         size_t srcPitch;
295
296         size_t dstXInBytes;
297         size_t dstY;
298
299         CUmemorytype dstMemoryType;
300         void *dstHost;
301         CUdeviceptr dstDevice;
302         CUarray dstArray;
303         size_t dstPitch;
304
305         size_t WidthInBytes;
306         size_t Height;
307 } CUDA_MEMCPY2D;
308
309 typedef struct CUDA_MEMCPY3D_st {
310         size_t srcXInBytes;
311         size_t srcY;
312         size_t srcZ;
313         size_t srcLOD;
314         CUmemorytype srcMemoryType;
315         const void *srcHost;
316         CUdeviceptr srcDevice;
317         CUarray srcArray;
318         void *reserved0;
319         size_t srcPitch;
320         size_t srcHeight;
321
322         size_t dstXInBytes;
323         size_t dstY;
324         size_t dstZ;
325         size_t dstLOD;
326         CUmemorytype dstMemoryType;
327         void *dstHost;
328         CUdeviceptr dstDevice;
329         CUarray dstArray;
330         void *reserved1;
331         size_t dstPitch;
332         size_t dstHeight;
333
334         size_t WidthInBytes;
335         size_t Height;
336         size_t Depth;
337 } CUDA_MEMCPY3D;
338
339 typedef struct CUDA_ARRAY_DESCRIPTOR_st
340 {
341         size_t Width;
342         size_t Height;
343
344         CUarray_format Format;
345         unsigned int NumChannels;
346 } CUDA_ARRAY_DESCRIPTOR;
347
348 typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
349 {
350         size_t Width;
351         size_t Height;
352         size_t Depth;
353
354         CUarray_format Format;
355         unsigned int NumChannels;
356         unsigned int Flags;
357 } CUDA_ARRAY3D_DESCRIPTOR;
358
/* Bits with the CUDA_ARRAY3D_ prefix.
 * NOTE(review): presumably for CUDA_ARRAY3D_DESCRIPTOR::Flags -- confirm. */
#define CUDA_ARRAY3D_2DARRAY        0x01
#define CUDA_ARRAY3D_SURFACE_LDST   0x02

/* NOTE(review): presumably a `Flags` value for tcuTexRefSetArray -- confirm. */
#define CU_TRSA_OVERRIDE_FORMAT 0x01

/* NOTE(review): presumably bits for tcuTexRefSetFlags / tcuTexRefGetFlags --
 * confirm. */
#define CU_TRSF_READ_AS_INTEGER         0x01
#define CU_TRSF_NORMALIZED_COORDINATES  0x02
#define CU_TRSF_SRGB  0x10

/* Parenthesised so the negative literal stays one operand wherever the
 * macro is expanded.
 * NOTE(review): presumably the `texunit` argument of tcuParamSetTexRef --
 * confirm. */
#define CU_PARAM_TR_DEFAULT (-1)
366
/* Calling convention applied to every tcu* function type in this header:
 * __stdcall when _WIN32 is defined, nothing otherwise. */
#ifdef _WIN32
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif
372
373 /* function types */
374
375 typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
376 typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion);
377 typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
378 typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
379 typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
380 typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
381 typedef CUresult CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev);
382 typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
383 typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
384 typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
385 typedef CUresult CUDAAPI tcuCtxDestroy(CUcontext ctx);
386 typedef CUresult CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags);
387 typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
388 typedef CUresult CUDAAPI tcuCtxPushCurrent(CUcontext ctx );
389 typedef CUresult CUDAAPI tcuCtxPopCurrent(CUcontext *pctx);
390 typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device);
391 typedef CUresult CUDAAPI tcuCtxSynchronize(void);
392 typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
393 typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit);
394 typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache *pconfig);
395 typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
396 typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int *version);
397 typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname);
398 typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image);
399 typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
400 typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
401 typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
402 typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
403 typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
404 typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
405 typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
406 typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total);
407 typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
408 typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
409 typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr);
410 typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
411 typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize);
412 typedef CUresult CUDAAPI tcuMemFreeHost(void *p);
413 typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
414 typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
415 typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p);
416 typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
417 typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
418 typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
419 typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
420 typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
421 typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
422 typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
423 typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
424 typedef CUresult CUDAAPI tcuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
425 typedef CUresult CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
426 typedef CUresult CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
427 typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
428 typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
429 typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
430 typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
431 typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
432 typedef CUresult CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
433 typedef CUresult CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
434 typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
435 typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
436 typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
437 typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
438 typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
439 typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
440 typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
441 typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
442 typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
443 typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
444 typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
445 typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
446 typedef CUresult CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
447 typedef CUresult CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
448 typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
449 typedef CUresult CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
450 typedef CUresult CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
451 typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags);
452 typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
453 typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
454 typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
455 typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream);
456 typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags);
457 typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
458 typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
459 typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
460 typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent);
461 typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
462 typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
463 typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
464 typedef CUresult CUDAAPI tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
465 typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
466 typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
467 typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
468 typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
469 typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
470 typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
471 typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
472 typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
473 typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
474 typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
475 typedef CUresult CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
476 typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
477 typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
478 typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
479 typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
480 typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
481 typedef CUresult CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
482 typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
483 typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
484 typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
485 typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
486 typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
487 typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef);
488 typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
489 typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
490 typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
491 typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
492 typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
493 typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
494 typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
495 typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
496 typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
497 typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
498 typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device );
499 typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
500 typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
501 typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
502
503 /* function declarations */
504
505 extern tcuInit *cuInit;
506 extern tcuDriverGetVersion *cuDriverGetVersion;
507 extern tcuDeviceGet *cuDeviceGet;
508 extern tcuDeviceGetCount *cuDeviceGetCount;
509 extern tcuDeviceGetName *cuDeviceGetName;
510 extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
511 extern tcuDeviceTotalMem *cuDeviceTotalMem;
512 extern tcuDeviceGetProperties *cuDeviceGetProperties;
513 extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
514 extern tcuCtxCreate *cuCtxCreate;
515 extern tcuCtxDestroy *cuCtxDestroy;
516 extern tcuCtxAttach *cuCtxAttach;
517 extern tcuCtxDetach *cuCtxDetach;
518 extern tcuCtxPushCurrent *cuCtxPushCurrent;
519 extern tcuCtxPopCurrent *cuCtxPopCurrent;
520 extern tcuCtxGetDevice *cuCtxGetDevice;
521 extern tcuCtxSynchronize *cuCtxSynchronize;
522 extern tcuModuleLoad *cuModuleLoad;
523 extern tcuModuleLoadData *cuModuleLoadData;
524 extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
525 extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
526 extern tcuModuleUnload *cuModuleUnload;
527 extern tcuModuleGetFunction *cuModuleGetFunction;
528 extern tcuModuleGetGlobal *cuModuleGetGlobal;
529 extern tcuModuleGetTexRef *cuModuleGetTexRef;
530 extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
531 extern tcuMemGetInfo *cuMemGetInfo;
532 extern tcuMemAlloc *cuMemAlloc;
533 extern tcuMemAllocPitch *cuMemAllocPitch;
534 extern tcuMemFree *cuMemFree;
535 extern tcuMemGetAddressRange *cuMemGetAddressRange;
536 extern tcuMemAllocHost *cuMemAllocHost;
537 extern tcuMemFreeHost *cuMemFreeHost;
538 extern tcuMemHostAlloc *cuMemHostAlloc;
539 extern tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
540 extern tcuMemHostGetFlags *cuMemHostGetFlags;
541 extern tcuMemcpyHtoD *cuMemcpyHtoD;
542 extern tcuMemcpyDtoH *cuMemcpyDtoH;
543 extern tcuMemcpyDtoD *cuMemcpyDtoD;
544 extern tcuMemcpyDtoA *cuMemcpyDtoA;
545 extern tcuMemcpyAtoD *cuMemcpyAtoD;
546 extern tcuMemcpyHtoA *cuMemcpyHtoA;
547 extern tcuMemcpyAtoH *cuMemcpyAtoH;
548 extern tcuMemcpyAtoA *cuMemcpyAtoA;
549 extern tcuMemcpy2D *cuMemcpy2D;
550 extern tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
551 extern tcuMemcpy3D *cuMemcpy3D;
552 extern tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
553 extern tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
554 extern tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
555 extern tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
556 extern tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
557 extern tcuMemcpy2DAsync *cuMemcpy2DAsync;
558 extern tcuMemcpy3DAsync *cuMemcpy3DAsync;
559 extern tcuMemsetD8 *cuMemsetD8;
560 extern tcuMemsetD16 *cuMemsetD16;
561 extern tcuMemsetD32 *cuMemsetD32;
562 extern tcuMemsetD2D8 *cuMemsetD2D8;
563 extern tcuMemsetD2D16 *cuMemsetD2D16;
564 extern tcuMemsetD2D32 *cuMemsetD2D32;
565 extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
566 extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
567 extern tcuFuncGetAttribute *cuFuncGetAttribute;
568 extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
569 extern tcuArrayCreate *cuArrayCreate;
570 extern tcuArrayGetDescriptor *cuArrayGetDescriptor;
571 extern tcuArrayDestroy *cuArrayDestroy;
572 extern tcuArray3DCreate *cuArray3DCreate;
573 extern tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
574 extern tcuTexRefCreate *cuTexRefCreate;
575 extern tcuTexRefDestroy *cuTexRefDestroy;
576 extern tcuTexRefSetArray *cuTexRefSetArray;
577 extern tcuTexRefSetAddress *cuTexRefSetAddress;
578 extern tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
579 extern tcuTexRefSetFormat *cuTexRefSetFormat;
580 extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
581 extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
582 extern tcuTexRefSetFlags *cuTexRefSetFlags;
583 extern tcuTexRefGetAddress *cuTexRefGetAddress;
584 extern tcuTexRefGetArray *cuTexRefGetArray;
585 extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
586 extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
587 extern tcuTexRefGetFormat *cuTexRefGetFormat;
588 extern tcuTexRefGetFlags *cuTexRefGetFlags;
589 extern tcuSurfRefSetArray *cuSurfRefSetArray;
590 extern tcuSurfRefGetArray *cuSurfRefGetArray;
591 extern tcuParamSetSize *cuParamSetSize;
592 extern tcuParamSeti *cuParamSeti;
593 extern tcuParamSetf *cuParamSetf;
594 extern tcuParamSetv *cuParamSetv;
595 extern tcuParamSetTexRef *cuParamSetTexRef;
596 extern tcuLaunch *cuLaunch;
597 extern tcuLaunchGrid *cuLaunchGrid;
598 extern tcuLaunchGridAsync *cuLaunchGridAsync;
599 extern tcuEventCreate *cuEventCreate;
600 extern tcuEventRecord *cuEventRecord;
601 extern tcuEventQuery *cuEventQuery;
602 extern tcuEventSynchronize *cuEventSynchronize;
603 extern tcuEventDestroy *cuEventDestroy;
604 extern tcuEventElapsedTime *cuEventElapsedTime;
605 extern tcuStreamCreate *cuStreamCreate;
606 extern tcuStreamQuery *cuStreamQuery;
607 extern tcuStreamSynchronize *cuStreamSynchronize;
608 extern tcuStreamDestroy *cuStreamDestroy;
609 extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
610 extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
611 extern tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
612 extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
613 extern tcuGraphicsMapResources *cuGraphicsMapResources;
614 extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
615 extern tcuGetExportTable *cuGetExportTable;
616 extern tcuCtxSetLimit *cuCtxSetLimit;
617 extern tcuCtxGetLimit *cuCtxGetLimit;
618 extern tcuGLCtxCreate *cuGLCtxCreate;
619 extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
620 extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
621 extern tcuCtxSetCurrent *cuCtxSetCurrent;
622
623 #endif /* __UTIL_CUDA_H__ */
624