CUEW: Update to latest version
author Sergey Sharybin <sergey.vfx@gmail.com>
Wed, 25 Nov 2015 18:27:40 +0000 (23:27 +0500)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Wed, 25 Nov 2015 18:30:46 +0000 (23:30 +0500)
It is now updated against CUDA Toolkit 7.5. There should currently be no
functional changes; this just begins some groundwork for the future.

extern/cuew/auto/cuda_extra.py
extern/cuew/auto/cuew_gen.py
extern/cuew/include/cuew.h
extern/cuew/src/cuew.c

index fd4f466df8309310493976b1e846610a01e8d53d..5fd2c17933972e39c5e62adb310094d361b1b676 100644 (file)
@@ -101,7 +101,7 @@ int cuewCompilerVersion(void) {
 
   while (!feof(pipe)) {
     if (fgets(buf, sizeof(buf), pipe) != NULL) {
-      strncat(output, buf, sizeof(output) - strlen(output));
+      strncat(output, buf, sizeof(output) - strlen(output) - 1);
     }
   }
 
index a94525c52b12b3cf7af9011910dc03970989678d..75e5bf876f486fc85b7bbd91609c1e358b31b007 100644 (file)
@@ -276,7 +276,11 @@ def parse_files():
                 if line[0].isspace() and line.lstrip().startswith("#define"):
                     line = line[12:-1]
                     token = line.split()
-                    if len(token) == 2 and token[1].endswith("_v2"):
+                    if len(token) == 2 and (token[1].endswith("_v2") or
+                                            token[1].endswith("_v2)")):
+                        if token[1].startswith('__CUDA_API_PTDS') or \
+                           token[1].startswith('__CUDA_API_PTSZ'):
+                            token[1] = token[1][16:-1]
                         DEFINES_V2.append(token)
 
         v = FuncDefVisitor()
@@ -560,7 +564,8 @@ def print_implementation():
         if error in CUDA_ERRORS:
             str = CUDA_ERRORS[error]
         else:
-            str = error[11:]
+            temp = error[11:].replace('_', ' ')
+            str = temp[0] + temp[1:].lower()
         print("    case %s: return \"%s\";" % (error, str))
 
     print("    default: return \"Unknown CUDA error value\";")
index fd03311ad41cefb53c7fac7f4e0765b7e27227a1..1b12e5b44630fb353d610b459e0230c61a6399d5 100644 (file)
@@ -27,13 +27,16 @@ extern "C" {
 #define CUEW_VERSION_MAJOR 1
 #define CUEW_VERSION_MINOR 2
 
-#define CUDA_VERSION 6000
+#define CUDA_VERSION 7050
 #define CU_IPC_HANDLE_SIZE 64
+#define CU_STREAM_LEGACY ((CUstream)0x1)
+#define CU_STREAM_PER_THREAD ((CUstream)0x2)
 #define CU_MEMHOSTALLOC_PORTABLE 0x01
 #define CU_MEMHOSTALLOC_DEVICEMAP 0x02
 #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
 #define CU_MEMHOSTREGISTER_PORTABLE 0x01
 #define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
+#define CU_MEMHOSTREGISTER_IOMEMORY 0x04
 #define CUDA_ARRAY3D_LAYERED 0x01
 #define CUDA_ARRAY3D_2DARRAY 0x01
 #define CUDA_ARRAY3D_SURFACE_LDST 0x02
@@ -100,10 +103,16 @@ extern "C" {
 #define cuCtxPushCurrent cuCtxPushCurrent_v2
 #define cuStreamDestroy cuStreamDestroy_v2
 #define cuEventDestroy cuEventDestroy_v2
+#define cuLinkCreate cuLinkCreate_v2
+#define cuLinkAddData cuLinkAddData_v2
+#define cuLinkAddFile cuLinkAddFile_v2
+#define cuMemHostRegister cuMemHostRegister_v2
+#define cuGraphicsResourceSetMapFlags cuGraphicsResourceSetMapFlags_v2
 #define cuTexRefSetAddress2D cuTexRefSetAddress2D_v2
 #define cuGLCtxCreate cuGLCtxCreate_v2
 #define cuGLMapBufferObject cuGLMapBufferObject_v2
 #define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
+#define cuGLGetDevices cuGLGetDevices_v2
 
 /* Types. */
 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
@@ -172,6 +181,11 @@ typedef enum CUevent_flags_enum {
   CU_EVENT_INTERPROCESS = 0x4,
 } CUevent_flags;
 
+typedef enum CUoccupancy_flags_enum {
+  CU_OCCUPANCY_DEFAULT = 0x0,
+  CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1,
+} CUoccupancy_flags;
+
 typedef enum CUarray_format_enum {
   CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
   CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
@@ -381,7 +395,9 @@ typedef enum CUjit_target_enum {
   CU_TARGET_COMPUTE_30 = 30,
   CU_TARGET_COMPUTE_32 = 32,
   CU_TARGET_COMPUTE_35 = 35,
+  CU_TARGET_COMPUTE_37 = 37,
   CU_TARGET_COMPUTE_50 = 50,
+  CU_TARGET_COMPUTE_52 = 52,
 } CUjit_target;
 
 typedef enum CUjit_fallback_enum {
@@ -474,6 +490,7 @@ typedef enum cudaError_enum {
   CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
   CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
   CUDA_ERROR_INVALID_PTX = 218,
+  CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
   CUDA_ERROR_INVALID_SOURCE = 300,
   CUDA_ERROR_FILE_NOT_FOUND = 301,
   CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
@@ -506,6 +523,7 @@ typedef enum cudaError_enum {
 } CUresult;
 
 typedef void* CUstreamCallback;
+typedef size_t* CUoccupancyB2DSize;
 
 typedef struct CUDA_MEMCPY2D_st {
   size_t srcXInBytes;
@@ -730,6 +748,11 @@ typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t* bytes, CUdevice dev);
 typedef CUresult CUDAAPI tcuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev);
 typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev);
 typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease(CUdevice dev);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned flags);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned* flags, int* active);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxReset(CUdevice dev);
 typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned flags, CUdevice dev);
 typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
 typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
@@ -737,6 +760,7 @@ typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx);
 typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
 typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx);
 typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device);
+typedef CUresult CUDAAPI tcuCtxGetFlags(unsigned* flags);
 typedef CUresult CUDAAPI tcuCtxSynchronize(void);
 typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
 typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit);
@@ -757,9 +781,9 @@ typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod,
 typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
-typedef CUresult CUDAAPI tcuLinkCreate(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
-typedef CUresult CUDAAPI tcuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues);
-typedef CUresult CUDAAPI tcuLinkAddFile(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
+typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void* optionValues);
 typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void* cubinOut, size_t* sizeOut);
 typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
 typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total);
@@ -780,7 +804,7 @@ typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandl
 typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
 typedef CUresult CUDAAPI tcuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned Flags);
 typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
-typedef CUresult CUDAAPI tcuMemHostRegister(void* p, size_t bytesize, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostRegister_v2(void* p, size_t bytesize, unsigned Flags);
 typedef CUresult CUDAAPI tcuMemHostUnregister(void* p);
 typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
 typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
@@ -828,6 +852,7 @@ typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipma
 typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
 typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
 typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr);
+typedef CUresult CUDAAPI tcuPointerGetAttributes(unsigned numAttributes, CUpointer_attribute* attributes, void* data, CUdeviceptr ptr);
 typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned Flags);
 typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned flags, int priority);
 typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority);
@@ -858,6 +883,10 @@ typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
 typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
 typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
 typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize);
+typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned flags);
+typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit);
+typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned flags);
 typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned Flags);
 typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned Flags);
 typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
@@ -900,14 +929,14 @@ typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resour
 typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned arrayIndex, unsigned mipLevel);
 typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource);
 typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
-typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned flags);
+typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned flags);
 typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
 typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
 typedef CUresult CUDAAPI tcuGetExportTable(const void* ppExportTable, const CUuuid* pExportTableId);
 
 typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned Flags);
 typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned Flags);
-typedef CUresult CUDAAPI tcuGLGetDevices(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList);
+typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList);
 typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned Flags, CUdevice device);
 typedef CUresult CUDAAPI tcuGLInit(void);
 typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer);
@@ -931,6 +960,11 @@ extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
 extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
 extern tcuDeviceGetProperties *cuDeviceGetProperties;
 extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
+extern tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
+extern tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
+extern tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags;
+extern tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
+extern tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset;
 extern tcuCtxCreate_v2 *cuCtxCreate_v2;
 extern tcuCtxDestroy_v2 *cuCtxDestroy_v2;
 extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
@@ -938,6 +972,7 @@ extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
 extern tcuCtxSetCurrent *cuCtxSetCurrent;
 extern tcuCtxGetCurrent *cuCtxGetCurrent;
 extern tcuCtxGetDevice *cuCtxGetDevice;
+extern tcuCtxGetFlags *cuCtxGetFlags;
 extern tcuCtxSynchronize *cuCtxSynchronize;
 extern tcuCtxSetLimit *cuCtxSetLimit;
 extern tcuCtxGetLimit *cuCtxGetLimit;
@@ -958,9 +993,9 @@ extern tcuModuleGetFunction *cuModuleGetFunction;
 extern tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
 extern tcuModuleGetTexRef *cuModuleGetTexRef;
 extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
-extern tcuLinkCreate *cuLinkCreate;
-extern tcuLinkAddData *cuLinkAddData;
-extern tcuLinkAddFile *cuLinkAddFile;
+extern tcuLinkCreate_v2 *cuLinkCreate_v2;
+extern tcuLinkAddData_v2 *cuLinkAddData_v2;
+extern tcuLinkAddFile_v2 *cuLinkAddFile_v2;
 extern tcuLinkComplete *cuLinkComplete;
 extern tcuLinkDestroy *cuLinkDestroy;
 extern tcuMemGetInfo_v2 *cuMemGetInfo_v2;
@@ -981,7 +1016,7 @@ extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
 extern tcuIpcGetMemHandle *cuIpcGetMemHandle;
 extern tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
 extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
-extern tcuMemHostRegister *cuMemHostRegister;
+extern tcuMemHostRegister_v2 *cuMemHostRegister_v2;
 extern tcuMemHostUnregister *cuMemHostUnregister;
 extern tcuMemcpy *cuMemcpy;
 extern tcuMemcpyPeer *cuMemcpyPeer;
@@ -1029,6 +1064,7 @@ extern tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
 extern tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
 extern tcuPointerGetAttribute *cuPointerGetAttribute;
 extern tcuPointerSetAttribute *cuPointerSetAttribute;
+extern tcuPointerGetAttributes *cuPointerGetAttributes;
 extern tcuStreamCreate *cuStreamCreate;
 extern tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
 extern tcuStreamGetPriority *cuStreamGetPriority;
@@ -1059,6 +1095,10 @@ extern tcuLaunch *cuLaunch;
 extern tcuLaunchGrid *cuLaunchGrid;
 extern tcuLaunchGridAsync *cuLaunchGridAsync;
 extern tcuParamSetTexRef *cuParamSetTexRef;
+extern tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
+extern tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
+extern tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
+extern tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
 extern tcuTexRefSetArray *cuTexRefSetArray;
 extern tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
 extern tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
@@ -1101,14 +1141,14 @@ extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
 extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
 extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
 extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
-extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+extern tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
 extern tcuGraphicsMapResources *cuGraphicsMapResources;
 extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
 extern tcuGetExportTable *cuGetExportTable;
 
 extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
 extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
-extern tcuGLGetDevices *cuGLGetDevices;
+extern tcuGLGetDevices_v2 *cuGLGetDevices_v2;
 extern tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
 extern tcuGLInit *cuGLInit;
 extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
index da892efc0f47c2788d8abfc5497fc69a526a148d..3058e29d89f60804e04dbafaea12e59e456e802b 100644 (file)
@@ -36,7 +36,7 @@
 
 typedef HMODULE DynamicLibrary;
 
-#  define dynamic_library_open(path)         LoadLibrary(path)
+#  define dynamic_library_open(path)         LoadLibraryA(path)
 #  define dynamic_library_close(lib)         FreeLibrary(lib)
 #  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
 #else
@@ -70,6 +70,11 @@ tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
 tcuDeviceGetAttribute *cuDeviceGetAttribute;
 tcuDeviceGetProperties *cuDeviceGetProperties;
 tcuDeviceComputeCapability *cuDeviceComputeCapability;
+tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
+tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
+tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags;
+tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
+tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset;
 tcuCtxCreate_v2 *cuCtxCreate_v2;
 tcuCtxDestroy_v2 *cuCtxDestroy_v2;
 tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
@@ -77,6 +82,7 @@ tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
 tcuCtxSetCurrent *cuCtxSetCurrent;
 tcuCtxGetCurrent *cuCtxGetCurrent;
 tcuCtxGetDevice *cuCtxGetDevice;
+tcuCtxGetFlags *cuCtxGetFlags;
 tcuCtxSynchronize *cuCtxSynchronize;
 tcuCtxSetLimit *cuCtxSetLimit;
 tcuCtxGetLimit *cuCtxGetLimit;
@@ -97,9 +103,9 @@ tcuModuleGetFunction *cuModuleGetFunction;
 tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
 tcuModuleGetTexRef *cuModuleGetTexRef;
 tcuModuleGetSurfRef *cuModuleGetSurfRef;
-tcuLinkCreate *cuLinkCreate;
-tcuLinkAddData *cuLinkAddData;
-tcuLinkAddFile *cuLinkAddFile;
+tcuLinkCreate_v2 *cuLinkCreate_v2;
+tcuLinkAddData_v2 *cuLinkAddData_v2;
+tcuLinkAddFile_v2 *cuLinkAddFile_v2;
 tcuLinkComplete *cuLinkComplete;
 tcuLinkDestroy *cuLinkDestroy;
 tcuMemGetInfo_v2 *cuMemGetInfo_v2;
@@ -120,7 +126,7 @@ tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
 tcuIpcGetMemHandle *cuIpcGetMemHandle;
 tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
 tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
-tcuMemHostRegister *cuMemHostRegister;
+tcuMemHostRegister_v2 *cuMemHostRegister_v2;
 tcuMemHostUnregister *cuMemHostUnregister;
 tcuMemcpy *cuMemcpy;
 tcuMemcpyPeer *cuMemcpyPeer;
@@ -168,6 +174,7 @@ tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
 tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
 tcuPointerGetAttribute *cuPointerGetAttribute;
 tcuPointerSetAttribute *cuPointerSetAttribute;
+tcuPointerGetAttributes *cuPointerGetAttributes;
 tcuStreamCreate *cuStreamCreate;
 tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
 tcuStreamGetPriority *cuStreamGetPriority;
@@ -198,6 +205,10 @@ tcuLaunch *cuLaunch;
 tcuLaunchGrid *cuLaunchGrid;
 tcuLaunchGridAsync *cuLaunchGridAsync;
 tcuParamSetTexRef *cuParamSetTexRef;
+tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
+tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
+tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
+tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
 tcuTexRefSetArray *cuTexRefSetArray;
 tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
 tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
@@ -240,14 +251,14 @@ tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
 tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
 tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
 tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
-tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
 tcuGraphicsMapResources *cuGraphicsMapResources;
 tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
 tcuGetExportTable *cuGetExportTable;
 
 tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
 tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
-tcuGLGetDevices *cuGLGetDevices;
+tcuGLGetDevices_v2 *cuGLGetDevices_v2;
 tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
 tcuGLInit *cuGLInit;
 tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
@@ -328,6 +339,11 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
   CUDA_LIBRARY_FIND(cuDeviceGetProperties);
   CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
+  CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRetain);
+  CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRelease);
+  CUDA_LIBRARY_FIND(cuDevicePrimaryCtxSetFlags);
+  CUDA_LIBRARY_FIND(cuDevicePrimaryCtxGetState);
+  CUDA_LIBRARY_FIND(cuDevicePrimaryCtxReset);
   CUDA_LIBRARY_FIND(cuCtxCreate_v2);
   CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
   CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
@@ -335,6 +351,7 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuCtxSetCurrent);
   CUDA_LIBRARY_FIND(cuCtxGetCurrent);
   CUDA_LIBRARY_FIND(cuCtxGetDevice);
+  CUDA_LIBRARY_FIND(cuCtxGetFlags);
   CUDA_LIBRARY_FIND(cuCtxSynchronize);
   CUDA_LIBRARY_FIND(cuCtxSetLimit);
   CUDA_LIBRARY_FIND(cuCtxGetLimit);
@@ -355,9 +372,9 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
   CUDA_LIBRARY_FIND(cuModuleGetTexRef);
   CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
-  CUDA_LIBRARY_FIND(cuLinkCreate);
-  CUDA_LIBRARY_FIND(cuLinkAddData);
-  CUDA_LIBRARY_FIND(cuLinkAddFile);
+  CUDA_LIBRARY_FIND(cuLinkCreate_v2);
+  CUDA_LIBRARY_FIND(cuLinkAddData_v2);
+  CUDA_LIBRARY_FIND(cuLinkAddFile_v2);
   CUDA_LIBRARY_FIND(cuLinkComplete);
   CUDA_LIBRARY_FIND(cuLinkDestroy);
   CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
@@ -378,7 +395,7 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
   CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
   CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
-  CUDA_LIBRARY_FIND(cuMemHostRegister);
+  CUDA_LIBRARY_FIND(cuMemHostRegister_v2);
   CUDA_LIBRARY_FIND(cuMemHostUnregister);
   CUDA_LIBRARY_FIND(cuMemcpy);
   CUDA_LIBRARY_FIND(cuMemcpyPeer);
@@ -426,6 +443,7 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
   CUDA_LIBRARY_FIND(cuPointerGetAttribute);
   CUDA_LIBRARY_FIND(cuPointerSetAttribute);
+  CUDA_LIBRARY_FIND(cuPointerGetAttributes);
   CUDA_LIBRARY_FIND(cuStreamCreate);
   CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
   CUDA_LIBRARY_FIND(cuStreamGetPriority);
@@ -456,6 +474,10 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuLaunchGrid);
   CUDA_LIBRARY_FIND(cuLaunchGridAsync);
   CUDA_LIBRARY_FIND(cuParamSetTexRef);
+  CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessor);
+  CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags);
+  CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSize);
+  CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSizeWithFlags);
   CUDA_LIBRARY_FIND(cuTexRefSetArray);
   CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
   CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
@@ -498,14 +520,14 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
   CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
   CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
-  CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
+  CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags_v2);
   CUDA_LIBRARY_FIND(cuGraphicsMapResources);
   CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
   CUDA_LIBRARY_FIND(cuGetExportTable);
 
   CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
   CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
-  CUDA_LIBRARY_FIND(cuGLGetDevices);
+  CUDA_LIBRARY_FIND(cuGLGetDevices_v2);
   CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
   CUDA_LIBRARY_FIND(cuGLInit);
   CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
@@ -528,10 +550,10 @@ const char *cuewErrorString(CUresult result) {
     case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
     case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
     case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
-    case CUDA_ERROR_PROFILER_DISABLED: return "PROFILER_DISABLED";
-    case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "PROFILER_NOT_INITIALIZED";
-    case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "PROFILER_ALREADY_STARTED";
-    case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "PROFILER_ALREADY_STOPPED";
+    case CUDA_ERROR_PROFILER_DISABLED: return "Profiler disabled";
+    case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "Profiler not initialized";
+    case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "Profiler already started";
+    case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "Profiler already stopped";
     case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
     case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
     case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
@@ -548,37 +570,38 @@ const char *cuewErrorString(CUresult result) {
     case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
     case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
     case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
-    case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "CONTEXT_ALREADY_IN_USE";
-    case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "PEER_ACCESS_UNSUPPORTED";
-    case CUDA_ERROR_INVALID_PTX: return "INVALID_PTX";
+    case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use";
+    case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "Peer access unsupported";
+    case CUDA_ERROR_INVALID_PTX: return "Invalid ptx";
+    case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context";
     case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
     case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
     case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
     case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
-    case CUDA_ERROR_OPERATING_SYSTEM: return "OPERATING_SYSTEM";
+    case CUDA_ERROR_OPERATING_SYSTEM: return "Operating system";
     case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
     case CUDA_ERROR_NOT_FOUND: return "Not found";
     case CUDA_ERROR_NOT_READY: return "CUDA not ready";
-    case CUDA_ERROR_ILLEGAL_ADDRESS: return "ILLEGAL_ADDRESS";
+    case CUDA_ERROR_ILLEGAL_ADDRESS: return "Illegal address";
     case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
     case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout";
     case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
-    case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "PEER_ACCESS_ALREADY_ENABLED";
-    case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "PEER_ACCESS_NOT_ENABLED";
-    case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "PRIMARY_CONTEXT_ACTIVE";
-    case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "CONTEXT_IS_DESTROYED";
-    case CUDA_ERROR_ASSERT: return "ASSERT";
-    case CUDA_ERROR_TOO_MANY_PEERS: return "TOO_MANY_PEERS";
-    case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "HOST_MEMORY_ALREADY_REGISTERED";
-    case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "HOST_MEMORY_NOT_REGISTERED";
-    case CUDA_ERROR_HARDWARE_STACK_ERROR: return "HARDWARE_STACK_ERROR";
-    case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "ILLEGAL_INSTRUCTION";
-    case CUDA_ERROR_MISALIGNED_ADDRESS: return "MISALIGNED_ADDRESS";
-    case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "INVALID_ADDRESS_SPACE";
-    case CUDA_ERROR_INVALID_PC: return "INVALID_PC";
+    case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "Peer access already enabled";
+    case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "Peer access not enabled";
+    case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "Primary context active";
+    case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "Context is destroyed";
+    case CUDA_ERROR_ASSERT: return "Assert";
+    case CUDA_ERROR_TOO_MANY_PEERS: return "Too many peers";
+    case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "Host memory already registered";
+    case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "Host memory not registered";
+    case CUDA_ERROR_HARDWARE_STACK_ERROR: return "Hardware stack error";
+    case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "Illegal instruction";
+    case CUDA_ERROR_MISALIGNED_ADDRESS: return "Misaligned address";
+    case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space";
+    case CUDA_ERROR_INVALID_PC: return "Invalid pc";
     case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
-    case CUDA_ERROR_NOT_PERMITTED: return "NOT_PERMITTED";
-    case CUDA_ERROR_NOT_SUPPORTED: return "NOT_SUPPORTED";
+    case CUDA_ERROR_NOT_PERMITTED: return "Not permitted";
+    case CUDA_ERROR_NOT_SUPPORTED: return "Not supported";
     case CUDA_ERROR_UNKNOWN: return "Unknown error";
     default: return "Unknown CUDA error value";
   }
@@ -686,7 +709,7 @@ int cuewCompilerVersion(void) {
 
   while (!feof(pipe)) {
     if (fgets(buf, sizeof(buf), pipe) != NULL) {
-      strncat(output, buf, sizeof(output) - strlen(output) - 1 );
+      strncat(output, buf, sizeof(output) - strlen(output) - 1);
     }
   }