Update CUDA wrangler to latest upstream
author Sergey Sharybin <sergey.vfx@gmail.com>
Thu, 31 Dec 2015 11:02:24 +0000 (16:02 +0500)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Thu, 31 Dec 2015 11:02:24 +0000 (16:02 +0500)
Adds support for NVRTC bindings and makes it easier to tweak
library paths and to use multiple alternative names for each library.

extern/cuew/auto/cuew_gen.py
extern/cuew/include/cuew.h
extern/cuew/src/cuew.c

index 75e5bf876f486fc85b7bbd91609c1e358b31b007..6cc48e4f809903dbcdef7ed7bdf10b72193fe44b 100644 (file)
@@ -49,7 +49,7 @@ COPYRIGHT = """/*
  * See the License for the specific language governing permissions and
  * limitations under the License
  */"""
-FILES = ["cuda.h", "cudaGL.h"]
+FILES = ["cuda.h", "cudaGL.h", 'nvrtc.h']
 
 TYPEDEFS = []
 FUNC_TYPEDEFS = []
@@ -89,7 +89,10 @@ class FuncDefVisitor(c_ast.NodeVisitor):
             self.indent -= 1
             return "union {\n" + union + ("  " * self.indent) + "}"
         elif isinstance(node, c_ast.Enum):
-            return 'enum ' + node.name
+            if node.name is not None:
+                return 'enum ' + node.name
+            else:
+                return 'enum '
         elif isinstance(node, c_ast.TypeDecl):
             return self._get_ident_type(node.type)
         else:
@@ -268,7 +271,9 @@ def parse_files():
                     token = line.split()
                     if token[0] not in ("__cuda_cuda_h__",
                                         "CUDA_CB",
-                                        "CUDAAPI"):
+                                        "CUDAAPI",
+                                        "CUDAGL_H",
+                                        "__NVRTC_H__"):
                         DEFINES.append(token)
 
             for line in lines:
@@ -403,7 +408,7 @@ def print_dl_wrapper():
 
 typedef HMODULE DynamicLibrary;
 
-#  define dynamic_library_open(path)         LoadLibrary(path)
+#  define dynamic_library_open(path)         LoadLibraryA(path)
 #  define dynamic_library_close(lib)         FreeLibrary(lib)
 #  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
 #else
@@ -419,23 +424,44 @@ typedef void* DynamicLibrary;
 
 
 def print_dl_helper_macro():
-    print("""#define %s_LIBRARY_FIND_CHECKED(name) \\
+    print("""#define _LIBRARY_FIND_CHECKED(lib, name) \\
         name = (t##name *)dynamic_library_find(lib, #name); \\
         assert(name);
 
-#define %s_LIBRARY_FIND(name) \\
+#define _LIBRARY_FIND(lib, name) \\
         name = (t##name *)dynamic_library_find(lib, #name);
 
-static DynamicLibrary lib;""" % (REAL_LIB, REAL_LIB))
+#define %s_LIBRARY_FIND_CHECKED(name) \\
+        _LIBRARY_FIND_CHECKED(cuda_lib, name)
+#define %s_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name)
+
+#define NVRTC_LIBRARY_FIND_CHECKED(name) \\
+        _LIBRARY_FIND_CHECKED(nvrtc_lib, name)
+#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name)
+
+static DynamicLibrary cuda_lib;
+static DynamicLibrary nvrtc_lib;""" % (REAL_LIB, REAL_LIB))
     print("")
 
 
-def print_dl_close():
-    print("""static void %sExit(void) {
-  if(lib != NULL) {
+def print_dl_helpers():
+    print("""static DynamicLibrary dynamic_library_open_find(const char **paths) {
+  int i = 0;
+  while (paths[i] != NULL) {
+      DynamicLibrary lib = dynamic_library_open(paths[i]);
+      if (lib != NULL) {
+        return lib;
+      }
+      ++i;
+  }
+  return NULL;
+}
+
+static void %sExit(void) {
+  if(cuda_lib != NULL) {
     /*  Ignore errors. */
-    dynamic_library_close(lib);
-    lib = NULL;
+    dynamic_library_close(cuda_lib);
+    cuda_lib = NULL;
   }
 }""" % (LIB.lower()))
     print("")
@@ -445,12 +471,21 @@ def print_lib_path():
     # TODO(sergey): get rid of hardcoded libraries.
     print("""#ifdef _WIN32
   /* Expected in c:/windows/system or similar, no path needed. */
-  const char *path = "nvcuda.dll";
+  const char *cuda_paths[] = {"nvcuda.dll", NULL};
+  const char *nvrtc_paths[] = {"nvrtc.dll", NULL};
 #elif defined(__APPLE__)
   /* Default installation path. */
-  const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+  const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
+  const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
 #else
-  const char *path = "libcuda.so";
+  const char *cuda_paths[] = {"libcuda.so", NULL};
+  const char *nvrtc_paths[] = {"libnvrtc.so",
+#  if defined(__x86_64__) || defined(_M_X64)
+                               "/usr/local/cuda/lib64/libnvrtc.so",
+#else
+                               "/usr/local/cuda/lib/libnvrtc.so",
+#endif
+                               NULL};
 #endif""")
 
 
@@ -472,9 +507,11 @@ def print_init_guard():
   }
 
   /* Load library. */
-  lib = dynamic_library_open(path);
+  cuda_lib = dynamic_library_open_find(cuda_paths);
+  nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
 
-  if (lib == NULL) {
+  /* CUDA library is mandatory to have, while nvrtc might be missing. */
+  if (cuda_lib == NULL) {
     result = CUEW_ERROR_OPEN_FAILED;
     return result;
   }""")
@@ -509,10 +546,17 @@ def print_dl_init():
     print("  /* Fetch all function pointers. */")
     for symbol in SYMBOLS:
         if symbol:
+          if not symbol.startswith('nvrtc'):
             print("  %s_LIBRARY_FIND(%s);" % (REAL_LIB, symbol))
         else:
             print("")
 
+    print("  if (nvrtc_lib != NULL) {")
+    for symbol in SYMBOLS:
+        if symbol and symbol.startswith('nvrtc'):
+            print("    NVRTC_LIBRARY_FIND(%s);" % (symbol))
+    print("  }")
+
     print("")
     print("  result = CUEW_SUCCESS;")
     print("  return result;")
@@ -549,7 +593,7 @@ def print_implementation():
             print("")
     print("")
 
-    print_dl_close()
+    print_dl_helpers()
 
     print("/* Implementation function. */")
     print_dl_init()
index 1b12e5b44630fb353d610b459e0230c61a6399d5..47b19b4b3a50bdd14fbee87b03d9bdb226dd4d02 100644 (file)
@@ -51,7 +51,6 @@ extern "C" {
 #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
 #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
 #define CU_PARAM_TR_DEFAULT -1
-#define CUDAGL_H
 
 /* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
  * the cuda library has both the old ones for compatibility and new
@@ -728,6 +727,19 @@ typedef enum CUGLmap_flags_enum {
   CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
 } CUGLmap_flags;
 
+typedef enum  {
+  NVRTC_SUCCESS = 0,
+  NVRTC_ERROR_OUT_OF_MEMORY = 1,
+  NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
+  NVRTC_ERROR_INVALID_INPUT = 3,
+  NVRTC_ERROR_INVALID_PROGRAM = 4,
+  NVRTC_ERROR_INVALID_OPTION = 5,
+  NVRTC_ERROR_COMPILATION = 6,
+  NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
+} nvrtcResult;
+
+typedef struct _nvrtcProgram* nvrtcProgram;
+
 #ifdef _WIN32
 #  define CUDAAPI __stdcall
 #  define CUDA_CB __stdcall
@@ -947,6 +959,16 @@ typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned Fl
 typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer, CUstream hStream);
 typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
 
+typedef const char* CUDAAPI tnvrtcGetErrorString(nvrtcResult result);
+typedef nvrtcResult CUDAAPI tnvrtcVersion(int* major, int* minor);
+typedef nvrtcResult CUDAAPI tnvrtcCreateProgram(nvrtcProgram* prog, const char* src, const char* name, int numHeaders, const char* headers, const char* includeNames);
+typedef nvrtcResult CUDAAPI tnvrtcDestroyProgram(nvrtcProgram* prog);
+typedef nvrtcResult CUDAAPI tnvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char* options);
+typedef nvrtcResult CUDAAPI tnvrtcGetPTXSize(nvrtcProgram prog, size_t* ptxSizeRet);
+typedef nvrtcResult CUDAAPI tnvrtcGetPTX(nvrtcProgram prog, char* ptx);
+typedef nvrtcResult CUDAAPI tnvrtcGetProgramLogSize(nvrtcProgram prog, size_t* logSizeRet);
+typedef nvrtcResult CUDAAPI tnvrtcGetProgramLog(nvrtcProgram prog, char* log);
+
 
 /* Function declarations. */
 extern tcuGetErrorString *cuGetErrorString;
@@ -1159,6 +1181,16 @@ extern tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
 extern tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
 extern tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
 
+extern tnvrtcGetErrorString *nvrtcGetErrorString;
+extern tnvrtcVersion *nvrtcVersion;
+extern tnvrtcCreateProgram *nvrtcCreateProgram;
+extern tnvrtcDestroyProgram *nvrtcDestroyProgram;
+extern tnvrtcCompileProgram *nvrtcCompileProgram;
+extern tnvrtcGetPTXSize *nvrtcGetPTXSize;
+extern tnvrtcGetPTX *nvrtcGetPTX;
+extern tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize;
+extern tnvrtcGetProgramLog *nvrtcGetProgramLog;
+
 
 enum {
   CUEW_SUCCESS = 0,
index 70ac349ea0db1378ae45cfe44066d4d9c78e0809..ff454baababc715f2b0dafc94ff4dbed0a4ab756 100644 (file)
@@ -15,9 +15,7 @@
  */
 
 #ifdef _MSC_VER
-#  if _MSC_VER < 1900
-#    define snprintf _snprintf
-#  endif
+#  define snprintf _snprintf
 #  define popen _popen
 #  define pclose _pclose
 #  define _CRT_SECURE_NO_WARNINGS
@@ -51,14 +49,23 @@ typedef void* DynamicLibrary;
 #  define dynamic_library_find(lib, symbol)  dlsym(lib, symbol)
 #endif
 
-#define CUDA_LIBRARY_FIND_CHECKED(name) \
+#define _LIBRARY_FIND_CHECKED(lib, name) \
         name = (t##name *)dynamic_library_find(lib, #name); \
         assert(name);
 
-#define CUDA_LIBRARY_FIND(name) \
+#define _LIBRARY_FIND(lib, name) \
         name = (t##name *)dynamic_library_find(lib, #name);
 
-static DynamicLibrary lib;
+#define CUDA_LIBRARY_FIND_CHECKED(name) \
+        _LIBRARY_FIND_CHECKED(cuda_lib, name)
+#define CUDA_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name)
+
+#define NVRTC_LIBRARY_FIND_CHECKED(name) \
+        _LIBRARY_FIND_CHECKED(nvrtc_lib, name)
+#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name)
+
+static DynamicLibrary cuda_lib;
+static DynamicLibrary nvrtc_lib;
 
 /* Function definitions. */
 tcuGetErrorString *cuGetErrorString;
@@ -271,12 +278,34 @@ tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
 tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
 tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
 
+tnvrtcGetErrorString *nvrtcGetErrorString;
+tnvrtcVersion *nvrtcVersion;
+tnvrtcCreateProgram *nvrtcCreateProgram;
+tnvrtcDestroyProgram *nvrtcDestroyProgram;
+tnvrtcCompileProgram *nvrtcCompileProgram;
+tnvrtcGetPTXSize *nvrtcGetPTXSize;
+tnvrtcGetPTX *nvrtcGetPTX;
+tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize;
+tnvrtcGetProgramLog *nvrtcGetProgramLog;
+
+
+static DynamicLibrary dynamic_library_open_find(const char **paths) {
+  int i = 0;
+  while (paths[i] != NULL) {
+      DynamicLibrary lib = dynamic_library_open(paths[i]);
+      if (lib != NULL) {
+        return lib;
+      }
+      ++i;
+  }
+  return NULL;
+}
 
 static void cuewExit(void) {
-  if(lib != NULL) {
+  if(cuda_lib != NULL) {
     /*  Ignore errors. */
-    dynamic_library_close(lib);
-    lib = NULL;
+    dynamic_library_close(cuda_lib);
+    cuda_lib = NULL;
   }
 }
 
@@ -285,12 +314,21 @@ int cuewInit(void) {
   /* Library paths. */
 #ifdef _WIN32
   /* Expected in c:/windows/system or similar, no path needed. */
-  const char *path = "nvcuda.dll";
+  const char *cuda_paths[] = {"nvcuda.dll", NULL};
+  const char *nvrtc_paths[] = {"nvrtc.dll", NULL};
 #elif defined(__APPLE__)
   /* Default installation path. */
-  const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+  const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
+  const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
+#else
+  const char *cuda_paths[] = {"libcuda.so", NULL};
+  const char *nvrtc_paths[] = {"libnvrtc.so",
+#  if defined(__x86_64__) || defined(_M_X64)
+                               "/usr/local/cuda/lib64/libnvrtc.so",
 #else
-  const char *path = "libcuda.so";
+                               "/usr/local/cuda/lib/libnvrtc.so",
+#endif
+                               NULL};
 #endif
   static int initialized = 0;
   static int result = 0;
@@ -309,9 +347,11 @@ int cuewInit(void) {
   }
 
   /* Load library. */
-  lib = dynamic_library_open(path);
+  cuda_lib = dynamic_library_open_find(cuda_paths);
+  nvrtc_lib = dynamic_library_open_find(nvrtc_paths);
 
-  if (lib == NULL) {
+  /* CUDA library is mandatory to have, while nvrtc might be missing. */
+  if (cuda_lib == NULL) {
     result = CUEW_ERROR_OPEN_FAILED;
     return result;
   }
@@ -541,6 +581,18 @@ int cuewInit(void) {
   CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
 
 
+  if (nvrtc_lib != NULL) {
+    NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
+    NVRTC_LIBRARY_FIND(nvrtcVersion);
+    NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
+    NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
+    NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
+    NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
+    NVRTC_LIBRARY_FIND(nvrtcGetPTX);
+    NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
+    NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
+  }
+
   result = CUEW_SUCCESS;
   return result;
 }