Deduplicate CUDA and OpenCL wranglers
authorSergey Sharybin <sergey.vfx@gmail.com>
Tue, 5 Aug 2014 07:57:50 +0000 (13:57 +0600)
committerSergey Sharybin <sergey.vfx@gmail.com>
Tue, 5 Aug 2014 07:57:50 +0000 (13:57 +0600)
For now it was mainly about OpenCL wrangler being duplicated
between Cycles and Compositor, but with OpenSubdiv work those
wranglers were gonna to be duplicated just once again.

This commit makes it so Cycles and Compositor uses wranglers
from this repositories:

  - https://github.com/CudaWrangler/cuew
  - https://github.com/OpenCLWrangler/clew

This repositories are based on the wranglers we used before
and they'll be likely continued maintaining by us plus some
more players in the market.

Pretty much straightforward change with some tricks in the
CMake/SCons to make this libs being passed to the linker
after all other libraries in order to make OpenSubdiv linked
against those wranglers in the future.

For those who're worrying about Cycles being less standalone,
it's not truth, it's rather more flexible now and in the future
different wranglers might be used in Cycles. For now it'll
just mean those libs would need to be put into Cycles repository
together with some other libs from Blender such as mikkspace.

This is mainly platform maintenance commit, should not be any
changes to the user space.

Reviewers: juicyfruit, dingto, campbellbarton

Reviewed By: juicyfruit, dingto, campbellbarton

Differential Revision: https://developer.blender.org/D707

46 files changed:
SConstruct
build_files/cmake/macros.cmake
extern/CMakeLists.txt
extern/SConscript
extern/clew/CMakeLists.txt [new file with mode: 0644]
extern/clew/SConscript [new file with mode: 0644]
extern/clew/include/clew.h [new file with mode: 0644]
extern/clew/src/clew.c [new file with mode: 0644]
extern/cuew/CMakeLists.txt [moved from intern/opencl/CMakeLists.txt with 88% similarity]
extern/cuew/LICENSE [new file with mode: 0644]
extern/cuew/README [new file with mode: 0644]
extern/cuew/SConscript [moved from intern/opencl/SConscript with 85% similarity]
extern/cuew/auto/cuda_errors.py [new file with mode: 0644]
extern/cuew/auto/cuda_extra.py [new file with mode: 0644]
extern/cuew/auto/cuew_gen.py [new file with mode: 0644]
extern/cuew/auto/cuew_gen.sh [new file with mode: 0755]
extern/cuew/auto/stdlib.h [new file with mode: 0644]
extern/cuew/include/cuew.h [new file with mode: 0644]
extern/cuew/src/cuew.c [new file with mode: 0644]
intern/CMakeLists.txt
intern/SConscript
intern/cycles/SConscript
intern/cycles/app/CMakeLists.txt
intern/cycles/device/CMakeLists.txt
intern/cycles/device/device.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_intern.h
intern/cycles/device/device_opencl.cpp
intern/cycles/util/CMakeLists.txt
intern/cycles/util/util_cuda.cpp [deleted file]
intern/cycles/util/util_cuda.h [deleted file]
intern/cycles/util/util_opencl.cpp [deleted file]
intern/cycles/util/util_opencl.h [deleted file]
intern/opencl/OCL_opencl.h [deleted file]
intern/opencl/intern/OCL_opencl.c [deleted file]
intern/opencl/intern/clew.c [deleted file]
intern/opencl/intern/clew.h [deleted file]
source/blender/compositor/CMakeLists.txt
source/blender/compositor/SConscript
source/blender/compositor/intern/COM_NodeOperation.h
source/blender/compositor/intern/COM_OpenCLDevice.cpp
source/blender/compositor/intern/COM_OpenCLDevice.h
source/blender/compositor/intern/COM_WorkScheduler.cpp
source/blender/compositor/intern/COM_compositor.cpp
source/blender/compositor/operations/COM_DirectionalBlurOperation.cpp
source/blenderplayer/CMakeLists.txt

index 7c874fb..329f51f 100644 (file)
@@ -70,7 +70,7 @@ quickdebug = None
 
 ##### BEGIN SETUP #####
 
-B.possible_types = ['core', 'player', 'player2', 'intern', 'extern']
+B.possible_types = ['core', 'player', 'player2', 'intern', 'extern', 'system']
 
 B.binarykind = ['blender' , 'blenderplayer']
 ##################################
@@ -815,7 +815,7 @@ SConscript(B.root_build_dir+'/extern/SConscript')
 # libraries to give as objects to linking phase
 mainlist = []
 for tp in B.possible_types:
-    if (not tp == 'player') and (not tp == 'player2'):
+    if (not tp == 'player') and (not tp == 'player2') and (not tp == 'system'):
         mainlist += B.create_blender_liblist(env, tp)
 
 if B.arguments.get('BF_PRIORITYLIST', '0')=='1':
@@ -826,6 +826,11 @@ creob = B.creator(env)
 thestatlibs, thelibincs = B.setup_staticlibs(env)
 thesyslibs = B.setup_syslibs(env)
 
+# Hack to pass OSD libraries to linker before extern_{clew,cuew}
+for x in B.create_blender_liblist(env, 'system'):
+    thesyslibs.append(os.path.basename(x))
+    thelibincs.append(os.path.dirname(x))
+
 if 'blender' in B.targets or not env['WITH_BF_NOBLENDER']:
     env.BlenderProg(B.root_build_dir, "blender", creob + mainlist + thestatlibs + dobj, thesyslibs, [B.root_build_dir+'/lib'] + thelibincs, 'blender')
 if env['WITH_BF_PLAYER']:
index f896fb9..9f7779e 100644 (file)
@@ -411,6 +411,12 @@ macro(setup_liblinks
        endif()
 
        target_link_libraries(${target} ${PLATFORM_LINKLIBS} ${CMAKE_DL_LIBS})
+
+       # We put CLEW and CUEW here because OPENSUBDIV_LIBRARIES dpeends on them..
+       if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
+               target_link_libraries(blender "extern_clew")
+               target_link_libraries(blender "extern_cuew")
+       endif()
 endmacro()
 
 macro(SETUP_BLENDER_SORTED_LIBS)
index eda5b5b..8d85551 100644 (file)
@@ -70,6 +70,11 @@ if(WITH_LZMA)
        add_subdirectory(lzma)
 endif()
 
+if(WITH_CYCLES OR WITH_COMPOSITOR)
+       add_subdirectory(clew)
+       add_subdirectory(cuew)
+endif()
+
 if(WITH_MOD_BOOLEAN)
        add_subdirectory(carve)
 endif()
index 041aa34..714dfd2 100644 (file)
@@ -20,6 +20,10 @@ if env['WITH_BF_ELTOPO']:
 if env['WITH_BF_BULLET']:
     SConscript(['bullet2/src/SConscript'])
 
+if env['WITH_BF_COMPOSITOR'] or env['WITH_BF_CYCLES']:
+    SConscript (['clew/SConscript'])
+    SConscript (['cuew/SConscript'])
+
 if env['WITH_BF_OPENJPEG'] and env['BF_OPENJPEG_LIB'] == '':
     SConscript(['libopenjpeg/SConscript'])
 
diff --git a/extern/clew/CMakeLists.txt b/extern/clew/CMakeLists.txt
new file mode 100644 (file)
index 0000000..f75e933
--- /dev/null
@@ -0,0 +1,42 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2006, Blender Foundation
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Jacques Beaurain.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+set(INC
+       .
+       include
+)
+
+set(INC_SYS
+
+)
+
+set(SRC
+       include/clew.h
+       src/clew.c
+)
+
+add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS)
+
+blender_add_lib(extern_clew "${SRC}" "${INC}" "${INC_SYS}")
diff --git a/extern/clew/SConscript b/extern/clew/SConscript
new file mode 100644 (file)
index 0000000..14a03c7
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+#
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2006, Blender Foundation
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Nathan Letwory.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+Import ('env')
+
+sources = env.Glob('src/clew.c')
+
+incs = 'include'
+defs = ['CL_USE_DEPRECATED_OPENCL_1_1_APIS']
+
+env.BlenderLib ('extern_clew', sources, Split(incs), defines=defs, libtype=['system'], priority = [999])
diff --git a/extern/clew/include/clew.h b/extern/clew/include/clew.h
new file mode 100644 (file)
index 0000000..624fdb2
--- /dev/null
@@ -0,0 +1,2759 @@
+#ifndef CLEW_HPP_INCLUDED
+#define CLEW_HPP_INCLUDED
+
+//////////////////////////////////////////////////////////////////////////
+//  Copyright (c) 2009-2011 Organic Vectory B.V., KindDragon
+//  Written by George van Venrooij
+//
+//  Distributed under the MIT License.
+//////////////////////////////////////////////////////////////////////////
+
+//! \file clew.h
+//! \brief OpenCL run-time loader header
+//!
+//! This file contains a copy of the contents of CL.H and CL_PLATFORM.H from the 
+//! official OpenCL spec. The purpose of this code is to load the OpenCL dynamic
+//! library at run-time and thus allow the executable to function on many
+//! platforms regardless of the vendor of the OpenCL driver actually installed.
+//! Some of the techniques used here were inspired by work done in the GLEW
+//! library (http://glew.sourceforge.net/)
+
+//  Run-time dynamic linking functionality based on concepts used in GLEW
+#ifdef  __OPENCL_CL_H
+#error cl.h included before clew.h
+#endif
+
+#ifdef  __OPENCL_CL_PLATFORM_H
+#error cl_platform.h included before clew.h
+#endif
+
+//  Prevent cl.h inclusion
+#define __OPENCL_CL_H
+//  Prevent cl_platform.h inclusion
+#define __CL_PLATFORM_H
+
+/*******************************************************************************
+* Copyright (c) 2008-2010 The Khronos Group Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and/or associated documentation files (the
+* "Materials"), to deal in the Materials without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Materials, and to
+* permit persons to whom the Materials are furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be included
+* in all copies or substantial portions of the Materials.
+*
+* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+******************************************************************************/
+#ifdef __APPLE__
+    /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
+    #include <AvailabilityMacros.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN32)
+    #define CL_API_ENTRY
+    #define CL_API_CALL     __stdcall
+    #define CL_CALLBACK     __stdcall
+#else
+    #define CL_API_ENTRY
+    #define CL_API_CALL
+    #define CL_CALLBACK
+#endif
+
+#ifdef __APPLE__
+    #define CL_EXTENSION_WEAK_LINK                  __attribute__((weak_import))       
+    #define CL_API_SUFFIX__VERSION_1_0              AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+    #define CL_EXT_SUFFIX__VERSION_1_0              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+       #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+       #define CL_API_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
+       #define CL_EXT_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
+       #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+       #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+       #define CL_API_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK
+       #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK
+#else
+    #define CL_EXTENSION_WEAK_LINK
+    #define CL_API_SUFFIX__VERSION_1_0
+    #define CL_EXT_SUFFIX__VERSION_1_0
+    #define CL_API_SUFFIX__VERSION_1_1
+    #define CL_EXT_SUFFIX__VERSION_1_1
+    #define CL_API_SUFFIX__VERSION_1_2
+    #define CL_EXT_SUFFIX__VERSION_1_2
+
+    #if defined(__GNUC__)
+        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
+            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+        #endif
+
+        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
+            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+        #endif
+    #elif defined(_WIN32)
+        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)
+        #endif
+
+        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)
+        #endif
+    #else
+        #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+
+        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+    #endif
+#endif
+
+#if (defined (_WIN32) && defined(_MSC_VER))
+
+/* scalar types  */
+typedef signed   __int8         cl_char;
+typedef unsigned __int8         cl_uchar;
+typedef signed   __int16        cl_short;
+typedef unsigned __int16        cl_ushort;
+typedef signed   __int32        cl_int;
+typedef unsigned __int32        cl_uint;
+typedef signed   __int64        cl_long;
+typedef unsigned __int64        cl_ulong;
+
+typedef unsigned __int16        cl_half;
+typedef float                   cl_float;
+typedef double                  cl_double;
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT         8
+#define CL_SCHAR_MAX        127
+#define CL_SCHAR_MIN        (-127-1)
+#define CL_CHAR_MAX         CL_SCHAR_MAX
+#define CL_CHAR_MIN         CL_SCHAR_MIN
+#define CL_UCHAR_MAX        255
+#define CL_SHRT_MAX         32767
+#define CL_SHRT_MIN         (-32767-1)
+#define CL_USHRT_MAX        65535
+#define CL_INT_MAX          2147483647
+#define CL_INT_MIN          (-2147483647-1)
+#define CL_UINT_MAX         0xffffffffU
+#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG          6
+#define CL_FLT_MANT_DIG     24
+#define CL_FLT_MAX_10_EXP   +38
+#define CL_FLT_MAX_EXP      +128
+#define CL_FLT_MIN_10_EXP   -37
+#define CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN          1.175494350822287507969e-38f
+#define CL_FLT_EPSILON      0x1.0p-23f
+
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
+#define CL_DBL_MIN          2.225073858507201383090e-308
+#define CL_DBL_EPSILON      2.220446049250313080847e-16
+
+#define  CL_M_E             2.718281828459045090796
+#define  CL_M_LOG2E         1.442695040888963387005
+#define  CL_M_LOG10E        0.434294481903251816668
+#define  CL_M_LN2           0.693147180559945286227
+#define  CL_M_LN10          2.302585092994045901094
+#define  CL_M_PI            3.141592653589793115998
+#define  CL_M_PI_2          1.570796326794896557999
+#define  CL_M_PI_4          0.785398163397448278999
+#define  CL_M_1_PI          0.318309886183790691216
+#define  CL_M_2_PI          0.636619772367581382433
+#define  CL_M_2_SQRTPI      1.128379167095512558561
+#define  CL_M_SQRT2         1.414213562373095145475
+#define  CL_M_SQRT1_2       0.707106781186547572737
+
+#define  CL_M_E_F           2.71828174591064f
+#define  CL_M_LOG2E_F       1.44269502162933f
+#define  CL_M_LOG10E_F      0.43429449200630f
+#define  CL_M_LN2_F         0.69314718246460f
+#define  CL_M_LN10_F        2.30258512496948f
+#define  CL_M_PI_F          3.14159274101257f
+#define  CL_M_PI_2_F        1.57079637050629f
+#define  CL_M_PI_4_F        0.78539818525314f
+#define  CL_M_1_PI_F        0.31830987334251f
+#define  CL_M_2_PI_F        0.63661974668503f
+#define  CL_M_2_SQRTPI_F    1.12837922573090f
+#define  CL_M_SQRT2_F       1.41421353816986f
+#define  CL_M_SQRT1_2_F     0.70710676908493f
+
+#define CL_NAN              (CL_INFINITY - CL_INFINITY)
+#define CL_HUGE_VALF        ((cl_float) 1e50)
+#define CL_HUGE_VAL         ((cl_double) 1e500)
+#define CL_MAXFLOAT         CL_FLT_MAX
+#define CL_INFINITY         CL_HUGE_VALF
+
+#else
+
+#include <stdint.h>
+
+/* scalar types  */
+typedef int8_t          cl_char;
+typedef uint8_t         cl_uchar;
+typedef int16_t         cl_short    __attribute__((aligned(2)));
+typedef uint16_t        cl_ushort   __attribute__((aligned(2)));
+typedef int32_t         cl_int      __attribute__((aligned(4)));
+typedef uint32_t        cl_uint     __attribute__((aligned(4)));
+typedef int64_t         cl_long     __attribute__((aligned(8)));
+typedef uint64_t        cl_ulong    __attribute__((aligned(8)));
+
+typedef uint16_t        cl_half     __attribute__((aligned(2)));
+typedef float           cl_float    __attribute__((aligned(4)));
+typedef double          cl_double   __attribute__((aligned(8)));
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT         8
+#define CL_SCHAR_MAX        127
+#define CL_SCHAR_MIN        (-127-1)
+#define CL_CHAR_MAX         CL_SCHAR_MAX
+#define CL_CHAR_MIN         CL_SCHAR_MIN
+#define CL_UCHAR_MAX        255
+#define CL_SHRT_MAX         32767
+#define CL_SHRT_MIN         (-32767-1)
+#define CL_USHRT_MAX        65535
+#define CL_INT_MAX          2147483647
+#define CL_INT_MIN          (-2147483647-1)
+#define CL_UINT_MAX         0xffffffffU
+#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG          6
+#define CL_FLT_MANT_DIG     24
+#define CL_FLT_MAX_10_EXP   +38
+#define CL_FLT_MAX_EXP      +128
+#define CL_FLT_MIN_10_EXP   -37
+#define CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          0x1.fffffep127f
+#define CL_FLT_MIN          0x1.0p-126f
+#define CL_FLT_EPSILON      0x1.0p-23f
+
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          0x1.fffffffffffffp1023
+#define CL_DBL_MIN          0x1.0p-1022
+#define CL_DBL_EPSILON      0x1.0p-52
+
+#define  CL_M_E             2.718281828459045090796
+#define  CL_M_LOG2E         1.442695040888963387005
+#define  CL_M_LOG10E        0.434294481903251816668
+#define  CL_M_LN2           0.693147180559945286227
+#define  CL_M_LN10          2.302585092994045901094
+#define  CL_M_PI            3.141592653589793115998
+#define  CL_M_PI_2          1.570796326794896557999
+#define  CL_M_PI_4          0.785398163397448278999
+#define  CL_M_1_PI          0.318309886183790691216
+#define  CL_M_2_PI          0.636619772367581382433
+#define  CL_M_2_SQRTPI      1.128379167095512558561
+#define  CL_M_SQRT2         1.414213562373095145475
+#define  CL_M_SQRT1_2       0.707106781186547572737
+
+#define  CL_M_E_F           2.71828174591064f
+#define  CL_M_LOG2E_F       1.44269502162933f
+#define  CL_M_LOG10E_F      0.43429449200630f
+#define  CL_M_LN2_F         0.69314718246460f
+#define  CL_M_LN10_F        2.30258512496948f
+#define  CL_M_PI_F          3.14159274101257f
+#define  CL_M_PI_2_F        1.57079637050629f
+#define  CL_M_PI_4_F        0.78539818525314f
+#define  CL_M_1_PI_F        0.31830987334251f
+#define  CL_M_2_PI_F        0.63661974668503f
+#define  CL_M_2_SQRTPI_F    1.12837922573090f
+#define  CL_M_SQRT2_F       1.41421353816986f
+#define  CL_M_SQRT1_2_F     0.70710676908493f
+
+#if defined( __GNUC__ )
+   #define CL_HUGE_VALF     __builtin_huge_valf()
+   #define CL_HUGE_VAL      __builtin_huge_val()
+   #define CL_NAN           __builtin_nanf( "" )
+#else
+   #define CL_HUGE_VALF     ((cl_float) 1e50)
+   #define CL_HUGE_VAL      ((cl_double) 1e500)
+   float nanf( const char * );
+   #define CL_NAN           nanf( "" )  
+#endif
+#define CL_MAXFLOAT         CL_FLT_MAX
+#define CL_INFINITY         CL_HUGE_VALF
+
+#endif
+
+#include <stddef.h>
+
+/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */
+typedef unsigned int cl_GLuint;
+typedef int          cl_GLint;
+typedef unsigned int cl_GLenum;
+
+/*
+ * Vector types 
+ *
+ *  Note:   OpenCL requires that all types be naturally aligned. 
+ *          This means that vector types must be naturally aligned.
+ *          For example, a vector of four floats must be aligned to
+ *          a 16 byte boundary (calculated as 4 * the natural 4-byte 
+ *          alignment of the float).  The alignment qualifiers here
+ *          will only function properly if your compiler supports them
+ *          and if you don't actively work to defeat them.  For example,
+ *          in order for a cl_float4 to be 16 byte aligned in a struct,
+ *          the start of the struct must itself be 16-byte aligned. 
+ *
+ *          Maintaining proper alignment is the user's responsibility.
+ */
+
+
+#ifdef _MSC_VER
+#if defined(_M_IX86)
+#if _M_IX86_FP >= 0
+#define __SSE__
+#endif
+#if _M_IX86_FP >= 1
+#  ifndef __SSE2__
+#    define __SSE2__
+#  endif
+#endif
+#elif defined(_M_X64)
+#  ifndef __SSE__
+#    define __SSE__
+#  endif
+#  ifndef __SSE2__
+#    define __SSE2__
+#  endif
+#endif
+#endif
+
+/* Define basic vector types */
+#if defined( __VEC__ )
+   #include <altivec.h>   /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
+   typedef vector unsigned char     __cl_uchar16;
+   typedef vector signed char       __cl_char16;
+   typedef vector unsigned short    __cl_ushort8;
+   typedef vector signed short      __cl_short8;
+   typedef vector unsigned int      __cl_uint4;
+   typedef vector signed int        __cl_int4;
+   typedef vector float             __cl_float4;
+   #define  __CL_UCHAR16__  1
+   #define  __CL_CHAR16__   1
+   #define  __CL_USHORT8__  1
+   #define  __CL_SHORT8__   1
+   #define  __CL_UINT4__    1
+   #define  __CL_INT4__     1
+   #define  __CL_FLOAT4__   1
+#endif
+
+#if defined( __SSE__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <xmmintrin.h>
+    #endif
+    #if defined( __GNUC__ ) && !defined( __ICC )
+        typedef float __cl_float4   __attribute__((vector_size(16)));
+    #else
+        typedef __m128 __cl_float4;
+    #endif
+    #define __CL_FLOAT4__   1
+#endif
+
+#if defined( __SSE2__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <emmintrin.h>
+    #endif
+    #if defined( __GNUC__ ) && !defined( __ICC )
+        typedef cl_uchar    __cl_uchar16    __attribute__((vector_size(16)));
+        typedef cl_char     __cl_char16     __attribute__((vector_size(16)));
+        typedef cl_ushort   __cl_ushort8    __attribute__((vector_size(16)));
+        typedef cl_short    __cl_short8     __attribute__((vector_size(16)));
+        typedef cl_uint     __cl_uint4      __attribute__((vector_size(16)));
+        typedef cl_int      __cl_int4       __attribute__((vector_size(16)));
+        typedef cl_ulong    __cl_ulong2     __attribute__((vector_size(16)));
+        typedef cl_long     __cl_long2      __attribute__((vector_size(16)));
+        typedef cl_double   __cl_double2    __attribute__((vector_size(16)));
+    #else
+        typedef __m128i __cl_uchar16;
+        typedef __m128i __cl_char16;
+        typedef __m128i __cl_ushort8;
+        typedef __m128i __cl_short8;
+        typedef __m128i __cl_uint4;
+        typedef __m128i __cl_int4;
+        typedef __m128i __cl_ulong2;
+        typedef __m128i __cl_long2;
+        typedef __m128d __cl_double2;
+    #endif
+    #define __CL_UCHAR16__  1
+    #define __CL_CHAR16__   1
+    #define __CL_USHORT8__  1
+    #define __CL_SHORT8__   1
+    #define __CL_INT4__     1
+    #define __CL_UINT4__    1
+    #define __CL_ULONG2__   1
+    #define __CL_LONG2__    1
+    #define __CL_DOUBLE2__  1
+#endif
+
+#if defined( __MMX__ )
+    #include <mmintrin.h>
+    #if defined( __GNUC__ ) && !defined( __ICC )
+        typedef cl_uchar    __cl_uchar8     __attribute__((vector_size(8)));
+        typedef cl_char     __cl_char8      __attribute__((vector_size(8)));
+        typedef cl_ushort   __cl_ushort4    __attribute__((vector_size(8)));
+        typedef cl_short    __cl_short4     __attribute__((vector_size(8)));
+        typedef cl_uint     __cl_uint2      __attribute__((vector_size(8)));
+        typedef cl_int      __cl_int2       __attribute__((vector_size(8)));
+        typedef cl_ulong    __cl_ulong1     __attribute__((vector_size(8)));
+        typedef cl_long     __cl_long1      __attribute__((vector_size(8)));
+        typedef cl_float    __cl_float2     __attribute__((vector_size(8)));
+    #else
+        typedef __m64       __cl_uchar8;
+        typedef __m64       __cl_char8;
+        typedef __m64       __cl_ushort4;
+        typedef __m64       __cl_short4;
+        typedef __m64       __cl_uint2;
+        typedef __m64       __cl_int2;
+        typedef __m64       __cl_ulong1;
+        typedef __m64       __cl_long1;
+        typedef __m64       __cl_float2;
+    #endif
+    #define __CL_UCHAR8__   1
+    #define __CL_CHAR8__    1
+    #define __CL_USHORT4__  1
+    #define __CL_SHORT4__   1
+    #define __CL_INT2__     1
+    #define __CL_UINT2__    1
+    #define __CL_ULONG1__   1
+    #define __CL_LONG1__    1
+    #define __CL_FLOAT2__   1
+#endif
+
+#if defined( __AVX__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <immintrin.h> 
+    #endif
+    #if defined( __GNUC__ ) && !defined( __ICC )
+        typedef cl_float    __cl_float8     __attribute__((vector_size(32)));
+        typedef cl_double   __cl_double4    __attribute__((vector_size(32)));
+    #else
+        typedef __m256      __cl_float8;
+        typedef __m256d     __cl_double4;
+    #endif
+    #define __CL_FLOAT8__   1
+    #define __CL_DOUBLE4__  1
+#endif
+
+/* Define alignment keys */
+#if defined( __GNUC__ )
+    #define CL_ALIGNED(_x)          __attribute__ ((aligned(_x)))
+#elif defined( _WIN32) && (_MSC_VER)
+    /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements     */
+    /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx                                                 */
+    /* #include <crtdefs.h>                                                                                             */
+    /* #define CL_ALIGNED(_x)          _CRT_ALIGN(_x)                                                                   */
+    #define CL_ALIGNED(_x)
+#else
+   #warning  Need to implement some method to align data here
+   #define  CL_ALIGNED(_x)
+#endif
+
+/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
+#if (defined( __GNUC__) && ! defined( __STRICT_ANSI__ )) || (defined( _MSC_VER ) && ! defined( __STDC__ ))
+    /* .xyzw and .s0123...{f|F} are supported */
+    #define CL_HAS_NAMED_VECTOR_FIELDS 1
+    /* .hi and .lo are supported */
+    #define CL_HAS_HI_LO_VECTOR_FIELDS 1
+
+    #define CL_NAMED_STRUCT_SUPPORTED
+#endif
+
+#if defined( CL_NAMED_STRUCT_SUPPORTED) && defined( _MSC_VER )
+#define __extension__ __pragma(warning(suppress:4201))
+#endif
+
+/* Define cl_vector types */
+
+/* ---- cl_charn ---- */
+typedef union
+{
+    cl_char  CL_ALIGNED(2) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_char  x, y; };
+   __extension__ struct{ cl_char  s0, s1; };
+   __extension__ struct{ cl_char  lo, hi; };
+#endif
+#if defined( __CL_CHAR2__) 
+    __cl_char2     v2;
+#endif
+}cl_char2;
+
+typedef union
+{
+    cl_char  CL_ALIGNED(4) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_char  x, y, z, w; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3; };
+   __extension__ struct{ cl_char2 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__) 
+    __cl_char2     v2[2];
+#endif
+#if defined( __CL_CHAR4__) 
+    __cl_char4     v4;
+#endif
+}cl_char4;
+
+/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
+typedef  cl_char4  cl_char3;
+
+typedef union
+{
+    cl_char   CL_ALIGNED(8) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_char  x, y, z, w; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_char4 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__) 
+    __cl_char2     v2[4];
+#endif
+#if defined( __CL_CHAR4__) 
+    __cl_char4     v4[2];
+#endif
+#if defined( __CL_CHAR8__ )
+    __cl_char8     v8;
+#endif
+}cl_char8;
+
+typedef union
+{
+    cl_char  CL_ALIGNED(16) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_char  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_char8 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__) 
+    __cl_char2     v2[8];
+#endif
+#if defined( __CL_CHAR4__) 
+    __cl_char4     v4[4];
+#endif
+#if defined( __CL_CHAR8__ )
+    __cl_char8     v8[2];
+#endif
+#if defined( __CL_CHAR16__ )
+    __cl_char16    v16;
+#endif
+}cl_char16;
+
+
+/* ---- cl_ucharn ---- */
+typedef union
+{
+    cl_uchar  CL_ALIGNED(2) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_uchar  x, y; };
+   __extension__ struct{ cl_uchar  s0, s1; };
+   __extension__ struct{ cl_uchar  lo, hi; };
+#endif
+#if defined( __cl_uchar2__) 
+    __cl_uchar2     v2;
+#endif
+}cl_uchar2;
+
+typedef union
+{
+    cl_uchar  CL_ALIGNED(4) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_uchar  x, y, z, w; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3; };
+   __extension__ struct{ cl_uchar2 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__) 
+    __cl_uchar2     v2[2];
+#endif
+#if defined( __CL_UCHAR4__) 
+    __cl_uchar4     v4;
+#endif
+}cl_uchar4;
+
+/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
+typedef  cl_uchar4  cl_uchar3;
+
+typedef union
+{
+    cl_uchar   CL_ALIGNED(8) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_uchar  x, y, z, w; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_uchar4 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__) 
+    __cl_uchar2     v2[4];
+#endif
+#if defined( __CL_UCHAR4__) 
+    __cl_uchar4     v4[2];
+#endif
+#if defined( __CL_UCHAR8__ )
+    __cl_uchar8     v8;
+#endif
+}cl_uchar8;
+
+typedef union
+{
+    cl_uchar  CL_ALIGNED(16) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_uchar  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_uchar8 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__) 
+    __cl_uchar2     v2[8];
+#endif
+#if defined( __CL_UCHAR4__) 
+    __cl_uchar4     v4[4];
+#endif
+#if defined( __CL_UCHAR8__ )
+    __cl_uchar8     v8[2];
+#endif
+#if defined( __CL_UCHAR16__ )
+    __cl_uchar16    v16;
+#endif
+}cl_uchar16;
+
+
+/* ---- cl_shortn ---- */
+typedef union
+{
+    cl_short  CL_ALIGNED(4) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_short  x, y; };
+   __extension__ struct{ cl_short  s0, s1; };
+   __extension__ struct{ cl_short  lo, hi; };
+#endif
+#if defined( __CL_SHORT2__) 
+    __cl_short2     v2;
+#endif
+}cl_short2;
+
+typedef union
+{
+    cl_short  CL_ALIGNED(8) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_short  x, y, z, w; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3; };
+   __extension__ struct{ cl_short2 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__) 
+    __cl_short2     v2[2];
+#endif
+#if defined( __CL_SHORT4__) 
+    __cl_short4     v4;
+#endif
+}cl_short4;
+
+/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
+typedef  cl_short4  cl_short3;
+
+typedef union
+{
+    cl_short   CL_ALIGNED(16) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_short  x, y, z, w; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_short4 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__) 
+    __cl_short2     v2[4];
+#endif
+#if defined( __CL_SHORT4__) 
+    __cl_short4     v4[2];
+#endif
+#if defined( __CL_SHORT8__ )
+    __cl_short8     v8;
+#endif
+}cl_short8;
+
+typedef union
+{
+    cl_short  CL_ALIGNED(32) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_short8 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__) 
+    __cl_short2     v2[8];
+#endif
+#if defined( __CL_SHORT4__) 
+    __cl_short4     v4[4];
+#endif
+#if defined( __CL_SHORT8__ )
+    __cl_short8     v8[2];
+#endif
+#if defined( __CL_SHORT16__ )
+    __cl_short16    v16;
+#endif
+}cl_short16;
+
+
+/* ---- cl_ushortn ---- */
+typedef union
+{
+    cl_ushort  CL_ALIGNED(4) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_ushort  x, y; };
+   __extension__ struct{ cl_ushort  s0, s1; };
+   __extension__ struct{ cl_ushort  lo, hi; };
+#endif
+#if defined( __CL_USHORT2__) 
+    __cl_ushort2     v2;
+#endif
+}cl_ushort2;
+
+typedef union
+{
+    cl_ushort  CL_ALIGNED(8) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_ushort  x, y, z, w; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3; };
+   __extension__ struct{ cl_ushort2 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__) 
+    __cl_ushort2     v2[2];
+#endif
+#if defined( __CL_USHORT4__) 
+    __cl_ushort4     v4;
+#endif
+}cl_ushort4;
+
+/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
+typedef  cl_ushort4  cl_ushort3;
+
+typedef union
+{
+    cl_ushort   CL_ALIGNED(16) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_ushort  x, y, z, w; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_ushort4 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__) 
+    __cl_ushort2     v2[4];
+#endif
+#if defined( __CL_USHORT4__) 
+    __cl_ushort4     v4[2];
+#endif
+#if defined( __CL_USHORT8__ )
+    __cl_ushort8     v8;
+#endif
+}cl_ushort8;
+
+typedef union
+{
+    cl_ushort  CL_ALIGNED(32) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_ushort8 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__) 
+    __cl_ushort2     v2[8];
+#endif
+#if defined( __CL_USHORT4__) 
+    __cl_ushort4     v4[4];
+#endif
+#if defined( __CL_USHORT8__ )
+    __cl_ushort8     v8[2];
+#endif
+#if defined( __CL_USHORT16__ )
+    __cl_ushort16    v16;
+#endif
+}cl_ushort16;
+
+/* ---- cl_intn ---- */
+typedef union
+{
+    cl_int  CL_ALIGNED(8) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_int  x, y; };
+   __extension__ struct{ cl_int  s0, s1; };
+   __extension__ struct{ cl_int  lo, hi; };
+#endif
+#if defined( __CL_INT2__) 
+    __cl_int2     v2;
+#endif
+}cl_int2;
+
+typedef union
+{
+    cl_int  CL_ALIGNED(16) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_int  x, y, z, w; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3; };
+   __extension__ struct{ cl_int2 lo, hi; };
+#endif
+#if defined( __CL_INT2__) 
+    __cl_int2     v2[2];
+#endif
+#if defined( __CL_INT4__) 
+    __cl_int4     v4;
+#endif
+}cl_int4;
+
+/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
+typedef  cl_int4  cl_int3;
+
+typedef union
+{
+    cl_int   CL_ALIGNED(32) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_int  x, y, z, w; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_int4 lo, hi; };
+#endif
+#if defined( __CL_INT2__) 
+    __cl_int2     v2[4];
+#endif
+#if defined( __CL_INT4__) 
+    __cl_int4     v4[2];
+#endif
+#if defined( __CL_INT8__ )
+    __cl_int8     v8;
+#endif
+}cl_int8;
+
+typedef union
+{
+    cl_int  CL_ALIGNED(64) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_int8 lo, hi; };
+#endif
+#if defined( __CL_INT2__) 
+    __cl_int2     v2[8];
+#endif
+#if defined( __CL_INT4__) 
+    __cl_int4     v4[4];
+#endif
+#if defined( __CL_INT8__ )
+    __cl_int8     v8[2];
+#endif
+#if defined( __CL_INT16__ )
+    __cl_int16    v16;
+#endif
+}cl_int16;
+
+
+/* ---- cl_uintn ---- */
+typedef union
+{
+    cl_uint  CL_ALIGNED(8) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_uint  x, y; };
+   __extension__ struct{ cl_uint  s0, s1; };
+   __extension__ struct{ cl_uint  lo, hi; };
+#endif
+#if defined( __CL_UINT2__) 
+    __cl_uint2     v2;
+#endif
+}cl_uint2;
+
+typedef union
+{
+    cl_uint  CL_ALIGNED(16) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_uint  x, y, z, w; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3; };
+   __extension__ struct{ cl_uint2 lo, hi; };
+#endif
+#if defined( __CL_UINT2__) 
+    __cl_uint2     v2[2];
+#endif
+#if defined( __CL_UINT4__) 
+    __cl_uint4     v4;
+#endif
+}cl_uint4;
+
+/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
+typedef  cl_uint4  cl_uint3;
+
+typedef union
+{
+    cl_uint   CL_ALIGNED(32) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_uint  x, y, z, w; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_uint4 lo, hi; };
+#endif
+#if defined( __CL_UINT2__) 
+    __cl_uint2     v2[4];
+#endif
+#if defined( __CL_UINT4__) 
+    __cl_uint4     v4[2];
+#endif
+#if defined( __CL_UINT8__ )
+    __cl_uint8     v8;
+#endif
+}cl_uint8;
+
+typedef union
+{
+    cl_uint  CL_ALIGNED(64) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_uint8 lo, hi; };
+#endif
+#if defined( __CL_UINT2__) 
+    __cl_uint2     v2[8];
+#endif
+#if defined( __CL_UINT4__) 
+    __cl_uint4     v4[4];
+#endif
+#if defined( __CL_UINT8__ )
+    __cl_uint8     v8[2];
+#endif
+#if defined( __CL_UINT16__ )
+    __cl_uint16    v16;
+#endif
+}cl_uint16;
+
+/* ---- cl_longn ---- */
+typedef union
+{
+    cl_long  CL_ALIGNED(16) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_long  x, y; };
+   __extension__ struct{ cl_long  s0, s1; };
+   __extension__ struct{ cl_long  lo, hi; };
+#endif
+#if defined( __CL_LONG2__) 
+    __cl_long2     v2;
+#endif
+}cl_long2;
+
+typedef union
+{
+    cl_long  CL_ALIGNED(32) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_long  x, y, z, w; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3; };
+   __extension__ struct{ cl_long2 lo, hi; };
+#endif
+#if defined( __CL_LONG2__) 
+    __cl_long2     v2[2];
+#endif
+#if defined( __CL_LONG4__) 
+    __cl_long4     v4;
+#endif
+}cl_long4;
+
+/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
+typedef  cl_long4  cl_long3;
+
+typedef union
+{
+    cl_long   CL_ALIGNED(64) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_long  x, y, z, w; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_long4 lo, hi; };
+#endif
+#if defined( __CL_LONG2__) 
+    __cl_long2     v2[4];
+#endif
+#if defined( __CL_LONG4__) 
+    __cl_long4     v4[2];
+#endif
+#if defined( __CL_LONG8__ )
+    __cl_long8     v8;
+#endif
+}cl_long8;
+
+typedef union
+{
+    cl_long  CL_ALIGNED(128) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_long8 lo, hi; };
+#endif
+#if defined( __CL_LONG2__) 
+    __cl_long2     v2[8];
+#endif
+#if defined( __CL_LONG4__) 
+    __cl_long4     v4[4];
+#endif
+#if defined( __CL_LONG8__ )
+    __cl_long8     v8[2];
+#endif
+#if defined( __CL_LONG16__ )
+    __cl_long16    v16;
+#endif
+}cl_long16;
+
+
+/* ---- cl_ulongn ---- */
+typedef union
+{
+    cl_ulong  CL_ALIGNED(16) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_ulong  x, y; };
+   __extension__ struct{ cl_ulong  s0, s1; };
+   __extension__ struct{ cl_ulong  lo, hi; };
+#endif
+#if defined( __CL_ULONG2__) 
+    __cl_ulong2     v2;
+#endif
+}cl_ulong2;
+
+typedef union
+{
+    cl_ulong  CL_ALIGNED(32) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_ulong  x, y, z, w; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3; };
+   __extension__ struct{ cl_ulong2 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__) 
+    __cl_ulong2     v2[2];
+#endif
+#if defined( __CL_ULONG4__) 
+    __cl_ulong4     v4;
+#endif
+}cl_ulong4;
+
+/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
+typedef  cl_ulong4  cl_ulong3;
+
+typedef union
+{
+    cl_ulong   CL_ALIGNED(64) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_ulong  x, y, z, w; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_ulong4 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__) 
+    __cl_ulong2     v2[4];
+#endif
+#if defined( __CL_ULONG4__) 
+    __cl_ulong4     v4[2];
+#endif
+#if defined( __CL_ULONG8__ )
+    __cl_ulong8     v8;
+#endif
+}cl_ulong8;
+
+typedef union
+{
+    cl_ulong  CL_ALIGNED(128) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_ulong8 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__) 
+    __cl_ulong2     v2[8];
+#endif
+#if defined( __CL_ULONG4__) 
+    __cl_ulong4     v4[4];
+#endif
+#if defined( __CL_ULONG8__ )
+    __cl_ulong8     v8[2];
+#endif
+#if defined( __CL_ULONG16__ )
+    __cl_ulong16    v16;
+#endif
+}cl_ulong16;
+
+
+/* --- cl_floatn ---- */
+
+typedef union
+{
+    cl_float  CL_ALIGNED(8) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_float  x, y; };
+   __extension__ struct{ cl_float  s0, s1; };
+   __extension__ struct{ cl_float  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__) 
+    __cl_float2     v2;
+#endif
+}cl_float2;
+
+typedef union
+{
+    cl_float  CL_ALIGNED(16) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_float   x, y, z, w; };
+   __extension__ struct{ cl_float   s0, s1, s2, s3; };
+   __extension__ struct{ cl_float2  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__) 
+    __cl_float2     v2[2];
+#endif
+#if defined( __CL_FLOAT4__) 
+    __cl_float4     v4;
+#endif
+}cl_float4;
+
+/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
+typedef  cl_float4  cl_float3;
+
+typedef union
+{
+    cl_float   CL_ALIGNED(32) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_float   x, y, z, w; };
+   __extension__ struct{ cl_float   s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_float4  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__) 
+    __cl_float2     v2[4];
+#endif
+#if defined( __CL_FLOAT4__) 
+    __cl_float4     v4[2];
+#endif
+#if defined( __CL_FLOAT8__ )
+    __cl_float8     v8;
+#endif
+}cl_float8;
+
+typedef union
+{
+    cl_float  CL_ALIGNED(64) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_float  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_float8 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__) 
+    __cl_float2     v2[8];
+#endif
+#if defined( __CL_FLOAT4__) 
+    __cl_float4     v4[4];
+#endif
+#if defined( __CL_FLOAT8__ )
+    __cl_float8     v8[2];
+#endif
+#if defined( __CL_FLOAT16__ )
+    __cl_float16    v16;
+#endif
+}cl_float16;
+
+/* --- cl_doublen ---- */
+
+typedef union
+{
+    cl_double  CL_ALIGNED(16) s[2];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_double  x, y; };
+   __extension__ struct{ cl_double s0, s1; };
+   __extension__ struct{ cl_double lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__) 
+    __cl_double2     v2;
+#endif
+}cl_double2;
+
+typedef union
+{
+    cl_double  CL_ALIGNED(32) s[4];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_double  x, y, z, w; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3; };
+   __extension__ struct{ cl_double2 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__) 
+    __cl_double2     v2[2];
+#endif
+#if defined( __CL_DOUBLE4__) 
+    __cl_double4     v4;
+#endif
+}cl_double4;
+
+/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
+typedef  cl_double4  cl_double3;
+
+typedef union
+{
+    cl_double   CL_ALIGNED(64) s[8];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_double  x, y, z, w; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_double4 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__) 
+    __cl_double2     v2[4];
+#endif
+#if defined( __CL_DOUBLE4__) 
+    __cl_double4     v4[2];
+#endif
+#if defined( __CL_DOUBLE8__ )
+    __cl_double8     v8;
+#endif
+}cl_double8;
+
+typedef union
+{
+    cl_double  CL_ALIGNED(128) s[16];
+#if defined( CL_NAMED_STRUCT_SUPPORTED )
+   __extension__ struct{ cl_double  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_double8 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__) 
+    __cl_double2     v2[8];
+#endif
+#if defined( __CL_DOUBLE4__) 
+    __cl_double4     v4[4];
+#endif
+#if defined( __CL_DOUBLE8__ )
+    __cl_double8     v8[2];
+#endif
+#if defined( __CL_DOUBLE16__ )
+    __cl_double16    v16;
+#endif
+}cl_double16;
+
+/* Macro to facilitate debugging 
+ * Usage:
+ *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. 
+ *   The first line ends with:   CL_PROGRAM_STRING_BEGIN \"
+ *   Each line thereafter of OpenCL C source must end with: \n\
+ *   The last line ends in ";
+ *
+ *   Example:
+ *
+ *   const char *my_program = CL_PROGRAM_STRING_BEGIN "\
+ *   kernel void foo( int a, float * b )             \n\
+ *   {                                               \n\
+ *      // my comment                                \n\
+ *      *b[ get_global_id(0)] = a;                   \n\
+ *   }                                               \n\
+ *   ";
+ *
+ * This should correctly set up the line, (column) and file information for your source 
+ * string so you can do source level debugging.
+ */
+#define  __CL_STRINGIFY( _x )               # _x
+#define  _CL_STRINGIFY( _x )                __CL_STRINGIFY( _x )
+#define  CL_PROGRAM_STRING_DEBUG_INFO       "#line "  _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" 
+
+//  CL.h contents
+/******************************************************************************/
+
+typedef struct _cl_platform_id *    cl_platform_id;
+typedef struct _cl_device_id *      cl_device_id;
+typedef struct _cl_context *        cl_context;
+typedef struct _cl_command_queue *  cl_command_queue;
+typedef struct _cl_mem *            cl_mem;
+typedef struct _cl_program *        cl_program;
+typedef struct _cl_kernel *         cl_kernel;
+typedef struct _cl_event *          cl_event;
+typedef struct _cl_sampler *        cl_sampler;
+
+typedef cl_uint             cl_bool;                     /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ 
+typedef cl_ulong            cl_bitfield;
+typedef cl_bitfield         cl_device_type;
+typedef cl_uint             cl_platform_info;
+typedef cl_uint             cl_device_info;
+typedef cl_bitfield         cl_device_fp_config;
+typedef cl_uint             cl_device_mem_cache_type;
+typedef cl_uint             cl_device_local_mem_type;
+typedef cl_bitfield         cl_device_exec_capabilities;
+typedef cl_bitfield         cl_command_queue_properties;
+typedef intptr_t            cl_device_partition_property;
+typedef cl_bitfield         cl_device_affinity_domain;
+
+typedef intptr_t                       cl_context_properties;
+typedef cl_uint             cl_context_info;
+typedef cl_uint             cl_command_queue_info;
+typedef cl_uint             cl_channel_order;
+typedef cl_uint             cl_channel_type;
+typedef cl_bitfield         cl_mem_flags;
+typedef cl_uint             cl_mem_object_type;
+typedef cl_uint             cl_mem_info;
+typedef cl_uint             cl_image_info;
+typedef cl_uint             cl_buffer_create_type;
+typedef cl_uint             cl_addressing_mode;
+typedef cl_uint             cl_filter_mode;
+typedef cl_uint             cl_sampler_info;
+typedef cl_bitfield         cl_map_flags;
+typedef cl_uint             cl_program_info;
+typedef cl_uint             cl_program_build_info;
+typedef cl_int              cl_build_status;
+typedef cl_uint             cl_kernel_info;
+typedef cl_uint             cl_kernel_work_group_info;
+typedef cl_uint             cl_event_info;
+typedef cl_uint             cl_command_type;
+typedef cl_uint             cl_profiling_info;
+
+typedef struct _cl_image_format {
+    cl_channel_order        image_channel_order;
+    cl_channel_type         image_channel_data_type;
+} cl_image_format;
+
+typedef struct _cl_image_desc {
+       cl_mem_object_type      image_type;
+       size_t                  image_width;
+       size_t                  image_height;
+       size_t                  image_depth;
+       size_t                  image_array_size;
+       size_t                  image_row_pitch;
+       size_t                  image_slice_pitch;
+       cl_uint                 num_mip_levels;
+       cl_uint                 num_samples;
+       cl_mem                  buffer;
+   } cl_image_desc;
+
+typedef struct _cl_buffer_region {
+    size_t                  origin;
+    size_t                  size;
+} cl_buffer_region;
+
+/******************************************************************************/
+
+/* Error Codes */
+#define CL_SUCCESS                                  0
+#define CL_DEVICE_NOT_FOUND                         -1
+#define CL_DEVICE_NOT_AVAILABLE                     -2
+#define CL_COMPILER_NOT_AVAILABLE                   -3
+#define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
+#define CL_OUT_OF_RESOURCES                         -5
+#define CL_OUT_OF_HOST_MEMORY                       -6
+#define CL_PROFILING_INFO_NOT_AVAILABLE             -7
+#define CL_MEM_COPY_OVERLAP                         -8
+#define CL_IMAGE_FORMAT_MISMATCH                    -9
+#define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
+#define CL_BUILD_PROGRAM_FAILURE                    -11
+#define CL_MAP_FAILURE                              -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
+#define CL_COMPILE_PROGRAM_FAILURE                  -15
+#define CL_LINKER_NOT_AVAILABLE                     -16
+#define CL_LINK_PROGRAM_FAILURE                     -17
+#define CL_DEVICE_PARTITION_FAILED                  -18
+#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
+
+#define CL_INVALID_VALUE                            -30
+#define CL_INVALID_DEVICE_TYPE                      -31
+#define CL_INVALID_PLATFORM                         -32
+#define CL_INVALID_DEVICE                           -33
+#define CL_INVALID_CONTEXT                          -34
+#define CL_INVALID_QUEUE_PROPERTIES                 -35
+#define CL_INVALID_COMMAND_QUEUE                    -36
+#define CL_INVALID_HOST_PTR                         -37
+#define CL_INVALID_MEM_OBJECT                       -38
+#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR          -39
+#define CL_INVALID_IMAGE_SIZE                       -40
+#define CL_INVALID_SAMPLER                          -41
+#define CL_INVALID_BINARY                           -42
+#define CL_INVALID_BUILD_OPTIONS                    -43
+#define CL_INVALID_PROGRAM                          -44
+#define CL_INVALID_PROGRAM_EXECUTABLE               -45
+#define CL_INVALID_KERNEL_NAME                      -46
+#define CL_INVALID_KERNEL_DEFINITION                -47
+#define CL_INVALID_KERNEL                           -48
+#define CL_INVALID_ARG_INDEX                        -49
+#define CL_INVALID_ARG_VALUE                        -50
+#define CL_INVALID_ARG_SIZE                         -51
+#define CL_INVALID_KERNEL_ARGS                      -52
+#define CL_INVALID_WORK_DIMENSION                   -53
+#define CL_INVALID_WORK_GROUP_SIZE                  -54
+#define CL_INVALID_WORK_ITEM_SIZE                   -55
+#define CL_INVALID_GLOBAL_OFFSET                    -56
+#define CL_INVALID_EVENT_WAIT_LIST                  -57
+#define CL_INVALID_EVENT                            -58
+#define CL_INVALID_OPERATION                        -59
+#define CL_INVALID_GL_OBJECT                        -60
+#define CL_INVALID_BUFFER_SIZE                      -61
+#define CL_INVALID_MIP_LEVEL                        -62
+#define CL_INVALID_GLOBAL_WORK_SIZE                 -63
+#define CL_INVALID_PROPERTY                         -64
+#define CL_INVALID_IMAGE_DESCRIPTOR                 -65
+#define CL_INVALID_COMPILER_OPTIONS                 -66
+#define CL_INVALID_LINKER_OPTIONS                   -67
+#define CL_INVALID_DEVICE_PARTITION_COUNT           -68
+
+/* OpenCL Version */
+#define CL_VERSION_1_0                              1
+#define CL_VERSION_1_1                              1
+#define CL_VERSION_1_2                              1
+
+/* cl_bool */
+#define CL_FALSE                                    0
+#define CL_TRUE                                     1
+#define CL_BLOCKING                                 CL_TRUE
+#define CL_NON_BLOCKING                             CL_FALSE
+
+/* cl_platform_info */
+#define CL_PLATFORM_PROFILE                         0x0900
+#define CL_PLATFORM_VERSION                         0x0901
+#define CL_PLATFORM_NAME                            0x0902
+#define CL_PLATFORM_VENDOR                          0x0903
+#define CL_PLATFORM_EXTENSIONS                      0x0904
+
+/* cl_device_type - bitfield */
+#define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
+#define CL_DEVICE_TYPE_CPU                          (1 << 1)
+#define CL_DEVICE_TYPE_GPU                          (1 << 2)
+#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
+#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
+#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
+
+/* cl_device_info */
+#define CL_DEVICE_TYPE                              0x1000
+#define CL_DEVICE_VENDOR_ID                         0x1001
+#define CL_DEVICE_MAX_COMPUTE_UNITS                 0x1002
+#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS          0x1003
+#define CL_DEVICE_MAX_WORK_GROUP_SIZE               0x1004
+#define CL_DEVICE_MAX_WORK_ITEM_SIZES               0x1005
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR       0x1006
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT      0x1007
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT        0x1008
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG       0x1009
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT      0x100A
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE     0x100B
+#define CL_DEVICE_MAX_CLOCK_FREQUENCY               0x100C
+#define CL_DEVICE_ADDRESS_BITS                      0x100D
+#define CL_DEVICE_MAX_READ_IMAGE_ARGS               0x100E
+#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS              0x100F
+#define CL_DEVICE_MAX_MEM_ALLOC_SIZE                0x1010
+#define CL_DEVICE_IMAGE2D_MAX_WIDTH                 0x1011
+#define CL_DEVICE_IMAGE2D_MAX_HEIGHT                0x1012
+#define CL_DEVICE_IMAGE3D_MAX_WIDTH                 0x1013
+#define CL_DEVICE_IMAGE3D_MAX_HEIGHT                0x1014
+#define CL_DEVICE_IMAGE3D_MAX_DEPTH                 0x1015
+#define CL_DEVICE_IMAGE_SUPPORT                     0x1016
+#define CL_DEVICE_MAX_PARAMETER_SIZE                0x1017
+#define CL_DEVICE_MAX_SAMPLERS                      0x1018
+#define CL_DEVICE_MEM_BASE_ADDR_ALIGN               0x1019
+#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE          0x101A
+#define CL_DEVICE_SINGLE_FP_CONFIG                  0x101B
+#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE             0x101C
+#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE         0x101D
+#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE             0x101E
+#define CL_DEVICE_GLOBAL_MEM_SIZE                   0x101F
+#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE          0x1020
+#define CL_DEVICE_MAX_CONSTANT_ARGS                 0x1021
+#define CL_DEVICE_LOCAL_MEM_TYPE                    0x1022
+#define CL_DEVICE_LOCAL_MEM_SIZE                    0x1023
+#define CL_DEVICE_ERROR_CORRECTION_SUPPORT          0x1024
+#define CL_DEVICE_PROFILING_TIMER_RESOLUTION        0x1025
+#define CL_DEVICE_ENDIAN_LITTLE                     0x1026
+#define CL_DEVICE_AVAILABLE                         0x1027
+#define CL_DEVICE_COMPILER_AVAILABLE                0x1028
+#define CL_DEVICE_EXECUTION_CAPABILITIES            0x1029
+#define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
+#define CL_DEVICE_NAME                              0x102B
+#define CL_DEVICE_VENDOR                            0x102C
+#define CL_DRIVER_VERSION                           0x102D
+#define CL_DEVICE_PROFILE                           0x102E
+#define CL_DEVICE_VERSION                           0x102F
+#define CL_DEVICE_EXTENSIONS                        0x1030
+#define CL_DEVICE_PLATFORM                          0x1031
+#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
+/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR          0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT         0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT           0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG          0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT         0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
+#define CL_DEVICE_OPENCL_C_VERSION                  0x103D
+#define CL_DEVICE_LINKER_AVAILABLE                  0x103E
+#define CL_DEVICE_BUILT_IN_KERNELS                  0x103F
+#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE             0x1040
+#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE              0x1041
+#define CL_DEVICE_PARENT_DEVICE                     0x1042
+#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES         0x1043
+#define CL_DEVICE_PARTITION_PROPERTIES              0x1044
+#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN         0x1045
+#define CL_DEVICE_PARTITION_TYPE                    0x1046
+#define CL_DEVICE_REFERENCE_COUNT                   0x1047
+#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC       0x1048
+#define CL_DEVICE_PRINTF_BUFFER_SIZE                0x1049
+#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT             0x104A
+#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT      0x104B
+
+/* cl_device_fp_config - bitfield */
+#define CL_FP_DENORM                                (1 << 0)
+#define CL_FP_INF_NAN                               (1 << 1)
+#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
+#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
+#define CL_FP_ROUND_TO_INF                          (1 << 4)
+#define CL_FP_FMA                                   (1 << 5)
+#define CL_FP_SOFT_FLOAT                            (1 << 6)
+#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
+
+/* cl_device_mem_cache_type */
+#define CL_NONE                                     0x0
+#define CL_READ_ONLY_CACHE                          0x1
+#define CL_READ_WRITE_CACHE                         0x2
+
+/* cl_device_local_mem_type */
+#define CL_LOCAL                                    0x1
+#define CL_GLOBAL                                   0x2
+
+/* cl_device_exec_capabilities - bitfield */
+#define CL_EXEC_KERNEL                              (1 << 0)
+#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
+
+/* cl_command_queue_properties - bitfield */
+#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
+#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
+
+/* cl_context_info  */
+#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
+#define CL_CONTEXT_DEVICES                          0x1081
+#define CL_CONTEXT_PROPERTIES                       0x1082
+#define CL_CONTEXT_NUM_DEVICES                      0x1083
+
+/* cl_context_info + cl_context_properties */
+#define CL_CONTEXT_PLATFORM                         0x1084
+#define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
+
+/* cl_device_partition_property */
+#define CL_DEVICE_PARTITION_EQUALLY                 0x1086
+#define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
+#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END      0x0
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN      0x1088
+
+/* cl_device_affinity_domain */
+#define CL_DEVICE_AFFINITY_DOMAIN_NUMA                     (1 << 0)
+#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE                 (1 << 1)
+#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE                 (1 << 2)
+#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE                 (1 << 3)
+#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE                 (1 << 4)
+#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE       (1 << 5)
+
+/* cl_command_queue_info */
+#define CL_QUEUE_CONTEXT                            0x1090
+#define CL_QUEUE_DEVICE                             0x1091
+#define CL_QUEUE_REFERENCE_COUNT                    0x1092
+#define CL_QUEUE_PROPERTIES                         0x1093
+
+/* cl_mem_flags - bitfield */
+#define CL_MEM_READ_WRITE                           (1 << 0)
+#define CL_MEM_WRITE_ONLY                           (1 << 1)
+#define CL_MEM_READ_ONLY                            (1 << 2)
+#define CL_MEM_USE_HOST_PTR                         (1 << 3)
+#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
+#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
+// reserved                                         (1 << 6)
+#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
+#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
+#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
+
+/* cl_mem_migration_flags - bitfield */
+#define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
+#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED     (1 << 1)
+
+/* cl_channel_order */
+#define CL_R                                        0x10B0
+#define CL_A                                        0x10B1
+#define CL_RG                                       0x10B2
+#define CL_RA                                       0x10B3
+#define CL_RGB                                      0x10B4
+#define CL_RGBA                                     0x10B5
+#define CL_BGRA                                     0x10B6
+#define CL_ARGB                                     0x10B7
+#define CL_INTENSITY                                0x10B8
+#define CL_LUMINANCE                                0x10B9
+#define CL_Rx                                       0x10BA
+#define CL_RGx                                      0x10BB
+#define CL_RGBx                                     0x10BC
+#define CL_DEPTH                                    0x10BD
+#define CL_DEPTH_STENCIL                            0x10BE
+
+/* cl_channel_type */
+#define CL_SNORM_INT8                               0x10D0
+#define CL_SNORM_INT16                              0x10D1
+#define CL_UNORM_INT8                               0x10D2
+#define CL_UNORM_INT16                              0x10D3
+#define CL_UNORM_SHORT_565                          0x10D4
+#define CL_UNORM_SHORT_555                          0x10D5
+#define CL_UNORM_INT_101010                         0x10D6
+#define CL_SIGNED_INT8                              0x10D7
+#define CL_SIGNED_INT16                             0x10D8
+#define CL_SIGNED_INT32                             0x10D9
+#define CL_UNSIGNED_INT8                            0x10DA
+#define CL_UNSIGNED_INT16                           0x10DB
+#define CL_UNSIGNED_INT32                           0x10DC
+#define CL_HALF_FLOAT                               0x10DD
+#define CL_FLOAT                                    0x10DE
+#define CL_UNORM_INT24                              0x10DF
+
+/* cl_mem_object_type */
+#define CL_MEM_OBJECT_BUFFER                        0x10F0
+#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
+#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
+#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
+
+/* cl_mem_info */
+#define CL_MEM_TYPE                                 0x1100
+#define CL_MEM_FLAGS                                0x1101
+#define CL_MEM_SIZE                                 0x1102
+#define CL_MEM_HOST_PTR                             0x1103
+#define CL_MEM_MAP_COUNT                            0x1104
+#define CL_MEM_REFERENCE_COUNT                      0x1105
+#define CL_MEM_CONTEXT                              0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
+#define CL_MEM_OFFSET                               0x1108
+
+/* cl_image_info */
+#define CL_IMAGE_FORMAT                             0x1110
+#define CL_IMAGE_ELEMENT_SIZE                       0x1111
+#define CL_IMAGE_ROW_PITCH                          0x1112
+#define CL_IMAGE_SLICE_PITCH                        0x1113
+#define CL_IMAGE_WIDTH                              0x1114
+#define CL_IMAGE_HEIGHT                             0x1115
+#define CL_IMAGE_DEPTH                              0x1116
+#define CL_IMAGE_ARRAY_SIZE                         0x1117
+#define CL_IMAGE_BUFFER                             0x1118
+#define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
+#define CL_IMAGE_NUM_SAMPLES                        0x111A
+
+/* cl_addressing_mode */
+#define CL_ADDRESS_NONE                             0x1130
+#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
+#define CL_ADDRESS_CLAMP                            0x1132
+#define CL_ADDRESS_REPEAT                           0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134
+
+/* cl_filter_mode */
+#define CL_FILTER_NEAREST                           0x1140
+#define CL_FILTER_LINEAR                            0x1141
+
+/* cl_sampler_info */
+#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
+#define CL_SAMPLER_CONTEXT                          0x1151
+#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
+#define CL_SAMPLER_ADDRESSING_MODE                  0x1153
+#define CL_SAMPLER_FILTER_MODE                      0x1154
+
+/* cl_map_flags - bitfield */
+#define CL_MAP_READ                                 (1 << 0)
+#define CL_MAP_WRITE                                (1 << 1)
+#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
+
+/* cl_program_info */
+#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
+#define CL_PROGRAM_CONTEXT                          0x1161
+#define CL_PROGRAM_NUM_DEVICES                      0x1162
+#define CL_PROGRAM_DEVICES                          0x1163
+#define CL_PROGRAM_SOURCE                           0x1164
+#define CL_PROGRAM_BINARY_SIZES                     0x1165
+#define CL_PROGRAM_BINARIES                         0x1166
+#define CL_PROGRAM_NUM_KERNELS                      0x1167
+#define CL_PROGRAM_KERNEL_NAMES                     0x1168
+
+/* cl_program_build_info */
+#define CL_PROGRAM_BUILD_STATUS                     0x1181
+#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
+#define CL_PROGRAM_BUILD_LOG                        0x1183
+#define CL_PROGRAM_BINARY_TYPE                      0x1184
+
+/* cl_build_status */
+#define CL_BUILD_SUCCESS                            0
+#define CL_BUILD_NONE                               -1
+#define CL_BUILD_ERROR                              -2
+#define CL_BUILD_IN_PROGRESS                        -3
+
+/* cl_kernel_info */
+#define CL_KERNEL_FUNCTION_NAME                     0x1190
+#define CL_KERNEL_NUM_ARGS                          0x1191
+#define CL_KERNEL_REFERENCE_COUNT                   0x1192
+#define CL_KERNEL_CONTEXT                           0x1193
+#define CL_KERNEL_PROGRAM                           0x1194
+#define CL_KERNEL_ATTRIBUTES                        0x1195
+
+/* cl_kernel_arg_info */
+#define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
+#define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
+#define CL_KERNEL_ARG_TYPE_NAME                     0x1198
+#define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
+#define CL_KERNEL_ARG_NAME                          0x119A
+
+/* cl_kernel_arg_address_qualifier */
+#define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
+#define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
+#define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
+#define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
+
+/* cl_kernel_arg_access_qualifier */
+#define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
+#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
+#define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
+#define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
+
+/* cl_kernel_arg_type_qualifer */
+#define CL_KERNEL_ARG_TYPE_NONE                     0
+#define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
+#define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
+#define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
+
+/* cl_kernel_work_group_info */
+#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
+#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
+#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
+#define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
+
+/* cl_event_info  */
+#define CL_EVENT_COMMAND_QUEUE                      0x11D0
+#define CL_EVENT_COMMAND_TYPE                       0x11D1
+#define CL_EVENT_REFERENCE_COUNT                    0x11D2
+#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
+#define CL_EVENT_CONTEXT                            0x11D4
+
+/* cl_command_type */
+#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
+#define CL_COMMAND_TASK                             0x11F1
+#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
+#define CL_COMMAND_READ_BUFFER                      0x11F3
+#define CL_COMMAND_WRITE_BUFFER                     0x11F4
+#define CL_COMMAND_COPY_BUFFER                      0x11F5
+#define CL_COMMAND_READ_IMAGE                       0x11F6
+#define CL_COMMAND_WRITE_IMAGE                      0x11F7
+#define CL_COMMAND_COPY_IMAGE                       0x11F8
+#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
+#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
+#define CL_COMMAND_MAP_BUFFER                       0x11FB
+#define CL_COMMAND_MAP_IMAGE                        0x11FC
+#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
+#define CL_COMMAND_MARKER                           0x11FE
+#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
+#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
+#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
+#define CL_COMMAND_USER                             0x1204
+#define CL_COMMAND_BARRIER                          0x1205
+#define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
+#define CL_COMMAND_FILL_BUFFER                      0x1207
+#define CL_COMMAND_FILL_IMAGE                       0x1208
+
+/* command execution status */
+#define CL_COMPLETE                                 0x0
+#define CL_RUNNING                                  0x1
+#define CL_SUBMITTED                                0x2
+#define CL_QUEUED                                   0x3
+  
+/* cl_buffer_create_type  */
+#define CL_BUFFER_CREATE_TYPE_REGION                0x1220
+
+/* cl_profiling_info  */
+#define CL_PROFILING_COMMAND_QUEUED                 0x1280
+#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
+#define CL_PROFILING_COMMAND_START                  0x1282
+#define CL_PROFILING_COMMAND_END                    0x1283
+
+/********************************************************************************************************/
+
+/********************************************************************************************************/
+
+/*  Function signature typedef's */
+
+/* Platform API */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETPLATFORMIDS)(cl_uint          /* num_entries */,
+                 cl_platform_id * /* platforms */,
+                 cl_uint *        /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL * 
+PFNCLGETPLATFORMINFO)(cl_platform_id   /* platform */, 
+                  cl_platform_info /* param_name */,
+                  size_t           /* param_value_size */, 
+                  void *           /* param_value */,
+                  size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Device APIs */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETDEVICEIDS)(cl_platform_id   /* platform */,
+               cl_device_type   /* device_type */, 
+               cl_uint          /* num_entries */, 
+               cl_device_id *   /* devices */, 
+               cl_uint *        /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETDEVICEINFO)(cl_device_id    /* device */,
+                cl_device_info  /* param_name */, 
+                size_t          /* param_value_size */, 
+                void *          /* param_value */,
+                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLCREATESUBDEVICES)(cl_device_id                         /* in_device */,
+                  const cl_device_partition_property * /* properties */,
+                  cl_uint                              /* num_devices */,
+                  cl_device_id *                       /* out_devices */,
+                  cl_uint *                            /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRETAINDEVICE)(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRELEASEDEVICE)(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
+
+// Context APIs  
+typedef CL_API_ENTRY cl_context (CL_API_CALL *
+PFNCLCREATECONTEXT)(const cl_context_properties * /* properties */,
+                cl_uint                       /* num_devices */,
+                const cl_device_id *          /* devices */,
+                void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
+                void *                        /* user_data */,
+                cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_context (CL_API_CALL *
+PFNCLCREATECONTEXTFROMTYPE)(const cl_context_properties * /* properties */,
+                        cl_device_type                /* device_type */,
+                        void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
+                        void *                        /* user_data */,
+                        cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRETAINCONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRELEASECONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETCONTEXTINFO)(cl_context         /* context */, 
+                 cl_context_info    /* param_name */, 
+                 size_t             /* param_value_size */, 
+                 void *             /* param_value */, 
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Command Queue APIs */
+typedef CL_API_ENTRY cl_command_queue (CL_API_CALL *
+PFNCLCREATECOMMANDQUEUE)(cl_context                     /* context */, 
+                     cl_device_id                   /* device */, 
+                     cl_command_queue_properties    /* properties */,
+                     cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRETAINCOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRELEASECOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETCOMMANDQUEUEINFO)(cl_command_queue      /* command_queue */,
+                      cl_command_queue_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLSETCOMMANDQUEUEPROPERTY)(cl_command_queue              /* command_queue */,
+                          cl_command_queue_properties   /* properties */, 
+                          cl_bool                        /* enable */,
+                          cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Memory Object APIs */
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *
+PFNCLCREATEBUFFER)(cl_context   /* context */,
+               cl_mem_flags /* flags */,
+               size_t       /* size */,
+               void *       /* host_ptr */,
+               cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *
+PFNCLCREATESUBBUFFER)(cl_mem                           /* buffer */,
+               cl_mem_flags                    /* flags */,
+               cl_buffer_create_type    /* buffer_create_type */,
+               const void *                    /* buffer_create_info */,
+               cl_int *                        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *
+PFNCLCREATEIMAGE)(cl_context              /* context */,
+             cl_mem_flags            /* flags */,
+             const cl_image_format * /* image_format */,
+             const cl_image_desc *   /* image_desc */,
+             void *                  /* host_ptr */,
+             cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRETAINMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRELEASEMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETSUPPORTEDIMAGEFORMATS)(cl_context           /* context */,
+                           cl_mem_flags         /* flags */,
+                           cl_mem_object_type   /* image_type */,
+                           cl_uint              /* num_entries */,
+                           cl_image_format *    /* image_formats */,
+                           cl_uint *            /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETMEMOBJECTINFO)(cl_mem           /* memobj */,
+                   cl_mem_info      /* param_name */, 
+                   size_t           /* param_value_size */,
+                   void *           /* param_value */,
+                   size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETIMAGEINFO)(cl_mem           /* image */,
+               cl_image_info    /* param_name */, 
+               size_t           /* param_value_size */,
+               void *           /* param_value */,
+               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLSETMEMOBJECTDESTRUCTORCALLBACK)(  cl_mem /* memobj */, 
+                                    void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
+                                    void * /*user_data */ )             CL_API_SUFFIX__VERSION_1_1;  
+
+/* Sampler APIs  */
+typedef CL_API_ENTRY cl_sampler (CL_API_CALL *
+PFNCLCREATESAMPLER)(cl_context          /* context */,
+                cl_bool             /* normalized_coords */, 
+                cl_addressing_mode  /* addressing_mode */, 
+                cl_filter_mode      /* filter_mode */,
+                cl_int *            /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRETAINSAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRELEASESAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETSAMPLERINFO)(cl_sampler         /* sampler */,
+                 cl_sampler_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+/* Program Object APIs  */
+typedef CL_API_ENTRY cl_program (CL_API_CALL *
+PFNCLCREATEPROGRAMWITHSOURCE)(cl_context        /* context */,
+                          cl_uint           /* count */,
+                          const char **     /* strings */,
+                          const size_t *    /* lengths */,
+                          cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_program (CL_API_CALL *
+PFNCLCREATEPROGRAMWITHBINARY)(cl_context                     /* context */,
+                          cl_uint                        /* num_devices */,
+                          const cl_device_id *           /* device_list */,
+                          const size_t *                 /* lengths */,
+                          const unsigned char **         /* binaries */,
+                          cl_int *                       /* binary_status */,
+                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_program (CL_API_CALL *
+PFNCLCREATEPROGRAMWITHBUILTINKERNELS)(cl_context            /* context */,
+                                 cl_uint               /* num_devices */,
+                                 const cl_device_id *  /* device_list */,
+                                 const char *          /* kernel_names */,
+                                 cl_int *              /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRETAINPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRELEASEPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLBUILDPROGRAM)(cl_program           /* program */,
+               cl_uint              /* num_devices */,
+               const cl_device_id * /* device_list */,
+               const char *         /* options */, 
+               void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETPROGRAMINFO)(cl_program         /* program */,
+                 cl_program_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETPROGRAMBUILDINFO)(cl_program            /* program */,
+                      cl_device_id          /* device */,
+                      cl_program_build_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Kernel Object APIs */
+typedef CL_API_ENTRY cl_kernel (CL_API_CALL *
+PFNCLCREATEKERNEL)(cl_program      /* program */,
+               const char *    /* kernel_name */,
+               cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLCREATEKERNELSINPROGRAM)(cl_program     /* program */,
+                         cl_uint        /* num_kernels */,
+                         cl_kernel *    /* kernels */,
+                         cl_uint *      /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRETAINKERNEL)(cl_kernel    /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRELEASEKERNEL)(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLSETKERNELARG)(cl_kernel    /* kernel */,
+               cl_uint      /* arg_index */,
+               size_t       /* arg_size */,
+               const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETKERNELINFO)(cl_kernel       /* kernel */,
+                cl_kernel_info  /* param_name */,
+                size_t          /* param_value_size */,
+                void *          /* param_value */,
+                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETKERNELWORKGROUPINFO)(cl_kernel                  /* kernel */,
+                         cl_device_id               /* device */,
+                         cl_kernel_work_group_info  /* param_name */,
+                         size_t                     /* param_value_size */,
+                         void *                     /* param_value */,
+                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+// Event Object APIs
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLWAITFOREVENTS)(cl_uint             /* num_events */,
+                const cl_event *    /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETEVENTINFO)(cl_event         /* event */,
+               cl_event_info    /* param_name */,
+               size_t           /* param_value_size */,
+               void *           /* param_value */,
+               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_event (CL_API_CALL *
+PFNCLCREATEUSEREVENT)(cl_context    /* context */,
+                         cl_int *      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;               
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRETAINEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLRELEASEEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLSETUSEREVENTSTATUS)(cl_event   /* event */,
+                     cl_int     /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
+                     
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLSETEVENTCALLBACK)( cl_event    /* event */,
+                    cl_int      /* command_exec_callback_type */,
+                    void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+                    void *      /* user_data */) CL_API_SUFFIX__VERSION_1_1;
+
+/* Profiling APIs  */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETEVENTPROFILINGINFO)(cl_event            /* event */,
+                        cl_profiling_info   /* param_name */,
+                        size_t              /* param_value_size */,
+                        void *              /* param_value */,
+                        size_t *            /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+// Flush and Finish APIs
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLFLUSH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLFINISH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Enqueued Commands APIs */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUEREADBUFFER)(cl_command_queue    /* command_queue */,
+                    cl_mem              /* buffer */,
+                    cl_bool             /* blocking_read */,
+                    size_t              /* offset */,
+                    size_t              /* cb */, 
+                    void *              /* ptr */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUEREADBUFFERRECT)(cl_command_queue    /* command_queue */,
+                        cl_mem              /* buffer */,
+                        cl_bool             /* blocking_read */,
+                        const size_t *      /* buffer_origin */,
+                        const size_t *      /* host_origin */, 
+                        const size_t *      /* region */,
+                        size_t              /* buffer_row_pitch */,
+                        size_t              /* buffer_slice_pitch */,
+                        size_t              /* host_row_pitch */,
+                        size_t              /* host_slice_pitch */,                        
+                        void *              /* ptr */,
+                        cl_uint             /* num_events_in_wait_list */,
+                        const cl_event *    /* event_wait_list */,
+                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+                            
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUEWRITEBUFFER)(cl_command_queue   /* command_queue */, 
+                     cl_mem             /* buffer */, 
+                     cl_bool            /* blocking_write */, 
+                     size_t             /* offset */, 
+                     size_t             /* cb */, 
+                     const void *       /* ptr */, 
+                     cl_uint            /* num_events_in_wait_list */, 
+                     const cl_event *   /* event_wait_list */, 
+                     cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUEWRITEBUFFERRECT)(cl_command_queue    /* command_queue */,
+                         cl_mem              /* buffer */,
+                         cl_bool             /* blocking_write */,
+                         const size_t *      /* buffer_origin */,
+                         const size_t *      /* host_origin */, 
+                         const size_t *      /* region */,
+                         size_t              /* buffer_row_pitch */,
+                         size_t              /* buffer_slice_pitch */,
+                         size_t              /* host_row_pitch */,
+                         size_t              /* host_slice_pitch */,                        
+                         const void *        /* ptr */,
+                         cl_uint             /* num_events_in_wait_list */,
+                         const cl_event *    /* event_wait_list */,
+                         cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+                            
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUECOPYBUFFER)(cl_command_queue    /* command_queue */, 
+                    cl_mem              /* src_buffer */,
+                    cl_mem              /* dst_buffer */, 
+                    size_t              /* src_offset */,
+                    size_t              /* dst_offset */,
+                    size_t              /* cb */, 
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUECOPYBUFFERRECT)(cl_command_queue    /* command_queue */, 
+                        cl_mem              /* src_buffer */,
+                        cl_mem              /* dst_buffer */, 
+                        const size_t *      /* src_origin */,
+                        const size_t *      /* dst_origin */,
+                        const size_t *      /* region */, 
+                        size_t              /* src_row_pitch */,
+                        size_t              /* src_slice_pitch */,
+                        size_t              /* dst_row_pitch */,
+                        size_t              /* dst_slice_pitch */,
+                        cl_uint             /* num_events_in_wait_list */,
+                        const cl_event *    /* event_wait_list */,
+                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+                            
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUEREADIMAGE)(cl_command_queue     /* command_queue */,
+                   cl_mem               /* image */,
+                   cl_bool              /* blocking_read */, 
+                   const size_t *       /* origin[3] */,
+                   const size_t *       /* region[3] */,
+                   size_t               /* row_pitch */,
+                   size_t               /* slice_pitch */, 
+                   void *               /* ptr */,
+                   cl_uint              /* num_events_in_wait_list */,
+                   const cl_event *     /* event_wait_list */,
+                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUEWRITEIMAGE)(cl_command_queue    /* command_queue */,
+                    cl_mem              /* image */,
+                    cl_bool             /* blocking_write */, 
+                    const size_t *      /* origin[3] */,
+                    const size_t *      /* region[3] */,
+                    size_t              /* input_row_pitch */,
+                    size_t              /* input_slice_pitch */, 
+                    const void *        /* ptr */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUECOPYIMAGE)(cl_command_queue     /* command_queue */,
+                   cl_mem               /* src_image */,
+                   cl_mem               /* dst_image */, 
+                   const size_t *       /* src_origin[3] */,
+                   const size_t *       /* dst_origin[3] */,
+                   const size_t *       /* region[3] */, 
+                   cl_uint              /* num_events_in_wait_list */,
+                   const cl_event *     /* event_wait_list */,
+                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUECOPYIMAGETOBUFFER)(cl_command_queue /* command_queue */,
+                           cl_mem           /* src_image */,
+                           cl_mem           /* dst_buffer */, 
+                           const size_t *   /* src_origin[3] */,
+                           const size_t *   /* region[3] */, 
+                           size_t           /* dst_offset */,
+                           cl_uint          /* num_events_in_wait_list */,
+                           const cl_event * /* event_wait_list */,
+                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUECOPYBUFFERTOIMAGE)(cl_command_queue /* command_queue */,
+                           cl_mem           /* src_buffer */,
+                           cl_mem           /* dst_image */, 
+                           size_t           /* src_offset */,
+                           const size_t *   /* dst_origin[3] */,
+                           const size_t *   /* region[3] */, 
+                           cl_uint          /* num_events_in_wait_list */,
+                           const cl_event * /* event_wait_list */,
+                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY void * (CL_API_CALL *
+PFNCLENQUEUEMAPBUFFER)(cl_command_queue /* command_queue */,
+                   cl_mem           /* buffer */,
+                   cl_bool          /* blocking_map */, 
+                   cl_map_flags     /* map_flags */,
+                   size_t           /* offset */,
+                   size_t           /* cb */,
+                   cl_uint          /* num_events_in_wait_list */,
+                   const cl_event * /* event_wait_list */,
+                   cl_event *       /* event */,
+                   cl_int *         /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY void * (CL_API_CALL *
+PFNCLENQUEUEMAPIMAGE)(cl_command_queue  /* command_queue */,
+                  cl_mem            /* image */, 
+                  cl_bool           /* blocking_map */, 
+                  cl_map_flags      /* map_flags */, 
+                  const size_t *    /* origin[3] */,
+                  const size_t *    /* region[3] */,
+                  size_t *          /* image_row_pitch */,
+                  size_t *          /* image_slice_pitch */,
+                  cl_uint           /* num_events_in_wait_list */,
+                  const cl_event *  /* event_wait_list */,
+                  cl_event *        /* event */,
+                  cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUEUNMAPMEMOBJECT)(cl_command_queue /* command_queue */,
+                        cl_mem           /* memobj */,
+                        void *           /* mapped_ptr */,
+                        cl_uint          /* num_events_in_wait_list */,
+                        const cl_event *  /* event_wait_list */,
+                        cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUENDRANGEKERNEL)(cl_command_queue /* command_queue */,
+                       cl_kernel        /* kernel */,
+                       cl_uint          /* work_dim */,
+                       const size_t *   /* global_work_offset */,
+                       const size_t *   /* global_work_size */,
+                       const size_t *   /* local_work_size */,
+                       cl_uint          /* num_events_in_wait_list */,
+                       const cl_event * /* event_wait_list */,
+                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUETASK)(cl_command_queue  /* command_queue */,
+              cl_kernel         /* kernel */,
+              cl_uint           /* num_events_in_wait_list */,
+              const cl_event *  /* event_wait_list */,
+              cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUENATIVEKERNEL)(cl_command_queue  /* command_queue */,
+                      void (*user_func)(void *), 
+                      void *            /* args */,
+                      size_t            /* cb_args */, 
+                      cl_uint           /* num_mem_objects */,
+                      const cl_mem *    /* mem_list */,
+                      const void **     /* args_mem_loc */,
+                      cl_uint           /* num_events_in_wait_list */,
+                      const cl_event *  /* event_wait_list */,
+                      cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Extension function access
+*
+* Returns the extension function address for the given function name,
+* or NULL if a valid function can not be found.  The client must
+* check to make sure the address is not NULL, before using or
+* calling the returned function address.
+*/
+typedef CL_API_ENTRY void * (CL_API_CALL *
+PFNCLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM)(cl_platform_id /* platform */,
+                                         const char *   /* func_name */) CL_API_SUFFIX__VERSION_1_2;
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+// Deprecated OpenCL 1.1 APIs
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem (CL_API_CALL *
+PFNCLCREATEIMAGE2D)(cl_context              /* context */,
+               cl_mem_flags            /* flags */,
+               const cl_image_format * /* image_format */,
+               size_t                  /* image_width */,
+               size_t                  /* image_height */,
+               size_t                  /* image_row_pitch */,
+               void *                  /* host_ptr */,
+               cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem (CL_API_CALL *
+PFNCLCREATEIMAGE3D)(cl_context              /* context */,
+               cl_mem_flags            /* flags */,
+               const cl_image_format * /* image_format */,
+               size_t                  /* image_width */,
+               size_t                  /* image_height */,
+               size_t                  /* image_depth */,
+               size_t                  /* image_row_pitch */,
+               size_t                  /* image_slice_pitch */,
+               void *                  /* host_ptr */,
+               cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int (CL_API_CALL *
+PFNCLENQUEUEMARKER)(cl_command_queue    /* command_queue */,
+               cl_event *          /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int (CL_API_CALL *
+PFNCLENQUEUEWAITFOREVENTS)(cl_command_queue /* command_queue */,
+                      cl_uint          /* num_events */,
+                      const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int (CL_API_CALL *
+PFNCLENQUEUEBARRIER)(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int (CL_API_CALL *
+PFNCLUNLOADCOMPILER)(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * (CL_API_CALL *
+PFNCLGETEXTENSIONFUNCTIONADDRESS)(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+#endif
+
+
+/* cl_gl */
+
+typedef cl_uint     cl_gl_object_type;
+typedef cl_uint     cl_gl_texture_info;
+typedef cl_uint     cl_gl_platform_info;
+typedef struct __GLsync *cl_GLsync;
+
+/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken           */
+#define CL_GL_OBJECT_BUFFER                     0x2000
+#define CL_GL_OBJECT_TEXTURE2D                  0x2001
+#define CL_GL_OBJECT_TEXTURE3D                  0x2002
+#define CL_GL_OBJECT_RENDERBUFFER               0x2003
+#define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
+#define CL_GL_OBJECT_TEXTURE1D                  0x200F
+#define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
+#define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
+
+/* cl_gl_texture_info           */
+#define CL_GL_TEXTURE_TARGET                    0x2004
+#define CL_GL_MIPMAP_LEVEL                      0x2005
+#define CL_GL_NUM_SAMPLES                       0x2012
+
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *
+PFNCLCREATEFROMGLBUFFER)(cl_context     /* context */,
+                   cl_mem_flags   /* flags */,
+                   cl_GLuint      /* bufobj */,
+                   int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *
+PFNCLCREATEFROMGLTEXTURE)(cl_context      /* context */,
+                    cl_mem_flags    /* flags */,
+                    cl_GLenum       /* target */,
+                    cl_GLint        /* miplevel */,
+                    cl_GLuint       /* texture */,
+                    cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *
+PFNCLCREATEFROMGLRENDERBUFFER)(cl_context   /* context */,
+                         cl_mem_flags /* flags */,
+                         cl_GLuint    /* renderbuffer */,
+                         cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETGLOBJECTINFO)(cl_mem                /* memobj */,
+                cl_gl_object_type *   /* gl_object_type */,
+                cl_GLuint *           /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETGLTEXTUREINFO)(cl_mem               /* memobj */,
+                 cl_gl_texture_info   /* param_name */,
+                 size_t               /* param_value_size */,
+                 void *               /* param_value */,
+                 size_t *             /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUEACQUIREGLOBJECTS)(cl_command_queue      /* command_queue */,
+                        cl_uint               /* num_objects */,
+                        const cl_mem *        /* mem_objects */,
+                        cl_uint               /* num_events_in_wait_list */,
+                        const cl_event *      /* event_wait_list */,
+                        cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLENQUEUERELEASEGLOBJECTS)(cl_command_queue      /* command_queue */,
+                        cl_uint               /* num_objects */,
+                        const cl_mem *        /* mem_objects */,
+                        cl_uint               /* num_events_in_wait_list */,
+                        const cl_event *      /* event_wait_list */,
+                        cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+
+// Deprecated OpenCL 1.1 APIs
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem (CL_API_CALL *
+PFNCLCREATEFROMGLTEXTURE2D)(cl_context      /* context */,
+                      cl_mem_flags    /* flags */,
+                      cl_GLenum       /* target */,
+                      cl_GLint        /* miplevel */,
+                      cl_GLuint       /* texture */,
+                      cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem (CL_API_CALL *
+PFNCLCREATEFROMGLTEXTURE3D)(cl_context      /* context */,
+                      cl_mem_flags    /* flags */,
+                      cl_GLenum       /* target */,
+                      cl_GLint        /* miplevel */,
+                      cl_GLuint       /* texture */,
+                      cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+#endif
+
+/* cl_khr_gl_sharing extension  */
+
+#define cl_khr_gl_sharing 1
+
+typedef cl_uint     cl_gl_context_info;
+
+/* Additional Error Codes  */
+#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
+
+/* cl_gl_context_info  */
+#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
+#define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
+
+/* Additional cl_context_properties  */
+#define CL_GL_CONTEXT_KHR                       0x2008
+#define CL_EGL_DISPLAY_KHR                      0x2009
+#define CL_GLX_DISPLAY_KHR                      0x200A
+#define CL_WGL_HDC_KHR                          0x200B
+#define CL_CGL_SHAREGROUP_KHR                   0x200C
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *
+PFNCLGETGLCONTEXTINFOKHR)(const cl_context_properties * /* properties */,
+                    cl_gl_context_info            /* param_name */,
+                    size_t                        /* param_value_size */,
+                    void *                        /* param_value */,
+                    size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
+  const cl_context_properties * properties,
+  cl_gl_context_info            param_name,
+  size_t                        param_value_size,
+  void *                        param_value,
+  size_t *                      param_value_size_ret);
+
+#define CLEW_STATIC
+
+#ifdef CLEW_STATIC
+#  define CLEWAPI extern
+#else
+#  ifdef CLEW_BUILD
+#    define CLEWAPI extern __declspec(dllexport)
+#  else
+#    define CLEWAPI extern __declspec(dllimport)
+#  endif
+#endif
+
+#if defined(_WIN32)
+#define CLEW_FUN_EXPORT extern
+#else
+#define CLEW_FUN_EXPORT CLEWAPI
+#endif
+
+#define CLEW_GET_FUN(x) x
+
+
+//  Variables holding function entry points
+CLEW_FUN_EXPORT     PFNCLGETPLATFORMIDS                 __clewGetPlatformIDs                ;
+CLEW_FUN_EXPORT     PFNCLGETPLATFORMINFO                __clewGetPlatformInfo               ;
+CLEW_FUN_EXPORT     PFNCLGETDEVICEIDS                   __clewGetDeviceIDs                  ;
+CLEW_FUN_EXPORT     PFNCLGETDEVICEINFO                  __clewGetDeviceInfo                 ;
+CLEW_FUN_EXPORT     PFNCLCREATESUBDEVICES               __clewCreateSubDevices              ;
+CLEW_FUN_EXPORT     PFNCLRETAINDEVICE                   __clewRetainDevice                  ;
+CLEW_FUN_EXPORT     PFNCLRELEASEDEVICE                  __clewReleaseDevice                 ;
+CLEW_FUN_EXPORT     PFNCLCREATECONTEXT                  __clewCreateContext                 ;
+CLEW_FUN_EXPORT     PFNCLCREATECONTEXTFROMTYPE          __clewCreateContextFromType         ;
+CLEW_FUN_EXPORT     PFNCLRETAINCONTEXT                  __clewRetainContext                 ;
+CLEW_FUN_EXPORT     PFNCLRELEASECONTEXT                 __clewReleaseContext                ;
+CLEW_FUN_EXPORT     PFNCLGETCONTEXTINFO                 __clewGetContextInfo                ;
+CLEW_FUN_EXPORT     PFNCLCREATECOMMANDQUEUE             __clewCreateCommandQueue            ;
+CLEW_FUN_EXPORT     PFNCLRETAINCOMMANDQUEUE             __clewRetainCommandQueue            ;
+CLEW_FUN_EXPORT     PFNCLRELEASECOMMANDQUEUE            __clewReleaseCommandQueue           ;
+CLEW_FUN_EXPORT     PFNCLGETCOMMANDQUEUEINFO            __clewGetCommandQueueInfo           ;
+CLEW_FUN_EXPORT     PFNCLCREATEBUFFER                   __clewCreateBuffer                  ;
+CLEW_FUN_EXPORT     PFNCLCREATESUBBUFFER                __clewCreateSubBuffer               ;
+CLEW_FUN_EXPORT     PFNCLCREATEIMAGE                    __clewCreateImage                   ;
+CLEW_FUN_EXPORT     PFNCLRETAINMEMOBJECT                __clewRetainMemObject               ;
+CLEW_FUN_EXPORT     PFNCLRELEASEMEMOBJECT               __clewReleaseMemObject              ;
+CLEW_FUN_EXPORT     PFNCLGETSUPPORTEDIMAGEFORMATS       __clewGetSupportedImageFormats      ;
+CLEW_FUN_EXPORT     PFNCLGETMEMOBJECTINFO               __clewGetMemObjectInfo              ;
+CLEW_FUN_EXPORT     PFNCLGETIMAGEINFO                   __clewGetImageInfo                  ;
+CLEW_FUN_EXPORT     PFNCLSETMEMOBJECTDESTRUCTORCALLBACK __clewSetMemObjectDestructorCallback;
+CLEW_FUN_EXPORT     PFNCLCREATESAMPLER                  __clewCreateSampler                 ;
+CLEW_FUN_EXPORT     PFNCLRETAINSAMPLER                  __clewRetainSampler                 ;
+CLEW_FUN_EXPORT     PFNCLRELEASESAMPLER                 __clewReleaseSampler                ;
+CLEW_FUN_EXPORT     PFNCLGETSAMPLERINFO                 __clewGetSamplerInfo                ;
+CLEW_FUN_EXPORT     PFNCLCREATEPROGRAMWITHSOURCE        __clewCreateProgramWithSource       ;
+CLEW_FUN_EXPORT     PFNCLCREATEPROGRAMWITHBINARY        __clewCreateProgramWithBinary       ;
+CLEW_FUN_EXPORT     PFNCLCREATEPROGRAMWITHBUILTINKERNELS __clewCreateProgramWithBuiltInKernels;
+CLEW_FUN_EXPORT     PFNCLRETAINPROGRAM                  __clewRetainProgram                 ;
+CLEW_FUN_EXPORT     PFNCLRELEASEPROGRAM                 __clewReleaseProgram                ;
+CLEW_FUN_EXPORT     PFNCLBUILDPROGRAM                   __clewBuildProgram                  ;
+CLEW_FUN_EXPORT     PFNCLGETPROGRAMINFO                 __clewGetProgramInfo                ;
+CLEW_FUN_EXPORT     PFNCLGETPROGRAMBUILDINFO            __clewGetProgramBuildInfo           ;
+CLEW_FUN_EXPORT     PFNCLCREATEKERNEL                   __clewCreateKernel                  ;
+CLEW_FUN_EXPORT     PFNCLCREATEKERNELSINPROGRAM         __clewCreateKernelsInProgram        ;
+CLEW_FUN_EXPORT     PFNCLRETAINKERNEL                   __clewRetainKernel                  ;
+CLEW_FUN_EXPORT     PFNCLRELEASEKERNEL                  __clewReleaseKernel                 ;
+CLEW_FUN_EXPORT     PFNCLSETKERNELARG                   __clewSetKernelArg                  ;
+CLEW_FUN_EXPORT     PFNCLGETKERNELINFO                  __clewGetKernelInfo                 ;
+CLEW_FUN_EXPORT     PFNCLGETKERNELWORKGROUPINFO         __clewGetKernelWorkGroupInfo        ;
+CLEW_FUN_EXPORT     PFNCLWAITFOREVENTS                  __clewWaitForEvents                 ;
+CLEW_FUN_EXPORT     PFNCLGETEVENTINFO                   __clewGetEventInfo                  ;
+CLEW_FUN_EXPORT     PFNCLCREATEUSEREVENT                __clewCreateUserEvent               ;
+CLEW_FUN_EXPORT     PFNCLRETAINEVENT                    __clewRetainEvent                   ;
+CLEW_FUN_EXPORT     PFNCLRELEASEEVENT                   __clewReleaseEvent                  ;
+CLEW_FUN_EXPORT     PFNCLSETUSEREVENTSTATUS             __clewSetUserEventStatus            ;
+CLEW_FUN_EXPORT     PFNCLSETEVENTCALLBACK               __clewSetEventCallback              ;
+CLEW_FUN_EXPORT     PFNCLGETEVENTPROFILINGINFO          __clewGetEventProfilingInfo         ;
+CLEW_FUN_EXPORT     PFNCLFLUSH                          __clewFlush                         ;
+CLEW_FUN_EXPORT     PFNCLFINISH                         __clewFinish                        ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEREADBUFFER              __clewEnqueueReadBuffer             ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEREADBUFFERRECT          __clewEnqueueReadBufferRect         ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEWRITEBUFFER             __clewEnqueueWriteBuffer            ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEWRITEBUFFERRECT         __clewEnqueueWriteBufferRect        ;
+CLEW_FUN_EXPORT     PFNCLENQUEUECOPYBUFFER              __clewEnqueueCopyBuffer             ;
+CLEW_FUN_EXPORT     PFNCLENQUEUECOPYBUFFERRECT          __clewEnqueueCopyBufferRect         ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEREADIMAGE               __clewEnqueueReadImage              ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEWRITEIMAGE              __clewEnqueueWriteImage             ;
+CLEW_FUN_EXPORT     PFNCLENQUEUECOPYIMAGE               __clewEnqueueCopyImage              ;
+CLEW_FUN_EXPORT     PFNCLENQUEUECOPYIMAGETOBUFFER       __clewEnqueueCopyImageToBuffer      ;
+CLEW_FUN_EXPORT     PFNCLENQUEUECOPYBUFFERTOIMAGE       __clewEnqueueCopyBufferToImage      ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEMAPBUFFER               __clewEnqueueMapBuffer              ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEMAPIMAGE                __clewEnqueueMapImage               ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEUNMAPMEMOBJECT          __clewEnqueueUnmapMemObject         ;
+CLEW_FUN_EXPORT     PFNCLENQUEUENDRANGEKERNEL           __clewEnqueueNDRangeKernel          ;
+CLEW_FUN_EXPORT     PFNCLENQUEUETASK                    __clewEnqueueTask                   ;
+CLEW_FUN_EXPORT     PFNCLENQUEUENATIVEKERNEL            __clewEnqueueNativeKernel           ;
+CLEW_FUN_EXPORT     PFNCLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM __clewGetExtensionFunctionAddressForPlatform;
+
+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+CLEW_FUN_EXPORT     PFNCLSETCOMMANDQUEUEPROPERTY        __clewSetCommandQueueProperty       ;
+#endif
+
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+CLEW_FUN_EXPORT     PFNCLCREATEIMAGE2D                  __clewCreateImage2D                 ;
+CLEW_FUN_EXPORT     PFNCLCREATEIMAGE3D                  __clewCreateImage3D                 ;
+CLEW_FUN_EXPORT     PFNCLGETEXTENSIONFUNCTIONADDRESS    __clewGetExtensionFunctionAddress   ;
+CLEW_FUN_EXPORT     PFNCLUNLOADCOMPILER                 __clewUnloadCompiler                ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEMARKER                  __clewEnqueueMarker                 ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEWAITFOREVENTS           __clewEnqueueWaitForEvents          ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEBARRIER                 __clewEnqueueBarrier                ;
+#endif
+
+/* cl_gl */
+CLEW_FUN_EXPORT     PFNCLCREATEFROMGLBUFFER             __clewCreateFromGLBuffer            ;
+CLEW_FUN_EXPORT     PFNCLCREATEFROMGLTEXTURE            __clewCreateFromGLTexture           ;
+CLEW_FUN_EXPORT     PFNCLCREATEFROMGLRENDERBUFFER       __clewCreateFromGLRenderbuffer      ;
+CLEW_FUN_EXPORT     PFNCLGETGLOBJECTINFO                __clewGetGLObjectInfo               ;
+CLEW_FUN_EXPORT     PFNCLGETGLTEXTUREINFO               __clewGetGLTextureInfo              ;
+CLEW_FUN_EXPORT     PFNCLENQUEUEACQUIREGLOBJECTS        __clewEnqueueAcquireGLObjects       ;
+CLEW_FUN_EXPORT     PFNCLENQUEUERELEASEGLOBJECTS        __clewEnqueueReleaseGLObjects       ;
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+CLEW_FUN_EXPORT     PFNCLCREATEFROMGLTEXTURE2D          __clewCreateFromGLTexture2D         ;
+CLEW_FUN_EXPORT     PFNCLCREATEFROMGLTEXTURE3D          __clewCreateFromGLTexture3D         ;
+#endif
+CLEW_FUN_EXPORT     PFNCLGETGLCONTEXTINFOKHR            __clewGetGLContextInfoKHR           ;
+
+#define        clGetPlatformIDs                CLEW_GET_FUN(__clewGetPlatformIDs                )
+#define        clGetPlatformInfo               CLEW_GET_FUN(__clewGetPlatformInfo               )
+#define        clGetDeviceIDs                  CLEW_GET_FUN(__clewGetDeviceIDs                  )
+#define        clGetDeviceInfo                 CLEW_GET_FUN(__clewGetDeviceInfo                 )
+#define        clCreateContext                 CLEW_GET_FUN(__clewCreateContext                 )
+#define        clCreateContextFromType         CLEW_GET_FUN(__clewCreateContextFromType         )
+#define        clRetainContext                 CLEW_GET_FUN(__clewRetainContext                 )
+#define        clReleaseContext                CLEW_GET_FUN(__clewReleaseContext                )
+#define        clGetContextInfo                CLEW_GET_FUN(__clewGetContextInfo                )
+#define        clCreateCommandQueue            CLEW_GET_FUN(__clewCreateCommandQueue            )
+#define        clRetainCommandQueue            CLEW_GET_FUN(__clewRetainCommandQueue            )
+#define        clReleaseCommandQueue           CLEW_GET_FUN(__clewReleaseCommandQueue           )
+#define        clGetCommandQueueInfo           CLEW_GET_FUN(__clewGetCommandQueueInfo           )
+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
+/* 
+ *  WARNING:
+ *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
+ *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
+ *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
+ *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
+ *
+ *  Software developers previously relying on this API are instructed to set the command queue 
+ *  properties when creating the queue, instead. 
+ */
+#define        clSetCommandQueueProperty       CLEW_GET_FUN(__clewSetCommandQueueProperty       )
+#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
+#define        clCreateBuffer                  CLEW_GET_FUN(__clewCreateBuffer                  )
+#define        clCreateSubBuffer               CLEW_GET_FUN(__clewCreateSubBuffer               )
+#define        clCreateImage                   CLEW_GET_FUN(__clewCreateImage                   )
+#define        clRetainMemObject               CLEW_GET_FUN(__clewRetainMemObject               )
+#define        clReleaseMemObject              CLEW_GET_FUN(__clewReleaseMemObject              )
+#define        clGetSupportedImageFormats      CLEW_GET_FUN(__clewGetSupportedImageFormats      )
+#define        clGetMemObjectInfo              CLEW_GET_FUN(__clewGetMemObjectInfo              )
+#define        clGetImageInfo                  CLEW_GET_FUN(__clewGetImageInfo                  )
+#define        clSetMemObjectDestructorCallback CLEW_GET_FUN(__clewSetMemObjectDestructorCallback)
+#define        clCreateSampler                 CLEW_GET_FUN(__clewCreateSampler                 )
+#define        clRetainSampler                 CLEW_GET_FUN(__clewRetainSampler                 )
+#define        clReleaseSampler                CLEW_GET_FUN(__clewReleaseSampler                )
+#define        clGetSamplerInfo                CLEW_GET_FUN(__clewGetSamplerInfo                )
+#define        clCreateProgramWithSource       CLEW_GET_FUN(__clewCreateProgramWithSource       )
+#define        clCreateProgramWithBinary       CLEW_GET_FUN(__clewCreateProgramWithBinary       )
+#define clCreateProgramWithBuiltInKernels CLEW_GET_FUN(__clewCreateProgramWithBuiltInKernels)
+#define        clRetainProgram                 CLEW_GET_FUN(__clewRetainProgram                 )
+#define        clReleaseProgram                CLEW_GET_FUN(__clewReleaseProgram                )
+#define        clBuildProgram                  CLEW_GET_FUN(__clewBuildProgram                  )
+#define        clGetProgramInfo                CLEW_GET_FUN(__clewGetProgramInfo                )
+#define        clGetProgramBuildInfo           CLEW_GET_FUN(__clewGetProgramBuildInfo           )
+#define        clCreateKernel                  CLEW_GET_FUN(__clewCreateKernel                  )
+#define        clCreateKernelsInProgram        CLEW_GET_FUN(__clewCreateKernelsInProgram        )
+#define        clRetainKernel                  CLEW_GET_FUN(__clewRetainKernel                  )
+#define        clReleaseKernel                 CLEW_GET_FUN(__clewReleaseKernel                 )
+#define        clSetKernelArg                  CLEW_GET_FUN(__clewSetKernelArg                  )
+#define        clGetKernelInfo                 CLEW_GET_FUN(__clewGetKernelInfo                 )
+#define        clGetKernelWorkGroupInfo        CLEW_GET_FUN(__clewGetKernelWorkGroupInfo        )
+#define        clWaitForEvents                 CLEW_GET_FUN(__clewWaitForEvents                 )
+#define        clGetEventInfo                  CLEW_GET_FUN(__clewGetEventInfo                  )
+#define        clCreateUserEvent               CLEW_GET_FUN(__clewCreateUserEvent               )
+#define        clRetainEvent                   CLEW_GET_FUN(__clewRetainEvent                   )
+#define        clReleaseEvent                  CLEW_GET_FUN(__clewReleaseEvent                  )
+#define        clSetUserEventStatus            CLEW_GET_FUN(__clewSetUserEventStatus            )
+#define        clSetEventCallback              CLEW_GET_FUN(__clewSetEventCallback              )
+#define        clGetEventProfilingInfo         CLEW_GET_FUN(__clewGetEventProfilingInfo         )
+#define        clFlush                         CLEW_GET_FUN(__clewFlush                         )
+#define        clFinish                        CLEW_GET_FUN(__clewFinish                        )
+#define        clEnqueueReadBuffer             CLEW_GET_FUN(__clewEnqueueReadBuffer             )
+#define        clEnqueueReadBufferRect         CLEW_GET_FUN(__clewEnqueueReadBufferRect         )
+#define        clEnqueueWriteBuffer            CLEW_GET_FUN(__clewEnqueueWriteBuffer            )
+#define        clEnqueueWriteBufferRect        CLEW_GET_FUN(__clewEnqueueWriteBufferRect        )
+#define        clEnqueueCopyBuffer             CLEW_GET_FUN(__clewEnqueueCopyBuffer             )
+#define        clEnqueueCopyBufferRect         CLEW_GET_FUN(__clewEnqueueCopyBufferRect         )
+#define        clEnqueueReadImage              CLEW_GET_FUN(__clewEnqueueReadImage              )
+#define        clEnqueueWriteImage             CLEW_GET_FUN(__clewEnqueueWriteImage             )
+#define        clEnqueueCopyImage              CLEW_GET_FUN(__clewEnqueueCopyImage              )
+#define        clEnqueueCopyImageToBuffer      CLEW_GET_FUN(__clewEnqueueCopyImageToBuffer      )
+#define        clEnqueueCopyBufferToImage      CLEW_GET_FUN(__clewEnqueueCopyBufferToImage      )
+#define        clEnqueueMapBuffer              CLEW_GET_FUN(__clewEnqueueMapBuffer              )
+#define        clEnqueueMapImage               CLEW_GET_FUN(__clewEnqueueMapImage               )
+#define        clEnqueueUnmapMemObject         CLEW_GET_FUN(__clewEnqueueUnmapMemObject         )
+#define        clEnqueueNDRangeKernel          CLEW_GET_FUN(__clewEnqueueNDRangeKernel          )
+#define        clEnqueueTask                   CLEW_GET_FUN(__clewEnqueueTask                   )
+#define        clEnqueueNativeKernel           CLEW_GET_FUN(__clewEnqueueNativeKernel           )
+
+#define clGetExtensionFunctionAddressForPlatform CLEW_GET_FUN(__clewGetExtensionFunctionAddressForPlatform)
+
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+#define        clCreateImage2D                 CLEW_GET_FUN(__clewCreateImage2D                 )
+#define        clCreateImage3D                 CLEW_GET_FUN(__clewCreateImage3D                 )
+#define        clGetExtensionFunctionAddress   CLEW_GET_FUN(__clewGetExtensionFunctionAddress   )
+#define        clEnqueueMarker                 CLEW_GET_FUN(__clewEnqueueMarker                 )
+#define        clEnqueueWaitForEvents          CLEW_GET_FUN(__clewEnqueueWaitForEvents          )
+#define        clEnqueueBarrier                CLEW_GET_FUN(__clewEnqueueBarrier                )
+#define        clUnloadCompiler                CLEW_GET_FUN(__clewUnloadCompiler                )
+#endif
+
+/* cl_gl */
+#define        clCreateFromGLBuffer            CLEW_GET_FUN(__clewCreateFromGLBuffer            )
+#define        clCreateFromGLTexture           CLEW_GET_FUN(__clewCreateFromGLTexture           )
+#define        clCreateFromGLRenderbuffer      CLEW_GET_FUN(__clewCreateFromGLRenderbuffer      )
+#define        clGetGLObjectInfo               CLEW_GET_FUN(__clewGetGLObjectInfo               )
+#define        clGetGLTextureInfo              CLEW_GET_FUN(__clGetGLTextureInfo                )
+#define        clEnqueueAcquireGLObjects       CLEW_GET_FUN(__clewEnqueueAcquireGLObjects       )
+#define        clEnqueueReleaseGLObjects       CLEW_GET_FUN(__clewEnqueueReleaseGLObjects       )
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+#define        clCreateFromGLTexture2D         CLEW_GET_FUN(__clewCreateFromGLTexture2D         )
+#define        clCreateFromGLTexture3D         CLEW_GET_FUN(__clewCreateFromGLTexture3D         )
+#endif
+#define        clGetGLContextInfoKHR           CLEW_GET_FUN(__clewGetGLContextInfoKHR           )
+
+
+#define CLEW_SUCCESS                0       //!<    Success error code
+#define CLEW_ERROR_OPEN_FAILED      -1      //!<    Error code for failing to open the dynamic library
+#define CLEW_ERROR_ATEXIT_FAILED    -2      //!<    Error code for failing to queue the closing of the dynamic library to atexit()
+
+//! \brief Load OpenCL dynamic library and set function entry points
+int         clewInit        ();
+//! \brief Convert an OpenCL error code to its string equivalent
+const char* clewErrorString (cl_int error);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  //  CLEW_HPP_INCLUDED
diff --git a/extern/clew/src/clew.c b/extern/clew/src/clew.c
new file mode 100644 (file)
index 0000000..8c9316d
--- /dev/null
@@ -0,0 +1,382 @@
+//////////////////////////////////////////////////////////////////////////
+//  Copyright (c) 2009 Organic Vectory B.V.
+//  Written by George van Venrooij
+//
+//  Distributed under the Boost Software License, Version 1.0.
+//  (See accompanying file license.txt)
+//////////////////////////////////////////////////////////////////////////
+
+#include "clew.h"
+
+#ifdef _WIN32
+    #define WIN32_LEAN_AND_MEAN
+    #define VC_EXTRALEAN
+    #include <windows.h>
+
+    typedef HMODULE             CLEW_DYNLIB_HANDLE;
+
+    #define CLEW_DYNLIB_OPEN    LoadLibrary
+    #define CLEW_DYNLIB_CLOSE   FreeLibrary
+    #define CLEW_DYNLIB_IMPORT  GetProcAddress
+#else
+    #include <dlfcn.h>
+    
+    typedef void*                   CLEW_DYNLIB_HANDLE;
+
+    #define CLEW_DYNLIB_OPEN(path)  dlopen(path, RTLD_NOW | RTLD_GLOBAL)
+    #define CLEW_DYNLIB_CLOSE       dlclose
+    #define CLEW_DYNLIB_IMPORT      dlsym
+#endif
+
+#include <stdlib.h>
+
+//! \brief module handle
+static CLEW_DYNLIB_HANDLE module = NULL;
+
+//  Variables holding function entry points
+PFNCLGETPLATFORMIDS                 __clewGetPlatformIDs                = NULL;
+PFNCLGETPLATFORMINFO                __clewGetPlatformInfo               = NULL;
+PFNCLGETDEVICEIDS                   __clewGetDeviceIDs                  = NULL;
+PFNCLGETDEVICEINFO                  __clewGetDeviceInfo                 = NULL;
+PFNCLCREATESUBDEVICES               __clewCreateSubDevices              = NULL;
+PFNCLRETAINDEVICE                   __clewRetainDevice                  = NULL;
+PFNCLRELEASEDEVICE                  __clewReleaseDevice                 = NULL;
+PFNCLCREATECONTEXT                  __clewCreateContext                 = NULL;
+PFNCLCREATECONTEXTFROMTYPE          __clewCreateContextFromType         = NULL;
+PFNCLRETAINCONTEXT                  __clewRetainContext                 = NULL;
+PFNCLRELEASECONTEXT                 __clewReleaseContext                = NULL;
+PFNCLGETCONTEXTINFO                 __clewGetContextInfo                = NULL;
+PFNCLCREATECOMMANDQUEUE             __clewCreateCommandQueue            = NULL;
+PFNCLRETAINCOMMANDQUEUE             __clewRetainCommandQueue            = NULL;
+PFNCLRELEASECOMMANDQUEUE            __clewReleaseCommandQueue           = NULL;
+PFNCLGETCOMMANDQUEUEINFO            __clewGetCommandQueueInfo           = NULL;
+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+PFNCLSETCOMMANDQUEUEPROPERTY        __clewSetCommandQueueProperty       = NULL;
+#endif
+PFNCLCREATEBUFFER                   __clewCreateBuffer                  = NULL;
+PFNCLCREATESUBBUFFER                __clewCreateSubBuffer               = NULL;
+PFNCLCREATEIMAGE                    __clewCreateImage                   = NULL;
+PFNCLRETAINMEMOBJECT                __clewRetainMemObject               = NULL;
+PFNCLRELEASEMEMOBJECT               __clewReleaseMemObject              = NULL;
+PFNCLGETSUPPORTEDIMAGEFORMATS       __clewGetSupportedImageFormats      = NULL;
+PFNCLGETMEMOBJECTINFO               __clewGetMemObjectInfo              = NULL;
+PFNCLGETIMAGEINFO                   __clewGetImageInfo                  = NULL;
+PFNCLSETMEMOBJECTDESTRUCTORCALLBACK __clewSetMemObjectDestructorCallback = NULL;
+PFNCLCREATESAMPLER                  __clewCreateSampler                 = NULL;
+PFNCLRETAINSAMPLER                  __clewRetainSampler                 = NULL;
+PFNCLRELEASESAMPLER                 __clewReleaseSampler                = NULL;
+PFNCLGETSAMPLERINFO                 __clewGetSamplerInfo                = NULL;
+PFNCLCREATEPROGRAMWITHSOURCE        __clewCreateProgramWithSource       = NULL;
+PFNCLCREATEPROGRAMWITHBINARY        __clewCreateProgramWithBinary       = NULL;
+PFNCLCREATEPROGRAMWITHBUILTINKERNELS __clewCreateProgramWithBuiltInKernels = NULL;
+PFNCLRETAINPROGRAM                  __clewRetainProgram                 = NULL;
+PFNCLRELEASEPROGRAM                 __clewReleaseProgram                = NULL;
+PFNCLBUILDPROGRAM                   __clewBuildProgram                  = NULL;
+PFNCLGETPROGRAMINFO                 __clewGetProgramInfo                = NULL;
+PFNCLGETPROGRAMBUILDINFO            __clewGetProgramBuildInfo           = NULL;
+PFNCLCREATEKERNEL                   __clewCreateKernel                  = NULL;
+PFNCLCREATEKERNELSINPROGRAM         __clewCreateKernelsInProgram        = NULL;
+PFNCLRETAINKERNEL                   __clewRetainKernel                  = NULL;
+PFNCLRELEASEKERNEL                  __clewReleaseKernel                 = NULL;
+PFNCLSETKERNELARG                   __clewSetKernelArg                  = NULL;
+PFNCLGETKERNELINFO                  __clewGetKernelInfo                 = NULL;
+PFNCLGETKERNELWORKGROUPINFO         __clewGetKernelWorkGroupInfo        = NULL;
+PFNCLWAITFOREVENTS                  __clewWaitForEvents                 = NULL;
+PFNCLGETEVENTINFO                   __clewGetEventInfo                  = NULL;
+PFNCLCREATEUSEREVENT                __clewCreateUserEvent               = NULL;
+PFNCLRETAINEVENT                    __clewRetainEvent                   = NULL;
+PFNCLRELEASEEVENT                   __clewReleaseEvent                  = NULL;
+PFNCLSETUSEREVENTSTATUS             __clewSetUserEventStatus            = NULL;
+PFNCLSETEVENTCALLBACK               __clewSetEventCallback              = NULL;
+PFNCLGETEVENTPROFILINGINFO          __clewGetEventProfilingInfo         = NULL;
+PFNCLFLUSH                          __clewFlush                         = NULL;
+PFNCLFINISH                         __clewFinish                        = NULL;
+PFNCLENQUEUEREADBUFFER              __clewEnqueueReadBuffer             = NULL;
+PFNCLENQUEUEREADBUFFERRECT          __clewEnqueueReadBufferRect         = NULL;
+PFNCLENQUEUEWRITEBUFFER             __clewEnqueueWriteBuffer            = NULL;
+PFNCLENQUEUEWRITEBUFFERRECT         __clewEnqueueWriteBufferRect        = NULL;
+PFNCLENQUEUECOPYBUFFER              __clewEnqueueCopyBuffer             = NULL;
+PFNCLENQUEUEREADIMAGE               __clewEnqueueReadImage              = NULL;
+PFNCLENQUEUEWRITEIMAGE              __clewEnqueueWriteImage             = NULL;
+PFNCLENQUEUECOPYIMAGE               __clewEnqueueCopyImage              = NULL;
+PFNCLENQUEUECOPYBUFFERRECT          __clewEnqueueCopyBufferRect         = NULL;
+PFNCLENQUEUECOPYIMAGETOBUFFER       __clewEnqueueCopyImageToBuffer      = NULL;
+PFNCLENQUEUECOPYBUFFERTOIMAGE       __clewEnqueueCopyBufferToImage      = NULL;
+PFNCLENQUEUEMAPBUFFER               __clewEnqueueMapBuffer              = NULL;
+PFNCLENQUEUEMAPIMAGE                __clewEnqueueMapImage               = NULL;
+PFNCLENQUEUEUNMAPMEMOBJECT          __clewEnqueueUnmapMemObject         = NULL;
+PFNCLENQUEUENDRANGEKERNEL           __clewEnqueueNDRangeKernel          = NULL;
+PFNCLENQUEUETASK                    __clewEnqueueTask                   = NULL;
+PFNCLENQUEUENATIVEKERNEL            __clewEnqueueNativeKernel           = NULL;
+
+
+
+PFNCLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM __clewGetExtensionFunctionAddressForPlatform = NULL;
+
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+PFNCLCREATEIMAGE2D                  __clewCreateImage2D                 = NULL;
+PFNCLCREATEIMAGE3D                  __clewCreateImage3D                 = NULL;
+PFNCLENQUEUEMARKER                  __clewEnqueueMarker                 = NULL;
+PFNCLENQUEUEWAITFOREVENTS           __clewEnqueueWaitForEvents          = NULL;
+PFNCLENQUEUEBARRIER                 __clewEnqueueBarrier                = NULL;
+PFNCLUNLOADCOMPILER                 __clewUnloadCompiler                = NULL;
+PFNCLGETEXTENSIONFUNCTIONADDRESS    __clewGetExtensionFunctionAddress   = NULL;
+#endif
+
+/* cl_gl */
+PFNCLCREATEFROMGLBUFFER             __clewCreateFromGLBuffer            = NULL;
+PFNCLCREATEFROMGLTEXTURE            __clewCreateFromGLTexture           = NULL;
+PFNCLCREATEFROMGLRENDERBUFFER       __clewCreateFromGLRenderbuffer      = NULL;
+PFNCLGETGLOBJECTINFO                __clewGetGLObjectInfo               = NULL;
+PFNCLGETGLTEXTUREINFO               __clewGetGLTextureInfo              = NULL;
+PFNCLENQUEUEACQUIREGLOBJECTS        __clewEnqueueAcquireGLObjects       = NULL;
+PFNCLENQUEUERELEASEGLOBJECTS        __clewEnqueueReleaseGLObjects       = NULL;
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+PFNCLCREATEFROMGLTEXTURE2D          __clewCreateFromGLTexture2D         = NULL;
+PFNCLCREATEFROMGLTEXTURE3D          __clewCreateFromGLTexture3D         = NULL;
+#endif
+PFNCLGETGLCONTEXTINFOKHR            __clewGetGLContextInfoKHR           = NULL;
+
+
+static void clewExit(void)
+{
+    if (module != NULL)
+    {
+        //  Ignore errors
+        CLEW_DYNLIB_CLOSE(module);
+        module = NULL;
+    }
+}
+
+int clewInit()
+{
+#ifdef _WIN32
+    const char *path = "OpenCL.dll";
+#elif defined(__APPLE__)
+    const char *path = "/Library/Frameworks/OpenCL.framework/OpenCL";
+#else
+    const char *path = "libOpenCL.so";
+#endif
+
+    int error = 0;
+
+    //  Check if already initialized
+    if (module != NULL)
+    {
+        return CLEW_SUCCESS;
+    }
+
+    //  Load library
+    module = CLEW_DYNLIB_OPEN(path);
+
+    //  Check for errors
+    if (module == NULL)
+    {
+        return CLEW_ERROR_OPEN_FAILED;
+    }
+
+    //  Set unloading
+    error = atexit(clewExit);
+
+    if (error)
+    {
+        //  Failure queuing atexit, shutdown with error
+        CLEW_DYNLIB_CLOSE(module);
+        module = NULL;
+
+        return CLEW_ERROR_ATEXIT_FAILED;
+    }
+
+    //  Determine function entry-points
+    __clewGetPlatformIDs                = (PFNCLGETPLATFORMIDS              )CLEW_DYNLIB_IMPORT(module, "clGetPlatformIDs");
+    __clewGetPlatformInfo               = (PFNCLGETPLATFORMINFO             )CLEW_DYNLIB_IMPORT(module, "clGetPlatformInfo");
+    __clewGetDeviceIDs                  = (PFNCLGETDEVICEIDS                )CLEW_DYNLIB_IMPORT(module, "clGetDeviceIDs");
+    __clewGetDeviceInfo                 = (PFNCLGETDEVICEINFO               )CLEW_DYNLIB_IMPORT(module, "clGetDeviceInfo");
+    __clewCreateSubDevices              = (PFNCLCREATESUBDEVICES            )CLEW_DYNLIB_IMPORT(module, "clCreateSubDevices");
+    __clewRetainDevice                  = (PFNCLRETAINDEVICE                )CLEW_DYNLIB_IMPORT(module, "clRetainDevice");
+    __clewReleaseDevice                 = (PFNCLRELEASEDEVICE               )CLEW_DYNLIB_IMPORT(module, "clReleaseDevice");
+    __clewCreateContext                 = (PFNCLCREATECONTEXT               )CLEW_DYNLIB_IMPORT(module, "clCreateContext");
+    __clewCreateContextFromType         = (PFNCLCREATECONTEXTFROMTYPE       )CLEW_DYNLIB_IMPORT(module, "clCreateContextFromType");
+    __clewRetainContext                 = (PFNCLRETAINCONTEXT               )CLEW_DYNLIB_IMPORT(module, "clRetainContext");
+    __clewReleaseContext                = (PFNCLRELEASECONTEXT              )CLEW_DYNLIB_IMPORT(module, "clReleaseContext");
+    __clewGetContextInfo                = (PFNCLGETCONTEXTINFO              )CLEW_DYNLIB_IMPORT(module, "clGetContextInfo");
+    __clewCreateCommandQueue            = (PFNCLCREATECOMMANDQUEUE          )CLEW_DYNLIB_IMPORT(module, "clCreateCommandQueue");
+    __clewRetainCommandQueue            = (PFNCLRETAINCOMMANDQUEUE          )CLEW_DYNLIB_IMPORT(module, "clRetainCommandQueue");
+    __clewReleaseCommandQueue           = (PFNCLRELEASECOMMANDQUEUE         )CLEW_DYNLIB_IMPORT(module, "clReleaseCommandQueue");
+    __clewGetCommandQueueInfo           = (PFNCLGETCOMMANDQUEUEINFO         )CLEW_DYNLIB_IMPORT(module, "clGetCommandQueueInfo");
+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+    __clewSetCommandQueueProperty       = (PFNCLSETCOMMANDQUEUEPROPERTY     )CLEW_DYNLIB_IMPORT(module, "clSetCommandQueueProperty");
+#endif
+    __clewCreateBuffer                  = (PFNCLCREATEBUFFER                )CLEW_DYNLIB_IMPORT(module, "clCreateBuffer");
+    __clewCreateSubBuffer               = (PFNCLCREATESUBBUFFER             )CLEW_DYNLIB_IMPORT(module, "clCreateBuffer");
+    __clewCreateImage                   = (PFNCLCREATEIMAGE                 )CLEW_DYNLIB_IMPORT(module, "clCreateImage");
+    __clewRetainMemObject               = (PFNCLRETAINMEMOBJECT             )CLEW_DYNLIB_IMPORT(module, "clRetainMemObject");
+    __clewReleaseMemObject              = (PFNCLRELEASEMEMOBJECT            )CLEW_DYNLIB_IMPORT(module, "clReleaseMemObject");
+    __clewGetSupportedImageFormats      = (PFNCLGETSUPPORTEDIMAGEFORMATS    )CLEW_DYNLIB_IMPORT(module, "clGetSupportedImageFormats");
+    __clewGetMemObjectInfo              = (PFNCLGETMEMOBJECTINFO            )CLEW_DYNLIB_IMPORT(module, "clGetMemObjectInfo");
+    __clewGetImageInfo                  = (PFNCLGETIMAGEINFO                )CLEW_DYNLIB_IMPORT(module, "clGetImageInfo");
+    __clewSetMemObjectDestructorCallback = (PFNCLSETMEMOBJECTDESTRUCTORCALLBACK)CLEW_DYNLIB_IMPORT(module, "clSetMemObjectDestructorCallback");
+    __clewCreateSampler                 = (PFNCLCREATESAMPLER               )CLEW_DYNLIB_IMPORT(module, "clCreateSampler");
+    __clewRetainSampler                 = (PFNCLRETAINSAMPLER               )CLEW_DYNLIB_IMPORT(module, "clRetainSampler");
+    __clewReleaseSampler                = (PFNCLRELEASESAMPLER              )CLEW_DYNLIB_IMPORT(module, "clReleaseSampler");
+    __clewGetSamplerInfo                = (PFNCLGETSAMPLERINFO              )CLEW_DYNLIB_IMPORT(module, "clGetSamplerInfo");
+    __clewCreateProgramWithSource       = (PFNCLCREATEPROGRAMWITHSOURCE     )CLEW_DYNLIB_IMPORT(module, "clCreateProgramWithSource");
+    __clewCreateProgramWithBinary       = (PFNCLCREATEPROGRAMWITHBINARY     )CLEW_DYNLIB_IMPORT(module, "clCreateProgramWithBinary");
+    __clewCreateProgramWithBuiltInKernels =(PFNCLCREATEPROGRAMWITHBUILTINKERNELS)CLEW_DYNLIB_IMPORT(module, "clCreateProgramWithBuiltInKernels");
+    __clewRetainProgram                 = (PFNCLRETAINPROGRAM               )CLEW_DYNLIB_IMPORT(module, "clRetainProgram");
+    __clewReleaseProgram                = (PFNCLRELEASEPROGRAM              )CLEW_DYNLIB_IMPORT(module, "clReleaseProgram");
+    __clewBuildProgram                  = (PFNCLBUILDPROGRAM                )CLEW_DYNLIB_IMPORT(module, "clBuildProgram");
+
+    __clewGetProgramInfo                = (PFNCLGETPROGRAMINFO              )CLEW_DYNLIB_IMPORT(module, "clGetProgramInfo");
+    __clewGetProgramBuildInfo           = (PFNCLGETPROGRAMBUILDINFO         )CLEW_DYNLIB_IMPORT(module, "clGetProgramBuildInfo");
+    __clewCreateKernel                  = (PFNCLCREATEKERNEL                )CLEW_DYNLIB_IMPORT(module, "clCreateKernel");
+    __clewCreateKernelsInProgram        = (PFNCLCREATEKERNELSINPROGRAM      )CLEW_DYNLIB_IMPORT(module, "clCreateKernelsInProgram");
+    __clewRetainKernel                  = (PFNCLRETAINKERNEL                )CLEW_DYNLIB_IMPORT(module, "clRetainKernel");
+    __clewReleaseKernel                 = (PFNCLRELEASEKERNEL               )CLEW_DYNLIB_IMPORT(module, "clReleaseKernel");
+    __clewSetKernelArg                  = (PFNCLSETKERNELARG                )CLEW_DYNLIB_IMPORT(module, "clSetKernelArg");
+    __clewGetKernelInfo                 = (PFNCLGETKERNELINFO               )CLEW_DYNLIB_IMPORT(module, "clGetKernelInfo");
+    __clewGetKernelWorkGroupInfo        = (PFNCLGETKERNELWORKGROUPINFO      )CLEW_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo");
+    __clewWaitForEvents                 = (PFNCLWAITFOREVENTS               )CLEW_DYNLIB_IMPORT(module, "clWaitForEvents");
+    __clewGetEventInfo                  = (PFNCLGETEVENTINFO                )CLEW_DYNLIB_IMPORT(module, "clGetEventInfo");
+    __clewCreateUserEvent               = (PFNCLCREATEUSEREVENT             )CLEW_DYNLIB_IMPORT(module, "clCreateUserEvent");
+    __clewRetainEvent                   = (PFNCLRETAINEVENT                 )CLEW_DYNLIB_IMPORT(module, "clRetainEvent");
+    __clewReleaseEvent                  = (PFNCLRELEASEEVENT                )CLEW_DYNLIB_IMPORT(module, "clReleaseEvent");
+    __clewSetUserEventStatus            = (PFNCLSETUSEREVENTSTATUS          )CLEW_DYNLIB_IMPORT(module, "clSetUserEventStatus");
+    __clewSetEventCallback              = (PFNCLSETEVENTCALLBACK            )CLEW_DYNLIB_IMPORT(module, "clSetEventCallback");
+    __clewGetEventProfilingInfo         = (PFNCLGETEVENTPROFILINGINFO       )CLEW_DYNLIB_IMPORT(module, "clGetEventProfilingInfo");
+    __clewFlush                         = (PFNCLFLUSH                       )CLEW_DYNLIB_IMPORT(module, "clFlush");
+    __clewFinish                        = (PFNCLFINISH                      )CLEW_DYNLIB_IMPORT(module, "clFinish");
+    __clewEnqueueReadBuffer             = (PFNCLENQUEUEREADBUFFER           )CLEW_DYNLIB_IMPORT(module, "clEnqueueReadBuffer");
+    __clewEnqueueReadBufferRect         = (PFNCLENQUEUEREADBUFFERRECT       )CLEW_DYNLIB_IMPORT(module, "clEnqueueReadBufferRect");
+    __clewEnqueueWriteBuffer            = (PFNCLENQUEUEWRITEBUFFER          )CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer");
+    __clewEnqueueWriteBufferRect        = (PFNCLENQUEUEWRITEBUFFERRECT      )CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteBufferRect");
+    __clewEnqueueCopyBuffer             = (PFNCLENQUEUECOPYBUFFER           )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer");
+    __clewEnqueueCopyBufferRect         = (PFNCLENQUEUECOPYBUFFERRECT       )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBufferRect");
+    __clewEnqueueReadImage              = (PFNCLENQUEUEREADIMAGE            )CLEW_DYNLIB_IMPORT(module, "clEnqueueReadImage");
+    __clewEnqueueWriteImage             = (PFNCLENQUEUEWRITEIMAGE           )CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteImage");
+    __clewEnqueueCopyImage              = (PFNCLENQUEUECOPYIMAGE            )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyImage");
+    __clewEnqueueCopyImageToBuffer      = (PFNCLENQUEUECOPYIMAGETOBUFFER    )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyImageToBuffer");
+    __clewEnqueueCopyBufferToImage      = (PFNCLENQUEUECOPYBUFFERTOIMAGE    )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBufferToImage");
+    __clewEnqueueMapBuffer              = (PFNCLENQUEUEMAPBUFFER            )CLEW_DYNLIB_IMPORT(module, "clEnqueueMapBuffer");
+    __clewEnqueueMapImage               = (PFNCLENQUEUEMAPIMAGE             )CLEW_DYNLIB_IMPORT(module, "clEnqueueMapImage");
+    __clewEnqueueUnmapMemObject         = (PFNCLENQUEUEUNMAPMEMOBJECT       )CLEW_DYNLIB_IMPORT(module, "clEnqueueUnmapMemObject");
+    __clewEnqueueNDRangeKernel          = (PFNCLENQUEUENDRANGEKERNEL        )CLEW_DYNLIB_IMPORT(module, "clEnqueueNDRangeKernel");
+    __clewEnqueueTask                   = (PFNCLENQUEUETASK                 )CLEW_DYNLIB_IMPORT(module, "clEnqueueTask");
+    __clewEnqueueNativeKernel           = (PFNCLENQUEUENATIVEKERNEL         )CLEW_DYNLIB_IMPORT(module, "clEnqueueNativeKernel");
+
+
+    __clewGetExtensionFunctionAddressForPlatform = (PFNCLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM)CLEW_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddressForPlatform");
+#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+    __clewCreateImage2D                 = (PFNCLCREATEIMAGE2D               )CLEW_DYNLIB_IMPORT(module, "clCreateImage2D");
+    __clewCreateImage3D                 = (PFNCLCREATEIMAGE3D               )CLEW_DYNLIB_IMPORT(module, "clCreateImage3D");
+    __clewEnqueueMarker                 = (PFNCLENQUEUEMARKER               )CLEW_DYNLIB_IMPORT(module, "clEnqueueMarker");
+    __clewEnqueueWaitForEvents          = (PFNCLENQUEUEWAITFOREVENTS        )CLEW_DYNLIB_IMPORT(module, "clEnqueueWaitForEvents");
+    __clewEnqueueBarrier                = (PFNCLENQUEUEBARRIER              )CLEW_DYNLIB_IMPORT(module, "clEnqueueBarrier");
+    __clewUnloadCompiler                = (PFNCLUNLOADCOMPILER              )CLEW_DYNLIB_IMPORT(module, "clUnloadCompiler");
+    __clewGetExtensionFunctionAddress   = (PFNCLGETEXTENSIONFUNCTIONADDRESS )CLEW_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddress");
+#endif
+
+
+    /* cl_gl */
+    __clewCreateFromGLBuffer            = (PFNCLCREATEFROMGLBUFFER          )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLBuffer");
+    __clewCreateFromGLTexture           = (PFNCLCREATEFROMGLTEXTURE         )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLTexture");
+    __clewCreateFromGLRenderbuffer      = (PFNCLCREATEFROMGLRENDERBUFFER    )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLRenderbuffer");
+    __clewGetGLObjectInfo               = (PFNCLGETGLOBJECTINFO             )CLEW_DYNLIB_IMPORT(module, "clGetGLObjectInfo");
+    __clewGetGLTextureInfo              = (PFNCLGETGLTEXTUREINFO            )CLEW_DYNLIB_IMPORT(module, "clGetGLTextureInfo");
+    __clewEnqueueAcquireGLObjects       = (PFNCLENQUEUEACQUIREGLOBJECTS     )CLEW_DYNLIB_IMPORT(module, "clEnqueueAcquireGLObjects");
+    __clewEnqueueReleaseGLObjects       = (PFNCLENQUEUERELEASEGLOBJECTS     )CLEW_DYNLIB_IMPORT(module, "clEnqueueReleaseGLObjects");
+    #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+    __clewCreateFromGLTexture2D         = (PFNCLCREATEFROMGLTEXTURE2D       )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLTexture2D");
+    __clewCreateFromGLTexture3D         = (PFNCLCREATEFROMGLTEXTURE3D       )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLTexture3D");
+    #endif
+    __clewGetGLContextInfoKHR           = (PFNCLGETGLCONTEXTINFOKHR         )CLEW_DYNLIB_IMPORT(module, "clGetGLContextInfoKHR");
+
+
+    if(__clewGetPlatformIDs == NULL) return 0;
+    if(__clewGetPlatformInfo == NULL) return 0;
+    if(__clewGetDeviceIDs == NULL) return 0;
+    if(__clewGetDeviceInfo == NULL) return 0;
+
+    return CLEW_SUCCESS;
+}
+
+const char* clewErrorString(cl_int error)
+{
+    static const char* strings[] =
+    {
+        // Error Codes
+          "CL_SUCCESS"                                  //   0
+        , "CL_DEVICE_NOT_FOUND"                         //  -1
+        , "CL_DEVICE_NOT_AVAILABLE"                     //  -2
+        , "CL_COMPILER_NOT_AVAILABLE"                   //  -3
+        , "CL_MEM_OBJECT_ALLOCATION_FAILURE"            //  -4
+        , "CL_OUT_OF_RESOURCES"                         //  -5
+        , "CL_OUT_OF_HOST_MEMORY"                       //  -6
+        , "CL_PROFILING_INFO_NOT_AVAILABLE"             //  -7
+        , "CL_MEM_COPY_OVERLAP"                         //  -8
+        , "CL_IMAGE_FORMAT_MISMATCH"                    //  -9
+        , "CL_IMAGE_FORMAT_NOT_SUPPORTED"               //  -10
+        , "CL_BUILD_PROGRAM_FAILURE"                    //  -11
+        , "CL_MAP_FAILURE"                              //  -12
+        , "CL_MISALIGNED_SUB_BUFFER_OFFSET"             //  -13
+        , "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"//  -14
+        , "CL_COMPILE_PROGRAM_FAILURE"                  //  -15
+        , "CL_LINKER_NOT_AVAILABLE"                     //  -16
+        , "CL_LINK_PROGRAM_FAILURE"                     //  -17
+        , "CL_DEVICE_PARTITION_FAILED"                  //  -18
+        , "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"            //  -19
+
+        , ""    //  -20
+        , ""    //  -21
+        , ""    //  -22
+        , ""    //  -23
+        , ""    //  -24
+        , ""    //  -25
+        , ""    //  -26
+        , ""    //  -27
+        , ""    //  -28
+        , ""    //  -29
+
+        , "CL_INVALID_VALUE"                            //  -30
+        , "CL_INVALID_DEVICE_TYPE"                      //  -31
+        , "CL_INVALID_PLATFORM"                         //  -32
+        , "CL_INVALID_DEVICE"                           //  -33
+        , "CL_INVALID_CONTEXT"                          //  -34
+        , "CL_INVALID_QUEUE_PROPERTIES"                 //  -35
+        , "CL_INVALID_COMMAND_QUEUE"                    //  -36
+        , "CL_INVALID_HOST_PTR"                         //  -37
+        , "CL_INVALID_MEM_OBJECT"                       //  -38
+        , "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"          //  -39
+        , "CL_INVALID_IMAGE_SIZE"                       //  -40
+        , "CL_INVALID_SAMPLER"                          //  -41
+        , "CL_INVALID_BINARY"                           //  -42
+        , "CL_INVALID_BUILD_OPTIONS"                    //  -43
+        , "CL_INVALID_PROGRAM"                          //  -44
+        , "CL_INVALID_PROGRAM_EXECUTABLE"               //  -45
+        , "CL_INVALID_KERNEL_NAME"                      //  -46
+        , "CL_INVALID_KERNEL_DEFINITION"                //  -47
+        , "CL_INVALID_KERNEL"                           //  -48
+        , "CL_INVALID_ARG_INDEX"                        //  -49
+        , "CL_INVALID_ARG_VALUE"                        //  -50
+        , "CL_INVALID_ARG_SIZE"                         //  -51
+        , "CL_INVALID_KERNEL_ARGS"                      //  -52
+        , "CL_INVALID_WORK_DIMENSION"                   //  -53
+        , "CL_INVALID_WORK_GROUP_SIZE"                  //  -54
+        , "CL_INVALID_WORK_ITEM_SIZE"                   //  -55
+        , "CL_INVALID_GLOBAL_OFFSET"                    //  -56
+        , "CL_INVALID_EVENT_WAIT_LIST"                  //  -57
+        , "CL_INVALID_EVENT"                            //  -58
+        , "CL_INVALID_OPERATION"                        //  -59
+        , "CL_INVALID_GL_OBJECT"                        //  -60
+        , "CL_INVALID_BUFFER_SIZE"                      //  -61
+        , "CL_INVALID_MIP_LEVEL"                        //  -62
+        , "CL_INVALID_GLOBAL_WORK_SIZE"                 //  -63
+        , "CL_INVALID_PROPERTY"                         //  -64
+        , "CL_INVALID_IMAGE_DESCRIPTOR"                 //  -65
+        , "CL_INVALID_COMPILER_OPTIONS"                 //  -66
+        , "CL_INVALID_LINKER_OPTIONS"                   //  -67
+        , "CL_INVALID_DEVICE_PARTITION_COUNT"           //  -68
+    };
+
+    return strings[-error];
+}
similarity index 88%
rename from intern/opencl/CMakeLists.txt
rename to extern/cuew/CMakeLists.txt
index 03855cf..284fbbc 100644 (file)
@@ -25,6 +25,7 @@
 
 set(INC
        .
+       include
 )
 
 set(INC_SYS
@@ -32,11 +33,8 @@ set(INC_SYS
 )
 
 set(SRC
-       OCL_opencl.h
-       intern/clew.h
-       intern/clew.c
-       intern/OCL_opencl.c
+       include/cuew.h
+       src/cuew.c
 )
 
-
-blender_add_lib(bf_intern_opencl "${SRC}" "${INC}" "${INC_SYS}")
+blender_add_lib(extern_cuew "${SRC}" "${INC}" "${INC_SYS}")
diff --git a/extern/cuew/LICENSE b/extern/cuew/LICENSE
new file mode 100644 (file)
index 0000000..c753309
--- /dev/null
@@ -0,0 +1,174 @@
+
+                               Modified Apache 2.0 License
+
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor
+      and its affiliates, except as required to comply with Section 4(c) of
+      the License and to reproduce the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
diff --git a/extern/cuew/README b/extern/cuew/README
new file mode 100644 (file)
index 0000000..3c43b72
--- /dev/null
@@ -0,0 +1,12 @@
+The CUDA Extension Wrangler Library (CUEW) is a cross-platform open-source
+C/C++ extension loading library. CUEW provides efficient run-time mechanisms
+for determining which CUDA functions and extensions extensions are supported
+on the target platform.
+
+CUDA core and extension functionality is exposed in a single header file.
+GUEW has been tested on a variety of operating systems, including Windows,
+Linux, Mac OS X.
+
+LICENSE
+
+CUEW library is released under the Apache 2.0 license.
similarity index 85%
rename from intern/opencl/SConscript
rename to extern/cuew/SConscript
index 41a6d72..9c12c71 100644 (file)
@@ -27,8 +27,9 @@
 
 Import ('env')
 
-sources = env.Glob('intern/*.c')
+sources = env.Glob('src/cuew.c')
 
-incs = '.'
+incs = 'include'
+defs = []
 
-env.BlenderLib ( 'bf_intern_opencl', sources, Split(incs), libtype=['core','player'], priority = [192,192] )
+env.BlenderLib ('extern_cuew', sources, Split(incs), defines=defs, libtype=['system'], priority = [0])
diff --git a/extern/cuew/auto/cuda_errors.py b/extern/cuew/auto/cuda_errors.py
new file mode 100644 (file)
index 0000000..464b776
--- /dev/null
@@ -0,0 +1,35 @@
+CUDA_ERRORS={
+'CUDA_SUCCESS': "No errors",
+'CUDA_ERROR_INVALID_VALUE': "Invalid value",
+'CUDA_ERROR_OUT_OF_MEMORY': "Out of memory",
+'CUDA_ERROR_NOT_INITIALIZED': "Driver not initialized",
+'CUDA_ERROR_DEINITIALIZED': "Driver deinitialized",
+'CUDA_ERROR_NO_DEVICE': "No CUDA-capable device available",
+'CUDA_ERROR_INVALID_DEVICE': "Invalid device",
+'CUDA_ERROR_INVALID_IMAGE': "Invalid kernel image",
+'CUDA_ERROR_INVALID_CONTEXT': "Invalid context",
+'CUDA_ERROR_CONTEXT_ALREADY_CURRENT': "Context already current",
+'CUDA_ERROR_MAP_FAILED': "Map failed",
+'CUDA_ERROR_UNMAP_FAILED': "Unmap failed",
+'CUDA_ERROR_ARRAY_IS_MAPPED': "Array is mapped",
+'CUDA_ERROR_ALREADY_MAPPED': "Already mapped",
+'CUDA_ERROR_NO_BINARY_FOR_GPU': "No binary for GPU",
+'CUDA_ERROR_ALREADY_ACQUIRED': "Already acquired",
+'CUDA_ERROR_NOT_MAPPED': "Not mapped",
+'CUDA_ERROR_NOT_MAPPED_AS_ARRAY': "Mapped resource not available for access as an array",
+'CUDA_ERROR_NOT_MAPPED_AS_POINTER': "Mapped resource not available for access as a pointer",
+'CUDA_ERROR_ECC_UNCORRECTABLE': "Uncorrectable ECC error detected",
+'CUDA_ERROR_UNSUPPORTED_LIMIT': "CUlimit not supported by device",
+'CUDA_ERROR_INVALID_SOURCE': "Invalid source",
+'CUDA_ERROR_FILE_NOT_FOUND': "File not found",
+'CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND': "Link to a shared object failed to resolve",
+'CUDA_ERROR_SHARED_OBJECT_INIT_FAILED': "Shared object initialization failed",
+'CUDA_ERROR_INVALID_HANDLE': "Invalid handle",
+'CUDA_ERROR_NOT_FOUND': "Not found",
+'CUDA_ERROR_NOT_READY': "CUDA not ready",
+'CUDA_ERROR_LAUNCH_FAILED': "Launch failed",
+'CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES': "Launch exceeded resources",
+'CUDA_ERROR_LAUNCH_TIMEOUT': "Launch exceeded timeout",
+'CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING': "Launch with incompatible texturing",
+'CUDA_ERROR_UNKNOWN': "Unknown error",
+}
diff --git a/extern/cuew/auto/cuda_extra.py b/extern/cuew/auto/cuda_extra.py
new file mode 100644 (file)
index 0000000..fd4f466
--- /dev/null
@@ -0,0 +1,125 @@
+extra_code = """
+static void path_join(const char *path1,
+                      const char *path2,
+                      int maxlen,
+                      char *result) {
+#if defined(WIN32) || defined(_WIN32)
+  const char separator = '\\\\';
+#else
+  const char separator = '/';
+#endif
+  int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
+  if (n != -1 && n < maxlen) {
+    result[n] = '\\0';
+  }
+  else {
+    result[maxlen - 1] = '\\0';
+  }
+}
+
+static int path_exists(const char *path) {
+  struct stat st;
+  if (stat(path, &st)) {
+    return 0;
+  }
+  return 1;
+}
+
+const char *cuewCompilerPath(void) {
+#ifdef _WIN32
+  const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
+  const char *executable = "nvcc.exe";
+#else
+  const char *defaultpaths[] = {
+    "/Developer/NVIDIA/CUDA-5.0/bin",
+    "/usr/local/cuda-5.0/bin",
+    "/usr/local/cuda/bin",
+    "/Developer/NVIDIA/CUDA-6.0/bin",
+    "/usr/local/cuda-6.0/bin",
+    "/Developer/NVIDIA/CUDA-5.5/bin",
+    "/usr/local/cuda-5.5/bin",
+    NULL};
+  const char *executable = "nvcc";
+#endif
+  int i;
+
+  const char *binpath = getenv("CUDA_BIN_PATH");
+
+  static char nvcc[65536];
+
+  if (binpath) {
+    path_join(binpath, executable, sizeof(nvcc), nvcc);
+    if (path_exists(nvcc))
+      return nvcc;
+  }
+
+  for (i = 0; defaultpaths[i]; ++i) {
+    path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
+    if (path_exists(nvcc))
+      return nvcc;
+  }
+
+#ifndef _WIN32
+  {
+    FILE *handle = popen("which nvcc", "r");
+    if (handle) {
+      char buffer[4096] = {0};
+      int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
+      buffer[len] = '\\0';
+      pclose(handle);
+
+      if (buffer[0])
+        return "nvcc";
+    }
+  }
+#endif
+
+  return NULL;
+}
+
+int cuewCompilerVersion(void) {
+  const char *path = cuewCompilerPath();
+  const char *marker = "Cuda compilation tools, release ";
+  FILE *pipe;
+  int major, minor;
+  char *versionstr;
+  char buf[128];
+  char output[65536] = "\\0";
+  char command[65536] = "\\0";
+
+  if (path == NULL)
+    return 0;
+
+  /* get --version output */
+  strncpy(command, path, sizeof(command));
+  strncat(command, " --version", sizeof(command) - strlen(path));
+  pipe = popen(command, "r");
+  if (!pipe) {
+    fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
+    return 0;
+  }
+
+  while (!feof(pipe)) {
+    if (fgets(buf, sizeof(buf), pipe) != NULL) {
+      strncat(output, buf, sizeof(output) - strlen(output));
+    }
+  }
+
+  pclose(pipe);
+
+  /* parse version number */
+  versionstr = strstr(output, marker);
+  if (versionstr == NULL) {
+    fprintf(stderr, "CUDA: failed to find version number in:\\n\\n%s\\n", output);
+    return 0;
+  }
+  versionstr += strlen(marker);
+
+  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
+    fprintf(stderr, "CUDA: failed to parse version number from:\\n\\n%s\\n", output);
+    return 0;
+  }
+
+  return 10 * major + minor;
+}
+"""
diff --git a/extern/cuew/auto/cuew_gen.py b/extern/cuew/auto/cuew_gen.py
new file mode 100644 (file)
index 0000000..4cdc361
--- /dev/null
@@ -0,0 +1,591 @@
+#!/usr/bin/env python3
+#
+# Copyright 2014 Blender Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+# This script generates either header or implementation file from
+# a CUDA header files.
+#
+# Usage: cuew hdr|impl [/path/to/cuda/includes]
+#  - hdr means header file will be generated and printed to stdout.
+#  - impl means implementation file will be generated and printed to stdout.
+#  - /path/to/cuda/includes is a path to a folder with cuda.h and cudaGL.h
+#    for which wrangler will be generated.
+
+import os
+import sys
+from cuda_errors import CUDA_ERRORS
+from pycparser import c_parser, c_ast, parse_file
+from subprocess import Popen, PIPE
+
+INCLUDE_DIR = "/usr/include"
+LIB = "CUEW"
+REAL_LIB = "CUDA"
+VERSION_MAJOR = "1"
+VERSION_MINOR = "2"
+COPYRIGHT = """/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */"""
+FILES = ["cuda.h", "cudaGL.h"]
+
+TYPEDEFS = []
+FUNC_TYPEDEFS = []
+SYMBOLS = []
+DEFINES = []
+DEFINES_V2 = []
+ERRORS = []
+
+
+class FuncDefVisitor(c_ast.NodeVisitor):
+    indent = 0
+    prev_complex = False
+    dummy_typedefs = ['size_t', 'CUdeviceptr']
+
+    def _get_quals_string(self, node):
+        if node.quals:
+            return ' '.join(node.quals) + ' '
+        return ''
+
+    def _get_ident_type(self, node):
+        if isinstance(node, c_ast.PtrDecl):
+            return self._get_ident_type(node.type.type) + '*'
+        if isinstance(node, c_ast.ArrayDecl):
+            return self._get_ident_type(node.type)
+        elif isinstance(node, c_ast.Struct):
+            if node.name:
+                return 'struct ' + node.name
+            else:
+                self.indent += 1
+                struct = self._stringify_struct(node)
+                self.indent -= 1
+                return "struct {\n" + \
+                       struct + ("  " * self.indent) + "}"
+        elif isinstance(node, c_ast.Union):
+            self.indent += 1
+            union = self._stringify_struct(node)
+            self.indent -= 1
+            return "union {\n" + union + ("  " * self.indent) + "}"
+        elif isinstance(node, c_ast.Enum):
+            return 'enum ' + node.name
+        elif isinstance(node, c_ast.TypeDecl):
+            return self._get_ident_type(node.type)
+        else:
+            return node.names[0]
+
+    def _stringify_param(self, param):
+        param_type = param.type
+        result = self._get_quals_string(param)
+        result += self._get_ident_type(param_type)
+        if param.name:
+            result += ' ' + param.name
+        if isinstance(param_type, c_ast.ArrayDecl):
+            # TODO(sergey): Workaround to deal with the
+            # preprocessed file where array size got
+            # substituded.
+            dim = param_type.dim.value
+            if param.name == "reserved" and dim == "64":
+                dim = "CU_IPC_HANDLE_SIZE"
+            result += '[' + dim + ']'
+        return result
+
+    def _stringify_params(self, params):
+        result = []
+        for param in params:
+            result.append(self._stringify_param(param))
+        return ', '.join(result)
+
+    def _stringify_struct(self, node):
+        result = ""
+        children = node.children()
+        for child in children:
+            member = self._stringify_param(child[1])
+            result += ("  " * self.indent) + member + ";\n"
+        return result
+
+    def _stringify_enum(self, node):
+        result = ""
+        children = node.children()
+        for child in children:
+            if isinstance(child[1], c_ast.EnumeratorList):
+                enumerators = child[1].enumerators
+                for enumerator in enumerators:
+                    result += ("  " * self.indent) + enumerator.name
+                    if enumerator.value:
+                        result += " = " + enumerator.value.value
+                    result += ",\n"
+                    if enumerator.name.startswith("CUDA_ERROR_"):
+                        ERRORS.append(enumerator.name)
+        return result
+
+    def visit_Decl(self, node):
+        if node.type.__class__.__name__ == 'FuncDecl':
+            if isinstance(node.type, c_ast.FuncDecl):
+                func_decl = node.type
+                func_decl_type = func_decl.type
+
+                typedef = 'typedef '
+                symbol_name = None
+
+                if isinstance(func_decl_type, c_ast.TypeDecl):
+                    symbol_name = func_decl_type.declname
+                    typedef += self._get_quals_string(func_decl_type)
+                    typedef += self._get_ident_type(func_decl_type.type)
+                    typedef += ' CUDAAPI'
+                    typedef += ' t' + symbol_name
+                elif isinstance(func_decl_type, c_ast.PtrDecl):
+                    ptr_type = func_decl_type.type
+                    symbol_name = ptr_type.declname
+                    typedef += self._get_quals_string(ptr_type)
+                    typedef += self._get_ident_type(func_decl_type)
+                    typedef += ' CUDAAPI'
+                    typedef += ' t' + symbol_name
+
+                typedef += '(' + \
+                    self._stringify_params(func_decl.args.params) + \
+                    ');'
+
+                SYMBOLS.append(symbol_name)
+                FUNC_TYPEDEFS.append(typedef)
+
+    def visit_Typedef(self, node):
+        if node.name in self.dummy_typedefs:
+            return
+
+        complex = False
+        type = self._get_ident_type(node.type)
+        quals = self._get_quals_string(node)
+
+        if isinstance(node.type.type, c_ast.Struct):
+            self.indent += 1
+            struct = self._stringify_struct(node.type.type)
+            self.indent -= 1
+            typedef = quals + type + " {\n" + struct + "} " + node.name
+            complex = True
+        elif isinstance(node.type.type, c_ast.Enum):
+            self.indent += 1
+            enum = self._stringify_enum(node.type.type)
+            self.indent -= 1
+            typedef = quals + type + " {\n" + enum + "} " + node.name
+            complex = True
+        else:
+            typedef = quals + type + " " + node.name
+        if complex or self.prev_complex:
+            typedef = "\ntypedef " + typedef + ";"
+        else:
+            typedef = "typedef " + typedef + ";"
+
+        TYPEDEFS.append(typedef)
+
+        self.prev_complex = complex
+
+
+def get_latest_cpp():
+    path_prefix = "/usr/bin"
+    for cpp_version in ["9", "8", "7", "6", "5", "4"]:
+        test_cpp = os.path.join(path_prefix, "cpp-4." + cpp_version)
+        if os.path.exists(test_cpp):
+            return test_cpp
+    return None
+
+
+def preprocess_file(filename, cpp_path):
+    args = [cpp_path, "-I./"]
+    if filename.endswith("GL.h"):
+        args.append("-DCUDAAPI= ")
+    args.append(filename)
+
+    try:
+        pipe = Popen(args,
+                     stdout=PIPE,
+                     universal_newlines=True)
+        text = pipe.communicate()[0]
+    except OSError as e:
+        raise RuntimeError("Unable to invoke 'cpp'.  " +
+            'Make sure its path was passed correctly\n' +
+            ('Original error: %s' % e))
+
+    return text
+
+
+def parse_files():
+    parser = c_parser.CParser()
+    cpp_path = get_latest_cpp()
+
+    for filename in FILES:
+        filepath = os.path.join(INCLUDE_DIR, filename)
+        dummy_typedefs = {}
+        text = preprocess_file(filepath, cpp_path)
+
+        if filepath.endswith("GL.h"):
+            dummy_typedefs = {
+                "CUresult": "int",
+                "CUgraphicsResource": "void *",
+                "CUdevice": "void *",
+                "CUcontext": "void *",
+                "CUdeviceptr": "void *",
+                "CUstream": "void *"
+                }
+
+            text = "typedef int GLint;\n" + text
+            text = "typedef unsigned int GLuint;\n" + text
+            text = "typedef unsigned int GLenum;\n" + text
+            text = "typedef long size_t;\n" + text
+
+        for typedef in sorted(dummy_typedefs):
+            text = "typedef " + dummy_typedefs[typedef] + " " + \
+                typedef + ";\n" + text
+
+        ast = parser.parse(text, filepath)
+
+        with open(filepath) as f:
+            lines = f.readlines()
+            for line in lines:
+                if line.startswith("#define"):
+                    line = line[8:-1]
+                    token = line.split()
+                    if token[0] not in ("__cuda_cuda_h__",
+                                        "CUDA_CB",
+                                        "CUDAAPI"):
+                        DEFINES.append(token)
+
+            for line in lines:
+                # TODO(sergey): Use better matching rule for _v2 symbols.
+                if line[0].isspace() and line.lstrip().startswith("#define"):
+                    line = line[12:-1]
+                    token = line.split()
+                    if len(token) == 2 and token[1].endswith("_v2"):
+                        DEFINES_V2.append(token)
+
+        v = FuncDefVisitor()
+        for typedef in dummy_typedefs:
+            v.dummy_typedefs.append(typedef)
+        v.visit(ast)
+
+        FUNC_TYPEDEFS.append('')
+        SYMBOLS.append('')
+
+
+def print_copyright():
+    print(COPYRIGHT)
+    print("")
+
+
+def open_header_guard():
+    print("#ifndef __%s_H__" % (LIB))
+    print("#define __%s_H__" % (LIB))
+    print("")
+    print("#ifdef __cplusplus")
+    print("extern \"C\" {")
+    print("#endif")
+    print("")
+
+
+def close_header_guard():
+    print("")
+    print("#ifdef __cplusplus")
+    print("}")
+    print("#endif")
+    print("")
+    print("#endif  /* __%s_H__ */" % (LIB))
+
+
+def print_header():
+    print_copyright()
+    open_header_guard()
+
+    # Fot size_t.
+    print("#include <stdlib.h>")
+    print("")
+
+    print("/* Defines. */")
+    print("#define %s_VERSION_MAJOR %s" % (LIB, VERSION_MAJOR))
+    print("#define %s_VERSION_MINOR %s" % (LIB, VERSION_MINOR))
+    print("")
+    for define in DEFINES:
+        print('#define %s' % (' '.join(define)))
+    print("")
+
+    print("""/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
+ * the cuda library has both the old ones for compatibility and new
+ * ones with _v2 postfix,
+ */""")
+    for define in DEFINES_V2:
+        print('#define %s' % (' '.join(define)))
+    print("")
+
+    print("/* Types. */")
+
+    # We handle this specially because of the file is
+    # getting preprocessed.
+    print("""#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+""")
+
+    for typedef in TYPEDEFS:
+        print('%s' % (typedef))
+
+    # TDO(sergey): This is only specific to CUDA wrapper.
+    print("""
+#ifdef _WIN32
+#  define CUDAAPI __stdcall
+#  define CUDA_CB __stdcall
+#else
+#  define CUDAAPI
+#  define CUDA_CB
+#endif
+""")
+
+    print("/* Function types. */")
+    for func_typedef in FUNC_TYPEDEFS:
+        print('%s' % (func_typedef))
+    print("")
+
+    print("/* Function declarations. */")
+    for symbol in SYMBOLS:
+        if symbol:
+            print('extern t%s *%s;' % (symbol, symbol))
+        else:
+            print("")
+
+    print("")
+    print("enum {")
+    print("  CUEW_SUCCESS = 0,")
+    print("  CUEW_ERROR_OPEN_FAILED = -1,")
+    print("  CUEW_ERROR_ATEXIT_FAILED = -2,")
+    print("};")
+    print("")
+    print("int %sInit(void);" % (LIB.lower()))
+    # TODO(sergey): Get rid of hardcoded CUresult.
+    print("const char *%sErrorString(CUresult result);" % (LIB.lower()))
+    print("const char *cuewCompilerPath(void);")
+    print("int cuewCompilerVersion(void);")
+
+    close_header_guard()
+
+
+def print_dl_wrapper():
+    print("""#ifdef _WIN32
+#  define WIN32_LEAN_AND_MEAN
+#  define VC_EXTRALEAN
+#  include <windows.h>
+
+/* Utility macros. */
+
+typedef HMODULE DynamicLibrary;
+
+#  define dynamic_library_open(path)         LoadLibrary(path)
+#  define dynamic_library_close(lib)         FreeLibrary(lib)
+#  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
+#else
+#  include <dlfcn.h>
+
+typedef void* DynamicLibrary;
+
+#  define dynamic_library_open(path)         dlopen(path, RTLD_NOW)
+#  define dynamic_library_close(lib)         dlclose(lib)
+#  define dynamic_library_find(lib, symbol)  dlsym(lib, symbol)
+#endif
+""")
+
+
+def print_dl_helper_macro():
+    print("""#define %s_LIBRARY_FIND_CHECKED(name) \\
+        name = (t##name *)dynamic_library_find(lib, #name);
+
+#define %s_LIBRARY_FIND(name) \\
+        name = (t##name *)dynamic_library_find(lib, #name); \\
+        assert(name);
+
+static DynamicLibrary lib;""" % (REAL_LIB, REAL_LIB))
+    print("")
+
+
+def print_dl_close():
+    print("""static void %sExit(void) {
+  if(lib != NULL) {
+    /*  Ignore errors. */
+    dynamic_library_close(lib);
+    lib = NULL;
+  }
+}""" % (LIB.lower()))
+    print("")
+
+
+def print_lib_path():
+    # TODO(sergey): get rid of hardcoded libraries.
+    print("""#ifdef _WIN32
+  /* Expected in c:/windows/system or similar, no path needed. */
+  const char *path = "nvcuda.dll";
+#elif defined(__APPLE__)
+  /* Default installation path. */
+  const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+  const char *path = "libcuda.so";
+#endif""")
+
+
+def print_init_guard():
+    print("""  static int initialized = 0;
+  static int result = 0;
+  int error, driver_version;
+
+  if (initialized) {
+    return result;
+  }
+
+  initialized = 1;
+
+  error = atexit(cuewExit);
+  if (error) {
+    result = CUEW_ERROR_ATEXIT_FAILED;
+    return result;
+  }
+
+  /* Load library. */
+  lib = dynamic_library_open(path);
+
+  if (lib == NULL) {
+    result = CUEW_ERROR_OPEN_FAILED;
+    return result;
+  }""")
+    print("")
+
+
+def print_driver_version_guard():
+    # TODO(sergey): Currently it's hardcoded for CUDA only.
+    print("""  /* Detect driver version. */
+  driver_version = 1000;
+
+  %s_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
+  if (cuDriverGetVersion) {
+    cuDriverGetVersion(&driver_version);
+  }
+
+  /* We require version 4.0. */
+  if (driver_version < 4000) {
+    result = CUEW_ERROR_OPEN_FAILED;
+    return result;
+  }""" % (REAL_LIB))
+
+
+def print_dl_init():
+    print("int %sInit(void) {" % (LIB.lower()))
+
+    print("  /* Library paths. */")
+    print_lib_path()
+    print_init_guard()
+    print_driver_version_guard()
+
+    print("  /* Fetch all function pointers. */")
+    for symbol in SYMBOLS:
+        if symbol:
+            print("  %s_LIBRARY_FIND(%s);" % (REAL_LIB, symbol))
+        else:
+            print("")
+
+    print("")
+    print("  result = CUEW_SUCCESS;")
+    print("  return result;")
+
+    print("}")
+
+
+def print_implementation():
+    print_copyright()
+
+    # TODO(sergey): Get rid of hardcoded header.
+    print("""#ifdef _MSC_VER
+#  define snprintf _snprintf
+#  define popen _popen
+#  define pclose _pclose
+#  define _CRT_SECURE_NO_WARNINGS
+#endif
+""")
+    print("#include <cuew.h>")
+    print("#include <assert.h>")
+    print("#include <stdio.h>")
+    print("#include <string.h>")
+    print("#include <sys/stat.h>")
+    print("")
+
+    print_dl_wrapper()
+    print_dl_helper_macro()
+
+    print("/* Function definitions. */")
+    for symbol in SYMBOLS:
+        if symbol:
+            print('t%s *%s;' % (symbol, symbol))
+        else:
+            print("")
+    print("")
+
+    print_dl_close()
+
+    print("/* Implementation function. */")
+    print_dl_init()
+
+    print("")
+    # TODO(sergey): Get rid of hardcoded CUresult.
+    print("const char *%sErrorString(CUresult result) {" % (LIB.lower()))
+    print("  switch(result) {")
+    print("    case CUDA_SUCCESS: return \"No errors\";")
+
+    for error in ERRORS:
+        if error in CUDA_ERRORS:
+            str = CUDA_ERRORS[error]
+        else:
+            str = error[11:]
+        print("    case %s: return \"%s\";" % (error, str))
+
+    print("    default: return \"Unknown CUDA error value\";")
+    print("  }")
+    print("}")
+
+    from cuda_extra import extra_code
+    print(extra_code)
+
+if __name__ == "__main__":
+
+    if len(sys.argv) != 2 and len(sys.argv) != 3:
+        print("Usage: %s hdr|impl [/path/to/cuda/toolkit/include]" %
+              (sys.argv[0]))
+        exit(1)
+
+    if len(sys.argv) == 3:
+        INCLUDE_DIR = sys.argv[2]
+
+    parse_files()
+
+    if sys.argv[1] == "hdr":
+        print_header()
+    elif sys.argv[1] == "impl":
+        print_implementation()
+    else:
+        print("Unknown command %s" % (sys.argv[1]))
+        exit(1)
diff --git a/extern/cuew/auto/cuew_gen.sh b/extern/cuew/auto/cuew_gen.sh
new file mode 100755 (executable)
index 0000000..b44987b
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+# This script invokes cuew_gen.py and updates the
+# header and source files in the repository.
+
+SCRIPT=`realpath -s $0`
+DIR=`dirname $SCRIPT`
+
+python ${DIR}/cuew_gen.py hdr $@ > $DIR/../include/cuew.h
+python ${DIR}/cuew_gen.py impl $@ > $DIR/../src/cuew.c
diff --git a/extern/cuew/auto/stdlib.h b/extern/cuew/auto/stdlib.h
new file mode 100644 (file)
index 0000000..75976c8
--- /dev/null
@@ -0,0 +1,3 @@
+/* This file is needed to workaround issue with parsing system headers. */
+
+typedef long size_t;
diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h
new file mode 100644 (file)
index 0000000..fd03311
--- /dev/null
@@ -0,0 +1,1138 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifndef __CUEW_H__
+#define __CUEW_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+
+/* Defines. */
+#define CUEW_VERSION_MAJOR 1
+#define CUEW_VERSION_MINOR 2
+
+#define CUDA_VERSION 6000
+#define CU_IPC_HANDLE_SIZE 64
+#define CU_MEMHOSTALLOC_PORTABLE 0x01
+#define CU_MEMHOSTALLOC_DEVICEMAP 0x02
+#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
+#define CU_MEMHOSTREGISTER_PORTABLE 0x01
+#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
+#define CUDA_ARRAY3D_LAYERED 0x01
+#define CUDA_ARRAY3D_2DARRAY 0x01
+#define CUDA_ARRAY3D_SURFACE_LDST 0x02
+#define CUDA_ARRAY3D_CUBEMAP 0x04
+#define CUDA_ARRAY3D_TEXTURE_GATHER 0x08
+#define CUDA_ARRAY3D_DEPTH_TEXTURE 0x10
+#define CU_TRSA_OVERRIDE_FORMAT 0x01
+#define CU_TRSF_READ_AS_INTEGER 0x01
+#define CU_TRSF_NORMALIZED_COORDINATES 0x02
+#define CU_TRSF_SRGB 0x10
+#define CU_LAUNCH_PARAM_END ((void*)0x00)
+#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
+#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
+#define CU_PARAM_TR_DEFAULT -1
+#define CUDAGL_H
+
+/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
+ * the cuda library has both the old ones for compatibility and new
+ * ones with _v2 postfix,
+ */
+#define cuDeviceTotalMem cuDeviceTotalMem_v2
+#define cuCtxCreate cuCtxCreate_v2
+#define cuModuleGetGlobal cuModuleGetGlobal_v2
+#define cuMemGetInfo cuMemGetInfo_v2
+#define cuMemAlloc cuMemAlloc_v2
+#define cuMemAllocPitch cuMemAllocPitch_v2
+#define cuMemFree cuMemFree_v2
+#define cuMemGetAddressRange cuMemGetAddressRange_v2
+#define cuMemAllocHost cuMemAllocHost_v2
+#define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
+#define cuMemcpyHtoD cuMemcpyHtoD_v2
+#define cuMemcpyDtoH cuMemcpyDtoH_v2
+#define cuMemcpyDtoD cuMemcpyDtoD_v2
+#define cuMemcpyDtoA cuMemcpyDtoA_v2
+#define cuMemcpyAtoD cuMemcpyAtoD_v2
+#define cuMemcpyHtoA cuMemcpyHtoA_v2
+#define cuMemcpyAtoH cuMemcpyAtoH_v2
+#define cuMemcpyAtoA cuMemcpyAtoA_v2
+#define cuMemcpyHtoAAsync cuMemcpyHtoAAsync_v2
+#define cuMemcpyAtoHAsync cuMemcpyAtoHAsync_v2
+#define cuMemcpy2D cuMemcpy2D_v2
+#define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2
+#define cuMemcpy3D cuMemcpy3D_v2
+#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
+#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
+#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
+#define cuMemcpy2DAsync cuMemcpy2DAsync_v2
+#define cuMemcpy3DAsync cuMemcpy3DAsync_v2
+#define cuMemsetD8 cuMemsetD8_v2
+#define cuMemsetD16 cuMemsetD16_v2
+#define cuMemsetD32 cuMemsetD32_v2
+#define cuMemsetD2D8 cuMemsetD2D8_v2
+#define cuMemsetD2D16 cuMemsetD2D16_v2
+#define cuMemsetD2D32 cuMemsetD2D32_v2
+#define cuArrayCreate cuArrayCreate_v2
+#define cuArrayGetDescriptor cuArrayGetDescriptor_v2
+#define cuArray3DCreate cuArray3DCreate_v2
+#define cuArray3DGetDescriptor cuArray3DGetDescriptor_v2
+#define cuTexRefSetAddress cuTexRefSetAddress_v2
+#define cuTexRefGetAddress cuTexRefGetAddress_v2
+#define cuGraphicsResourceGetMappedPointer cuGraphicsResourceGetMappedPointer_v2
+#define cuCtxDestroy cuCtxDestroy_v2
+#define cuCtxPopCurrent cuCtxPopCurrent_v2
+#define cuCtxPushCurrent cuCtxPushCurrent_v2
+#define cuStreamDestroy cuStreamDestroy_v2
+#define cuEventDestroy cuEventDestroy_v2
+#define cuTexRefSetAddress2D cuTexRefSetAddress2D_v2
+#define cuGLCtxCreate cuGLCtxCreate_v2
+#define cuGLMapBufferObject cuGLMapBufferObject_v2
+#define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
+
+/* Types. */
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+typedef int CUdevice;
+typedef struct CUctx_st* CUcontext;
+typedef struct CUmod_st* CUmodule;
+typedef struct CUfunc_st* CUfunction;
+typedef struct CUarray_st* CUarray;
+typedef struct CUmipmappedArray_st* CUmipmappedArray;
+typedef struct CUtexref_st* CUtexref;
+typedef struct CUsurfref_st* CUsurfref;
+typedef struct CUevent_st* CUevent;
+typedef struct CUstream_st* CUstream;
+typedef struct CUgraphicsResource_st* CUgraphicsResource;
+typedef unsigned CUtexObject;
+typedef unsigned CUsurfObject;
+
+typedef struct CUuuid_st {
+  char bytes[16];
+} CUuuid;
+
+typedef struct CUipcEventHandle_st {
+  char reserved[CU_IPC_HANDLE_SIZE];
+} CUipcEventHandle;
+
+typedef struct CUipcMemHandle_st {
+  char reserved[CU_IPC_HANDLE_SIZE];
+} CUipcMemHandle;
+
+typedef enum CUipcMem_flags_enum {
+  CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1,
+} CUipcMem_flags;
+
+typedef enum CUmemAttach_flags_enum {
+  CU_MEM_ATTACH_GLOBAL = 0x1,
+  CU_MEM_ATTACH_HOST = 0x2,
+  CU_MEM_ATTACH_SINGLE = 0x4,
+} CUmemAttach_flags;
+
+typedef enum CUctx_flags_enum {
+  CU_CTX_SCHED_AUTO = 0x00,
+  CU_CTX_SCHED_SPIN = 0x01,
+  CU_CTX_SCHED_YIELD = 0x02,
+  CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
+  CU_CTX_BLOCKING_SYNC = 0x04,
+  CU_CTX_SCHED_MASK = 0x07,
+  CU_CTX_MAP_HOST = 0x08,
+  CU_CTX_LMEM_RESIZE_TO_MAX = 0x10,
+  CU_CTX_FLAGS_MASK = 0x1f,
+} CUctx_flags;
+
+typedef enum CUstream_flags_enum {
+  CU_STREAM_DEFAULT = 0x0,
+  CU_STREAM_NON_BLOCKING = 0x1,
+} CUstream_flags;
+
+typedef enum CUevent_flags_enum {
+  CU_EVENT_DEFAULT = 0x0,
+  CU_EVENT_BLOCKING_SYNC = 0x1,
+  CU_EVENT_DISABLE_TIMING = 0x2,
+  CU_EVENT_INTERPROCESS = 0x4,
+} CUevent_flags;
+
+typedef enum CUarray_format_enum {
+  CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
+  CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
+  CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
+  CU_AD_FORMAT_SIGNED_INT8 = 0x08,
+  CU_AD_FORMAT_SIGNED_INT16 = 0x09,
+  CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
+  CU_AD_FORMAT_HALF = 0x10,
+  CU_AD_FORMAT_FLOAT = 0x20,
+} CUarray_format;
+
+typedef enum CUaddress_mode_enum {
+  CU_TR_ADDRESS_MODE_WRAP = 0,
+  CU_TR_ADDRESS_MODE_CLAMP = 1,
+  CU_TR_ADDRESS_MODE_MIRROR = 2,
+  CU_TR_ADDRESS_MODE_BORDER = 3,
+} CUaddress_mode;
+
+typedef enum CUfilter_mode_enum {
+  CU_TR_FILTER_MODE_POINT = 0,
+  CU_TR_FILTER_MODE_LINEAR = 1,
+} CUfilter_mode;
+
+typedef enum CUdevice_attribute_enum {
+  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
+  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
+  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
+  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
+  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
+  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
+  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
+  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
+  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
+  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
+  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
+  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
+  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
+  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
+  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
+  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
+  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
+  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
+  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
+  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
+  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
+  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
+  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
+  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
+  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
+  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
+  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
+  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
+  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
+  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
+  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
+  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
+  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
+  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
+  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
+  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
+  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
+  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
+  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
+  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
+  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
+  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
+  CU_DEVICE_ATTRIBUTE_MAX,
+} CUdevice_attribute;
+
+typedef struct CUdevprop_st {
+  int maxThreadsPerBlock;
+  int maxThreadsDim[3];
+  int maxGridSize[3];
+  int sharedMemPerBlock;
+  int totalConstantMemory;
+  int SIMDWidth;
+  int memPitch;
+  int regsPerBlock;
+  int clockRate;
+  int textureAlign;
+} CUdevprop;
+
+typedef enum CUpointer_attribute_enum {
+  CU_POINTER_ATTRIBUTE_CONTEXT = 1,
+  CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
+  CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
+  CU_POINTER_ATTRIBUTE_HOST_POINTER = 4,
+  CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5,
+  CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
+  CU_POINTER_ATTRIBUTE_BUFFER_ID = 7,
+  CU_POINTER_ATTRIBUTE_IS_MANAGED = 8,
+} CUpointer_attribute;
+
+typedef enum CUfunction_attribute_enum {
+  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+  CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+  CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+  CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+  CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
+  CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
+  CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
+  CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
+  CU_FUNC_ATTRIBUTE_MAX,
+} CUfunction_attribute;
+
+typedef enum CUfunc_cache_enum {
+  CU_FUNC_CACHE_PREFER_NONE = 0x00,
+  CU_FUNC_CACHE_PREFER_SHARED = 0x01,
+  CU_FUNC_CACHE_PREFER_L1 = 0x02,
+  CU_FUNC_CACHE_PREFER_EQUAL = 0x03,
+} CUfunc_cache;
+
+typedef enum CUsharedconfig_enum {
+  CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00,
+  CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01,
+  CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02,
+} CUsharedconfig;
+
+typedef enum CUmemorytype_enum {
+  CU_MEMORYTYPE_HOST = 0x01,
+  CU_MEMORYTYPE_DEVICE = 0x02,
+  CU_MEMORYTYPE_ARRAY = 0x03,
+  CU_MEMORYTYPE_UNIFIED = 0x04,
+} CUmemorytype;
+
+typedef enum CUcomputemode_enum {
+  CU_COMPUTEMODE_DEFAULT = 0,
+  CU_COMPUTEMODE_EXCLUSIVE = 1,
+  CU_COMPUTEMODE_PROHIBITED = 2,
+  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
+} CUcomputemode;
+
+typedef enum CUjit_option_enum {
+  CU_JIT_MAX_REGISTERS = 0,
+  CU_JIT_THREADS_PER_BLOCK,
+  CU_JIT_WALL_TIME,
+  CU_JIT_INFO_LOG_BUFFER,
+  CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+  CU_JIT_ERROR_LOG_BUFFER,
+  CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+  CU_JIT_OPTIMIZATION_LEVEL,
+  CU_JIT_TARGET_FROM_CUCONTEXT,
+  CU_JIT_TARGET,
+  CU_JIT_FALLBACK_STRATEGY,
+  CU_JIT_GENERATE_DEBUG_INFO,
+  CU_JIT_LOG_VERBOSE,
+  CU_JIT_GENERATE_LINE_INFO,
+  CU_JIT_CACHE_MODE,
+  CU_JIT_NUM_OPTIONS,
+} CUjit_option;
+
+typedef enum CUjit_target_enum {
+  CU_TARGET_COMPUTE_10 = 10,
+  CU_TARGET_COMPUTE_11 = 11,
+  CU_TARGET_COMPUTE_12 = 12,
+  CU_TARGET_COMPUTE_13 = 13,
+  CU_TARGET_COMPUTE_20 = 20,
+  CU_TARGET_COMPUTE_21 = 21,
+  CU_TARGET_COMPUTE_30 = 30,
+  CU_TARGET_COMPUTE_32 = 32,
+  CU_TARGET_COMPUTE_35 = 35,
+  CU_TARGET_COMPUTE_50 = 50,
+} CUjit_target;
+
+typedef enum CUjit_fallback_enum {
+  CU_PREFER_PTX = 0,
+  CU_PREFER_BINARY,
+} CUjit_fallback;
+
+typedef enum CUjit_cacheMode_enum {
+  CU_JIT_CACHE_OPTION_NONE = 0,
+  CU_JIT_CACHE_OPTION_CG,
+  CU_JIT_CACHE_OPTION_CA,
+} CUjit_cacheMode;
+
+typedef enum CUjitInputType_enum {
+  CU_JIT_INPUT_CUBIN = 0,
+  CU_JIT_INPUT_PTX,
+  CU_JIT_INPUT_FATBINARY,
+  CU_JIT_INPUT_OBJECT,
+  CU_JIT_INPUT_LIBRARY,
+  CU_JIT_NUM_INPUT_TYPES,
+} CUjitInputType;
+
+typedef struct CUlinkState_st* CUlinkState;
+
+typedef enum CUgraphicsRegisterFlags_enum {
+  CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00,
+  CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01,
+  CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02,
+  CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04,
+  CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08,
+} CUgraphicsRegisterFlags;
+
+typedef enum CUgraphicsMapResourceFlags_enum {
+  CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
+  CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
+  CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
+} CUgraphicsMapResourceFlags;
+
+typedef enum CUarray_cubemap_face_enum {
+  CU_CUBEMAP_FACE_POSITIVE_X = 0x00,
+  CU_CUBEMAP_FACE_NEGATIVE_X = 0x01,
+  CU_CUBEMAP_FACE_POSITIVE_Y = 0x02,
+  CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03,
+  CU_CUBEMAP_FACE_POSITIVE_Z = 0x04,
+  CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05,
+} CUarray_cubemap_face;
+
+typedef enum CUlimit_enum {
+  CU_LIMIT_STACK_SIZE = 0x00,
+  CU_LIMIT_PRINTF_FIFO_SIZE = 0x01,
+  CU_LIMIT_MALLOC_HEAP_SIZE = 0x02,
+  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03,
+  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04,
+  CU_LIMIT_MAX,
+} CUlimit;
+
+typedef enum CUresourcetype_enum {
+  CU_RESOURCE_TYPE_ARRAY = 0x00,
+  CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
+  CU_RESOURCE_TYPE_LINEAR = 0x02,
+  CU_RESOURCE_TYPE_PITCH2D = 0x03,
+} CUresourcetype;
+
+typedef enum cudaError_enum {
+  CUDA_SUCCESS = 0,
+  CUDA_ERROR_INVALID_VALUE = 1,
+  CUDA_ERROR_OUT_OF_MEMORY = 2,
+  CUDA_ERROR_NOT_INITIALIZED = 3,
+  CUDA_ERROR_DEINITIALIZED = 4,
+  CUDA_ERROR_PROFILER_DISABLED = 5,
+  CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,
+  CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
+  CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
+  CUDA_ERROR_NO_DEVICE = 100,
+  CUDA_ERROR_INVALID_DEVICE = 101,
+  CUDA_ERROR_INVALID_IMAGE = 200,
+  CUDA_ERROR_INVALID_CONTEXT = 201,
+  CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
+  CUDA_ERROR_MAP_FAILED = 205,
+  CUDA_ERROR_UNMAP_FAILED = 206,
+  CUDA_ERROR_ARRAY_IS_MAPPED = 207,
+  CUDA_ERROR_ALREADY_MAPPED = 208,
+  CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
+  CUDA_ERROR_ALREADY_ACQUIRED = 210,
+  CUDA_ERROR_NOT_MAPPED = 211,
+  CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
+  CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
+  CUDA_ERROR_ECC_UNCORRECTABLE = 214,
+  CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
+  CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
+  CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
+  CUDA_ERROR_INVALID_PTX = 218,
+  CUDA_ERROR_INVALID_SOURCE = 300,
+  CUDA_ERROR_FILE_NOT_FOUND = 301,
+  CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+  CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
+  CUDA_ERROR_OPERATING_SYSTEM = 304,
+  CUDA_ERROR_INVALID_HANDLE = 400,
+  CUDA_ERROR_NOT_FOUND = 500,
+  CUDA_ERROR_NOT_READY = 600,
+  CUDA_ERROR_ILLEGAL_ADDRESS = 700,
+  CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
+  CUDA_ERROR_LAUNCH_TIMEOUT = 702,
+  CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
+  CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
+  CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,
+  CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
+  CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
+  CUDA_ERROR_ASSERT = 710,
+  CUDA_ERROR_TOO_MANY_PEERS = 711,
+  CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
+  CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
+  CUDA_ERROR_HARDWARE_STACK_ERROR = 714,
+  CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
+  CUDA_ERROR_MISALIGNED_ADDRESS = 716,
+  CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
+  CUDA_ERROR_INVALID_PC = 718,
+  CUDA_ERROR_LAUNCH_FAILED = 719,
+  CUDA_ERROR_NOT_PERMITTED = 800,
+  CUDA_ERROR_NOT_SUPPORTED = 801,
+  CUDA_ERROR_UNKNOWN = 999,
+} CUresult;
+
+typedef void* CUstreamCallback;
+
+typedef struct CUDA_MEMCPY2D_st {
+  size_t srcXInBytes;
+  size_t srcY;
+  CUmemorytype srcMemoryType;
+  const void* srcHost;
+  CUdeviceptr srcDevice;
+  CUarray srcArray;
+  size_t srcPitch;
+  size_t dstXInBytes;
+  size_t dstY;
+  CUmemorytype dstMemoryType;
+  void* dstHost;
+  CUdeviceptr dstDevice;
+  CUarray dstArray;
+  size_t dstPitch;
+  size_t WidthInBytes;
+  size_t Height;
+} CUDA_MEMCPY2D;
+
+typedef struct CUDA_MEMCPY3D_st {
+  size_t srcXInBytes;
+  size_t srcY;
+  size_t srcZ;
+  size_t srcLOD;
+  CUmemorytype srcMemoryType;
+  const void* srcHost;
+  CUdeviceptr srcDevice;
+  CUarray srcArray;
+  void* reserved0;
+  size_t srcPitch;
+  size_t srcHeight;
+  size_t dstXInBytes;
+  size_t dstY;
+  size_t dstZ;
+  size_t dstLOD;
+  CUmemorytype dstMemoryType;
+  void* dstHost;
+  CUdeviceptr dstDevice;
+  CUarray dstArray;
+  void* reserved1;
+  size_t dstPitch;
+  size_t dstHeight;
+  size_t WidthInBytes;
+  size_t Height;
+  size_t Depth;
+} CUDA_MEMCPY3D;
+
+typedef struct CUDA_MEMCPY3D_PEER_st {
+  size_t srcXInBytes;
+  size_t srcY;
+  size_t srcZ;
+  size_t srcLOD;
+  CUmemorytype srcMemoryType;
+  const void* srcHost;
+  CUdeviceptr srcDevice;
+  CUarray srcArray;
+  CUcontext srcContext;
+  size_t srcPitch;
+  size_t srcHeight;
+  size_t dstXInBytes;
+  size_t dstY;
+  size_t dstZ;
+  size_t dstLOD;
+  CUmemorytype dstMemoryType;
+  void* dstHost;
+  CUdeviceptr dstDevice;
+  CUarray dstArray;
+  CUcontext dstContext;
+  size_t dstPitch;
+  size_t dstHeight;
+  size_t WidthInBytes;
+  size_t Height;
+  size_t Depth;
+} CUDA_MEMCPY3D_PEER;
+
+typedef struct CUDA_ARRAY_DESCRIPTOR_st {
+  size_t Width;
+  size_t Height;
+  CUarray_format Format;
+  unsigned NumChannels;
+} CUDA_ARRAY_DESCRIPTOR;
+
+typedef struct CUDA_ARRAY3D_DESCRIPTOR_st {
+  size_t Width;
+  size_t Height;
+  size_t Depth;
+  CUarray_format Format;
+  unsigned NumChannels;
+  unsigned Flags;
+} CUDA_ARRAY3D_DESCRIPTOR;
+
+typedef struct CUDA_RESOURCE_DESC_st {
+  CUresourcetype resType;
+  union {
+    struct {
+      CUarray hArray;
+    } array;
+    struct {
+      CUmipmappedArray hMipmappedArray;
+    } mipmap;
+    struct {
+      CUdeviceptr devPtr;
+      CUarray_format format;
+      unsigned numChannels;
+      size_t sizeInBytes;
+    } linear;
+    struct {
+      CUdeviceptr devPtr;
+      CUarray_format format;
+      unsigned numChannels;
+      size_t width;
+      size_t height;
+      size_t pitchInBytes;
+    } pitch2D;
+    struct {
+      int reserved[32];
+    } reserved;
+  } res;
+  unsigned flags;
+} CUDA_RESOURCE_DESC;
+
+typedef struct CUDA_TEXTURE_DESC_st {
+  CUaddress_mode addressMode[3];
+  CUfilter_mode filterMode;
+  unsigned flags;
+  unsigned maxAnisotropy;
+  CUfilter_mode mipmapFilterMode;
+  float mipmapLevelBias;
+  float minMipmapLevelClamp;
+  float maxMipmapLevelClamp;
+  int reserved[16];
+} CUDA_TEXTURE_DESC;
+
+typedef enum CUresourceViewFormat_enum {
+  CU_RES_VIEW_FORMAT_NONE = 0x00,
+  CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01,
+  CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02,
+  CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03,
+  CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04,
+  CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05,
+  CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06,
+  CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07,
+  CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08,
+  CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09,
+  CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a,
+  CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b,
+  CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c,
+  CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d,
+  CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e,
+  CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f,
+  CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10,
+  CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11,
+  CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12,
+  CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13,
+  CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14,
+  CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15,
+  CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16,
+  CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17,
+  CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c,
+  CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e,
+  CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20,
+  CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21,
+  CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22,
+} CUresourceViewFormat;
+
+typedef struct CUDA_RESOURCE_VIEW_DESC_st {
+  CUresourceViewFormat format;
+  size_t width;
+  size_t height;
+  size_t depth;
+  unsigned firstMipmapLevel;
+  unsigned lastMipmapLevel;
+  unsigned firstLayer;
+  unsigned lastLayer;
+  unsigned reserved[16];
+} CUDA_RESOURCE_VIEW_DESC;
+
+typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
+  unsigned p2pToken;
+  unsigned vaSpaceToken;
+} CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;
+typedef unsigned GLenum;
+typedef unsigned GLuint;
+typedef int GLint;
+
+typedef enum CUGLDeviceList_enum {
+  CU_GL_DEVICE_LIST_ALL = 0x01,
+  CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02,
+  CU_GL_DEVICE_LIST_NEXT_FRAME = 0x03,
+} CUGLDeviceList;
+
+typedef enum CUGLmap_flags_enum {
+  CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00,
+  CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
+  CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
+} CUGLmap_flags;
+
+#ifdef _WIN32
+#  define CUDAAPI __stdcall
+#  define CUDA_CB __stdcall
+#else
+#  define CUDAAPI
+#  define CUDA_CB
+#endif
+
+/* Function types. */
+typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char* pStr);
+typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char* pStr);
+typedef CUresult CUDAAPI tcuInit(unsigned Flags);
+typedef CUresult CUDAAPI tcuDriverGetVersion(int* driverVersion);
+typedef CUresult CUDAAPI tcuDeviceGet(CUdevice* device, int ordinal);
+typedef CUresult CUDAAPI tcuDeviceGetCount(int* count);
+typedef CUresult CUDAAPI tcuDeviceGetName(char* name, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t* bytes, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned flags, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx);
+typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx);
+typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device);
+typedef CUresult CUDAAPI tcuCtxSynchronize(void);
+typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
+typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit);
+typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache* pconfig);
+typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
+typedef CUresult CUDAAPI tcuCtxGetSharedMemConfig(CUsharedconfig* pConfig);
+typedef CUresult CUDAAPI tcuCtxSetSharedMemConfig(CUsharedconfig config);
+typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned* version);
+typedef CUresult CUDAAPI tcuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
+typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned flags);
+typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
+typedef CUresult CUDAAPI tcuModuleLoad(CUmodule* module, const char* fname);
+typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
+typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule* module, const void* fatCubin);
+typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
+typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuLinkCreate(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
+typedef CUresult CUDAAPI tcuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuLinkAddFile(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void* cubinOut, size_t* sizeOut);
+typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
+typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total);
+typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned ElementSizeBytes);
+typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemGetAddressRange_v2(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemAllocHost_v2(void* pp, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemFreeHost(void* p);
+typedef CUresult CUDAAPI tcuMemHostAlloc(void* pp, size_t bytesize, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned* pFlags, void* p);
+typedef CUresult CUDAAPI tcuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned flags);
+typedef CUresult CUDAAPI tcuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId);
+typedef CUresult CUDAAPI tcuDeviceGetPCIBusId(char* pciBusId, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event);
+typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle);
+typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned Flags);
+typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemHostRegister(void* p, size_t bytesize, unsigned Flags);
+typedef CUresult CUDAAPI tcuMemHostUnregister(void* p);
+typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoH_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoH_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy3D_v2(const CUDA_MEMCPY3D* pCopy);
+typedef CUresult CUDAAPI tcuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy);
+typedef CUresult CUDAAPI tcuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyDtoHAsync_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyAtoHAsync_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy3DAsync_v2(const CUDA_MEMCPY3D* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD8_v2(CUdeviceptr dstDevice, unsigned uc, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD16_v2(CUdeviceptr dstDevice, unsigned us, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD32_v2(CUdeviceptr dstDevice, unsigned ui, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned uc, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned us, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned ui, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned uc, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned us, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned ui, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned uc, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned us, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned ui, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuArrayCreate_v2(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray);
+typedef CUresult CUDAAPI tcuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
+typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
+typedef CUresult CUDAAPI tcuArray3DCreate_v2(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray);
+typedef CUresult CUDAAPI tcuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
+typedef CUresult CUDAAPI tcuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned numMipmapLevels);
+typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned level);
+typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
+typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
+typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr);
+typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned Flags);
+typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned flags, int priority);
+typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority);
+typedef CUresult CUDAAPI tcuStreamGetFlags(CUstream hStream, unsigned* flags);
+typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned Flags);
+typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned flags);
+typedef CUresult CUDAAPI tcuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned flags);
+typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
+typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
+typedef CUresult CUDAAPI tcuStreamDestroy_v2(CUstream hStream);
+typedef CUresult CUDAAPI tcuEventCreate(CUevent* phEvent, unsigned Flags);
+typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
+typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventDestroy_v2(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd);
+typedef CUresult CUDAAPI tcuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
+typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
+typedef CUresult CUDAAPI tcuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
+typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra);
+typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
+typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned bytes);
+typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned numbytes);
+typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned value);
+typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
+typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned numbytes);
+typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
+typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
+typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
+typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned Flags);
+typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned Flags);
+typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
+typedef CUresult CUDAAPI tcuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch);
+typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
+typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
+typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
+typedef CUresult CUDAAPI tcuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm);
+typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias);
+typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
+typedef CUresult CUDAAPI tcuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned maxAniso);
+typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned Flags);
+typedef CUresult CUDAAPI tcuTexRefGetAddress_v2(CUdeviceptr* pdptr, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray* phArray, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim);
+typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned* pFlags, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref* pTexRef);
+typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned Flags);
+typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef);
+typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc);
+typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject);
+typedef CUresult CUDAAPI tcuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject);
+typedef CUresult CUDAAPI tcuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject);
+typedef CUresult CUDAAPI tcuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject);
+typedef CUresult CUDAAPI tcuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc);
+typedef CUresult CUDAAPI tcuSurfObjectDestroy(CUsurfObject surfObject);
+typedef CUresult CUDAAPI tcuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject);
+typedef CUresult CUDAAPI tcuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev);
+typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned Flags);
+typedef CUresult CUDAAPI tcuCtxDisablePeerAccess(CUcontext peerContext);
+typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned arrayIndex, unsigned mipLevel);
+typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned flags);
+typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGetExportTable(const void* ppExportTable, const CUuuid* pExportTableId);
+
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned Flags);
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned Flags);
+typedef CUresult CUDAAPI tcuGLGetDevices(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList);
+typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned Flags, CUdevice device);
+typedef CUresult CUDAAPI tcuGLInit(void);
+typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer);
+typedef CUresult CUDAAPI tcuGLMapBufferObject_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer);
+typedef CUresult CUDAAPI tcuGLUnmapBufferObject(GLuint buffer);
+typedef CUresult CUDAAPI tcuGLUnregisterBufferObject(GLuint buffer);
+typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned Flags);
+typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer, CUstream hStream);
+typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
+
+
+/* Function declarations. */
+extern tcuGetErrorString *cuGetErrorString;
+extern tcuGetErrorName *cuGetErrorName;
+extern tcuInit *cuInit;
+extern tcuDriverGetVersion *cuDriverGetVersion;
+extern tcuDeviceGet *cuDeviceGet;
+extern tcuDeviceGetCount *cuDeviceGetCount;
+extern tcuDeviceGetName *cuDeviceGetName;
+extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
+extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
+extern tcuDeviceGetProperties *cuDeviceGetProperties;
+extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
+extern tcuCtxCreate_v2 *cuCtxCreate_v2;
+extern tcuCtxDestroy_v2 *cuCtxDestroy_v2;
+extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
+extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
+extern tcuCtxSetCurrent *cuCtxSetCurrent;
+extern tcuCtxGetCurrent *cuCtxGetCurrent;
+extern tcuCtxGetDevice *cuCtxGetDevice;
+extern tcuCtxSynchronize *cuCtxSynchronize;
+extern tcuCtxSetLimit *cuCtxSetLimit;
+extern tcuCtxGetLimit *cuCtxGetLimit;
+extern tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
+extern tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
+extern tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
+extern tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
+extern tcuCtxGetApiVersion *cuCtxGetApiVersion;
+extern tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
+extern tcuCtxAttach *cuCtxAttach;
+extern tcuCtxDetach *cuCtxDetach;
+extern tcuModuleLoad *cuModuleLoad;
+extern tcuModuleLoadData *cuModuleLoadData;
+extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
+extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
+extern tcuModuleUnload *cuModuleUnload;
+extern tcuModuleGetFunction *cuModuleGetFunction;
+extern tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
+extern tcuModuleGetTexRef *cuModuleGetTexRef;
+extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
+extern tcuLinkCreate *cuLinkCreate;
+extern tcuLinkAddData *cuLinkAddData;
+extern tcuLinkAddFile *cuLinkAddFile;
+extern tcuLinkComplete *cuLinkComplete;
+extern tcuLinkDestroy *cuLinkDestroy;
+extern tcuMemGetInfo_v2 *cuMemGetInfo_v2;
+extern tcuMemAlloc_v2 *cuMemAlloc_v2;
+extern tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
+extern tcuMemFree_v2 *cuMemFree_v2;
+extern tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
+extern tcuMemAllocHost_v2 *cuMemAllocHost_v2;
+extern tcuMemFreeHost *cuMemFreeHost;
+extern tcuMemHostAlloc *cuMemHostAlloc;
+extern tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
+extern tcuMemHostGetFlags *cuMemHostGetFlags;
+extern tcuMemAllocManaged *cuMemAllocManaged;
+extern tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
+extern tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
+extern tcuIpcGetEventHandle *cuIpcGetEventHandle;
+extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
+extern tcuIpcGetMemHandle *cuIpcGetMemHandle;
+extern tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
+extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
+extern tcuMemHostRegister *cuMemHostRegister;
+extern tcuMemHostUnregister *cuMemHostUnregister;
+extern tcuMemcpy *cuMemcpy;
+extern tcuMemcpyPeer *cuMemcpyPeer;
+extern tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
+extern tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
+extern tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
+extern tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
+extern tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
+extern tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
+extern tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
+extern tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
+extern tcuMemcpy2D_v2 *cuMemcpy2D_v2;
+extern tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
+extern tcuMemcpy3D_v2 *cuMemcpy3D_v2;
+extern tcuMemcpy3DPeer *cuMemcpy3DPeer;
+extern tcuMemcpyAsync *cuMemcpyAsync;
+extern tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
+extern tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
+extern tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
+extern tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
+extern tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
+extern tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
+extern tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
+extern tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
+extern tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
+extern tcuMemsetD8_v2 *cuMemsetD8_v2;
+extern tcuMemsetD16_v2 *cuMemsetD16_v2;
+extern tcuMemsetD32_v2 *cuMemsetD32_v2;
+extern tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
+extern tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
+extern tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
+extern tcuMemsetD8Async *cuMemsetD8Async;
+extern tcuMemsetD16Async *cuMemsetD16Async;
+extern tcuMemsetD32Async *cuMemsetD32Async;
+extern tcuMemsetD2D8Async *cuMemsetD2D8Async;
+extern tcuMemsetD2D16Async *cuMemsetD2D16Async;
+extern tcuMemsetD2D32Async *cuMemsetD2D32Async;
+extern tcuArrayCreate_v2 *cuArrayCreate_v2;
+extern tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
+extern tcuArrayDestroy *cuArrayDestroy;
+extern tcuArray3DCreate_v2 *cuArray3DCreate_v2;
+extern tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
+extern tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
+extern tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
+extern tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
+extern tcuPointerGetAttribute *cuPointerGetAttribute;
+extern tcuPointerSetAttribute *cuPointerSetAttribute;
+extern tcuStreamCreate *cuStreamCreate;
+extern tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
+extern tcuStreamGetPriority *cuStreamGetPriority;
+extern tcuStreamGetFlags *cuStreamGetFlags;
+extern tcuStreamWaitEvent *cuStreamWaitEvent;
+extern tcuStreamAddCallback *cuStreamAddCallback;
+extern tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
+extern tcuStreamQuery *cuStreamQuery;
+extern tcuStreamSynchronize *cuStreamSynchronize;
+extern tcuStreamDestroy_v2 *cuStreamDestroy_v2;
+extern tcuEventCreate *cuEventCreate;
+extern tcuEventRecord *cuEventRecord;
+extern tcuEventQuery *cuEventQuery;
+extern tcuEventSynchronize *cuEventSynchronize;
+extern tcuEventDestroy_v2 *cuEventDestroy_v2;
+extern tcuEventElapsedTime *cuEventElapsedTime;
+extern tcuFuncGetAttribute *cuFuncGetAttribute;
+extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
+extern tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
+extern tcuLaunchKernel *cuLaunchKernel;
+extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
+extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
+extern tcuParamSetSize *cuParamSetSize;
+extern tcuParamSeti *cuParamSeti;
+extern tcuParamSetf *cuParamSetf;
+extern tcuParamSetv *cuParamSetv;
+extern tcuLaunch *cuLaunch;
+extern tcuLaunchGrid *cuLaunchGrid;
+extern tcuLaunchGridAsync *cuLaunchGridAsync;
+extern tcuParamSetTexRef *cuParamSetTexRef;
+extern tcuTexRefSetArray *cuTexRefSetArray;
+extern tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
+extern tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
+extern tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
+extern tcuTexRefSetFormat *cuTexRefSetFormat;
+extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
+extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
+extern tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
+extern tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
+extern tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
+extern tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
+extern tcuTexRefSetFlags *cuTexRefSetFlags;
+extern tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
+extern tcuTexRefGetArray *cuTexRefGetArray;
+extern tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
+extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
+extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
+extern tcuTexRefGetFormat *cuTexRefGetFormat;
+extern tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
+extern tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
+extern tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
+extern tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
+extern tcuTexRefGetFlags *cuTexRefGetFlags;
+extern tcuTexRefCreate *cuTexRefCreate;
+extern tcuTexRefDestroy *cuTexRefDestroy;
+extern tcuSurfRefSetArray *cuSurfRefSetArray;
+extern tcuSurfRefGetArray *cuSurfRefGetArray;
+extern tcuTexObjectCreate *cuTexObjectCreate;
+extern tcuTexObjectDestroy *cuTexObjectDestroy;
+extern tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
+extern tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
+extern tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
+extern tcuSurfObjectCreate *cuSurfObjectCreate;
+extern tcuSurfObjectDestroy *cuSurfObjectDestroy;
+extern tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
+extern tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
+extern tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
+extern tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
+extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
+extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
+extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
+extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+extern tcuGraphicsMapResources *cuGraphicsMapResources;
+extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+extern tcuGetExportTable *cuGetExportTable;
+
+extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
+extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
+extern tcuGLGetDevices *cuGLGetDevices;
+extern tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
+extern tcuGLInit *cuGLInit;
+extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
+extern tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
+extern tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
+extern tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
+extern tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
+extern tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
+extern tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
+
+
+enum {
+  CUEW_SUCCESS = 0,
+  CUEW_ERROR_OPEN_FAILED = -1,
+  CUEW_ERROR_ATEXIT_FAILED = -2,
+};
+
+int cuewInit(void);
+const char *cuewErrorString(CUresult result);
+const char *cuewCompilerPath(void);
+int cuewCompilerVersion(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __CUEW_H__ */
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
new file mode 100644 (file)
index 0000000..35ffca3
--- /dev/null
@@ -0,0 +1,710 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifdef _MSC_VER
+#  define snprintf _snprintf
+#  define popen _popen
+#  define pclose _pclose
+#  define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <cuew.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#ifdef _WIN32
+#  define WIN32_LEAN_AND_MEAN
+#  define VC_EXTRALEAN
+#  include <windows.h>
+
+/* Utility macros. */
+
+typedef HMODULE DynamicLibrary;
+
+#  define dynamic_library_open(path)         LoadLibrary(path)
+#  define dynamic_library_close(lib)         FreeLibrary(lib)
+#  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
+#else
+#  include <dlfcn.h>
+
+typedef void* DynamicLibrary;
+
+#  define dynamic_library_open(path)         dlopen(path, RTLD_NOW)
+#  define dynamic_library_close(lib)         dlclose(lib)
+#  define dynamic_library_find(lib, symbol)  dlsym(lib, symbol)
+#endif
+
+#define CUDA_LIBRARY_FIND_CHECKED(name) \
+        name = (t##name *)dynamic_library_find(lib, #name);
+
+#define CUDA_LIBRARY_FIND(name) \
+        name = (t##name *)dynamic_library_find(lib, #name); \
+        assert(name);
+
+static DynamicLibrary lib;
+
+/* Function definitions. */
+tcuGetErrorString *cuGetErrorString;
+tcuGetErrorName *cuGetErrorName;
+tcuInit *cuInit;
+tcuDriverGetVersion *cuDriverGetVersion;
+tcuDeviceGet *cuDeviceGet;
+tcuDeviceGetCount *cuDeviceGetCount;
+tcuDeviceGetName *cuDeviceGetName;
+tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
+tcuDeviceGetAttribute *cuDeviceGetAttribute;
+tcuDeviceGetProperties *cuDeviceGetProperties;
+tcuDeviceComputeCapability *cuDeviceComputeCapability;
+tcuCtxCreate_v2 *cuCtxCreate_v2;
+tcuCtxDestroy_v2 *cuCtxDestroy_v2;
+tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
+tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
+tcuCtxSetCurrent *cuCtxSetCurrent;
+tcuCtxGetCurrent *cuCtxGetCurrent;
+tcuCtxGetDevice *cuCtxGetDevice;
+tcuCtxSynchronize *cuCtxSynchronize;
+tcuCtxSetLimit *cuCtxSetLimit;
+tcuCtxGetLimit *cuCtxGetLimit;
+tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
+tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
+tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
+tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
+tcuCtxGetApiVersion *cuCtxGetApiVersion;
+tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
+tcuCtxAttach *cuCtxAttach;
+tcuCtxDetach *cuCtxDetach;
+tcuModuleLoad *cuModuleLoad;
+tcuModuleLoadData *cuModuleLoadData;
+tcuModuleLoadDataEx *cuModuleLoadDataEx;
+tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
+tcuModuleUnload *cuModuleUnload;
+tcuModuleGetFunction *cuModuleGetFunction;
+tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
+tcuModuleGetTexRef *cuModuleGetTexRef;
+tcuModuleGetSurfRef *cuModuleGetSurfRef;
+tcuLinkCreate *cuLinkCreate;
+tcuLinkAddData *cuLinkAddData;
+tcuLinkAddFile *cuLinkAddFile;
+tcuLinkComplete *cuLinkComplete;
+tcuLinkDestroy *cuLinkDestroy;
+tcuMemGetInfo_v2 *cuMemGetInfo_v2;
+tcuMemAlloc_v2 *cuMemAlloc_v2;
+tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
+tcuMemFree_v2 *cuMemFree_v2;
+tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
+tcuMemAllocHost_v2 *cuMemAllocHost_v2;
+tcuMemFreeHost *cuMemFreeHost;
+tcuMemHostAlloc *cuMemHostAlloc;
+tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
+tcuMemHostGetFlags *cuMemHostGetFlags;
+tcuMemAllocManaged *cuMemAllocManaged;
+tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
+tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
+tcuIpcGetEventHandle *cuIpcGetEventHandle;
+tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
+tcuIpcGetMemHandle *cuIpcGetMemHandle;
+tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
+tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
+tcuMemHostRegister *cuMemHostRegister;
+tcuMemHostUnregister *cuMemHostUnregister;
+tcuMemcpy *cuMemcpy;
+tcuMemcpyPeer *cuMemcpyPeer;
+tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
+tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
+tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
+tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
+tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
+tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
+tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
+tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
+tcuMemcpy2D_v2 *cuMemcpy2D_v2;
+tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
+tcuMemcpy3D_v2 *cuMemcpy3D_v2;
+tcuMemcpy3DPeer *cuMemcpy3DPeer;
+tcuMemcpyAsync *cuMemcpyAsync;
+tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
+tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
+tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
+tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
+tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
+tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
+tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
+tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
+tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
+tcuMemsetD8_v2 *cuMemsetD8_v2;
+tcuMemsetD16_v2 *cuMemsetD16_v2;
+tcuMemsetD32_v2 *cuMemsetD32_v2;
+tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
+tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
+tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
+tcuMemsetD8Async *cuMemsetD8Async;
+tcuMemsetD16Async *cuMemsetD16Async;
+tcuMemsetD32Async *cuMemsetD32Async;
+tcuMemsetD2D8Async *cuMemsetD2D8Async;
+tcuMemsetD2D16Async *cuMemsetD2D16Async;
+tcuMemsetD2D32Async *cuMemsetD2D32Async;
+tcuArrayCreate_v2 *cuArrayCreate_v2;
+tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
+tcuArrayDestroy *cuArrayDestroy;
+tcuArray3DCreate_v2 *cuArray3DCreate_v2;
+tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
+tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
+tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
+tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
+tcuPointerGetAttribute *cuPointerGetAttribute;
+tcuPointerSetAttribute *cuPointerSetAttribute;
+tcuStreamCreate *cuStreamCreate;
+tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
+tcuStreamGetPriority *cuStreamGetPriority;
+tcuStreamGetFlags *cuStreamGetFlags;
+tcuStreamWaitEvent *cuStreamWaitEvent;
+tcuStreamAddCallback *cuStreamAddCallback;
+tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
+tcuStreamQuery *cuStreamQuery;
+tcuStreamSynchronize *cuStreamSynchronize;
+tcuStreamDestroy_v2 *cuStreamDestroy_v2;
+tcuEventCreate *cuEventCreate;
+tcuEventRecord *cuEventRecord;
+tcuEventQuery *cuEventQuery;
+tcuEventSynchronize *cuEventSynchronize;
+tcuEventDestroy_v2 *cuEventDestroy_v2;
+tcuEventElapsedTime *cuEventElapsedTime;
+tcuFuncGetAttribute *cuFuncGetAttribute;
+tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
+tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
+tcuLaunchKernel *cuLaunchKernel;
+tcuFuncSetBlockShape *cuFuncSetBlockShape;
+tcuFuncSetSharedSize *cuFuncSetSharedSize;
+tcuParamSetSize *cuParamSetSize;
+tcuParamSeti *cuParamSeti;
+tcuParamSetf *cuParamSetf;
+tcuParamSetv *cuParamSetv;
+tcuLaunch *cuLaunch;
+tcuLaunchGrid *cuLaunchGrid;
+tcuLaunchGridAsync *cuLaunchGridAsync;
+tcuParamSetTexRef *cuParamSetTexRef;
+tcuTexRefSetArray *cuTexRefSetArray;
+tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
+tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
+tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
+tcuTexRefSetFormat *cuTexRefSetFormat;
+tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
+tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
+tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
+tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
+tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
+tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
+tcuTexRefSetFlags *cuTexRefSetFlags;
+tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
+tcuTexRefGetArray *cuTexRefGetArray;
+tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
+tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
+tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
+tcuTexRefGetFormat *cuTexRefGetFormat;
+tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
+tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
+tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
+tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
+tcuTexRefGetFlags *cuTexRefGetFlags;
+tcuTexRefCreate *cuTexRefCreate;
+tcuTexRefDestroy *cuTexRefDestroy;
+tcuSurfRefSetArray *cuSurfRefSetArray;
+tcuSurfRefGetArray *cuSurfRefGetArray;
+tcuTexObjectCreate *cuTexObjectCreate;
+tcuTexObjectDestroy *cuTexObjectDestroy;
+tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
+tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
+tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
+tcuSurfObjectCreate *cuSurfObjectCreate;
+tcuSurfObjectDestroy *cuSurfObjectDestroy;
+tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
+tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
+tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
+tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
+tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
+tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
+tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
+tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+tcuGraphicsMapResources *cuGraphicsMapResources;
+tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+tcuGetExportTable *cuGetExportTable;
+
+tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
+tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
+tcuGLGetDevices *cuGLGetDevices;
+tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
+tcuGLInit *cuGLInit;
+tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
+tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
+tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
+tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
+tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
+tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
+tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
+
+
+static void cuewExit(void) {
+  if(lib != NULL) {
+    /*  Ignore errors. */
+    dynamic_library_close(lib);
+    lib = NULL;
+  }
+}
+
+/* Implementation function. */
+int cuewInit(void) {
+  /* Library paths. */
+#ifdef _WIN32
+  /* Expected in c:/windows/system or similar, no path needed. */
+  const char *path = "nvcuda.dll";
+#elif defined(__APPLE__)
+  /* Default installation path. */
+  const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+  const char *path = "libcuda.so";
+#endif
+  static int initialized = 0;
+  static int result = 0;
+  int error, driver_version;
+
+  if (initialized) {
+    return result;
+  }
+
+  initialized = 1;
+
+  error = atexit(cuewExit);
+  if (error) {
+    result = CUEW_ERROR_ATEXIT_FAILED;
+    return result;
+  }
+
+  /* Load library. */
+  lib = dynamic_library_open(path);
+
+  if (lib == NULL) {
+    result = CUEW_ERROR_OPEN_FAILED;
+    return result;
+  }
+
+  /* Detect driver version. */
+  driver_version = 1000;
+
+  CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
+  if (cuDriverGetVersion) {
+    cuDriverGetVersion(&driver_version);
+  }
+
+  /* We require version 4.0. */
+  if (driver_version < 4000) {
+    result = CUEW_ERROR_OPEN_FAILED;
+    return result;
+  }
+  /* Fetch all function pointers. */
+  CUDA_LIBRARY_FIND(cuGetErrorString);
+  CUDA_LIBRARY_FIND(cuGetErrorName);
+  CUDA_LIBRARY_FIND(cuInit);
+  CUDA_LIBRARY_FIND(cuDriverGetVersion);
+  CUDA_LIBRARY_FIND(cuDeviceGet);
+  CUDA_LIBRARY_FIND(cuDeviceGetCount);
+  CUDA_LIBRARY_FIND(cuDeviceGetName);
+  CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2);
+  CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
+  CUDA_LIBRARY_FIND(cuDeviceGetProperties);
+  CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
+  CUDA_LIBRARY_FIND(cuCtxCreate_v2);
+  CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
+  CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
+  CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2);
+  CUDA_LIBRARY_FIND(cuCtxSetCurrent);
+  CUDA_LIBRARY_FIND(cuCtxGetCurrent);
+  CUDA_LIBRARY_FIND(cuCtxGetDevice);
+  CUDA_LIBRARY_FIND(cuCtxSynchronize);
+  CUDA_LIBRARY_FIND(cuCtxSetLimit);
+  CUDA_LIBRARY_FIND(cuCtxGetLimit);
+  CUDA_LIBRARY_FIND(cuCtxGetCacheConfig);
+  CUDA_LIBRARY_FIND(cuCtxSetCacheConfig);
+  CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig);
+  CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig);
+  CUDA_LIBRARY_FIND(cuCtxGetApiVersion);
+  CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange);
+  CUDA_LIBRARY_FIND(cuCtxAttach);
+  CUDA_LIBRARY_FIND(cuCtxDetach);
+  CUDA_LIBRARY_FIND(cuModuleLoad);
+  CUDA_LIBRARY_FIND(cuModuleLoadData);
+  CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
+  CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
+  CUDA_LIBRARY_FIND(cuModuleUnload);
+  CUDA_LIBRARY_FIND(cuModuleGetFunction);
+  CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
+  CUDA_LIBRARY_FIND(cuModuleGetTexRef);
+  CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
+  CUDA_LIBRARY_FIND(cuLinkCreate);
+  CUDA_LIBRARY_FIND(cuLinkAddData);
+  CUDA_LIBRARY_FIND(cuLinkAddFile);
+  CUDA_LIBRARY_FIND(cuLinkComplete);
+  CUDA_LIBRARY_FIND(cuLinkDestroy);
+  CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
+  CUDA_LIBRARY_FIND(cuMemAlloc_v2);
+  CUDA_LIBRARY_FIND(cuMemAllocPitch_v2);
+  CUDA_LIBRARY_FIND(cuMemFree_v2);
+  CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2);
+  CUDA_LIBRARY_FIND(cuMemAllocHost_v2);
+  CUDA_LIBRARY_FIND(cuMemFreeHost);
+  CUDA_LIBRARY_FIND(cuMemHostAlloc);
+  CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2);
+  CUDA_LIBRARY_FIND(cuMemHostGetFlags);
+  CUDA_LIBRARY_FIND(cuMemAllocManaged);
+  CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId);
+  CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId);
+  CUDA_LIBRARY_FIND(cuIpcGetEventHandle);
+  CUDA_LIBRARY_FIND(cuIpcOpenEventHandle);
+  CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
+  CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
+  CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
+  CUDA_LIBRARY_FIND(cuMemHostRegister);
+  CUDA_LIBRARY_FIND(cuMemHostUnregister);
+  CUDA_LIBRARY_FIND(cuMemcpy);
+  CUDA_LIBRARY_FIND(cuMemcpyPeer);
+  CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2);
+  CUDA_LIBRARY_FIND(cuMemcpy2D_v2);
+  CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2);
+  CUDA_LIBRARY_FIND(cuMemcpy3D_v2);
+  CUDA_LIBRARY_FIND(cuMemcpy3DPeer);
+  CUDA_LIBRARY_FIND(cuMemcpyAsync);
+  CUDA_LIBRARY_FIND(cuMemcpyPeerAsync);
+  CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2);
+  CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2);
+  CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2);
+  CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2);
+  CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync);
+  CUDA_LIBRARY_FIND(cuMemsetD8_v2);
+  CUDA_LIBRARY_FIND(cuMemsetD16_v2);
+  CUDA_LIBRARY_FIND(cuMemsetD32_v2);
+  CUDA_LIBRARY_FIND(cuMemsetD2D8_v2);
+  CUDA_LIBRARY_FIND(cuMemsetD2D16_v2);
+  CUDA_LIBRARY_FIND(cuMemsetD2D32_v2);
+  CUDA_LIBRARY_FIND(cuMemsetD8Async);
+  CUDA_LIBRARY_FIND(cuMemsetD16Async);
+  CUDA_LIBRARY_FIND(cuMemsetD32Async);
+  CUDA_LIBRARY_FIND(cuMemsetD2D8Async);
+  CUDA_LIBRARY_FIND(cuMemsetD2D16Async);
+  CUDA_LIBRARY_FIND(cuMemsetD2D32Async);
+  CUDA_LIBRARY_FIND(cuArrayCreate_v2);
+  CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2);
+  CUDA_LIBRARY_FIND(cuArrayDestroy);
+  CUDA_LIBRARY_FIND(cuArray3DCreate_v2);
+  CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2);
+  CUDA_LIBRARY_FIND(cuMipmappedArrayCreate);
+  CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel);
+  CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
+  CUDA_LIBRARY_FIND(cuPointerGetAttribute);
+  CUDA_LIBRARY_FIND(cuPointerSetAttribute);
+  CUDA_LIBRARY_FIND(cuStreamCreate);
+  CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
+  CUDA_LIBRARY_FIND(cuStreamGetPriority);
+  CUDA_LIBRARY_FIND(cuStreamGetFlags);
+  CUDA_LIBRARY_FIND(cuStreamWaitEvent);
+  CUDA_LIBRARY_FIND(cuStreamAddCallback);
+  CUDA_LIBRARY_FIND(cuStreamAttachMemAsync);
+  CUDA_LIBRARY_FIND(cuStreamQuery);
+  CUDA_LIBRARY_FIND(cuStreamSynchronize);
+  CUDA_LIBRARY_FIND(cuStreamDestroy_v2);
+  CUDA_LIBRARY_FIND(cuEventCreate);
+  CUDA_LIBRARY_FIND(cuEventRecord);
+  CUDA_LIBRARY_FIND(cuEventQuery);
+  CUDA_LIBRARY_FIND(cuEventSynchronize);
+  CUDA_LIBRARY_FIND(cuEventDestroy_v2);
+  CUDA_LIBRARY_FIND(cuEventElapsedTime);
+  CUDA_LIBRARY_FIND(cuFuncGetAttribute);
+  CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
+  CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
+  CUDA_LIBRARY_FIND(cuLaunchKernel);
+  CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
+  CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
+  CUDA_LIBRARY_FIND(cuParamSetSize);
+  CUDA_LIBRARY_FIND(cuParamSeti);
+  CUDA_LIBRARY_FIND(cuParamSetf);
+  CUDA_LIBRARY_FIND(cuParamSetv);
+  CUDA_LIBRARY_FIND(cuLaunch);
+  CUDA_LIBRARY_FIND(cuLaunchGrid);
+  CUDA_LIBRARY_FIND(cuLaunchGridAsync);
+  CUDA_LIBRARY_FIND(cuParamSetTexRef);
+  CUDA_LIBRARY_FIND(cuTexRefSetArray);
+  CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
+  CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
+  CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3);
+  CUDA_LIBRARY_FIND(cuTexRefSetFormat);
+  CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
+  CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
+  CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode);
+  CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias);
+  CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp);
+  CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy);
+  CUDA_LIBRARY_FIND(cuTexRefSetFlags);
+  CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2);
+  CUDA_LIBRARY_FIND(cuTexRefGetArray);
+  CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray);
+  CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
+  CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
+  CUDA_LIBRARY_FIND(cuTexRefGetFormat);
+  CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode);
+  CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias);
+  CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp);
+  CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy);
+  CUDA_LIBRARY_FIND(cuTexRefGetFlags);
+  CUDA_LIBRARY_FIND(cuTexRefCreate);
+  CUDA_LIBRARY_FIND(cuTexRefDestroy);
+  CUDA_LIBRARY_FIND(cuSurfRefSetArray);
+  CUDA_LIBRARY_FIND(cuSurfRefGetArray);
+  CUDA_LIBRARY_FIND(cuTexObjectCreate);
+  CUDA_LIBRARY_FIND(cuTexObjectDestroy);
+  CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc);
+  CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc);
+  CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc);
+  CUDA_LIBRARY_FIND(cuSurfObjectCreate);
+  CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
+  CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
+  CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
+  CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
+  CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
+  CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
+  CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
+  CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
+  CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
+  CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
+  CUDA_LIBRARY_FIND(cuGraphicsMapResources);
+  CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
+  CUDA_LIBRARY_FIND(cuGetExportTable);
+
+  CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
+  CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
+  CUDA_LIBRARY_FIND(cuGLGetDevices);
+  CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
+  CUDA_LIBRARY_FIND(cuGLInit);
+  CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
+  CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2);
+  CUDA_LIBRARY_FIND(cuGLUnmapBufferObject);
+  CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject);
+  CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags);
+  CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
+  CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
+
+
+  result = CUEW_SUCCESS;
+  return result;
+}
+
+const char *cuewErrorString(CUresult result) {
+  switch(result) {
+    case CUDA_SUCCESS: return "No errors";
+    case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
+    case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
+    case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
+    case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
+    case CUDA_ERROR_PROFILER_DISABLED: return "PROFILER_DISABLED";
+    case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "PROFILER_NOT_INITIALIZED";
+    case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "PROFILER_ALREADY_STARTED";
+    case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "PROFILER_ALREADY_STOPPED";
+    case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
+    case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
+    case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
+    case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context";
+    case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "Context already current";
+    case CUDA_ERROR_MAP_FAILED: return "Map failed";
+    case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed";
+    case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped";
+    case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped";
+    case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU";
+    case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired";
+    case CUDA_ERROR_NOT_MAPPED: return "Not mapped";
+    case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array";
+    case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
+    case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
+    case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
+    case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "CONTEXT_ALREADY_IN_USE";
+    case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "PEER_ACCESS_UNSUPPORTED";
+    case CUDA_ERROR_INVALID_PTX: return "INVALID_PTX";
+    case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
+    case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
+    case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
+    case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
+    case CUDA_ERROR_OPERATING_SYSTEM: return "OPERATING_SYSTEM";
+    case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
+    case CUDA_ERROR_NOT_FOUND: return "Not found";
+    case CUDA_ERROR_NOT_READY: return "CUDA not ready";
+    case CUDA_ERROR_ILLEGAL_ADDRESS: return "ILLEGAL_ADDRESS";
+    case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
+    case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout";
+    case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
+    case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "PEER_ACCESS_ALREADY_ENABLED";
+    case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "PEER_ACCESS_NOT_ENABLED";
+    case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "PRIMARY_CONTEXT_ACTIVE";
+    case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "CONTEXT_IS_DESTROYED";
+    case CUDA_ERROR_ASSERT: return "ASSERT";
+    case CUDA_ERROR_TOO_MANY_PEERS: return "TOO_MANY_PEERS";
+    case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "HOST_MEMORY_ALREADY_REGISTERED";
+    case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "HOST_MEMORY_NOT_REGISTERED";
+    case CUDA_ERROR_HARDWARE_STACK_ERROR: return "HARDWARE_STACK_ERROR";
+    case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "ILLEGAL_INSTRUCTION";
+    case CUDA_ERROR_MISALIGNED_ADDRESS: return "MISALIGNED_ADDRESS";
+    case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "INVALID_ADDRESS_SPACE";
+    case CUDA_ERROR_INVALID_PC: return "INVALID_PC";
+    case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
+    case CUDA_ERROR_NOT_PERMITTED: return "NOT_PERMITTED";
+    case CUDA_ERROR_NOT_SUPPORTED: return "NOT_SUPPORTED";
+    case CUDA_ERROR_UNKNOWN: return "Unknown error";
+    default: return "Unknown CUDA error value";
+  }
+}
+
+static void path_join(const char *path1,
+                      const char *path2,
+                      int maxlen,
+                      char *result) {
+#if defined(WIN32) || defined(_WIN32)
+  const char separator = '\\';
+#else
+  const char separator = '/';
+#endif
+  int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
+  if (n != -1 && n < maxlen) {
+    result[n] = '\0';
+  }
+  else {
+    result[maxlen - 1] = '\0';
+  }
+}
+
+static int path_exists(const char *path) {
+  struct stat st;
+  if (stat(path, &st)) {
+    return 0;
+  }
+  return 1;
+}
+
+const char *cuewCompilerPath(void) {
+#ifdef _WIN32
+  const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
+  const char *executable = "nvcc.exe";
+#else
+  const char *defaultpaths[] = {
+    "/Developer/NVIDIA/CUDA-5.0/bin",
+    "/usr/local/cuda-5.0/bin",
+    "/usr/local/cuda/bin",
+    "/Developer/NVIDIA/CUDA-6.0/bin",
+    "/usr/local/cuda-6.0/bin",
+    "/Developer/NVIDIA/CUDA-5.5/bin",
+    "/usr/local/cuda-5.5/bin",
+    NULL};
+  const char *executable = "nvcc";
+#endif
+  int i;
+
+  const char *binpath = getenv("CUDA_BIN_PATH");
+
+  static char nvcc[65536];
+
+  if (binpath) {
+    path_join(binpath, executable, sizeof(nvcc), nvcc);
+    if (path_exists(nvcc))
+      return nvcc;
+  }
+
+  for (i = 0; defaultpaths[i]; ++i) {
+    path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
+    if (path_exists(nvcc))
+      return nvcc;
+  }
+
+#ifndef _WIN32
+  {
+    FILE *handle = popen("which nvcc", "r");
+    if (handle) {
+      char buffer[4096] = {0};
+      int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
+      buffer[len] = '\0';
+      pclose(handle);
+
+      if (buffer[0])
+        return "nvcc";
+    }
+  }
+#endif
+
+  return NULL;
+}
+
+int cuewCompilerVersion(void) {
+  const char *path = cuewCompilerPath();
+  const char *marker = "Cuda compilation tools, release ";
+  FILE *pipe;
+  int major, minor;
+  char *versionstr;
+  char buf[128];
+  char output[65536] = "\0";
+  char command[65536] = "\0";
+
+  if (path == NULL)
+    return 0;
+
+  /* get --version output */
+  strncpy(command, path, sizeof(command));
+  strncat(command, " --version", sizeof(command) - strlen(path));
+  pipe = popen(command, "r");
+  if (!pipe) {
+    fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
+    return 0;
+  }
+
+  while (!feof(pipe)) {
+    if (fgets(buf, sizeof(buf), pipe) != NULL) {
+      strncat(output, buf, sizeof(output) - strlen(output));
+    }
+  }
+
+  pclose(pipe);
+
+  /* parse version number */
+  versionstr = strstr(output, marker);
+  if (versionstr == NULL) {
+    fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output);
+    return 0;
+  }
+  versionstr += strlen(marker);
+
+  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
+    fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output);
+    return 0;
+  }
+
+  return 10 * major + minor;
+}
+
index 42f6ca4..0796a43 100644 (file)
@@ -72,10 +72,6 @@ if(WITH_BULLET)
        add_subdirectory(rigidbody)
 endif()
 
-if(WITH_COMPOSITOR)
-       add_subdirectory(opencl)
-endif()
-
 if(WITH_OPENNL)
        add_subdirectory(opennl)
 endif()
index 2080388..0b39fcb 100644 (file)
@@ -59,9 +59,6 @@ if env['WITH_BF_INTERNATIONAL']:
 if env['WITH_BF_BULLET']:
     SConscript (['rigidbody/SConscript'])
 
-if env['WITH_BF_COMPOSITOR']:
-    SConscript (['opencl/SConscript'])
-
 if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'win64-mingw', 'linuxcross', 'win64-vc'):
     SConscript(['utfconv/SConscript'])
 
index 00acde2..a6c947b 100644 (file)
@@ -62,7 +62,7 @@ if env['WITH_BF_CYCLES_OSL']:
 incs.extend('. bvh render device kernel kernel/osl kernel/svm util subd'.split())
 incs.extend('#intern/guardedalloc #source/blender/makesrna #source/blender/makesdna #source/blender/blenlib'.split())
 incs.extend('#source/blender/blenloader ../../source/blender/makesrna/intern'.split())
-incs.extend('#extern/glew/include #intern/mikktspace'.split())
+incs.extend('#extern/glew/include #extern/clew/include #extern/cuew/include #intern/mikktspace'.split())
 incs.append(cycles['BF_OIIO_INC'])
 incs.append(cycles['BF_BOOST_INC'])
 incs.append(cycles['BF_OPENEXR_INC'].split())
index 52806b0..5876ac3 100644 (file)
@@ -28,6 +28,8 @@ set(LIBRARIES
        ${JPEG_LIBRARIES}
        ${ZLIB_LIBRARIES}
        ${TIFF_LIBRARY}
+       extern_clew
+       extern_cuew
 )
 
 if(WIN32)
index ae3309d..a62ce29 100644 (file)
@@ -11,6 +11,8 @@ set(INC
 set(INC_SYS
        ${OPENGL_INCLUDE_DIR}
        ${GLEW_INCLUDE_PATH}
+       ../../../extern/cuew/include
+       ../../../extern/clew/include
 )
 
 set(SRC
index fa1f0ac..efdfa98 100644 (file)
 #include "device.h"
 #include "device_intern.h"
 
-#include "util_cuda.h"
+#include "cuew.h"
+#include "clew.h"
+
 #include "util_debug.h"
 #include "util_foreach.h"
 #include "util_half.h"
 #include "util_math.h"
-#include "util_opencl.h"
 #include "util_opengl.h"
 #include "util_time.h"
 #include "util_types.h"
@@ -141,7 +142,7 @@ Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
                        break;
 #ifdef WITH_CUDA
                case DEVICE_CUDA:
-                       if(cuLibraryInit())
+                       if(device_cuda_init())
                                device = device_cuda_create(info, stats, background);
                        else
                                device = NULL;
@@ -159,7 +160,7 @@ Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
 #endif
 #ifdef WITH_OPENCL
                case DEVICE_OPENCL:
-                       if(clLibraryInit())
+                       if(device_opencl_init())
                                device = device_opencl_create(info, stats, background);
                        else
                                device = NULL;
@@ -213,12 +214,12 @@ vector<DeviceType>& Device::available_types()
                types.push_back(DEVICE_CPU);
 
 #ifdef WITH_CUDA
-               if(cuLibraryInit())
+               if(device_cuda_init())
                        types.push_back(DEVICE_CUDA);
 #endif
 
 #ifdef WITH_OPENCL
-               if(clLibraryInit())
+               if(device_opencl_init())
                        types.push_back(DEVICE_OPENCL);
 #endif
 
@@ -242,12 +243,12 @@ vector<DeviceInfo>& Device::available_devices()
 
        if(!devices_init) {
 #ifdef WITH_CUDA
-               if(cuLibraryInit())
+               if(device_cuda_init())
                        device_cuda_info(devices);
 #endif
 
 #ifdef WITH_OPENCL
-               if(clLibraryInit())
+               if(device_opencl_init())
                        device_opencl_info(devices);
 #endif
 
index f0f32f8..6629069 100644 (file)
@@ -23,7 +23,7 @@
 
 #include "buffers.h"
 
-#include "util_cuda.h"
+#include "cuew.h"
 #include "util_debug.h"
 #include "util_map.h"
 #include "util_opengl.h"
@@ -61,65 +61,10 @@ public:
                return (CUdeviceptr)mem;
        }
 
-       static const char *cuda_error_string(CUresult result)
+       static bool have_precompiled_kernels()
        {
-               switch(result) {
-                       case CUDA_SUCCESS: return "No errors";
-                       case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
-                       case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
-                       case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
-                       case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
-
-                       case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
-                       case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
-
-                       case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
-                       case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context";
-                       case CUDA_ERROR_MAP_FAILED: return "Map failed";
-                       case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed";
-                       case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped";
-                       case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped";
-                       case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU";
-                       case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired";
-                       case CUDA_ERROR_NOT_MAPPED: return "Not mapped";
-                       case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array";
-                       case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
-                       case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
-                       case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
-                       case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use";
-                       case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "Peer access unsupported";
-                       case CUDA_ERROR_INVALID_PTX: return "Invalid PTX code";
-
-                       case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
-                       case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
-                       case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
-                       case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
-                       case CUDA_ERROR_OPERATING_SYSTEM: return "OS call failed";
-
-                       case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
-
-                       case CUDA_ERROR_NOT_FOUND: return "Not found";
-
-                       case CUDA_ERROR_NOT_READY: return "CUDA not ready";
-
-                       case CUDA_ERROR_ILLEGAL_ADDRESS: return "Illegal address";
-                       case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
-                       case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded time out";
-                       case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
-                       case CUDA_ERROR_HARDWARE_STACK_ERROR: return "Stack error";
-                       case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "Illegal instruction";
-                       case CUDA_ERROR_MISALIGNED_ADDRESS: return "Misaligned address";
-                       case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space";
-                       case CUDA_ERROR_INVALID_PC: return "Invalid program counter";
-                       case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
-
-                       case CUDA_ERROR_NOT_PERMITTED: return "Operation not permitted";
-                       case CUDA_ERROR_NOT_SUPPORTED: return "Operation not supported";
-
-                       case CUDA_ERROR_UNKNOWN: return "Unknown error";
-
-                       default: return "Unknown CUDA error value";
-               }
+               string cubins_path = path_get("lib");
+               return path_exists(cubins_path);
        }
 
 /*#ifdef NDEBUG
@@ -141,7 +86,7 @@ public:
                CUresult result = stmt; \
                \
                if(result != CUDA_SUCCESS) { \
-                       string message = string_printf("CUDA error: %s in %s", cuda_error_string(result), #stmt); \
+                       string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
                        if(error_msg == "") \
                                error_msg = message; \
                        fprintf(stderr, "%s\n", message.c_str()); \
@@ -155,7 +100,7 @@ public:
                if(result == CUDA_SUCCESS)
                        return false;
 
-               string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuda_error_string(result));
+               string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result));
                if(error_msg == "")
                        error_msg = message;
                fprintf(stderr, "%s\n", message.c_str());
@@ -275,7 +220,7 @@ public:
                        return cubin;
 
 #ifdef _WIN32
-               if(cuHavePrecompiledKernels()) {
+               if(have_precompiled_kernels()) {
                        if(major < 2)
                                cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor));
                        else
@@ -285,14 +230,14 @@ public:
 #endif
 
                /* if not, find CUDA compiler */
-               string nvcc = cuCompilerPath();
+               const char *nvcc = cuewCompilerPath();
 
-               if(nvcc == "") {
+               if(nvcc == NULL) {
                        cuda_error_message("CUDA nvcc compiler not found. Install CUDA toolkit in default location.");
                        return "";
                }
 
-               int cuda_version = cuCompilerVersion();
+               int cuda_version = cuewCompilerVersion();
 
                if(cuda_version == 0) {
                        cuda_error_message("CUDA nvcc compiler version could not be parsed.");
@@ -317,7 +262,7 @@ public:
 
                string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
                        "-o \"%s\" --ptxas-options=\"-v\" -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d",
-                       nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
+                       nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
 
                printf("%s\n", command.c_str());
 
@@ -1050,6 +995,28 @@ public:
        }
 };
 
+bool device_cuda_init(void)
+{
+       static bool initialized = false;
+       static bool result = false;
+
+       if (initialized)
+               return result;
+
+       initialized = true;
+
+       if (cuewInit() == CUEW_SUCCESS) {
+               if(CUDADevice::have_precompiled_kernels())
+                       result = true;
+#ifndef _WIN32
+               else if(cuewCompilerPath() != NULL)
+                       result = true;
+#endif
+       }
+
+       return result;
+}
+
 Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background)
 {
        return new CUDADevice(info, stats, background);
@@ -1063,13 +1030,13 @@ void device_cuda_info(vector<DeviceInfo>& devices)
        result = cuInit(0);
        if(result != CUDA_SUCCESS) {
                if(result != CUDA_ERROR_NO_DEVICE)
-                       fprintf(stderr, "CUDA cuInit: %s\n", CUDADevice::cuda_error_string(result));
+                       fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result));
                return;
        }
 
        result = cuDeviceGetCount(&count);
        if(result != CUDA_SUCCESS) {
-               fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", CUDADevice::cuda_error_string(result));
+               fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
                return;
        }
        
index 7eb66c2..80f1e24 100644 (file)
@@ -22,7 +22,9 @@ CCL_NAMESPACE_BEGIN
 class Device;
 
 Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background);
+bool device_opencl_init(void);
 Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background);
+bool device_cuda_init(void);
 Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background);
 Device *device_network_create(DeviceInfo& info, Stats &stats, const char *address);
 Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background);
index 3abda6a..fb10698 100644 (file)
 
 #include "buffers.h"
 
+#include "clew.h"
+
 #include "util_foreach.h"
 #include "util_map.h"
 #include "util_math.h"
 #include "util_md5.h"
-#include "util_opencl.h"
 #include "util_opengl.h"
 #include "util_path.h"
 #include "util_time.h"
@@ -552,7 +553,7 @@ public:
                device_initialized = true;
        }
 
-       static void context_notify_callback(const char *err_info,
+       static void CL_CALLBACK context_notify_callback(const char *err_info,
                const void *private_info, size_t cb, void *user_data)
        {
                char name[256];
@@ -1162,6 +1163,26 @@ Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background)
        return new OpenCLDevice(info, stats, background);
 }
 
+bool device_opencl_init(void) {
+       static bool initialized = false;
+       static bool result = false;
+
+       if (initialized)
+               return result;
+
+       initialized = true;
+
+       // OpenCL disabled for now, only works with this environment variable set
+       if(!getenv("CYCLES_OPENCL_TEST")) {
+               result = false;
+       }
+       else {
+               result = clewInit() == CLEW_SUCCESS;
+       }
+
+       return result;
+}
+
 void device_opencl_info(vector<DeviceInfo>& devices)
 {
        vector<cl_device_id> device_ids;
index 01b5675..d9b97a7 100644 (file)
@@ -10,10 +10,8 @@ set(INC_SYS
 
 set(SRC
        util_cache.cpp
-       util_cuda.cpp
        util_dynlib.cpp
        util_md5.cpp
-       util_opencl.cpp
        util_path.cpp
        util_string.cpp
        util_simd.cpp
@@ -34,7 +32,6 @@ set(SRC_HEADERS
        util_args.h
        util_boundbox.h
        util_cache.h
-       util_cuda.h
        util_debug.h
        util_dynlib.h
        util_foreach.h
@@ -46,7 +43,6 @@ set(SRC_HEADERS
        util_map.h
        util_math.h
        util_md5.h
-       util_opencl.h
        util_opengl.h
        util_optimization.h
        util_param.h
diff --git a/intern/cycles/util/util_cuda.cpp b/intern/cycles/util/util_cuda.cpp
deleted file mode 100644 (file)
index 5069043..0000000
+++ /dev/null
@@ -1,503 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-#include <iostream>
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "util_cuda.h"
-#include "util_debug.h"
-#include "util_dynlib.h"
-#include "util_path.h"
-#include "util_string.h"
-
-#ifdef _WIN32
-#define popen _popen
-#define pclose _pclose
-#endif
-
-/* function defininitions */
-
-tcuInit *cuInit;
-tcuDriverGetVersion *cuDriverGetVersion;
-tcuDeviceGet *cuDeviceGet;
-tcuDeviceGetCount *cuDeviceGetCount;
-tcuDeviceGetName *cuDeviceGetName;
-tcuDeviceComputeCapability *cuDeviceComputeCapability;
-tcuDeviceTotalMem *cuDeviceTotalMem;
-tcuDeviceGetProperties *cuDeviceGetProperties;
-tcuDeviceGetAttribute *cuDeviceGetAttribute;
-tcuCtxCreate *cuCtxCreate;
-tcuCtxDestroy *cuCtxDestroy;
-tcuCtxAttach *cuCtxAttach;
-tcuCtxDetach *cuCtxDetach;
-tcuCtxPushCurrent *cuCtxPushCurrent;
-tcuCtxPopCurrent *cuCtxPopCurrent;
-tcuCtxGetDevice *cuCtxGetDevice;
-tcuCtxSynchronize *cuCtxSynchronize;
-tcuModuleLoad *cuModuleLoad;
-tcuModuleLoadData *cuModuleLoadData;
-tcuModuleLoadDataEx *cuModuleLoadDataEx;
-tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
-tcuModuleUnload *cuModuleUnload;
-tcuModuleGetFunction *cuModuleGetFunction;
-tcuModuleGetGlobal *cuModuleGetGlobal;
-tcuModuleGetTexRef *cuModuleGetTexRef;
-tcuModuleGetSurfRef *cuModuleGetSurfRef;
-tcuMemGetInfo *cuMemGetInfo;
-tcuMemAlloc *cuMemAlloc;
-tcuMemAllocPitch *cuMemAllocPitch;
-tcuMemFree *cuMemFree;
-tcuMemGetAddressRange *cuMemGetAddressRange;
-tcuMemAllocHost *cuMemAllocHost;
-tcuMemFreeHost *cuMemFreeHost;
-tcuMemHostAlloc *cuMemHostAlloc;
-tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
-tcuMemHostGetFlags *cuMemHostGetFlags;
-tcuMemcpyHtoD *cuMemcpyHtoD;
-tcuMemcpyDtoH *cuMemcpyDtoH;
-tcuMemcpyDtoD *cuMemcpyDtoD;
-tcuMemcpyDtoA *cuMemcpyDtoA;
-tcuMemcpyAtoD *cuMemcpyAtoD;
-tcuMemcpyHtoA *cuMemcpyHtoA;
-tcuMemcpyAtoH *cuMemcpyAtoH;
-tcuMemcpyAtoA *cuMemcpyAtoA;
-tcuMemcpy2D *cuMemcpy2D;
-tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
-tcuMemcpy3D *cuMemcpy3D;
-tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
-tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
-tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
-tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
-tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
-tcuMemcpy2DAsync *cuMemcpy2DAsync;
-tcuMemcpy3DAsync *cuMemcpy3DAsync;
-tcuMemsetD8 *cuMemsetD8;
-tcuMemsetD16 *cuMemsetD16;
-tcuMemsetD32 *cuMemsetD32;
-tcuMemsetD2D8 *cuMemsetD2D8;
-tcuMemsetD2D16 *cuMemsetD2D16;
-tcuMemsetD2D32 *cuMemsetD2D32;
-tcuFuncSetBlockShape *cuFuncSetBlockShape;
-tcuFuncSetSharedSize *cuFuncSetSharedSize;
-tcuFuncGetAttribute *cuFuncGetAttribute;
-tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
-tcuArrayCreate *cuArrayCreate;
-tcuArrayGetDescriptor *cuArrayGetDescriptor;
-tcuArrayDestroy *cuArrayDestroy;
-tcuArray3DCreate *cuArray3DCreate;
-tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
-tcuTexRefCreate *cuTexRefCreate;
-tcuTexRefDestroy *cuTexRefDestroy;
-tcuTexRefSetArray *cuTexRefSetArray;
-tcuTexRefSetAddress *cuTexRefSetAddress;
-tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
-tcuTexRefSetFormat *cuTexRefSetFormat;
-tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
-tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
-tcuTexRefSetFlags *cuTexRefSetFlags;
-tcuTexRefGetAddress *cuTexRefGetAddress;
-tcuTexRefGetArray *cuTexRefGetArray;
-tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
-tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
-tcuTexRefGetFormat *cuTexRefGetFormat;
-tcuTexRefGetFlags *cuTexRefGetFlags;
-tcuSurfRefSetArray *cuSurfRefSetArray;
-tcuSurfRefGetArray *cuSurfRefGetArray;
-tcuParamSetSize *cuParamSetSize;
-tcuParamSeti *cuParamSeti;
-tcuParamSetf *cuParamSetf;
-tcuParamSetv *cuParamSetv;
-tcuParamSetTexRef *cuParamSetTexRef;
-tcuLaunch *cuLaunch;
-tcuLaunchGrid *cuLaunchGrid;
-tcuLaunchGridAsync *cuLaunchGridAsync;
-tcuEventCreate *cuEventCreate;
-tcuEventRecord *cuEventRecord;
-tcuEventQuery *cuEventQuery;
-tcuEventSynchronize *cuEventSynchronize;
-tcuEventDestroy *cuEventDestroy;
-tcuEventElapsedTime *cuEventElapsedTime;
-tcuStreamCreate *cuStreamCreate;
-tcuStreamQuery *cuStreamQuery;
-tcuStreamSynchronize *cuStreamSynchronize;
-tcuStreamDestroy *cuStreamDestroy;
-tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
-tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
-tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
-tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
-tcuGraphicsMapResources *cuGraphicsMapResources;
-tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
-tcuGetExportTable *cuGetExportTable;
-tcuCtxSetLimit *cuCtxSetLimit;
-tcuCtxGetLimit *cuCtxGetLimit;
-tcuGLCtxCreate *cuGLCtxCreate;
-tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
-tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
-tcuCtxSetCurrent *cuCtxSetCurrent;
-tcuLaunchKernel *cuLaunchKernel;
-
-CCL_NAMESPACE_BEGIN
-
-/* utility macros */
-#define CUDA_LIBRARY_FIND_CHECKED(name) \
-       name = (t##name*)dynamic_library_find(lib, #name);
-
-#define CUDA_LIBRARY_FIND(name) \
-       name = (t##name*)dynamic_library_find(lib, #name); \
-       assert(name);
-
-#define CUDA_LIBRARY_FIND_V2(name) \
-       name = (t##name*)dynamic_library_find(lib, #name "_v2"); \
-       assert(name);
-
-/* initialization function */
-
-bool cuLibraryInit()
-{
-       static bool initialized = false;
-       static bool result = false;
-
-       if(initialized)
-               return result;
-       
-       initialized = true;
-
-       /* library paths */
-#ifdef _WIN32
-       /* expected in c:/windows/system or similar, no path needed */
-       const char *path = "nvcuda.dll";
-       const char *alternative_path = NULL;
-#elif defined(__APPLE__)
-       /* default installation path */
-       const char *path = "/usr/local/cuda/lib/libcuda.dylib";
-       const char *alternative_path = NULL;
-#else
-       const char *path = "libcuda.so";
-       const char *alternative_path = "libcuda.so.1";
-#endif
-
-       /* load library */
-       DynamicLibrary *lib = dynamic_library_open(path);
-
-       if(lib == NULL && alternative_path)
-               lib = dynamic_library_open(alternative_path);
-
-       if(lib == NULL)
-               return false;
-
-       /* detect driver version */
-       int driver_version = 1000;
-
-       CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
-       if(cuDriverGetVersion)
-               cuDriverGetVersion(&driver_version);
-
-       /* we require version 4.0 */
-       if(driver_version < 4000)
-               return false;
-
-       /* fetch all function pointers */
-       CUDA_LIBRARY_FIND(cuInit);
-       CUDA_LIBRARY_FIND(cuDeviceGet);
-       CUDA_LIBRARY_FIND(cuDeviceGetCount);
-       CUDA_LIBRARY_FIND(cuDeviceGetName);
-       CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
-       CUDA_LIBRARY_FIND(cuDeviceTotalMem);
-       CUDA_LIBRARY_FIND(cuDeviceGetProperties);
-       CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
-       CUDA_LIBRARY_FIND(cuCtxCreate);
-       CUDA_LIBRARY_FIND(cuCtxDestroy);
-       CUDA_LIBRARY_FIND(cuCtxAttach);
-       CUDA_LIBRARY_FIND(cuCtxDetach);
-       CUDA_LIBRARY_FIND(cuCtxPushCurrent);
-       CUDA_LIBRARY_FIND(cuCtxPopCurrent);
-       CUDA_LIBRARY_FIND(cuCtxGetDevice);
-       CUDA_LIBRARY_FIND(cuCtxSynchronize);
-       CUDA_LIBRARY_FIND(cuModuleLoad);
-       CUDA_LIBRARY_FIND(cuModuleLoadData);
-       CUDA_LIBRARY_FIND(cuModuleUnload);
-       CUDA_LIBRARY_FIND(cuModuleGetFunction);
-       CUDA_LIBRARY_FIND(cuModuleGetGlobal);
-       CUDA_LIBRARY_FIND(cuModuleGetTexRef);
-       CUDA_LIBRARY_FIND(cuMemGetInfo);
-       CUDA_LIBRARY_FIND(cuMemAlloc);
-       CUDA_LIBRARY_FIND(cuMemAllocPitch);
-       CUDA_LIBRARY_FIND(cuMemFree);
-       CUDA_LIBRARY_FIND(cuMemGetAddressRange);
-       CUDA_LIBRARY_FIND(cuMemAllocHost);
-       CUDA_LIBRARY_FIND(cuMemFreeHost);
-       CUDA_LIBRARY_FIND(cuMemHostAlloc);
-       CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer);
-       CUDA_LIBRARY_FIND(cuMemcpyHtoD);
-       CUDA_LIBRARY_FIND(cuMemcpyDtoH);
-       CUDA_LIBRARY_FIND(cuMemcpyDtoD);
-       CUDA_LIBRARY_FIND(cuMemcpyDtoA);
-       CUDA_LIBRARY_FIND(cuMemcpyAtoD);
-       CUDA_LIBRARY_FIND(cuMemcpyHtoA);
-       CUDA_LIBRARY_FIND(cuMemcpyAtoH);
-       CUDA_LIBRARY_FIND(cuMemcpyAtoA);
-       CUDA_LIBRARY_FIND(cuMemcpy2D);
-       CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned);
-       CUDA_LIBRARY_FIND(cuMemcpy3D);
-       CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync);
-       CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync);
-       CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync);
-       CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync);
-       CUDA_LIBRARY_FIND(cuMemcpy2DAsync);
-       CUDA_LIBRARY_FIND(cuMemcpy3DAsync);
-       CUDA_LIBRARY_FIND(cuMemsetD8);
-       CUDA_LIBRARY_FIND(cuMemsetD16);
-       CUDA_LIBRARY_FIND(cuMemsetD32);
-       CUDA_LIBRARY_FIND(cuMemsetD2D8);
-       CUDA_LIBRARY_FIND(cuMemsetD2D16);
-       CUDA_LIBRARY_FIND(cuMemsetD2D32);
-       CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
-       CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
-       CUDA_LIBRARY_FIND(cuFuncGetAttribute);
-       CUDA_LIBRARY_FIND(cuArrayCreate);
-       CUDA_LIBRARY_FIND(cuArrayGetDescriptor);
-       CUDA_LIBRARY_FIND(cuArrayDestroy);
-       CUDA_LIBRARY_FIND(cuArray3DCreate);
-       CUDA_LIBRARY_FIND(cuArray3DGetDescriptor);
-       CUDA_LIBRARY_FIND(cuTexRefCreate);
-       CUDA_LIBRARY_FIND(cuTexRefDestroy);
-       CUDA_LIBRARY_FIND(cuTexRefSetArray);
-       CUDA_LIBRARY_FIND(cuTexRefSetAddress);
-       CUDA_LIBRARY_FIND(cuTexRefSetAddress2D);
-       CUDA_LIBRARY_FIND(cuTexRefSetFormat);
-       CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
-       CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
-       CUDA_LIBRARY_FIND(cuTexRefSetFlags);
-       CUDA_LIBRARY_FIND(cuTexRefGetAddress);
-       CUDA_LIBRARY_FIND(cuTexRefGetArray);
-       CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
-       CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
-       CUDA_LIBRARY_FIND(cuTexRefGetFormat);
-       CUDA_LIBRARY_FIND(cuTexRefGetFlags);
-       CUDA_LIBRARY_FIND(cuParamSetSize);
-       CUDA_LIBRARY_FIND(cuParamSeti);
-       CUDA_LIBRARY_FIND(cuParamSetf);
-       CUDA_LIBRARY_FIND(cuParamSetv);
-       CUDA_LIBRARY_FIND(cuParamSetTexRef);
-       CUDA_LIBRARY_FIND(cuLaunch);
-       CUDA_LIBRARY_FIND(cuLaunchGrid);
-       CUDA_LIBRARY_FIND(cuLaunchGridAsync);
-       CUDA_LIBRARY_FIND(cuEventCreate);
-       CUDA_LIBRARY_FIND(cuEventRecord);
-       CUDA_LIBRARY_FIND(cuEventQuery);
-       CUDA_LIBRARY_FIND(cuEventSynchronize);
-       CUDA_LIBRARY_FIND(cuEventDestroy);
-       CUDA_LIBRARY_FIND(cuEventElapsedTime);
-       CUDA_LIBRARY_FIND(cuStreamCreate);
-       CUDA_LIBRARY_FIND(cuStreamQuery);
-       CUDA_LIBRARY_FIND(cuStreamSynchronize);
-       CUDA_LIBRARY_FIND(cuStreamDestroy);
-
-       /* cuda 2.1 */
-       CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
-       CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
-       CUDA_LIBRARY_FIND(cuGLCtxCreate);
-       CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
-       CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
-
-       /* cuda 2.3 */
-       CUDA_LIBRARY_FIND(cuMemHostGetFlags);
-       CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
-       CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
-
-       /* cuda 3.0 */
-       CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync);
-       CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
-       CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
-       CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
-       CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer);
-       CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
-       CUDA_LIBRARY_FIND(cuGraphicsMapResources);
-       CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
-       CUDA_LIBRARY_FIND(cuGetExportTable);
-
-       /* cuda 3.1 */
-       CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
-       CUDA_LIBRARY_FIND(cuSurfRefSetArray);
-       CUDA_LIBRARY_FIND(cuSurfRefGetArray);
-       CUDA_LIBRARY_FIND(cuCtxSetLimit);
-       CUDA_LIBRARY_FIND(cuCtxGetLimit);
-
-       /* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library
-        * has both the old ones for compatibility and new ones with _v2 postfix,
-        * we load the _v2 ones here. */
-       CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem);
-       CUDA_LIBRARY_FIND_V2(cuCtxCreate);
-       CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal);
-       CUDA_LIBRARY_FIND_V2(cuMemGetInfo);
-       CUDA_LIBRARY_FIND_V2(cuMemAlloc);
-       CUDA_LIBRARY_FIND_V2(cuMemAllocPitch);
-       CUDA_LIBRARY_FIND_V2(cuMemFree);
-       CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange);
-       CUDA_LIBRARY_FIND_V2(cuMemAllocHost);
-       CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync);
-       CUDA_LIBRARY_FIND_V2(cuMemcpy2D);
-       CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned);
-       CUDA_LIBRARY_FIND_V2(cuMemcpy3D);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync);
-       CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync);
-       CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync);
-       CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync);
-       CUDA_LIBRARY_FIND_V2(cuMemsetD8);
-       CUDA_LIBRARY_FIND_V2(cuMemsetD16);
-       CUDA_LIBRARY_FIND_V2(cuMemsetD32);
-       CUDA_LIBRARY_FIND_V2(cuMemsetD2D8);
-       CUDA_LIBRARY_FIND_V2(cuMemsetD2D16);
-       CUDA_LIBRARY_FIND_V2(cuMemsetD2D32);
-       CUDA_LIBRARY_FIND_V2(cuArrayCreate);
-       CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor);
-       CUDA_LIBRARY_FIND_V2(cuArray3DCreate);
-       CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor);
-       CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress);
-       CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D);
-       CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress);
-       CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer);
-       CUDA_LIBRARY_FIND_V2(cuGLCtxCreate);
-
-       /* cuda 4.0 */
-       CUDA_LIBRARY_FIND(cuCtxSetCurrent);
-       CUDA_LIBRARY_FIND(cuLaunchKernel);
-
-       if(cuHavePrecompiledKernels())
-               result = true;
-#ifndef _WIN32
-       else if(cuCompilerPath() != "")
-               result = true;
-#endif
-
-       return result;
-}
-
-bool cuHavePrecompiledKernels()
-{
-       string cubins_path = path_get("lib");
-
-       return path_exists(cubins_path);
-}
-
-string cuCompilerPath()
-{
-#ifdef _WIN32
-       const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
-       const char *executable = "nvcc.exe";
-#else
-       const char *defaultpaths[] = {
-               "/Developer/NVIDIA/CUDA-5.0/bin",
-               "/usr/local/cuda-5.0/bin",
-               "/usr/local/cuda/bin",
-               "/Developer/NVIDIA/CUDA-6.0/bin",
-               "/usr/local/cuda-6.0/bin",
-               "/Developer/NVIDIA/CUDA-5.5/bin",
-               "/usr/local/cuda-5.5/bin",
-               NULL};
-       const char *executable = "nvcc";
-#endif
-
-       const char *binpath = getenv("CUDA_BIN_PATH");
-
-       string nvcc;
-
-       if(binpath) {
-               nvcc = path_join(binpath, executable);
-               if(path_exists(nvcc))
-                       return nvcc;
-       }
-
-       for(int i = 0; defaultpaths[i]; i++) {
-               nvcc = path_join(defaultpaths[i], executable);
-               if(path_exists(nvcc))
-                       return nvcc;
-       }
-
-#ifndef _WIN32
-       {
-               FILE *handle = popen("which nvcc", "r");
-               if(handle) {
-                       char buffer[4096] = {0};
-                       int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
-                       buffer[len] = '\0';
-                       pclose(handle);
-
-                       if(buffer[0])
-                               return "nvcc";
-               }
-       }
-#endif
-
-       return "";
-}
-
-int cuCompilerVersion()
-{
-       string path = cuCompilerPath();
-       if(path == "")
-               return 0;
-       
-       /* get --version output */
-       FILE *pipe = popen((path + " --version").c_str(), "r");
-       if(!pipe) {
-               fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
-               return 0;
-       }
-
-       char buf[128];
-       string output = "";
-
-       while(!feof(pipe))
-               if(fgets(buf, 128, pipe) != NULL)
-                       output += buf;
-
-       pclose(pipe);
-
-       /* parse version number */
-       string marker = "Cuda compilation tools, release ";
-       size_t offset = output.find(marker);
-       if(offset == string::npos) {
-               fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output.c_str());
-               return 0;
-       }
-
-       string versionstr = output.substr(offset + marker.size(), string::npos);
-       int major, minor;
-
-       if(sscanf(versionstr.c_str(), "%d.%d", &major, &minor) < 2) {
-               fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output.c_str());
-               return 0;
-       }
-
-       return 10*major + minor;
-}
-
-CCL_NAMESPACE_END
-
diff --git a/intern/cycles/util/util_cuda.h b/intern/cycles/util/util_cuda.h
deleted file mode 100644 (file)
index a633fb2..0000000
+++ /dev/null
@@ -1,636 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-#ifndef __UTIL_CUDA_H__
-#define __UTIL_CUDA_H__
-
-#include <stdlib.h>
-#include "util_opengl.h"
-#include "util_string.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* CUDA is linked in dynamically at runtime, so we can start the application
- * without requiring a CUDA installation. Code adapted from the example
- * matrixMulDynlinkJIT in the CUDA SDK. */
-
-bool cuLibraryInit();
-bool cuHavePrecompiledKernels();
-string cuCompilerPath();
-int cuCompilerVersion();
-
-CCL_NAMESPACE_END
-
-/* defines, structs, enums */
-
-#define CUDA_VERSION 3020
-
-#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(__LP64__)
-typedef unsigned long long CUdeviceptr;
-#else
-typedef unsigned int CUdeviceptr;
-#endif
-
-typedef int CUdevice;
-typedef struct CUctx_st *CUcontext;
-typedef struct CUmod_st *CUmodule;
-typedef struct CUfunc_st *CUfunction;
-typedef struct CUarray_st *CUarray;
-typedef struct CUtexref_st *CUtexref;
-typedef struct CUsurfref_st *CUsurfref;
-typedef struct CUevent_st *CUevent;
-typedef struct CUstream_st *CUstream;
-typedef struct CUgraphicsResource_st *CUgraphicsResource;
-
-typedef struct CUuuid_st {
-       char bytes[16];
-} CUuuid;
-
-typedef enum CUctx_flags_enum {
-       CU_CTX_SCHED_AUTO  = 0,
-       CU_CTX_SCHED_SPIN  = 1,
-       CU_CTX_SCHED_YIELD = 2,
-       CU_CTX_SCHED_MASK  = 0x3,
-       CU_CTX_BLOCKING_SYNC = 4,
-       CU_CTX_MAP_HOST = 8,
-       CU_CTX_LMEM_RESIZE_TO_MAX = 16,
-       CU_CTX_FLAGS_MASK  = 0x1f
-} CUctx_flags;
-
-typedef enum CUevent_flags_enum {
-       CU_EVENT_DEFAULT        = 0,
-       CU_EVENT_BLOCKING_SYNC  = 1,
-       CU_EVENT_DISABLE_TIMING = 2
-} CUevent_flags;
-
-typedef enum CUarray_format_enum {
-       CU_AD_FORMAT_UNSIGNED_INT8  = 0x01,
-       CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
-       CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
-       CU_AD_FORMAT_SIGNED_INT8    = 0x08,
-       CU_AD_FORMAT_SIGNED_INT16   = 0x09,
-       CU_AD_FORMAT_SIGNED_INT32   = 0x0a,
-       CU_AD_FORMAT_HALF           = 0x10,
-       CU_AD_FORMAT_FLOAT          = 0x20
-} CUarray_format;
-
-typedef enum CUaddress_mode_enum {
-       CU_TR_ADDRESS_MODE_WRAP   = 0,
-       CU_TR_ADDRESS_MODE_CLAMP  = 1,
-       CU_TR_ADDRESS_MODE_MIRROR = 2,
-       CU_TR_ADDRESS_MODE_BORDER = 3
-} CUaddress_mode;
-
-typedef enum CUfilter_mode_enum {
-       CU_TR_FILTER_MODE_POINT  = 0,
-       CU_TR_FILTER_MODE_LINEAR = 1
-} CUfilter_mode;
-
-typedef enum CUdevice_attribute_enum {
-       CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
-       CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
-       CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
-       CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
-       CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
-       CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
-       CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
-       CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
-       CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
-       CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
-       CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
-       CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
-       CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
-       CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
-       CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
-       CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
-       CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
-       CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
-       CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
-       CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
-       CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
-       CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
-       CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
-       CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
-       CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
-       CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
-       CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
-       CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
-       CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
-} CUdevice_attribute;
-
-typedef struct CUdevprop_st {
-       int maxThreadsPerBlock;
-       int maxThreadsDim[3];
-       int maxGridSize[3];
-       int sharedMemPerBlock;
-       int totalConstantMemory;
-       int SIMDWidth;
-       int memPitch;
-       int regsPerBlock;
-       int clockRate;
-       int textureAlign;
-} CUdevprop;
-
-typedef enum CUfunction_attribute_enum {
-       CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
-       CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
-       CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
-       CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
-       CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
-       CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
-       CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
-       CU_FUNC_ATTRIBUTE_MAX
-} CUfunction_attribute;
-
-typedef enum CUfunc_cache_enum {
-       CU_FUNC_CACHE_PREFER_NONE    = 0x00,
-       CU_FUNC_CACHE_PREFER_SHARED  = 0x01,
-       CU_FUNC_CACHE_PREFER_L1      = 0x02
-} CUfunc_cache;
-
-typedef enum CUmemorytype_enum {
-       CU_MEMORYTYPE_HOST   = 0x01,
-       CU_MEMORYTYPE_DEVICE = 0x02,
-       CU_MEMORYTYPE_ARRAY  = 0x03
-} CUmemorytype;
-
-typedef enum CUcomputemode_enum {
-       CU_COMPUTEMODE_DEFAULT    = 0,
-       CU_COMPUTEMODE_EXCLUSIVE  = 1,
-       CU_COMPUTEMODE_PROHIBITED = 2
-} CUcomputemode;
-
-typedef enum CUjit_option_enum
-{
-       CU_JIT_MAX_REGISTERS = 0,
-       CU_JIT_THREADS_PER_BLOCK,
-       CU_JIT_WALL_TIME,
-       CU_JIT_INFO_LOG_BUFFER,
-       CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
-       CU_JIT_ERROR_LOG_BUFFER,
-       CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
-       CU_JIT_OPTIMIZATION_LEVEL,
-       CU_JIT_TARGET_FROM_CUCONTEXT,
-       CU_JIT_TARGET,
-       CU_JIT_FALLBACK_STRATEGY
-
-} CUjit_option;
-
-typedef enum CUjit_target_enum
-{
-       CU_TARGET_COMPUTE_10 = 0,
-       CU_TARGET_COMPUTE_11,
-       CU_TARGET_COMPUTE_12,
-       CU_TARGET_COMPUTE_13,
-       CU_TARGET_COMPUTE_20,
-       CU_TARGET_COMPUTE_21,
-       CU_TARGET_COMPUTE_30,
-       CU_TARGET_COMPUTE_35,
-       CU_TARGET_COMPUTE_50
-} CUjit_target;
-
-typedef enum CUjit_fallback_enum
-{
-       CU_PREFER_PTX = 0,
-       CU_PREFER_BINARY
-
-} CUjit_fallback;
-
-typedef enum CUgraphicsRegisterFlags_enum {
-       CU_GRAPHICS_REGISTER_FLAGS_NONE  = 0x00
-} CUgraphicsRegisterFlags;
-
-typedef enum CUgraphicsMapResourceFlags_enum {
-       CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0x00,
-       CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 0x01,
-       CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
-} CUgraphicsMapResourceFlags;
-
-typedef enum CUarray_cubemap_face_enum {
-       CU_CUBEMAP_FACE_POSITIVE_X  = 0x00,
-       CU_CUBEMAP_FACE_NEGATIVE_X  = 0x01,
-       CU_CUBEMAP_FACE_POSITIVE_Y  = 0x02,
-       CU_CUBEMAP_FACE_NEGATIVE_Y  = 0x03,
-       CU_CUBEMAP_FACE_POSITIVE_Z  = 0x04,
-       CU_CUBEMAP_FACE_NEGATIVE_Z  = 0x05
-} CUarray_cubemap_face;
-
-typedef enum CUlimit_enum {
-       CU_LIMIT_STACK_SIZE        = 0x00,
-       CU_LIMIT_PRINTF_FIFO_SIZE  = 0x01,
-       CU_LIMIT_MALLOC_HEAP_SIZE  = 0x02
-} CUlimit;
-
-typedef enum cudaError_enum {
-       CUDA_SUCCESS                              = 0,
-       CUDA_ERROR_INVALID_VALUE                  = 1,
-       CUDA_ERROR_OUT_OF_MEMORY                  = 2,
-       CUDA_ERROR_NOT_INITIALIZED                = 3,
-       CUDA_ERROR_DEINITIALIZED                  = 4,
-       CUDA_ERROR_NO_DEVICE                      = 100,
-       CUDA_ERROR_INVALID_DEVICE                 = 101,
-       CUDA_ERROR_INVALID_IMAGE                  = 200,
-       CUDA_ERROR_INVALID_CONTEXT                = 201,
-       CUDA_ERROR_MAP_FAILED                     = 205,
-       CUDA_ERROR_UNMAP_FAILED                   = 206,
-       CUDA_ERROR_ARRAY_IS_MAPPED                = 207,
-       CUDA_ERROR_ALREADY_MAPPED                 = 208,
-       CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,
-       CUDA_ERROR_ALREADY_ACQUIRED               = 210,
-       CUDA_ERROR_NOT_MAPPED                     = 211,
-       CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
-       CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,
-       CUDA_ERROR_ECC_UNCORRECTABLE              = 214,
-       CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,
-       CUDA_ERROR_CONTEXT_ALREADY_IN_USE         = 216,
-       CUDA_ERROR_PEER_ACCESS_UNSUPPORTED        = 217,
-       CUDA_ERROR_INVALID_PTX                    = 218,
-       CUDA_ERROR_INVALID_SOURCE                 = 300,
-       CUDA_ERROR_FILE_NOT_FOUND                 = 301,
-       CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
-       CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
-       CUDA_ERROR_OPERATING_SYSTEM               = 304,
-       CUDA_ERROR_INVALID_HANDLE                 = 400,
-       CUDA_ERROR_NOT_FOUND                      = 500,
-       CUDA_ERROR_NOT_READY                      = 600,
-       CUDA_ERROR_ILLEGAL_ADDRESS                = 700,
-       CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
-       CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,
-       CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
-       CUDA_ERROR_HARDWARE_STACK_ERROR           = 714,
-       CUDA_ERROR_ILLEGAL_INSTRUCTION            = 715,
-       CUDA_ERROR_MISALIGNED_ADDRESS             = 716,
-       CUDA_ERROR_INVALID_ADDRESS_SPACE          = 717,
-       CUDA_ERROR_INVALID_PC                     = 718,
-       CUDA_ERROR_LAUNCH_FAILED                  = 719,
-       CUDA_ERROR_NOT_PERMITTED                  = 800,
-       CUDA_ERROR_NOT_SUPPORTED                  = 801,
-       CUDA_ERROR_UNKNOWN                        = 999
-} CUresult;
-
-#define CU_MEMHOSTALLOC_PORTABLE        0x01
-#define CU_MEMHOSTALLOC_DEVICEMAP       0x02
-#define CU_MEMHOSTALLOC_WRITECOMBINED   0x04
-
-typedef struct CUDA_MEMCPY2D_st {
-       size_t srcXInBytes;
-       size_t srcY;
-
-       CUmemorytype srcMemoryType;
-       const void *srcHost;
-       CUdeviceptr srcDevice;
-       CUarray srcArray;
-       size_t srcPitch;
-
-       size_t dstXInBytes;
-       size_t dstY;
-
-       CUmemorytype dstMemoryType;
-       void *dstHost;
-       CUdeviceptr dstDevice;
-       CUarray dstArray;
-       size_t dstPitch;
-
-       size_t WidthInBytes;
-       size_t Height;
-} CUDA_MEMCPY2D;
-
-typedef struct CUDA_MEMCPY3D_st {
-       size_t srcXInBytes;
-       size_t srcY;
-       size_t srcZ;
-       size_t srcLOD;
-       CUmemorytype srcMemoryType;
-       const void *srcHost;
-       CUdeviceptr srcDevice;
-       CUarray srcArray;
-       void *reserved0;
-       size_t srcPitch;
-       size_t srcHeight;
-
-       size_t dstXInBytes;
-       size_t dstY;
-       size_t dstZ;
-       size_t dstLOD;
-       CUmemorytype dstMemoryType;
-       void *dstHost;
-       CUdeviceptr dstDevice;
-       CUarray dstArray;
-       void *reserved1;
-       size_t dstPitch;
-       size_t dstHeight;
-
-       size_t WidthInBytes;
-       size_t Height;
-       size_t Depth;
-} CUDA_MEMCPY3D;
-
-typedef struct CUDA_ARRAY_DESCRIPTOR_st
-{
-       size_t Width;
-       size_t Height;
-
-       CUarray_format Format;
-       unsigned int NumChannels;
-} CUDA_ARRAY_DESCRIPTOR;
-
-typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
-{
-       size_t Width;
-       size_t Height;
-       size_t Depth;
-
-       CUarray_format Format;
-       unsigned int NumChannels;
-       unsigned int Flags;
-} CUDA_ARRAY3D_DESCRIPTOR;
-
-#define CUDA_ARRAY3D_2DARRAY        0x01
-#define CUDA_ARRAY3D_SURFACE_LDST   0x02
-#define CU_TRSA_OVERRIDE_FORMAT 0x01
-#define CU_TRSF_READ_AS_INTEGER         0x01
-#define CU_TRSF_NORMALIZED_COORDINATES  0x02
-#define CU_TRSF_SRGB  0x10
-#define CU_PARAM_TR_DEFAULT -1
-
-#ifdef _WIN32
-#define CUDAAPI __stdcall
-#else
-#define CUDAAPI
-#endif
-
-/* function types */
-
-typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
-typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion);
-typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
-typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
-typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
-typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
-typedef CUresult CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev);
-typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
-typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
-typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
-typedef CUresult CUDAAPI tcuCtxDestroy(CUcontext ctx);
-typedef CUresult CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags);
-typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
-typedef CUresult CUDAAPI tcuCtxPushCurrent(CUcontext ctx );
-typedef CUresult CUDAAPI tcuCtxPopCurrent(CUcontext *pctx);
-typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device);
-typedef CUresult CUDAAPI tcuCtxSynchronize(void);
-typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
-typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit);
-typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache *pconfig);
-typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
-typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int *version);
-typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname);
-typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image);
-typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
-typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
-typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
-typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
-typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
-typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
-typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
-typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total);
-typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
-typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
-typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr);
-typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
-typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize);
-typedef CUresult CUDAAPI tcuMemFreeHost(void *p);
-typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
-typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
-typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p);
-typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
-typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
-typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
-typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
-typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
-typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
-typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
-typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
-typedef CUresult CUDAAPI tcuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
-typedef CUresult CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
-typedef CUresult CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
-typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
-typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
-typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
-typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
-typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
-typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
-typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
-typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
-typedef CUresult CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
-typedef CUresult CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
-typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
-typedef CUresult CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
-typedef CUresult CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
-typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags);
-typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
-typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
-typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
-typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream);
-typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags);
-typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
-typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
-typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
-typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent);
-typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
-typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
-typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
-typedef CUresult CUDAAPI tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
-typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
-typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
-typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
-typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
-typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
-typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
-typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
-typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
-typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
-typedef CUresult CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
-typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
-typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
-typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
-typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
-typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
-typedef CUresult CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
-typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef);
-typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
-typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
-typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
-typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
-typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
-typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
-typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
-typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
-typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
-typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
-typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device );
-typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
-typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
-typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
-typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra);
-
-/* function declarations */
-
-extern tcuInit *cuInit;
-extern tcuDriverGetVersion *cuDriverGetVersion;
-extern tcuDeviceGet *cuDeviceGet;
-extern tcuDeviceGetCount *cuDeviceGetCount;
-extern tcuDeviceGetName *cuDeviceGetName;
-extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
-extern tcuDeviceTotalMem *cuDeviceTotalMem;
-extern tcuDeviceGetProperties *cuDeviceGetProperties;
-extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
-extern tcuCtxCreate *cuCtxCreate;
-extern tcuCtxDestroy *cuCtxDestroy;
-extern tcuCtxAttach *cuCtxAttach;
-extern tcuCtxDetach *cuCtxDetach;
-extern tcuCtxPushCurrent *cuCtxPushCurrent;
-extern tcuCtxPopCurrent *cuCtxPopCurrent;
-extern tcuCtxGetDevice *cuCtxGetDevice;
-extern tcuCtxSynchronize *cuCtxSynchronize;
-extern tcuModuleLoad *cuModuleLoad;
-extern tcuModuleLoadData *cuModuleLoadData;
-extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
-extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
-extern tcuModuleUnload *cuModuleUnload;
-extern tcuModuleGetFunction *cuModuleGetFunction;
-extern tcuModuleGetGlobal *cuModuleGetGlobal;
-extern tcuModuleGetTexRef *cuModuleGetTexRef;
-extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
-extern tcuMemGetInfo *cuMemGetInfo;
-extern tcuMemAlloc *cuMemAlloc;
-extern tcuMemAllocPitch *cuMemAllocPitch;
-extern tcuMemFree *cuMemFree;
-extern tcuMemGetAddressRange *cuMemGetAddressRange;
-extern tcuMemAllocHost *cuMemAllocHost;
-extern tcuMemFreeHost *cuMemFreeHost;
-extern tcuMemHostAlloc *cuMemHostAlloc;
-extern tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
-extern tcuMemHostGetFlags *cuMemHostGetFlags;
-extern tcuMemcpyHtoD *cuMemcpyHtoD;
-extern tcuMemcpyDtoH *cuMemcpyDtoH;
-extern tcuMemcpyDtoD *cuMemcpyDtoD;
-extern tcuMemcpyDtoA *cuMemcpyDtoA;
-extern tcuMemcpyAtoD *cuMemcpyAtoD;
-extern tcuMemcpyHtoA *cuMemcpyHtoA;
-extern tcuMemcpyAtoH *cuMemcpyAtoH;
-extern tcuMemcpyAtoA *cuMemcpyAtoA;
-extern tcuMemcpy2D *cuMemcpy2D;
-extern tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
-extern tcuMemcpy3D *cuMemcpy3D;
-extern tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
-extern tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
-extern tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
-extern tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
-extern tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
-extern tcuMemcpy2DAsync *cuMemcpy2DAsync;
-extern tcuMemcpy3DAsync *cuMemcpy3DAsync;
-extern tcuMemsetD8 *cuMemsetD8;
-extern tcuMemsetD16 *cuMemsetD16;
-extern tcuMemsetD32 *cuMemsetD32;
-extern tcuMemsetD2D8 *cuMemsetD2D8;
-extern tcuMemsetD2D16 *cuMemsetD2D16;
-extern tcuMemsetD2D32 *cuMemsetD2D32;
-extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
-extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
-extern tcuFuncGetAttribute *cuFuncGetAttribute;
-extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
-extern tcuArrayCreate *cuArrayCreate;
-extern tcuArrayGetDescriptor *cuArrayGetDescriptor;
-extern tcuArrayDestroy *cuArrayDestroy;
-extern tcuArray3DCreate *cuArray3DCreate;
-extern tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
-extern tcuTexRefCreate *cuTexRefCreate;
-extern tcuTexRefDestroy *cuTexRefDestroy;
-extern tcuTexRefSetArray *cuTexRefSetArray;
-extern tcuTexRefSetAddress *cuTexRefSetAddress;
-extern tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
-extern tcuTexRefSetFormat *cuTexRefSetFormat;
-extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
-extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
-extern tcuTexRefSetFlags *cuTexRefSetFlags;
-extern tcuTexRefGetAddress *cuTexRefGetAddress;
-extern tcuTexRefGetArray *cuTexRefGetArray;
-extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
-extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
-extern tcuTexRefGetFormat *cuTexRefGetFormat;
-extern tcuTexRefGetFlags *cuTexRefGetFlags;
-extern tcuSurfRefSetArray *cuSurfRefSetArray;
-extern tcuSurfRefGetArray *cuSurfRefGetArray;
-extern tcuParamSetSize *cuParamSetSize;
-extern tcuParamSeti *cuParamSeti;
-extern tcuParamSetf *cuParamSetf;
-extern tcuParamSetv *cuParamSetv;
-extern tcuParamSetTexRef *cuParamSetTexRef;
-extern tcuLaunch *cuLaunch;
-extern tcuLaunchGrid *cuLaunchGrid;
-extern tcuLaunchGridAsync *cuLaunchGridAsync;
-extern tcuEventCreate *cuEventCreate;
-extern tcuEventRecord *cuEventRecord;
-extern tcuEventQuery *cuEventQuery;
-extern tcuEventSynchronize *cuEventSynchronize;
-extern tcuEventDestroy *cuEventDestroy;
-extern tcuEventElapsedTime *cuEventElapsedTime;
-extern tcuStreamCreate *cuStreamCreate;
-extern tcuStreamQuery *cuStreamQuery;
-extern tcuStreamSynchronize *cuStreamSynchronize;
-extern tcuStreamDestroy *cuStreamDestroy;
-extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
-extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
-extern tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
-extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
-extern tcuGraphicsMapResources *cuGraphicsMapResources;
-extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
-extern tcuGetExportTable *cuGetExportTable;
-extern tcuCtxSetLimit *cuCtxSetLimit;
-extern tcuCtxGetLimit *cuCtxGetLimit;
-extern tcuGLCtxCreate *cuGLCtxCreate;
-extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
-extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
-extern tcuCtxSetCurrent *cuCtxSetCurrent;
-extern tcuLaunchKernel *cuLaunchKernel;
-
-#endif /* __UTIL_CUDA_H__ */
-
diff --git a/intern/cycles/util/util_opencl.cpp b/intern/cycles/util/util_opencl.cpp
deleted file mode 100644 (file)
index c2d6bc6..0000000
+++ /dev/null
@@ -1,337 +0,0 @@
-//////////////////////////////////////////////////////////////////////////
-//  Copyright (c) 2009 Organic Vectory B.V.
-//  Written by George van Venrooij
-//
-//  Distributed under the Boost Software License, Version 1.0.
-//  (See accompanying file doc/license/Boost.txt)
-//  Extracted from the CLCC project - http://clcc.sourceforge.net/
-//////////////////////////////////////////////////////////////////////////
-
-#include <stdlib.h>
-
-#include "util_opencl.h"
-
-#ifndef CLCC_GENERATE_DOCUMENTATION
-#ifdef _WIN32
-#  define WIN32_LEAN_AND_MEAN
-#  define VC_EXTRALEAN
-#  include <windows.h>
-
-   typedef HMODULE             CLCC_DYNLIB_HANDLE;
-
-#  define CLCC_DYNLIB_OPEN    LoadLibrary
-#  define CLCC_DYNLIB_CLOSE   FreeLibrary
-#  define CLCC_DYNLIB_IMPORT  GetProcAddress
-#else
-#  include <dlfcn.h>
-
-   typedef void*                   CLCC_DYNLIB_HANDLE;
-
-#  define CLCC_DYNLIB_OPEN(path)  dlopen(path, RTLD_NOW | RTLD_GLOBAL)
-#  define CLCC_DYNLIB_CLOSE       dlclose
-#  define CLCC_DYNLIB_IMPORT      dlsym
-#endif
-#else
-//  typedef implementation_defined  CLCC_DYNLIB_HANDLE;
-//#  define CLCC_DYNLIB_OPEN(path)  implementation_defined
-//#  define CLCC_DYNLIB_CLOSE       implementation_defined
-//#  define CLCC_DYNLIB_IMPORT      implementation_defined
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-//! \brief module handle
-static CLCC_DYNLIB_HANDLE module = NULL;
-
-//  Variables holding function entry points
-#ifndef CLCC_GENERATE_DOCUMENTATION
-PFNCLGETPLATFORMIDS                 __clewGetPlatformIDs                = NULL;
-PFNCLGETPLATFORMINFO                __clewGetPlatformInfo               = NULL;
-PFNCLGETDEVICEIDS                   __clewGetDeviceIDs                  = NULL;
-PFNCLGETDEVICEINFO                  __clewGetDeviceInfo                 = NULL;
-PFNCLCREATECONTEXT                  __clewCreateContext                 = NULL;
-PFNCLCREATECONTEXTFROMTYPE          __clewCreateContextFromType         = NULL;
-PFNCLRETAINCONTEXT                  __clewRetainContext                 = NULL;
-PFNCLRELEASECONTEXT                 __clewReleaseContext                = NULL;
-PFNCLGETCONTEXTINFO                 __clewGetContextInfo                = NULL;
-PFNCLCREATECOMMANDQUEUE             __clewCreateCommandQueue            = NULL;
-PFNCLRETAINCOMMANDQUEUE             __clewRetainCommandQueue            = NULL;
-PFNCLRELEASECOMMANDQUEUE            __clewReleaseCommandQueue           = NULL;
-PFNCLGETCOMMANDQUEUEINFO            __clewGetCommandQueueInfo           = NULL;
-PFNCLSETCOMMANDQUEUEPROPERTY        __clewSetCommandQueueProperty       = NULL;
-PFNCLCREATEBUFFER                   __clewCreateBuffer                  = NULL;
-PFNCLCREATEIMAGE2D                  __clewCreateImage2D                 = NULL;
-PFNCLCREATEIMAGE3D                  __clewCreateImage3D                 = NULL;
-PFNCLRETAINMEMOBJECT                __clewRetainMemObject               = NULL;
-PFNCLRELEASEMEMOBJECT               __clewReleaseMemObject              = NULL;
-PFNCLGETSUPPORTEDIMAGEFORMATS       __clewGetSupportedImageFormats      = NULL;
-PFNCLGETMEMOBJECTINFO               __clewGetMemObjectInfo              = NULL;
-PFNCLGETIMAGEINFO                   __clewGetImageInfo                  = NULL;
-PFNCLCREATESAMPLER                  __clewCreateSampler                 = NULL;
-PFNCLRETAINSAMPLER                  __clewRetainSampler                 = NULL;
-PFNCLRELEASESAMPLER                 __clewReleaseSampler                = NULL;
-PFNCLGETSAMPLERINFO                 __clewGetSamplerInfo                = NULL;
-PFNCLCREATEPROGRAMWITHSOURCE        __clewCreateProgramWithSource       = NULL;
-PFNCLCREATEPROGRAMWITHBINARY        __clewCreateProgramWithBinary       = NULL;
-PFNCLRETAINPROGRAM                  __clewRetainProgram                 = NULL;
-PFNCLRELEASEPROGRAM                 __clewReleaseProgram                = NULL;
-PFNCLBUILDPROGRAM                   __clewBuildProgram                  = NULL;
-PFNCLUNLOADCOMPILER                 __clewUnloadCompiler                = NULL;
-PFNCLGETPROGRAMINFO                 __clewGetProgramInfo                = NULL;
-PFNCLGETPROGRAMBUILDINFO            __clewGetProgramBuildInfo           = NULL;
-PFNCLCREATEKERNEL                   __clewCreateKernel                  = NULL;
-PFNCLCREATEKERNELSINPROGRAM         __clewCreateKernelsInProgram        = NULL;
-PFNCLRETAINKERNEL                   __clewRetainKernel                  = NULL;
-PFNCLRELEASEKERNEL                  __clewReleaseKernel                 = NULL;
-PFNCLSETKERNELARG                   __clewSetKernelArg                  = NULL;
-PFNCLGETKERNELINFO                  __clewGetKernelInfo                 = NULL;
-PFNCLGETKERNELWORKGROUPINFO         __clewGetKernelWorkGroupInfo        = NULL;
-PFNCLWAITFOREVENTS                  __clewWaitForEvents                 = NULL;
-PFNCLGETEVENTINFO                   __clewGetEventInfo                  = NULL;
-PFNCLRETAINEVENT                    __clewRetainEvent                   = NULL;
-PFNCLRELEASEEVENT                   __clewReleaseEvent                  = NULL;
-PFNCLGETEVENTPROFILINGINFO          __clewGetEventProfilingInfo         = NULL;
-PFNCLFLUSH                          __clewFlush                         = NULL;
-PFNCLFINISH                         __clewFinish                        = NULL;
-PFNCLENQUEUEREADBUFFER              __clewEnqueueReadBuffer             = NULL;
-PFNCLENQUEUEWRITEBUFFER             __clewEnqueueWriteBuffer            = NULL;
-PFNCLENQUEUECOPYBUFFER              __clewEnqueueCopyBuffer             = NULL;
-PFNCLENQUEUEREADIMAGE               __clewEnqueueReadImage              = NULL;
-PFNCLENQUEUEWRITEIMAGE              __clewEnqueueWriteImage             = NULL;
-PFNCLENQUEUECOPYIMAGE               __clewEnqueueCopyImage              = NULL;
-PFNCLENQUEUECOPYIMAGETOBUFFER       __clewEnqueueCopyImageToBuffer      = NULL;
-PFNCLENQUEUECOPYBUFFERTOIMAGE       __clewEnqueueCopyBufferToImage      = NULL;
-PFNCLENQUEUEMAPBUFFER               __clewEnqueueMapBuffer              = NULL;
-PFNCLENQUEUEMAPIMAGE                __clewEnqueueMapImage               = NULL;
-PFNCLENQUEUEUNMAPMEMOBJECT          __clewEnqueueUnmapMemObject         = NULL;
-PFNCLENQUEUENDRANGEKERNEL           __clewEnqueueNDRangeKernel          = NULL;
-PFNCLENQUEUETASK                    __clewEnqueueTask                   = NULL;
-PFNCLENQUEUENATIVEKERNEL            __clewEnqueueNativeKernel           = NULL;
-PFNCLENQUEUEMARKER                  __clewEnqueueMarker                 = NULL;
-PFNCLENQUEUEWAITFOREVENTS           __clewEnqueueWaitForEvents          = NULL;
-PFNCLENQUEUEBARRIER                 __clewEnqueueBarrier                = NULL;
-PFNCLGETEXTENSIONFUNCTIONADDRESS    __clewGetExtensionFunctionAddress   = NULL;
-#endif  //  CLCC_GENERATE_DOCUMENTATION
-
-
-#if 0
-//! \brief Unloads OpenCL dynamic library, should not be called directly
-static void clewExit(void)
-{
-       if (module != NULL)
-       {
-               //  Ignore errors
-               CLCC_DYNLIB_CLOSE(module);
-               module = NULL;
-       }
-}
-#endif
-
-//! \param path path to dynamic library to load
-//! \return CLEW_ERROR_OPEN_FAILED if the library could not be opened
-//! CLEW_ERROR_ATEXIT_FAILED if atexit(clewExit) failed
-//! CLEW_SUCCESS when the library was succesfully loaded
-int clLibraryInit()
-{
-#ifdef _WIN32
-       const char *path = "OpenCL.dll";
-#elif defined(__APPLE__)
-       const char *path = "/Library/Frameworks/OpenCL.framework/OpenCL";
-#else
-       const char *path = "libOpenCL.so";
-#endif
-
-       // OpenCL disabled for now, only works with this environment variable set
-       if(!getenv("CYCLES_OPENCL_TEST"))
-               return 0;
-
-       //  Check if already initialized
-       if (module != NULL)
-       {
-               return 1;
-       }
-
-       //  Load library
-       module = CLCC_DYNLIB_OPEN(path);
-
-       //  Check for errors
-       if (module == NULL)
-       {
-               return 0;
-       }
-
-       // Disabled because we retain OpenCL context and it's difficult to ensure
-       // this will exit after releasing the context
-#if 0
-       //  Set unloading
-       int error = atexit(clewExit);
-
-       if (error)
-       {
-               //  Failure queing atexit, shutdown with error
-               CLCC_DYNLIB_CLOSE(module);
-               module = NULL;
-
-               return 0;
-       }
-#endif
-
-       //  Determine function entry-points
-       __clewGetPlatformIDs                = (PFNCLGETPLATFORMIDS              )CLCC_DYNLIB_IMPORT(module, "clGetPlatformIDs");
-       __clewGetPlatformInfo               = (PFNCLGETPLATFORMINFO             )CLCC_DYNLIB_IMPORT(module, "clGetPlatformInfo");
-       __clewGetDeviceIDs                  = (PFNCLGETDEVICEIDS                )CLCC_DYNLIB_IMPORT(module, "clGetDeviceIDs");
-       __clewGetDeviceInfo                 = (PFNCLGETDEVICEINFO               )CLCC_DYNLIB_IMPORT(module, "clGetDeviceInfo");
-       __clewCreateContext                 = (PFNCLCREATECONTEXT               )CLCC_DYNLIB_IMPORT(module, "clCreateContext");
-       __clewCreateContextFromType         = (PFNCLCREATECONTEXTFROMTYPE       )CLCC_DYNLIB_IMPORT(module, "clCreateContextFromType");
-       __clewRetainContext                 = (PFNCLRETAINCONTEXT               )CLCC_DYNLIB_IMPORT(module, "clRetainContext");
-       __clewReleaseContext                = (PFNCLRELEASECONTEXT              )CLCC_DYNLIB_IMPORT(module, "clReleaseContext");
-       __clewGetContextInfo                = (PFNCLGETCONTEXTINFO              )CLCC_DYNLIB_IMPORT(module, "clGetContextInfo");
-       __clewCreateCommandQueue            = (PFNCLCREATECOMMANDQUEUE          )CLCC_DYNLIB_IMPORT(module, "clCreateCommandQueue");
-       __clewRetainCommandQueue            = (PFNCLRETAINCOMMANDQUEUE          )CLCC_DYNLIB_IMPORT(module, "clRetainCommandQueue");
-       __clewReleaseCommandQueue           = (PFNCLRELEASECOMMANDQUEUE         )CLCC_DYNLIB_IMPORT(module, "clReleaseCommandQueue");
-       __clewGetCommandQueueInfo           = (PFNCLGETCOMMANDQUEUEINFO         )CLCC_DYNLIB_IMPORT(module, "clGetCommandQueueInfo");
-       __clewSetCommandQueueProperty       = (PFNCLSETCOMMANDQUEUEPROPERTY     )CLCC_DYNLIB_IMPORT(module, "clSetCommandQueueProperty");
-       __clewCreateBuffer                  = (PFNCLCREATEBUFFER                )CLCC_DYNLIB_IMPORT(module, "clCreateBuffer");
-       __clewCreateImage2D                 = (PFNCLCREATEIMAGE2D               )CLCC_DYNLIB_IMPORT(module, "clCreateImage2D");
-       __clewCreateImage3D                 = (PFNCLCREATEIMAGE3D               )CLCC_DYNLIB_IMPORT(module, "clCreateImage3D");
-       __clewRetainMemObject               = (PFNCLRETAINMEMOBJECT             )CLCC_DYNLIB_IMPORT(module, "clRetainMemObject");
-       __clewReleaseMemObject              = (PFNCLRELEASEMEMOBJECT            )CLCC_DYNLIB_IMPORT(module, "clReleaseMemObject");
-       __clewGetSupportedImageFormats      = (PFNCLGETSUPPORTEDIMAGEFORMATS    )CLCC_DYNLIB_IMPORT(module, "clGetSupportedImageFormats");
-       __clewGetMemObjectInfo              = (PFNCLGETMEMOBJECTINFO            )CLCC_DYNLIB_IMPORT(module, "clGetMemObjectInfo");
-       __clewGetImageInfo                  = (PFNCLGETIMAGEINFO                )CLCC_DYNLIB_IMPORT(module, "clGetImageInfo");
-       __clewCreateSampler                 = (PFNCLCREATESAMPLER               )CLCC_DYNLIB_IMPORT(module, "clCreateSampler");
-       __clewRetainSampler                 = (PFNCLRETAINSAMPLER               )CLCC_DYNLIB_IMPORT(module, "clRetainSampler");
-       __clewReleaseSampler                = (PFNCLRELEASESAMPLER              )CLCC_DYNLIB_IMPORT(module, "clReleaseSampler");
-       __clewGetSamplerInfo                = (PFNCLGETSAMPLERINFO              )CLCC_DYNLIB_IMPORT(module, "clGetSamplerInfo");
-       __clewCreateProgramWithSource       = (PFNCLCREATEPROGRAMWITHSOURCE     )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithSource");
-       __clewCreateProgramWithBinary       = (PFNCLCREATEPROGRAMWITHBINARY     )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithBinary");
-       __clewRetainProgram                 = (PFNCLRETAINPROGRAM               )CLCC_DYNLIB_IMPORT(module, "clRetainProgram");
-       __clewReleaseProgram                = (PFNCLRELEASEPROGRAM              )CLCC_DYNLIB_IMPORT(module, "clReleaseProgram");
-       __clewBuildProgram                  = (PFNCLBUILDPROGRAM                )CLCC_DYNLIB_IMPORT(module, "clBuildProgram");
-       __clewUnloadCompiler                = (PFNCLUNLOADCOMPILER              )CLCC_DYNLIB_IMPORT(module, "clUnloadCompiler");
-       __clewGetProgramInfo                = (PFNCLGETPROGRAMINFO              )CLCC_DYNLIB_IMPORT(module, "clGetProgramInfo");
-       __clewGetProgramBuildInfo           = (PFNCLGETPROGRAMBUILDINFO         )CLCC_DYNLIB_IMPORT(module, "clGetProgramBuildInfo");
-       __clewCreateKernel                  = (PFNCLCREATEKERNEL                )CLCC_DYNLIB_IMPORT(module, "clCreateKernel");
-       __clewCreateKernelsInProgram        = (PFNCLCREATEKERNELSINPROGRAM      )CLCC_DYNLIB_IMPORT(module, "clCreateKernelsInProgram");
-       __clewRetainKernel                  = (PFNCLRETAINKERNEL                )CLCC_DYNLIB_IMPORT(module, "clRetainKernel");
-       __clewReleaseKernel                 = (PFNCLRELEASEKERNEL               )CLCC_DYNLIB_IMPORT(module, "clReleaseKernel");
-       __clewSetKernelArg                  = (PFNCLSETKERNELARG                )CLCC_DYNLIB_IMPORT(module, "clSetKernelArg");
-       __clewGetKernelInfo                 = (PFNCLGETKERNELINFO               )CLCC_DYNLIB_IMPORT(module, "clGetKernelInfo");
-       __clewGetKernelWorkGroupInfo        = (PFNCLGETKERNELWORKGROUPINFO      )CLCC_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo");
-       __clewWaitForEvents                 = (PFNCLWAITFOREVENTS               )CLCC_DYNLIB_IMPORT(module, "clWaitForEvents");
-       __clewGetEventInfo                  = (PFNCLGETEVENTINFO                )CLCC_DYNLIB_IMPORT(module, "clGetEventInfo");
-       __clewRetainEvent                   = (PFNCLRETAINEVENT                 )CLCC_DYNLIB_IMPORT(module, "clRetainEvent");
-       __clewReleaseEvent                  = (PFNCLRELEASEEVENT                )CLCC_DYNLIB_IMPORT(module, "clReleaseEvent");
-       __clewGetEventProfilingInfo         = (PFNCLGETEVENTPROFILINGINFO       )CLCC_DYNLIB_IMPORT(module, "clGetEventProfilingInfo");
-       __clewFlush                         = (PFNCLFLUSH                       )CLCC_DYNLIB_IMPORT(module, "clFlush");
-       __clewFinish                        = (PFNCLFINISH                      )CLCC_DYNLIB_IMPORT(module, "clFinish");
-       __clewEnqueueReadBuffer             = (PFNCLENQUEUEREADBUFFER           )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadBuffer");
-       __clewEnqueueWriteBuffer            = (PFNCLENQUEUEWRITEBUFFER          )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer");
-       __clewEnqueueCopyBuffer             = (PFNCLENQUEUECOPYBUFFER           )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer");
-       __clewEnqueueReadImage              = (PFNCLENQUEUEREADIMAGE            )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadImage");
-       __clewEnqueueWriteImage             = (PFNCLENQUEUEWRITEIMAGE           )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteImage");