Cycles / CUDA:
author Thomas Dinges <blender@dingto.org>
Tue, 8 Oct 2013 15:29:28 +0000 (15:29 +0000)
committer Thomas Dinges <blender@dingto.org>
Tue, 8 Oct 2013 15:29:28 +0000 (15:29 +0000)
* Remove support for CUDA Toolkit 4.x; only Toolkit 5.0 and above are supported now.
* Remove support for sm_1x cards (< Fermi) for good. We haven't officially supported those cards for a few releases already; now remove the special-case code that was still there.

intern/cycles/device/device_cuda.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/SConscript
intern/cycles/kernel/kernel_shader.h

index 5440bd91987eda9368a4a8aba55a8cf97275e4f9..4ce7f6fd72979fe2e1d730bf32aa2436d076d0d8 100644 (file)
@@ -227,14 +227,12 @@ public:
 
        bool support_device(bool experimental)
        {
-               if(!experimental) {
-                       int major, minor;
-                       cuDeviceComputeCapability(&major, &minor, cuDevId);
+               int major, minor;
+               cuDeviceComputeCapability(&major, &minor, cuDevId);
 
-                       if(major < 2) {
-                               cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
-                               return false;
-                       }
+               if(major < 2) {
+                       cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
+                       return false;
                }
 
                return true;
@@ -286,8 +284,12 @@ public:
                        cuda_error_message("CUDA nvcc compiler version could not be parsed.");
                        return "";
                }
+               if(cuda_version < 50) {
+                       printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10);
+                       return "";
+               }
 
-               if(cuda_version != 50)
+               else if(cuda_version > 50)
                        printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10);
 
                /* compile */
@@ -296,36 +298,14 @@ public:
                const int machine = system_cpu_bits();
                string arch_flags;
 
-               /* build flags depending on CUDA version and arch */
-               if(cuda_version < 50) {
-                       /* CUDA 4.x */
-                       if(major == 1) {
-                               /* sm_1x */
-                               arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0";
-                       }
-                       else if(major == 2) {
-                               /* sm_2x */
-                               arch_flags = "--maxrregcount=24";
-                       }
-                       else {
-                               /* sm_3x */
-                               arch_flags = "--maxrregcount=32";
-                       }
+               /* CUDA 5.x build flags for different archs */
+               if(major == 2) {
+                       /* sm_2x */
+                       arch_flags = "--maxrregcount=32 --use_fast_math";
                }
-               else {
-                       /* CUDA 5.x */
-                       if(major == 1) {
-                               /* sm_1x */
-                               arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
-                       }
-                       else if(major == 2) {
-                               /* sm_2x */
-                               arch_flags = "--maxrregcount=32 --use_fast_math";
-                       }
-                       else {
-                               /* sm_3x */
-                               arch_flags = "--maxrregcount=32 --use_fast_math";
-                       }
+               else if(major == 3) {
+                       /* sm_3x */
+                       arch_flags = "--maxrregcount=32 --use_fast_math";
                }
 
                double starttime = time_dt();
index eaa4e304ebb99102baf1c9b69750e77473f2b9cc..56ba0e0874342b4898f4dd471579ffcafc8a4edb 100644 (file)
@@ -151,36 +151,16 @@ if(WITH_CYCLES_CUDA_BINARIES)
 
                set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
 
-               # build flags depending on CUDA version and arch
-               if(CUDA_VERSION LESS 50)
-                       # CUDA 4.x
-                       if(${arch} MATCHES "sm_1[0-9]")
-                               # sm_1x
-                               set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
-                       elseif(${arch} MATCHES "sm_2[0-9]")
-                               # sm_2x
-                               set(cuda_arch_flags "--maxrregcount=24")
-                       else()
-                               # sm_3x
-                               set(cuda_arch_flags "--maxrregcount=32")
-                       endif()
-
-                       set(cuda_math_flags "")
-               else()
-                       # CUDA 5.x
-                       if(${arch} MATCHES "sm_1[0-9]")
-                               # sm_1x
-                               set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
-                       elseif(${arch} MATCHES "sm_2[0-9]")
-                               # sm_2x
-                               set(cuda_arch_flags "--maxrregcount=32")
-                       else()
-                               # sm_3x
-                               set(cuda_arch_flags "--maxrregcount=32")
-                       endif()
-
-                       set(cuda_math_flags "--use_fast_math")
+               # CUDA 5.x build flags for different archs
+               if(${arch} MATCHES "sm_2[0-9]")
+                       # sm_2x
+                       set(cuda_arch_flags "--maxrregcount=32")
+               elseif(${arch} MATCHES "sm_3[0-9]")
+                       # sm_3x
+                       set(cuda_arch_flags "--maxrregcount=32")
                endif()
+
+               set(cuda_math_flags "--use_fast_math")
                
                if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
                        message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
index 6459c3ed1831443b87e40f04cc7c689714738469..a0522d9ba8e520c218d6fd81187610e59abf3a41 100644 (file)
@@ -86,33 +86,13 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
     for arch in cuda_archs:
         cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
 
-               # build flags depending on CUDA version and arch
-        if cuda_version < 50:
-            if arch == "sm_35":
-                print("Can't build kernel for CUDA sm_35 architecture, skipping")
-                continue
-
-            # CUDA 4.x
-            if arch.startswith("sm_1"):
-                # sm_1x
-                cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0"
-            elif arch.startswith("sm_2"):
-                # sm_2x
-                cuda_arch_flags = "--maxrregcount=24"
-            else:
-                # sm_3x
-                cuda_arch_flags = "--maxrregcount=32"
-        else:
-            # CUDA 5.x
-            if arch.startswith("sm_1"):
-                # sm_1x
-                cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"
-            elif arch.startswith("sm_2"):
-                # sm_2x
-                cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
-            else:
-                # sm_3x
-                cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
+        # CUDA 5.x build flags for different archs
+        if arch.startswith("sm_2"):
+            # sm_2x
+            cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
+        elif arch.startswith("sm_3"):
+            # sm_3x
+            cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
 
         command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file)
 
index ee71a0cfcf4b06ad26007c87db081d546aa829c0..81630caed9a540b636e9ca1fda7bd25172706d9a 100644 (file)
@@ -36,15 +36,8 @@ CCL_NAMESPACE_BEGIN
 /* ShaderData setup from incoming ray */
 
 #ifdef __OBJECT_MOTION__
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
+__device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
 {
-       /* note that this is a separate non-inlined function to work around crash
-        * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
        if(sd->flag & SD_OBJECT_MOTION) {
                sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
                sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
@@ -56,12 +49,7 @@ void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float tim
 }
 #endif
 
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
+__device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
        const Intersection *isect, const Ray *ray, int bounce)
 {
 #ifdef __INSTANCING__
@@ -249,12 +237,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
 
 /* ShaderData setup from position sampled on mesh */
 
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
+__device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
        const float3 P, const float3 Ng, const float3 I,
        int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment)
 {