Cycles: update build configurations to include CUDA sm_35 architecture.
author Brecht Van Lommel <brechtvanlommel@pandora.be>
Thu, 20 Jun 2013 13:10:47 +0000 (13:10 +0000)
committer Brecht Van Lommel <brechtvanlommel@pandora.be>
Thu, 20 Jun 2013 13:10:47 +0000 (13:10 +0000)
When using a compiler older than CUDA 5.0, it will give a warning and skip this architecture.

15 files changed:
CMakeLists.txt
build_files/buildbot/config/user-config-cuda-glibc211-i686.py
build_files/buildbot/config/user-config-cuda-glibc211-x86_64.py
build_files/buildbot/config/user-config-mac-i386.py
build_files/buildbot/config/user-config-mac-x86_64.py
build_files/scons/config/darwin-config.py
build_files/scons/config/linux-config.py
build_files/scons/config/win32-mingw-config.py
build_files/scons/config/win32-vc-config.py
build_files/scons/config/win64-mingw-config.py
build_files/scons/config/win64-vc-config.py
intern/cycles/device/device_cuda.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/SConscript
intern/cycles/util/util_types.h

index e2dda8553d66fef080df3c489986f799bcf1ba80..acd01f43fcdc0f5ba8ab99956ea8f086d6510324 100644 (file)
@@ -264,7 +264,7 @@ option(WITH_CYCLES                                  "Enable cycles Render Engine" ON)
 option(WITH_CYCLES_TEST                                "Build cycles test application" OFF)
 option(WITH_CYCLES_OSL                         "Build Cycles with OSL support" OFF)
 option(WITH_CYCLES_CUDA_BINARIES       "Build cycles CUDA binaries" OFF)
-set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 CACHE STRING "CUDA architectures to build binaries for")
+set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 CACHE STRING "CUDA architectures to build binaries for")
 mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 
index 0e56c4326f901e6d51c890f1fd31c6203e8e4be9..69053d7ff3948915b700595f7a81d42ae2d69eae 100644 (file)
@@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-i686'
 BF_INSTALLDIR = '../blender-install/linux-glibc211-i686'
 BF_NUMJOBS = 1
 
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
index 59725522e1868a1c56814d83b2c3a5f901f0bc2c..c9b765f55acaccfb4e3a04cc5e2f7d17968007b8 100644 (file)
@@ -2,4 +2,4 @@ BF_BUILDDIR = '../blender-build/linux-glibc211-x86_64'
 BF_INSTALLDIR = '../blender-install/linux-glibc211-x86_64'
 BF_NUMJOBS = 1
 
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
index e7b1b68810008a1951a0cf02b3e92c5dcc3a06da..60f8b5705ac3f90054537409419d8451a37495d7 100644 (file)
@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
 
 WITH_BF_CYCLES_CUDA_BINARIES = True
 BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
 
 #Freestyle
 WITH_BF_FREESTYLE = True
index f5f6bf84796344b967748743bc6bbea49d912c34..4fab8d6d5666fc5b29851920b57492f9417b7369 100644 (file)
@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
 
 WITH_BF_CYCLES_CUDA_BINARIES = True
 BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
 
 #Freestyle
 WITH_BF_FREESTYLE = True
index 0e4137112102093797858bf2320f306042275ff5..f7192e3dd8151922ee01d011543f09dcdc61c311 100644 (file)
@@ -315,7 +315,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
 
 WITH_BF_CYCLES_CUDA_BINARIES = False
 BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
 
 #Freestyle
 WITH_BF_FREESTYLE = True
index 418d2ef41b8e5643bfc0271937c523f8befc2b7b..6106142f8a98beb6a2a61c03c9ababea49764e4a 100644 (file)
@@ -210,7 +210,7 @@ WITH_BF_CYCLES = WITH_BF_OIIO and WITH_BF_BOOST
 
 WITH_BF_CYCLES_CUDA_BINARIES = False
 BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
 
 WITH_BF_OPENMP = True
 
index 5d3c285f5301bb53865b3a542fd2b020d0527f85..d71feb8d2e92d05eab19623087e59917772da7ff 100644 (file)
@@ -149,7 +149,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada'
 WITH_BF_CYCLES = True
 WITH_BF_CYCLES_CUDA_BINARIES = False
 BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
 
 WITH_BF_OIIO = True
 BF_OIIO = LIBDIR + '/openimageio'
index bceab628b47fc3b8712606e0fe32a27d3b1a11b0..101d0402757e54101859503785063e4ab1903d4c 100644 (file)
@@ -215,7 +215,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
 #CUDA
 WITH_BF_CYCLES_CUDA_BINARIES = False
 #BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
 
 #Ray trace optimization
 WITH_BF_RAYOPTIMIZATION = True
index b817f24da11573985615ab488865711a51bab644..e82b6e8a73f49d08a04f25cc7e6c271d8b39b95e 100644 (file)
@@ -146,7 +146,7 @@ BF_OPENCOLLADA_LIBPATH = '${BF_OPENCOLLADA}/lib/opencollada'
 WITH_BF_CYCLES = True
 WITH_BF_CYCLES_CUDA_BINARIES = False
 BF_CYCLES_CUDA_NVCC = "" # Path to the NVIDIA CUDA compiler
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
 
 WITH_BF_OIIO = True
 BF_OIIO = LIBDIR + '/openimageio'
index c7bf2fa0bd253da50286cfb0e69f72e514134e4e..803cac144aba919be10d7446b79739c51418d3ea 100644 (file)
@@ -212,7 +212,7 @@ BF_BOOST_LIBPATH = '${BF_BOOST}/lib'
 #CUDA
 WITH_BF_CYCLES_CUDA_BINARIES = False
 #BF_CYCLES_CUDA_NVCC = "" # Path to the nvidia compiler
-BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
+BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30', 'sm_35']
 
 #Ray trace optimization
 WITH_BF_RAYOPTIMIZATION = True
index 1f96ed0ae839dbf816701d1330bff9939c47e01d..27c54af515380aaa877ff5de0c5d7b55b19819de 100644 (file)
@@ -304,7 +304,7 @@ public:
                        }
                }
                else {
-                       /* CUDA 4.x */
+                       /* CUDA 5.x */
                        if(major == 1) {
                                /* sm_1x */
                                arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
index 5e9dd15b812e28b93fa1b0e1adb512b1344835ad..2fa1393e935581494784f92f97ac879882150047 100644 (file)
@@ -130,6 +130,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
        string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT})
        set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
 
+       # warn for other versions
+       if(CUDA_VERSION MATCHES "50")
+       else()
+               message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported")
+       endif()
+
        # build for each arch
        set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
        set(cuda_cubins)
@@ -139,12 +145,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
 
                set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
 
-               # warn for other versions
-               if(CUDA_VERSION MATCHES "50")
-               else()
-                       message(STATUS "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported")
-               endif()
-
                # build flags depending on CUDA version and arch
                if(CUDA_VERSION LESS 50)
                        # CUDA 4.x
@@ -176,13 +176,17 @@ if(WITH_CYCLES_CUDA_BINARIES)
                        set(cuda_math_flags "--use_fast_math")
                endif()
                
-               add_custom_command(
-                       OUTPUT ${cuda_cubin}
-                       COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
-                       DEPENDS ${cuda_sources})
+               if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
+                       message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
+               else()
+                       add_custom_command(
+                               OUTPUT ${cuda_cubin}
+                               COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
+                               DEPENDS ${cuda_sources})
 
-               delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
-               list(APPEND cuda_cubins ${cuda_cubin})
+                       delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
+                       list(APPEND cuda_cubins ${cuda_cubin})
+               endif()
        endforeach()
 
        add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
index 353ec1ce9d842f8da04dcfabc08558abd710a117..6459c3ed1831443b87e40f04cc7c689714738469 100644 (file)
@@ -88,6 +88,10 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
 
                # build flags depending on CUDA version and arch
         if cuda_version < 50:
+            if arch == "sm_35":
+                print("Can't build kernel for CUDA sm_35 architecture, skipping")
+                continue
+
             # CUDA 4.x
             if arch.startswith("sm_1"):
                 # sm_1x
index 4fd1e9d8807789157948c09ad318b91cd052f3ed..a67c55acf3d02bb202387718da35806e2ef1cb66 100644 (file)
@@ -517,7 +517,7 @@ __device_inline const __m128 shuffle_swap(const __m128& a, const shuffle_swap_t&
 
 #else
 
-/* somewhat slower version for SSE3 */
+/* somewhat slower version for SSE2 */
 typedef int shuffle_swap_t;
 
 __device_inline const shuffle_swap_t shuffle_swap_identity(void)