Cycles: Add an option to build only a single kernel which fits the current CPU
author Sergey Sharybin <sergey.vfx@gmail.com>
Fri, 25 Mar 2016 15:09:05 +0000 (16:09 +0100)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Fri, 25 Mar 2016 15:09:05 +0000 (16:09 +0100)
This seems quite useful for development: you don't need to wait for
all the kernels to be re-compiled when working on a new feature, which
speeds up iteration.

Marked as an advanced option, so if it doesn't work well in practice,
it's safe to revert.

CMakeLists.txt
intern/cycles/CMakeLists.txt
intern/cycles/kernel/kernels/cpu/kernel.cpp
intern/cycles/util/util_simd.cpp

index 4010ee6416f632e32092332e3160b12ffd982d58..12f58706103b1edb4afaab94acce86f1a6165441 100644 (file)
@@ -390,8 +390,10 @@ mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 option(WITH_CYCLES_LOGGING     "Build Cycles with logging support" ON)
 option(WITH_CYCLES_DEBUG       "Build Cycles with extra debug capabilities" OFF)
+option(WITH_CYCLES_NATIVE_ONLY "Build Cycles with native kernel only (which fits current CPU, use for development only)" OFF)
 mark_as_advanced(WITH_CYCLES_LOGGING)
 mark_as_advanced(WITH_CYCLES_DEBUG)
+mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
 
 option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
 mark_as_advanced(WITH_CUDA_DYNLOAD)
index 13b5de360d5c18f44585b14e8fdc1f0b877d4371..3b6c25c370e597121dd57601909dd2dcb93745d5 100644 (file)
@@ -14,7 +14,15 @@ include(cmake/external_libs.cmake)
 # todo: this code could be refactored a bit to avoid duplication
 # note: CXX_HAS_SSE is needed in case passing SSE flags fails altogether (gcc-arm)
 
-if(NOT WITH_CPU_SSE)
+if(WITH_CYCLES_NATIVE_ONLY)
+       set(CXX_HAS_SSE FALSE)
+       set(CXX_HAS_AVX FALSE)
+       set(CXX_HAS_AVX2 FALSE)
+       add_definitions(
+               -DWITH_KERNEL_NATIVE
+       )
+       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+elseif(NOT WITH_CPU_SSE)
        set(CXX_HAS_SSE FALSE)
        set(CXX_HAS_AVX FALSE)
        set(CXX_HAS_AVX2 FALSE)
index 643eefcdc6c9f030eab42b05c67382758a735cf4..45091f6f33dde298a68fd1bd6b94312dfb7f0529 100644 (file)
 
 /* CPU kernel entry points */
 
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */
+/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this
+ * one with SSE2 intrinsics.
+ */
 #if defined(__x86_64__) || defined(_M_X64)
 #  define __KERNEL_SSE2__
 #endif
 
+/* When building kernel for native machine detect kernel features from the flags
+ * set by compiler.
+ */
+#ifdef WITH_KERNEL_NATIVE
+#  ifdef __SSE2__
+#    ifndef __KERNEL_SSE2__
+#      define __KERNEL_SSE2__
+#    endif
+#  endif
+#  ifdef __SSE3__
+#    define __KERNEL_SSE3__
+#  endif
+#  ifdef __SSSE3__
+#    define __KERNEL_SSSE3__
+#  endif
+#  ifdef __SSE4_1__
+#    define __KERNEL_SSE41__
+#  endif
+#  ifdef __AVX__
+#    define __KERNEL_AVX__
+#  endif
+#  ifdef __AVX2__
+#    define __KERNEL_AVX2__
+#  endif
+#endif
+
 /* quiet unused define warnings */
 #if defined(__KERNEL_SSE2__)
     /* do nothing */
index eb9e32800e18473c3d58fc79fa77a787aebd8303..de2df6125782b06404e10ead3542cf76f5738844 100644 (file)
@@ -15,7 +15,8 @@
  * limitations under the License.
  */
 
-#ifdef WITH_KERNEL_SSE2
+#if (defined(WITH_KERNEL_SSE2)) || \
+    (defined(WITH_KERNEL_NATIVE) && defined(__SSE2__))
 
 #define __KERNEL_SSE2__
 #include "util_simd.h"