Cycles: Added Embree as BVH option for CPU renders.
author Stefan Werner <stefan.werner@tangent-animation.com>
Wed, 7 Nov 2018 11:58:12 +0000 (12:58 +0100)
committer Stefan Werner <stefan.werner@tangent-animation.com>
Wed, 7 Nov 2018 11:58:12 +0000 (12:58 +0100)
Note that this is turned off by default and must be enabled at build time with the CMake WITH_CYCLES_EMBREE flag.
Embree must be built as a static library with ray masking turned on; the `make deps` scripts have been updated accordingly.
There, Embree is off by default too and must be enabled with the WITH_EMBREE flag.

Using Embree allows for much faster rendering of deformation motion blur while reducing the memory footprint.

TODO: GPU implementation, deduplication of data, leveraging more of Embree's features (e.g. tessellation cache).

Differential Revision: https://developer.blender.org/D3682

37 files changed:
CMakeLists.txt
build_files/build_environment/CMakeLists.txt
build_files/build_environment/cmake/embree.cmake [new file with mode: 0644]
build_files/build_environment/cmake/harvest.cmake
build_files/build_environment/cmake/options.cmake
build_files/build_environment/cmake/versions.cmake
build_files/cmake/Modules/FindEmbree.cmake [new file with mode: 0644]
build_files/cmake/macros.cmake
build_files/cmake/platform/platform_apple.cmake
build_files/cmake/platform/platform_unix.cmake
build_files/cmake/platform/platform_win32.cmake
build_files/cmake/platform/platform_win32_msvc.cmake [new file with mode: 0644]
intern/cycles/CMakeLists.txt
intern/cycles/app/CMakeLists.txt
intern/cycles/blender/addon/properties.py
intern/cycles/blender/addon/ui.py
intern/cycles/blender/blender_python.cpp
intern/cycles/blender/blender_sync.cpp
intern/cycles/bvh/CMakeLists.txt
intern/cycles/bvh/bvh.cpp
intern/cycles/bvh/bvh.h
intern/cycles/bvh/bvh_embree.cpp [new file with mode: 0644]
intern/cycles/bvh/bvh_embree.h [new file with mode: 0644]
intern/cycles/bvh/bvh_params.h
intern/cycles/cmake/external_libs.cmake
intern/cycles/device/device_cpu.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/bvh/bvh.h
intern/cycles/kernel/bvh/bvh_embree.h [new file with mode: 0644]
intern/cycles/kernel/geom/geom_curve_intersect.h
intern/cycles/kernel/geom/geom_object.h
intern/cycles/kernel/kernel_types.h
intern/cycles/render/mesh.cpp
intern/cycles/render/scene.cpp
intern/cycles/render/scene.h
intern/cycles/util/util_transform.h
source/blender/python/intern/CMakeLists.txt

index fc7f3ac..eb8df07 100644 (file)
@@ -405,6 +405,7 @@ option(WITH_CYCLES                                  "Enable Cycles Render Engine" ON)
 option(WITH_CYCLES_STANDALONE          "Build Cycles standalone application" OFF)
 option(WITH_CYCLES_STANDALONE_GUI      "Build Cycles standalone with GUI" OFF)
 option(WITH_CYCLES_OSL                         "Build Cycles with OSL support" ${_init_CYCLES_OSL})
+option(WITH_CYCLES_EMBREE                      "Build Cycles with Embree support" OFF)
 option(WITH_CYCLES_OPENSUBDIV          "Build Cycles with OpenSubdiv support" ${_init_CYCLES_OPENSUBDIV})
 option(WITH_CYCLES_CUDA_BINARIES       "Build Cycles CUDA binaries" OFF)
 option(WITH_CYCLES_CUBIN_COMPILER      "Build cubins with nvrtc based compiler instead of nvcc" OFF)
index 00e486b..8c1d38c 100644 (file)
@@ -97,6 +97,10 @@ if(WITH_WEBP)
        include(cmake/webp.cmake)
 endif()
 
+if(WITH_EMBREE)
+       include(cmake/embree.cmake)
+endif()
+
 if(WIN32)
        # HMD branch deps
        include(cmake/hidapi.cmake)
diff --git a/build_files/build_environment/cmake/embree.cmake b/build_files/build_environment/cmake/embree.cmake
new file mode 100644 (file)
index 0000000..34e5904
--- /dev/null
@@ -0,0 +1,46 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+# Builds Embree as an external project for the dependency build environment.
+#
+# Note the utility apps may use png/tiff/gif system libraries, but the
+# library itself does not depend on them, so should give no problems.
+# NOTE(review): the note above reads as if carried over from another
+# dependency script; tutorials are disabled below -- confirm it still applies.
+
+# Options handed to Embree's own CMake configure step. The static library and
+# ray mask settings are the ones the Cycles integration depends on.
+set(EMBREE_EXTRA_ARGS
+       -DEMBREE_ISPC_SUPPORT=OFF
+       -DEMBREE_TUTORIALS=OFF
+       -DEMBREE_STATIC_LIB=ON
+       -DEMBREE_RAY_MASK=ON
+       -DEMBREE_FILTER_FUNCTION=ON
+       -DEMBREE_BACKFACE_CULLING=OFF
+       -DEMBREE_TASKING_SYSTEM=INTERNAL
+       -DEMBREE_MAX_ISA=AVX2
+)
+
+# On Windows this holds "<BUILD_MODE>/", elsewhere it is empty.
+# NOTE(review): EMBREE_BUILD_DIR is not referenced anywhere else in this
+# file -- confirm whether it is still needed.
+if(WIN32)
+       set(EMBREE_BUILD_DIR ${BUILD_MODE}/)
+else()
+       set(EMBREE_BUILD_DIR)
+endif()
+
+# Fetch the archive named by EMBREE_URI (checked against the MD5 in
+# EMBREE_HASH), configure it with the default flags plus EMBREE_EXTRA_ARGS,
+# and install the result into ${LIBDIR}/embree.
+ExternalProject_Add(external_embree
+       URL ${EMBREE_URI}
+       DOWNLOAD_DIR ${DOWNLOAD_DIR}
+       URL_HASH MD5=${EMBREE_HASH}
+       PREFIX ${BUILD_DIR}/embree
+       CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${DEFAULT_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
+       INSTALL_DIR ${LIBDIR}/embree
+)
index 4ec71bf..d0d8b4c 100644 (file)
@@ -57,7 +57,9 @@ if(BUILD_MODE STREQUAL Release)
                                # hidapi
                                ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/hidapi/ ${HARVEST_TARGET}/hidapi/ &&
                                # webp, straight up copy
-                               ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/webp ${HARVEST_TARGET}/webp
+                               ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/webp ${HARVEST_TARGET}/webp &&
+                               # embree
+                               ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/embree ${HARVEST_TARGET}/embree
                DEPENDS
        )
 endif()
@@ -191,5 +193,7 @@ harvest(vpx/lib ffmpeg/lib "*.a")
 harvest(webp/lib ffmpeg/lib "*.a")
 harvest(x264/lib ffmpeg/lib "*.a")
 harvest(xvidcore/lib ffmpeg/lib "*.a")
+harvest(embree/include embree/include "*.h")
+harvest(embree/lib embree/lib "*.a")
 
 endif()
index f312555..20e9750 100644 (file)
@@ -20,6 +20,7 @@ if(WIN32)
        option(ENABLE_MINGW64 "Enable building of ffmpeg/iconv/libsndfile/lapack/fftw3 by installing mingw64" ON)
 endif()
 option(WITH_WEBP "Enable building of oiio with webp support" OFF)
+option(WITH_EMBREE "Enable building of Embree" OFF)
 set(MAKE_THREADS 1 CACHE STRING "Number of threads to run make with")
 
 if(NOT BUILD_MODE)
index 7bd994c..85f5976 100644 (file)
@@ -298,3 +298,7 @@ set(SSL_HASH ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99)
 set(SQLITE_VERSION 3.24.0)
 set(SQLITE_URI https://www.sqlite.org/2018/sqlite-src-3240000.zip)
 set(SQLITE_HASH fb558c49ee21a837713c4f1e7e413309aabdd9c7)
+
+set(EMBREE_VERSION 3.2.4)
+set(EMBREE_URI https://github.com/embree/embree/archive/v${EMBREE_VERSION}.zip)
+set(EMBREE_HASH 3d4a1147002ff43939d45140aa9d6fb8)
diff --git a/build_files/cmake/Modules/FindEmbree.cmake b/build_files/cmake/Modules/FindEmbree.cmake
new file mode 100644 (file)
index 0000000..d3ef8c6
--- /dev/null
@@ -0,0 +1,108 @@
+# - Find Embree library
+# Find the native Embree includes and library
+# This module defines
+#  EMBREE_INCLUDE_DIRS, where to find rtcore.h, Set when
+#                            EMBREE_INCLUDE_DIR is found.
+#  EMBREE_LIBRARIES, libraries to link against to use Embree.
+#  EMBREE_ROOT_DIR, The base directory to search for Embree.
+#                        This can also be an environment variable.
+#  EMBREE_FOUND, If false, do not try to use Embree.
+#
+# also defined, but not for general use are
+#  EMBREE_LIBRARY, where to find the Embree library.
+
+#=============================================================================
+# Copyright 2018 Blender Foundation.
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+
+# If EMBREE_ROOT_DIR was not set as a CMake variable but is defined in the
+# environment, use the environment value. The expansion is quoted so that an
+# unset/empty environment variable still yields a well-formed comparison
+# instead of disappearing from the argument list.
+IF(NOT EMBREE_ROOT_DIR AND NOT "$ENV{EMBREE_ROOT_DIR}" STREQUAL "")
+  SET(EMBREE_ROOT_DIR "$ENV{EMBREE_ROOT_DIR}")
+ENDIF()
+
+SET(_embree_SEARCH_DIRS
+  ${EMBREE_ROOT_DIR}
+  /usr/local
+  /sw # Fink
+  /opt/local # DarwinPorts
+  /opt/embree
+  /opt/lib/embree
+)
+
+FIND_PATH(EMBREE_INCLUDE_DIR
+  NAMES
+    embree3/rtcore.h
+  HINTS
+    ${_embree_SEARCH_DIRS}
+  PATH_SUFFIXES
+    include
+)
+
+
+SET(_embree_FIND_COMPONENTS
+  embree_avx
+  embree_avx2
+  embree_sse42
+  embree3
+  lexers
+  math
+  simd
+  sys
+  tasking
+)
+
+# Locate every component library listed in _embree_FIND_COMPONENTS.
+#  _embree_LIBRARIES     collects the resolved paths, in component order.
+#  _embree_LIBRARY_VARS  collects the names of the per-component cache
+#                        variables so each one can be validated on its own.
+SET(_embree_LIBRARIES)
+SET(_embree_LIBRARY_VARS)
+FOREACH(COMPONENT ${_embree_FIND_COMPONENTS})
+  STRING(TOUPPER ${COMPONENT} UPPERCOMPONENT)
+
+  FIND_LIBRARY(EMBREE_${UPPERCOMPONENT}_LIBRARY
+    NAMES
+      ${COMPONENT}
+    HINTS
+      ${_embree_SEARCH_DIRS}
+    PATH_SUFFIXES
+      lib64 lib
+    )
+  LIST(APPEND _embree_LIBRARIES "${EMBREE_${UPPERCOMPONENT}_LIBRARY}")
+  LIST(APPEND _embree_LIBRARY_VARS EMBREE_${UPPERCOMPONENT}_LIBRARY)
+ENDFOREACH()
+
+
+# Legacy single-library lookup. The result is not used when assembling
+# EMBREE_LIBRARIES below; it is kept because the module header lists
+# EMBREE_LIBRARY among the variables it defines.
+FIND_LIBRARY(EMBREE_LIBRARY
+  NAMES
+    libembree3
+  HINTS
+    ${_embree_SEARCH_DIRS}
+  PATH_SUFFIXES
+    lib64 lib
+)
+
+# Handle the QUIETLY and REQUIRED arguments and set EMBREE_FOUND to TRUE only
+# if the include directory and every component library were found.
+#
+# The package name must be spelled exactly as in find_package(Embree ...),
+# otherwise the REQUIRED/QUIET options of the caller are not picked up.
+# Each component variable is passed individually because a list value only
+# evaluates to false when its last element ends in -NOTFOUND, which would
+# let a missing component in the middle of the list go unnoticed.
+INCLUDE(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(Embree DEFAULT_MSG
+    _embree_LIBRARIES ${_embree_LIBRARY_VARS} EMBREE_INCLUDE_DIR)
+UNSET(_embree_LIBRARY_VARS)
+
+# The upper-case EMBREE_FOUND result is set alongside Embree_FOUND.
+IF(EMBREE_FOUND)
+  SET(EMBREE_LIBRARIES ${_embree_LIBRARIES})
+  SET(EMBREE_INCLUDE_DIRS ${EMBREE_INCLUDE_DIR})
+ENDIF()
+
+MARK_AS_ADVANCED(
+  EMBREE_INCLUDE_DIR
+)
+
+FOREACH(COMPONENT ${_embree_FIND_COMPONENTS})
+  STRING(TOUPPER ${COMPONENT} UPPERCOMPONENT)
+  MARK_AS_ADVANCED(EMBREE_${UPPERCOMPONENT}_LIBRARY)
+ENDFOREACH()
+
+UNSET(_embree_SEARCH_DIRS)
+UNSET(_embree_FIND_COMPONENTS)
+UNSET(_embree_LIBRARIES)
index acbbdef..2b4d454 100644 (file)
@@ -435,6 +435,9 @@ function(setup_liblinks
        if(WITH_CYCLES_OSL)
                target_link_libraries(${target} ${OSL_LIBRARIES})
        endif()
+       if(WITH_CYCLES_EMBREE)
+               target_link_libraries(${target} ${EMBREE_LIBRARIES})
+       endif()
        if(WITH_BOOST)
                target_link_libraries(${target} ${BOOST_LIBRARIES})
                if(Boost_USE_STATIC_LIBS AND Boost_USE_ICU)
index c7bef56..b57c351 100644 (file)
@@ -378,6 +378,11 @@ if(WITH_CYCLES_OSL)
        endif()
 endif()
 
+if(WITH_CYCLES_EMBREE)
+       find_package(Embree 3.2.4 REQUIRED)
+       set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -Xlinker -stack_size -Xlinker 0x100000")
+endif()
+
 if(WITH_OPENMP)
        execute_process(COMMAND ${CMAKE_C_COMPILER} --version OUTPUT_VARIABLE COMPILER_VENDOR)
        string(SUBSTRING "${COMPILER_VENDOR}" 0 5 VENDOR_NAME) # truncate output
index f8643a5..6880296 100644 (file)
@@ -359,6 +359,10 @@ if(WITH_OPENCOLORIO)
        endif()
 endif()
 
+if(WITH_CYCLES_EMBREE)
+       find_package(Embree 3.2.4 REQUIRED)
+endif()
+
 if(WITH_LLVM)
        if(EXISTS ${LIBDIR})
                set(LLVM_STATIC ON)
index 440f514..1bf0468 100644 (file)
@@ -608,6 +608,10 @@ if(WITH_CYCLES_OSL)
        endif()
 endif()
 
+if(WITH_CYCLES_EMBREE)
+       find_package(Embree 3.2.4 REQUIRED)
+endif()
+
 if (WINDOWS_PYTHON_DEBUG)
        # Include the system scripts in the blender_python_system_scripts project.
        FILE(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/release/scripts/*.*" )
diff --git a/build_files/cmake/platform/platform_win32_msvc.cmake b/build_files/cmake/platform/platform_win32_msvc.cmake
new file mode 100644 (file)
index 0000000..9d9b53b
--- /dev/null
@@ -0,0 +1,506 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2016, Blender Foundation
+# All rights reserved.
+#
+# Contributor(s): Sergey Sharybin.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+# Libraries configuration for Windows when compiling with MSVC.
+
+# Emit a configure-time warning that hard-coded locations are being used for
+# `package_name`. The warning is only printed when WITH_WINDOWS_FIND_MODULES
+# is enabled; otherwise the macro does nothing.
+macro(warn_hardcoded_paths package_name
+       )
+       if(WITH_WINDOWS_FIND_MODULES)
+               message(WARNING "Using HARDCODED ${package_name} locations")
+       endif(WITH_WINDOWS_FIND_MODULES)
+endmacro()
+
+# Run find_package(package_name) when WITH_WINDOWS_FIND_MODULES is enabled;
+# otherwise do nothing. Only the first argument is forwarded, so additional
+# arguments given by callers (e.g. REQUIRED) are not passed to find_package().
+macro(windows_find_package package_name
+       )
+       if(WITH_WINDOWS_FIND_MODULES)
+               find_package(${package_name})
+       endif(WITH_WINDOWS_FIND_MODULES)
+endmacro()
+
+# Like windows_find_package, but forwards the complete argument list (ARGV)
+# to find_package() when WITH_WINDOWS_FIND_MODULES is enabled.
+macro(find_package_wrapper)
+       if(WITH_WINDOWS_FIND_MODULES)
+               find_package(${ARGV})
+       endif()
+endmacro()
+
+add_definitions(-DWIN32)
+# Minimum MSVC Version
+if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+       if(MSVC_VERSION EQUAL 1800)
+               set(_min_ver "18.0.31101")
+               if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${_min_ver})
+                       message(FATAL_ERROR
+                               "Visual Studio 2013 (Update 4, ${_min_ver}) required, "
+                               "found (${CMAKE_CXX_COMPILER_VERSION})")
+               endif()
+       endif()
+       if(MSVC_VERSION EQUAL 1900)
+               set(_min_ver "19.0.24210")
+               if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${_min_ver})
+                       message(FATAL_ERROR
+                               "Visual Studio 2015 (Update 3, ${_min_ver}) required, "
+                               "found (${CMAKE_CXX_COMPILER_VERSION})")
+               endif()
+       endif()
+endif()
+unset(_min_ver)
+
+# needed for some MSVC installations
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SAFESEH:NO")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /SAFESEH:NO")
+set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /SAFESEH:NO")
+
+list(APPEND PLATFORM_LINKLIBS
+       ws2_32 vfw32 winmm kernel32 user32 gdi32 comdlg32
+       advapi32 shfolder shell32 ole32 oleaut32 uuid psapi Dbghelp
+)
+
+if(WITH_INPUT_IME)
+       list(APPEND PLATFORM_LINKLIBS imm32)
+endif()
+
+add_definitions(
+       -D_CRT_NONSTDC_NO_DEPRECATE
+       -D_CRT_SECURE_NO_DEPRECATE
+       -D_SCL_SECURE_NO_DEPRECATE
+       -D_CONSOLE
+       -D_LIB
+)
+
+# MSVC11 needs _ALLOW_KEYWORD_MACROS to build
+add_definitions(-D_ALLOW_KEYWORD_MACROS)
+
+# We want to support Vista level ABI
+add_definitions(-D_WIN32_WINNT=0x600)
+
+# Make cmake find the msvc redistributables
+set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP TRUE)
+include(InstallRequiredSystemLibraries)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /nologo /J /Gd /MP /EHsc")
+set(CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} /nologo /J /Gd /MP")
+
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
+set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /MT")
+set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} /MT")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /MT")
+set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /MT")
+
+set(PLATFORM_LINKFLAGS "/SUBSYSTEM:CONSOLE /STACK:2097152 /INCREMENTAL:NO ")
+set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} /NODEFAULTLIB:msvcrt.lib /NODEFAULTLIB:msvcmrt.lib /NODEFAULTLIB:msvcurt.lib /NODEFAULTLIB:msvcrtd.lib ")
+
+# Ignore linker warnings that are meaningless for us.
+set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} /ignore:4049 /ignore:4217 /ignore:4221")
+set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /ignore:4221")
+
+if(CMAKE_CL_64)
+       set(PLATFORM_LINKFLAGS "/MACHINE:X64 ${PLATFORM_LINKFLAGS}")
+else()
+       set(PLATFORM_LINKFLAGS "/MACHINE:IX86 /LARGEADDRESSAWARE ${PLATFORM_LINKFLAGS}")
+endif()
+
+set(PLATFORM_LINKFLAGS_DEBUG "/IGNORE:4099 /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libc.lib")
+
+# Choose the directory holding the pre-compiled libraries, unless the caller
+# already defined LIBDIR. The directory name is derived from the target
+# bitness and from the Visual Studio generation reported by MSVC_VERSION.
+if(NOT DEFINED LIBDIR)
+
+       # Setup 32bit and 64bit windows systems
+       if(CMAKE_CL_64)
+               message(STATUS "64 bit compiler detected.")
+               set(LIBDIR_BASE "win64")
+       else()
+               message(STATUS "32 bit compiler detected.")
+               set(LIBDIR_BASE "windows")
+       endif()
+       # Visual Studio 2017 reports 1910 for its first release and higher
+       # values (up to 1919) for later updates, so match the whole range
+       # rather than only 1910.
+       if(MSVC_VERSION GREATER 1909 AND MSVC_VERSION LESS 1920)
+               message(STATUS "Visual Studio 2017 detected.")
+               set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_BASE}_vc14)
+       elseif(MSVC_VERSION EQUAL 1900)
+               message(STATUS "Visual Studio 2015 detected.")
+               set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_BASE}_vc14)
+       else()
+               message(STATUS "Visual Studio 2013 detected.")
+               set(LIBDIR ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_BASE}_vc12)
+       endif()
+else()
+       message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")
+endif()
+# Abort configuration early when the chosen directory does not exist.
+if(NOT EXISTS "${LIBDIR}/")
+       message(FATAL_ERROR "Windows requires pre-compiled libs at: '${LIBDIR}'")
+endif()
+
+# Add each of our libraries to our cmake_prefix_path so find_package() could work
+file(GLOB children RELATIVE ${LIBDIR} ${LIBDIR}/*)
+foreach(child ${children})
+       if(IS_DIRECTORY ${LIBDIR}/${child})
+               list(APPEND CMAKE_PREFIX_PATH  ${LIBDIR}/${child})
+       endif()
+endforeach()
+
+set(ZLIB_INCLUDE_DIRS ${LIBDIR}/zlib/include)
+set(ZLIB_LIBRARIES ${LIBDIR}/zlib/lib/libz_st.lib)
+set(ZLIB_INCLUDE_DIR ${LIBDIR}/zlib/include)
+set(ZLIB_LIBRARY ${LIBDIR}/zlib/lib/libz_st.lib)
+set(ZLIB_DIR ${LIBDIR}/zlib)
+
+windows_find_package(zlib) # we want to find before finding things that depend on it like png
+windows_find_package(png)
+
+if(NOT PNG_FOUND)
+       warn_hardcoded_paths(libpng)
+       set(PNG_PNG_INCLUDE_DIR ${LIBDIR}/png/include)
+       set(PNG_LIBRARIES libpng)
+       set(PNG "${LIBDIR}/png")
+       set(PNG_INCLUDE_DIRS "${PNG}/include")
+       set(PNG_LIBPATH ${PNG}/lib) # not cmake defined
+endif()
+
+set(JPEG_NAMES ${JPEG_NAMES} libjpeg)
+windows_find_package(jpeg REQUIRED)
+if(NOT JPEG_FOUND)
+       warn_hardcoded_paths(jpeg)
+       set(JPEG_INCLUDE_DIR ${LIBDIR}/jpeg/include)
+       set(JPEG_LIBRARIES ${LIBDIR}/jpeg/lib/libjpeg.lib)
+endif()
+
+set(PTHREADS_INCLUDE_DIRS ${LIBDIR}/pthreads/include)
+set(PTHREADS_LIBRARIES ${LIBDIR}/pthreads/lib/pthreadVC2.lib)
+
+set(FREETYPE ${LIBDIR}/freetype)
+set(FREETYPE_INCLUDE_DIRS
+       ${LIBDIR}/freetype/include
+       ${LIBDIR}/freetype/include/freetype2
+)
+set(FREETYPE_LIBRARY ${LIBDIR}/freetype/lib/freetype2ST.lib)
+windows_find_package(freetype REQUIRED)
+
+if(WITH_FFTW3)
+       set(FFTW3 ${LIBDIR}/fftw3)
+       set(FFTW3_LIBRARIES libfftw)
+       set(FFTW3_INCLUDE_DIRS ${FFTW3}/include)
+       set(FFTW3_LIBPATH ${FFTW3}/lib)
+endif()
+
+if(WITH_OPENCOLLADA)
+       set(OPENCOLLADA ${LIBDIR}/opencollada)
+
+       set(OPENCOLLADA_INCLUDE_DIRS
+               ${OPENCOLLADA}/include/opencollada/COLLADAStreamWriter
+               ${OPENCOLLADA}/include/opencollada/COLLADABaseUtils
+               ${OPENCOLLADA}/include/opencollada/COLLADAFramework
+               ${OPENCOLLADA}/include/opencollada/COLLADASaxFrameworkLoader
+               ${OPENCOLLADA}/include/opencollada/GeneratedSaxParser
+       )
+
+       set(OPENCOLLADA_LIBRARIES
+               ${OPENCOLLADA}/lib/opencollada/OpenCOLLADASaxFrameworkLoader.lib
+               ${OPENCOLLADA}/lib/opencollada/OpenCOLLADAFramework.lib
+               ${OPENCOLLADA}/lib/opencollada/OpenCOLLADABaseUtils.lib
+               ${OPENCOLLADA}/lib/opencollada/OpenCOLLADAStreamWriter.lib
+               ${OPENCOLLADA}/lib/opencollada/MathMLSolver.lib
+               ${OPENCOLLADA}/lib/opencollada/GeneratedSaxParser.lib
+               ${OPENCOLLADA}/lib/opencollada/xml.lib
+               ${OPENCOLLADA}/lib/opencollada/buffer.lib
+               ${OPENCOLLADA}/lib/opencollada/ftoa.lib
+       )
+
+       if(NOT WITH_LLVM)
+               list(APPEND OPENCOLLADA_LIBRARIES ${OPENCOLLADA}/lib/opencollada/UTF.lib)
+       endif()
+
+       set(PCRE_LIBRARIES
+               ${OPENCOLLADA}/lib/opencollada/pcre.lib
+       )
+endif()
+
+if(WITH_CODEC_FFMPEG)
+       set(FFMPEG_INCLUDE_DIRS
+               ${LIBDIR}/ffmpeg/include
+               ${LIBDIR}/ffmpeg/include/msvc
+       )
+       windows_find_package(FFMPEG)
+       if(NOT FFMPEG_FOUND)
+               warn_hardcoded_paths(ffmpeg)
+               set(FFMPEG_LIBRARY_VERSION 57)
+               set(FFMPEG_LIBRARY_VERSION_AVU 55)
+               set(FFMPEG_LIBRARIES
+                       ${LIBDIR}/ffmpeg/lib/avcodec.lib
+                       ${LIBDIR}/ffmpeg/lib/avformat.lib
+                       ${LIBDIR}/ffmpeg/lib/avdevice.lib
+                       ${LIBDIR}/ffmpeg/lib/avutil.lib
+                       ${LIBDIR}/ffmpeg/lib/swscale.lib
+                       )
+       endif()
+endif()
+
+if(WITH_IMAGE_OPENEXR)
+       set(OPENEXR_ROOT_DIR ${LIBDIR}/openexr)
+       set(OPENEXR_VERSION "2.1")
+       windows_find_package(OPENEXR REQUIRED)
+       if(NOT OPENEXR_FOUND)
+               warn_hardcoded_paths(OpenEXR)
+               set(OPENEXR ${LIBDIR}/openexr)
+               set(OPENEXR_INCLUDE_DIR ${OPENEXR}/include)
+               set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${OPENEXR}/include/OpenEXR)
+               set(OPENEXR_LIBPATH ${OPENEXR}/lib)
+               set(OPENEXR_LIBRARIES
+                       optimized ${OPENEXR_LIBPATH}/Iex-2_2.lib
+                       optimized ${OPENEXR_LIBPATH}/Half.lib
+                       optimized ${OPENEXR_LIBPATH}/IlmImf-2_2.lib
+                       optimized ${OPENEXR_LIBPATH}/Imath-2_2.lib
+                       optimized ${OPENEXR_LIBPATH}/IlmThread-2_2.lib
+                       debug ${OPENEXR_LIBPATH}/Iex-2_2_d.lib
+                       debug ${OPENEXR_LIBPATH}/Half_d.lib
+                       debug ${OPENEXR_LIBPATH}/IlmImf-2_2_d.lib
+                       debug ${OPENEXR_LIBPATH}/Imath-2_2_d.lib
+                       debug ${OPENEXR_LIBPATH}/IlmThread-2_2_d.lib
+               )
+       endif()
+endif()
+
+if(WITH_IMAGE_TIFF)
+       # Try to find tiff first then complain and set static and maybe wrong paths
+       windows_find_package(TIFF)
+       if(NOT TIFF_FOUND)
+               warn_hardcoded_paths(libtiff)
+               set(TIFF_LIBRARY ${LIBDIR}/tiff/lib/libtiff.lib)
+               set(TIFF_INCLUDE_DIR ${LIBDIR}/tiff/include)
+       endif()
+endif()
+
+if(WITH_JACK)
+       set(JACK_INCLUDE_DIRS
+               ${LIBDIR}/jack/include/jack
+               ${LIBDIR}/jack/include
+       )
+       set(JACK_LIBRARIES optimized ${LIBDIR}/jack/lib/libjack.lib debug ${LIBDIR}/jack/lib/libjack_d.lib)
+endif()
+
+if(WITH_PYTHON)
+       set(PYTHON_VERSION 3.5) # CACHE STRING)
+
+       string(REPLACE "." "" _PYTHON_VERSION_NO_DOTS ${PYTHON_VERSION})
+       # Use shared libs for vc2008 and vc2010 until we actually have vc2010 libs
+       set(PYTHON_LIBRARY ${LIBDIR}/python/lib/python${_PYTHON_VERSION_NO_DOTS}.lib)
+       unset(_PYTHON_VERSION_NO_DOTS)
+
+       # Shared includes for both vc2008 and vc2010
+       set(PYTHON_INCLUDE_DIR ${LIBDIR}/python/include/python${PYTHON_VERSION})
+
+       # uncached vars
+       set(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
+       set(PYTHON_LIBRARIES  "${PYTHON_LIBRARY}")
+endif()
+
+if(WITH_BOOST)
+       if(WITH_CYCLES_OSL)
+               set(boost_extra_libs wave)
+       endif()
+       if(WITH_INTERNATIONAL)
+               list(APPEND boost_extra_libs locale)
+       endif()
+       if(WITH_OPENVDB)
+               list(APPEND boost_extra_libs iostreams)
+       endif()
+       set(Boost_USE_STATIC_RUNTIME ON) # prefix lib
+       set(Boost_USE_MULTITHREADED ON) # suffix -mt
+       set(Boost_USE_STATIC_LIBS ON) # suffix -s
+       if (WITH_WINDOWS_FIND_MODULES)
+               find_package(Boost COMPONENTS date_time filesystem thread regex system ${boost_extra_libs})
+       endif (WITH_WINDOWS_FIND_MODULES)
+       if(NOT Boost_FOUND)
+               warn_hardcoded_paths(BOOST)
+               set(BOOST ${LIBDIR}/boost)
+               set(BOOST_INCLUDE_DIR ${BOOST}/include)
+               if(MSVC12)
+                       set(BOOST_LIBPATH ${BOOST}/lib)
+                       set(BOOST_POSTFIX "vc120-mt-s-1_60.lib")
+                       set(BOOST_DEBUG_POSTFIX "vc120-mt-sgd-1_60.lib")
+               else()
+                       set(BOOST_LIBPATH ${BOOST}/lib)
+                       set(BOOST_POSTFIX "vc140-mt-s-1_60.lib")
+                       set(BOOST_DEBUG_POSTFIX "vc140-mt-sgd-1_60.lib")
+               endif()
+               set(BOOST_LIBRARIES
+                       optimized libboost_date_time-${BOOST_POSTFIX}
+                       optimized libboost_filesystem-${BOOST_POSTFIX}
+                       optimized libboost_regex-${BOOST_POSTFIX}
+                       optimized libboost_system-${BOOST_POSTFIX}
+                       optimized libboost_thread-${BOOST_POSTFIX}
+                       debug libboost_date_time-${BOOST_DEBUG_POSTFIX}
+                       debug libboost_filesystem-${BOOST_DEBUG_POSTFIX}
+                       debug libboost_regex-${BOOST_DEBUG_POSTFIX}
+                       debug libboost_system-${BOOST_DEBUG_POSTFIX}
+                       debug libboost_thread-${BOOST_DEBUG_POSTFIX}
+               )
+               if(WITH_CYCLES_OSL)
+                       set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
+                               optimized libboost_wave-${BOOST_POSTFIX}
+                               debug libboost_wave-${BOOST_DEBUG_POSTFIX})
+               endif()
+               if(WITH_INTERNATIONAL)
+                       set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
+                               optimized libboost_locale-${BOOST_POSTFIX}
+                               debug libboost_locale-${BOOST_DEBUG_POSTFIX})
+               endif()
+       else() # we found boost using find_package
+               set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
+               set(BOOST_LIBRARIES ${Boost_LIBRARIES})
+               set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
+       endif()
+       set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
+endif()
+
+if(WITH_OPENIMAGEIO)
+       windows_find_package(OpenImageIO)
+       set(OPENIMAGEIO ${LIBDIR}/openimageio)
+       set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include)
+       set(OIIO_OPTIMIZED optimized OpenImageIO optimized OpenImageIO_Util)
+       set(OIIO_DEBUG debug OpenImageIO_d debug OpenImageIO_Util_d)
+       set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
+       set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
+       set(OPENIMAGEIO_DEFINITIONS "-DUSE_TBB=0")
+       set(OPENCOLORIO_DEFINITIONS "-DOCIO_STATIC_BUILD")
+       set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
+       add_definitions(-DOIIO_STATIC_BUILD)
+       add_definitions(-DOIIO_NO_SSE=1)
+endif()
+
+if(WITH_LLVM)
+       set(LLVM_ROOT_DIR ${LIBDIR}/llvm CACHE PATH     "Path to the LLVM installation")
+       file(GLOB LLVM_LIBRARY_OPTIMIZED ${LLVM_ROOT_DIR}/lib/*.lib)
+
+       if(EXISTS ${LLVM_ROOT_DIR}/debug/lib)
+               foreach(LLVM_OPTIMIZED_LIB ${LLVM_LIBRARY_OPTIMIZED})
+                       get_filename_component(LIBNAME ${LLVM_OPTIMIZED_LIB} ABSOLUTE)
+                       list(APPEND LLVM_LIBS optimized ${LIBNAME})
+               endforeach(LLVM_OPTIMIZED_LIB)
+
+               file(GLOB LLVM_LIBRARY_DEBUG ${LLVM_ROOT_DIR}/debug/lib/*.lib)
+
+               foreach(LLVM_DEBUG_LIB ${LLVM_LIBRARY_DEBUG})
+                       get_filename_component(LIBNAME ${LLVM_DEBUG_LIB} ABSOLUTE)
+                       list(APPEND LLVM_LIBS debug ${LIBNAME})
+               endforeach(LLVM_DEBUG_LIB)
+
+               set(LLVM_LIBRARY ${LLVM_LIBS})
+       else()
+               message(WARNING "LLVM debug libs not present on this system. Using release libs for debug builds.")
+               set(LLVM_LIBRARY ${LLVM_LIBRARY_OPTIMIZED})
+       endif()
+
+endif()
+
+if(WITH_OPENCOLORIO)
+       set(OPENCOLORIO ${LIBDIR}/opencolorio)
+       set(OPENCOLORIO_INCLUDE_DIRS ${OPENCOLORIO}/include)
+       set(OPENCOLORIO_LIBRARIES OpenColorIO)
+       set(OPENCOLORIO_LIBPATH ${LIBDIR}/opencolorio/lib)
+       set(OPENCOLORIO_DEFINITIONS)
+endif()
+
+if(WITH_CYCLES_EMBREE)
+       find_package(Embree 3.2.4 REQUIRED)
+endif()
+
+if(WITH_OPENVDB)
+       set(BLOSC_LIBRARIES optimized ${LIBDIR}/blosc/lib/libblosc.lib debug ${LIBDIR}/blosc/lib/libblosc_d.lib)
+       set(TBB_LIBRARIES optimized ${LIBDIR}/tbb/lib/tbb.lib debug ${LIBDIR}/tbb/lib/tbb_debug.lib)
+       set(TBB_INCLUDE_DIR ${LIBDIR}/tbb/include)
+       set(OPENVDB ${LIBDIR}/openvdb)
+       set(OPENVDB_INCLUDE_DIRS ${OPENVDB}/include ${TBB_INCLUDE_DIR})
+       set(OPENVDB_LIBRARIES optimized openvdb debug openvdb_d ${TBB_LIBRARIES} ${BLOSC_LIBRARIES})
+       set(OPENVDB_LIBPATH ${LIBDIR}/openvdb/lib)
+endif()
+
+if(WITH_ALEMBIC)
+       set(ALEMBIC ${LIBDIR}/alembic)
+       set(ALEMBIC_INCLUDE_DIR ${ALEMBIC}/include)
+       set(ALEMBIC_INCLUDE_DIRS ${ALEMBIC_INCLUDE_DIR})
+       set(ALEMBIC_LIBPATH ${ALEMBIC}/lib)
+       set(ALEMBIC_LIBRARIES optimized alembic debug alembic_d)
+       set(ALEMBIC_FOUND 1)
+endif()
+
+if(WITH_MOD_CLOTH_ELTOPO)
+       set(LAPACK ${LIBDIR}/lapack)
+       # set(LAPACK_INCLUDE_DIR ${LAPACK}/include)
+       set(LAPACK_LIBPATH ${LAPACK}/lib)
+       set(LAPACK_LIBRARIES
+               ${LIBDIR}/lapack/lib/libf2c.lib
+               ${LIBDIR}/lapack/lib/clapack_nowrap.lib
+               ${LIBDIR}/lapack/lib/BLAS_nowrap.lib
+       )
+endif()
+
+if(WITH_OPENSUBDIV OR WITH_CYCLES_OPENSUBDIV)
+       set(OPENSUBDIV_INCLUDE_DIR ${LIBDIR}/opensubdiv/include)
+       set(OPENSUBDIV_LIBPATH ${LIBDIR}/opensubdiv/lib)
+       set(OPENSUBDIV_LIBRARIES
+               optimized ${OPENSUBDIV_LIBPATH}/osdCPU.lib
+               optimized ${OPENSUBDIV_LIBPATH}/osdGPU.lib
+               debug ${OPENSUBDIV_LIBPATH}/osdCPU_d.lib
+               debug ${OPENSUBDIV_LIBPATH}/osdGPU_d.lib
+       )
+       set(OPENSUBDIV_HAS_OPENMP TRUE)
+       set(OPENSUBDIV_HAS_TBB FALSE)
+       set(OPENSUBDIV_HAS_OPENCL TRUE)
+       set(OPENSUBDIV_HAS_CUDA FALSE)
+       set(OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK TRUE)
+       set(OPENSUBDIV_HAS_GLSL_COMPUTE TRUE)
+       windows_find_package(OpenSubdiv)
+endif()
+
+if(WITH_SDL)
+       set(SDL ${LIBDIR}/sdl)
+       set(SDL_INCLUDE_DIR ${SDL}/include)
+       set(SDL_LIBPATH ${SDL}/lib)
+       set(SDL_LIBRARY SDL2)
+endif()
+
+# Audio IO
+if(WITH_SYSTEM_AUDASPACE)
+       set(AUDASPACE_INCLUDE_DIRS ${LIBDIR}/audaspace/include/audaspace)
+       set(AUDASPACE_LIBRARIES ${LIBDIR}/audaspace/lib/audaspace.lib)
+       set(AUDASPACE_C_INCLUDE_DIRS ${LIBDIR}/audaspace/include/audaspace)
+       set(AUDASPACE_C_LIBRARIES ${LIBDIR}/audaspace/lib/audaspace-c.lib)
+       set(AUDASPACE_PY_INCLUDE_DIRS ${LIBDIR}/audaspace/include/audaspace)
+       set(AUDASPACE_PY_LIBRARIES ${LIBDIR}/audaspace/lib/audaspace-py.lib)
+endif()
+
+# used in many places so include globally, like OpenGL
+blender_include_dirs_sys("${PTHREADS_INCLUDE_DIRS}")
+
+#find signtool
+set(ProgramFilesX86_NAME "ProgramFiles(x86)") #env dislikes the ( )
+find_program(SIGNTOOL_EXE signtool
+       HINTS
+               "$ENV{${ProgramFilesX86_NAME}}/Windows Kits/10/bin/x86/"
+               "$ENV{ProgramFiles}/Windows Kits/10/bin/x86/"
+               "$ENV{${ProgramFilesX86_NAME}}/Windows Kits/8.1/bin/x86/"
+               "$ENV{ProgramFiles}/Windows Kits/8.1/bin/x86/"
+               "$ENV{${ProgramFilesX86_NAME}}/Windows Kits/8.0/bin/x86/"
+               "$ENV{ProgramFiles}/Windows Kits/8.0/bin/x86/"
+)
index 54d14f6..873bbfa 100644 (file)
@@ -217,6 +217,15 @@ if(WITH_CYCLES_OSL)
        )
 endif()
 
+if(WITH_CYCLES_EMBREE)
+       # Enable the Embree code paths and declare that Embree is used as a static library.
+       add_definitions(
+               -DWITH_EMBREE
+               -DEMBREE_STATIC_LIB
+       )
+       include_directories(SYSTEM ${EMBREE_INCLUDE_DIRS})
+endif()
+
 if(WITH_CYCLES_OPENSUBDIV)
        add_definitions(-DWITH_OPENSUBDIV)
        include_directories(
index 34ff298..2c1367a 100644 (file)
@@ -77,6 +77,9 @@ macro(cycles_target_link_libraries target)
        if(WITH_CYCLES_OSL)
                target_link_libraries(${target} ${OSL_LIBRARIES} ${LLVM_LIBRARIES})
        endif()
+       if(WITH_CYCLES_EMBREE)
+               target_link_libraries(${target} ${EMBREE_LIBRARIES})
+       endif()
        if(WITH_CYCLES_OPENSUBDIV)
                target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
        endif()
index 45d6e35..d986ba8 100644 (file)
@@ -547,6 +547,11 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
             description="Use special type BVH optimized for hair (uses more ram but renders faster)",
             default=True,
         )
+        cls.use_bvh_embree = BoolProperty(
+            name="Use Embree",
+            description="Use Embree as ray accelerator",
+            default=False,
+        )
         cls.debug_bvh_time_steps = IntProperty(
             name="BVH Time Steps",
             description="Split BVH primitives by this number of time steps to speed up render time in cost of memory",
index 0fdefe5..2f1adfe 100644 (file)
@@ -17,6 +17,7 @@
 # <pep8 compliant>
 
 import bpy
+import _cycles
 
 from bpy.types import (
     Panel,
@@ -430,11 +431,18 @@ class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
         col.separator()
 
         col.label(text="Acceleration structure:")
+        if _cycles.with_embree:
+            row = col.row()
+            row.active = use_cpu(context)
+            row.prop(cscene, "use_bvh_embree")
+        row = col.row()
         col.prop(cscene, "debug_use_spatial_splits")
-        col.prop(cscene, "debug_use_hair_bvh")
+        row = col.row()
+        row.active = not cscene.use_bvh_embree or not _cycles.with_embree
+        row.prop(cscene, "debug_use_hair_bvh")
 
         row = col.row()
-        row.active = not cscene.debug_use_spatial_splits
+        row.active = not cscene.debug_use_spatial_splits and not cscene.use_bvh_embree
         row.prop(cscene, "debug_bvh_time_steps")
 
         col = layout.column()
@@ -491,8 +499,6 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel):
     bl_options = {'DEFAULT_CLOSED'}
 
     def draw(self, context):
-        import _cycles
-
         layout = self.layout
 
         scene = context.scene
index 4b01eb5..997176f 100644 (file)
@@ -844,5 +844,13 @@ void *CCL_python_module_init()
        Py_INCREF(Py_False);
 #endif /* WITH_NETWORK */
 
+#ifdef WITH_EMBREE
+       PyModule_AddObject(mod, "with_embree", Py_True);
+       Py_INCREF(Py_True);
+#else /* WITH_EMBREE */
+       PyModule_AddObject(mod, "with_embree", Py_False);
+       Py_INCREF(Py_False);
+#endif /* WITH_EMBREE */
+
        return (void*)mod;
 }
index 70bb6de..832847c 100644 (file)
@@ -731,6 +731,9 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
                params.bvh_layout = DebugFlags().cpu.bvh_layout;
        }
 
+#ifdef WITH_EMBREE
+       params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE : params.bvh_layout;
+#endif
        return params;
 }
 
index fcd2857..6014624 100644 (file)
@@ -13,6 +13,7 @@ set(SRC
        bvh8.cpp
        bvh_binning.cpp
        bvh_build.cpp
+       bvh_embree.cpp
        bvh_node.cpp
        bvh_sort.cpp
        bvh_split.cpp
@@ -26,6 +27,7 @@ set(SRC_HEADERS
        bvh8.h
        bvh_binning.h
        bvh_build.h
+       bvh_embree.h
        bvh_node.h
        bvh_params.h
        bvh_sort.h
index bc73a3a..ac0614e 100644 (file)
 #include "bvh/bvh_build.h"
 #include "bvh/bvh_node.h"
 
+#ifdef WITH_EMBREE
+#include "bvh/bvh_embree.h"
+#endif
+
 #include "util/util_foreach.h"
 #include "util/util_logging.h"
 #include "util/util_progress.h"
@@ -41,6 +45,7 @@ const char *bvh_layout_name(BVHLayout layout)
                case BVH_LAYOUT_BVH4: return "BVH4";
                case BVH_LAYOUT_BVH8: return "BVH8";
                case BVH_LAYOUT_NONE: return "NONE";
+               case BVH_LAYOUT_EMBREE: return "EMBREE";
                case BVH_LAYOUT_ALL:  return "ALL";
        }
        LOG(DFATAL) << "Unsupported BVH layout was passed.";
@@ -96,6 +101,10 @@ BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
                        return new BVH4(params, objects);
                case BVH_LAYOUT_BVH8:
                        return new BVH8(params, objects);
+               case BVH_LAYOUT_EMBREE:
+#ifdef WITH_EMBREE
+                       return new BVHEmbree(params, objects);
+#endif
                case BVH_LAYOUT_NONE:
                case BVH_LAYOUT_ALL:
                        break;
@@ -106,7 +115,7 @@ BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
 
 /* Building */
 
-void BVH::build(Progress& progress)
+void BVH::build(Progress& progress, Stats*)
 {
        progress.set_substatus("Building BVH");
 
index 86be0ba..f14c8f5 100644 (file)
@@ -25,6 +25,7 @@
 
 CCL_NAMESPACE_BEGIN
 
+class Stats;
 class BVHNode;
 struct BVHStackEntry;
 class BVHParams;
@@ -35,7 +36,6 @@ class Progress;
 
 #define BVH_ALIGN     4096
 #define TRI_NODE_SIZE 3
-
 /* Packed BVH
  *
  * BVH stored as it will be used for traversal on the rendering device. */
@@ -91,7 +91,7 @@ public:
        static BVH *create(const BVHParams& params, const vector<Object*>& objects);
        virtual ~BVH() {}
 
-       void build(Progress& progress);
+       virtual void build(Progress& progress, Stats *stats=NULL);
        void refit(Progress& progress);
 
 protected:
diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
new file mode 100644 (file)
index 0000000..87891b6
--- /dev/null
@@ -0,0 +1,884 @@
+/*
+ * Copyright 2018, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This class implements a ray accelerator for Cycles using Intel's Embree library.
+ * It supports triangles, curves, object and deformation blur and instancing.
+ * Not supported are thick line segments, those have no native equivalent in Embree.
+ * They could be implemented using Embree's thick curves, at the expense of wasted memory.
+ * User defined intersections for Embree could also be an option, but since Embree only uses aligned BVHs
+ * for user geometry, this would come with reduced performance and/or higher memory usage.
+ *
+ * Since Embree allows an object to be either curves or triangles but not both, Cycles object IDs are mapped
+ * to Embree IDs by multiplying by two and adding one for curves.
+ *
+ * This implementation shares RTCDevices between Cycles instances. Eventually each instance should get
+ * a separate RTCDevice to correctly keep track of memory usage.
+ *
+ * Vertex and index buffers are duplicated between Cycles device arrays and Embree. These could be merged,
+ * which would require changes to intersection refinement, shader setup, mesh light sampling and a few
+ * other places in Cycles where direct access to vertex data is required.
+ */
+
+#ifdef WITH_EMBREE
+
+#include <pmmintrin.h>
+#include <xmmintrin.h>
+#include <embree3/rtcore_geometry.h>
+
+#include "bvh/bvh_embree.h"
+
+/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH. */
+#include "kernel/bvh/bvh_embree.h"
+#include "kernel/kernel_compat_cpu.h"
+#include "kernel/split/kernel_split_data_types.h"
+#include "kernel/kernel_globals.h"
+#include "kernel/kernel_random.h"
+
+#include "render/mesh.h"
+#include "render/object.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+
+CCL_NAMESPACE_BEGIN
+
+#define IS_HAIR(x) (x & 1)
+
+/* This gets called by Embree at every valid ray/object intersection.
+ * Things like recording subsurface or shadow hits for later evaluation
+ * as well as filtering for volume objects happen here.
+ * Cycles' own BVH does that directly inside the traversal calls.
+ */
+static void rtc_filter_func(const RTCFilterFunctionNArguments *args)
+{
+       /* Current implementation in Cycles assumes only single-ray intersection queries. */
+       assert(args->N == 1);
+
+       const RTCRay *ray = (RTCRay*)args->ray;
+       const RTCHit *hit = (RTCHit*)args->hit;
+       CCLIntersectContext *ctx = ((IntersectContext*)args->context)->userRayExt;
+       KernelGlobals *kg = ctx->kg;
+
+       /* Check if there is backfacing hair to ignore. */
+       if(IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+          && !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING)
+          && !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
+               if(dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+                       *args->valid = 0;
+                       return;
+               }
+       }
+}
+
+static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
+{
+       assert(args->N == 1);
+
+       const RTCRay *ray = (RTCRay*)args->ray;
+       RTCHit *hit = (RTCHit*)args->hit;
+       CCLIntersectContext *ctx = ((IntersectContext*)args->context)->userRayExt;
+       KernelGlobals *kg = ctx->kg;
+
+       /* For all ray types: Check if there is backfacing hair to ignore */
+       if(IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+          && !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING)
+          && !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
+               if(dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+                       *args->valid = 0;
+                       return;
+               }
+       }
+
+       switch(ctx->type) {
+               case CCLIntersectContext::RAY_SHADOW_ALL: {
+                       /* Append the intersection to the end of the array. */
+                       if(ctx->num_hits < ctx->max_hits) {
+                               Intersection current_isect;
+                               kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+                               for(size_t i = 0; i < ctx->max_hits; ++i) {
+                                       if(current_isect.object == ctx->isect_s[i].object &&
+                                          current_isect.prim == ctx->isect_s[i].prim &&
+                                          current_isect.t == ctx->isect_s[i].t) {
+                                               /* This intersection was already recorded, skip it. */
+                                               *args->valid = 0;
+                                               break;
+                                       }
+                               }
+                               Intersection *isect = &ctx->isect_s[ctx->num_hits];
+                               ++ctx->num_hits;
+                               *isect = current_isect;
+                               int prim = kernel_tex_fetch(__prim_index, isect->prim);
+                               int shader = 0;
+                               if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
+                                       shader = kernel_tex_fetch(__tri_shader, prim);
+                               }
+                               else {
+                                       float4 str = kernel_tex_fetch(__curves, prim);
+                                       shader = __float_as_int(str.z);
+                               }
+                               int flag = kernel_tex_fetch(__shaders, shader & SHADER_MASK).flags;
+                               /* If no transparent shadows, all light is blocked. */
+                               if(flag & (SD_HAS_TRANSPARENT_SHADOW)) {
+                                       /* This tells Embree to continue tracing. */
+                                       *args->valid = 0;
+                               }
+                       }
+                       else {
+                               /* Increase the number of hits beyond ray.max_hits
+                                * so that the caller can detect this as opaque. */
+                               ++ctx->num_hits;
+                       }
+                       break;
+               }
+               case CCLIntersectContext::RAY_SSS: {
+                       /* No intersection information requested, just return a hit. */
+                       if(ctx->max_hits == 0) {
+                               break;
+                       }
+
+                       /* See triangle_intersect_subsurface() for the native equivalent. */
+                       for(int i = min(ctx->max_hits, ctx->ss_isect->num_hits) - 1; i >= 0; --i) {
+                               if(ctx->ss_isect->hits[i].t == ray->tfar) {
+                                       /* This tells Embree to continue tracing. */
+                                       *args->valid = 0;
+                                       break;
+                               }
+                       }
+
+                       ++ctx->ss_isect->num_hits;
+                       int hit_idx;
+
+                       if(ctx->ss_isect->num_hits <= ctx->max_hits) {
+                               hit_idx = ctx->ss_isect->num_hits - 1;
+                       }
+                       else {
+                               /* reservoir sampling: if we are at the maximum number of
+                                * hits, randomly replace element or skip it */
+                               hit_idx = lcg_step_uint(ctx->lcg_state) % ctx->ss_isect->num_hits;
+
+                               if(hit_idx >= ctx->max_hits) {
+                                       /* This tells Embree to continue tracing. */
+                                       *args->valid = 0;
+                                       break;
+                               }
+                       }
+                       /* record intersection */
+                       kernel_embree_convert_local_hit(kg, ray, hit, &ctx->ss_isect->hits[hit_idx], ctx->sss_object_id);
+                       ctx->ss_isect->Ng[hit_idx].x = hit->Ng_x;
+                       ctx->ss_isect->Ng[hit_idx].y = hit->Ng_y;
+                       ctx->ss_isect->Ng[hit_idx].z = hit->Ng_z;
+                       ctx->ss_isect->Ng[hit_idx] = normalize(ctx->ss_isect->Ng[hit_idx]);
+                       /* This tells Embree to continue tracing .*/
+                       *args->valid = 0;
+                       break;
+               }
+               case CCLIntersectContext::RAY_VOLUME_ALL: {
+                       /* Append the intersection to the end of the array. */
+                       if(ctx->num_hits < ctx->max_hits) {
+                               Intersection current_isect;
+                               kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+                               for(size_t i = 0; i < ctx->max_hits; ++i) {
+                                       if(current_isect.object == ctx->isect_s[i].object &&
+                                          current_isect.prim == ctx->isect_s[i].prim &&
+                                          current_isect.t == ctx->isect_s[i].t) {
+                                               /* This intersection was already recorded, skip it. */
+                                               *args->valid = 0;
+                                               break;
+                                       }
+                               }
+                               Intersection *isect = &ctx->isect_s[ctx->num_hits];
+                               ++ctx->num_hits;
+                               *isect = current_isect;
+                               /* Only primitives from volume object. */
+                               uint tri_object = (isect->object == OBJECT_NONE) ?
+                                                                  kernel_tex_fetch(__prim_object, isect->prim) : isect->object;
+                               int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+                               if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+                                       --ctx->num_hits;
+                               }
+                               /* This tells Embree to continue tracing. */
+                               *args->valid = 0;
+                               break;
+                       }
+               }
+               case CCLIntersectContext::RAY_REGULAR:
+               default:
+                       /* Nothing to do here. */
+                       break;
+       }
+}
+
+static size_t unaccounted_mem = 0;
+
+static bool rtc_memory_monitor_func(void* userPtr, const ssize_t bytes, const bool)
+{
+       Stats *stats = (Stats*)userPtr;
+       if(stats) {
+               if(bytes > 0) {
+                       stats->mem_alloc(bytes);
+               }
+               else {
+                       stats->mem_free(-bytes);
+               }
+       }
+       else {
+               /* A stats pointer may not yet be available. Keep track of the memory usage for later. */
+               if(bytes >= 0) {
+                       atomic_add_and_fetch_z(&unaccounted_mem, bytes);
+               }
+               else {
+                       atomic_sub_and_fetch_z(&unaccounted_mem, -bytes);
+               }
+       }
+       return true;
+}
+
+static void rtc_error_func(void*, enum RTCError, const char* str)
+{
+       VLOG(1) << str;
+}
+
+static double progress_start_time = 0.0f;
+
+static bool rtc_progress_func(void* user_ptr, const double n)
+{
+       Progress *progress = (Progress*)user_ptr;
+
+       if(time_dt() - progress_start_time < 0.25) {
+               return true;
+       }
+
+       string msg = string_printf("Building BVH %.0f%%", n * 100.0);
+       progress->set_substatus(msg);
+       progress_start_time = time_dt();
+
+       return !progress->get_cancel();
+}
+
+/* This is to have a shared device between all BVH instances.
+   It would be useful to actually use a separate RTCDevice per Cycles instance. */
+RTCDevice BVHEmbree::rtc_shared_device = NULL;
+int BVHEmbree::rtc_shared_users = 0;
+thread_mutex BVHEmbree::rtc_shared_mutex;
+
+BVHEmbree::BVHEmbree(const BVHParams& params_, const vector<Object*>& objects_)
+: BVH(params_, objects_), scene(NULL), mem_used(0), top_level(NULL), stats(NULL),
+  curve_subdivisions(params.curve_subdivisions), build_quality(RTC_BUILD_QUALITY_REFIT),
+  use_curves(params_.curve_flags & CURVE_KN_INTERPOLATE),
+  use_ribbons(params.curve_flags & CURVE_KN_RIBBONS), dynamic_scene(true)
+{
+       _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+       _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+       thread_scoped_lock lock(rtc_shared_mutex);
+       if(rtc_shared_users == 0) {
+               rtc_shared_device = rtcNewDevice("verbose=0");
+               /* Check here if Embree was built with the correct flags. */
+               ssize_t ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED);
+               if(ret != 1) {
+                       assert(0);
+                       VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag."\
+                                  "Ray visiblity will not work.";
+               }
+               ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED);
+               if(ret != 1) {
+                       assert(0);
+                       VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED flag."\
+                                  "Renders may not look as expected.";
+               }
+               ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED);
+               if(ret != 1) {
+                       assert(0);
+                       VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED flag. "\
+                                  "Line primitives will not be rendered.";
+               }
+               ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED);
+               if(ret != 1) {
+                       assert(0);
+                       VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED flag. "\
+                                  "Triangle primitives will not be rendered.";
+               }
+               ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED);
+               if(ret != 0) {
+                       assert(0);
+                       VLOG(1) << "Embree is compiled with the RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED flag. "\
+                                  "Renders may not look as expected.";
+               }
+       }
+       ++rtc_shared_users;
+
+       rtcSetDeviceErrorFunction(rtc_shared_device, rtc_error_func, NULL);
+
+       pack.root_index = -1;
+}
+
+/* Releases the scene and the shared device reference, except for top level BVHs. */
+BVHEmbree::~BVHEmbree()
+{
+       if(params.top_level) {
+               return;
+       }
+       destroy(scene);
+}
+
+void BVHEmbree::destroy(RTCScene scene)
+{
+       if(scene) {
+               rtcReleaseScene(scene);
+               scene = NULL;
+       }
+       thread_scoped_lock lock(rtc_shared_mutex);
+       --rtc_shared_users;
+       if(rtc_shared_users == 0) {
+               rtcReleaseDevice (rtc_shared_device);
+               rtc_shared_device = NULL;
+       }
+}
+
+void BVHEmbree::delete_rtcScene()
+{
+       if(scene) {
+               /* When this BVH is used as an instance in a top level BVH, don't delete now
+                * Let the top_level BVH know that it should delete it later. */
+               if(top_level) {
+                       top_level->add_delayed_delete_scene(scene);
+               }
+               else {
+                       rtcReleaseScene(scene);
+                       if(delayed_delete_scenes.size()) {
+                               foreach(RTCScene s, delayed_delete_scenes) {
+                                       rtcReleaseScene(s);
+                               }
+                       }
+                       delayed_delete_scenes.clear();
+               }
+               scene = NULL;
+       }
+}
+
+void BVHEmbree::build(Progress& progress, Stats *stats_)
+{
+       assert(rtc_shared_device);
+       stats = stats_;
+       rtcSetDeviceMemoryMonitorFunction(rtc_shared_device, rtc_memory_monitor_func, stats);
+
+       progress.set_substatus("Building BVH");
+
+       if(scene) {
+               rtcReleaseScene(scene);
+               scene = NULL;
+       }
+
+       const bool dynamic = params.bvh_type == SceneParams::BVH_DYNAMIC;
+
+       scene = rtcNewScene(rtc_shared_device);
+       const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) |
+                                          RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST;
+       rtcSetSceneFlags(scene, scene_flags);
+       build_quality = dynamic ? RTC_BUILD_QUALITY_LOW :
+                      (params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : RTC_BUILD_QUALITY_MEDIUM);
+       rtcSetSceneBuildQuality(scene, build_quality);
+
+       int i = 0;
+
+       pack.object_node.clear();
+
+       foreach(Object *ob, objects) {
+               if(params.top_level) {
+                       if(!ob->is_traceable()) {
+                               ++i;
+                               continue;
+                       }
+                       if(!ob->mesh->is_instanced()) {
+                               add_object(ob, i);
+                       }
+                       else {
+                               add_instance(ob, i);
+                       }
+               }
+               else {
+                       add_object(ob, i);
+               }
+               ++i;
+               if(progress.get_cancel()) return;
+       }
+
+       if(progress.get_cancel()) {
+               delete_rtcScene();
+               stats = NULL;
+               return;
+       }
+
+       rtcSetSceneProgressMonitorFunction(scene, rtc_progress_func, &progress);
+       rtcCommitScene(scene);
+
+       pack_primitives();
+
+       if(progress.get_cancel()) {
+               delete_rtcScene();
+               stats = NULL;
+               return;
+       }
+
+       progress.set_substatus("Packing geometry");
+       pack_nodes(NULL);
+
+       stats = NULL;
+}
+
+void BVHEmbree::add_object(Object *ob, int i)
+{
+       Mesh *mesh = ob->mesh;
+       if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) {
+               add_triangles(ob, i);
+       }
+       if(params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) {
+               add_curves(ob, i);
+       }
+}
+
+void BVHEmbree::add_instance(Object *ob, int i)
+{
+       if(!ob || !ob->mesh) {
+               assert(0);
+               return;
+       }
+       BVHEmbree *instance_bvh = (BVHEmbree*)(ob->mesh->bvh);
+
+       if(instance_bvh->top_level != this) {
+               instance_bvh->top_level = this;
+       }
+
+       const size_t num_motion_steps = ob->use_motion() ? ob->motion.size() : 1;
+       RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_INSTANCE);
+       rtcSetGeometryInstancedScene(geom_id, instance_bvh->scene);
+       rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
+
+       if(ob->use_motion()) {
+               for(size_t step = 0; step < num_motion_steps; ++step) {
+                       rtcSetGeometryTransform(geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float*)&ob->motion[step]);
+               }
+       }
+       else {
+               rtcSetGeometryTransform(geom_id, 0, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float*)&ob->tfm);
+       }
+
+       pack.prim_index.push_back_slow(-1);
+       pack.prim_object.push_back_slow(i);
+       pack.prim_type.push_back_slow(PRIMITIVE_NONE);
+       pack.prim_tri_index.push_back_slow(-1);
+
+       rtcSetGeometryUserData(geom_id, (void*) instance_bvh->scene);
+       rtcSetGeometryMask(geom_id, ob->visibility);
+
+       rtcCommitGeometry(geom_id);
+       rtcAttachGeometryByID(scene, geom_id, i*2);
+       rtcReleaseGeometry(geom_id);
+}
+
+void BVHEmbree::add_triangles(Object *ob, int i)
+{
+       size_t prim_offset = pack.prim_index.size();
+       Mesh *mesh = ob->mesh;
+       const Attribute *attr_mP = NULL;
+       size_t num_motion_steps = 1;
+       if(mesh->has_motion_blur()) {
+               attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+               if(attr_mP) {
+                       num_motion_steps = mesh->motion_steps;
+                       if(num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
+                               assert(0);
+                               num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
+                       }
+               }
+       }
+
+       const size_t num_triangles = mesh->num_triangles();
+       RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_TRIANGLE);
+       rtcSetGeometryBuildQuality(geom_id, build_quality);
+       rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
+
+       unsigned *rtc_indices = (unsigned*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_INDEX, 0,
+                                                                  RTC_FORMAT_UINT3, sizeof (int) * 3, num_triangles);
+       assert(rtc_indices);
+       if(!rtc_indices) {
+               VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str() << ".\n";
+               return;
+       }
+       for(size_t j = 0; j < num_triangles; ++j) {
+               Mesh::Triangle t = mesh->get_triangle(j);
+               rtc_indices[j*3] = t.v[0];
+               rtc_indices[j*3+1] = t.v[1];
+               rtc_indices[j*3+2] = t.v[2];
+       }
+
+       update_tri_vertex_buffer(geom_id, mesh);
+
+       pack.prim_object.reserve(pack.prim_object.size() + num_triangles);
+       pack.prim_type.reserve(pack.prim_type.size() + num_triangles);
+       pack.prim_index.reserve(pack.prim_index.size() + num_triangles);
+       pack.prim_tri_index.reserve(pack.prim_index.size() + num_triangles);
+       for(size_t j = 0; j < num_triangles; ++j) {
+               pack.prim_object.push_back_reserved(i);
+               pack.prim_type.push_back_reserved(num_motion_steps > 1 ? PRIMITIVE_MOTION_TRIANGLE : PRIMITIVE_TRIANGLE);
+               pack.prim_index.push_back_reserved(j);
+               pack.prim_tri_index.push_back_reserved(j);
+       }
+
+       rtcSetGeometryUserData(geom_id, (void*) prim_offset);
+       rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
+       rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
+       rtcSetGeometryMask(geom_id, ob->visibility);
+
+       rtcCommitGeometry(geom_id);
+       rtcAttachGeometryByID(scene, geom_id, i*2);
+       rtcReleaseGeometry(geom_id);
+}
+
+void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
+{
+       const Attribute *attr_mP = NULL;
+       size_t num_motion_steps = 1;
+       int t_mid = 0;
+       if(mesh->has_motion_blur()) {
+               attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+               if(attr_mP) {
+                       num_motion_steps = mesh->motion_steps;
+                       t_mid = (num_motion_steps - 1) / 2;
+                       if(num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
+                               assert(0);
+                               num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
+                       }
+               }
+       }
+       const size_t num_verts = mesh->verts.size();
+
+       for(int t = 0; t < num_motion_steps; ++t) {
+               const float3 *verts;
+               if(t == t_mid) {
+                       verts = &mesh->verts[0];
+               }
+               else {
+                       int t_ = (t > t_mid) ? (t - 1) : t;
+                       verts = &attr_mP->data_float3()[t_ * num_verts];
+               }
+
+               float *rtc_verts = (float*) rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t,
+                                                                   RTC_FORMAT_FLOAT3, sizeof(float) * 3, num_verts + 1);
+               assert(rtc_verts);
+               if(rtc_verts) {
+                       for(size_t j = 0; j < num_verts; ++j) {
+                               rtc_verts[0] = verts[j].x;
+                               rtc_verts[1] = verts[j].y;
+                               rtc_verts[2] = verts[j].z;
+                               rtc_verts += 3;
+                       }
+               }
+       }
+}
+
+void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
+{
+       const Attribute *attr_mP = NULL;
+       size_t num_motion_steps = 1;
+       if(mesh->has_motion_blur()) {
+               attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+               if(attr_mP) {
+                       num_motion_steps = mesh->motion_steps;
+               }
+       }
+       
+       const size_t num_curves = mesh->num_curves();
+       size_t num_keys = 0;
+       for(size_t j = 0; j < num_curves; ++j) {
+               const Mesh::Curve c = mesh->get_curve(j);
+               num_keys += c.num_keys;
+       }
+
+       /* Copy the CV data to Embree */
+       const int t_mid = (num_motion_steps - 1) / 2;
+       const float *curve_radius = &mesh->curve_radius[0];
+       for(int t = 0; t < num_motion_steps; ++t) {
+               const float3 *verts;
+               if(t == t_mid || attr_mP == NULL) {
+                       verts = &mesh->curve_keys[0];
+               }
+               else {
+                       int t_ = (t > t_mid) ? (t - 1) : t;
+                       verts = &attr_mP->data_float3()[t_ * num_keys];
+               }
+
+               float4 *rtc_verts = (float4*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t,
+                                                                    RTC_FORMAT_FLOAT4, sizeof (float) * 4, num_keys);
+               float4 *rtc_tangents = NULL;
+               if(use_curves) {
+                       rtc_tangents = (float4*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_TANGENT, t,
+                                                                                                                               RTC_FORMAT_FLOAT4, sizeof (float) * 4, num_keys);
+                       assert(rtc_tangents);
+               }
+               assert(rtc_verts);
+               if(rtc_verts) {
+                       if(use_curves && rtc_tangents) {
+                               const size_t num_curves = mesh->num_curves();
+                               for(size_t j = 0; j < num_curves; ++j) {
+                                       Mesh::Curve c = mesh->get_curve(j);
+                                       int fk = c.first_key;
+                                       rtc_verts[0] = float3_to_float4(verts[fk]);
+                                       rtc_verts[0].w = curve_radius[fk];
+                                       rtc_tangents[0] = float3_to_float4(verts[fk + 1] - verts[fk]);
+                                       rtc_tangents[0].w = curve_radius[fk + 1] - curve_radius[fk];
+                                       ++fk;
+                                       int k = 1;
+                                       for(;k < c.num_segments(); ++k, ++fk) {
+                                               rtc_verts[k] = float3_to_float4(verts[fk]);
+                                               rtc_verts[k].w = curve_radius[fk];
+                                               rtc_tangents[k] = float3_to_float4((verts[fk + 1] - verts[fk - 1]) * 0.5f);
+                                               rtc_tangents[k].w = (curve_radius[fk + 1] - curve_radius[fk - 1]) * 0.5f;
+                                       }
+                                       rtc_verts[k] = float3_to_float4(verts[fk]);
+                                       rtc_verts[k].w = curve_radius[fk];
+                                       rtc_tangents[k] = float3_to_float4(verts[fk] - verts[fk - 1]);
+                                       rtc_tangents[k].w = curve_radius[fk] - curve_radius[fk - 1];
+                                       rtc_verts += c.num_keys;
+                                       rtc_tangents += c.num_keys;
+                               }
+                       }
+                       else {
+                               for(size_t j = 0; j < num_keys; ++j) {
+                                       rtc_verts[j] = float3_to_float4(verts[j]);
+                                       rtc_verts[j].w = curve_radius[j];
+                               }
+                       }
+               }
+       }
+}
+
+void BVHEmbree::add_curves(Object *ob, int i)
+{
+       size_t prim_offset = pack.prim_index.size();
+       const Mesh *mesh = ob->mesh;
+       const Attribute *attr_mP = NULL;
+       size_t num_motion_steps = 1;
+       if(mesh->has_motion_blur()) {
+               attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+               if(attr_mP) {
+                       num_motion_steps = mesh->motion_steps;
+               }
+       }
+
+       const size_t num_curves = mesh->num_curves();
+       size_t num_segments = 0;
+       for(size_t j = 0; j < num_curves; ++j) {
+               Mesh::Curve c = mesh->get_curve(j);
+               assert(c.num_segments() > 0);
+               num_segments += c.num_segments();
+       }
+
+       /* Make room for Cycles specific data. */
+       pack.prim_object.reserve(pack.prim_object.size() + num_segments);
+       pack.prim_type.reserve(pack.prim_type.size() + num_segments);
+       pack.prim_index.reserve(pack.prim_index.size() + num_segments);
+       pack.prim_tri_index.reserve(pack.prim_index.size() + num_segments);
+
+       enum RTCGeometryType type = (!use_curves) ? RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
+                                   (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE :
+                                                  RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE);
+
+       RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, type);
+       rtcSetGeometryTessellationRate(geom_id, curve_subdivisions);
+       unsigned *rtc_indices = (unsigned*) rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_INDEX, 0,
+                                                                                                                               RTC_FORMAT_UINT, sizeof (int), num_segments);
+       size_t rtc_index = 0;
+       for(size_t j = 0; j < num_curves; ++j) {
+               Mesh::Curve c = mesh->get_curve(j);
+               for(size_t k = 0; k < c.num_segments(); ++k) {
+                       rtc_indices[rtc_index] = c.first_key + k;
+                       /* Cycles specific data. */
+                       pack.prim_object.push_back_reserved(i);
+                       pack.prim_type.push_back_reserved(PRIMITIVE_PACK_SEGMENT(num_motion_steps > 1 ?
+                                                                                                                                        PRIMITIVE_MOTION_CURVE : PRIMITIVE_CURVE, k));
+                       pack.prim_index.push_back_reserved(j);
+                       pack.prim_tri_index.push_back_reserved(rtc_index);
+
+                       ++rtc_index;
+               }
+       }
+
+       rtcSetGeometryBuildQuality(geom_id, build_quality);
+       rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
+
+       update_curve_vertex_buffer(geom_id, mesh);
+
+       rtcSetGeometryUserData(geom_id, (void*) prim_offset);
+       rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
+       rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
+       rtcSetGeometryMask(geom_id, ob->visibility);
+
+       rtcCommitGeometry(geom_id);
+       rtcAttachGeometryByID(scene, geom_id, i * 2 + 1);
+       rtcReleaseGeometry(geom_id);
+}
+
+void BVHEmbree::pack_nodes(const BVHNode *)
+{
+       /* Quite a bit of this code is for compatibility with Cycles' native BVH. */
+       if(!params.top_level) {
+               return;
+       }
+
+       for(size_t i = 0; i < pack.prim_index.size(); ++i) {
+               if(pack.prim_index[i] != -1) {
+                       if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
+                               pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
+                       else
+                               pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
+               }
+       }
+
+       size_t prim_offset = pack.prim_index.size();
+
+       /* reserve */
+       size_t prim_index_size = pack.prim_index.size();
+       size_t prim_tri_verts_size = pack.prim_tri_verts.size();
+
+       size_t pack_prim_index_offset = prim_index_size;
+       size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
+       size_t object_offset = 0;
+
+       map<Mesh*, int> mesh_map;
+
+       foreach(Object *ob, objects) {
+               Mesh *mesh = ob->mesh;
+               BVH *bvh = mesh->bvh;
+
+               if(mesh->need_build_bvh()) {
+                       if(mesh_map.find(mesh) == mesh_map.end()) {
+                               prim_index_size += bvh->pack.prim_index.size();
+                               prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
+                               mesh_map[mesh] = 1;
+                       }
+               }
+       }
+
+       mesh_map.clear();
+
+       pack.prim_index.resize(prim_index_size);
+       pack.prim_type.resize(prim_index_size);
+       pack.prim_object.resize(prim_index_size);
+       pack.prim_visibility.clear();
+       pack.prim_tri_verts.resize(prim_tri_verts_size);
+       pack.prim_tri_index.resize(prim_index_size);
+       pack.object_node.resize(objects.size());
+
+       int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
+       int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
+       int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
+       float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size())? &pack.prim_tri_verts[0]: NULL;
+       uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
+
+       /* merge */
+       foreach(Object *ob, objects) {
+               Mesh *mesh = ob->mesh;
+
+               /* We assume that if mesh doesn't need own BVH it was already included
+                * into a top-level BVH and no packing here is needed.
+                */
+               if(!mesh->need_build_bvh()) {
+                       pack.object_node[object_offset++] = prim_offset;
+                       continue;
+               }
+
+               /* if mesh already added once, don't add it again, but used set
+                * node offset for this object */
+               map<Mesh*, int>::iterator it = mesh_map.find(mesh);
+
+               if(mesh_map.find(mesh) != mesh_map.end()) {
+                       int noffset = it->second;
+                       pack.object_node[object_offset++] = noffset;
+                       continue;
+               }
+
+               BVHEmbree *bvh = (BVHEmbree*)mesh->bvh;
+
+               rtc_memory_monitor_func(stats, unaccounted_mem, true);
+               unaccounted_mem = 0;
+
+               int mesh_tri_offset = mesh->tri_offset;
+               int mesh_curve_offset = mesh->curve_offset;
+
+               /* fill in node indexes for instances */
+               pack.object_node[object_offset++] = prim_offset;
+
+               mesh_map[mesh] = pack.object_node[object_offset-1];
+
+               /* merge primitive, object and triangle indexes */
+               if(bvh->pack.prim_index.size()) {
+                       size_t bvh_prim_index_size = bvh->pack.prim_index.size();
+                       int *bvh_prim_index = &bvh->pack.prim_index[0];
+                       int *bvh_prim_type = &bvh->pack.prim_type[0];
+                       uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
+
+                       for(size_t i = 0; i < bvh_prim_index_size; ++i) {
+                               if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
+                                       pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
+                                       pack_prim_tri_index[pack_prim_index_offset] = -1;
+                               }
+                               else {
+                                       pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
+                                       pack_prim_tri_index[pack_prim_index_offset] =
+                                       bvh_prim_tri_index[i] + pack_prim_tri_verts_offset;
+                               }
+
+                               pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
+                               pack_prim_object[pack_prim_index_offset] = 0;
+
+                               ++pack_prim_index_offset;
+                       }
+               }
+
+               /* Merge triangle vertices data. */
+               if(bvh->pack.prim_tri_verts.size()) {
+                       const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
+                       memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
+                                  &bvh->pack.prim_tri_verts[0],
+                                  prim_tri_size*sizeof(float4));
+                       pack_prim_tri_verts_offset += prim_tri_size;
+               }
+
+               prim_offset += bvh->pack.prim_index.size();
+       }
+}
+
+void BVHEmbree::refit_nodes()
+{
+       /* Update all vertex buffers, then tell Embree to rebuild/-fit the BVHs. */
+       unsigned geom_id = 0;
+       foreach(Object *ob, objects) {
+               if(!params.top_level || (ob->is_traceable() && !ob->mesh->is_instanced())) {
+                       if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) {
+                               update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), ob->mesh);
+                               rtcCommitGeometry(rtcGetGeometry(scene,geom_id));
+                       }
+
+                       if(params.primitive_mask & PRIMITIVE_ALL_CURVE && ob->mesh->num_curves() > 0) {
+                               update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id+1), ob->mesh);
+                               rtcCommitGeometry(rtcGetGeometry(scene,geom_id+1));
+                       }
+               }
+               geom_id += 2;
+       }
+       rtcCommitScene(scene);
+}
+CCL_NAMESPACE_END
+
+#endif /* WITH_EMBREE */
diff --git a/intern/cycles/bvh/bvh_embree.h b/intern/cycles/bvh/bvh_embree.h
new file mode 100644 (file)
index 0000000..4b3219d
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2018, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BVH_EMBREE_H__
+#define __BVH_EMBREE_H__
+
+#ifdef WITH_EMBREE
+
+#include <embree3/rtcore.h>
+#include <embree3/rtcore_scene.h>
+
+#include "bvh/bvh.h"
+#include "bvh/bvh_params.h"
+
+#include "util/util_thread.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Mesh;
+
+/* BVH implementation that uses an Embree scene (RTCScene) for CPU ray
+ * traversal. Instances are created through BVH (constructor is protected,
+ * BVH is a friend).
+ */
+class BVHEmbree : public BVH
+{
+public:
+       virtual void build(Progress& progress, Stats *stats) override;
+       virtual ~BVHEmbree();
+       /* Embree scene that geometries are attached to. */
+       RTCScene scene;
+       static void destroy(RTCScene);
+protected:
+       friend class BVH;
+       BVHEmbree(const BVHParams& params, const vector<Object*>& objects);
+
+       virtual void pack_nodes(const BVHNode*) override;
+       virtual void refit_nodes() override;
+
+       /* Helpers taking an object and its index `i`; add_curves() attaches its
+        * geometry under ID i * 2 + 1. */
+       void add_object(Object *ob, int i);
+       void add_instance(Object *ob, int i);
+       void add_curves(Object *ob, int i);
+       void add_triangles(Object *ob, int i);
+
+       /* NOTE(review): presumably the number of bytes Embree allocated for this BVH - confirm. */
+       ssize_t mem_used;
+
+       /* Queue a scene in delayed_delete_scenes instead of releasing it right away. */
+       void add_delayed_delete_scene(RTCScene scene) { delayed_delete_scenes.push_back(scene); }
+       BVHEmbree *top_level;
+private:
+       void delete_rtcScene();
+       /* Copy the vertex / curve key data of `mesh` into the buffers of `geom_id`. */
+       void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh);
+       void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh);
+
+       /* Static state common to all BVHEmbree instances.
+        * NOTE(review): looks like a user-counted device guarded by the mutex - confirm. */
+       static RTCDevice rtc_shared_device;
+       static int rtc_shared_users;
+       static thread_mutex rtc_shared_mutex;
+
+       Stats *stats;
+       vector<RTCScene> delayed_delete_scenes;
+       /* Passed to rtcSetGeometryTessellationRate() for curve geometry. */
+       int curve_subdivisions;
+       /* Passed to rtcSetGeometryBuildQuality(). */
+       enum RTCBuildQuality build_quality;
+       /* use_curves / use_ribbons choose the Embree curve geometry type. */
+       bool use_curves, use_ribbons, dynamic_scene;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_EMBREE */
+
+#endif /* __BVH_EMBREE_H__ */
index d8dd7df..ebede6a 100644 (file)
@@ -90,6 +90,13 @@ public:
        /* Same as above, but for triangle primitives. */
        int num_motion_triangle_steps;
 
+       /* Same as in SceneParams. */
+       int bvh_type;
+
+       /* These are needed for Embree. */
+       int curve_flags;
+       int curve_subdivisions;
+
        /* fixed parameters */
        enum {
                MAX_DEPTH = 64,
@@ -123,6 +130,11 @@ public:
 
                num_motion_curve_steps = 0;
                num_motion_triangle_steps = 0;
+
+               bvh_type = 0;
+
+               curve_flags = 0;
+               curve_subdivisions = 4;
        }
 
        /* SAH costs */
index 2e386a6..d0f473a 100644 (file)
@@ -132,6 +132,12 @@ if(CYCLES_STANDALONE_REPOSITORY)
        set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
        set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
 
+       ####
+       # embree
+       if(WITH_CYCLES_EMBREE)
+               find_package(embree 3.2.4 REQUIRED)
+       endif()
+
        ####
        # Logging
        if(WITH_CYCLES_LOGGING)
index 731d6c0..9e27a48 100644 (file)
@@ -286,6 +286,9 @@ public:
                if(DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
                        bvh_layout_mask |= BVH_LAYOUT_BVH8;
                }
+#ifdef WITH_EMBREE
+               bvh_layout_mask |= BVH_LAYOUT_EMBREE;
+#endif /* WITH_EMBREE */
                return bvh_layout_mask;
        }
 
@@ -702,6 +705,9 @@ public:
                int start_sample = tile.start_sample;
                int end_sample = tile.start_sample + tile.num_samples;
 
+               _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+               _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+               
                for(int sample = start_sample; sample < end_sample; sample++) {
                        if(task.get_cancel() || task_pool.canceled()) {
                                if(task.need_finish_queue == false)
index 08efede..92cb66b 100644 (file)
@@ -82,6 +82,7 @@ set(SRC_BVH_HEADERS
        bvh/obvh_traversal.h
        bvh/obvh_volume.h
        bvh/obvh_volume_all.h
+       bvh/bvh_embree.h
 )
 
 set(SRC_HEADERS
index d11d999..0a5998a 100644 (file)
  * the code has been extended and modified to support more primitives and work
  * with CPU/CUDA/OpenCL. */
 
+#ifdef __EMBREE__
+#include "kernel/bvh/bvh_embree.h"
+#endif
+
 CCL_NAMESPACE_BEGIN
 
 #include "kernel/bvh/bvh_types.h"
@@ -185,6 +189,21 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
        if (!scene_intersect_valid(&ray)) {
                return false;
        }
+#ifdef __EMBREE__
+       if(kernel_data.bvh.scene) {
+               isect->t = ray.t;
+               CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
+               IntersectContext rtc_ctx(&ctx);
+               RTCRayHit ray_hit;
+               kernel_embree_setup_rayhit(ray, ray_hit, visibility);
+               rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
+               if(ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
+                       kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
+                       return true;
+               }
+               return false;
+       }
+#endif /* __EMBREE__ */
 #ifdef __OBJECT_MOTION__
        if(kernel_data.bvh.have_motion) {
 #  ifdef __HAIR__
@@ -232,6 +251,55 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
        if (!scene_intersect_valid(&ray)) {
                return false;
        }
+#ifdef __EMBREE__
+       if(kernel_data.bvh.scene) {
+               CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
+               ctx.lcg_state = lcg_state;
+               ctx.max_hits = max_hits;
+               ctx.ss_isect = local_isect;
+               local_isect->num_hits = 0;
+               ctx.sss_object_id = local_object;
+               IntersectContext rtc_ctx(&ctx);
+               RTCRay rtc_ray;
+               kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+
+               /* Get the Embree scene for this intersection. */
+               RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
+               if(geom) {
+                       Transform ob_itfm;
+                       float3 P = ray.P;
+                       float3 dir = ray.D;
+                       float3 idir = ray.D;
+                       const int object_flag = kernel_tex_fetch(__object_flag, local_object);
+                       if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+                               Transform ob_itfm;
+                               rtc_ray.tfar = bvh_instance_motion_push(kg,
+                                                                                                  local_object,
+                                                                                                  &ray,
+                                                                                                  &P,
+                                                                                                  &dir,
+                                                                                                  &idir,
+                                                                                                  ray.t,
+                                                                                                  &ob_itfm);
+                               /* bvh_instance_motion_push() returns the inverse transform but it's not needed here. */
+                               (void)ob_itfm;
+
+                               rtc_ray.org_x = P.x;
+                               rtc_ray.org_y = P.y;
+                               rtc_ray.org_z = P.z;
+                               rtc_ray.dir_x = dir.x;
+                               rtc_ray.dir_y = dir.y;
+                               rtc_ray.dir_z = dir.z;
+                       }
+                       RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
+                       if(scene) {
+                               rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
+                       }
+               }
+
+               return local_isect->num_hits > 0;
+       }
+#endif /* __EMBREE__ */
 #ifdef __OBJECT_MOTION__
        if(kernel_data.bvh.have_motion) {
                return bvh_intersect_local_motion(kg,
@@ -262,6 +330,24 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
        if (!scene_intersect_valid(ray)) {
                return false;
        }
+#  ifdef __EMBREE__
+       if(kernel_data.bvh.scene) {
+               CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
+               ctx.isect_s = isect;
+               ctx.max_hits = max_hits;
+               ctx.num_hits = 0;
+               IntersectContext rtc_ctx(&ctx);
+               RTCRay rtc_ray;
+               kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
+               rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
+
+               if(ctx.num_hits > max_hits) {
+                       return true;
+               }
+               *num_hits = ctx.num_hits;
+               return rtc_ray.tfar == -INFINITY;
+       }
+#  endif
 #  ifdef __OBJECT_MOTION__
        if(kernel_data.bvh.have_motion) {
 #    ifdef __HAIR__
@@ -355,6 +441,19 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
        if (!scene_intersect_valid(ray)) {
                return false;
        }
+#  ifdef __EMBREE__
+       if(kernel_data.bvh.scene) {
+               CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
+               ctx.isect_s = isect;
+               ctx.max_hits = max_hits;
+               ctx.num_hits = 0;
+               IntersectContext rtc_ctx(&ctx);
+               RTCRay rtc_ray;
+               kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+               rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
+               return rtc_ray.tfar == -INFINITY;
+       }
+#  endif
 #  ifdef __OBJECT_MOTION__
        if(kernel_data.bvh.have_motion) {
                return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
new file mode 100644 (file)
index 0000000..34a099e
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2018, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <embree3/rtcore_ray.h>
+#include <embree3/rtcore_scene.h>
+
+#include "kernel/kernel_compat_cpu.h"
+#include "kernel/split/kernel_split_data_types.h"
+#include "kernel/kernel_globals.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+struct CCLIntersectContext  {
+       typedef enum {
+               RAY_REGULAR = 0,
+               RAY_SHADOW_ALL = 1,
+               RAY_SSS = 2,
+               RAY_VOLUME_ALL = 3,
+               
+       } RayType;
+
+       KernelGlobals *kg;
+       RayType type;
+
+       /* for shadow rays */
+       Intersection *isect_s;
+       int max_hits;
+       int num_hits;
+
+       /* for SSS Rays: */
+       LocalIntersection *ss_isect;
+       int sss_object_id;
+       uint *lcg_state;
+
+       CCLIntersectContext(KernelGlobals *kg_,  RayType type_)
+       {
+               kg = kg_;
+               type = type_;
+               max_hits = 1;
+               num_hits = 0;
+               isect_s = NULL;
+               ss_isect = NULL;
+               sss_object_id = -1;
+               lcg_state = NULL;
+       }
+};
+
+/* Pairs Embree's RTCIntersectContext with a pointer to the Cycles-side
+ * context.
+ * NOTE(review): `context` is the first member; presumably the filter
+ * callbacks cast the RTCIntersectContext pointer back to this class - confirm
+ * before reordering members. */
+class IntersectContext
+{
+public:
+       IntersectContext(CCLIntersectContext* ctx)
+       : userRayExt(ctx)
+       {
+               rtcInitIntersectContext(&context);
+       }
+       RTCIntersectContext context;
+       CCLIntersectContext* userRayExt;
+};
+
+ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray, const uint visibility)
+{
+       rtc_ray.org_x = ray.P.x;
+       rtc_ray.org_y = ray.P.y;
+       rtc_ray.org_z = ray.P.z;
+       rtc_ray.dir_x = ray.D.x;
+       rtc_ray.dir_y = ray.D.y;
+       rtc_ray.dir_z = ray.D.z;
+       rtc_ray.tnear = 0.0f;
+       rtc_ray.tfar = ray.t;
+       rtc_ray.time = ray.time;
+       rtc_ray.mask = visibility;
+}
+
+/* Fill an Embree RTCRayHit from a Cycles Ray and reset the hit part to
+ * "no hit". */
+ccl_device_inline void kernel_embree_setup_rayhit(const Ray& ray, RTCRayHit& rayhit, const uint visibility)
+{
+       kernel_embree_setup_ray(ray, rayhit.ray, visibility);
+       rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
+       rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
+       /* kernel_embree_convert_hit() branches on instID[0], so don't leave it
+        * holding an indeterminate value. */
+       rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
+}
+
+ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect)
+{
+       bool is_hair = hit->geomID & 1;
+       isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
+       isect->v = is_hair ? hit->v : hit->u;
+       isect->t = ray->tfar;
+       isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+       if(hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
+               RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
+               isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, hit->instID[0]/2);
+               isect->object = hit->instID[0]/2;
+       }
+       else {
+               isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
+               isect->object = OBJECT_NONE;
+       }
+       isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+}
+
+ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int local_object_id)
+{
+       isect->u = 1.0f - hit->v - hit->u;
+       isect->v = hit->u;
+       isect->t = ray->tfar;
+       isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
+       RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
+       isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, local_object_id);
+       isect->object = local_object_id;
+       isect->type = kernel_tex_fetch(__prim_type, isect->prim);
+}
+
+CCL_NAMESPACE_END
index 4cfbe21..5901429 100644 (file)
@@ -817,16 +817,24 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg,
                        sd->Ng = normalize(-(D - tg * (dot(tg, D))));
                }
                else {
-                       /* direction from inside to surface of curve */
-                       float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
-                       sd->Ng = normalize(P - p_curr);
+#ifdef __EMBREE__
+                       if(kernel_data.bvh.scene) {
+                               sd->Ng = normalize(isect->Ng);
+                       }
+                       else
+#endif
+                       {
+                               /* direction from inside to surface of curve */
+                               float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
+                               sd->Ng = normalize(P - p_curr);
 
-                       /* adjustment for changing radius */
-                       float gd = isect->v;
+                               /* adjustment for changing radius */
+                               float gd = isect->v;
 
-                       if(gd != 0.0f) {
-                               sd->Ng = sd->Ng - gd * tg;
-                               sd->Ng = normalize(sd->Ng);
+                               if(gd != 0.0f) {
+                                       sd->Ng = sd->Ng - gd * tg;
+                                       sd->Ng = normalize(sd->Ng);
+                               }
                        }
                }
 
index 0eb8ce2..669c932 100644 (file)
@@ -78,6 +78,12 @@ ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int
        const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
 
        Transform tfm;
+#ifdef __EMBREE__
+       if(kernel_data.bvh.scene) {
+               transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time);
+       }
+       else
+#endif
        transform_motion_array_interpolate(&tfm, motion, num_steps, time);
 
        return tfm;
index 230e90c..a30169c 100644 (file)
 #  define __KERNEL_CPU__
 #endif
 
+#if defined(__KERNEL_CPU__) && defined(WITH_EMBREE)
+#include <embree3/rtcore.h>
+#include <embree3/rtcore_scene.h>
+#endif
+
 /* TODO(sergey): This is only to make it possible to include this header
  * from outside of the kernel. but this could be done somewhat cleaner?
  */
@@ -97,6 +102,9 @@ CCL_NAMESPACE_BEGIN
 #  define __SHADOW_RECORD_ALL__
 #  define __VOLUME_DECOUPLED__
 #  define __VOLUME_RECORD_ALL__
+#  ifdef WITH_EMBREE
+#    define __EMBREE__
+#  endif
 #endif  /* __KERNEL_CPU__ */
 
 #ifdef __KERNEL_CUDA__
@@ -722,6 +730,9 @@ typedef struct Ray {
 /* Intersection */
 
 typedef struct Intersection {
+#ifdef __EMBREE__
+       float3 Ng;
+#endif
        float t, u, v;
        int prim;
        int object;
@@ -1396,7 +1407,7 @@ typedef enum KernelBVHLayout {
        BVH_LAYOUT_BVH2 = (1 << 0),
        BVH_LAYOUT_BVH4 = (1 << 1),
        BVH_LAYOUT_BVH8 = (1 << 2),
-
+       BVH_LAYOUT_EMBREE = (1 << 3),
        BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
        BVH_LAYOUT_ALL = (unsigned int)(-1),
 } KernelBVHLayout;
@@ -1409,7 +1420,13 @@ typedef struct KernelBVH {
        int have_instancing;
        int bvh_layout;
        int use_bvh_steps;
-       int pad1, pad2;
+       int pad1;
+#ifdef __EMBREE__
+       RTCScene scene;
+#else
+       void *unused;
+#endif
+       int pad2, pad3;
 } KernelBVH;
 static_assert_align(KernelBVH, 16);
 
index 6f61938..adc9a07 100644 (file)
 #include "util/util_progress.h"
 #include "util/util_set.h"
 
+#ifdef WITH_EMBREE
+#  include "bvh/bvh_embree.h"
+#endif
+
 CCL_NAMESPACE_BEGIN
 
 /* Triangle */
@@ -1073,6 +1077,9 @@ void Mesh::compute_bvh(Device *device,
                                                      params->use_bvh_unaligned_nodes;
                        bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
                        bparams.num_motion_curve_steps = params->num_bvh_time_steps;
+                       bparams.bvh_type = params->bvh_type;
+                       bparams.curve_flags = dscene->data.curve.curveflags;
+                       bparams.curve_subdivisions = dscene->data.curve.subdivisions;
 
                        delete bvh;
                        bvh = BVH::create(bparams, objects);
@@ -1861,14 +1868,32 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
                                      scene->params.use_bvh_unaligned_nodes;
        bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
        bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
+       bparams.bvh_type = scene->params.bvh_type;
+       bparams.curve_flags = dscene->data.curve.curveflags;
+       bparams.curve_subdivisions = dscene->data.curve.subdivisions;
 
        VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout)
                << " layout.";
 
+#ifdef WITH_EMBREE
+       if(bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
+               if(dscene->data.bvh.scene) {
+                       BVHEmbree::destroy(dscene->data.bvh.scene);
+               }
+       }
+#endif
+
        BVH *bvh = BVH::create(bparams, scene->objects);
-       bvh->build(progress);
+       bvh->build(progress, &device->stats);
 
        if(progress.get_cancel()) {
+#ifdef WITH_EMBREE
+               if(bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
+                       if(dscene->data.bvh.scene) {
+                               BVHEmbree::destroy(dscene->data.bvh.scene);
+                       }
+               }
+#endif
                delete bvh;
                return;
        }
@@ -1923,6 +1948,16 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
        dscene->data.bvh.bvh_layout = bparams.bvh_layout;
        dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
 
+
+#ifdef WITH_EMBREE
+       if(bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
+               dscene->data.bvh.scene = ((BVHEmbree*)bvh)->scene;
+       }
+       else {
+               dscene->data.bvh.scene = NULL;
+       }
+#endif
+
        delete bvh;
 }
 
index 9f93fed..ccaca87 100644 (file)
@@ -214,6 +214,11 @@ void Scene::device_update(Device *device_, Progress& progress)
        progress.set_status("Updating Objects");
        object_manager->device_update(device, &dscene, this, progress);
 
+       if(progress.get_cancel() || device->have_error()) return;
+       
+       progress.set_status("Updating Hair Systems");
+       curve_system_manager->device_update(device, &dscene, this, progress);
+
        if(progress.get_cancel() || device->have_error()) return;
 
        progress.set_status("Updating Particle Systems");
@@ -240,12 +245,7 @@ void Scene::device_update(Device *device_, Progress& progress)
        camera->device_update_volume(device, &dscene, this);
 
        if(progress.get_cancel() || device->have_error()) return;
-
-       progress.set_status("Updating Hair Systems");
-       curve_system_manager->device_update(device, &dscene, this, progress);
-
-       if(progress.get_cancel() || device->have_error()) return;
-
+       
        progress.set_status("Updating Lookup Tables");
        lookup_tables->device_update(device, &dscene);
 
index dd80695..543feed 100644 (file)
@@ -166,7 +166,6 @@ public:
        bool use_bvh_spatial_split;
        bool use_bvh_unaligned_nodes;
        int num_bvh_time_steps;
-
        bool persistent_data;
        int texture_limit;
 
index e781f85..d3bfb1d 100644 (file)
@@ -422,6 +422,26 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
        transform_compose(tfm, &decomp);
 }
 
+/* Interpolate a motion transform by linearly blending composed matrices.
+ *
+ * Selects the two neighbouring entries of `motion` that bracket `time`,
+ * composes each of them into a Transform with transform_compose(), and
+ * writes the row-wise linear blend of the two results into `tfm`.
+ *
+ * tfm:      output transform; its x, y and z rows are overwritten.
+ * motion:   array of decomposed transforms, indexed by motion step.
+ * numsteps: number of motion steps; the highest index read from `motion`
+ *           is numsteps - 1.
+ *           NOTE(review): for numsteps < 2 the computed step index becomes
+ *           negative - confirm every caller passes at least 2.
+ * time:     NOTE(review): presumably normalized to [0, 1] - confirm
+ *           against callers. */
+ccl_device void transform_motion_array_interpolate_straight(Transform *tfm, const ccl_global DecomposedTransform *motion, uint numsteps, float time)
+{
+       /* Figure out which steps we need to interpolate. */
+       int maxstep = numsteps - 1;
+       /* Clamp to maxstep - 1 so that `step + 1` below is still a valid index
+        * when time * maxstep reaches maxstep. */
+       int step = min((int)(time*maxstep), maxstep - 1);
+       /* Blend weight: offset of time * maxstep from the selected step. */
+       float t = time * maxstep - step;
+
+       const ccl_global DecomposedTransform *a = motion + step;
+       const ccl_global DecomposedTransform *b = motion + step + 1;
+       Transform step1, step2;
+
+       /* Compose both bracketing steps into matrices first; the blend is done
+        * on the composed matrices, not on the decomposed components. */
+       transform_compose(&step1, a);
+       transform_compose(&step2, b);
+
+       /* matrix lerp */
+       tfm->x = (1.0f - t) * step1.x + t * step2.x;
+       tfm->y = (1.0f - t) * step1.y + t * step2.y;
+       tfm->z = (1.0f - t) * step1.z + t * step2.z;
+}
+
 #ifndef __KERNEL_GPU__
 
 class BoundBox2D;
index ae16bd4..357cb17 100644 (file)
@@ -165,6 +165,10 @@ if(WITH_CYCLES_OSL)
        add_definitions(-DWITH_CYCLES_OSL)
 endif()
 
+# When the WITH_CYCLES_EMBREE build option is enabled, expose it to the
+# sources built from this directory as the WITH_CYCLES_EMBREE preprocessor
+# define.
+if(WITH_CYCLES_EMBREE)
+       add_definitions(-DWITH_CYCLES_EMBREE)
+endif()
+
 if(WITH_FREESTYLE)
        list(APPEND INC
                ../../freestyle/intern/python