Merge branch 'master' into blender2.8
authorSergey Sharybin <sergey.vfx@gmail.com>
Wed, 28 Nov 2018 13:42:38 +0000 (14:42 +0100)
committerSergey Sharybin <sergey.vfx@gmail.com>
Wed, 28 Nov 2018 13:42:38 +0000 (14:42 +0100)
20 files changed:
build_files/cmake/macros.cmake
intern/CMakeLists.txt
intern/numaapi/AUTHORS [new file with mode: 0644]
intern/numaapi/CMakeLists.txt [new file with mode: 0644]
intern/numaapi/LICENSE [new file with mode: 0644]
intern/numaapi/README [new file with mode: 0644]
intern/numaapi/README.blender [new file with mode: 0644]
intern/numaapi/include/numaapi.h [new file with mode: 0644]
intern/numaapi/source/build_config.h [new file with mode: 0644]
intern/numaapi/source/numaapi.c [new file with mode: 0644]
intern/numaapi/source/numaapi_linux.c [new file with mode: 0644]
intern/numaapi/source/numaapi_stub.c [new file with mode: 0644]
intern/numaapi/source/numaapi_win32.c [new file with mode: 0644]
source/blender/blenlib/BLI_system.h
source/blender/blenlib/BLI_threads.h
source/blender/blenlib/CMakeLists.txt
source/blender/blenlib/intern/system.c
source/blender/blenlib/intern/threads.c
source/blender/windowmanager/intern/wm_jobs.c
source/creator/creator.c

index 948cedfc48e56b6072704f7e19eaa2d11a08a520..e250eae19a4d3b1b21487efe644cbaca7c0e3569 100644 (file)
@@ -695,6 +695,7 @@ function(SETUP_BLENDER_SORTED_LIBS)
                bf_intern_glew_mx
                bf_intern_clog
                bf_intern_opensubdiv
+               bf_intern_numaapi
        )
 
        if(NOT WITH_SYSTEM_GLOG)
index c7f0f414fb1f63fda51a621b64c9bf2ebd0a63ca..16bbefc1c45cfdf878ec1455a2dd4f827cf86115 100644 (file)
@@ -30,6 +30,7 @@ add_subdirectory(ghost)
 add_subdirectory(guardedalloc)
 add_subdirectory(libmv)
 add_subdirectory(memutil)
+add_subdirectory(numaapi)
 add_subdirectory(opencolorio)
 add_subdirectory(opensubdiv)
 add_subdirectory(mikktspace)
diff --git a/intern/numaapi/AUTHORS b/intern/numaapi/AUTHORS
new file mode 100644 (file)
index 0000000..a824c03
--- /dev/null
@@ -0,0 +1 @@
+Sergey Sharybin <sergey.vfx@gmail.com>
diff --git a/intern/numaapi/CMakeLists.txt b/intern/numaapi/CMakeLists.txt
new file mode 100644 (file)
index 0000000..587a00b
--- /dev/null
@@ -0,0 +1,39 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+# Directory with the public header (include/numaapi.h).
+set(INC
+       ./include
+)
+
+# No system include directories are needed.
+set(INC_SYS
+
+)
+
+# Every platform back-end is listed unconditionally. The Linux and stub
+# sources wrap their content in an OS check from build_config.h; presumably
+# the win32 source does the same (verify against that file).
+set(SRC
+       source/numaapi.c
+       source/numaapi_linux.c
+       source/numaapi_stub.c
+       source/numaapi_win32.c
+
+       include/numaapi.h
+       source/build_config.h
+)
+
+# Makes numaapi_linux.c load the numa library with dlopen()/dlsym() instead of
+# including <numa.h>.
+add_definitions(-DWITH_DYNLOAD)
+
+blender_add_lib(bf_intern_numaapi "${SRC}" "${INC}" "${INC_SYS}")
diff --git a/intern/numaapi/LICENSE b/intern/numaapi/LICENSE
new file mode 100644 (file)
index 0000000..3562d7a
--- /dev/null
@@ -0,0 +1,19 @@
+Copyright (c) 2016 libnumaapi authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
diff --git a/intern/numaapi/README b/intern/numaapi/README
new file mode 100644 (file)
index 0000000..a510ff1
--- /dev/null
@@ -0,0 +1,7 @@
+LibNumaAPI aims to provide one common NUMA API across all supported
+platforms, so that cross-platform applications do not need to worry
+about platform-specific implementation details.
+
+LICENSE
+
+LibNumaAPI library is released under the MIT license.
diff --git a/intern/numaapi/README.blender b/intern/numaapi/README.blender
new file mode 100644 (file)
index 0000000..6610737
--- /dev/null
@@ -0,0 +1,5 @@
+Project: LibNumaAPI
+URL: https://github.com/Nazg-Gul/libNumaAPI
+License: MIT
+Upstream version: f83d41ec4d7
+Local modifications: None
diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h
new file mode 100644 (file)
index 0000000..a4f32d8
--- /dev/null
@@ -0,0 +1,108 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#ifndef __LIBNUMAAPI_H__
+#define __LIBNUMAAPI_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NUMAAPI_VERSION_MAJOR 1
+#define NUMAAPI_VERSION_MINOR 0
+
+// Status code returned by numaAPI_Initialize(). Use numaAPI_ResultAsString()
+// to get a printable name for a value.
+typedef enum NUMAAPI_Result {
+  NUMAAPI_SUCCESS       = 0,
+  // NUMA is not available on this platform.
+  NUMAAPI_NOT_AVAILABLE = 1,
+  // Generic error, no real details are available.
+  NUMAAPI_ERROR         = 2,
+  // Error installing atexit() handlers.
+  NUMAAPI_ERROR_ATEXIT  = 3,
+} NUMAAPI_Result;
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Initialize NUMA API.
+//
+// This is the first function which must be called, before any other NUMA
+// function can be used.
+NUMAAPI_Result numaAPI_Initialize(void);
+
+// Get string representation of NUMAAPI_Result.
+const char* numaAPI_ResultAsString(NUMAAPI_Result result);
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Get number of available nodes.
+//
+// This is in fact an index of last node plus one and it's not guaranteed
+// that all nodes up to this one are available.
+int numaAPI_GetNumNodes(void);
+
+// Returns truth if the given node is available for compute.
+bool numaAPI_IsNodeAvailable(int node);
+
+// Get number of available processors on a given node.
+int numaAPI_GetNumNodeProcessors(int node);
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Runs the current process and its children on a specific node.
+//
+// Returns truth if affinity has successfully changed.
+//
+// NOTE: This function can not change active CPU group. Mainly designed to deal
+// with Threadripper 2 topology, to make it possible to gain maximum performance
+// for the main application thread.
+bool numaAPI_RunProcessOnNode(int node);
+
+// Runs the current thread and its children on a specific node.
+//
+// Returns truth if affinity has successfully changed.
+bool numaAPI_RunThreadOnNode(int node);
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Allocate memory on a given node.
+void* numaAPI_AllocateOnNode(size_t size, int node);
+
+// Allocate memory in the local memory, closest to the current node.
+void* numaAPI_AllocateLocal(size_t size);
+
+// Frees size bytes of memory starting at start.
+//
+// TODO(sergey): Consider making it regular free() semantic.
+void numaAPI_Free(void* start, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __LIBNUMAAPI_H__
diff --git a/intern/numaapi/source/build_config.h b/intern/numaapi/source/build_config.h
new file mode 100644 (file)
index 0000000..444adcc
--- /dev/null
@@ -0,0 +1,379 @@
+// Copyright (c) 2018, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#ifndef __BUILD_CONFIG_H__
+#define __BUILD_CONFIG_H__
+
+#include <limits.h>
+#include <stdint.h>
+
+// Initially is based on Chromium's build_config.h, with tweaks and extensions
+// needed for this project.
+//
+// NOTE: All commonly used symbols (which are checked on a "top" level, from
+// outside of any platform-specific ifdef block) are to be explicitly defined
+// to 0 when they are not "active". This is extra lines of code in this file,
+// but is not being edited that often. Such approach helps catching cases when
+// one attempted to access build configuration variable without including the
+// header by simply using -Wundef compiler attribute.
+//
+// NOTE: Not having things explicitly defined to 0 is harmless (in terms it
+// follows same rules as Google projects) and will simply cause compiler to
+// become more noisy, which is simple to correct.
+
+////////////////////////////////////////////////////////////////////////////////
+// A set of macros to use for platform detection.
+
+#if defined(__native_client__)
+// __native_client__ must be first, so that other OS_ defines are not set.
+#  define OS_NACL 1
+#elif defined(_AIX)
+#  define OS_AIX 1
+#elif defined(ANDROID)
+#  define OS_ANDROID 1
+#elif defined(__APPLE__)
+// Only include TargetConditions after testing ANDROID as some android builds
+// on mac don't have this header available and it's not needed unless the target
+// is really mac/ios.
+#  include <TargetConditionals.h>
+#  define OS_MACOSX 1
+#  if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
+#    define OS_IOS 1
+#  endif  // defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
+#elif defined(__HAIKU__)
+#  define OS_HAIKU 1
+#elif defined(__hpux)
+#  define OS_HPUX 1
+#elif defined(__linux__)
+#  define OS_LINUX 1
+// Include a system header to pull in features.h for glibc/uclibc macros.
+#  include <unistd.h>
+#  if defined(__GLIBC__) && !defined(__UCLIBC__)
+// We really are using glibc, not uClibc pretending to be glibc.
+#    define LIBC_GLIBC 1
+#  endif
+#elif defined(__sgi)
+#  define OS_IRIX 1
+#elif defined(_WIN32)
+#  define OS_WIN 1
+#elif defined(__FreeBSD__)
+#  define OS_FREEBSD 1
+#elif defined(__NetBSD__)
+#  define OS_NETBSD 1
+#elif defined(__OpenBSD__)
+#  define OS_OPENBSD 1
+#elif defined(__sun)
+#  define OS_SOLARIS 1
+#elif defined(__QNXNTO__)
+#  define OS_QNX 1
+#else
+#  error Please add support for your platform in build_config.h
+#endif
+
+#if !defined(OS_AIX)
+#  define OS_AIX 0
+#endif
+#if !defined(OS_NACL)
+#  define OS_NACL 0
+#endif
+#if !defined(OS_ANDROID)
+#  define OS_ANDROID 0
+#endif
+#if !defined(OS_MACOSX)
+#  define OS_MACOSX 0
+#endif
+#if !defined(OS_IOS)
+#  define OS_IOS 0
+#endif
+#if !defined(OS_HAIKU)
+#  define OS_HAIKU 0
+#endif
+#if !defined(OS_HPUX)
+#  define OS_HPUX 0
+#endif
+#if !defined(OS_IRIX)
+#  define OS_IRIX 0
+#endif
+#if !defined(OS_LINUX)
+#  define OS_LINUX 0
+#endif
+#if !defined(LIBC_GLIBC)
+#  define LIBC_GLIBC 0
+#endif
+#if !defined(OS_WIN)
+#  define OS_WIN 0
+#endif
+#if !defined(OS_FREEBSD)
+#  define OS_FREEBSD 0
+#endif
+#if !defined(OS_NETBSD)
+#  define OS_NETBSD 0
+#endif
+#if !defined(OS_OPENBSD)
+#  define OS_OPENBSD 0
+#endif
+#if !defined(OS_SOLARIS)
+#  define OS_SOLARIS 0
+#endif
+#if !defined(OS_QNX)
+#  define OS_QNX 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// *BSD OS family detection.
+//
+// For access to standard BSD features, use OS_BSD instead of a
+// more specific macro.
+#if OS_FREEBSD || OS_OPENBSD || OS_NETBSD
+#  define OS_BSD 1
+#else
+#  define OS_BSD 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// POSIX system detection.
+//
+// For access to standard POSIXish features use OS_POSIX instead of a
+// more specific macro.
+#if OS_MACOSX || OS_LINUX || OS_BSD || OS_SOLARIS ||OS_ANDROID || OS_NACL ||  \
+    OS_QNX || OS_HAIKU || OS_AIX || OS_HPUX || OS_IRIX
+#  define OS_POSIX 1
+#else
+#  define OS_POSIX 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Compiler detection, including its capabilities.
+
+#if defined(__clang__)
+#  define COMPILER_CLANG 1
+#elif defined(__GNUC__)
+#  define COMPILER_GCC 1
+#  define COMPILER_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#elif defined(_MSC_VER)
+#  define COMPILER_MSVC 1
+#  define COMPILER_MSVC_VERSION (_MSC_VER)
+#elif defined(__MINGW32__)
+#  define COMPILER_MINGW32 1
+#elif defined(__MINGW64__)
+#  define COMPILER_MINGW64 1
+#else
+#  error Please add support for your compiler in build_config.h
+#endif
+
+#if !defined(COMPILER_CLANG)
+#  define COMPILER_CLANG 0
+#endif
+#if !defined(COMPILER_GCC)
+#  define COMPILER_GCC 0
+#endif
+#if !defined(COMPILER_MSVC)
+#  define COMPILER_MSVC 0
+#endif
+#if !defined(COMPILER_MINGW32)
+#  define COMPILER_MINGW32 0
+#endif
+#if !defined(COMPILER_MINGW64)
+#  define COMPILER_MINGW64 0
+#endif
+
+// Compiler is any of MinGW family.
+#if COMPILER_MINGW32 || COMPILER_MINGW64
+#  define COMPILER_MINGW 1
+#else
+#  define COMPILER_MINGW 0
+#endif
+
+// Check what is the latest C++ specification the compiler supports.
+//
+// NOTE: Use explicit definition here to avoid expansion-to-defined warning from
+// being generated. While this will most likely be a false-positive warning in
+// this particular case, that warning might be helpful to catch errors elsewhere.
+
+// C++11 check.
+#if ((defined(__cplusplus) && (__cplusplus > 199711L)) || \
+     (defined(_MSC_VER) && (_MSC_VER >= 1800)))
+#  define COMPILER_SUPPORTS_CXX11 1
+#else
+#  define COMPILER_SUPPORTS_CXX11 0
+#endif
+// C++14 check.
+#if (defined(__cplusplus) && (__cplusplus > 201311L))
+#  define COMPILER_SUPPORTS_CXX14  1
+#else
+#  define COMPILER_SUPPORTS_CXX14  0
+#endif
+// C++17 check.
+#if (defined(__cplusplus) && (__cplusplus > 201611L))
+#  define COMPILER_SUPPORTS_CXX17  1
+#else
+#  define COMPILER_SUPPORTS_CXX17  0
+#endif
+// C++20 check.
+#if (defined(__cplusplus) && (__cplusplus > 201911L))
+#  define COMPILER_SUPPORTS_CXX20  1
+#else
+#  define COMPILER_SUPPORTS_CXX20  0
+#endif
+
+// COMPILER_USE_ADDRESS_SANITIZER is defined to 1 when it is detected that
+// compilation happens with address sanitizer enabled, and to 0 otherwise. This
+// allows to give tips to sanitizer, or maybe work around some known issues
+// with third party libraries.
+//
+// A value provided from outside (for example on the command line) is kept.
+//
+// Detection uses the __SANITIZE_ADDRESS__ predefined macro when present, and
+// otherwise asks __has_feature(address_sanitizer). The mere existence of
+// __has_feature says nothing about the sanitizer: it is available in every
+// Clang build, so the feature itself has to be queried.
+#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
+#  if defined(__SANITIZE_ADDRESS__)
+#    define COMPILER_USE_ADDRESS_SANITIZER 1
+#  elif defined(__has_feature)
+#    if __has_feature(address_sanitizer)
+#      define COMPILER_USE_ADDRESS_SANITIZER 1
+#    endif
+#  endif
+#endif
+
+#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
+#  define COMPILER_USE_ADDRESS_SANITIZER 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Processor architecture detection.
+//
+// For more info on what's defined, see:
+//
+//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+//   http://www.agner.org/optimize/calling_conventions.pdf
+//
+//   or with gcc, run: "echo | gcc -E -dM -"
+#if defined(_M_X64) || defined(__x86_64__)
+#  define ARCH_CPU_X86_FAMILY 1
+#  define ARCH_CPU_X86_64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(_M_IX86) || defined(__i386__)
+#  define ARCH_CPU_X86_FAMILY 1
+#  define ARCH_CPU_X86 1
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__ARMEL__)
+#  define ARCH_CPU_ARM_FAMILY 1
+#  define ARCH_CPU_ARMEL 1
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__aarch64__)
+#  define ARCH_CPU_ARM_FAMILY 1
+#  define ARCH_CPU_ARM64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__pnacl__)
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__MIPSEL__)
+#  if defined(__LP64__)
+#    define ARCH_CPU_MIPS64_FAMILY 1
+#    define ARCH_CPU_MIPS64EL 1
+#    define ARCH_CPU_64_BITS 1
+#    define ARCH_CPU_LITTLE_ENDIAN 1
+#  else
+#    define ARCH_CPU_MIPS_FAMILY 1
+#    define ARCH_CPU_MIPSEL 1
+#    define ARCH_CPU_32_BITS 1
+#    define ARCH_CPU_LITTLE_ENDIAN 1
+#  endif
+#elif defined(__MIPSEB__)
+#  if defined(__LP64__)
+#    define ARCH_CPU_MIPS64_FAMILY 1
+#    define ARCH_CPU_MIPS64EB 1
+#    define ARCH_CPU_64_BITS 1
+#    define ARCH_CPU_BIG_ENDIAN 1
+#  else
+#    define ARCH_CPU_MIPS_FAMILY 1
+#    define ARCH_CPU_MIPSEB 1
+#    define ARCH_CPU_32_BITS 1
+#    define ARCH_CPU_BIG_ENDIAN 1
+#  endif
+#else
+#  error Please add support for your architecture in build_config.h
+#endif
+
+#if !defined(ARCH_CPU_LITTLE_ENDIAN)
+#  define ARCH_CPU_LITTLE_ENDIAN 0
+#endif
+#if !defined(ARCH_CPU_BIG_ENDIAN)
+#  define ARCH_CPU_BIG_ENDIAN 0
+#endif
+
+#if !defined(ARCH_CPU_32_BITS)
+#  define ARCH_CPU_32_BITS 0
+#endif
+#if !defined(ARCH_CPU_64_BITS)
+#  define ARCH_CPU_64_BITS 0
+#endif
+
+#if !defined(ARCH_CPU_X86_FAMILY)
+#  define ARCH_CPU_X86_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_ARM_FAMILY)
+#  define ARCH_CPU_ARM_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_MIPS_FAMILY)
+#  define ARCH_CPU_MIPS_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_MIPS64_FAMILY)
+#  define ARCH_CPU_MIPS64_FAMILY 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Sizes of platform-dependent types.
+
+#if defined(__SIZEOF_POINTER__)
+#  define PLATFORM_SIZEOF_PTR __SIZEOF_POINTER__
+#elif defined(UINTPTR_MAX)
+#  if (UINTPTR_MAX == 0xffffffff)
+#    define PLATFORM_SIZEOF_PTR 4
+#  elif (UINTPTR_MAX == 0xffffffffffffffff)  // NOLINT
+#    define PLATFORM_SIZEOF_PTR 8
+#  endif
+#elif defined(__WORDSIZE)
+#  if (__WORDSIZE == 32)
+#    define PLATFORM_SIZEOF_PTR 4
+#  else if (__WORDSIZE == 64)
+#    define PLATFORM_SIZEOF_PTR 8
+#  endif
+#endif
+#if !defined(PLATFORM_SIZEOF_PTR)
+#  error "Cannot find pointer size"
+#endif
+
+#if (UINT_MAX == 0xffffffff)
+#  define PLATFORM_SIZEOF_INT 4
+#elif (UINT_MAX == 0xffffffffffffffff)  // NOLINT
+#  define PLATFORM_SIZEOF_INT 8
+#else
+#  error "Cannot find int size"
+#endif
+
+#if (USHRT_MAX == 0xffffffff)
+#  define PLATFORM_SIZEOF_SHORT 4
+#elif (USHRT_MAX == 0xffff)  // NOLINT
+#  define PLATFORM_SIZEOF_SHORT 2
+#else
+#  error "Cannot find short size"
+#endif
+
+#endif  // __BUILD_CONFIG_H__
diff --git a/intern/numaapi/source/numaapi.c b/intern/numaapi/source/numaapi.c
new file mode 100644 (file)
index 0000000..ddd9199
--- /dev/null
@@ -0,0 +1,37 @@
+// Copyright (c) 2018, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#include "numaapi.h"
+
+#include <assert.h>
+
+// Convert |result| into a constant, printable string.
+//
+// A value which is not part of NUMAAPI_Result trips the assert (when
+// assertions are enabled) and yields "UNKNOWN" otherwise.
+const char* numaAPI_ResultAsString(NUMAAPI_Result result) {
+  const char* text = NULL;
+  // No default label on purpose, so the compiler can warn about an enumerator
+  // which is not handled here.
+  switch (result) {
+    case NUMAAPI_SUCCESS:
+      text = "SUCCESS";
+      break;
+    case NUMAAPI_NOT_AVAILABLE:
+      text = "NOT_AVAILABLE";
+      break;
+    case NUMAAPI_ERROR:
+      text = "ERROR";
+      break;
+    case NUMAAPI_ERROR_ATEXIT:
+      text = "ERROR_AT_EXIT";
+      break;
+  }
+  if (text != NULL) {
+    return text;
+  }
+  assert(!"Unknown result was passed to numapi_ResultAsString().");
+  return "UNKNOWN";
+}
diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c
new file mode 100644 (file)
index 0000000..559e97b
--- /dev/null
@@ -0,0 +1,272 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#include "build_config.h"
+
+#if OS_LINUX
+
+#include "numaapi.h"
+
+#include <stdlib.h>
+
+#ifndef WITH_DYNLOAD
+#  include <numa.h>
+#else
+#  include <dlfcn.h>
+#endif
+
+#ifdef WITH_DYNLOAD
+
+// Handle of the dynamically loaded numa library (NULL while it is not loaded).
+static void* numa_lib;
+
+// Types of all symbols which are read from the library.
+struct bitmask;
+typedef int tnuma_available(void);
+typedef int tnuma_max_node(void);
+typedef int tnuma_node_to_cpus(int node, struct bitmask* mask);
+typedef long tnuma_node_size(int node, long* freep);
+typedef int tnuma_run_on_node(int node);
+typedef void* tnuma_alloc_onnode(size_t size, int node);
+typedef void* tnuma_alloc_local(size_t size);
+typedef void tnuma_free(void* start, size_t size);
+typedef struct bitmask* tnuma_bitmask_clearall(struct bitmask *bitmask);
+typedef int tnuma_bitmask_isbitset(const struct bitmask *bitmask,
+                                   unsigned int n);
+typedef struct bitmask* tnuma_bitmask_setbit(struct bitmask *bitmask,
+                                             unsigned int n);
+typedef unsigned int tnuma_bitmask_nbytes(struct bitmask *bitmask);
+typedef void tnuma_bitmask_free(struct bitmask *bitmask);
+typedef struct bitmask* tnuma_allocate_cpumask(void);
+typedef struct bitmask* tnuma_allocate_nodemask(void);
+typedef void tnuma_free_cpumask(struct bitmask* bitmask);
+typedef void tnuma_free_nodemask(struct bitmask* bitmask);
+typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
+typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
+typedef void tnuma_set_localalloc(void);
+
+// Actual symbols.
+static tnuma_available* numa_available;
+static tnuma_max_node* numa_max_node;
+static tnuma_node_to_cpus* numa_node_to_cpus;
+static tnuma_node_size* numa_node_size;
+static tnuma_run_on_node* numa_run_on_node;
+static tnuma_alloc_onnode* numa_alloc_onnode;
+static tnuma_alloc_local* numa_alloc_local;
+static tnuma_free* numa_free;
+static tnuma_bitmask_clearall* numa_bitmask_clearall;
+static tnuma_bitmask_isbitset* numa_bitmask_isbitset;
+static tnuma_bitmask_setbit* numa_bitmask_setbit;
+static tnuma_bitmask_nbytes* numa_bitmask_nbytes;
+static tnuma_bitmask_free* numa_bitmask_free;
+static tnuma_allocate_cpumask* numa_allocate_cpumask;
+static tnuma_allocate_nodemask* numa_allocate_nodemask;
+static tnuma_free_nodemask* numa_free_nodemask;
+static tnuma_free_cpumask* numa_free_cpumask;
+static tnuma_run_on_node_mask* numa_run_on_node_mask;
+static tnuma_set_interleave_mask* numa_set_interleave_mask;
+static tnuma_set_localalloc* numa_set_localalloc;
+
+// Try to dlopen() every entry of the NULL-terminated |paths| array, in order.
+//
+// Returns the handle of the first entry which could be opened, or NULL when
+// none of them could.
+static void* findLibrary(const char** paths) {
+  for (const char** candidate = paths; *candidate != NULL; ++candidate) {
+    void* handle = dlopen(*candidate, RTLD_LAZY);
+    if (handle != NULL) {
+      return handle;
+    }
+  }
+  return NULL;
+}
+
+// Process exit handler: closes the numa library if it is open and forgets the
+// handle, so a repeated invocation does nothing.
+static void numaExit(void) {
+  if (numa_lib != NULL) {
+    dlclose(numa_lib);
+    numa_lib = NULL;
+  }
+}
+
+static NUMAAPI_Result loadNumaSymbols(void) {
+  // Prevent multiple initializations.
+  static bool initialized = false;
+  static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
+  if (initialized) {
+    return result;
+  }
+  initialized = true;
+  // Find appropriate .so library.
+  const char* numa_paths[] = {
+      "libnuma.so.1",
+      "libnuma.so",
+      NULL};
+  // Register de-initialization.
+  const int error = atexit(numaExit);
+  if (error) {
+    result = NUMAAPI_ERROR_ATEXIT;
+    return result;
+  }
+  // Load library.
+  numa_lib = findLibrary(numa_paths);
+  if (numa_lib == NULL) {
+    result = NUMAAPI_NOT_AVAILABLE;
+    return result;
+  }
+  // Load symbols.
+
+#define _LIBRARY_FIND(lib, name)          \
+  do {                                    \
+    name = (t##name *)dlsym(lib, #name);  \
+  } while (0)
+#define NUMA_LIBRARY_FIND(name) _LIBRARY_FIND(numa_lib, name)
+
+  NUMA_LIBRARY_FIND(numa_available);
+  NUMA_LIBRARY_FIND(numa_max_node);
+  NUMA_LIBRARY_FIND(numa_node_to_cpus);
+  NUMA_LIBRARY_FIND(numa_node_size);
+  NUMA_LIBRARY_FIND(numa_run_on_node);
+  NUMA_LIBRARY_FIND(numa_alloc_onnode);
+  NUMA_LIBRARY_FIND(numa_alloc_local);
+  NUMA_LIBRARY_FIND(numa_free);
+  NUMA_LIBRARY_FIND(numa_bitmask_clearall);
+  NUMA_LIBRARY_FIND(numa_bitmask_isbitset);
+  NUMA_LIBRARY_FIND(numa_bitmask_setbit);
+  NUMA_LIBRARY_FIND(numa_bitmask_nbytes);
+  NUMA_LIBRARY_FIND(numa_bitmask_free);
+  NUMA_LIBRARY_FIND(numa_allocate_cpumask);
+  NUMA_LIBRARY_FIND(numa_allocate_nodemask);
+  NUMA_LIBRARY_FIND(numa_free_cpumask);
+  NUMA_LIBRARY_FIND(numa_free_nodemask);
+  NUMA_LIBRARY_FIND(numa_run_on_node_mask);
+  NUMA_LIBRARY_FIND(numa_set_interleave_mask);
+  NUMA_LIBRARY_FIND(numa_set_localalloc);
+
+#undef NUMA_LIBRARY_FIND
+#undef _LIBRARY_FIND
+
+  result = NUMAAPI_SUCCESS;
+  return result;
+}
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// With WITH_DYNLOAD the numa library is loaded first and a loading failure is
+// returned as-is. After that the result depends on numa_available(): a
+// negative value means NUMAAPI_NOT_AVAILABLE, anything else NUMAAPI_SUCCESS.
+NUMAAPI_Result numaAPI_Initialize(void) {
+#ifdef WITH_DYNLOAD
+  const NUMAAPI_Result load_result = loadNumaSymbols();
+  if (load_result != NUMAAPI_SUCCESS) {
+    return load_result;
+  }
+#endif
+  return (numa_available() < 0) ? NUMAAPI_NOT_AVAILABLE : NUMAAPI_SUCCESS;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// The count is the highest node index reported by numa_max_node() plus one.
+int numaAPI_GetNumNodes(void) {
+  const int last_node = numa_max_node();
+  return last_node + 1;
+}
+
+// A node counts as available when numa_node_size() reports a positive size
+// for it.
+bool numaAPI_IsNodeAvailable(int node) {
+  return numa_node_size(node, NULL) > 0;
+}
+
+int numaAPI_GetNumNodeProcessors(int node) {
+  struct bitmask* cpu_mask = numa_allocate_cpumask();
+  numa_node_to_cpus(node, cpu_mask);
+  const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask);
+  const unsigned int num_bits = num_bytes  *8;
+  // TODO(sergey): There might be faster way calculating number of set bits.
+  int num_processors = 0;
+  for (unsigned int bit = 0; bit < num_bits; ++bit) {
+    if (numa_bitmask_isbitset(cpu_mask, bit)) {
+      ++num_processors;
+    }
+  }
+#ifdef WITH_DYNLOAD
+  if (numa_free_cpumask != NULL) {
+    numa_free_cpumask(cpu_mask);
+  } else {
+    numa_bitmask_free(cpu_mask);
+  }
+#else
+  numa_free_cpumask(cpu_mask);
+#endif
+  return num_processors;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Implemented on top of numaAPI_RunThreadOnNode().
+//
+// Returns whatever the thread variant reports, so that a failure to change
+// the affinity is not turned into an unconditional success here.
+bool numaAPI_RunProcessOnNode(int node) {
+  return numaAPI_RunThreadOnNode(node);
+}
+
+// Restrict execution to the given node.
+//
+// Returns true only when numa_run_on_node_mask() reported success. A negative
+// node index is rejected up front. The memory policy calls are only issued
+// after the affinity was actually changed.
+bool numaAPI_RunThreadOnNode(int node) {
+  if (node < 0) {
+    return false;
+  }
+  // Construct bit mask from node index.
+  struct bitmask* node_mask = numa_allocate_nodemask();
+  numa_bitmask_clearall(node_mask);
+  numa_bitmask_setbit(node_mask, (unsigned int)node);
+  // numa_run_on_node_mask() returns 0 on success and -1 on error.
+  const bool affinity_changed = (numa_run_on_node_mask(node_mask) == 0);
+  if (affinity_changed) {
+    // TODO(sergey): The following commands are based on x265 code, we might
+    // want to make those optional, or require to call those explicitly.
+    //
+    // Current assumption is that this is similar to SetThreadGroupAffinity().
+    numa_set_interleave_mask(node_mask);
+    numa_set_localalloc();
+  }
+#ifdef WITH_DYNLOAD
+  // Fall back to the generic mask deallocator when the dedicated one was not
+  // resolved from the loaded library.
+  if (numa_free_nodemask != NULL) {
+    numa_free_nodemask(node_mask);
+  } else {
+    numa_bitmask_free(node_mask);
+  }
+#else
+  numa_free_nodemask(node_mask);
+#endif
+  return affinity_changed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Forwards the request to numa_alloc_onnode() and hands back its result
+// unchanged.
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  void* memory = numa_alloc_onnode(size, node);
+  return memory;
+}
+
+// Forwards the request to numa_alloc_local() and hands back its result
+// unchanged.
+void* numaAPI_AllocateLocal(size_t size) {
+  void* memory = numa_alloc_local(size);
+  return memory;
+}
+
+// Forwards to numa_free(); |size| is passed along together with |start|.
+void numaAPI_Free(void* start, size_t size) {
+  numa_free(start, size);
+}
+
+#endif  // OS_LINUX
diff --git a/intern/numaapi/source/numaapi_stub.c b/intern/numaapi/source/numaapi_stub.c
new file mode 100644 (file)
index 0000000..318fd72
--- /dev/null
@@ -0,0 +1,82 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#include "numaapi.h"
+
+#include "build_config.h"
+
+// Stub implementation for platforms which don't have NUMA support.
+
+#if !OS_LINUX && !OS_WIN
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Stub initialization: always reports that NUMA is not available.
+NUMAAPI_Result numaAPI_Initialize(void) {
+  return NUMAAPI_NOT_AVAILABLE;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Stub topology query: reports zero NUMA nodes.
+int numaAPI_GetNumNodes(void) {
+  const int num_nodes = 0;
+  return num_nodes;
+}
+
+// Stub topology query: no node is ever available.
+//
+// The argument is accepted only to keep the signature identical to the one
+// used by the other backends.
+bool numaAPI_IsNodeAvailable(int node) {
+  (void) node;  // Ignored.
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Stub affinity call: nothing is changed and failure is reported.
+bool numaAPI_RunProcessOnNode(int node) {
+  // The requested node is intentionally unused.
+  (void)node;
+  const bool is_moved = false;
+  return is_moved;
+}
+
+// Stub affinity call: nothing is changed and failure is reported.
+bool numaAPI_RunThreadOnNode(int node) {
+  // The requested node is intentionally unused.
+  (void)node;
+  const bool is_moved = false;
+  return is_moved;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Stub allocator: never allocates and always hands back a null pointer.
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  // Neither argument is used by the stub.
+  (void)node;
+  (void)size;
+  return NULL;
+}
+
+// Stub allocator: never allocates and always hands back a null pointer.
+void* numaAPI_AllocateLocal(size_t size) {
+  // The requested size is intentionally unused.
+  (void)size;
+  void* no_memory = NULL;
+  return no_memory;
+}
+
+// Stub deallocator: does nothing, since the stub allocators never hand out
+// memory.
+void numaAPI_Free(void* start, size_t size) {
+  // Both arguments are intentionally unused.
+  (void)size;
+  (void)start;
+}
+
+#endif  // !OS_LINUX && !OS_WIN
diff --git a/intern/numaapi/source/numaapi_win32.c b/intern/numaapi/source/numaapi_win32.c
new file mode 100644 (file)
index 0000000..a000b3c
--- /dev/null
@@ -0,0 +1,253 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#include "build_config.h"
+
+#if OS_WIN
+
+#include "numaapi.h"
+
+#ifndef NOGDI
+#  define NOGDI
+#endif
+#ifndef NOMINMAX
+#  define NOMINMAX
+#endif
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef NOCOMM
+#  define NOCOMM
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <windows.h>
+
+#if ARCH_CPU_64_BITS
+#  include <VersionHelpers.h>
+#endif
+
+#include <stdio.h>
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Kernel library, from where the symbols come.
+//
+// Assigned by loadNumaSymbols() from LoadLibraryA("Kernel32.dll").
+static HMODULE kernel_lib;
+
+// Types of all symbols which are read from the library.
+//
+// Each t_<Name> type describes the function which is looked up by the name
+// <Name> in loadNumaSymbols().
+//
+// NOTE(review): The types carry no explicit calling convention (WINAPI);
+// confirm this matches the exported functions on every supported target.
+
+// NUMA function types.
+typedef BOOL t_GetNumaHighestNodeNumber(PULONG highest_node_number);
+typedef BOOL t_GetNumaNodeProcessorMask(UCHAR node, ULONGLONG* processor_mask);
+typedef BOOL t_GetNumaNodeProcessorMaskEx(USHORT node,
+                                          GROUP_AFFINITY* processor_mask);
+typedef BOOL t_GetNumaProcessorNode(UCHAR processor, UCHAR* node_number);
+typedef void* t_VirtualAllocExNuma(HANDLE process_handle,
+                                   LPVOID address,
+                                   SIZE_T size,
+                                   DWORD  allocation_type,
+                                   DWORD  protect,
+                                   DWORD  preferred);
+typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
+// Threading function types.
+typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
+                                      DWORD_PTR process_affinity_mask);
+typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
+                                      const GROUP_AFFINITY* GroupAffinity,
+                                      GROUP_AFFINITY* PreviousGroupAffinity);
+typedef DWORD t_GetCurrentProcessorNumber(void);
+
+// Pointers to the resolved functions.
+//
+// All of them have static storage duration, so they are NULL until
+// loadNumaSymbols() stores the result of GetProcAddress() in them.
+//
+// NOTE(review): Identifiers which start with an underscore followed by an
+// upper case letter are reserved for the implementation in C; consider
+// renaming.
+
+// NUMA symbols.
+static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
+static t_GetNumaNodeProcessorMask* _GetNumaNodeProcessorMask;
+static t_GetNumaNodeProcessorMaskEx* _GetNumaNodeProcessorMaskEx;
+static t_GetNumaProcessorNode* _GetNumaProcessorNode;
+static t_VirtualAllocExNuma* _VirtualAllocExNuma;
+static t_VirtualFree* _VirtualFree;
+// Threading symbols.
+static t_SetProcessAffinityMask* _SetProcessAffinityMask;
+static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
+static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
+
+// Process exit handler which loadNumaSymbols() registers with atexit().
+//
+// Currently does nothing: the library handle stored in kernel_lib is left
+// open.
+static void numaExit(void) {
+  // TODO(sergey): Consider closing library here.
+}
+
+static NUMAAPI_Result loadNumaSymbols(void) {
+  // Prevent multiple initializations.
+  static bool initialized = false;
+  static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
+  if (initialized) {
+    return result;
+  }
+  initialized = true;
+  // Register de-initialization.
+  const int error = atexit(numaExit);
+  if (error) {
+    result = NUMAAPI_ERROR_ATEXIT;
+    return result;
+  }
+  // Load library.
+  kernel_lib = LoadLibraryA("Kernel32.dll");
+  // Load symbols.
+
+#define _LIBRARY_FIND(lib, name)                   \
+  do {                                             \
+    _##name = (t_##name *)GetProcAddress(lib, #name);  \
+  } while (0)
+#define KERNEL_LIBRARY_FIND(name) _LIBRARY_FIND(kernel_lib, name)
+
+  // NUMA.
+  KERNEL_LIBRARY_FIND(GetNumaHighestNodeNumber);
+  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMask);
+  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMaskEx);
+  KERNEL_LIBRARY_FIND(GetNumaProcessorNode);
+  KERNEL_LIBRARY_FIND(VirtualAllocExNuma);
+  KERNEL_LIBRARY_FIND(VirtualFree);
+  // Threading.
+  KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
+  KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
+  KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
+
+#undef KERNEL_LIBRARY_FIND
+#undef _LIBRARY_FIND
+
+  result = NUMAAPI_SUCCESS;
+  return result;
+}
+
+// Initialize the Windows NUMA backend.
+//
+// Returns NUMAAPI_NOT_AVAILABLE on 32 bit builds and on systems older than
+// Windows 7. Otherwise the status of loadNumaSymbols() is returned, so a
+// failure to set the backend up is visible to the caller.
+NUMAAPI_Result numaAPI_Initialize(void) {
+#if !ARCH_CPU_64_BITS
+  // No NUMA on 32 bit platforms.
+  return NUMAAPI_NOT_AVAILABLE;
+#else
+  if (!IsWindows7OrGreater()) {
+    // Require Windows 7 or higher.
+    return NUMAAPI_NOT_AVAILABLE;
+  }
+  return loadNumaSymbols();
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Report the number of node indices which are worth probing.
+//
+// The value is the highest node number reported by the system plus one, or 0
+// when the query fails.
+int numaAPI_GetNumNodes(void) {
+  ULONG highest_node = 0;
+  const BOOL is_queried = _GetNumaHighestNodeNumber(&highest_node);
+  // TODO(sergey): Resolve the type narrowing.
+  // NOTE: This is not necessarily a total amount of nodes in the system.
+  return is_queried ? (int)highest_node + 1 : 0;
+}
+
+// Check whether the given node can be used.
+//
+// Returns true only when the node index is representable by the queried API
+// and the node has at least one processor associated with it.
+bool numaAPI_IsNodeAvailable(int node) {
+  // GetNumaNodeProcessorMask() addresses nodes with an UCHAR. Reject indices
+  // outside of that range instead of letting them wrap around and alias
+  // another node.
+  if (node < 0 || node > 0xff) {
+    return false;
+  }
+  // Trick to detect whether the node is usable or not: check whether
+  // there are any processors associated with it.
+  //
+  // This is needed because numaAPI_GetNumNodes() is not guaranteed to
+  // give total amount of nodes and some nodes might be unavailable.
+  ULONGLONG processor_mask;
+  if (!_GetNumaNodeProcessorMask((UCHAR)node, &processor_mask)) {
+    return false;
+  }
+  return processor_mask != 0;
+}
+
+// Count the processors which belong to the given node.
+//
+// Returns 0 when the node index is not representable by the queried API or
+// when the query fails.
+int numaAPI_GetNumNodeProcessors(int node) {
+  // GetNumaNodeProcessorMask() addresses nodes with an UCHAR. Reject indices
+  // outside of that range instead of letting them wrap around and alias
+  // another node.
+  if (node < 0 || node > 0xff) {
+    return 0;
+  }
+  ULONGLONG processor_mask;
+  if (!_GetNumaNodeProcessorMask((UCHAR)node, &processor_mask)) {
+    return 0;
+  }
+  // Every set bit of the mask stands for one processor.
+  // TODO(sergey): There might be faster way calculating number of set bits.
+  int num_processors = 0;
+  while (processor_mask != 0) {
+    num_processors += (int)(processor_mask & 1);
+    processor_mask = (processor_mask >> 1);
+  }
+  return num_processors;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Change affinity of the current process to make it run on the given node.
+//
+// Returns false when the node index is not representable by the queried API,
+// when the processor mask of the node can not be obtained, or when the
+// affinity can not be changed. Returns true otherwise.
+bool numaAPI_RunProcessOnNode(int node) {
+  // TODO(sergey): Make sure requested node is within active CPU group.
+  // GetNumaNodeProcessorMask() addresses nodes with an UCHAR. Reject indices
+  // outside of that range instead of letting them wrap around and alias
+  // another node.
+  if (node < 0 || node > 0xff) {
+    return false;
+  }
+  HANDLE process_handle = GetCurrentProcess();
+  ULONGLONG processor_mask;
+  if (_GetNumaNodeProcessorMask((UCHAR)node, &processor_mask) == 0) {
+    return false;
+  }
+  if (_SetProcessAffinityMask(process_handle,
+                              (DWORD_PTR)processor_mask) == 0) {
+    return false;
+  }
+  return true;
+}
+
+// Change group affinity of the current thread to the processors of the given
+// node.
+//
+// Returns false when the node index is not representable by the queried API,
+// when the group affinity of the node can not be obtained, or when it can not
+// be applied to the thread. Returns true otherwise.
+bool numaAPI_RunThreadOnNode(int node) {
+  // GetNumaNodeProcessorMaskEx() addresses nodes with an USHORT. Reject
+  // indices outside of that range instead of letting them wrap around and
+  // alias another node.
+  if (node < 0 || node > 0xffff) {
+    return false;
+  }
+  HANDLE thread_handle = GetCurrentThread();
+  GROUP_AFFINITY group_affinity = { 0 };
+  if (_GetNumaNodeProcessorMaskEx((USHORT)node, &group_affinity) == 0) {
+    return false;
+  }
+  if (_SetThreadGroupAffinity(thread_handle, &group_affinity, NULL) == 0) {
+    return false;
+  }
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Reserve and commit `size` bytes of read-write memory in the current
+// process, passing `node` as the preferred node to VirtualAllocExNuma().
+//
+// The pointer returned by the system call is handed back unchanged.
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  const HANDLE current_process = GetCurrentProcess();
+  const DWORD allocation_type = MEM_RESERVE | MEM_COMMIT;
+  void* memory = _VirtualAllocExNuma(current_process,
+                                     NULL,
+                                     size,
+                                     allocation_type,
+                                     PAGE_READWRITE,
+                                     node);
+  return memory;
+}
+
+// Allocate `size` bytes on the node of the processor which currently runs the
+// calling thread.
+//
+// Returns NULL when the node of that processor can not be determined.
+void* numaAPI_AllocateLocal(size_t size) {
+  const UCHAR processor = (UCHAR)_GetCurrentProcessorNumber();
+  UCHAR local_node;
+  if (_GetNumaProcessorNode(processor, &local_node)) {
+    return numaAPI_AllocateOnNode(size, local_node);
+  }
+  return NULL;
+}
+
+// Release a region obtained from numaAPI_AllocateOnNode() or
+// numaAPI_AllocateLocal().
+//
+// `size` is part of the cross-platform interface but is not used here:
+// VirtualFree() requires the size to be 0 when called with MEM_RELEASE, and
+// fails (leaving the region allocated) for any other value.
+void numaAPI_Free(void* start, size_t size) {
+  (void) size;  // Ignored.
+  if (!_VirtualFree(start, 0, MEM_RELEASE)) {
+    // TODO(sergey): Throw an error!
+  }
+}
+
+#endif  // OS_WIN
index f51b9623803eae619d036a3c1f8fdd05d4d5fbb4..7f88f8a18b1d392cdcb415233d0c403c4313e4fc 100644 (file)
 int BLI_cpu_support_sse2(void);
 void BLI_system_backtrace(FILE *fp);
 
+
+/* Get CPU brand, result is to be MEM_freeN()-ed. */
+char *BLI_cpu_brand_string(void);
+
 /* getpid */
 #ifdef WIN32
 #  define BLI_SYSTEM_PID_H <process.h>
index 81f8445783b195d7d5fff7421403d664856bc2c8..631a65ccade0af18d27726144ac015c8c37ccf2a 100644 (file)
@@ -204,6 +204,12 @@ void BLI_thread_queue_nowait(ThreadQueue *queue);
 #  define BLI_thread_local_set(name, value) name = value
 #endif  /* defined(__APPLE__) */
 
+/* **** Special functions to help performance on crazy NUMA setups. **** */
+
+/* Make sure process/thread is using NUMA node with fast memory access. */
+void BLI_thread_put_process_on_fast_node(void);
+void BLI_thread_put_thread_on_fast_node(void);
+
 #ifdef __cplusplus
 }
 #endif
index 91887c1ef5ede43700a474f31fcfc6ae2853a64e..921ecc29e18b8dce60e29ffb43423266bbc70854 100644 (file)
@@ -30,6 +30,7 @@ set(INC
        ../../../intern/guardedalloc
        ../../../intern/atomic
        ../../../intern/eigen
+       ../../../intern/numaapi/include
        ../../../extern/wcwidth
 )
 
index ecb977c6e6154cd1ef530268856aed2244e3a01c..38fe2c7a9eb8497369db24148c0bbf6ff1226c0f 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "BLI_utildefines.h"
 #include "BLI_system.h"
+#include "BLI_string.h"
 
 #include "MEM_guardedalloc.h"
 
@@ -138,3 +139,40 @@ void BLI_system_backtrace(FILE *fp)
 
 }
 /* end BLI_system_backtrace */
+
+/* NOTE: The code for CPU brand string is adopted from Cycles. */
+
+#if !defined(_WIN32) || defined(FREE_WINDOWS)
+/* Fallback for toolchains which do not provide __cpuid() themselves: this
+ * definition is only compiled when _WIN32 is not defined or FREE_WINDOWS is.
+ *
+ * Executes the CPUID instruction with `selector` in EAX and stores the
+ * resulting EAX, EBX, ECX and EDX into data[0] .. data[3], in that order.
+ * On architectures other than x86 and x86-64 all four values are set to 0.
+ *
+ * NOTE(review): ECX is not set up before the instruction, so leaves which
+ * take a sub-leaf index look unsupported by this helper -- confirm before
+ * using it for such leaves. */
+static void __cpuid(int data[4], int selector)
+{
+#if defined(__x86_64__)
+       asm("cpuid" : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3]) : "a"(selector));
+#elif defined(__i386__)
+       /* EBX is saved and restored around the instruction, its value is
+        * copied out through a general purpose register (operand %1). */
+       asm("pushl %%ebx    \n\t"
+               "cpuid          \n\t"
+               "movl %%ebx, %1 \n\t"
+               "popl %%ebx     \n\t"
+               : "=a" (data[0]), "=r" (data[1]), "=c" (data[2]), "=d" (data[3])
+               : "a"(selector)
+               : "ebx");
+#else
+       /* No CPUID here: report zeros. */
+       data[0] = data[1] = data[2] = data[3] = 0;
+#endif
+}
+#endif
+
+/* Get the CPU brand string as reported by the CPUID leaves
+ * 0x80000002 .. 0x80000004.
+ *
+ * Returns a copy made with BLI_strdup() which the caller frees with
+ * MEM_freeN(), or NULL when those leaves are not reported as supported
+ * (which includes a CPUID result of all zeros). */
+char *BLI_cpu_brand_string(void)
+{
+       /* The three leaves fill 12 integers (48 bytes). One extra integer is
+        * kept zeroed so the string is terminated even when all 48 bytes are
+        * used. Integer storage also means the registers are written through
+        * a correctly typed and aligned pointer. */
+       int buf[13] = { 0 };
+       int result[4] = { 0 };
+       __cpuid(result, 0x80000000);
+       /* EAX holds the highest supported extended leaf. It has to be compared
+        * as unsigned: as a signed value 0 would be "greater" than 0x80000004. */
+       if ((unsigned int)result[0] >= 0x80000004u) {
+               __cpuid(buf + 0, 0x80000002);
+               __cpuid(buf + 4, 0x80000003);
+               __cpuid(buf + 8, 0x80000004);
+               char *brand = BLI_strdup((const char *)buf);
+               /* TODO(sergey): Make it a bit more presentable by removing trademark. */
+               return brand;
+       }
+       return NULL;
+}
index 862ce3911093897d4a55e020752e4437637ac31c..f67d621f4a1b153aa3bcedc6b904c47f51225b90 100644 (file)
@@ -37,6 +37,7 @@
 
 #include "BLI_listbase.h"
 #include "BLI_gsqueue.h"
+#include "BLI_system.h"
 #include "BLI_task.h"
 #include "BLI_threads.h"
 
@@ -55,6 +56,7 @@
 #endif
 
 #include "atomic_ops.h"
+#include "numaapi.h"
 
 #if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
 #  define USE_APPLE_OMP_FIX
@@ -126,6 +128,7 @@ static pthread_mutex_t _colormanage_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t _fftw_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t _view3d_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_t mainid;
+static bool is_numa_available = false;
 static unsigned int thread_levels = 0;  /* threads can be invoked inside threads */
 static int num_threads_override = 0;
 
@@ -155,6 +158,9 @@ void BLI_threadapi_init(void)
        mainid = pthread_self();
 
        BLI_spin_init(&_malloc_lock);
+       if (numaAPI_Initialize() == NUMAAPI_SUCCESS) {
+               is_numa_available = true;
+       }
 }
 
 void BLI_threadapi_exit(void)
@@ -840,3 +846,98 @@ void BLI_threaded_malloc_end(void)
                MEM_set_lock_callback(NULL, NULL);
        }
 }
+
+/* **** Special functions to help performance on crazy NUMA setups. **** */
+
+/* Check whether the CPU brand string names a Threadripper or an EPYC.
+ *
+ * The brand string is inspected on the first call only, every later call
+ * returns the cached answer. Returns false when the brand string can not be
+ * obtained. */
+static bool check_is_threadripper2_alike_topology(void)
+{
+       /* NOTE: We hope operating system does not support CPU hotswap to
+        * a different brand. And that SMP of different types is also not
+        * encouraged by the system. */
+       static bool is_initialized = false;
+       static bool is_threadripper2 = false;
+       if (is_initialized) {
+               return is_threadripper2;
+       }
+       is_initialized = true;
+       char *cpu_brand = BLI_cpu_brand_string();
+       if (cpu_brand == NULL) {
+               return false;
+       }
+       if (strstr(cpu_brand, "Threadripper")) {
+               /* NOTE: We consider all Threadrippers having similar topology to
+                * the second one. This is because we are trying to utilize NUMA node
+                * 0 as much as possible. This node does exist on earlier versions of
+                * Threadripper and setting affinity to it should not have negative
+                * effect.
+                * This allows us to avoid per-model check, making the code more
+                * reliable for the CPUs which are not yet released.
+                */
+               is_threadripper2 = true;
+       }
+       /* NOTE: While all dies of EPYC have a memory controller, only two of
+        * them have access to the lower-indexed DDR slots. Those dies are same
+        * as on Threadripper2 with the memory controller.
+        * Now, it is rather likely that reasonable amount of users don't max
+        * up their DDR slots, making it only two dies connected to a DDR slot
+        * with actual memory in it. */
+       if (strstr(cpu_brand, "EPYC")) {
+               /* NOTE: Similarly to Threadripper we do not do model check. */
+               is_threadripper2 = true;
+       }
+       /* The brand string is owned by this function and is not needed once
+        * the answer is cached. */
+       MEM_freeN(cpu_brand);
+       return is_threadripper2;
+}
+
+/* Ask the NUMA API to run the process on node 0.
+ * Does nothing when the NUMA API is not available. */
+static void threadripper_put_process_on_fast_node(void)
+{
+       /* NOTE: Technically, we can use NUMA nodes 0 and 2, and using both of
+        * them in the affinity mask will allow OS to schedule threads more
+        * flexibly, possibly increasing overall performance when multiple apps
+        * are crunching numbers.
+        *
+        * However, if scene fits into memory adjacent to a single die we don't
+        * want OS to re-schedule the process to another die since that will make
+        * it further away from memory allocated for .blend file. */
+       /* NOTE: Even if NUMA is available in the API but is disabled in BIOS on
+        * this workstation we still proceed here. If NUMA is disabled it will be
+        * a single node, so our action has no visible changes, but allows to keep
+        * things simple and unified. */
+       if (is_numa_available) {
+               numaAPI_RunProcessOnNode(0);
+       }
+}
+
+/* Ask the NUMA API to run the calling thread on node 0.
+ * Does nothing when the NUMA API is not available. */
+static void threadripper_put_thread_on_fast_node(void)
+{
+       /* NOTE: This is where things become more interesting. On the one hand
+        * we can use nodes 0 and 2 and allow operating system to do balancing
+        * of processes/threads for the maximum performance when multiple apps
+        * are running.
+        * On the other hand, however, we probably want to use same node as the
+        * main thread since that's where the memory of .blend file is likely
+        * to be allocated.
+        * Since the main thread is currently on node 0, we also put thread on
+        * same node. */
+       /* A single node is all there is when NUMA is disabled in BIOS, so the
+        * call is harmless in that case as well. */
+       if (is_numa_available) {
+               numaAPI_RunThreadOnNode(0);
+       }
+}
+
+/* Move the process to the NUMA node with fast memory access.
+ * Only acts on CPUs detected by check_is_threadripper2_alike_topology(). */
+void BLI_thread_put_process_on_fast_node(void)
+{
+       if (!check_is_threadripper2_alike_topology()) {
+               return;
+       }
+       threadripper_put_process_on_fast_node();
+}
+
+/* Move the calling thread to the NUMA node with fast memory access.
+ * Only acts on CPUs detected by check_is_threadripper2_alike_topology(). */
+void BLI_thread_put_thread_on_fast_node(void)
+{
+       if (!check_is_threadripper2_alike_topology()) {
+               return;
+       }
+       threadripper_put_thread_on_fast_node();
+}
index 92d51c9a40053cc28cf0e2e5dd8ad58918b6ec96..cb627b465f406ea671efd8d8cee41e945a582e7d 100644 (file)
@@ -334,6 +334,7 @@ static void *do_job_thread(void *job_v)
 {
        wmJob *wm_job = job_v;
 
+       BLI_thread_put_thread_on_fast_node();
        wm_job->startjob(wm_job->run_customdata, &wm_job->stop, &wm_job->do_update, &wm_job->progress);
        wm_job->ready = true;
 
index 5a61f077a84e0c27e87eb974ad569de08fac8dce..ce25a71c6d84f1036ce8854c3a98e58cf842dc98 100644 (file)
@@ -52,6 +52,7 @@
 #include "BLI_callbacks.h"
 #include "BLI_string.h"
 #include "BLI_system.h"
+#include "BLI_threads.h"
 
 /* mostly init functions */
 #include "BKE_appdir.h"
@@ -364,6 +365,7 @@ int main(
        BKE_appdir_program_path_init(argv[0]);
 
        BLI_threadapi_init();
+       BLI_thread_put_process_on_fast_node();
 
        DNA_sdna_current_init();