Fix T60145: Cycles resets manually set affinity
[blender.git] / intern / cycles / util / util_system.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "util/util_system.h"
18
19 #include "util/util_logging.h"
20 #include "util/util_types.h"
21 #include "util/util_string.h"
22
23 #include <numaapi.h>
24
25 #ifdef _WIN32
26 #  if(!defined(FREE_WINDOWS))
27 #    include <intrin.h>
28 #  endif
29 #  include "util_windows.h"
30 #elif defined(__APPLE__)
31 #  include <sys/sysctl.h>
32 #  include <sys/types.h>
33 #else
34 #  include <unistd.h>
35 #endif
36
37 CCL_NAMESPACE_BEGIN
38
39 bool system_cpu_ensure_initialized()
40 {
41         static bool is_initialized = false;
42         static bool result = false;
43         if(is_initialized) {
44                 return result;
45         }
46         is_initialized = true;
47         const NUMAAPI_Result numa_result = numaAPI_Initialize();
48         result = (numa_result == NUMAAPI_SUCCESS);
49         return result;
50 }
51
/* Fallback solution, which doesn't use NUMA/CPU groups.
 *
 * Asks the operating system directly for the number of logical processors.
 * When the query fails or reports a non-positive value, 1 is returned so
 * that callers never see garbage or a negative thread count. */
static int system_cpu_thread_count_fallback()
{
#ifdef _WIN32
        SYSTEM_INFO info;
        GetSystemInfo(&info);
        return info.dwNumberOfProcessors;
#elif defined(__APPLE__)
        int count = 0;
        size_t len = sizeof(count);
        int mib[2] = { CTL_HW, HW_NCPU };
        /* sysctl() leaves count untouched on failure, hence the explicit
         * initialization and result check. */
        if(sysctl(mib, 2, &count, &len, NULL, 0) != 0 || count < 1) {
                return 1;
        }
        return count;
#else
        /* sysconf() returns -1 when the value can not be determined. */
        const long count = sysconf(_SC_NPROCESSORS_ONLN);
        return (count < 1) ? 1 : (int)count;
#endif
}
69
70 int system_cpu_thread_count()
71 {
72         const int num_nodes = system_cpu_num_numa_nodes();
73         int num_threads = 0;
74         for(int node = 0; node < num_nodes; ++node) {
75                 if(!system_cpu_is_numa_node_available(node)) {
76                         continue;
77                 }
78                 num_threads += system_cpu_num_numa_node_processors(node);
79         }
80         return num_threads;
81 }
82
83 int system_cpu_num_numa_nodes()
84 {
85         if(!system_cpu_ensure_initialized()) {
86                 /* Fallback to a single node with all the threads. */
87                 return 1;
88         }
89         return numaAPI_GetNumNodes();
90 }
91
92 bool system_cpu_is_numa_node_available(int node)
93 {
94         if(!system_cpu_ensure_initialized()) {
95                 return true;
96         }
97         return numaAPI_IsNodeAvailable(node);
98 }
99
100 int system_cpu_num_numa_node_processors(int node)
101 {
102         if(!system_cpu_ensure_initialized()) {
103                 return system_cpu_thread_count_fallback();
104         }
105         return numaAPI_GetNumNodeProcessors(node);
106 }
107
108 bool system_cpu_run_thread_on_node(int node)
109 {
110         if(!system_cpu_ensure_initialized()) {
111                 return true;
112         }
113         return numaAPI_RunThreadOnNode(node);
114 }
115
116 int system_cpu_num_active_group_processors()
117 {
118         if(!system_cpu_ensure_initialized()) {
119                 return system_cpu_thread_count_fallback();
120         }
121         return numaAPI_GetNumCurrentNodesProcessors();
122 }
123
124 #if !defined(_WIN32) || defined(FREE_WINDOWS)
/* Stand-in for the MSVC __cpuid() intrinsic on other compilers.
 *
 * Executes CPUID for leaf `selector` and stores EAX, EBX, ECX and EDX in
 * data[0..3], in that order. ECX is always cleared before the instruction
 * so that leaves which take a sub-leaf index (such as leaf 7) are queried
 * at sub-leaf 0 instead of with whatever value happens to be in the
 * register.
 *
 * On architectures other than x86/x86-64 all four outputs are set to 0. */
static void __cpuid(int data[4], int selector)
{
#if defined(__x86_64__)
        __asm__("cpuid"
                : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3])
                : "a"(selector), "c"(0));
#elif defined(__i386__)
        /* EBX is saved and restored by hand around the instruction; the
         * clobber keeps the compiler from picking EBX for operand %1. */
        __asm__("pushl %%ebx    \n\t"
                "cpuid          \n\t"
                "movl %%ebx, %1 \n\t"
                "popl %%ebx     \n\t"
                : "=a" (data[0]), "=r" (data[1]), "=c" (data[2]), "=d" (data[3])
                : "a"(selector), "c"(0)
                : "ebx");
#else
        data[0] = data[1] = data[2] = data[3] = 0;
#endif
}
141 #endif
142
143 string system_cpu_brand_string()
144 {
145         char buf[48] = {0};
146         int result[4] = {0};
147
148         __cpuid(result, 0x80000000);
149
150         if(result[0] >= (int)0x80000004) {
151                 __cpuid((int*)(buf+0), 0x80000002);
152                 __cpuid((int*)(buf+16), 0x80000003);
153                 __cpuid((int*)(buf+32), 0x80000004);
154
155                 string brand = buf;
156
157                 /* make it a bit more presentable */
158                 brand = string_remove_trademark(brand);
159
160                 return brand;
161         }
162
163         return "Unknown CPU";
164 }
165
166 int system_cpu_bits()
167 {
168         return (sizeof(void*)*8);
169 }
170
171 #if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)
172
/* Set of boolean flags, one per instruction set extension.
 *
 * Filled in by system_cpu_capabilities(), which zeroes the whole struct
 * first; a flag that is never assigned there therefore reads as false. */
struct CPUCapabilities {
        /* Base architecture and legacy SIMD. */
        bool x64;
        bool mmx;

        /* SSE family. */
        bool sse;
        bool sse2;
        bool sse3;
        bool ssse3;
        bool sse41;
        bool sse42;
        bool sse4a;

        /* AVX family and related extensions. */
        bool avx;
        bool f16c;
        bool avx2;
        bool xop;
        bool fma3;
        bool fma4;

        /* Bit manipulation extensions. */
        bool bmi1;
        bool bmi2;
};
192
193 static CPUCapabilities& system_cpu_capabilities()
194 {
195         static CPUCapabilities caps;
196         static bool caps_init = false;
197
198         if(!caps_init) {
199                 int result[4], num;
200
201                 memset(&caps, 0, sizeof(caps));
202
203                 __cpuid(result, 0);
204                 num = result[0];
205
206                 if(num >= 1) {
207                         __cpuid(result, 0x00000001);
208                         caps.mmx = (result[3] & ((int)1 << 23)) != 0;
209                         caps.sse = (result[3] & ((int)1 << 25)) != 0;
210                         caps.sse2 = (result[3] & ((int)1 << 26)) != 0;
211                         caps.sse3 = (result[2] & ((int)1 <<  0)) != 0;
212
213                         caps.ssse3 = (result[2] & ((int)1 <<  9)) != 0;
214                         caps.sse41 = (result[2] & ((int)1 << 19)) != 0;
215                         caps.sse42 = (result[2] & ((int)1 << 20)) != 0;
216
217                         caps.fma3 = (result[2] & ((int)1 << 12)) != 0;
218                         caps.avx = false;
219                         bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0;
220                         bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0;
221
222                         if( os_uses_xsave_xrestore && cpu_avx_support) {
223                                 // Check if the OS will save the YMM registers
224                                 uint32_t xcr_feature_mask;
225 #if defined(__GNUC__)
226                                 int edx; /* not used */
227                                 /* actual opcode for xgetbv */
228                                 __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr_feature_mask) , "=d" (edx) : "c" (0) );
229 #elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
230                                 xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);  /* min VS2010 SP1 compiler is required */
231 #else
232                                 xcr_feature_mask = 0;
233 #endif
234                                 caps.avx = (xcr_feature_mask & 0x6) == 0x6;
235                         }
236
237                         caps.f16c = (result[2] & ((int)1 << 29)) != 0;
238
239                         __cpuid(result, 0x00000007);
240                         caps.bmi1 = (result[1] & ((int)1 << 3)) != 0;
241                         caps.bmi2 = (result[1] & ((int)1 << 8)) != 0;
242                         caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
243                 }
244
245                 caps_init = true;
246         }
247
248         return caps;
249 }
250
251 bool system_cpu_support_sse2()
252 {
253         CPUCapabilities& caps = system_cpu_capabilities();
254         return caps.sse && caps.sse2;
255 }
256
257 bool system_cpu_support_sse3()
258 {
259         CPUCapabilities& caps = system_cpu_capabilities();
260         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3;
261 }
262
263 bool system_cpu_support_sse41()
264 {
265         CPUCapabilities& caps = system_cpu_capabilities();
266         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41;
267 }
268
269 bool system_cpu_support_avx()
270 {
271         CPUCapabilities& caps = system_cpu_capabilities();
272         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 &&
273                caps.sse41 && caps.avx;
274 }
275
276 bool system_cpu_support_avx2()
277 {
278         CPUCapabilities& caps = system_cpu_capabilities();
279         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 &&
280                caps.avx && caps.f16c && caps.avx2 && caps.fma3 && caps.bmi1 &&
281                caps.bmi2;
282 }
283 #else
284
/* Variant for targets that are not x86/x86-64: no CPU detection is
 * compiled in, so SSE2 is always reported as unsupported. */
bool system_cpu_support_sse2()
{
        return false;
}
289
/* Variant for targets that are not x86/x86-64: no CPU detection is
 * compiled in, so SSE3 is always reported as unsupported. */
bool system_cpu_support_sse3()
{
        return false;
}
294
/* Variant for targets that are not x86/x86-64: no CPU detection is
 * compiled in, so SSE4.1 is always reported as unsupported. */
bool system_cpu_support_sse41()
{
        return false;
}
299
/* Variant for targets that are not x86/x86-64: no CPU detection is
 * compiled in, so AVX is always reported as unsupported. */
bool system_cpu_support_avx()
{
        return false;
}

/* Variant for targets that are not x86/x86-64: no CPU detection is
 * compiled in, so AVX2 is always reported as unsupported. */
bool system_cpu_support_avx2()
{
        return false;
}
308
309 #endif
310
/* Amount of physical memory installed in the system, in bytes.
 *
 * Returns 0 on every platform when the operating system query fails. */
size_t system_physical_ram()
{
#ifdef _WIN32
        MEMORYSTATUSEX ram;
        ram.dwLength = sizeof (ram);
        if(!GlobalMemoryStatusEx(&ram)) {
                return 0;
        }
        /* ullTotalPhys is already expressed in bytes, no scaling needed. */
        return (size_t)ram.ullTotalPhys;
#elif defined(__APPLE__)
        uint64_t ram = 0;
        size_t len = sizeof(ram);
        if(sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) {
                return ram;
        }
        return 0;
#else
        /* sysconf() returns -1 when a value can not be determined; do not
         * let that turn into a huge bogus size through the multiplication. */
        const long page_size = sysconf(_SC_PAGESIZE);
        const long num_pages = sysconf(_SC_PHYS_PAGES);
        if(page_size <= 0 || num_pages <= 0) {
                return 0;
        }
        return (size_t)page_size * (size_t)num_pages;
#endif
}
331
332 CCL_NAMESPACE_END