fc6db1f66623af7660e66b81902de0a26d07c929
[blender.git] / intern / cycles / util / util_system.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "util/util_system.h"
18
19 #include "util/util_logging.h"
20 #include "util/util_types.h"
21 #include "util/util_string.h"
22
23 #include <numaapi.h>
24
25 #ifdef _WIN32
26 #  if(!defined(FREE_WINDOWS))
27 #    include <intrin.h>
28 #  endif
29 #  include "util_windows.h"
30 #elif defined(__APPLE__)
31 #  include <sys/ioctl.h>
32 #  include <sys/sysctl.h>
33 #  include <sys/types.h>
34 #else
35 #  include <unistd.h>
36 #  include <sys/ioctl.h>
37 #endif
38
39 CCL_NAMESPACE_BEGIN
40
41 bool system_cpu_ensure_initialized()
42 {
43         static bool is_initialized = false;
44         static bool result = false;
45         if(is_initialized) {
46                 return result;
47         }
48         is_initialized = true;
49         const NUMAAPI_Result numa_result = numaAPI_Initialize();
50         result = (numa_result == NUMAAPI_SUCCESS);
51         return result;
52 }
53
/* Fallback solution, which doesn't use NUMA/CPU groups.
 *
 * Asks the operating system directly for the number of logical processors.
 * Always returns at least 1: when the underlying query fails the result would
 * otherwise be uninitialized (sysctl) or negative (sysconf), which is not a
 * usable thread count for callers. */
static int system_cpu_thread_count_fallback()
{
        int count = 0;
#ifdef _WIN32
        SYSTEM_INFO info;
        GetSystemInfo(&info);
        count = (int)info.dwNumberOfProcessors;
#elif defined(__APPLE__)
        size_t len = sizeof(count);
        int mib[2] = { CTL_HW, HW_NCPU };
        if(sysctl(mib, 2, &count, &len, NULL, 0) != 0) {
                /* Query failed, whatever is stored in count can not be trusted. */
                count = 0;
        }
#else
        /* sysconf() reports failure by returning -1. */
        count = (int)sysconf(_SC_NPROCESSORS_ONLN);
#endif
        return (count > 0) ? count : 1;
}
71
72 int system_cpu_thread_count()
73 {
74         const int num_nodes = system_cpu_num_numa_nodes();
75         int num_threads = 0;
76         for(int node = 0; node < num_nodes; ++node) {
77                 if(!system_cpu_is_numa_node_available(node)) {
78                         continue;
79                 }
80                 num_threads += system_cpu_num_numa_node_processors(node);
81         }
82         return num_threads;
83 }
84
85 int system_cpu_num_numa_nodes()
86 {
87         if(!system_cpu_ensure_initialized()) {
88                 /* Fallback to a single node with all the threads. */
89                 return 1;
90         }
91         return numaAPI_GetNumNodes();
92 }
93
94 bool system_cpu_is_numa_node_available(int node)
95 {
96         if(!system_cpu_ensure_initialized()) {
97                 return true;
98         }
99         return numaAPI_IsNodeAvailable(node);
100 }
101
102 int system_cpu_num_numa_node_processors(int node)
103 {
104         if(!system_cpu_ensure_initialized()) {
105                 return system_cpu_thread_count_fallback();
106         }
107         return numaAPI_GetNumNodeProcessors(node);
108 }
109
110 bool system_cpu_run_thread_on_node(int node)
111 {
112         if(!system_cpu_ensure_initialized()) {
113                 return true;
114         }
115         return numaAPI_RunThreadOnNode(node);
116 }
117
/* Width of the console attached to standard output, in columns.
 *
 * Falls back to 80 columns when the width can not be queried or the reported
 * value is not positive. */
int system_console_width()
{
#ifdef _WIN32
        CONSOLE_SCREEN_BUFFER_INFO screen_info;
        const int columns =
                GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &screen_info)
                        ? screen_info.dwSize.X
                        : 0;
#else
        struct winsize window_size;
        const int columns =
                (ioctl(STDOUT_FILENO, TIOCGWINSZ, &window_size) == 0)
                        ? window_size.ws_col
                        : 0;
#endif

        if(columns <= 0) {
                return 80;
        }
        return columns;
}
136
137 int system_cpu_num_active_group_processors()
138 {
139         if(!system_cpu_ensure_initialized()) {
140                 return system_cpu_thread_count_fallback();
141         }
142         return numaAPI_GetNumCurrentNodesProcessors();
143 }
144
#if !defined(_WIN32) || defined(FREE_WINDOWS)
/* Stand-in for the compiler-provided __cpuid() intrinsic on builds which do
 * not include <intrin.h>.
 *
 * Executes the CPUID instruction for leaf `selector` and stores the resulting
 * EAX, EBX, ECX and EDX in data[0..3]. ECX is explicitly zeroed on input:
 * leaves with sub-leaves (such as leaf 7) interpret ECX as the sub-leaf index,
 * so leaving it unspecified would query an arbitrary sub-leaf.
 *
 * On architectures other than x86 all four values are set to zero. */
static void __cpuid(int data[4], int selector)
{
#if defined(__x86_64__)
        __asm__("cpuid"
                : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3])
                : "a"(selector), "c"(0));
#elif defined(__i386__)
        /* EBX is saved and restored by hand around the instruction, its value
         * is handed over through a general purpose register instead. */
        __asm__("pushl %%ebx    \n\t"
                "cpuid          \n\t"
                "movl %%ebx, %1 \n\t"
                "popl %%ebx     \n\t"
                : "=a" (data[0]), "=r" (data[1]), "=c" (data[2]), "=d" (data[3])
                : "a"(selector), "c"(0)
                : "ebx");
#else
        (void)selector;
        data[0] = data[1] = data[2] = data[3] = 0;
#endif
}
#endif
163
164 string system_cpu_brand_string()
165 {
166         char buf[48] = {0};
167         int result[4] = {0};
168
169         __cpuid(result, 0x80000000);
170
171         if(result[0] >= (int)0x80000004) {
172                 __cpuid((int*)(buf+0), 0x80000002);
173                 __cpuid((int*)(buf+16), 0x80000003);
174                 __cpuid((int*)(buf+32), 0x80000004);
175
176                 string brand = buf;
177
178                 /* make it a bit more presentable */
179                 brand = string_remove_trademark(brand);
180
181                 return brand;
182         }
183
184         return "Unknown CPU";
185 }
186
/* Pointer width of the current build, in bits (pointer size in bytes times 8). */
int system_cpu_bits()
{
        const size_t pointer_bytes = sizeof(void*);
        return (int)(pointer_bytes * 8);
}
191
192 #if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)
193
/* Set of instruction set extension flags, one boolean per extension.
 *
 * Filled in by system_cpu_capabilities(). Field order is kept as declared
 * originally, fields are only grouped by family for readability. */
struct CPUCapabilities {
        /* Base architecture. */
        bool x64;
        /* MMX and the SSE family. */
        bool mmx, sse, sse2, sse3, ssse3, sse41, sse42, sse4a;
        /* AVX family and half-float conversion. */
        bool avx, f16c, avx2;
        /* XOP and fused multiply-add variants. */
        bool xop, fma3, fma4;
        /* Bit manipulation instructions. */
        bool bmi1, bmi2;
};
213
214 static CPUCapabilities& system_cpu_capabilities()
215 {
216         static CPUCapabilities caps;
217         static bool caps_init = false;
218
219         if(!caps_init) {
220                 int result[4], num;
221
222                 memset(&caps, 0, sizeof(caps));
223
224                 __cpuid(result, 0);
225                 num = result[0];
226
227                 if(num >= 1) {
228                         __cpuid(result, 0x00000001);
229                         caps.mmx = (result[3] & ((int)1 << 23)) != 0;
230                         caps.sse = (result[3] & ((int)1 << 25)) != 0;
231                         caps.sse2 = (result[3] & ((int)1 << 26)) != 0;
232                         caps.sse3 = (result[2] & ((int)1 <<  0)) != 0;
233
234                         caps.ssse3 = (result[2] & ((int)1 <<  9)) != 0;
235                         caps.sse41 = (result[2] & ((int)1 << 19)) != 0;
236                         caps.sse42 = (result[2] & ((int)1 << 20)) != 0;
237
238                         caps.fma3 = (result[2] & ((int)1 << 12)) != 0;
239                         caps.avx = false;
240                         bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0;
241                         bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0;
242
243                         if( os_uses_xsave_xrestore && cpu_avx_support) {
244                                 // Check if the OS will save the YMM registers
245                                 uint32_t xcr_feature_mask;
246 #if defined(__GNUC__)
247                                 int edx; /* not used */
248                                 /* actual opcode for xgetbv */
249                                 __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr_feature_mask) , "=d" (edx) : "c" (0) );
250 #elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
251                                 xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);  /* min VS2010 SP1 compiler is required */
252 #else
253                                 xcr_feature_mask = 0;
254 #endif
255                                 caps.avx = (xcr_feature_mask & 0x6) == 0x6;
256                         }
257
258                         caps.f16c = (result[2] & ((int)1 << 29)) != 0;
259
260                         __cpuid(result, 0x00000007);
261                         caps.bmi1 = (result[1] & ((int)1 << 3)) != 0;
262                         caps.bmi2 = (result[1] & ((int)1 << 8)) != 0;
263                         caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
264                 }
265
266                 caps_init = true;
267         }
268
269         return caps;
270 }
271
272 bool system_cpu_support_sse2()
273 {
274         CPUCapabilities& caps = system_cpu_capabilities();
275         return caps.sse && caps.sse2;
276 }
277
278 bool system_cpu_support_sse3()
279 {
280         CPUCapabilities& caps = system_cpu_capabilities();
281         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3;
282 }
283
284 bool system_cpu_support_sse41()
285 {
286         CPUCapabilities& caps = system_cpu_capabilities();
287         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41;
288 }
289
290 bool system_cpu_support_avx()
291 {
292         CPUCapabilities& caps = system_cpu_capabilities();
293         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 &&
294                caps.sse41 && caps.avx;
295 }
296
297 bool system_cpu_support_avx2()
298 {
299         CPUCapabilities& caps = system_cpu_capabilities();
300         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 &&
301                caps.avx && caps.f16c && caps.avx2 && caps.fma3 && caps.bmi1 &&
302                caps.bmi2;
303 }
304 #else
305
/* Build without x86 feature detection: SSE2 is never reported. */
bool system_cpu_support_sse2()
{
        const bool supported = false;
        return supported;
}
310
/* Build without x86 feature detection: SSE3 is never reported. */
bool system_cpu_support_sse3()
{
        const bool supported = false;
        return supported;
}
315
/* Build without x86 feature detection: SSE4.1 is never reported. */
bool system_cpu_support_sse41()
{
        const bool supported = false;
        return supported;
}
320
/* Build without x86 feature detection: AVX is never reported. */
bool system_cpu_support_avx()
{
        const bool supported = false;
        return supported;
}
/* Build without x86 feature detection: AVX2 is never reported. */
bool system_cpu_support_avx2()
{
        const bool supported = false;
        return supported;
}
329
330 #endif
331
/* Amount of physical memory installed in the system, in bytes.
 *
 * Returns 0 when the operating system query fails. */
size_t system_physical_ram()
{
#ifdef _WIN32
        MEMORYSTATUSEX ram;
        ram.dwLength = sizeof (ram);
        if(!GlobalMemoryStatusEx(&ram)) {
                return 0;
        }
        /* ullTotalPhys is already expressed in bytes, no scaling is needed. */
        return (size_t)ram.ullTotalPhys;
#elif defined(__APPLE__)
        uint64_t ram = 0;
        size_t len = sizeof(ram);
        if(sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) {
                return ram;
        }
        return 0;
#else
        /* sysconf() reports failure by returning -1, which must not be fed
         * into the unsigned multiplication below. */
        const long page_size = sysconf(_SC_PAGESIZE);
        const long num_pages = sysconf(_SC_PHYS_PAGES);
        if(page_size <= 0 || num_pages <= 0) {
                return 0;
        }
        return (size_t)page_size * (size_t)num_pages;
#endif
}
352
353 CCL_NAMESPACE_END