Cycles: Support multithreaded compilation of kernels
[blender.git] / intern / cycles / util / util_system.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "util/util_system.h"
18
19 #include "util/util_logging.h"
20 #include "util/util_types.h"
21 #include "util/util_string.h"
22
23 #include <numaapi.h>
24
25 #include <OpenImageIO/sysutil.h>
26 OIIO_NAMESPACE_USING
27
28 #ifdef _WIN32
29 #  if(!defined(FREE_WINDOWS))
30 #    include <intrin.h>
31 #  endif
32 #  include "util_windows.h"
33 #elif defined(__APPLE__)
34 #  include <sys/ioctl.h>
35 #  include <sys/sysctl.h>
36 #  include <sys/types.h>
37 #else
38 #  include <unistd.h>
39 #  include <sys/ioctl.h>
40 #endif
41
42 CCL_NAMESPACE_BEGIN
43
44 bool system_cpu_ensure_initialized()
45 {
46         static bool is_initialized = false;
47         static bool result = false;
48         if(is_initialized) {
49                 return result;
50         }
51         is_initialized = true;
52         const NUMAAPI_Result numa_result = numaAPI_Initialize();
53         result = (numa_result == NUMAAPI_SUCCESS);
54         return result;
55 }
56
/* Fallback solution, which doesn't use NUMA/CPU groups.
 *
 * Asks the operating system directly for the number of processors.
 * Returns at least 1: when the query fails or reports a non-positive value,
 * a single processor is assumed so callers never see a zero, negative or
 * uninitialized count. */
static int system_cpu_thread_count_fallback()
{
#ifdef _WIN32
        SYSTEM_INFO info;
        GetSystemInfo(&info);
        return (int)info.dwNumberOfProcessors;
#elif defined(__APPLE__)
        int count = 0;
        size_t len = sizeof(count);
        int mib[2] = { CTL_HW, HW_NCPU };
        /* sysctl() leaves `count` untouched on failure, so check its result. */
        if(sysctl(mib, 2, &count, &len, NULL, 0) != 0 || count < 1) {
                return 1;
        }
        return count;
#else
        /* sysconf() returns -1 when the value cannot be determined. */
        const long count = sysconf(_SC_NPROCESSORS_ONLN);
        return (count < 1) ? 1 : (int)count;
#endif
}
74
75 int system_cpu_thread_count()
76 {
77         const int num_nodes = system_cpu_num_numa_nodes();
78         int num_threads = 0;
79         for(int node = 0; node < num_nodes; ++node) {
80                 if(!system_cpu_is_numa_node_available(node)) {
81                         continue;
82                 }
83                 num_threads += system_cpu_num_numa_node_processors(node);
84         }
85         return num_threads;
86 }
87
88 int system_cpu_num_numa_nodes()
89 {
90         if(!system_cpu_ensure_initialized()) {
91                 /* Fallback to a single node with all the threads. */
92                 return 1;
93         }
94         return numaAPI_GetNumNodes();
95 }
96
97 bool system_cpu_is_numa_node_available(int node)
98 {
99         if(!system_cpu_ensure_initialized()) {
100                 return true;
101         }
102         return numaAPI_IsNodeAvailable(node);
103 }
104
105 int system_cpu_num_numa_node_processors(int node)
106 {
107         if(!system_cpu_ensure_initialized()) {
108                 return system_cpu_thread_count_fallback();
109         }
110         return numaAPI_GetNumNodeProcessors(node);
111 }
112
113 bool system_cpu_run_thread_on_node(int node)
114 {
115         if(!system_cpu_ensure_initialized()) {
116                 return true;
117         }
118         return numaAPI_RunThreadOnNode(node);
119 }
120
/* Width, in columns, of the console attached to standard output.
 *
 * Returns 80 when the width cannot be queried or the reported value is not
 * positive. */
int system_console_width()
{
        const int default_width = 80;

#ifdef _WIN32
        CONSOLE_SCREEN_BUFFER_INFO screen_info;
        if(!GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &screen_info)) {
                return default_width;
        }
        const int width = screen_info.dwSize.X;
#else
        struct winsize size;
        if(ioctl(STDOUT_FILENO, TIOCGWINSZ, &size) != 0) {
                return default_width;
        }
        const int width = size.ws_col;
#endif

        if(width > 0) {
                return width;
        }
        return default_width;
}
139
140 int system_cpu_num_active_group_processors()
141 {
142         if(!system_cpu_ensure_initialized()) {
143                 return system_cpu_thread_count_fallback();
144         }
145         return numaAPI_GetNumCurrentNodesProcessors();
146 }
147
#if !defined(_WIN32) || defined(FREE_WINDOWS)
/* Stand-in for the MSVC __cpuid() intrinsic on other compilers.
 *
 * Executes the CPUID instruction with EAX = selector and ECX = 0 and stores
 * the resulting EAX, EBX, ECX and EDX in data[0], data[1], data[2] and
 * data[3] respectively.
 *
 * ECX is zeroed explicitly: sub-leaf indexed leaves (such as leaf 7) read it
 * as input, and leaving it unset would make their result depend on whatever
 * value happened to be in the register.
 *
 * On targets which are neither x86-64 nor i386 all four values are set to 0. */
static void __cpuid(int data[4], int selector)
{
#if defined(__x86_64__)
        __asm__("cpuid"
                : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3])
                : "a"(selector), "c"(0));
#elif defined(__i386__)
        /* EBX is saved and restored by hand and its value is handed out through
         * another register (presumably because EBX may be reserved by the
         * compiler on this target, e.g. for position independent code). */
        __asm__("pushl %%ebx    \n\t"
                "cpuid          \n\t"
                "movl %%ebx, %1 \n\t"
                "popl %%ebx     \n\t"
                : "=a" (data[0]), "=r" (data[1]), "=c" (data[2]), "=d" (data[3])
                : "a"(selector), "c"(0)
                : "ebx");
#else
        data[0] = data[1] = data[2] = data[3] = 0;
#endif
}
#endif
166
167 string system_cpu_brand_string()
168 {
169         char buf[48] = {0};
170         int result[4] = {0};
171
172         __cpuid(result, 0x80000000);
173
174         if(result[0] >= (int)0x80000004) {
175                 __cpuid((int*)(buf+0), 0x80000002);
176                 __cpuid((int*)(buf+16), 0x80000003);
177                 __cpuid((int*)(buf+32), 0x80000004);
178
179                 string brand = buf;
180
181                 /* make it a bit more presentable */
182                 brand = string_remove_trademark(brand);
183
184                 return brand;
185         }
186
187         return "Unknown CPU";
188 }
189
/* Pointer width of this build, in bits (size of void* times 8). */
int system_cpu_bits()
{
        const int bits_per_byte = 8;
        const int pointer_bytes = (int)sizeof(void*);
        return pointer_bytes * bits_per_byte;
}
194
195 #if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)
196
/* Set of instruction set extension flags detected for the host CPU.
 * A flag is true when the corresponding extension was detected. */
struct CPUCapabilities {
        /* 64-bit mode and MMX. */
        bool x64, mmx;
        /* SSE family. */
        bool sse, sse2, sse3, ssse3, sse41, sse42, sse4a;
        /* AVX family and related extensions. */
        bool avx, f16c, avx2, xop;
        /* Fused multiply-add. */
        bool fma3, fma4;
        /* Bit manipulation instructions. */
        bool bmi1, bmi2;
};
216
217 static CPUCapabilities& system_cpu_capabilities()
218 {
219         static CPUCapabilities caps;
220         static bool caps_init = false;
221
222         if(!caps_init) {
223                 int result[4], num;
224
225                 memset(&caps, 0, sizeof(caps));
226
227                 __cpuid(result, 0);
228                 num = result[0];
229
230                 if(num >= 1) {
231                         __cpuid(result, 0x00000001);
232                         caps.mmx = (result[3] & ((int)1 << 23)) != 0;
233                         caps.sse = (result[3] & ((int)1 << 25)) != 0;
234                         caps.sse2 = (result[3] & ((int)1 << 26)) != 0;
235                         caps.sse3 = (result[2] & ((int)1 <<  0)) != 0;
236
237                         caps.ssse3 = (result[2] & ((int)1 <<  9)) != 0;
238                         caps.sse41 = (result[2] & ((int)1 << 19)) != 0;
239                         caps.sse42 = (result[2] & ((int)1 << 20)) != 0;
240
241                         caps.fma3 = (result[2] & ((int)1 << 12)) != 0;
242                         caps.avx = false;
243                         bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0;
244                         bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0;
245
246                         if( os_uses_xsave_xrestore && cpu_avx_support) {
247                                 // Check if the OS will save the YMM registers
248                                 uint32_t xcr_feature_mask;
249 #if defined(__GNUC__)
250                                 int edx; /* not used */
251                                 /* actual opcode for xgetbv */
252                                 __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr_feature_mask) , "=d" (edx) : "c" (0) );
253 #elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
254                                 xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);  /* min VS2010 SP1 compiler is required */
255 #else
256                                 xcr_feature_mask = 0;
257 #endif
258                                 caps.avx = (xcr_feature_mask & 0x6) == 0x6;
259                         }
260
261                         caps.f16c = (result[2] & ((int)1 << 29)) != 0;
262
263                         __cpuid(result, 0x00000007);
264                         caps.bmi1 = (result[1] & ((int)1 << 3)) != 0;
265                         caps.bmi2 = (result[1] & ((int)1 << 8)) != 0;
266                         caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
267                 }
268
269                 caps_init = true;
270         }
271
272         return caps;
273 }
274
275 bool system_cpu_support_sse2()
276 {
277         CPUCapabilities& caps = system_cpu_capabilities();
278         return caps.sse && caps.sse2;
279 }
280
281 bool system_cpu_support_sse3()
282 {
283         CPUCapabilities& caps = system_cpu_capabilities();
284         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3;
285 }
286
287 bool system_cpu_support_sse41()
288 {
289         CPUCapabilities& caps = system_cpu_capabilities();
290         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41;
291 }
292
293 bool system_cpu_support_avx()
294 {
295         CPUCapabilities& caps = system_cpu_capabilities();
296         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 &&
297                caps.sse41 && caps.avx;
298 }
299
300 bool system_cpu_support_avx2()
301 {
302         CPUCapabilities& caps = system_cpu_capabilities();
303         return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 &&
304                caps.avx && caps.f16c && caps.avx2 && caps.fma3 && caps.bmi1 &&
305                caps.bmi2;
306 }
307 #else
308
/* Builds for non-x86 targets: no CPU detection is done and none of the x86
 * instruction set extensions are reported as supported. */

/* SSE2 is never reported on this target. */
bool system_cpu_support_sse2()
{
        return false;
}

/* SSE3 is never reported on this target. */
bool system_cpu_support_sse3()
{
        return false;
}

/* SSE4.1 is never reported on this target. */
bool system_cpu_support_sse41()
{
        return false;
}

/* AVX is never reported on this target. */
bool system_cpu_support_avx()
{
        return false;
}

/* AVX2 is never reported on this target. */
bool system_cpu_support_avx2()
{
        return false;
}
332
333 #endif
334
335 bool system_call_self(const vector<string>& args)
336 {
337         /* Escape program and arguments in case they contain spaces. */
338         string cmd = "\"" + Sysutil::this_program_path() + "\"";
339
340         for(int i = 0; i < args.size(); i++) {
341                 cmd += " \"" + args[i] + "\"";
342         }
343
344         /* Quiet output. */
345 #ifdef _WIN32
346         cmd += " > nul";
347 #else
348         cmd += " > /dev/null";
349 #endif
350
351         return (system(cmd.c_str()) == 0);
352 }
353
/* Amount of physical memory installed in the system, in bytes.
 *
 * Returns 0 when the amount can not be queried. */
size_t system_physical_ram()
{
#ifdef _WIN32
        MEMORYSTATUSEX ram;
        ram.dwLength = sizeof (ram);
        if(!GlobalMemoryStatusEx(&ram)) {
                return 0;
        }
        /* ullTotalPhys is already expressed in bytes, no scaling is needed. */
        return (size_t)ram.ullTotalPhys;
#elif defined(__APPLE__)
        uint64_t ram = 0;
        size_t len = sizeof(ram);
        if(sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) {
                return ram;
        }
        return 0;
#else
        /* sysconf() returns -1 on failure; do not let that wrap around into a
         * huge unsigned value. */
        const long page_size = sysconf(_SC_PAGESIZE);
        const long num_pages = sysconf(_SC_PHYS_PAGES);
        if(page_size <= 0 || num_pages <= 0) {
                return 0;
        }
        return (size_t)page_size * (size_t)num_pages;
#endif
}
374
375 CCL_NAMESPACE_END