/* blender.git: intern/cycles/device/device.cpp */

/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdlib.h>
#include <string.h>

#include "device/device.h"
#include "device/device_intern.h"

#include "util/util_foreach.h"
#include "util/util_half.h"
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_opengl.h"
#include "util/util_time.h"
#include "util/util_system.h"
#include "util/util_types.h"
#include "util/util_vector.h"
#include "util/util_string.h"

CCL_NAMESPACE_BEGIN

bool Device::need_types_update = true;
bool Device::need_devices_update = true;
thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::opencl_devices;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::network_devices;
uint Device::devices_initialized_mask = 0;

/* Device Requested Features */

std::ostream& operator <<(std::ostream &os,
                          const DeviceRequestedFeatures& requested_features)
{
	os << "Experimental features: "
	   << (requested_features.experimental ? "On" : "Off") << std::endl;
	os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
	/* TODO(sergey): Decode bitflag into list of names. */
	os << "Nodes features: " << requested_features.nodes_features << std::endl;
	os << "Use Hair: "
	   << string_from_bool(requested_features.use_hair) << std::endl;
	os << "Use Object Motion: "
	   << string_from_bool(requested_features.use_object_motion) << std::endl;
	os << "Use Camera Motion: "
	   << string_from_bool(requested_features.use_camera_motion) << std::endl;
	os << "Use Baking: "
	   << string_from_bool(requested_features.use_baking) << std::endl;
	os << "Use Subsurface: "
	   << string_from_bool(requested_features.use_subsurface) << std::endl;
	os << "Use Volume: "
	   << string_from_bool(requested_features.use_volume) << std::endl;
	os << "Use Branched Integrator: "
	   << string_from_bool(requested_features.use_integrator_branched) << std::endl;
	os << "Use Patch Evaluation: "
	   << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
	os << "Use Transparent Shadows: "
	   << string_from_bool(requested_features.use_transparent) << std::endl;
	os << "Use Principled BSDF: "
	   << string_from_bool(requested_features.use_principled) << std::endl;
	os << "Use Denoising: "
	   << string_from_bool(requested_features.use_denoising) << std::endl;
	return os;
}
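
/* Usage sketch (illustrative, not part of the original file): the stream
 * operator above is typically used to log the requested feature set before
 * kernel compilation, for example:
 *
 *   DeviceRequestedFeatures requested_features;
 *   requested_features.use_hair = true;
 *   VLOG(2) << "Requested features:\n" << requested_features;
 */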

/* Device */

Device::~Device()
{
	if(!background) {
		if(vertex_buffer != 0) {
			glDeleteBuffers(1, &vertex_buffer);
		}
		if(fallback_shader_program != 0) {
			glDeleteProgram(fallback_shader_program);
		}
	}
}

/* TODO move shaders to standalone .glsl file. */
const char *FALLBACK_VERTEX_SHADER =
"#version 330\n"
"uniform vec2 fullscreen;\n"
"in vec2 texCoord;\n"
"in vec2 pos;\n"
"out vec2 texCoord_interp;\n"
"\n"
"vec2 normalize_coordinates()\n"
"{\n"
"	return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
"	gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
"	texCoord_interp = texCoord;\n"
"}\n\0";

const char *FALLBACK_FRAGMENT_SHADER =
"#version 330\n"
"uniform sampler2D image_texture;\n"
"in vec2 texCoord_interp;\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
"	fragColor = texture(image_texture, texCoord_interp);\n"
"}\n\0";
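
/* Note (added for clarity): the fallback shaders above expose a small fixed
 * interface that the code below binds against: the vertex inputs "pos" and
 * "texCoord", the "fullscreen" and "image_texture" uniforms, and the
 * "fragColor" output. */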

static void shader_print_errors(const char *task, const char *log, const char *code)
{
	LOG(ERROR) << "Shader: " << task << " error:";
	LOG(ERROR) << "==== shader string ====";

	stringstream stream(code);
	string partial;

	int line = 1;
	while(getline(stream, partial, '\n')) {
		if(line < 10) {
			LOG(ERROR) << " " << line << " " << partial;
		}
		else {
			LOG(ERROR) << line << " " << partial;
		}
		line++;
	}
	LOG(ERROR) << log;
}

/* Compile and link the fallback display shader, returning the GL program
 * handle on success or 0 on failure. */
static int bind_fallback_shader(void)
{
	GLint status;
	GLchar log[5000];
	GLsizei length = 0;
	GLuint program = 0;

	struct Shader {
		const char *source;
		GLenum type;
	} shaders[2] = {
		{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
		{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}
	};

	program = glCreateProgram();

	for(int i = 0; i < 2; i++) {
		GLuint shader = glCreateShader(shaders[i].type);

		string source_str = shaders[i].source;
		const char *c_str = source_str.c_str();

		glShaderSource(shader, 1, &c_str, NULL);
		glCompileShader(shader);

		glGetShaderiv(shader, GL_COMPILE_STATUS, &status);

		if(!status) {
			glGetShaderInfoLog(shader, sizeof(log), &length, log);
			shader_print_errors("compile", log, c_str);
			return 0;
		}

		glAttachShader(program, shader);
	}

	/* Link output. */
	glBindFragDataLocation(program, 0, "fragColor");

	/* Link and error check. */
	glLinkProgram(program);

	glGetProgramiv(program, GL_LINK_STATUS, &status);
	if(!status) {
		glGetProgramInfoLog(program, sizeof(log), &length, log);
		shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
		shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
		return 0;
	}

	return program;
}

/* Lazily compile the fallback display shader on first use, then bind it and
 * update its uniforms for the current display size. */
bool Device::bind_fallback_display_space_shader(const float width, const float height)
{
	if(fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
		return false;
	}

	if(fallback_status == FALLBACK_SHADER_STATUS_NONE) {
		fallback_shader_program = bind_fallback_shader();
		fallback_status = FALLBACK_SHADER_STATUS_ERROR;

		if(fallback_shader_program == 0) {
			return false;
		}

		glUseProgram(fallback_shader_program);
		image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
		if(image_texture_location < 0) {
			LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
			return false;
		}

		fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
		if(fullscreen_location < 0) {
			LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
			return false;
		}

		fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
	}

	/* Run this every time. */
	glUseProgram(fallback_shader_program);
	glUniform1i(image_texture_location, 0);
	glUniform2f(fullscreen_location, width, height);
	return true;
}
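
/* Usage sketch (illustrative): draw_pixels() below uses this as the fallback
 * path when no display-space shader callback is supplied:
 *
 *   if(bind_fallback_display_space_shader(dw, dh)) {
 *           ... issue the draw calls, then glUseProgram(0) when done ...
 *   }
 */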

/* Draw the rendered pixels to the current OpenGL context as a textured quad,
 * using either the caller-provided display-space shader or the fallback
 * shader above. */
void Device::draw_pixels(
    device_memory& rgba, int y,
    int w, int h, int width, int height,
    int dx, int dy, int dw, int dh,
    bool transparent, const DeviceDrawParams &draw_params)
{
	const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);

	assert(rgba.type == MEM_PIXELS);
	mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));

	GLuint texid;
	glActiveTexture(GL_TEXTURE0);
	glGenTextures(1, &texid);
	glBindTexture(GL_TEXTURE_2D, texid);

	if(rgba.data_type == TYPE_HALF) {
		GLhalf *data_pointer = (GLhalf*)rgba.host_pointer;
		data_pointer += 4 * y * w;
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
	}
	else {
		uint8_t *data_pointer = (uint8_t*)rgba.host_pointer;
		data_pointer += 4 * y * w;
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
	}

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

	if(transparent) {
		glEnable(GL_BLEND);
		glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
	}

	GLint shader_program;
	if(use_fallback_shader) {
		if(!bind_fallback_display_space_shader(dw, dh)) {
			return;
		}
		shader_program = fallback_shader_program;
	}
	else {
		draw_params.bind_display_space_shader_cb();
		glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
	}

	if(!vertex_buffer) {
		glGenBuffers(1, &vertex_buffer);
	}

	glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
	/* Invalidate old contents - avoids stalling if the buffer is still waiting in the queue to be rendered. */
	glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);

	float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);

	if(vpointer) {
		/* Interleaved texture coordinate - vertex position pairs. */
		vpointer[0] = 0.0f;
		vpointer[1] = 0.0f;
		vpointer[2] = dx;
		vpointer[3] = dy;

		vpointer[4] = 1.0f;
		vpointer[5] = 0.0f;
		vpointer[6] = (float)width + dx;
		vpointer[7] = dy;

		vpointer[8] = 1.0f;
		vpointer[9] = 1.0f;
		vpointer[10] = (float)width + dx;
		vpointer[11] = (float)height + dy;

		vpointer[12] = 0.0f;
		vpointer[13] = 1.0f;
		vpointer[14] = dx;
		vpointer[15] = (float)height + dy;

		if(vertex_buffer) {
			glUnmapBuffer(GL_ARRAY_BUFFER);
		}
	}

	GLuint vertex_array_object;
	GLuint position_attribute, texcoord_attribute;

	glGenVertexArrays(1, &vertex_array_object);
	glBindVertexArray(vertex_array_object);

	texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
	position_attribute = glGetAttribLocation(shader_program, "pos");

	glEnableVertexAttribArray(texcoord_attribute);
	glEnableVertexAttribArray(position_attribute);

	glVertexAttribPointer(texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
	glVertexAttribPointer(position_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)(sizeof(float) * 2));

	glDrawArrays(GL_TRIANGLE_FAN, 0, 4);

	if(vertex_buffer) {
		glBindBuffer(GL_ARRAY_BUFFER, 0);
	}

	/* The vertex array object was generated for this draw only, delete it to avoid leaking one per call. */
	glBindVertexArray(0);
	glDeleteVertexArrays(1, &vertex_array_object);

	if(use_fallback_shader) {
		glUseProgram(0);
	}
	else {
		draw_params.unbind_display_space_shader_cb();
	}

	glBindTexture(GL_TEXTURE_2D, 0);
	glDeleteTextures(1, &texid);

	if(transparent) {
		glDisable(GL_BLEND);
	}
}
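
/* Call sketch (illustrative only; the parameter values are assumptions): a
 * full-frame draw at the origin would look something like
 *
 *   device->draw_pixels(rgba, 0, w, h, w, h, 0, 0, w, h, false, draw_params);
 */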

Device *Device::create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background)
{
	Device *device;

	switch(info.type) {
		case DEVICE_CPU:
			device = device_cpu_create(info, stats, profiler, background);
			break;
#ifdef WITH_CUDA
		case DEVICE_CUDA:
			if(device_cuda_init())
				device = device_cuda_create(info, stats, profiler, background);
			else
				device = NULL;
			break;
#endif
#ifdef WITH_MULTI
		case DEVICE_MULTI:
			device = device_multi_create(info, stats, profiler, background);
			break;
#endif
#ifdef WITH_NETWORK
		case DEVICE_NETWORK:
			device = device_network_create(info, stats, profiler, "127.0.0.1");
			break;
#endif
#ifdef WITH_OPENCL
		case DEVICE_OPENCL:
			if(device_opencl_init())
				device = device_opencl_create(info, stats, profiler, background);
			else
				device = NULL;
			break;
#endif
		default:
			return NULL;
	}

	return device;
}
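
/* Creation sketch (illustrative; error handling trimmed): a caller would
 * normally pick a DeviceInfo from available_devices() and then create the
 * device from it:
 *
 *   vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_CPU);
 *   DeviceInfo info = devices.front();
 *   Device *device = Device::create(info, stats, profiler, background);
 *   if(device == NULL) { ... report the failure or fall back ... }
 */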

DeviceType Device::type_from_string(const char *name)
{
	if(strcmp(name, "CPU") == 0)
		return DEVICE_CPU;
	else if(strcmp(name, "CUDA") == 0)
		return DEVICE_CUDA;
	else if(strcmp(name, "OPENCL") == 0)
		return DEVICE_OPENCL;
	else if(strcmp(name, "NETWORK") == 0)
		return DEVICE_NETWORK;
	else if(strcmp(name, "MULTI") == 0)
		return DEVICE_MULTI;

	return DEVICE_NONE;
}

string Device::string_from_type(DeviceType type)
{
	if(type == DEVICE_CPU)
		return "CPU";
	else if(type == DEVICE_CUDA)
		return "CUDA";
	else if(type == DEVICE_OPENCL)
		return "OPENCL";
	else if(type == DEVICE_NETWORK)
		return "NETWORK";
	else if(type == DEVICE_MULTI)
		return "MULTI";

	return "";
}
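
/* The two helpers above are inverses of each other for the known types, e.g.
 * (illustration):
 *
 *   DeviceType type = Device::type_from_string("CUDA");   // DEVICE_CUDA
 *   string name = Device::string_from_type(type);         // "CUDA"
 */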

vector<DeviceType> Device::available_types()
{
	vector<DeviceType> types;
	types.push_back(DEVICE_CPU);
#ifdef WITH_CUDA
	types.push_back(DEVICE_CUDA);
#endif
#ifdef WITH_OPENCL
	types.push_back(DEVICE_OPENCL);
#endif
#ifdef WITH_NETWORK
	types.push_back(DEVICE_NETWORK);
#endif
	return types;
}

vector<DeviceInfo> Device::available_devices(uint mask)
{
	/* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
	 * be broken and cause crashes when only trying to get device info, so
	 * we don't want to do any initialization until the user chooses to. */
	thread_scoped_lock lock(device_mutex);
	vector<DeviceInfo> devices;

#ifdef WITH_OPENCL
	if(mask & DEVICE_MASK_OPENCL) {
		if(!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
			if(device_opencl_init()) {
				device_opencl_info(opencl_devices);
			}
			devices_initialized_mask |= DEVICE_MASK_OPENCL;
		}
		foreach(DeviceInfo& info, opencl_devices) {
			devices.push_back(info);
		}
	}
#endif

#ifdef WITH_CUDA
	if(mask & DEVICE_MASK_CUDA) {
		if(!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
			if(device_cuda_init()) {
				device_cuda_info(cuda_devices);
			}
			devices_initialized_mask |= DEVICE_MASK_CUDA;
		}
		foreach(DeviceInfo& info, cuda_devices) {
			devices.push_back(info);
		}
	}
#endif

	if(mask & DEVICE_MASK_CPU) {
		if(!(devices_initialized_mask & DEVICE_MASK_CPU)) {
			device_cpu_info(cpu_devices);
			devices_initialized_mask |= DEVICE_MASK_CPU;
		}
		foreach(DeviceInfo& info, cpu_devices) {
			devices.push_back(info);
		}
	}

#ifdef WITH_NETWORK
	if(mask & DEVICE_MASK_NETWORK) {
		if(!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
			device_network_info(network_devices);
			devices_initialized_mask |= DEVICE_MASK_NETWORK;
		}
		foreach(DeviceInfo& info, network_devices) {
			devices.push_back(info);
		}
	}
#endif

	return devices;
}
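
/* Query sketch (illustrative): device masks can be combined to enumerate
 * several backends in one call, e.g.
 *
 *   vector<DeviceInfo> devices =
 *           Device::available_devices(DEVICE_MASK_CPU | DEVICE_MASK_CUDA);
 *   foreach(DeviceInfo& info, devices) {
 *           VLOG(1) << info.description;
 *   }
 */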

string Device::device_capabilities(uint mask)
{
	thread_scoped_lock lock(device_mutex);
	string capabilities = "";

	if(mask & DEVICE_MASK_CPU) {
		capabilities += "\nCPU device capabilities: ";
		capabilities += device_cpu_capabilities() + "\n";
	}

#ifdef WITH_OPENCL
	if(mask & DEVICE_MASK_OPENCL) {
		if(device_opencl_init()) {
			capabilities += "\nOpenCL device capabilities:\n";
			capabilities += device_opencl_capabilities();
		}
	}
#endif

#ifdef WITH_CUDA
	if(mask & DEVICE_MASK_CUDA) {
		if(device_cuda_init()) {
			capabilities += "\nCUDA device capabilities:\n";
			capabilities += device_cuda_capabilities();
		}
	}
#endif

	return capabilities;
}

DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int threads, bool background)
{
	assert(subdevices.size() > 0);

	if(subdevices.size() == 1) {
		/* No multi device needed. */
		return subdevices.front();
	}

	DeviceInfo info;
	info.type = DEVICE_MULTI;
	info.id = "MULTI";
	info.description = "Multi Device";
	info.num = 0;

	info.has_half_images = true;
	info.has_volume_decoupled = true;
	info.has_osl = true;
	info.has_profiling = true;

	foreach(const DeviceInfo &device, subdevices) {
		/* Ensure CPU device does not slow down GPU. */
		if(device.type == DEVICE_CPU && subdevices.size() > 1) {
			if(background) {
				int orig_cpu_threads = (threads)? threads: system_cpu_thread_count();
				int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);

				VLOG(1) << "CPU render threads reduced from "
				        << orig_cpu_threads << " to " << cpu_threads
				        << ", to dedicate to GPU.";

				if(cpu_threads >= 1) {
					DeviceInfo cpu_device = device;
					cpu_device.cpu_threads = cpu_threads;
					info.multi_devices.push_back(cpu_device);
				}
				else {
					continue;
				}
			}
			else {
				VLOG(1) << "CPU render threads disabled for interactive render.";
				continue;
			}
		}
		else {
			info.multi_devices.push_back(device);
		}

		/* Accumulate device info. */
		info.has_half_images &= device.has_half_images;
		info.has_volume_decoupled &= device.has_volume_decoupled;
		info.has_osl &= device.has_osl;
		info.has_profiling &= device.has_profiling;
	}

	return info;
}
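
/* Combination sketch (illustrative): to render on several GPUs plus the CPU
 * in a background render, the chosen DeviceInfo entries are folded into a
 * single virtual device and that device is created as usual:
 *
 *   vector<DeviceInfo> subdevices = ...;  // e.g. two CUDA devices + one CPU
 *   DeviceInfo multi = Device::get_multi_device(subdevices, 0, true);
 *   Device *device = Device::create(multi, stats, profiler, true);
 */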

void Device::tag_update()
{
	free_memory();
}

/* Clear the cached device lists so that they are re-enumerated on the next
 * call to available_devices(). */
void Device::free_memory()
{
	devices_initialized_mask = 0;
	cuda_devices.clear();
	opencl_devices.clear();
	cpu_devices.clear();
	network_devices.clear();
}

CCL_NAMESPACE_END