Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / device / device.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <stdlib.h>
18 #include <string.h>
19
20 #include "device/device.h"
21 #include "device/device_intern.h"
22
23 #include "util/util_foreach.h"
24 #include "util/util_half.h"
25 #include "util/util_logging.h"
26 #include "util/util_math.h"
27 #include "util/util_opengl.h"
28 #include "util/util_time.h"
29 #include "util/util_system.h"
30 #include "util/util_types.h"
31 #include "util/util_vector.h"
32 #include "util/util_string.h"
33
34 CCL_NAMESPACE_BEGIN
35
/* Flags marking the cached lists below as stale. Both start out set and are
 * set again by tag_update() and free_memory(). */
bool Device::need_types_update = true;
bool Device::need_devices_update = true;
/* Held by available_types() and available_devices() while they test the
 * flags above and rebuild the cached lists. */
thread_mutex Device::device_mutex;
/* Cached lists handed out by available_types() and available_devices(). */
vector<DeviceType> Device::types;
vector<DeviceInfo> Device::devices;
41
42 /* Device Requested Features */
43
44 std::ostream& operator <<(std::ostream &os,
45                           const DeviceRequestedFeatures& requested_features)
46 {
47         os << "Experimental features: "
48            << (requested_features.experimental ? "On" : "Off") << std::endl;
49         os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
50         /* TODO(sergey): Decode bitflag into list of names. */
51         os << "Nodes features: " << requested_features.nodes_features << std::endl;
52         os << "Use Hair: "
53            << string_from_bool(requested_features.use_hair) << std::endl;
54         os << "Use Object Motion: "
55            << string_from_bool(requested_features.use_object_motion) << std::endl;
56         os << "Use Camera Motion: "
57            << string_from_bool(requested_features.use_camera_motion) << std::endl;
58         os << "Use Baking: "
59            << string_from_bool(requested_features.use_baking) << std::endl;
60         os << "Use Subsurface: "
61            << string_from_bool(requested_features.use_subsurface) << std::endl;
62         os << "Use Volume: "
63            << string_from_bool(requested_features.use_volume) << std::endl;
64         os << "Use Branched Integrator: "
65            << string_from_bool(requested_features.use_integrator_branched) << std::endl;
66         os << "Use Patch Evaluation: "
67            << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
68         os << "Use Transparent Shadows: "
69            << string_from_bool(requested_features.use_transparent) << std::endl;
70         os << "Use Principled BSDF: "
71            << string_from_bool(requested_features.use_principled) << std::endl;
72         os << "Use Denoising: "
73            << string_from_bool(requested_features.use_denoising) << std::endl;
74         return os;
75 }
76
77 /* Device */
78
79 Device::~Device()
80 {
81         if(!background) {
82                 if(vertex_buffer != 0) {
83                         glDeleteBuffers(1, &vertex_buffer);
84                 }
85                 if(fallback_shader_program != 0) {
86                         glDeleteProgram(fallback_shader_program);
87                 }
88         }
89 }
90
91 /* TODO move shaders to standalone .glsl file. */
/* GLSL 3.30 vertex shader source compiled by bind_fallback_shader().
 *
 * Inputs are the "texCoord" and "pos" attributes and the "fullscreen"
 * uniform. The position is divided by "fullscreen" and remapped with
 * 2 * x - 1 before being written to gl_Position; the texture coordinate is
 * forwarded unchanged as "texCoord_interp". */
const char *FALLBACK_VERTEX_SHADER =
"#version 330\n"
"uniform vec2 fullscreen;\n"
"in vec2 texCoord;\n"
"in vec2 pos;\n"
"out vec2 texCoord_interp;\n"
"\n"
"vec2 normalize_coordinates()\n"
"{\n"
"       return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
"       gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
"       texCoord_interp = texCoord;\n"
"}\n\0";
109
/* GLSL 3.30 fragment shader source compiled by bind_fallback_shader().
 *
 * Samples the "image_texture" sampler at the interpolated texture
 * coordinate and writes the result unmodified to "fragColor". */
const char *FALLBACK_FRAGMENT_SHADER =
"#version 330\n"
"uniform sampler2D image_texture;\n"
"in vec2 texCoord_interp;\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
"       fragColor = texture(image_texture, texCoord_interp);\n"
"}\n\0";
120
121 static void shader_print_errors(const char *task, const char *log, const char *code)
122 {
123         LOG(ERROR) << "Shader: " << task << " error:";
124         LOG(ERROR) << "===== shader string ====";
125
126         stringstream stream(code);
127         string partial;
128
129         int line = 1;
130         while(getline(stream, partial, '\n')) {
131                 if(line < 10) {
132                         LOG(ERROR) << " " << line << " " << partial;
133                 }
134                 else {
135                         LOG(ERROR) << line << " " << partial;
136                 }
137                 line++;
138         }
139         LOG(ERROR) << log;
140 }
141
142 static int bind_fallback_shader(void)
143 {
144         GLint status;
145         GLchar log[5000];
146         GLsizei length = 0;
147         GLuint program = 0;
148
149         struct Shader {
150                 const char *source;
151                 GLenum type;
152         } shaders[2] = {
153             {FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
154             {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}
155     };
156
157         program = glCreateProgram();
158
159         for(int i = 0; i < 2; i++) {
160                 GLuint shader = glCreateShader(shaders[i].type);
161
162                 string source_str = shaders[i].source;
163                 const char *c_str = source_str.c_str();
164
165                 glShaderSource(shader, 1, &c_str, NULL);
166                 glCompileShader(shader);
167
168                 glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
169
170                 if(!status) {
171                         glGetShaderInfoLog(shader, sizeof(log), &length, log);
172                         shader_print_errors("compile", log, c_str);
173                         return 0;
174                 }
175
176                 glAttachShader(program, shader);
177         }
178
179         /* Link output. */
180         glBindFragDataLocation(program, 0, "fragColor");
181
182         /* Link and error check. */
183         glLinkProgram(program);
184
185         glGetProgramiv(program, GL_LINK_STATUS, &status);
186         if(!status) {
187                 glGetShaderInfoLog(program, sizeof(log), &length, log);
188                 shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
189                 shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
190                 return 0;
191         }
192
193         return program;
194 }
195
196 bool Device::bind_fallback_display_space_shader(const float width, const float height)
197 {
198         if(fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
199                 return false;
200         }
201
202         if(fallback_status == FALLBACK_SHADER_STATUS_NONE) {
203                 fallback_shader_program = bind_fallback_shader();
204                 fallback_status = FALLBACK_SHADER_STATUS_ERROR;
205
206                 if(fallback_shader_program == 0) {
207                         return false;
208                 }
209
210                 glUseProgram(fallback_shader_program);
211                 image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
212                 if(image_texture_location < 0) {
213                         LOG(ERROR) << "Shader doesn't containt the 'image_texture' uniform.";
214                         return false;
215                 }
216
217                 fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
218                 if(fullscreen_location < 0) {
219                         LOG(ERROR) << "Shader doesn't containt the 'fullscreen' uniform.";
220                         return false;
221                 }
222
223                 fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
224         }
225
226         /* Run this every time. */
227         glUseProgram(fallback_shader_program);
228         glUniform1i(image_texture_location, 0);
229         glUniform2f(fullscreen_location, width, height);
230         return true;
231 }
232
233 void Device::draw_pixels(
234     device_memory& rgba, int y,
235     int w, int h, int width, int height,
236     int dx, int dy, int dw, int dh,
237     bool transparent, const DeviceDrawParams &draw_params)
238 {
239         const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
240
241         assert(rgba.type == MEM_PIXELS);
242         mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));
243
244         GLuint texid;
245         glGenTextures(1, &texid);
246         glBindTexture(GL_TEXTURE_2D, texid);
247
248         if(rgba.data_type == TYPE_HALF) {
249                 GLhalf *data_pointer = (GLhalf*)rgba.host_pointer;
250                 data_pointer += 4 * y * w;
251                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
252         }
253         else {
254                 uint8_t *data_pointer = (uint8_t*)rgba.host_pointer;
255                 data_pointer += 4 * y * w;
256                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
257         }
258
259         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
260         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
261
262         if(transparent) {
263                 glEnable(GL_BLEND);
264                 glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
265         }
266
267         GLint shader_program;
268         if(use_fallback_shader) {
269                 if(!bind_fallback_display_space_shader(dw, dh)) {
270                         return;
271                 }
272                 shader_program = fallback_shader_program;
273         }
274         else {
275                 draw_params.bind_display_space_shader_cb();
276                 glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
277         }
278
279         if(!vertex_buffer) {
280                 glGenBuffers(1, &vertex_buffer);
281         }
282
283         glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
284         /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
285         glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
286
287         float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
288
289         if(vpointer) {
290                 /* texture coordinate - vertex pair */
291                 vpointer[0] = 0.0f;
292                 vpointer[1] = 0.0f;
293                 vpointer[2] = dx;
294                 vpointer[3] = dy;
295
296                 vpointer[4] = 1.0f;
297                 vpointer[5] = 0.0f;
298                 vpointer[6] = (float)width + dx;
299                 vpointer[7] = dy;
300
301                 vpointer[8] = 1.0f;
302                 vpointer[9] = 1.0f;
303                 vpointer[10] = (float)width + dx;
304                 vpointer[11] = (float)height + dy;
305
306                 vpointer[12] = 0.0f;
307                 vpointer[13] = 1.0f;
308                 vpointer[14] = dx;
309                 vpointer[15] = (float)height + dy;
310
311                 if(vertex_buffer) {
312                         glUnmapBuffer(GL_ARRAY_BUFFER);
313                 }
314         }
315
316         GLuint vertex_array_object;
317         GLuint position_attribute, texcoord_attribute;
318
319         glGenVertexArrays(1, &vertex_array_object);
320         glBindVertexArray(vertex_array_object);
321
322         texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
323         position_attribute = glGetAttribLocation(shader_program, "pos");
324
325         glEnableVertexAttribArray(texcoord_attribute);
326         glEnableVertexAttribArray(position_attribute);
327
328         glVertexAttribPointer(texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
329         glVertexAttribPointer(position_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)(sizeof(float) * 2));
330
331         glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
332
333         if(vertex_buffer) {
334                 glBindBuffer(GL_ARRAY_BUFFER, 0);
335         }
336
337         if(use_fallback_shader) {
338                 glUseProgram(0);
339         }
340         else {
341                 draw_params.unbind_display_space_shader_cb();
342         }
343
344         glBindTexture(GL_TEXTURE_2D, 0);
345         glDeleteTextures(1, &texid);
346
347         if(transparent) {
348                 glDisable(GL_BLEND);
349         }
350 }
351
352 Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
353 {
354         Device *device;
355
356         switch(info.type) {
357                 case DEVICE_CPU:
358                         device = device_cpu_create(info, stats, background);
359                         break;
360 #ifdef WITH_CUDA
361                 case DEVICE_CUDA:
362                         if(device_cuda_init())
363                                 device = device_cuda_create(info, stats, background);
364                         else
365                                 device = NULL;
366                         break;
367 #endif
368 #ifdef WITH_MULTI
369                 case DEVICE_MULTI:
370                         device = device_multi_create(info, stats, background);
371                         break;
372 #endif
373 #ifdef WITH_NETWORK
374                 case DEVICE_NETWORK:
375                         device = device_network_create(info, stats, "127.0.0.1");
376                         break;
377 #endif
378 #ifdef WITH_OPENCL
379                 case DEVICE_OPENCL:
380                         if(device_opencl_init())
381                                 device = device_opencl_create(info, stats, background);
382                         else
383                                 device = NULL;
384                         break;
385 #endif
386                 default:
387                         return NULL;
388         }
389
390         return device;
391 }
392
393 DeviceType Device::type_from_string(const char *name)
394 {
395         if(strcmp(name, "CPU") == 0)
396                 return DEVICE_CPU;
397         else if(strcmp(name, "CUDA") == 0)
398                 return DEVICE_CUDA;
399         else if(strcmp(name, "OPENCL") == 0)
400                 return DEVICE_OPENCL;
401         else if(strcmp(name, "NETWORK") == 0)
402                 return DEVICE_NETWORK;
403         else if(strcmp(name, "MULTI") == 0)
404                 return DEVICE_MULTI;
405
406         return DEVICE_NONE;
407 }
408
409 string Device::string_from_type(DeviceType type)
410 {
411         if(type == DEVICE_CPU)
412                 return "CPU";
413         else if(type == DEVICE_CUDA)
414                 return "CUDA";
415         else if(type == DEVICE_OPENCL)
416                 return "OPENCL";
417         else if(type == DEVICE_NETWORK)
418                 return "NETWORK";
419         else if(type == DEVICE_MULTI)
420                 return "MULTI";
421
422         return "";
423 }
424
425 vector<DeviceType>& Device::available_types()
426 {
427         thread_scoped_lock lock(device_mutex);
428         if(need_types_update) {
429                 types.clear();
430                 types.push_back(DEVICE_CPU);
431 #ifdef WITH_CUDA
432                 if(device_cuda_init()) {
433                         types.push_back(DEVICE_CUDA);
434                 }
435 #endif
436 #ifdef WITH_OPENCL
437                 if(device_opencl_init()) {
438                         types.push_back(DEVICE_OPENCL);
439                 }
440 #endif
441 #ifdef WITH_NETWORK
442                 types.push_back(DEVICE_NETWORK);
443 #endif
444                 need_types_update = false;
445         }
446         return types;
447 }
448
449 vector<DeviceInfo>& Device::available_devices()
450 {
451         thread_scoped_lock lock(device_mutex);
452         if(need_devices_update) {
453                 devices.clear();
454 #ifdef WITH_OPENCL
455                 if(device_opencl_init()) {
456                         device_opencl_info(devices);
457                 }
458 #endif
459 #ifdef WITH_CUDA
460                 if(device_cuda_init()) {
461                         device_cuda_info(devices);
462                 }
463 #endif
464                 device_cpu_info(devices);
465 #ifdef WITH_NETWORK
466                 device_network_info(devices);
467 #endif
468                 need_devices_update = false;
469         }
470         return devices;
471 }
472
473 string Device::device_capabilities()
474 {
475         string capabilities = "CPU device capabilities: ";
476         capabilities += device_cpu_capabilities() + "\n";
477
478 #ifdef WITH_OPENCL
479         if(device_opencl_init()) {
480                 capabilities += "\nOpenCL device capabilities:\n";
481                 capabilities += device_opencl_capabilities();
482         }
483 #endif
484
485 #ifdef WITH_CUDA
486         if(device_cuda_init()) {
487                 capabilities += "\nCUDA device capabilities:\n";
488                 capabilities += device_cuda_capabilities();
489         }
490 #endif
491
492         return capabilities;
493 }
494
495 DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int threads, bool background)
496 {
497         assert(subdevices.size() > 1);
498
499         DeviceInfo info;
500         info.type = DEVICE_MULTI;
501         info.id = "MULTI";
502         info.description = "Multi Device";
503         info.num = 0;
504
505         info.has_half_images = true;
506         info.has_volume_decoupled = true;
507         info.has_osl = true;
508         info.has_profiling = true;
509
510         foreach(const DeviceInfo &device, subdevices) {
511                 /* Ensure CPU device does not slow down GPU. */
512                 if(device.type == DEVICE_CPU && subdevices.size() > 1) {
513                         if(background) {
514                                 int orig_cpu_threads = (threads)? threads: system_cpu_thread_count();
515                                 int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
516
517                                 VLOG(1) << "CPU render threads reduced from "
518                                                 << orig_cpu_threads << " to " << cpu_threads
519                                                 << ", to dedicate to GPU.";
520
521                                 if(cpu_threads >= 1) {
522                                         DeviceInfo cpu_device = device;
523                                         cpu_device.cpu_threads = cpu_threads;
524                                         info.multi_devices.push_back(cpu_device);
525                                 }
526                                 else {
527                                         continue;
528                                 }
529                         }
530                         else {
531                                 VLOG(1) << "CPU render threads disabled for interactive render.";
532                                 continue;
533                         }
534                 }
535                 else {
536                         info.multi_devices.push_back(device);
537                 }
538
539                 /* Accumulate device info. */
540                 info.has_half_images &= device.has_half_images;
541                 info.has_volume_decoupled &= device.has_volume_decoupled;
542                 info.has_osl &= device.has_osl;
543                 info.has_profiling &= device.has_profiling;
544         }
545
546         return info;
547 }
548
549 void Device::tag_update()
550 {
551         need_types_update = true;
552         need_devices_update = true;
553 }
554
555 void Device::free_memory()
556 {
557         need_types_update = true;
558         need_devices_update = true;
559         types.free_memory();
560         devices.free_memory();
561 }
562
563 CCL_NAMESPACE_END