Cycles: Replace __MAX_CLOSURE__ build option with runtime integrator variable
[blender.git] / intern / cycles / device / device.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <stdlib.h>
18 #include <string.h>
19
20 #include "device/device.h"
21 #include "device/device_intern.h"
22
23 #include "util/util_debug.h"
24 #include "util/util_foreach.h"
25 #include "util/util_half.h"
26 #include "util/util_logging.h"
27 #include "util/util_math.h"
28 #include "util/util_opengl.h"
29 #include "util/util_time.h"
30 #include "util/util_system.h"
31 #include "util/util_types.h"
32 #include "util/util_vector.h"
33 #include "util/util_string.h"
34
35 CCL_NAMESPACE_BEGIN
36
37 bool Device::need_types_update = true;
38 bool Device::need_devices_update = true;
39 thread_mutex Device::device_mutex;
40 vector<DeviceType> Device::types;
41 vector<DeviceInfo> Device::devices;
42
43 /* Device Requested Features */
44
45 std::ostream& operator <<(std::ostream &os,
46                           const DeviceRequestedFeatures& requested_features)
47 {
48         os << "Experimental features: "
49            << (requested_features.experimental ? "On" : "Off") << std::endl;
50         os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
51         /* TODO(sergey): Decode bitflag into list of names. */
52         os << "Nodes features: " << requested_features.nodes_features << std::endl;
53         os << "Use Hair: "
54            << string_from_bool(requested_features.use_hair) << std::endl;
55         os << "Use Object Motion: "
56            << string_from_bool(requested_features.use_object_motion) << std::endl;
57         os << "Use Camera Motion: "
58            << string_from_bool(requested_features.use_camera_motion) << std::endl;
59         os << "Use Baking: "
60            << string_from_bool(requested_features.use_baking) << std::endl;
61         os << "Use Subsurface: "
62            << string_from_bool(requested_features.use_subsurface) << std::endl;
63         os << "Use Volume: "
64            << string_from_bool(requested_features.use_volume) << std::endl;
65         os << "Use Branched Integrator: "
66            << string_from_bool(requested_features.use_integrator_branched) << std::endl;
67         os << "Use Patch Evaluation: "
68            << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
69         os << "Use Transparent Shadows: "
70            << string_from_bool(requested_features.use_transparent) << std::endl;
71         os << "Use Principled BSDF: "
72            << string_from_bool(requested_features.use_principled) << std::endl;
73         os << "Use Denoising: "
74            << string_from_bool(requested_features.use_denoising) << std::endl;
75         return os;
76 }
77
78 /* Device */
79
80 Device::~Device()
81 {
82         if(!background && vertex_buffer != 0) {
83                 glDeleteBuffers(1, &vertex_buffer);
84         }
85 }
86
87 void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int dy, int width, int height, bool transparent,
88         const DeviceDrawParams &draw_params)
89 {
90         assert(rgba.type == MEM_PIXELS);
91
92         mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));
93
94         if(transparent) {
95                 glEnable(GL_BLEND);
96                 glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
97         }
98
99         glColor3f(1.0f, 1.0f, 1.0f);
100
101         if(rgba.data_type == TYPE_HALF) {
102                 /* for multi devices, this assumes the inefficient method that we allocate
103                  * all pixels on the device even though we only render to a subset */
104                 GLhalf *host_pointer = (GLhalf*)rgba.host_pointer;
105                 float vbuffer[16], *basep;
106                 float *vp = NULL;
107
108                 host_pointer += 4*y*w;
109
110                 /* draw half float texture, GLSL shader for display transform assumed to be bound */
111                 GLuint texid;
112                 glGenTextures(1, &texid);
113                 glBindTexture(GL_TEXTURE_2D, texid);
114                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, host_pointer);
115                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
116                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
117
118                 glEnable(GL_TEXTURE_2D);
119
120                 if(draw_params.bind_display_space_shader_cb) {
121                         draw_params.bind_display_space_shader_cb();
122                 }
123
124                 if(GLEW_VERSION_1_5) {
125                         if(!vertex_buffer)
126                                 glGenBuffers(1, &vertex_buffer);
127
128                         glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
129                         /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
130                         glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
131
132                         vp = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
133
134                         basep = NULL;
135                 }
136                 else {
137                         basep = vbuffer;
138                         vp = vbuffer;
139                 }
140
141                 if(vp) {
142                         /* texture coordinate - vertex pair */
143                         vp[0] = 0.0f;
144                         vp[1] = 0.0f;
145                         vp[2] = dx;
146                         vp[3] = dy;
147
148                         vp[4] = 1.0f;
149                         vp[5] = 0.0f;
150                         vp[6] = (float)width + dx;
151                         vp[7] = dy;
152
153                         vp[8] = 1.0f;
154                         vp[9] = 1.0f;
155                         vp[10] = (float)width + dx;
156                         vp[11] = (float)height + dy;
157
158                         vp[12] = 0.0f;
159                         vp[13] = 1.0f;
160                         vp[14] = dx;
161                         vp[15] = (float)height + dy;
162
163                         if(vertex_buffer)
164                                 glUnmapBuffer(GL_ARRAY_BUFFER);
165                 }
166
167                 glTexCoordPointer(2, GL_FLOAT, 4 * sizeof(float), basep);
168                 glVertexPointer(2, GL_FLOAT, 4 * sizeof(float), ((char *)basep) + 2 * sizeof(float));
169
170                 glEnableClientState(GL_VERTEX_ARRAY);
171                 glEnableClientState(GL_TEXTURE_COORD_ARRAY);
172
173                 glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
174
175                 glDisableClientState(GL_TEXTURE_COORD_ARRAY);
176                 glDisableClientState(GL_VERTEX_ARRAY);
177
178                 if(vertex_buffer) {
179                         glBindBuffer(GL_ARRAY_BUFFER, 0);
180                 }
181
182                 if(draw_params.unbind_display_space_shader_cb) {
183                         draw_params.unbind_display_space_shader_cb();
184                 }
185
186                 glBindTexture(GL_TEXTURE_2D, 0);
187                 glDisable(GL_TEXTURE_2D);
188                 glDeleteTextures(1, &texid);
189         }
190         else {
191                 /* fallback for old graphics cards that don't support GLSL, half float,
192                  * and non-power-of-two textures */
193                 glPixelZoom((float)width/(float)w, (float)height/(float)h);
194                 glRasterPos2f(dx, dy);
195
196                 uint8_t *pixels = (uint8_t*)rgba.host_pointer;
197
198                 pixels += 4*y*w;
199
200                 glDrawPixels(w, h, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
201
202                 glRasterPos2f(0.0f, 0.0f);
203                 glPixelZoom(1.0f, 1.0f);
204         }
205
206         if(transparent)
207                 glDisable(GL_BLEND);
208 }
209
210 Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
211 {
212         Device *device;
213
214         switch(info.type) {
215                 case DEVICE_CPU:
216                         device = device_cpu_create(info, stats, background);
217                         break;
218 #ifdef WITH_CUDA
219                 case DEVICE_CUDA:
220                         if(device_cuda_init())
221                                 device = device_cuda_create(info, stats, background);
222                         else
223                                 device = NULL;
224                         break;
225 #endif
226 #ifdef WITH_MULTI
227                 case DEVICE_MULTI:
228                         device = device_multi_create(info, stats, background);
229                         break;
230 #endif
231 #ifdef WITH_NETWORK
232                 case DEVICE_NETWORK:
233                         device = device_network_create(info, stats, "127.0.0.1");
234                         break;
235 #endif
236 #ifdef WITH_OPENCL
237                 case DEVICE_OPENCL:
238                         if(device_opencl_init())
239                                 device = device_opencl_create(info, stats, background);
240                         else
241                                 device = NULL;
242                         break;
243 #endif
244                 default:
245                         return NULL;
246         }
247
248         return device;
249 }
250
251 DeviceType Device::type_from_string(const char *name)
252 {
253         if(strcmp(name, "CPU") == 0)
254                 return DEVICE_CPU;
255         else if(strcmp(name, "CUDA") == 0)
256                 return DEVICE_CUDA;
257         else if(strcmp(name, "OPENCL") == 0)
258                 return DEVICE_OPENCL;
259         else if(strcmp(name, "NETWORK") == 0)
260                 return DEVICE_NETWORK;
261         else if(strcmp(name, "MULTI") == 0)
262                 return DEVICE_MULTI;
263
264         return DEVICE_NONE;
265 }
266
267 string Device::string_from_type(DeviceType type)
268 {
269         if(type == DEVICE_CPU)
270                 return "CPU";
271         else if(type == DEVICE_CUDA)
272                 return "CUDA";
273         else if(type == DEVICE_OPENCL)
274                 return "OPENCL";
275         else if(type == DEVICE_NETWORK)
276                 return "NETWORK";
277         else if(type == DEVICE_MULTI)
278                 return "MULTI";
279
280         return "";
281 }
282
283 vector<DeviceType>& Device::available_types()
284 {
285         thread_scoped_lock lock(device_mutex);
286         if(need_types_update) {
287                 types.clear();
288                 types.push_back(DEVICE_CPU);
289 #ifdef WITH_CUDA
290                 if(device_cuda_init()) {
291                         types.push_back(DEVICE_CUDA);
292                 }
293 #endif
294 #ifdef WITH_OPENCL
295                 if(device_opencl_init()) {
296                         types.push_back(DEVICE_OPENCL);
297                 }
298 #endif
299 #ifdef WITH_NETWORK
300                 types.push_back(DEVICE_NETWORK);
301 #endif
302                 need_types_update = false;
303         }
304         return types;
305 }
306
307 vector<DeviceInfo>& Device::available_devices()
308 {
309         thread_scoped_lock lock(device_mutex);
310         if(need_devices_update) {
311                 devices.clear();
312 #ifdef WITH_OPENCL
313                 if(device_opencl_init()) {
314                         device_opencl_info(devices);
315                 }
316 #endif
317 #ifdef WITH_CUDA
318                 if(device_cuda_init()) {
319                         device_cuda_info(devices);
320                 }
321 #endif
322                 device_cpu_info(devices);
323 #ifdef WITH_NETWORK
324                 device_network_info(devices);
325 #endif
326                 need_devices_update = false;
327         }
328         return devices;
329 }
330
331 string Device::device_capabilities()
332 {
333         string capabilities = "CPU device capabilities: ";
334         capabilities += device_cpu_capabilities() + "\n";
335
336 #ifdef WITH_OPENCL
337         if(device_opencl_init()) {
338                 capabilities += "\nOpenCL device capabilities:\n";
339                 capabilities += device_opencl_capabilities();
340         }
341 #endif
342
343 #ifdef WITH_CUDA
344         if(device_cuda_init()) {
345                 capabilities += "\nCUDA device capabilities:\n";
346                 capabilities += device_cuda_capabilities();
347         }
348 #endif
349
350         return capabilities;
351 }
352
353 DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int threads, bool background)
354 {
355         assert(subdevices.size() > 1);
356
357         DeviceInfo info;
358         info.type = DEVICE_MULTI;
359         info.id = "MULTI";
360         info.description = "Multi Device";
361         info.num = 0;
362
363         info.has_fermi_limits = false;
364         info.has_half_images = true;
365         info.has_volume_decoupled = true;
366         info.has_qbvh = true;
367         info.has_osl = true;
368
369         foreach(const DeviceInfo &device, subdevices) {
370                 /* Ensure CPU device does not slow down GPU. */
371                 if(device.type == DEVICE_CPU && subdevices.size() > 1) {
372                         if(background) {
373                                 int orig_cpu_threads = (threads)? threads: system_cpu_thread_count();
374                                 int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
375
376                                 VLOG(1) << "CPU render threads reduced from "
377                                                 << orig_cpu_threads << " to " << cpu_threads
378                                                 << ", to dedicate to GPU.";
379
380                                 if(cpu_threads >= 1) {
381                                         DeviceInfo cpu_device = device;
382                                         cpu_device.cpu_threads = cpu_threads;
383                                         info.multi_devices.push_back(cpu_device);
384                                 }
385                                 else {
386                                         continue;
387                                 }
388                         }
389                         else {
390                                 VLOG(1) << "CPU render threads disabled for interactive render.";
391                                 continue;
392                         }
393                 }
394                 else {
395                         info.multi_devices.push_back(device);
396                 }
397
398                 /* Accumulate device info. */
399                 info.has_fermi_limits = info.has_fermi_limits ||
400                                         device.has_fermi_limits;
401                 info.has_half_images &= device.has_half_images;
402                 info.has_volume_decoupled &= device.has_volume_decoupled;
403                 info.has_qbvh &= device.has_qbvh;
404                 info.has_osl &= device.has_osl;
405         }
406
407         return info;
408 }
409
410 void Device::tag_update()
411 {
412         need_types_update = true;
413         need_devices_update = true;
414 }
415
416 void Device::free_memory()
417 {
418         need_types_update = true;
419         need_devices_update = true;
420         types.free_memory();
421         devices.free_memory();
422 }
423
424 CCL_NAMESPACE_END