Cycles: add single program debug option for split kernel
[blender-staging.git] / intern / cycles / device / device.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <stdlib.h>
18 #include <string.h>
19
20 #include "device.h"
21 #include "device_intern.h"
22
23 #include "util_debug.h"
24 #include "util_foreach.h"
25 #include "util_half.h"
26 #include "util_math.h"
27 #include "util_opengl.h"
28 #include "util_time.h"
29 #include "util_types.h"
30 #include "util_vector.h"
31 #include "util_string.h"
32
33 CCL_NAMESPACE_BEGIN
34
35 bool Device::need_types_update = true;
36 bool Device::need_devices_update = true;
37 vector<DeviceType> Device::types;
38 vector<DeviceInfo> Device::devices;
39
40 /* Device Requested Features */
41
42 std::ostream& operator <<(std::ostream &os,
43                           const DeviceRequestedFeatures& requested_features)
44 {
45         os << "Experimental features: "
46            << (requested_features.experimental ? "On" : "Off") << std::endl;
47         os << "Max closure count: " << requested_features.max_closure << std::endl;
48         os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
49         /* TODO(sergey): Decode bitflag into list of names. */
50         os << "Nodes features: " << requested_features.nodes_features << std::endl;
51         os << "Use hair: "
52            << string_from_bool(requested_features.use_hair) << std::endl;
53         os << "Use object motion: "
54            << string_from_bool(requested_features.use_object_motion) << std::endl;
55         os << "Use camera motion: "
56            << string_from_bool(requested_features.use_camera_motion) << std::endl;
57         os << "Use Baking: "
58            << string_from_bool(requested_features.use_baking) << std::endl;
59         os << "Use Subsurface: "
60            << string_from_bool(requested_features.use_subsurface) << std::endl;
61         os << "Use Volume: "
62            << string_from_bool(requested_features.use_volume) << std::endl;
63         os << "Use Branched Integrator: "
64            << string_from_bool(requested_features.use_integrator_branched) << std::endl;
65         os << "Use Patch Evaluation: "
66            << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
67         os << "Use Transparent Shadows: "
68            << string_from_bool(requested_features.use_transparent) << std::endl;
69         return os;
70 }
71
72 /* Device */
73
74 Device::~Device()
75 {
76         if(!background && vertex_buffer != 0) {
77                 glDeleteBuffers(1, &vertex_buffer);
78         }
79 }
80
81 void Device::pixels_alloc(device_memory& mem)
82 {
83         mem_alloc("pixels", mem, MEM_READ_WRITE);
84 }
85
86 void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
87 {
88         if(mem.data_type == TYPE_HALF)
89                 mem_copy_from(mem, y, w, h, sizeof(half4));
90         else
91                 mem_copy_from(mem, y, w, h, sizeof(uchar4));
92 }
93
94 void Device::pixels_free(device_memory& mem)
95 {
96         mem_free(mem);
97 }
98
99 void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int dy, int width, int height, bool transparent,
100         const DeviceDrawParams &draw_params)
101 {
102         pixels_copy_from(rgba, y, w, h);
103
104         if(transparent) {
105                 glEnable(GL_BLEND);
106                 glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
107         }
108
109         glColor3f(1.0f, 1.0f, 1.0f);
110
111         if(rgba.data_type == TYPE_HALF) {
112                 /* for multi devices, this assumes the inefficient method that we allocate
113                  * all pixels on the device even though we only render to a subset */
114                 GLhalf *data_pointer = (GLhalf*)rgba.data_pointer;
115                 float vbuffer[16], *basep;
116                 float *vp = NULL;
117
118                 data_pointer += 4*y*w;
119
120                 /* draw half float texture, GLSL shader for display transform assumed to be bound */
121                 GLuint texid;
122                 glGenTextures(1, &texid);
123                 glBindTexture(GL_TEXTURE_2D, texid);
124                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
125                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
126                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
127
128                 glEnable(GL_TEXTURE_2D);
129
130                 if(draw_params.bind_display_space_shader_cb) {
131                         draw_params.bind_display_space_shader_cb();
132                 }
133
134                 if(GLEW_VERSION_1_5) {
135                         if(!vertex_buffer)
136                                 glGenBuffers(1, &vertex_buffer);
137
138                         glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
139                         /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
140                         glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
141
142                         vp = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
143
144                         basep = NULL;
145                 }
146                 else {
147                         basep = vbuffer;
148                         vp = vbuffer;
149                 }
150
151                 if(vp) {
152                         /* texture coordinate - vertex pair */
153                         vp[0] = 0.0f;
154                         vp[1] = 0.0f;
155                         vp[2] = dx;
156                         vp[3] = dy;
157
158                         vp[4] = 1.0f;
159                         vp[5] = 0.0f;
160                         vp[6] = (float)width + dx;
161                         vp[7] = dy;
162
163                         vp[8] = 1.0f;
164                         vp[9] = 1.0f;
165                         vp[10] = (float)width + dx;
166                         vp[11] = (float)height + dy;
167
168                         vp[12] = 0.0f;
169                         vp[13] = 1.0f;
170                         vp[14] = dx;
171                         vp[15] = (float)height + dy;
172
173                         if(vertex_buffer)
174                                 glUnmapBuffer(GL_ARRAY_BUFFER);
175                 }
176
177                 glTexCoordPointer(2, GL_FLOAT, 4 * sizeof(float), basep);
178                 glVertexPointer(2, GL_FLOAT, 4 * sizeof(float), ((char *)basep) + 2 * sizeof(float));
179
180                 glEnableClientState(GL_VERTEX_ARRAY);
181                 glEnableClientState(GL_TEXTURE_COORD_ARRAY);
182
183                 glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
184
185                 glDisableClientState(GL_TEXTURE_COORD_ARRAY);
186                 glDisableClientState(GL_VERTEX_ARRAY);
187
188                 if(vertex_buffer) {
189                         glBindBuffer(GL_ARRAY_BUFFER, 0);
190                 }
191
192                 if(draw_params.unbind_display_space_shader_cb) {
193                         draw_params.unbind_display_space_shader_cb();
194                 }
195
196                 glBindTexture(GL_TEXTURE_2D, 0);
197                 glDisable(GL_TEXTURE_2D);
198                 glDeleteTextures(1, &texid);
199         }
200         else {
201                 /* fallback for old graphics cards that don't support GLSL, half float,
202                  * and non-power-of-two textures */
203                 glPixelZoom((float)width/(float)w, (float)height/(float)h);
204                 glRasterPos2f(dx, dy);
205
206                 uint8_t *pixels = (uint8_t*)rgba.data_pointer;
207
208                 pixels += 4*y*w;
209
210                 glDrawPixels(w, h, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
211
212                 glRasterPos2f(0.0f, 0.0f);
213                 glPixelZoom(1.0f, 1.0f);
214         }
215
216         if(transparent)
217                 glDisable(GL_BLEND);
218 }
219
220 Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
221 {
222         Device *device;
223
224         switch(info.type) {
225                 case DEVICE_CPU:
226                         device = device_cpu_create(info, stats, background);
227                         break;
228 #ifdef WITH_CUDA
229                 case DEVICE_CUDA:
230                         if(device_cuda_init())
231                                 device = device_cuda_create(info, stats, background);
232                         else
233                                 device = NULL;
234                         break;
235 #endif
236 #ifdef WITH_MULTI
237                 case DEVICE_MULTI:
238                         device = device_multi_create(info, stats, background);
239                         break;
240 #endif
241 #ifdef WITH_NETWORK
242                 case DEVICE_NETWORK:
243                         device = device_network_create(info, stats, "127.0.0.1");
244                         break;
245 #endif
246 #ifdef WITH_OPENCL
247                 case DEVICE_OPENCL:
248                         if(device_opencl_init())
249                                 device = device_opencl_create(info, stats, background);
250                         else
251                                 device = NULL;
252                         break;
253 #endif
254                 default:
255                         return NULL;
256         }
257
258         return device;
259 }
260
261 DeviceType Device::type_from_string(const char *name)
262 {
263         if(strcmp(name, "CPU") == 0)
264                 return DEVICE_CPU;
265         else if(strcmp(name, "CUDA") == 0)
266                 return DEVICE_CUDA;
267         else if(strcmp(name, "OPENCL") == 0)
268                 return DEVICE_OPENCL;
269         else if(strcmp(name, "NETWORK") == 0)
270                 return DEVICE_NETWORK;
271         else if(strcmp(name, "MULTI") == 0)
272                 return DEVICE_MULTI;
273
274         return DEVICE_NONE;
275 }
276
277 string Device::string_from_type(DeviceType type)
278 {
279         if(type == DEVICE_CPU)
280                 return "CPU";
281         else if(type == DEVICE_CUDA)
282                 return "CUDA";
283         else if(type == DEVICE_OPENCL)
284                 return "OPENCL";
285         else if(type == DEVICE_NETWORK)
286                 return "NETWORK";
287         else if(type == DEVICE_MULTI)
288                 return "MULTI";
289
290         return "";
291 }
292
293 vector<DeviceType>& Device::available_types()
294 {
295         if(need_types_update) {
296                 types.clear();
297                 types.push_back(DEVICE_CPU);
298
299 #ifdef WITH_CUDA
300                 if(device_cuda_init())
301                         types.push_back(DEVICE_CUDA);
302 #endif
303
304 #ifdef WITH_OPENCL
305                 if(device_opencl_init())
306                         types.push_back(DEVICE_OPENCL);
307 #endif
308
309 #ifdef WITH_NETWORK
310                 types.push_back(DEVICE_NETWORK);
311 #endif
312
313                 need_types_update = false;
314         }
315
316         return types;
317 }
318
319 vector<DeviceInfo>& Device::available_devices()
320 {
321         if(need_devices_update) {
322                 devices.clear();
323 #ifdef WITH_CUDA
324                 if(device_cuda_init())
325                         device_cuda_info(devices);
326 #endif
327
328 #ifdef WITH_OPENCL
329                 if(device_opencl_init())
330                         device_opencl_info(devices);
331 #endif
332
333                 device_cpu_info(devices);
334
335 #ifdef WITH_NETWORK
336                 device_network_info(devices);
337 #endif
338
339                 need_devices_update = false;
340         }
341
342         return devices;
343 }
344
345 string Device::device_capabilities()
346 {
347         string capabilities = "CPU device capabilities: ";
348         capabilities += device_cpu_capabilities() + "\n";
349 #ifdef WITH_CUDA
350         if(device_cuda_init()) {
351                 capabilities += "\nCUDA device capabilities:\n";
352                 capabilities += device_cuda_capabilities();
353         }
354 #endif
355
356 #ifdef WITH_OPENCL
357         if(device_opencl_init()) {
358                 capabilities += "\nOpenCL device capabilities:\n";
359                 capabilities += device_opencl_capabilities();
360         }
361 #endif
362
363         return capabilities;
364 }
365
366 DeviceInfo Device::get_multi_device(vector<DeviceInfo> subdevices)
367 {
368         assert(subdevices.size() > 1);
369
370         DeviceInfo info;
371         info.type = DEVICE_MULTI;
372         info.id = "MULTI";
373         info.description = "Multi Device";
374         info.multi_devices = subdevices;
375         info.num = 0;
376
377         info.has_bindless_textures = true;
378         info.pack_images = false;
379         foreach(DeviceInfo &device, subdevices) {
380                 assert(device.type == info.multi_devices[0].type);
381
382                 info.pack_images |= device.pack_images;
383                 info.has_bindless_textures &= device.has_bindless_textures;
384         }
385
386         return info;
387 }
388
389 void Device::tag_update()
390 {
391         need_types_update = true;
392         need_devices_update = true;
393 }
394
395 void Device::free_memory()
396 {
397         need_types_update = true;
398         need_devices_update = true;
399         types.free_memory();
400         devices.free_memory();
401 }
402
403 CCL_NAMESPACE_END