Fix T41222 Blender gives weird output when baking (4096*4096) resolution on GPU
[blender-staging.git] / intern / cycles / device / device_multi.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <stdlib.h>
18 #include <sstream>
19
20 #include "device.h"
21 #include "device_intern.h"
22 #include "device_network.h"
23
24 #include "buffers.h"
25
26 #include "util_foreach.h"
27 #include "util_list.h"
28 #include "util_map.h"
29 #include "util_time.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 class MultiDevice : public Device
34 {
35 public:
36         struct SubDevice {
37                 SubDevice(Device *device_)
38                 : device(device_) {}
39
40                 Device *device;
41                 map<device_ptr, device_ptr> ptr_map;
42         };
43
44         list<SubDevice> devices;
45         device_ptr unique_ptr;
46
47         MultiDevice(DeviceInfo& info, Stats &stats, bool background_)
48         : Device(info, stats, background_), unique_ptr(1)
49         {
50                 Device *device;
51
52                 foreach(DeviceInfo& subinfo, info.multi_devices) {
53                         device = Device::create(subinfo, stats, background);
54                         devices.push_back(SubDevice(device));
55                 }
56
57 #ifdef WITH_NETWORK
58                 /* try to add network devices */
59                 ServerDiscovery discovery(true);
60                 time_sleep(1.0);
61
62                 vector<string> servers = discovery.get_server_list();
63
64                 foreach(string& server, servers) {
65                         device = device_network_create(info, stats, server.c_str());
66                         if(device)
67                                 devices.push_back(SubDevice(device));
68                 }
69 #endif
70         }
71
72         ~MultiDevice()
73         {
74                 foreach(SubDevice& sub, devices)
75                         delete sub.device;
76         }
77
78         const string& error_message()
79         {
80                 foreach(SubDevice& sub, devices) {
81                         if(sub.device->error_message() != "") {
82                                 if(error_msg == "")
83                                         error_msg = sub.device->error_message();
84                                 break;
85                         }
86                 }
87
88                 return error_msg;
89         }
90
91         bool load_kernels(bool experimental)
92         {
93                 foreach(SubDevice& sub, devices)
94                         if(!sub.device->load_kernels(experimental))
95                                 return false;
96
97                 return true;
98         }
99
100         void mem_alloc(device_memory& mem, MemoryType type)
101         {
102                 foreach(SubDevice& sub, devices) {
103                         mem.device_pointer = 0;
104                         sub.device->mem_alloc(mem, type);
105                         sub.ptr_map[unique_ptr] = mem.device_pointer;
106                 }
107
108                 mem.device_pointer = unique_ptr++;
109         }
110
111         void mem_copy_to(device_memory& mem)
112         {
113                 device_ptr tmp = mem.device_pointer;
114
115                 foreach(SubDevice& sub, devices) {
116                         mem.device_pointer = sub.ptr_map[tmp];
117                         sub.device->mem_copy_to(mem);
118                 }
119
120                 mem.device_pointer = tmp;
121         }
122
123         void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
124         {
125                 device_ptr tmp = mem.device_pointer;
126                 int i = 0, sub_h = h/devices.size();
127
128                 foreach(SubDevice& sub, devices) {
129                         int sy = y + i*sub_h;
130                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
131
132                         mem.device_pointer = sub.ptr_map[tmp];
133                         sub.device->mem_copy_from(mem, sy, w, sh, elem);
134                         i++;
135                 }
136
137                 mem.device_pointer = tmp;
138         }
139
140         void mem_zero(device_memory& mem)
141         {
142                 device_ptr tmp = mem.device_pointer;
143
144                 foreach(SubDevice& sub, devices) {
145                         mem.device_pointer = sub.ptr_map[tmp];
146                         sub.device->mem_zero(mem);
147                 }
148
149                 mem.device_pointer = tmp;
150         }
151
152         void mem_free(device_memory& mem)
153         {
154                 device_ptr tmp = mem.device_pointer;
155
156                 foreach(SubDevice& sub, devices) {
157                         mem.device_pointer = sub.ptr_map[tmp];
158                         sub.device->mem_free(mem);
159                         sub.ptr_map.erase(sub.ptr_map.find(tmp));
160                 }
161
162                 mem.device_pointer = 0;
163         }
164
165         void const_copy_to(const char *name, void *host, size_t size)
166         {
167                 foreach(SubDevice& sub, devices)
168                         sub.device->const_copy_to(name, host, size);
169         }
170
171         void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic)
172         {
173                 foreach(SubDevice& sub, devices) {
174                         mem.device_pointer = 0;
175                         sub.device->tex_alloc(name, mem, interpolation, periodic);
176                         sub.ptr_map[unique_ptr] = mem.device_pointer;
177                 }
178
179                 mem.device_pointer = unique_ptr++;
180         }
181
182         void tex_free(device_memory& mem)
183         {
184                 device_ptr tmp = mem.device_pointer;
185
186                 foreach(SubDevice& sub, devices) {
187                         mem.device_pointer = sub.ptr_map[tmp];
188                         sub.device->tex_free(mem);
189                         sub.ptr_map.erase(sub.ptr_map.find(tmp));
190                 }
191
192                 mem.device_pointer = 0;
193         }
194
195         void pixels_alloc(device_memory& mem)
196         {
197                 foreach(SubDevice& sub, devices) {
198                         mem.device_pointer = 0;
199                         sub.device->pixels_alloc(mem);
200                         sub.ptr_map[unique_ptr] = mem.device_pointer;
201                 }
202
203                 mem.device_pointer = unique_ptr++;
204         }
205
206         void pixels_free(device_memory& mem)
207         {
208                 device_ptr tmp = mem.device_pointer;
209
210                 foreach(SubDevice& sub, devices) {
211                         mem.device_pointer = sub.ptr_map[tmp];
212                         sub.device->pixels_free(mem);
213                         sub.ptr_map.erase(sub.ptr_map.find(tmp));
214                 }
215
216                 mem.device_pointer = 0;
217         }
218
219         void pixels_copy_from(device_memory& mem, int y, int w, int h)
220         {
221                 device_ptr tmp = mem.device_pointer;
222                 int i = 0, sub_h = h/devices.size();
223
224                 foreach(SubDevice& sub, devices) {
225                         int sy = y + i*sub_h;
226                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
227
228                         mem.device_pointer = sub.ptr_map[tmp];
229                         sub.device->pixels_copy_from(mem, sy, w, sh);
230                         i++;
231                 }
232
233                 mem.device_pointer = tmp;
234         }
235
236         void draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent,
237                 const DeviceDrawParams &draw_params)
238         {
239                 device_ptr tmp = rgba.device_pointer;
240                 int i = 0, sub_h = h/devices.size();
241                 int sub_height = height/devices.size();
242
243                 foreach(SubDevice& sub, devices) {
244                         int sy = y + i*sub_h;
245                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
246                         int sheight = (i == (int)devices.size() - 1)? height - sub_height*i: sub_height;
247                         int sdy = dy + i*sub_height;
248                         /* adjust math for w/width */
249
250                         rgba.device_pointer = sub.ptr_map[tmp];
251                         sub.device->draw_pixels(rgba, sy, w, sh, sdy, width, sheight, transparent, draw_params);
252                         i++;
253                 }
254
255                 rgba.device_pointer = tmp;
256         }
257
258         void map_tile(Device *sub_device, RenderTile& tile)
259         {
260                 foreach(SubDevice& sub, devices) {
261                         if(sub.device == sub_device) {
262                                 if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
263                                 if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state];
264                         }
265                 }
266         }
267
268         int device_number(Device *sub_device)
269         {
270                 int i = 0;
271
272                 foreach(SubDevice& sub, devices) {
273                         if(sub.device == sub_device)
274                                 return i;
275                         i++;
276                 }
277
278                 return -1;
279         }
280
281         int get_split_task_count(DeviceTask& task)
282         {
283                 int total_tasks = 0;
284                 list<DeviceTask> tasks;
285                 task.split(tasks, devices.size());
286                 foreach(SubDevice& sub, devices) {
287                         if(!tasks.empty()) {
288                                 DeviceTask subtask = tasks.front();
289                                 tasks.pop_front();
290
291                                 total_tasks += sub.device->get_split_task_count(subtask);
292                         }
293                 }
294                 return total_tasks;
295         }
296
297         void task_add(DeviceTask& task)
298         {
299                 list<DeviceTask> tasks;
300                 task.split(tasks, devices.size());
301
302                 foreach(SubDevice& sub, devices) {
303                         if(!tasks.empty()) {
304                                 DeviceTask subtask = tasks.front();
305                                 tasks.pop_front();
306
307                                 if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
308                                 if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
309                                 if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
310                                 if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
311                                 if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
312
313                                 sub.device->task_add(subtask);
314                         }
315                 }
316         }
317
318         void task_wait()
319         {
320                 foreach(SubDevice& sub, devices)
321                         sub.device->task_wait();
322         }
323
324         void task_cancel()
325         {
326                 foreach(SubDevice& sub, devices)
327                         sub.device->task_cancel();
328         }
329 };
330
331 Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background)
332 {
333         return new MultiDevice(info, stats, background);
334 }
335
336 static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool with_display, bool with_advanced_shading, const char *id_fmt, int num)
337 {
338         DeviceInfo info;
339
340         /* create map to find duplicate descriptions */
341         map<string, int> dupli_map;
342         map<string, int>::iterator dt;
343         int num_added = 0, num_display = 0;
344
345         info.advanced_shading = with_advanced_shading;
346         info.pack_images = false;
347         info.extended_images = true;
348
349         foreach(DeviceInfo& subinfo, devices) {
350                 if(subinfo.type == type) {
351                         if(subinfo.advanced_shading != info.advanced_shading)
352                                 continue;
353                         if(subinfo.display_device) {
354                                 if(with_display)
355                                         num_display++;
356                                 else
357                                         continue;
358                         }
359
360                         string key = subinfo.description;
361
362                         if(dupli_map.find(key) == dupli_map.end())
363                                 dupli_map[key] = 1;
364                         else
365                                 dupli_map[key]++;
366
367                         info.multi_devices.push_back(subinfo);
368                         if(subinfo.display_device)
369                                 info.display_device = true;
370                         info.pack_images = info.pack_images || subinfo.pack_images;
371                         info.extended_images = info.extended_images && subinfo.extended_images;
372                         num_added++;
373                 }
374         }
375
376         if(num_added <= 1 || (with_display && num_display == 0))
377                 return false;
378
379         /* generate string */
380         stringstream desc;
381         vector<string> last_tokens;
382         bool first = true;
383
384         for(dt = dupli_map.begin(); dt != dupli_map.end(); dt++) {
385                 if(!first) desc << " + ";
386                 first = false;
387
388                 /* get name and count */
389                 string name = dt->first;
390                 int count = dt->second;
391
392                 /* strip common prefixes */
393                 vector<string> tokens;
394                 string_split(tokens, dt->first);
395
396                 if(tokens.size() > 1) {
397                         int i;
398
399                         for(i = 0; i < tokens.size() && i < last_tokens.size(); i++)
400                                 if(tokens[i] != last_tokens[i])
401                                         break;
402
403                         name = "";
404                         for(; i < tokens.size(); i++) {
405                                 name += tokens[i];
406                                 if(i != tokens.size() - 1)
407                                         name += " ";
408                         }
409                 }
410
411                 last_tokens = tokens;
412
413                 /* add */
414                 if(count > 1)
415                         desc << name << " (" << count << "x)";
416                 else
417                         desc << name;
418         }
419
420         /* add info */
421         info.type = DEVICE_MULTI;
422         info.description = desc.str();
423         info.id = string_printf(id_fmt, num);
424         info.display_device = with_display;
425         info.num = 0;
426
427         if(with_display)
428                 devices.push_back(info);
429         else
430                 devices.insert(devices.begin(), info);
431         
432         return true;
433 }
434
435 void device_multi_info(vector<DeviceInfo>& devices)
436 {
437         int num = 0;
438
439         if(!device_multi_add(devices, DEVICE_CUDA, false, true, "CUDA_MULTI_%d", num++))
440                 device_multi_add(devices, DEVICE_CUDA, false, false, "CUDA_MULTI_%d", num++);
441         if(!device_multi_add(devices, DEVICE_CUDA, true, true, "CUDA_MULTI_%d", num++))
442                 device_multi_add(devices, DEVICE_CUDA, true, false, "CUDA_MULTI_%d", num++);
443
444         num = 0;
445         if(!device_multi_add(devices, DEVICE_OPENCL, false, true, "OPENCL_MULTI_%d", num++))
446                 device_multi_add(devices, DEVICE_OPENCL, false, false, "OPENCL_MULTI_%d", num++);
447         if(!device_multi_add(devices, DEVICE_OPENCL, true, true, "OPENCL_MULTI_%d", num++))
448                 device_multi_add(devices, DEVICE_OPENCL, true, false, "OPENCL_MULTI_%d", num++);
449 }
450
451 CCL_NAMESPACE_END
452