Merge branch 'blender2.7'
[blender.git] / intern / cycles / device / device_multi.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <stdlib.h>
18 #include <sstream>
19
20 #include "device/device.h"
21 #include "device/device_intern.h"
22 #include "device/device_network.h"
23
24 #include "render/buffers.h"
25
26 #include "util/util_foreach.h"
27 #include "util/util_list.h"
28 #include "util/util_logging.h"
29 #include "util/util_map.h"
30 #include "util/util_time.h"
31
32 CCL_NAMESPACE_BEGIN
33
34 class MultiDevice : public Device
35 {
36 public:
37         struct SubDevice {
38                 explicit SubDevice(Device *device_)
39                 : device(device_) {}
40
41                 Device *device;
42                 map<device_ptr, device_ptr> ptr_map;
43         };
44
45         list<SubDevice> devices;
46         device_ptr unique_key;
47
48         MultiDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
49         : Device(info, stats, profiler, background_), unique_key(1)
50         {
51                 foreach(DeviceInfo& subinfo, info.multi_devices) {
52                         Device *device = Device::create(subinfo, sub_stats_, profiler, background);
53
54                         /* Always add CPU devices at the back since GPU devices can change
55                          * host memory pointers, which CPU uses as device pointer. */
56                         if(subinfo.type == DEVICE_CPU) {
57                                 devices.push_back(SubDevice(device));
58                         }
59                         else {
60                                 devices.push_front(SubDevice(device));
61                         }
62                 }
63
64 #ifdef WITH_NETWORK
65                 /* try to add network devices */
66                 ServerDiscovery discovery(true);
67                 time_sleep(1.0);
68
69                 vector<string> servers = discovery.get_server_list();
70
71                 foreach(string& server, servers) {
72                         Device *device = device_network_create(info, stats, profiler, server.c_str());
73                         if(device)
74                                 devices.push_back(SubDevice(device));
75                 }
76 #endif
77         }
78
79         ~MultiDevice()
80         {
81                 foreach(SubDevice& sub, devices)
82                         delete sub.device;
83         }
84
85         const string& error_message()
86         {
87                 foreach(SubDevice& sub, devices) {
88                         if(sub.device->error_message() != "") {
89                                 if(error_msg == "")
90                                         error_msg = sub.device->error_message();
91                                 break;
92                         }
93                 }
94
95                 return error_msg;
96         }
97
98         virtual bool show_samples() const
99         {
100                 if(devices.size() > 1) {
101                         return false;
102                 }
103                 return devices.front().device->show_samples();
104         }
105
106         virtual BVHLayoutMask get_bvh_layout_mask() const {
107                 BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
108                 foreach(const SubDevice& sub_device, devices) {
109                         bvh_layout_mask &= sub_device.device->get_bvh_layout_mask();
110                 }
111                 return bvh_layout_mask;
112         }
113
114         bool load_kernels(const DeviceRequestedFeatures& requested_features)
115         {
116                 foreach(SubDevice& sub, devices)
117                         if(!sub.device->load_kernels(requested_features))
118                                 return false;
119
120                 return true;
121         }
122
123         bool wait_for_availability(const DeviceRequestedFeatures& requested_features)
124         {
125                 foreach(SubDevice& sub, devices)
126                         if(!sub.device->wait_for_availability(requested_features))
127                                 return false;
128
129                 return true;
130         }
131
132         DeviceKernelStatus get_active_kernel_switch_state()
133         {
134                 DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
135
136                 foreach(SubDevice& sub, devices) {
137                         DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
138                         switch (subresult) {
139                                 case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
140                                         result = subresult;
141                                         break;
142
143                                 case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
144                                 case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
145                                         return subresult;
146
147                                 case DEVICE_KERNEL_USING_FEATURE_KERNEL:
148                                         break;
149                         }
150                 }
151                 return result;
152         }
153
154         void mem_alloc(device_memory& mem)
155         {
156                 device_ptr key = unique_key++;
157
158                 foreach(SubDevice& sub, devices) {
159                         mem.device = sub.device;
160                         mem.device_pointer = 0;
161                         mem.device_size = 0;
162
163                         sub.device->mem_alloc(mem);
164                         sub.ptr_map[key] = mem.device_pointer;
165                 }
166
167                 mem.device = this;
168                 mem.device_pointer = key;
169                 stats.mem_alloc(mem.device_size);
170         }
171
172         void mem_copy_to(device_memory& mem)
173         {
174                 device_ptr existing_key = mem.device_pointer;
175                 device_ptr key = (existing_key)? existing_key: unique_key++;
176                 size_t existing_size = mem.device_size;
177
178                 foreach(SubDevice& sub, devices) {
179                         mem.device = sub.device;
180                         mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
181                         mem.device_size = existing_size;
182
183                         sub.device->mem_copy_to(mem);
184                         sub.ptr_map[key] = mem.device_pointer;
185                 }
186
187                 mem.device = this;
188                 mem.device_pointer = key;
189                 stats.mem_alloc(mem.device_size - existing_size);
190         }
191
192         void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
193         {
194                 device_ptr key = mem.device_pointer;
195                 int i = 0, sub_h = h/devices.size();
196
197                 foreach(SubDevice& sub, devices) {
198                         int sy = y + i*sub_h;
199                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
200
201                         mem.device = sub.device;
202                         mem.device_pointer = sub.ptr_map[key];
203
204                         sub.device->mem_copy_from(mem, sy, w, sh, elem);
205                         i++;
206                 }
207
208                 mem.device = this;
209                 mem.device_pointer = key;
210         }
211
212         void mem_zero(device_memory& mem)
213         {
214                 device_ptr existing_key = mem.device_pointer;
215                 device_ptr key = (existing_key)? existing_key: unique_key++;
216                 size_t existing_size = mem.device_size;
217
218                 foreach(SubDevice& sub, devices) {
219                         mem.device = sub.device;
220                         mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
221                         mem.device_size = existing_size;
222
223                         sub.device->mem_zero(mem);
224                         sub.ptr_map[key] = mem.device_pointer;
225                 }
226
227                 mem.device = this;
228                 mem.device_pointer = key;
229                 stats.mem_alloc(mem.device_size - existing_size);
230         }
231
232         void mem_free(device_memory& mem)
233         {
234                 device_ptr key = mem.device_pointer;
235                 size_t existing_size = mem.device_size;
236
237                 foreach(SubDevice& sub, devices) {
238                         mem.device = sub.device;
239                         mem.device_pointer = sub.ptr_map[key];
240                         mem.device_size = existing_size;
241
242                         sub.device->mem_free(mem);
243                         sub.ptr_map.erase(sub.ptr_map.find(key));
244                 }
245
246                 mem.device = this;
247                 mem.device_pointer = 0;
248                 mem.device_size = 0;
249                 stats.mem_free(existing_size);
250         }
251
252         void const_copy_to(const char *name, void *host, size_t size)
253         {
254                 foreach(SubDevice& sub, devices)
255                         sub.device->const_copy_to(name, host, size);
256         }
257
258         void draw_pixels(
259             device_memory& rgba, int y,
260             int w, int h, int width, int height,
261             int dx, int dy, int dw, int dh,
262             bool transparent, const DeviceDrawParams &draw_params)
263         {
264                 device_ptr key = rgba.device_pointer;
265                 int i = 0, sub_h = h/devices.size();
266                 int sub_height = height/devices.size();
267
268                 foreach(SubDevice& sub, devices) {
269                         int sy = y + i*sub_h;
270                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
271                         int sheight = (i == (int)devices.size() - 1)? height - sub_height*i: sub_height;
272                         int sdy = dy + i*sub_height;
273                         /* adjust math for w/width */
274
275                         rgba.device_pointer = sub.ptr_map[key];
276                         sub.device->draw_pixels(rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
277                         i++;
278                 }
279
280                 rgba.device_pointer = key;
281         }
282
283         void map_tile(Device *sub_device, RenderTile& tile)
284         {
285                 foreach(SubDevice& sub, devices) {
286                         if(sub.device == sub_device) {
287                                 if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
288                         }
289                 }
290         }
291
292         int device_number(Device *sub_device)
293         {
294                 int i = 0;
295
296                 foreach(SubDevice& sub, devices) {
297                         if(sub.device == sub_device)
298                                 return i;
299                         i++;
300                 }
301
302                 return -1;
303         }
304
305         void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
306         {
307                 for(int i = 0; i < 9; i++) {
308                         if(!tiles[i].buffers) {
309                                 continue;
310                         }
311
312                         /* If the tile was rendered on another device, copy its memory to
313                          * to the current device now, for the duration of the denoising task.
314                          * Note that this temporarily modifies the RenderBuffers and calls
315                          * the device, so this function is not thread safe. */
316                         device_vector<float> &mem = tiles[i].buffers->buffer;
317                         if(mem.device != sub_device) {
318                                 /* Only copy from device to host once. This is faster, but
319                                  * also required for the case where a CPU thread is denoising
320                                  * a tile rendered on the GPU. In that case we have to avoid
321                                  * overwriting the buffer being denoised by the CPU thread. */
322                                 if(!tiles[i].buffers->map_neighbor_copied) {
323                                         tiles[i].buffers->map_neighbor_copied = true;
324                                         mem.copy_from_device(0, mem.data_size, 1);
325                                 }
326
327                                 mem.swap_device(sub_device, 0, 0);
328
329                                 mem.copy_to_device();
330                                 tiles[i].buffer = mem.device_pointer;
331                                 tiles[i].device_size = mem.device_size;
332
333                                 mem.restore_device();
334                         }
335                 }
336         }
337
338         void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
339         {
340                 /* Copy denoised result back to the host. */
341                 device_vector<float> &mem = tiles[9].buffers->buffer;
342                 mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
343                 mem.copy_from_device(0, mem.data_size, 1);
344                 mem.restore_device();
345                 /* Copy denoised result to the original device. */
346                 mem.copy_to_device();
347
348                 for(int i = 0; i < 9; i++) {
349                         if(!tiles[i].buffers) {
350                                 continue;
351                         }
352
353                         device_vector<float> &mem = tiles[i].buffers->buffer;
354                         if(mem.device != sub_device) {
355                                 mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
356                                 sub_device->mem_free(mem);
357                                 mem.restore_device();
358                         }
359                 }
360         }
361
362         int get_split_task_count(DeviceTask& task)
363         {
364                 int total_tasks = 0;
365                 list<DeviceTask> tasks;
366                 task.split(tasks, devices.size());
367                 foreach(SubDevice& sub, devices) {
368                         if(!tasks.empty()) {
369                                 DeviceTask subtask = tasks.front();
370                                 tasks.pop_front();
371
372                                 total_tasks += sub.device->get_split_task_count(subtask);
373                         }
374                 }
375                 return total_tasks;
376         }
377
378         void task_add(DeviceTask& task)
379         {
380                 list<DeviceTask> tasks;
381                 task.split(tasks, devices.size());
382
383                 foreach(SubDevice& sub, devices) {
384                         if(!tasks.empty()) {
385                                 DeviceTask subtask = tasks.front();
386                                 tasks.pop_front();
387
388                                 if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
389                                 if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
390                                 if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
391                                 if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
392                                 if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
393
394                                 sub.device->task_add(subtask);
395                         }
396                 }
397         }
398
399         void task_wait()
400         {
401                 foreach(SubDevice& sub, devices)
402                         sub.device->task_wait();
403         }
404
405         void task_cancel()
406         {
407                 foreach(SubDevice& sub, devices)
408                         sub.device->task_cancel();
409         }
410
411 protected:
412         Stats sub_stats_;
413 };
414
415 Device *device_multi_create(DeviceInfo& info, Stats &stats, Profiler& profiler, bool background)
416 {
417         return new MultiDevice(info, stats, profiler, background);
418 }
419
420 CCL_NAMESPACE_END