Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / device / device_multi.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <stdlib.h>
18 #include <sstream>
19
20 #include "device/device.h"
21 #include "device/device_intern.h"
22 #include "device/device_network.h"
23
24 #include "render/buffers.h"
25
26 #include "util/util_foreach.h"
27 #include "util/util_list.h"
28 #include "util/util_logging.h"
29 #include "util/util_map.h"
30 #include "util/util_time.h"
31
32 CCL_NAMESPACE_BEGIN
33
34 class MultiDevice : public Device
35 {
36 public:
37         struct SubDevice {
38                 explicit SubDevice(Device *device_)
39                 : device(device_) {}
40
41                 Device *device;
42                 map<device_ptr, device_ptr> ptr_map;
43         };
44
45         list<SubDevice> devices;
46         device_ptr unique_key;
47
48         MultiDevice(DeviceInfo& info, Stats &stats, bool background_)
49         : Device(info, stats, background_), unique_key(1)
50         {
51                 foreach(DeviceInfo& subinfo, info.multi_devices) {
52                         Device *device = Device::create(subinfo, sub_stats_, background);
53
54                         /* Always add CPU devices at the back since GPU devices can change
55                          * host memory pointers, which CPU uses as device pointer. */
56                         if(subinfo.type == DEVICE_CPU) {
57                                 devices.push_back(SubDevice(device));
58                         }
59                         else {
60                                 devices.push_front(SubDevice(device));
61                         }
62                 }
63
64 #ifdef WITH_NETWORK
65                 /* try to add network devices */
66                 ServerDiscovery discovery(true);
67                 time_sleep(1.0);
68
69                 vector<string> servers = discovery.get_server_list();
70
71                 foreach(string& server, servers) {
72                         Device *device = device_network_create(info, stats, server.c_str());
73                         if(device)
74                                 devices.push_back(SubDevice(device));
75                 }
76 #endif
77         }
78
79         ~MultiDevice()
80         {
81                 foreach(SubDevice& sub, devices)
82                         delete sub.device;
83         }
84
85         const string& error_message()
86         {
87                 foreach(SubDevice& sub, devices) {
88                         if(sub.device->error_message() != "") {
89                                 if(error_msg == "")
90                                         error_msg = sub.device->error_message();
91                                 break;
92                         }
93                 }
94
95                 return error_msg;
96         }
97
98         virtual bool show_samples() const
99         {
100                 if(devices.size() > 1) {
101                         return false;
102                 }
103                 return devices.front().device->show_samples();
104         }
105
106         virtual BVHLayoutMask get_bvh_layout_mask() const {
107                 BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
108                 foreach(const SubDevice& sub_device, devices) {
109                         bvh_layout_mask &= sub_device.device->get_bvh_layout_mask();
110                 }
111                 return bvh_layout_mask;
112         }
113
114         bool load_kernels(const DeviceRequestedFeatures& requested_features)
115         {
116                 foreach(SubDevice& sub, devices)
117                         if(!sub.device->load_kernels(requested_features))
118                                 return false;
119
120                 return true;
121         }
122
123         void mem_alloc(device_memory& mem)
124         {
125                 device_ptr key = unique_key++;
126
127                 foreach(SubDevice& sub, devices) {
128                         mem.device = sub.device;
129                         mem.device_pointer = 0;
130                         mem.device_size = 0;
131
132                         sub.device->mem_alloc(mem);
133                         sub.ptr_map[key] = mem.device_pointer;
134                 }
135
136                 mem.device = this;
137                 mem.device_pointer = key;
138                 stats.mem_alloc(mem.device_size);
139         }
140
141         void mem_copy_to(device_memory& mem)
142         {
143                 device_ptr existing_key = mem.device_pointer;
144                 device_ptr key = (existing_key)? existing_key: unique_key++;
145                 size_t existing_size = mem.device_size;
146
147                 foreach(SubDevice& sub, devices) {
148                         mem.device = sub.device;
149                         mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
150                         mem.device_size = existing_size;
151
152                         sub.device->mem_copy_to(mem);
153                         sub.ptr_map[key] = mem.device_pointer;
154                 }
155
156                 mem.device = this;
157                 mem.device_pointer = key;
158                 stats.mem_alloc(mem.device_size - existing_size);
159         }
160
161         void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
162         {
163                 device_ptr key = mem.device_pointer;
164                 int i = 0, sub_h = h/devices.size();
165
166                 foreach(SubDevice& sub, devices) {
167                         int sy = y + i*sub_h;
168                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
169
170                         mem.device = sub.device;
171                         mem.device_pointer = sub.ptr_map[key];
172
173                         sub.device->mem_copy_from(mem, sy, w, sh, elem);
174                         i++;
175                 }
176
177                 mem.device = this;
178                 mem.device_pointer = key;
179         }
180
181         void mem_zero(device_memory& mem)
182         {
183                 device_ptr existing_key = mem.device_pointer;
184                 device_ptr key = (existing_key)? existing_key: unique_key++;
185                 size_t existing_size = mem.device_size;
186
187                 foreach(SubDevice& sub, devices) {
188                         mem.device = sub.device;
189                         mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
190                         mem.device_size = existing_size;
191
192                         sub.device->mem_zero(mem);
193                         sub.ptr_map[key] = mem.device_pointer;
194                 }
195
196                 mem.device = this;
197                 mem.device_pointer = key;
198                 stats.mem_alloc(mem.device_size - existing_size);
199         }
200
201         void mem_free(device_memory& mem)
202         {
203                 device_ptr key = mem.device_pointer;
204                 size_t existing_size = mem.device_size;
205
206                 foreach(SubDevice& sub, devices) {
207                         mem.device = sub.device;
208                         mem.device_pointer = sub.ptr_map[key];
209                         mem.device_size = existing_size;
210
211                         sub.device->mem_free(mem);
212                         sub.ptr_map.erase(sub.ptr_map.find(key));
213                 }
214
215                 mem.device = this;
216                 mem.device_pointer = 0;
217                 mem.device_size = 0;
218                 stats.mem_free(existing_size);
219         }
220
221         void const_copy_to(const char *name, void *host, size_t size)
222         {
223                 foreach(SubDevice& sub, devices)
224                         sub.device->const_copy_to(name, host, size);
225         }
226
227         void draw_pixels(
228             device_memory& rgba, int y,
229             int w, int h, int width, int height,
230             int dx, int dy, int dw, int dh,
231             bool transparent, const DeviceDrawParams &draw_params)
232         {
233                 device_ptr key = rgba.device_pointer;
234                 int i = 0, sub_h = h/devices.size();
235                 int sub_height = height/devices.size();
236
237                 foreach(SubDevice& sub, devices) {
238                         int sy = y + i*sub_h;
239                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
240                         int sheight = (i == (int)devices.size() - 1)? height - sub_height*i: sub_height;
241                         int sdy = dy + i*sub_height;
242                         /* adjust math for w/width */
243
244                         rgba.device_pointer = sub.ptr_map[key];
245                         sub.device->draw_pixels(rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
246                         i++;
247                 }
248
249                 rgba.device_pointer = key;
250         }
251
252         void map_tile(Device *sub_device, RenderTile& tile)
253         {
254                 foreach(SubDevice& sub, devices) {
255                         if(sub.device == sub_device) {
256                                 if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
257                         }
258                 }
259         }
260
261         int device_number(Device *sub_device)
262         {
263                 int i = 0;
264
265                 foreach(SubDevice& sub, devices) {
266                         if(sub.device == sub_device)
267                                 return i;
268                         i++;
269                 }
270
271                 return -1;
272         }
273
274         void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
275         {
276                 for(int i = 0; i < 9; i++) {
277                         if(!tiles[i].buffers) {
278                                 continue;
279                         }
280
281                         /* If the tile was rendered on another device, copy its memory to
282                          * to the current device now, for the duration of the denoising task.
283                          * Note that this temporarily modifies the RenderBuffers and calls
284                          * the device, so this function is not thread safe. */
285                         device_vector<float> &mem = tiles[i].buffers->buffer;
286                         if(mem.device != sub_device) {
287                                 /* Only copy from device to host once. This is faster, but
288                                  * also required for the case where a CPU thread is denoising
289                                  * a tile rendered on the GPU. In that case we have to avoid
290                                  * overwriting the buffer being denoised by the CPU thread. */
291                                 if(!tiles[i].buffers->map_neighbor_copied) {
292                                         tiles[i].buffers->map_neighbor_copied = true;
293                                         mem.copy_from_device(0, mem.data_size, 1);
294                                 }
295
296                                 mem.swap_device(sub_device, 0, 0);
297
298                                 mem.copy_to_device();
299                                 tiles[i].buffer = mem.device_pointer;
300                                 tiles[i].device_size = mem.device_size;
301
302                                 mem.restore_device();
303                         }
304                 }
305         }
306
307         void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
308         {
309                 /* Copy denoised result back to the host. */
310                 device_vector<float> &mem = tiles[9].buffers->buffer;
311                 mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
312                 mem.copy_from_device(0, mem.data_size, 1);
313                 mem.restore_device();
314                 /* Copy denoised result to the original device. */
315                 mem.copy_to_device();
316
317                 for(int i = 0; i < 9; i++) {
318                         if(!tiles[i].buffers) {
319                                 continue;
320                         }
321
322                         device_vector<float> &mem = tiles[i].buffers->buffer;
323                         if(mem.device != sub_device) {
324                                 mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
325                                 sub_device->mem_free(mem);
326                                 mem.restore_device();
327                         }
328                 }
329         }
330
331         int get_split_task_count(DeviceTask& task)
332         {
333                 int total_tasks = 0;
334                 list<DeviceTask> tasks;
335                 task.split(tasks, devices.size());
336                 foreach(SubDevice& sub, devices) {
337                         if(!tasks.empty()) {
338                                 DeviceTask subtask = tasks.front();
339                                 tasks.pop_front();
340
341                                 total_tasks += sub.device->get_split_task_count(subtask);
342                         }
343                 }
344                 return total_tasks;
345         }
346
347         void task_add(DeviceTask& task)
348         {
349                 list<DeviceTask> tasks;
350                 task.split(tasks, devices.size());
351
352                 foreach(SubDevice& sub, devices) {
353                         if(!tasks.empty()) {
354                                 DeviceTask subtask = tasks.front();
355                                 tasks.pop_front();
356
357                                 if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
358                                 if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
359                                 if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
360                                 if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
361                                 if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
362
363                                 sub.device->task_add(subtask);
364                         }
365                 }
366         }
367
368         void task_wait()
369         {
370                 foreach(SubDevice& sub, devices)
371                         sub.device->task_wait();
372         }
373
374         void task_cancel()
375         {
376                 foreach(SubDevice& sub, devices)
377                         sub.device->task_cancel();
378         }
379
380 protected:
381         Stats sub_stats_;
382 };
383
384 Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background)
385 {
386         return new MultiDevice(info, stats, background);
387 }
388
389 CCL_NAMESPACE_END