Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / device / device_multi.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <stdlib.h>
18 #include <sstream>
19
20 #include "device/device.h"
21 #include "device/device_intern.h"
22 #include "device/device_network.h"
23
24 #include "render/buffers.h"
25
26 #include "util/util_foreach.h"
27 #include "util/util_list.h"
28 #include "util/util_logging.h"
29 #include "util/util_map.h"
30 #include "util/util_time.h"
31
32 CCL_NAMESPACE_BEGIN
33
34 class MultiDevice : public Device
35 {
36 public:
37         struct SubDevice {
38                 explicit SubDevice(Device *device_)
39                 : device(device_) {}
40
41                 Device *device;
42                 map<device_ptr, device_ptr> ptr_map;
43         };
44
45         list<SubDevice> devices;
46         device_ptr unique_key;
47
48         MultiDevice(DeviceInfo& info, Stats &stats, bool background_)
49         : Device(info, stats, background_), unique_key(1)
50         {
51                 foreach(DeviceInfo& subinfo, info.multi_devices) {
52                         Device *device = Device::create(subinfo, sub_stats_, background);
53
54                         /* Always add CPU devices at the back since GPU devices can change
55                          * host memory pointers, which CPU uses as device pointer. */
56                         if(subinfo.type == DEVICE_CPU) {
57                                 devices.push_back(SubDevice(device));
58                         }
59                         else {
60                                 devices.push_front(SubDevice(device));
61                         }
62                 }
63
64 #ifdef WITH_NETWORK
65                 /* try to add network devices */
66                 ServerDiscovery discovery(true);
67                 time_sleep(1.0);
68
69                 vector<string> servers = discovery.get_server_list();
70
71                 foreach(string& server, servers) {
72                         Device *device = device_network_create(info, stats, server.c_str());
73                         if(device)
74                                 devices.push_back(SubDevice(device));
75                 }
76 #endif
77         }
78
79         ~MultiDevice()
80         {
81                 foreach(SubDevice& sub, devices)
82                         delete sub.device;
83         }
84
85         const string& error_message()
86         {
87                 foreach(SubDevice& sub, devices) {
88                         if(sub.device->error_message() != "") {
89                                 if(error_msg == "")
90                                         error_msg = sub.device->error_message();
91                                 break;
92                         }
93                 }
94
95                 return error_msg;
96         }
97
98         virtual bool show_samples() const
99         {
100                 if(devices.size() > 1) {
101                         return false;
102                 }
103                 return devices.front().device->show_samples();
104         }
105
106         bool load_kernels(const DeviceRequestedFeatures& requested_features)
107         {
108                 foreach(SubDevice& sub, devices)
109                         if(!sub.device->load_kernels(requested_features))
110                                 return false;
111
112                 return true;
113         }
114
115         void mem_alloc(device_memory& mem)
116         {
117                 device_ptr key = unique_key++;
118
119                 foreach(SubDevice& sub, devices) {
120                         mem.device = sub.device;
121                         mem.device_pointer = 0;
122                         mem.device_size = 0;
123
124                         sub.device->mem_alloc(mem);
125                         sub.ptr_map[key] = mem.device_pointer;
126                 }
127
128                 mem.device = this;
129                 mem.device_pointer = key;
130                 stats.mem_alloc(mem.device_size);
131         }
132
133         void mem_copy_to(device_memory& mem)
134         {
135                 device_ptr existing_key = mem.device_pointer;
136                 device_ptr key = (existing_key)? existing_key: unique_key++;
137                 size_t existing_size = mem.device_size;
138
139                 foreach(SubDevice& sub, devices) {
140                         mem.device = sub.device;
141                         mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
142                         mem.device_size = existing_size;
143
144                         sub.device->mem_copy_to(mem);
145                         sub.ptr_map[key] = mem.device_pointer;
146                 }
147
148                 mem.device = this;
149                 mem.device_pointer = key;
150                 stats.mem_alloc(mem.device_size - existing_size);
151         }
152
153         void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
154         {
155                 device_ptr key = mem.device_pointer;
156                 int i = 0, sub_h = h/devices.size();
157
158                 foreach(SubDevice& sub, devices) {
159                         int sy = y + i*sub_h;
160                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
161
162                         mem.device = sub.device;
163                         mem.device_pointer = sub.ptr_map[key];
164
165                         sub.device->mem_copy_from(mem, sy, w, sh, elem);
166                         i++;
167                 }
168
169                 mem.device = this;
170                 mem.device_pointer = key;
171         }
172
173         void mem_zero(device_memory& mem)
174         {
175                 device_ptr existing_key = mem.device_pointer;
176                 device_ptr key = (existing_key)? existing_key: unique_key++;
177                 size_t existing_size = mem.device_size;
178
179                 foreach(SubDevice& sub, devices) {
180                         mem.device = sub.device;
181                         mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
182                         mem.device_size = existing_size;
183
184                         sub.device->mem_zero(mem);
185                         sub.ptr_map[key] = mem.device_pointer;
186                 }
187
188                 mem.device = this;
189                 mem.device_pointer = key;
190                 stats.mem_alloc(mem.device_size - existing_size);
191         }
192
193         void mem_free(device_memory& mem)
194         {
195                 device_ptr key = mem.device_pointer;
196                 size_t existing_size = mem.device_size;
197
198                 foreach(SubDevice& sub, devices) {
199                         mem.device = sub.device;
200                         mem.device_pointer = sub.ptr_map[key];
201                         mem.device_size = existing_size;
202
203                         sub.device->mem_free(mem);
204                         sub.ptr_map.erase(sub.ptr_map.find(key));
205                 }
206
207                 mem.device = this;
208                 mem.device_pointer = 0;
209                 mem.device_size = 0;
210                 stats.mem_free(existing_size);
211         }
212
213         void const_copy_to(const char *name, void *host, size_t size)
214         {
215                 foreach(SubDevice& sub, devices)
216                         sub.device->const_copy_to(name, host, size);
217         }
218
219         void draw_pixels(
220             device_memory& rgba, int y,
221             int w, int h, int width, int height,
222             int dx, int dy, int dw, int dh,
223             bool transparent, const DeviceDrawParams &draw_params)
224         {
225                 device_ptr key = rgba.device_pointer;
226                 int i = 0, sub_h = h/devices.size();
227                 int sub_height = height/devices.size();
228
229                 foreach(SubDevice& sub, devices) {
230                         int sy = y + i*sub_h;
231                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
232                         int sheight = (i == (int)devices.size() - 1)? height - sub_height*i: sub_height;
233                         int sdy = dy + i*sub_height;
234                         /* adjust math for w/width */
235
236                         rgba.device_pointer = sub.ptr_map[key];
237                         sub.device->draw_pixels(rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
238                         i++;
239                 }
240
241                 rgba.device_pointer = key;
242         }
243
244         void map_tile(Device *sub_device, RenderTile& tile)
245         {
246                 foreach(SubDevice& sub, devices) {
247                         if(sub.device == sub_device) {
248                                 if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
249                         }
250                 }
251         }
252
253         int device_number(Device *sub_device)
254         {
255                 int i = 0;
256
257                 foreach(SubDevice& sub, devices) {
258                         if(sub.device == sub_device)
259                                 return i;
260                         i++;
261                 }
262
263                 return -1;
264         }
265
266         void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
267         {
268                 for(int i = 0; i < 9; i++) {
269                         if(!tiles[i].buffers) {
270                                 continue;
271                         }
272
273                         /* If the tile was rendered on another device, copy its memory to
274                          * to the current device now, for the duration of the denoising task.
275                          * Note that this temporarily modifies the RenderBuffers and calls
276                          * the device, so this function is not thread safe. */
277                         device_vector<float> &mem = tiles[i].buffers->buffer;
278                         if(mem.device != sub_device) {
279                                 /* Only copy from device to host once. This is faster, but
280                                  * also required for the case where a CPU thread is denoising
281                                  * a tile rendered on the GPU. In that case we have to avoid
282                                  * overwriting the buffer being denoised by the CPU thread. */
283                                 if(!tiles[i].buffers->map_neighbor_copied) {
284                                         tiles[i].buffers->map_neighbor_copied = true;
285                                         mem.copy_from_device(0, mem.data_size, 1);
286                                 }
287
288                                 mem.swap_device(sub_device, 0, 0);
289
290                                 mem.copy_to_device();
291                                 tiles[i].buffer = mem.device_pointer;
292                                 tiles[i].device_size = mem.device_size;
293
294                                 mem.restore_device();
295                         }
296                 }
297         }
298
299         void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
300         {
301                 /* Copy denoised result back to the host. */
302                 device_vector<float> &mem = tiles[9].buffers->buffer;
303                 mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
304                 mem.copy_from_device(0, mem.data_size, 1);
305                 mem.restore_device();
306                 /* Copy denoised result to the original device. */
307                 mem.copy_to_device();
308
309                 for(int i = 0; i < 9; i++) {
310                         if(!tiles[i].buffers) {
311                                 continue;
312                         }
313
314                         device_vector<float> &mem = tiles[i].buffers->buffer;
315                         if(mem.device != sub_device) {
316                                 mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
317                                 sub_device->mem_free(mem);
318                                 mem.restore_device();
319                         }
320                 }
321         }
322
323         int get_split_task_count(DeviceTask& task)
324         {
325                 int total_tasks = 0;
326                 list<DeviceTask> tasks;
327                 task.split(tasks, devices.size());
328                 foreach(SubDevice& sub, devices) {
329                         if(!tasks.empty()) {
330                                 DeviceTask subtask = tasks.front();
331                                 tasks.pop_front();
332
333                                 total_tasks += sub.device->get_split_task_count(subtask);
334                         }
335                 }
336                 return total_tasks;
337         }
338
339         void task_add(DeviceTask& task)
340         {
341                 list<DeviceTask> tasks;
342                 task.split(tasks, devices.size());
343
344                 foreach(SubDevice& sub, devices) {
345                         if(!tasks.empty()) {
346                                 DeviceTask subtask = tasks.front();
347                                 tasks.pop_front();
348
349                                 if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
350                                 if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
351                                 if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
352                                 if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
353                                 if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
354
355                                 sub.device->task_add(subtask);
356                         }
357                 }
358         }
359
360         void task_wait()
361         {
362                 foreach(SubDevice& sub, devices)
363                         sub.device->task_wait();
364         }
365
366         void task_cancel()
367         {
368                 foreach(SubDevice& sub, devices)
369                         sub.device->task_cancel();
370         }
371
372 protected:
373         Stats sub_stats_;
374 };
375
376 Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background)
377 {
378         return new MultiDevice(info, stats, background);
379 }
380
381 CCL_NAMESPACE_END