Cycles / CUDA: Increase maximum image textures on GPU.
[blender.git] / intern / cycles / device / device_multi.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16
17 #include <stdlib.h>
18 #include <sstream>
19
20 #include "device.h"
21 #include "device_intern.h"
22 #include "device_network.h"
23
24 #include "buffers.h"
25
26 #include "util_foreach.h"
27 #include "util_list.h"
28 #include "util_map.h"
29 #include "util_time.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 class MultiDevice : public Device
34 {
35 public:
36         struct SubDevice {
37                 SubDevice(Device *device_)
38                 : device(device_) {}
39
40                 Device *device;
41                 map<device_ptr, device_ptr> ptr_map;
42         };
43
44         list<SubDevice> devices;
45         device_ptr unique_ptr;
46
47         MultiDevice(DeviceInfo& info, Stats &stats, bool background_)
48         : Device(info, stats, background_), unique_ptr(1)
49         {
50                 Device *device;
51
52                 foreach(DeviceInfo& subinfo, info.multi_devices) {
53                         device = Device::create(subinfo, stats, background);
54                         devices.push_back(SubDevice(device));
55                 }
56
57 #ifdef WITH_NETWORK
58                 /* try to add network devices */
59                 ServerDiscovery discovery(true);
60                 time_sleep(1.0);
61
62                 vector<string> servers = discovery.get_server_list();
63
64                 foreach(string& server, servers) {
65                         device = device_network_create(info, stats, server.c_str());
66                         if(device)
67                                 devices.push_back(SubDevice(device));
68                 }
69 #endif
70         }
71
72         ~MultiDevice()
73         {
74                 foreach(SubDevice& sub, devices)
75                         delete sub.device;
76         }
77
78         const string& error_message()
79         {
80                 foreach(SubDevice& sub, devices) {
81                         if(sub.device->error_message() != "") {
82                                 if(error_msg == "")
83                                         error_msg = sub.device->error_message();
84                                 break;
85                         }
86                 }
87
88                 return error_msg;
89         }
90
91         bool load_kernels(bool experimental)
92         {
93                 foreach(SubDevice& sub, devices)
94                         if(!sub.device->load_kernels(experimental))
95                                 return false;
96
97                 return true;
98         }
99
100         void mem_alloc(device_memory& mem, MemoryType type)
101         {
102                 foreach(SubDevice& sub, devices) {
103                         mem.device_pointer = 0;
104                         sub.device->mem_alloc(mem, type);
105                         sub.ptr_map[unique_ptr] = mem.device_pointer;
106                 }
107
108                 mem.device_pointer = unique_ptr++;
109         }
110
111         void mem_copy_to(device_memory& mem)
112         {
113                 device_ptr tmp = mem.device_pointer;
114
115                 foreach(SubDevice& sub, devices) {
116                         mem.device_pointer = sub.ptr_map[tmp];
117                         sub.device->mem_copy_to(mem);
118                 }
119
120                 mem.device_pointer = tmp;
121         }
122
123         void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
124         {
125                 device_ptr tmp = mem.device_pointer;
126                 int i = 0, sub_h = h/devices.size();
127
128                 foreach(SubDevice& sub, devices) {
129                         int sy = y + i*sub_h;
130                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
131
132                         mem.device_pointer = sub.ptr_map[tmp];
133                         sub.device->mem_copy_from(mem, sy, w, sh, elem);
134                         i++;
135                 }
136
137                 mem.device_pointer = tmp;
138         }
139
140         void mem_zero(device_memory& mem)
141         {
142                 device_ptr tmp = mem.device_pointer;
143
144                 foreach(SubDevice& sub, devices) {
145                         mem.device_pointer = sub.ptr_map[tmp];
146                         sub.device->mem_zero(mem);
147                 }
148
149                 mem.device_pointer = tmp;
150         }
151
152         void mem_free(device_memory& mem)
153         {
154                 device_ptr tmp = mem.device_pointer;
155
156                 foreach(SubDevice& sub, devices) {
157                         mem.device_pointer = sub.ptr_map[tmp];
158                         sub.device->mem_free(mem);
159                         sub.ptr_map.erase(sub.ptr_map.find(tmp));
160                 }
161
162                 mem.device_pointer = 0;
163         }
164
165         void const_copy_to(const char *name, void *host, size_t size)
166         {
167                 foreach(SubDevice& sub, devices)
168                         sub.device->const_copy_to(name, host, size);
169         }
170
171         void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic)
172         {
173                 foreach(SubDevice& sub, devices) {
174                         mem.device_pointer = 0;
175                         sub.device->tex_alloc(name, mem, interpolation, periodic);
176                         sub.ptr_map[unique_ptr] = mem.device_pointer;
177                 }
178
179                 mem.device_pointer = unique_ptr++;
180         }
181
182         void tex_free(device_memory& mem)
183         {
184                 device_ptr tmp = mem.device_pointer;
185
186                 foreach(SubDevice& sub, devices) {
187                         mem.device_pointer = sub.ptr_map[tmp];
188                         sub.device->tex_free(mem);
189                         sub.ptr_map.erase(sub.ptr_map.find(tmp));
190                 }
191
192                 mem.device_pointer = 0;
193         }
194
195         void pixels_alloc(device_memory& mem)
196         {
197                 foreach(SubDevice& sub, devices) {
198                         mem.device_pointer = 0;
199                         sub.device->pixels_alloc(mem);
200                         sub.ptr_map[unique_ptr] = mem.device_pointer;
201                 }
202
203                 mem.device_pointer = unique_ptr++;
204         }
205
206         void pixels_free(device_memory& mem)
207         {
208                 device_ptr tmp = mem.device_pointer;
209
210                 foreach(SubDevice& sub, devices) {
211                         mem.device_pointer = sub.ptr_map[tmp];
212                         sub.device->pixels_free(mem);
213                         sub.ptr_map.erase(sub.ptr_map.find(tmp));
214                 }
215
216                 mem.device_pointer = 0;
217         }
218
219         void pixels_copy_from(device_memory& mem, int y, int w, int h)
220         {
221                 device_ptr tmp = mem.device_pointer;
222                 int i = 0, sub_h = h/devices.size();
223
224                 foreach(SubDevice& sub, devices) {
225                         int sy = y + i*sub_h;
226                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
227
228                         mem.device_pointer = sub.ptr_map[tmp];
229                         sub.device->pixels_copy_from(mem, sy, w, sh);
230                         i++;
231                 }
232
233                 mem.device_pointer = tmp;
234         }
235
236         void draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent,
237                 const DeviceDrawParams &draw_params)
238         {
239                 device_ptr tmp = rgba.device_pointer;
240                 int i = 0, sub_h = h/devices.size();
241                 int sub_height = height/devices.size();
242
243                 foreach(SubDevice& sub, devices) {
244                         int sy = y + i*sub_h;
245                         int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
246                         int sheight = (i == (int)devices.size() - 1)? height - sub_height*i: sub_height;
247                         int sdy = dy + i*sub_height;
248                         /* adjust math for w/width */
249
250                         rgba.device_pointer = sub.ptr_map[tmp];
251                         sub.device->draw_pixels(rgba, sy, w, sh, sdy, width, sheight, transparent, draw_params);
252                         i++;
253                 }
254
255                 rgba.device_pointer = tmp;
256         }
257
258         void map_tile(Device *sub_device, RenderTile& tile)
259         {
260                 foreach(SubDevice& sub, devices) {
261                         if(sub.device == sub_device) {
262                                 if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
263                                 if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state];
264                         }
265                 }
266         }
267
268         int device_number(Device *sub_device)
269         {
270                 int i = 0;
271
272                 foreach(SubDevice& sub, devices) {
273                         if(sub.device == sub_device)
274                                 return i;
275                         i++;
276                 }
277
278                 return -1;
279         }
280
281         void task_add(DeviceTask& task)
282         {
283                 list<DeviceTask> tasks;
284                 task.split(tasks, devices.size());
285
286                 foreach(SubDevice& sub, devices) {
287                         if(!tasks.empty()) {
288                                 DeviceTask subtask = tasks.front();
289                                 tasks.pop_front();
290
291                                 if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
292                                 if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
293                                 if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
294                                 if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
295                                 if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
296
297                                 sub.device->task_add(subtask);
298                         }
299                 }
300         }
301
302         void task_wait()
303         {
304                 foreach(SubDevice& sub, devices)
305                         sub.device->task_wait();
306         }
307
308         void task_cancel()
309         {
310                 foreach(SubDevice& sub, devices)
311                         sub.device->task_cancel();
312         }
313 };
314
315 Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background)
316 {
317         return new MultiDevice(info, stats, background);
318 }
319
320 static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool with_display, bool with_advanced_shading, const char *id_fmt, int num)
321 {
322         DeviceInfo info;
323
324         /* create map to find duplicate descriptions */
325         map<string, int> dupli_map;
326         map<string, int>::iterator dt;
327         int num_added = 0, num_display = 0;
328
329         info.advanced_shading = with_advanced_shading;
330         info.pack_images = false;
331         info.extended_images = true;
332
333         foreach(DeviceInfo& subinfo, devices) {
334                 if(subinfo.type == type) {
335                         if(subinfo.advanced_shading != info.advanced_shading)
336                                 continue;
337                         if(subinfo.display_device) {
338                                 if(with_display)
339                                         num_display++;
340                                 else
341                                         continue;
342                         }
343
344                         string key = subinfo.description;
345
346                         if(dupli_map.find(key) == dupli_map.end())
347                                 dupli_map[key] = 1;
348                         else
349                                 dupli_map[key]++;
350
351                         info.multi_devices.push_back(subinfo);
352                         if(subinfo.display_device)
353                                 info.display_device = true;
354                         info.pack_images = info.pack_images || subinfo.pack_images;
355                         info.extended_images = info.extended_images && subinfo.extended_images;
356                         num_added++;
357                 }
358         }
359
360         if(num_added <= 1 || (with_display && num_display == 0))
361                 return false;
362
363         /* generate string */
364         stringstream desc;
365         vector<string> last_tokens;
366         bool first = true;
367
368         for(dt = dupli_map.begin(); dt != dupli_map.end(); dt++) {
369                 if(!first) desc << " + ";
370                 first = false;
371
372                 /* get name and count */
373                 string name = dt->first;
374                 int count = dt->second;
375
376                 /* strip common prefixes */
377                 vector<string> tokens;
378                 string_split(tokens, dt->first);
379
380                 if(tokens.size() > 1) {
381                         int i;
382
383                         for(i = 0; i < tokens.size() && i < last_tokens.size(); i++)
384                                 if(tokens[i] != last_tokens[i])
385                                         break;
386
387                         name = "";
388                         for(; i < tokens.size(); i++) {
389                                 name += tokens[i];
390                                 if(i != tokens.size() - 1)
391                                         name += " ";
392                         }
393                 }
394
395                 last_tokens = tokens;
396
397                 /* add */
398                 if(count > 1)
399                         desc << name << " (" << count << "x)";
400                 else
401                         desc << name;
402         }
403
404         /* add info */
405         info.type = DEVICE_MULTI;
406         info.description = desc.str();
407         info.id = string_printf(id_fmt, num);
408         info.display_device = with_display;
409         info.num = 0;
410
411         if(with_display)
412                 devices.push_back(info);
413         else
414                 devices.insert(devices.begin(), info);
415         
416         return true;
417 }
418
419 void device_multi_info(vector<DeviceInfo>& devices)
420 {
421         int num = 0;
422
423         if(!device_multi_add(devices, DEVICE_CUDA, false, true, "CUDA_MULTI_%d", num++))
424                 device_multi_add(devices, DEVICE_CUDA, false, false, "CUDA_MULTI_%d", num++);
425         if(!device_multi_add(devices, DEVICE_CUDA, true, true, "CUDA_MULTI_%d", num++))
426                 device_multi_add(devices, DEVICE_CUDA, true, false, "CUDA_MULTI_%d", num++);
427
428         num = 0;
429         if(!device_multi_add(devices, DEVICE_OPENCL, false, true, "OPENCL_MULTI_%d", num++))
430                 device_multi_add(devices, DEVICE_OPENCL, false, false, "OPENCL_MULTI_%d", num++);
431         if(!device_multi_add(devices, DEVICE_OPENCL, true, true, "OPENCL_MULTI_%d", num++))
432                 device_multi_add(devices, DEVICE_OPENCL, true, false, "OPENCL_MULTI_%d", num++);
433 }
434
435 CCL_NAMESPACE_END
436