Cycles: multi GPU rendering support.
[blender.git] / intern / cycles / device / device_cpu.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "device.h"
23 #include "device_intern.h"
24
25 #include "kernel.h"
26 #include "kernel_types.h"
27
28 #include "osl_shader.h"
29
30 #include "util_debug.h"
31 #include "util_foreach.h"
32 #include "util_function.h"
33 #include "util_opengl.h"
34 #include "util_progress.h"
35 #include "util_system.h"
36 #include "util_thread.h"
37
38 CCL_NAMESPACE_BEGIN
39
40 class CPUDevice : public Device
41 {
42 public:
43         vector<thread*> threads;
44         ThreadQueue<DeviceTask> tasks;
45         KernelGlobals *kg;
46         
47         CPUDevice(int threads_num)
48         {
49                 kg = kernel_globals_create();
50
51                 /* do now to avoid thread issues */
52                 system_cpu_support_optimized();
53
54                 if(threads_num == 0)
55                         threads_num = system_cpu_thread_count();
56
57                 threads.resize(threads_num);
58
59                 for(size_t i = 0; i < threads.size(); i++)
60                         threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i));
61         }
62
63         ~CPUDevice()
64         {
65                 tasks.stop();
66
67                 foreach(thread *t, threads) {
68                         t->join();
69                         delete t;
70                 }
71
72                 kernel_globals_free(kg);
73         }
74
75         bool support_full_kernel()
76         {
77                 return true;
78         }
79
80         string description()
81         {
82                 return system_cpu_brand_string();
83         }
84
85         void mem_alloc(device_memory& mem, MemoryType type)
86         {
87                 mem.device_pointer = mem.data_pointer;
88         }
89
90         void mem_copy_to(device_memory& mem)
91         {
92                 /* no-op */
93         }
94
95         void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
96         {
97                 /* no-op */
98         }
99
100         void mem_zero(device_memory& mem)
101         {
102                 memset((void*)mem.device_pointer, 0, mem.memory_size());
103         }
104
105         void mem_free(device_memory& mem)
106         {
107                 mem.device_pointer = 0;
108         }
109
110         void const_copy_to(const char *name, void *host, size_t size)
111         {
112                 kernel_const_copy(kg, name, host, size);
113         }
114
115         void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
116         {
117                 kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
118                 mem.device_pointer = mem.data_pointer;
119         }
120
121         void tex_free(device_memory& mem)
122         {
123                 mem.device_pointer = 0;
124         }
125
126         void *osl_memory()
127         {
128 #ifdef WITH_OSL
129                 return kernel_osl_memory(kg);
130 #else
131                 return NULL;
132 #endif
133         }
134
135         void thread_run(int t)
136         {
137                 DeviceTask task;
138
139                 while(tasks.worker_wait_pop(task)) {
140                         if(task.type == DeviceTask::PATH_TRACE)
141                                 thread_path_trace(task);
142                         else if(task.type == DeviceTask::TONEMAP)
143                                 thread_tonemap(task);
144                         else if(task.type == DeviceTask::SHADER)
145                                 thread_shader(task);
146
147                         tasks.worker_done();
148                 }
149         }
150
151         void thread_path_trace(DeviceTask& task)
152         {
153                 if(tasks.worker_cancel())
154                         return;
155
156 #ifdef WITH_OSL
157                 if(kernel_osl_use(kg))
158                         OSLShader::thread_init(kg);
159 #endif
160
161 #ifdef WITH_OPTIMIZED_KERNEL
162                 if(system_cpu_support_optimized()) {
163                         for(int y = task.y; y < task.y + task.h; y++) {
164                                 for(int x = task.x; x < task.x + task.w; x++)
165                                         kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state,
166                                                 task.sample, x, y, task.offset, task.stride);
167
168                                 if(tasks.worker_cancel())
169                                         break;
170                         }
171                 }
172                 else
173 #endif
174                 {
175                         for(int y = task.y; y < task.y + task.h; y++) {
176                                 for(int x = task.x; x < task.x + task.w; x++)
177                                         kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state,
178                                                 task.sample, x, y, task.offset, task.stride);
179
180                                 if(tasks.worker_cancel())
181                                         break;
182                         }
183                 }
184
185 #ifdef WITH_OSL
186                 if(kernel_osl_use(kg))
187                         OSLShader::thread_free(kg);
188 #endif
189         }
190
191         void thread_tonemap(DeviceTask& task)
192         {
193 #ifdef WITH_OPTIMIZED_KERNEL
194                 if(system_cpu_support_optimized()) {
195                         for(int y = task.y; y < task.y + task.h; y++)
196                                 for(int x = task.x; x < task.x + task.w; x++)
197                                         kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer,
198                                                 task.sample, task.resolution, x, y, task.offset, task.stride);
199                 }
200                 else
201 #endif
202                 {
203                         for(int y = task.y; y < task.y + task.h; y++)
204                                 for(int x = task.x; x < task.x + task.w; x++)
205                                         kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer,
206                                                 task.sample, task.resolution, x, y, task.offset, task.stride);
207                 }
208         }
209
210         void thread_shader(DeviceTask& task)
211         {
212 #ifdef WITH_OSL
213                 if(kernel_osl_use(kg))
214                         OSLShader::thread_init(kg);
215 #endif
216
217 #ifdef WITH_OPTIMIZED_KERNEL
218                 if(system_cpu_support_optimized()) {
219                         for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
220                                 kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x);
221
222                                 if(tasks.worker_cancel())
223                                         break;
224                         }
225                 }
226                 else
227 #endif
228                 {
229                         for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
230                                 kernel_cpu_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x);
231
232                                 if(tasks.worker_cancel())
233                                         break;
234                         }
235                 }
236
237 #ifdef WITH_OSL
238                 if(kernel_osl_use(kg))
239                         OSLShader::thread_free(kg);
240 #endif
241         }
242
243         void task_add(DeviceTask& task)
244         {
245                 /* split task into smaller ones, more than number of threads for uneven
246                    workloads where some parts of the image render slower than others */
247                 task.split(tasks, threads.size()*10);
248         }
249
250         void task_wait()
251         {
252                 tasks.wait_done();
253         }
254
255         void task_cancel()
256         {
257                 tasks.cancel();
258         }
259 };
260
261 Device *device_cpu_create(DeviceInfo& info, int threads)
262 {
263         return new CPUDevice(threads);
264 }
265
266 void device_cpu_info(vector<DeviceInfo>& devices)
267 {
268         DeviceInfo info;
269
270         info.type = DEVICE_CPU;
271         info.description = system_cpu_brand_string();
272         info.id = "CPU";
273         info.num = 0;
274
275         devices.push_back(info);
276 }
277
278 CCL_NAMESPACE_END
279