blender.git: intern/cycles/device/device_cpu.cpp @ 519c458ffdf990118dd26f7de63b1dd462bd96ca
/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <stdlib.h>
#include <string.h>

#include "device.h"
#include "device_intern.h"

#include "kernel.h"
#include "kernel_types.h"

#include "osl_shader.h"

#include "buffers.h"

#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
#include "util_opengl.h"
#include "util_progress.h"
#include "util_system.h"
#include "util_thread.h"

CCL_NAMESPACE_BEGIN

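/* CPU rendering device: kernels run directly on host memory, with work
 * distributed over threads through the task pool. */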
class CPUDevice : public Device
{
public:
	TaskPool task_pool;
	KernelGlobals *kg;

	CPUDevice(Stats &stats, int threads_num) : Device(stats)
	{
		kg = kernel_globals_create();

		/* do now to avoid thread issues */
		system_cpu_support_optimized();
	}

	~CPUDevice()
	{
		task_pool.stop();
		kernel_globals_free(kg);
	}

	bool support_advanced_shading()
	{
		return true;
	}

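	/* On the CPU, device memory simply aliases the host pointer, so copies
	 * to and from the device are no-ops. */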
	void mem_alloc(device_memory& mem, MemoryType type)
	{
		mem.device_pointer = mem.data_pointer;

		stats.mem_alloc(mem.memory_size());
	}

	void mem_copy_to(device_memory& mem)
	{
		/* no-op */
	}

	void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
	{
		/* no-op */
	}

	void mem_zero(device_memory& mem)
	{
		memset((void*)mem.device_pointer, 0, mem.memory_size());
	}

	void mem_free(device_memory& mem)
	{
		mem.device_pointer = 0;

		stats.mem_free(mem.memory_size());
	}

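	/* Constant and texture data are handed straight to the kernel globals. */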
	void const_copy_to(const char *name, void *host, size_t size)
	{
		kernel_const_copy(kg, name, host, size);
	}

	void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
	{
		kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
		mem.device_pointer = mem.data_pointer;

		stats.mem_alloc(mem.memory_size());
	}

	void tex_free(device_memory& mem)
	{
		mem.device_pointer = 0;

		stats.mem_free(mem.memory_size());
	}

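	/* OSL globals, only available when building with WITH_OSL. */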
	void *osl_memory()
	{
#ifdef WITH_OSL
		return kernel_osl_memory(kg);
#else
		return NULL;
#endif
	}

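	/* Dispatch a task to the handler matching its type. */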
	void thread_run(DeviceTask *task)
	{
		if(task->type == DeviceTask::PATH_TRACE)
			thread_path_trace(*task);
		else if(task->type == DeviceTask::TONEMAP)
			thread_tonemap(*task);
		else if(task->type == DeviceTask::SHADER)
			thread_shader(*task);
	}

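	/* Wraps a DeviceTask so the task pool executes it through thread_run. */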
	class CPUDeviceTask : public DeviceTask {
	public:
		CPUDeviceTask(CPUDevice *device, DeviceTask& task)
		: DeviceTask(task)
		{
			run = function_bind(&CPUDevice::thread_run, device, this);
		}
	};

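	/* Path tracing: acquire tiles from the task and render them sample by
	 * sample, checking for cancellation between samples. */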
	void thread_path_trace(DeviceTask& task)
	{
		if(task_pool.cancelled()) {
			if(task.need_finish_queue == false)
				return;
		}

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_init(kg);
#endif

		RenderTile tile;

		while(task.acquire_tile(this, tile)) {
			float *render_buffer = (float*)tile.buffer;
			uint *rng_state = (uint*)tile.rng_state;
			int start_sample = tile.start_sample;
			int end_sample = tile.start_sample + tile.num_samples;

#ifdef WITH_OPTIMIZED_KERNEL
			if(system_cpu_support_optimized()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if(task.get_cancel() || task_pool.cancelled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
			{
				for(int sample = start_sample; sample < end_sample; sample++) {
					if(task.get_cancel() || task_pool.cancelled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_path_trace(kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}

			task.release_tile(tile);

			if(task_pool.cancelled()) {
				if(task.need_finish_queue == false)
					break;
			}
		}

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_free(kg);
#endif
	}

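	/* Tonemap: convert the float render buffer into byte RGBA for display. */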
	void thread_tonemap(DeviceTask& task)
	{
#ifdef WITH_OPTIMIZED_KERNEL
		if(system_cpu_support_optimized()) {
			for(int y = task.y; y < task.y + task.h; y++)
				for(int x = task.x; x < task.x + task.w; x++)
					kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
						task.sample, task.resolution, x, y, task.offset, task.stride);
		}
		else
#endif
		{
			for(int y = task.y; y < task.y + task.h; y++)
				for(int x = task.x; x < task.x + task.w; x++)
					kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
						task.sample, task.resolution, x, y, task.offset, task.stride);
		}
	}

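	/* Shader evaluation: run the shader kernel over the requested range of
	 * input points. */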
	void thread_shader(DeviceTask& task)
	{
#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_init(kg);
#endif

#ifdef WITH_OPTIMIZED_KERNEL
		if(system_cpu_support_optimized()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task_pool.cancelled())
					break;
			}
		}
		else
#endif
		{
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task_pool.cancelled())
					break;
			}
		}

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_free(kg);
#endif
	}

	void task_add(DeviceTask& task)
	{
		/* split the task into smaller ones, more than the number of threads,
		 * for uneven workloads where some parts of the image render slower
		 * than others */
		list<DeviceTask> tasks;
		task.split(tasks, TaskScheduler::num_threads()+1);

		foreach(DeviceTask& task, tasks)
			task_pool.push(new CPUDeviceTask(this, task));
	}

	void task_wait()
	{
		task_pool.wait_work();
	}

	void task_cancel()
	{
		task_pool.cancel();
	}
};

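/* Creation and enumeration entry points for the CPU device. */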
Device *device_cpu_create(DeviceInfo& info, Stats &stats, int threads)
{
	return new CPUDevice(stats, threads);
}

void device_cpu_info(vector<DeviceInfo>& devices)
{
	DeviceInfo info;

	info.type = DEVICE_CPU;
	info.description = system_cpu_brand_string();
	info.id = "CPU";
	info.num = 0;
	info.advanced_shading = true;
	info.pack_images = false;

	devices.insert(devices.begin(), info);
}

CCL_NAMESPACE_END