Merging r48971 through r48980 from trunk into soc-2011-tomato
[blender.git] / intern / cycles / device / device_cpu.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "device.h"
23 #include "device_intern.h"
24
25 #include "kernel.h"
26 #include "kernel_types.h"
27
28 #include "osl_shader.h"
29
30 #include "buffers.h"
31
32 #include "util_debug.h"
33 #include "util_foreach.h"
34 #include "util_function.h"
35 #include "util_opengl.h"
36 #include "util_progress.h"
37 #include "util_system.h"
38 #include "util_thread.h"
39
40 CCL_NAMESPACE_BEGIN
41
42 class CPUDevice : public Device
43 {
44 public:
45         TaskPool task_pool;
46         KernelGlobals *kg;
47         
48         CPUDevice(int threads_num)
49         {
50                 kg = kernel_globals_create();
51
52                 /* do now to avoid thread issues */
53                 system_cpu_support_optimized();
54         }
55
56         ~CPUDevice()
57         {
58                 task_pool.stop();
59                 kernel_globals_free(kg);
60         }
61
62         bool support_advanced_shading()
63         {
64                 return true;
65         }
66
67         void mem_alloc(device_memory& mem, MemoryType type)
68         {
69                 mem.device_pointer = mem.data_pointer;
70         }
71
72         void mem_copy_to(device_memory& mem)
73         {
74                 /* no-op */
75         }
76
77         void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
78         {
79                 /* no-op */
80         }
81
82         void mem_zero(device_memory& mem)
83         {
84                 memset((void*)mem.device_pointer, 0, mem.memory_size());
85         }
86
87         void mem_free(device_memory& mem)
88         {
89                 mem.device_pointer = 0;
90         }
91
92         void const_copy_to(const char *name, void *host, size_t size)
93         {
94                 kernel_const_copy(kg, name, host, size);
95         }
96
97         void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
98         {
99                 kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
100                 mem.device_pointer = mem.data_pointer;
101         }
102
103         void tex_free(device_memory& mem)
104         {
105                 mem.device_pointer = 0;
106         }
107
108         void *osl_memory()
109         {
110 #ifdef WITH_OSL
111                 return kernel_osl_memory(kg);
112 #else
113                 return NULL;
114 #endif
115         }
116
117         void thread_run(DeviceTask *task)
118         {
119                 if(task->type == DeviceTask::PATH_TRACE)
120                         thread_path_trace(*task);
121                 else if(task->type == DeviceTask::TONEMAP)
122                         thread_tonemap(*task);
123                 else if(task->type == DeviceTask::SHADER)
124                         thread_shader(*task);
125         }
126
127         class CPUDeviceTask : public DeviceTask {
128         public:
129                 CPUDeviceTask(CPUDevice *device, DeviceTask& task)
130                 : DeviceTask(task)
131                 {
132                         run = function_bind(&CPUDevice::thread_run, device, this);
133                 }
134         };
135
136         void thread_path_trace(DeviceTask& task)
137         {
138                 if(task_pool.cancelled())
139                         return;
140
141 #ifdef WITH_OSL
142                 if(kernel_osl_use(kg))
143                         OSLShader::thread_init(kg);
144 #endif
145
146                 RenderTile tile;
147                 
148                 while(task.acquire_tile(this, tile)) {
149                         float *render_buffer = (float*)tile.buffer;
150                         uint *rng_state = (uint*)tile.rng_state;
151                         int start_sample = tile.start_sample;
152                         int end_sample = tile.start_sample + tile.num_samples;
153
154 #ifdef WITH_OPTIMIZED_KERNEL
155                         if(system_cpu_support_optimized()) {
156                                 for(int y = tile.y; y < tile.y + tile.h; y++) {
157                                         for(int x = tile.x; x < tile.x + tile.w; x++)
158                                                 for(int sample = start_sample; sample < end_sample; sample++)
159                                                         kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
160                                                                 sample, x, y, tile.offset, tile.stride);
161
162                                         if(task_pool.cancelled())
163                                                 break;
164                                 }
165                         }
166                         else
167 #endif
168                         {
169                                 for(int y = tile.y; y < tile.y + tile.h; y++) {
170                                         for(int x = tile.x; x < tile.x + tile.w; x++)
171                                                 for(int sample = start_sample; sample < end_sample; sample++)
172                                                         kernel_cpu_path_trace(kg, render_buffer, rng_state,
173                                                                 sample, x, y, tile.offset, tile.stride);
174
175                                         if(task_pool.cancelled())
176                                                 break;
177                                 }
178                         }
179
180                         task.release_tile(tile);
181
182                         if(task_pool.cancelled())
183                                 break;
184                 }
185
186 #ifdef WITH_OSL
187                 if(kernel_osl_use(kg))
188                         OSLShader::thread_free(kg);
189 #endif
190         }
191
192         void thread_tonemap(DeviceTask& task)
193         {
194 #ifdef WITH_OPTIMIZED_KERNEL
195                 if(system_cpu_support_optimized()) {
196                         for(int y = task.y; y < task.y + task.h; y++)
197                                 for(int x = task.x; x < task.x + task.w; x++)
198                                         kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
199                                                 task.sample, task.resolution, x, y, task.offset, task.stride);
200                 }
201                 else
202 #endif
203                 {
204                         for(int y = task.y; y < task.y + task.h; y++)
205                                 for(int x = task.x; x < task.x + task.w; x++)
206                                         kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
207                                                 task.sample, task.resolution, x, y, task.offset, task.stride);
208                 }
209         }
210
211         void thread_shader(DeviceTask& task)
212         {
213 #ifdef WITH_OSL
214                 if(kernel_osl_use(kg))
215                         OSLShader::thread_init(kg);
216 #endif
217
218 #ifdef WITH_OPTIMIZED_KERNEL
219                 if(system_cpu_support_optimized()) {
220                         for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
221                                 kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
222
223                                 if(task_pool.cancelled())
224                                         break;
225                         }
226                 }
227                 else
228 #endif
229                 {
230                         for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
231                                 kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
232
233                                 if(task_pool.cancelled())
234                                         break;
235                         }
236                 }
237
238 #ifdef WITH_OSL
239                 if(kernel_osl_use(kg))
240                         OSLShader::thread_free(kg);
241 #endif
242         }
243
244         void task_add(DeviceTask& task)
245         {
246                 /* split task into smaller ones, more than number of threads for uneven
247                  * workloads where some parts of the image render slower than others */
248                 list<DeviceTask> tasks;
249                 task.split(tasks, TaskScheduler::num_threads()+1);
250
251                 foreach(DeviceTask& task, tasks)
252                         task_pool.push(new CPUDeviceTask(this, task));
253         }
254
255         void task_wait()
256         {
257                 task_pool.wait_work();
258         }
259
260         void task_cancel()
261         {
262                 task_pool.cancel();
263         }
264 };
265
266 Device *device_cpu_create(DeviceInfo& info, int threads)
267 {
268         return new CPUDevice(threads);
269 }
270
271 void device_cpu_info(vector<DeviceInfo>& devices)
272 {
273         DeviceInfo info;
274
275         info.type = DEVICE_CPU;
276         info.description = system_cpu_brand_string();
277         info.id = "CPU";
278         info.num = 0;
279         info.advanced_shading = true;
280         info.pack_images = false;
281
282         devices.insert(devices.begin(), info);
283 }
284
285 CCL_NAMESPACE_END
286