/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <stdlib.h>
#include <string.h>

#include "device.h"
#include "device_intern.h"

#include "kernel.h"
#include "kernel_types.h"

#include "osl_shader.h"

#include "buffers.h"

#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
#include "util_opengl.h"
#include "util_progress.h"
#include "util_system.h"
#include "util_thread.h"

CCL_NAMESPACE_BEGIN

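/* CPU rendering device: runs the CPU kernels directly on host memory and
 * executes render tasks on worker threads through a task pool. */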
class CPUDevice : public Device
{
public:
    TaskPool task_pool;
    KernelGlobals *kg;

    CPUDevice(Stats &stats, int threads_num) : Device(stats)
    {
        kg = kernel_globals_create();

        /* do now to avoid thread issues */
        system_cpu_support_optimized();
    }

    ~CPUDevice()
    {
        task_pool.stop();
        kernel_globals_free(kg);
    }

    bool support_advanced_shading()
    {
        return true;
    }

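    /* Device memory is host memory on the CPU device: allocation just aliases
     * the existing data pointer, so explicit host/device copies are not needed. */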
    void mem_alloc(device_memory& mem, MemoryType type)
    {
        mem.device_pointer = mem.data_pointer;
        stats.mem_alloc(mem.memory_size());
    }

    void mem_copy_to(device_memory& mem)
    {
        /* no-op, device pointer aliases host data */
    }

    void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
    {
        /* no-op, device pointer aliases host data */
    }

    void mem_zero(device_memory& mem)
    {
        memset((void*)mem.device_pointer, 0, mem.memory_size());
    }

    void mem_free(device_memory& mem)
    {
        mem.device_pointer = 0;
        stats.mem_free(mem.memory_size());
    }

    void const_copy_to(const char *name, void *host, size_t size)
    {
        kernel_const_copy(kg, name, host, size);
    }

    void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
    {
        kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
        mem.device_pointer = mem.data_pointer;
        stats.mem_alloc(mem.memory_size());
    }

    void tex_free(device_memory& mem)
    {
        mem.device_pointer = 0;
        stats.mem_free(mem.memory_size());
    }

    void *osl_memory()
    {
#ifdef WITH_OSL
        return kernel_osl_memory(kg);
#else
        return NULL;
#endif
    }

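    /* Entry point for task pool worker threads: dispatch to the handler
     * matching the task type (path trace, tonemap or shader evaluation). */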
    void thread_run(DeviceTask *task)
    {
        if(task->type == DeviceTask::PATH_TRACE)
            thread_path_trace(*task);
        else if(task->type == DeviceTask::TONEMAP)
            thread_tonemap(*task);
        else if(task->type == DeviceTask::SHADER)
            thread_shader(*task);
    }

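    /* Wraps a DeviceTask so the task pool calls back into
     * CPUDevice::thread_run() on a worker thread. */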
    class CPUDeviceTask : public DeviceTask {
    public:
        CPUDeviceTask(CPUDevice *device, DeviceTask& task)
        : DeviceTask(task)
        {
            run = function_bind(&CPUDevice::thread_run, device, this);
        }
    };

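    /* Path tracing: repeatedly acquire a tile, render its samples pixel by
     * pixel, update progress after each sample and stop early on cancellation. */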
    void thread_path_trace(DeviceTask& task)
    {
        if(task_pool.cancelled()) {
            if(task.need_finish_queue == false)
                return;
        }

#ifdef WITH_OSL
        if(kernel_osl_use(kg))
            OSLShader::thread_init(kg);
#endif

        RenderTile tile;

        while(task.acquire_tile(this, tile)) {
            float *render_buffer = (float*)tile.buffer;
            uint *rng_state = (uint*)tile.rng_state;
            int start_sample = tile.start_sample;
            int end_sample = tile.start_sample + tile.num_samples;

#ifdef WITH_OPTIMIZED_KERNEL
            if(system_cpu_support_optimized()) {
                for(int sample = start_sample; sample < end_sample; sample++) {
                    if(task.get_cancel() || task_pool.cancelled()) {
                        if(task.need_finish_queue == false)
                            break;
                    }

                    for(int y = tile.y; y < tile.y + tile.h; y++) {
                        for(int x = tile.x; x < tile.x + tile.w; x++) {
                            kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
                                sample, x, y, tile.offset, tile.stride);
                        }
                    }

                    tile.sample = sample + 1;
                    task.update_progress(tile);
                }
            }
            else
#endif
            {
                for(int sample = start_sample; sample < end_sample; sample++) {
                    if(task.get_cancel() || task_pool.cancelled()) {
                        if(task.need_finish_queue == false)
                            break;
                    }

                    for(int y = tile.y; y < tile.y + tile.h; y++) {
                        for(int x = tile.x; x < tile.x + tile.w; x++) {
                            kernel_cpu_path_trace(kg, render_buffer, rng_state,
                                sample, x, y, tile.offset, tile.stride);
                        }
                    }

                    tile.sample = sample + 1;
                    task.update_progress(tile);
                }
            }

            task.release_tile(tile);

            if(task_pool.cancelled()) {
                if(task.need_finish_queue == false)
                    break;
            }
        }

#ifdef WITH_OSL
        if(kernel_osl_use(kg))
            OSLShader::thread_free(kg);
#endif
    }

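    /* Tonemapping: convert the float render buffer to 8-bit RGBA output. */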
    void thread_tonemap(DeviceTask& task)
    {
#ifdef WITH_OPTIMIZED_KERNEL
        if(system_cpu_support_optimized()) {
            for(int y = task.y; y < task.y + task.h; y++)
                for(int x = task.x; x < task.x + task.w; x++)
                    kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
                        task.sample, task.resolution, x, y, task.offset, task.stride);
        }
        else
#endif
        {
            for(int y = task.y; y < task.y + task.h; y++)
                for(int x = task.x; x < task.x + task.w; x++)
                    kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
                        task.sample, task.resolution, x, y, task.offset, task.stride);
        }
    }

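    /* Shader evaluation: run the shader kernel for each requested input
     * point, checking for cancellation between points. */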
    void thread_shader(DeviceTask& task)
    {
#ifdef WITH_OSL
        if(kernel_osl_use(kg))
            OSLShader::thread_init(kg);
#endif

#ifdef WITH_OPTIMIZED_KERNEL
        if(system_cpu_support_optimized()) {
            for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
                kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

                if(task_pool.cancelled())
                    break;
            }
        }
        else
#endif
        {
            for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
                kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

                if(task_pool.cancelled())
                    break;
            }
        }

#ifdef WITH_OSL
        if(kernel_osl_use(kg))
            OSLShader::thread_free(kg);
#endif
    }

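    /* Task scheduling: tasks are split and pushed to the pool asynchronously;
     * task_wait() blocks until all queued work has finished. */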
    void task_add(DeviceTask& task)
    {
        /* split task into smaller ones, more than the number of threads, for
         * uneven workloads where some parts of the image render slower than others */
        list<DeviceTask> tasks;

        task.split(tasks, TaskScheduler::num_threads()+1);

        foreach(DeviceTask& task, tasks)
            task_pool.push(new CPUDeviceTask(this, task));
    }

    void task_wait()
    {
        task_pool.wait_work();
    }
};

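/* Factory and enumeration entry points used by the generic device code. */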
Device *device_cpu_create(DeviceInfo& info, Stats &stats, int threads)
{
    return new CPUDevice(stats, threads);
}

void device_cpu_info(vector<DeviceInfo>& devices)
{
    DeviceInfo info;

    info.type = DEVICE_CPU;
    info.description = system_cpu_brand_string();
    info.advanced_shading = true;
    info.pack_images = false;

    devices.insert(devices.begin(), info);
}

CCL_NAMESPACE_END