ec84047c44fe8e9fb88489de27a72f810b85cc91
[blender.git] / intern / cycles / device / device_cpu.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "device.h"
23 #include "device_intern.h"
24
25 #include "kernel.h"
26 #include "kernel_types.h"
27
28 #include "osl_shader.h"
29
30 #include "util_debug.h"
31 #include "util_foreach.h"
32 #include "util_function.h"
33 #include "util_opengl.h"
34 #include "util_progress.h"
35 #include "util_system.h"
36 #include "util_thread.h"
37
38 CCL_NAMESPACE_BEGIN
39
40 class CPUDevice : public Device
41 {
42 public:
43         TaskPool task_pool;
44         KernelGlobals *kg;
45         
46         CPUDevice(int threads_num)
47         : task_pool(function_bind(&CPUDevice::thread_run, this, _1, _2))
48         {
49                 kg = kernel_globals_create();
50
51                 /* do now to avoid thread issues */
52                 system_cpu_support_optimized();
53         }
54
55         ~CPUDevice()
56         {
57                 task_pool.stop();
58                 kernel_globals_free(kg);
59         }
60
61         bool support_advanced_shading()
62         {
63                 return true;
64         }
65
66         void mem_alloc(device_memory& mem, MemoryType type)
67         {
68                 mem.device_pointer = mem.data_pointer;
69         }
70
71         void mem_copy_to(device_memory& mem)
72         {
73                 /* no-op */
74         }
75
76         void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
77         {
78                 /* no-op */
79         }
80
81         void mem_zero(device_memory& mem)
82         {
83                 memset((void*)mem.device_pointer, 0, mem.memory_size());
84         }
85
86         void mem_free(device_memory& mem)
87         {
88                 mem.device_pointer = 0;
89         }
90
91         void const_copy_to(const char *name, void *host, size_t size)
92         {
93                 kernel_const_copy(kg, name, host, size);
94         }
95
96         void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
97         {
98                 kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
99                 mem.device_pointer = mem.data_pointer;
100         }
101
102         void tex_free(device_memory& mem)
103         {
104                 mem.device_pointer = 0;
105         }
106
107         void *osl_memory()
108         {
109 #ifdef WITH_OSL
110                 return kernel_osl_memory(kg);
111 #else
112                 return NULL;
113 #endif
114         }
115
116         void thread_run(Task *task_, int thread_id)
117         {
118                 DeviceTask *task = (DeviceTask*)task_;
119
120                 if(task->type == DeviceTask::PATH_TRACE)
121                         thread_path_trace(*task);
122                 else if(task->type == DeviceTask::TONEMAP)
123                         thread_tonemap(*task);
124                 else if(task->type == DeviceTask::SHADER)
125                         thread_shader(*task);
126         }
127
128         void thread_path_trace(DeviceTask& task)
129         {
130                 if(task_pool.cancelled())
131                         return;
132
133 #ifdef WITH_OSL
134                 if(kernel_osl_use(kg))
135                         OSLShader::thread_init(kg);
136 #endif
137
138 #ifdef WITH_OPTIMIZED_KERNEL
139                 if(system_cpu_support_optimized()) {
140                         for(int y = task.y; y < task.y + task.h; y++) {
141                                 for(int x = task.x; x < task.x + task.w; x++)
142                                         kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
143                                                 task.sample, x, y, task.offset, task.stride);
144
145                                 if(task_pool.cancelled())
146                                         break;
147                         }
148                 }
149                 else
150 #endif
151                 {
152                         for(int y = task.y; y < task.y + task.h; y++) {
153                                 for(int x = task.x; x < task.x + task.w; x++)
154                                         kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
155                                                 task.sample, x, y, task.offset, task.stride);
156
157                                 if(task_pool.cancelled())
158                                         break;
159                         }
160                 }
161
162 #ifdef WITH_OSL
163                 if(kernel_osl_use(kg))
164                         OSLShader::thread_free(kg);
165 #endif
166         }
167
168         void thread_tonemap(DeviceTask& task)
169         {
170 #ifdef WITH_OPTIMIZED_KERNEL
171                 if(system_cpu_support_optimized()) {
172                         for(int y = task.y; y < task.y + task.h; y++)
173                                 for(int x = task.x; x < task.x + task.w; x++)
174                                         kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
175                                                 task.sample, task.resolution, x, y, task.offset, task.stride);
176                 }
177                 else
178 #endif
179                 {
180                         for(int y = task.y; y < task.y + task.h; y++)
181                                 for(int x = task.x; x < task.x + task.w; x++)
182                                         kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
183                                                 task.sample, task.resolution, x, y, task.offset, task.stride);
184                 }
185         }
186
187         void thread_shader(DeviceTask& task)
188         {
189 #ifdef WITH_OSL
190                 if(kernel_osl_use(kg))
191                         OSLShader::thread_init(kg);
192 #endif
193
194 #ifdef WITH_OPTIMIZED_KERNEL
195                 if(system_cpu_support_optimized()) {
196                         for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
197                                 kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
198
199                                 if(task_pool.cancelled())
200                                         break;
201                         }
202                 }
203                 else
204 #endif
205                 {
206                         for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
207                                 kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
208
209                                 if(task_pool.cancelled())
210                                         break;
211                         }
212                 }
213
214 #ifdef WITH_OSL
215                 if(kernel_osl_use(kg))
216                         OSLShader::thread_free(kg);
217 #endif
218         }
219
220         void task_add(DeviceTask& task)
221         {
222                 /* split task into smaller ones, more than number of threads for uneven
223                    workloads where some parts of the image render slower than others */
224                 list<DeviceTask> tasks;
225
226                 task.split(tasks, TaskScheduler::num_threads()*10);
227
228                 foreach(DeviceTask& task, tasks)
229                         task_pool.push(new DeviceTask(task));
230         }
231
232         void task_wait()
233         {
234                 task_pool.wait();
235         }
236
237         void task_cancel()
238         {
239                 task_pool.cancel();
240         }
241 };
242
243 Device *device_cpu_create(DeviceInfo& info, int threads)
244 {
245         return new CPUDevice(threads);
246 }
247
248 void device_cpu_info(vector<DeviceInfo>& devices)
249 {
250         DeviceInfo info;
251
252         info.type = DEVICE_CPU;
253         info.description = system_cpu_brand_string();
254         info.id = "CPU";
255         info.num = 0;
256         info.advanced_shading = true;
257
258         devices.insert(devices.begin(), info);
259 }
260
261 CCL_NAMESPACE_END
262