/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <stdlib.h>
#include <string.h>

#include "device.h"
#include "device_intern.h"

#include "kernel.h"
#include "kernel_types.h"

#include "osl_shader.h"

#include "buffers.h"

#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
#include "util_opengl.h"
#include "util_progress.h"
#include "util_system.h"
#include "util_thread.h"

CCL_NAMESPACE_BEGIN

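/* CPU device implementation. Kernels run directly on the host, so memory
 * "allocation" and copies mostly just alias the existing host pointers;
 * rendering work is split into tasks and executed through a TaskPool. */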
class CPUDevice : public Device
{
public:
	TaskPool task_pool;
	KernelGlobals *kg;

	CPUDevice(int threads_num)
	{
		kg = kernel_globals_create();

		/* detect optimized kernel support now to avoid thread issues later */
		system_cpu_support_optimized();
	}

	~CPUDevice()
	{
		task_pool.stop();
		kernel_globals_free(kg);
	}

	bool support_advanced_shading()
	{
		return true;
	}

	void mem_alloc(device_memory& mem, MemoryType type)
	{
		mem.device_pointer = mem.data_pointer;
	}

	void mem_copy_to(device_memory& mem)
	{
		/* no-op */
	}

	void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
	{
		/* no-op */
	}

	void mem_zero(device_memory& mem)
	{
		memset((void*)mem.device_pointer, 0, mem.memory_size());
	}

	void mem_free(device_memory& mem)
	{
		mem.device_pointer = 0;
	}

	void const_copy_to(const char *name, void *host, size_t size)
	{
		kernel_const_copy(kg, name, host, size);
	}

	void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
	{
		kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
		mem.device_pointer = mem.data_pointer;
	}

	void tex_free(device_memory& mem)
	{
		mem.device_pointer = 0;
	}

	void *osl_memory()
	{
#ifdef WITH_OSL
		return kernel_osl_memory(kg);
#else
		return NULL;
#endif
	}

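	/* Entry point executed by TaskPool worker threads; dispatches to the
	 * handler for the task type. */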
	void thread_run(DeviceTask *task)
	{
		if(task->type == DeviceTask::PATH_TRACE)
			thread_path_trace(*task);
		else if(task->type == DeviceTask::TONEMAP)
			thread_tonemap(*task);
		else if(task->type == DeviceTask::SHADER)
			thread_shader(*task);
	}

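	/* Wraps a DeviceTask and binds its run callback to CPUDevice::thread_run,
	 * so the TaskPool executes the task on a worker thread. */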
	class CPUDeviceTask : public DeviceTask {
	public:
		CPUDeviceTask(CPUDevice *device, DeviceTask& task)
		: DeviceTask(task)
		{
			run = function_bind(&CPUDevice::thread_run, device, this);
		}
	};

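	/* Path tracing: repeatedly acquire a tile, render its sample range pixel
	 * by pixel, update progress after each sample, and release the tile.
	 * Cancellation is checked between samples and between tiles. */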
	void thread_path_trace(DeviceTask& task)
	{
		if(task_pool.cancelled())
			return;

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_init(kg);
#endif

		RenderTile tile;

		while(task.acquire_tile(this, tile)) {
			float *render_buffer = (float*)tile.buffer;
			uint *rng_state = (uint*)tile.rng_state;
			int start_sample = tile.start_sample;
			int end_sample = tile.start_sample + tile.num_samples;

#ifdef WITH_OPTIMIZED_KERNEL
			if(system_cpu_support_optimized()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if(task.get_cancel() || task_pool.cancelled())
						break;

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
			{
				for(int sample = start_sample; sample < end_sample; sample++) {
					if(task.get_cancel() || task_pool.cancelled())
						break;

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_path_trace(kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}

			task.release_tile(tile);

			if(task_pool.cancelled())
				break;
		}

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_free(kg);
#endif
	}

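	/* Tonemap: convert the float render buffer to byte rgba for the
	 * requested rectangle. */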
	void thread_tonemap(DeviceTask& task)
	{
#ifdef WITH_OPTIMIZED_KERNEL
		if(system_cpu_support_optimized()) {
			for(int y = task.y; y < task.y + task.h; y++)
				for(int x = task.x; x < task.x + task.w; x++)
					kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
						task.sample, task.resolution, x, y, task.offset, task.stride);
		}
		else
#endif
		{
			for(int y = task.y; y < task.y + task.h; y++)
				for(int x = task.x; x < task.x + task.w; x++)
					kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
						task.sample, task.resolution, x, y, task.offset, task.stride);
		}
	}

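	/* Shader evaluation: run the shader kernel over the requested range of
	 * input points, writing results to shader_output; cancellation is
	 * checked after each point. */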
	void thread_shader(DeviceTask& task)
	{
#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_init(kg);
#endif

#ifdef WITH_OPTIMIZED_KERNEL
		if(system_cpu_support_optimized()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task_pool.cancelled())
					break;
			}
		}
		else
#endif
		{
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task_pool.cancelled())
					break;
			}
		}

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_free(kg);
#endif
	}

	void task_add(DeviceTask& task)
	{
		/* split the task into more pieces than there are threads, to better
		 * balance uneven workloads where some parts of the image render
		 * slower than others */
		list<DeviceTask> tasks;
		task.split(tasks, TaskScheduler::num_threads()+1);

		foreach(DeviceTask& task, tasks)
			task_pool.push(new CPUDeviceTask(this, task));
	}

	void task_wait()
	{
		task_pool.wait_work();
	}

	void task_cancel()
	{
		task_pool.cancel();
	}
};

Device *device_cpu_create(DeviceInfo& info, int threads)
{
	return new CPUDevice(threads);
}

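/* Registers the CPU as a single device entry, inserted at the front of the
 * device list. */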
void device_cpu_info(vector<DeviceInfo>& devices)
{
	DeviceInfo info;

	info.type = DEVICE_CPU;
	info.description = system_cpu_brand_string();
	info.id = "CPU";
	info.num = 0;
	info.advanced_shading = true;
	info.pack_images = false;

	devices.insert(devices.begin(), info);
}

CCL_NAMESPACE_END