2409cc65998dea57e5749a27f4c97c3599e1c8fc
[blender.git] / intern / cycles / device / device_cpu.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "device.h"
23 #include "device_intern.h"
24
25 #include "kernel.h"
26 #include "kernel_types.h"
27
28 #include "osl_shader.h"
29
30 #include "util_debug.h"
31 #include "util_foreach.h"
32 #include "util_function.h"
33 #include "util_opengl.h"
34 #include "util_progress.h"
35 #include "util_system.h"
36 #include "util_thread.h"
37
38 CCL_NAMESPACE_BEGIN
39
40 class CPUDevice : public Device
41 {
42 public:
43         vector<thread*> threads;
44         ThreadQueue<DeviceTask> tasks;
45         KernelGlobals *kg;
46         
47         CPUDevice(int threads_num)
48         {
49                 kg = kernel_globals_create();
50
51                 if(threads_num == 0)
52                         threads_num = system_cpu_thread_count();
53
54                 threads.resize(threads_num);
55
56                 for(size_t i = 0; i < threads.size(); i++)
57                         threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i));
58         }
59
60         ~CPUDevice()
61         {
62                 tasks.stop();
63
64                 foreach(thread *t, threads) {
65                         t->join();
66                         delete t;
67                 }
68
69                 kernel_globals_free(kg);
70         }
71
72         string description()
73         {
74                 return system_cpu_brand_string();
75         }
76
77         void mem_alloc(device_memory& mem, MemoryType type)
78         {
79                 mem.device_pointer = mem.data_pointer;
80         }
81
82         void mem_copy_to(device_memory& mem)
83         {
84                 /* no-op */
85         }
86
87         void mem_copy_from(device_memory& mem, size_t offset, size_t size)
88         {
89                 /* no-op */
90         }
91
92         void mem_zero(device_memory& mem)
93         {
94                 memset((void*)mem.device_pointer, 0, mem.memory_size());
95         }
96
97         void mem_free(device_memory& mem)
98         {
99                 mem.device_pointer = 0;
100         }
101
102         void const_copy_to(const char *name, void *host, size_t size)
103         {
104                 kernel_const_copy(kg, name, host, size);
105         }
106
107         void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
108         {
109                 kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
110                 mem.device_pointer = mem.data_pointer;
111         }
112
113         void tex_free(device_memory& mem)
114         {
115                 mem.device_pointer = 0;
116         }
117
118         void *osl_memory()
119         {
120 #ifdef WITH_OSL
121                 return kernel_osl_memory(kg);
122 #else
123                 return NULL;
124 #endif
125         }
126
127         void thread_run(int t)
128         {
129                 DeviceTask task;
130
131                 while(tasks.worker_wait_pop(task)) {
132                         if(task.type == DeviceTask::PATH_TRACE)
133                                 thread_path_trace(task);
134                         else if(task.type == DeviceTask::TONEMAP)
135                                 thread_tonemap(task);
136                         else if(task.type == DeviceTask::DISPLACE)
137                                 thread_displace(task);
138
139                         tasks.worker_done();
140                 }
141         }
142
143         void thread_path_trace(DeviceTask& task)
144         {
145                 if(tasks.worker_cancel())
146                         return;
147
148 #ifdef WITH_OSL
149                 if(kernel_osl_use(kg))
150                         OSLShader::thread_init(kg);
151 #endif
152
153                 for(int y = task.y; y < task.y + task.h; y++) {
154                         for(int x = task.x; x < task.x + task.w; x++)
155                                 kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
156
157                         if(tasks.worker_cancel())
158                                 break;
159                 }
160
161 #ifdef WITH_OSL
162                 if(kernel_osl_use(kg))
163                         OSLShader::thread_free(kg);
164 #endif
165         }
166
167         void thread_tonemap(DeviceTask& task)
168         {
169                 for(int y = task.y; y < task.y + task.h; y++) {
170                         for(int x = task.x; x < task.x + task.w; x++)
171                                 kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
172                 }
173         }
174
175         void thread_displace(DeviceTask& task)
176         {
177 #ifdef WITH_OSL
178                 if(kernel_osl_use(kg))
179                         OSLShader::thread_init(kg);
180 #endif
181
182                 for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
183                         kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
184
185                         if(tasks.worker_cancel())
186                                 break;
187                 }
188
189 #ifdef WITH_OSL
190                 if(kernel_osl_use(kg))
191                         OSLShader::thread_free(kg);
192 #endif
193         }
194
195         void task_add(DeviceTask& task)
196         {
197                 /* split task into smaller ones, more than number of threads for uneven
198                    workloads where some parts of the image render slower than others */
199                 task.split(tasks, threads.size()*10);
200         }
201
202         void task_wait()
203         {
204                 tasks.wait_done();
205         }
206
207         void task_cancel()
208         {
209                 tasks.cancel();
210         }
211 };
212
213 Device *device_cpu_create(int threads)
214 {
215         return new CPUDevice(threads);
216 }
217
218 CCL_NAMESPACE_END
219