Cycles: enable multi closure sampling and transparent shadows only on CPU and
intern/cycles/device/device_cpu.cpp
/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <stdlib.h>
#include <string.h>

#include "device.h"
#include "device_intern.h"

#include "kernel.h"
#include "kernel_types.h"

#include "osl_shader.h"

#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
#include "util_opengl.h"
#include "util_progress.h"
#include "util_system.h"
#include "util_thread.h"

CCL_NAMESPACE_BEGIN

class CPUDevice : public Device
{
public:
	/* Worker threads that consume tasks from the queue. */
	vector<thread*> threads;
	/* Queue of pending device tasks. */
	ThreadQueue<DeviceTask> tasks;
	/* Kernel globals (constants, textures) shared by all worker threads. */
	KernelGlobals *kg;

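	/* Create the kernel globals and spawn the worker threads; a thread count
	   of zero means one thread per available CPU core. */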
	CPUDevice(int threads_num)
	{
		kg = kernel_globals_create();

		if(threads_num == 0)
			threads_num = system_cpu_thread_count();

		threads.resize(threads_num);

		for(size_t i = 0; i < threads.size(); i++)
			threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i));
	}

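	/* Stop the task queue, join and delete all worker threads, then free
	   the kernel globals. */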
	~CPUDevice()
	{
		tasks.stop();

		foreach(thread *t, threads) {
			t->join();
			delete t;
		}

		kernel_globals_free(kg);
	}

	bool support_full_kernel()
	{
		return true;
	}

	string description()
	{
		return system_cpu_brand_string();
	}

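	/* The CPU device operates directly on host memory, so allocation simply
	   aliases the existing host pointer and host/device copies are no-ops. */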
	void mem_alloc(device_memory& mem, MemoryType type)
	{
		mem.device_pointer = mem.data_pointer;
	}

	void mem_copy_to(device_memory& mem)
	{
		/* no-op */
	}

	void mem_copy_from(device_memory& mem, size_t offset, size_t size)
	{
		/* no-op */
	}

	void mem_zero(device_memory& mem)
	{
		memset((void*)mem.device_pointer, 0, mem.memory_size());
	}

	void mem_free(device_memory& mem)
	{
		mem.device_pointer = 0;
	}

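	/* Constants and textures are registered with the kernel globals so the
	   CPU kernel can access them directly. */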
	void const_copy_to(const char *name, void *host, size_t size)
	{
		kernel_const_copy(kg, name, host, size);
	}

	void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
	{
		kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
		mem.device_pointer = mem.data_pointer;
	}

	void tex_free(device_memory& mem)
	{
		mem.device_pointer = 0;
	}

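	/* Return the OSL globals when built with OSL support, NULL otherwise. */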
	void *osl_memory()
	{
#ifdef WITH_OSL
		return kernel_osl_memory(kg);
#else
		return NULL;
#endif
	}

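	/* Worker thread main loop: pop tasks from the queue and dispatch them by
	   type until the queue is stopped. */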
	void thread_run(int t)
	{
		DeviceTask task;

		while(tasks.worker_wait_pop(task)) {
			if(task.type == DeviceTask::PATH_TRACE)
				thread_path_trace(task);
			else if(task.type == DeviceTask::TONEMAP)
				thread_tonemap(task);
			else if(task.type == DeviceTask::DISPLACE)
				thread_displace(task);

			tasks.worker_done();
		}
	}

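	/* Path trace the task's tile one scanline at a time, checking for
	   cancellation after each scanline. Per-thread OSL data is initialized
	   and freed around the loop when OSL shading is in use. */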
	void thread_path_trace(DeviceTask& task)
	{
		if(tasks.worker_cancel())
			return;

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_init(kg);
#endif

		for(int y = task.y; y < task.y + task.h; y++) {
			for(int x = task.x; x < task.x + task.w; x++)
				kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);

			if(tasks.worker_cancel())
				break;
		}

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_free(kg);
#endif
	}

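	/* Convert accumulated samples in the render buffer to display rgba. */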
	void thread_tonemap(DeviceTask& task)
	{
		for(int y = task.y; y < task.y + task.h; y++) {
			for(int x = task.x; x < task.x + task.w; x++)
				kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
		}
	}

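	/* Evaluate displacement for a range of input points, checking for
	   cancellation after each point. */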
	void thread_displace(DeviceTask& task)
	{
#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_init(kg);
#endif

		for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
			kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);

			if(tasks.worker_cancel())
				break;
		}

#ifdef WITH_OSL
		if(kernel_osl_use(kg))
			OSLShader::thread_free(kg);
#endif
	}

	void task_add(DeviceTask& task)
	{
		/* Split the task into more pieces than there are threads, so that
		   uneven workloads, where some parts of the image render slower
		   than others, are balanced across the workers. */
		task.split(tasks, threads.size()*10);
	}

	void task_wait()
	{
		tasks.wait_done();
	}

	void task_cancel()
	{
		tasks.cancel();
	}
};

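/* Factory function; a thread count of zero autodetects the number of CPU
   threads. */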
Device *device_cpu_create(int threads)
{
	return new CPUDevice(threads);
}

CCL_NAMESPACE_END
