Cycles: Support multithreaded compilation of kernels
[blender.git] / intern / cycles / device / opencl / opencl_mega.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef WITH_OPENCL
18
19 #include "device/opencl/opencl.h"
20
21 #include "render/buffers.h"
22
23 #include "kernel/kernel_types.h"
24
25 #include "util/util_md5.h"
26 #include "util/util_path.h"
27 #include "util/util_time.h"
28
29 CCL_NAMESPACE_BEGIN
30
31 class OpenCLDeviceMegaKernel : public OpenCLDeviceBase
32 {
33 public:
34         OpenCLProgram path_trace_program;
35
36         OpenCLDeviceMegaKernel(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
37         : OpenCLDeviceBase(info, stats, profiler, background_),
38           path_trace_program(this,
39                              get_opencl_program_name(false, "megakernel"),
40                              get_opencl_program_filename(false, "megakernel"),
41                              "-D__COMPILE_ONLY_MEGAKERNEL__ ")
42         {
43         }
44
45
46         virtual bool show_samples() const
47         {
48                 return true;
49         }
50
51         virtual BVHLayoutMask get_bvh_layout_mask() const
52         {
53                 return BVH_LAYOUT_BVH2;
54         }
55
56         const string get_opencl_program_name(bool /*single_program*/, const string& kernel_name)
57         {
58                 return kernel_name;
59         }
60
61         const string get_opencl_program_filename(bool /*single_program*/, const string& /*kernel_name*/)
62         {
63                 return "kernel.cl";
64         }
65
66         virtual bool add_kernel_programs(const DeviceRequestedFeatures& /*requested_features*/,
67                                   vector<OpenCLProgram*> &programs)
68         {
69                 path_trace_program.add_kernel(ustring("path_trace"));
70                 programs.push_back(&path_trace_program);
71                 return true;
72         }
73
74         ~OpenCLDeviceMegaKernel()
75         {
76                 task_pool.stop();
77                 path_trace_program.release();
78         }
79
80         void path_trace(RenderTile& rtile, int sample)
81         {
82                 scoped_timer timer(&rtile.buffers->render_time);
83
84                 /* Cast arguments to cl types. */
85                 cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
86                 cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
87                 cl_int d_x = rtile.x;
88                 cl_int d_y = rtile.y;
89                 cl_int d_w = rtile.w;
90                 cl_int d_h = rtile.h;
91                 cl_int d_offset = rtile.offset;
92                 cl_int d_stride = rtile.stride;
93
94                 /* Sample arguments. */
95                 cl_int d_sample = sample;
96
97                 cl_kernel ckPathTraceKernel = path_trace_program(ustring("path_trace"));
98
99                 cl_uint start_arg_index =
100                         kernel_set_args(ckPathTraceKernel,
101                                         0,
102                                         d_data,
103                                         d_buffer);
104
105                 set_kernel_arg_buffers(ckPathTraceKernel, &start_arg_index);
106
107                 start_arg_index += kernel_set_args(ckPathTraceKernel,
108                                                    start_arg_index,
109                                                    d_sample,
110                                                    d_x,
111                                                    d_y,
112                                                    d_w,
113                                                    d_h,
114                                                    d_offset,
115                                                    d_stride);
116
117                 enqueue_kernel(ckPathTraceKernel, d_w, d_h);
118         }
119
120         void thread_run(DeviceTask *task)
121         {
122                 if(task->type == DeviceTask::FILM_CONVERT) {
123                         film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
124                 }
125                 else if(task->type == DeviceTask::SHADER) {
126                         shader(*task);
127                 }
128                 else if(task->type == DeviceTask::RENDER) {
129                         RenderTile tile;
130                         DenoisingTask denoising(this, *task);
131
132                         /* Keep rendering tiles until done. */
133                         while(task->acquire_tile(this, tile)) {
134                                 if(tile.task == RenderTile::PATH_TRACE) {
135                                         int start_sample = tile.start_sample;
136                                         int end_sample = tile.start_sample + tile.num_samples;
137
138                                         for(int sample = start_sample; sample < end_sample; sample++) {
139                                                 if(task->get_cancel()) {
140                                                         if(task->need_finish_queue == false)
141                                                                 break;
142                                                 }
143
144                                                 path_trace(tile, sample);
145
146                                                 tile.sample = sample + 1;
147
148                                                 task->update_progress(&tile, tile.w*tile.h);
149                                         }
150
151                                         /* Complete kernel execution before release tile */
152                                         /* This helps in multi-device render;
153                                          * The device that reaches the critical-section function
154                                          * release_tile waits (stalling other devices from entering
155                                          * release_tile) for all kernels to complete. If device1 (a
156                                          * slow-render device) reaches release_tile first then it would
157                                          * stall device2 (a fast-render device) from proceeding to render
158                                          * next tile.
159                                          */
160                                         clFinish(cqCommandQueue);
161                                 }
162                                 else if(tile.task == RenderTile::DENOISE) {
163                                         tile.sample = tile.start_sample + tile.num_samples;
164                                         denoise(tile, denoising);
165                                         task->update_progress(&tile, tile.w*tile.h);
166                                 }
167
168                                 task->release_tile(tile);
169                         }
170                 }
171         }
172
173         bool is_split_kernel()
174         {
175                 return false;
176         }
177 };
178
179 Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, Profiler &profiler, bool background)
180 {
181         return new OpenCLDeviceMegaKernel(info, stats, profiler, background);
182 }
183
184 CCL_NAMESPACE_END
185
186 #endif