9c42cb5852025effc03059f029bd25531d475f53
[blender-staging.git] / intern / cycles / device / device_split_kernel.h
1 /*
2  * Copyright 2011-2016 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __DEVICE_SPLIT_KERNEL_H__
18 #define __DEVICE_SPLIT_KERNEL_H__
19
20 #include "device/device.h"
21 #include "render/buffers.h"
22
23 CCL_NAMESPACE_BEGIN
24
25 /* When allocating global memory in chunks, we may not be able to
26  * allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks,
27  * since some bytes may be needed for aligning the chunks of memory.
28  * This is the amount of memory that we dedicate for that purpose.
29  */
30 #define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
31
32 /* Types used for split kernel */
33
/* Pair of two-element global and local sizes passed to
 * SplitKernelFunction::enqueue() when a kernel is enqueued.
 */
class KernelDimensions {
public:
        size_t global_size[2];
        size_t local_size[2];

        /* Copies the two entries of each input array into this object.
         *
         * Both arguments must point to at least two readable elements. They are
         * taken as pointers-to-const so that callers may pass const arrays as
         * well as mutable ones; the inputs are never modified.
         */
        KernelDimensions(const size_t global_size_[2], const size_t local_size_[2])
        {
                /* Element-wise copy: avoids depending on memcpy(), for which this
                 * header does not include a declaration itself. */
                for(int i = 0; i < 2; i++) {
                        global_size[i] = global_size_[i];
                        local_size[i] = local_size_[i];
                }
        }
};
45
46 class SplitKernelFunction {
47 public:
48         virtual ~SplitKernelFunction() {}
49
50         /* enqueue the kernel, returns false if there is an error */
51         virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
52 };
53
54 class DeviceSplitKernel {
55 private:
56         Device *device;
57
58         SplitKernelFunction *kernel_path_init;
59         SplitKernelFunction *kernel_scene_intersect;
60         SplitKernelFunction *kernel_lamp_emission;
61         SplitKernelFunction *kernel_do_volume;
62         SplitKernelFunction *kernel_queue_enqueue;
63         SplitKernelFunction *kernel_indirect_background;
64         SplitKernelFunction *kernel_shader_setup;
65         SplitKernelFunction *kernel_shader_sort;
66         SplitKernelFunction *kernel_shader_eval;
67         SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
68         SplitKernelFunction *kernel_subsurface_scatter;
69         SplitKernelFunction *kernel_direct_lighting;
70         SplitKernelFunction *kernel_shadow_blocked_ao;
71         SplitKernelFunction *kernel_shadow_blocked_dl;
72         SplitKernelFunction *kernel_enqueue_inactive;
73         SplitKernelFunction *kernel_next_iteration_setup;
74         SplitKernelFunction *kernel_indirect_subsurface;
75         SplitKernelFunction *kernel_buffer_update;
76
77         /* Global memory variables [porting]; These memory is used for
78          * co-operation between different kernels; Data written by one
79          * kernel will be available to another kernel via this global
80          * memory.
81          */
82         device_memory split_data;
83         device_vector<uchar> ray_state;
84         device_only_memory<int> queue_index; /* Array of size num_queues that tracks the size of each queue. */
85
86         /* Flag to make sceneintersect and lampemission kernel use queues. */
87         device_only_memory<char> use_queues_flag;
88
89         /* Approximate time it takes to complete one sample */
90         double avg_time_per_sample;
91
92         /* Work pool with respect to each work group. */
93         device_only_memory<unsigned int> work_pool_wgs;
94
95         /* clos_max value for which the kernels have been loaded currently. */
96         int current_max_closure;
97
98         /* Marked True in constructor and marked false at the end of path_trace(). */
99         bool first_tile;
100
101         /* Cached global size */
102         size_t global_size[2];
103
104 public:
105         explicit DeviceSplitKernel(Device* device);
106         virtual ~DeviceSplitKernel();
107
108         bool load_kernels(const DeviceRequestedFeatures& requested_features);
109         bool path_trace(DeviceTask *task,
110                         RenderTile& rtile,
111                         device_memory& kgbuffer,
112                         device_memory& kernel_data);
113
114         virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
115         size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
116
117         virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
118                                                     RenderTile& rtile,
119                                                     int num_global_elements,
120                                                     device_memory& kernel_globals,
121                                                     device_memory& kernel_data_,
122                                                     device_memory& split_data,
123                                                     device_memory& ray_state,
124                                                     device_memory& queue_index,
125                                                     device_memory& use_queues_flag,
126                                                     device_memory& work_pool_wgs) = 0;
127
128         virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
129                                                                const DeviceRequestedFeatures&) = 0;
130         virtual int2 split_kernel_local_size() = 0;
131         virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
132 };
133
134 CCL_NAMESPACE_END
135
136 #endif /* __DEVICE_SPLIT_KERNEL_H__ */
137
138
139