Cycles: Replace __MAX_CLOSURE__ build option with runtime integrator variable
[blender.git] / intern / cycles / device / device_split_kernel.h
1 /*
2  * Copyright 2011-2016 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __DEVICE_SPLIT_KERNEL_H__
18 #define __DEVICE_SPLIT_KERNEL_H__
19
20 #include "device/device.h"
21 #include "render/buffers.h"
22
23 CCL_NAMESPACE_BEGIN
24
/* When allocating global memory in chunks, we may not be able to
 * allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes per chunk,
 * since some bytes may be needed for aligning the chunks in memory.
 * This is the amount of memory we set aside for that purpose.
 */
30 #define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
31
32 /* Types used for split kernel */
33
/* Launch dimensions for a split kernel: the 2D global work size paired
 * with the 2D work group (local) size used when enqueueing it. */
class KernelDimensions {
public:
	size_t global_size[2];
	size_t local_size[2];

	KernelDimensions(size_t global_size_[2], size_t local_size_[2])
	{
		/* Array parameters decay to pointers, so copy both
		 * elements of each pair explicitly. */
		for(int axis = 0; axis < 2; axis++) {
			global_size[axis] = global_size_[axis];
			local_size[axis] = local_size_[axis];
		}
	}
};
45
/* Abstract handle to one stage of the split kernel. Device backends
 * (OpenCL, CUDA, CPU) subclass this to wrap their own compiled kernel
 * objects; instances are created by
 * DeviceSplitKernel::get_split_kernel_function(). */
class SplitKernelFunction {
public:
	virtual ~SplitKernelFunction() {}

	/* Enqueue the kernel with the given launch dimensions; kg and data are
	 * the kernel globals and kernel data buffers passed to every stage.
	 * Returns false if there is an error. */
	virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
};
53
/* Device-agnostic driver for the split path tracing kernel.
 *
 * Holds one SplitKernelFunction per stage of the split path tracer plus
 * the global buffers the stages use to communicate. Device backends
 * subclass this and implement the pure virtual hooks for compiling and
 * launching their own kernels. */
class DeviceSplitKernel {
private:
	/* Device the kernels run on. Non-owning pointer; assumed to outlive
	 * this object -- TODO(review): confirm against the owning backend. */
	Device *device;

	/* One handle per stage of the split path tracer, obtained via
	 * get_split_kernel_function(). Presumably enqueued in roughly this
	 * order by path_trace() (implementation not visible here). */
	SplitKernelFunction *kernel_path_init;
	SplitKernelFunction *kernel_scene_intersect;
	SplitKernelFunction *kernel_lamp_emission;
	SplitKernelFunction *kernel_do_volume;
	SplitKernelFunction *kernel_queue_enqueue;
	SplitKernelFunction *kernel_indirect_background;
	SplitKernelFunction *kernel_shader_setup;
	SplitKernelFunction *kernel_shader_sort;
	SplitKernelFunction *kernel_shader_eval;
	SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
	SplitKernelFunction *kernel_subsurface_scatter;
	SplitKernelFunction *kernel_direct_lighting;
	SplitKernelFunction *kernel_shadow_blocked_ao;
	SplitKernelFunction *kernel_shadow_blocked_dl;
	SplitKernelFunction *kernel_enqueue_inactive;
	SplitKernelFunction *kernel_next_iteration_setup;
	SplitKernelFunction *kernel_indirect_subsurface;
	SplitKernelFunction *kernel_buffer_update;

	/* Global memory variables [porting]; This memory is used for
	 * co-operation between different kernels: data written by one
	 * kernel is made available to another kernel via these global
	 * buffers.
	 */
	/* Per-path split state shared by all stages. */
	device_only_memory<uchar> split_data;
	/* Per-path ray state flags; a device_vector (host-visible) rather
	 * than device-only memory. */
	device_vector<uchar> ray_state;
	device_only_memory<int> queue_index; /* Array of size num_queues that tracks the size of each queue. */

	/* Flag to make sceneintersect and lampemission kernel use queues. */
	device_only_memory<char> use_queues_flag;

	/* Approximate time it takes to complete one sample. */
	double avg_time_per_sample;

	/* Work pool with respect to each work group. */
	device_only_memory<unsigned int> work_pool_wgs;

	/* Marked true in constructor and marked false at the end of path_trace(). */
	bool first_tile;

	/* Cached global size from the previous launch, so buffers are only
	 * re-sized when the work size changes -- TODO(review): confirm usage
	 * in the implementation. */
	size_t global_size[2];

public:
	explicit DeviceSplitKernel(Device* device);
	virtual ~DeviceSplitKernel();

	/* Create the per-stage kernel functions for the requested feature set.
	 * Returns false on failure. */
	bool load_kernels(const DeviceRequestedFeatures& requested_features);

	/* Render one tile by running the split kernel stages; kgbuffer and
	 * kernel_data are the kernel globals/data buffers shared with every
	 * stage. Returns false on error. */
	bool path_trace(DeviceTask *task,
	                RenderTile& rtile,
	                device_memory& kgbuffer,
	                device_memory& kernel_data);

	/* Size in bytes of the split state buffer needed to run num_threads
	 * paths; implemented per backend. */
	virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;

	/* Largest number of elements whose state buffer still fits within
	 * max_buffer_size bytes. */
	size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);

	/* Enqueue the backend-specific kernel that initializes the split
	 * state buffers for a tile before the main stages run. */
	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
	                                            RenderTile& rtile,
	                                            int num_global_elements,
	                                            device_memory& kernel_globals,
	                                            device_memory& kernel_data_,
	                                            device_memory& split_data,
	                                            device_memory& ray_state,
	                                            device_memory& queue_index,
	                                            device_memory& use_queues_flag,
	                                            device_memory& work_pool_wgs) = 0;

	/* Compile/look up the named split kernel stage for the requested
	 * feature set; returns NULL-equivalent on failure (backend-defined). */
	virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
	                                                       const DeviceRequestedFeatures&) = 0;
	/* Backend-preferred work group (local) size. */
	virtual int2 split_kernel_local_size() = 0;
	/* Backend-preferred global work size for the given task and buffers. */
	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
};
130
131 CCL_NAMESPACE_END
132
133 #endif /* __DEVICE_SPLIT_KERNEL_H__ */
134
135
136