Cycles: Make shadow catcher an optional feature for OpenCL
[blender.git] / intern / cycles / device / device_split_kernel.h
1 /*
2  * Copyright 2011-2016 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __DEVICE_SPLIT_KERNEL_H__
18 #define __DEVICE_SPLIT_KERNEL_H__
19
20 #include "device.h"
21 #include "buffers.h"
22
23 CCL_NAMESPACE_BEGIN
24
/* When allocating global memory in chunks, we may not be able to use
 * the full "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes, since some bytes may
 * be needed for aligning the chunks of memory. This is the amount of
 * memory that we dedicate for that purpose.
 */
30 #define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB
31
32 /* Types used for split kernel */
33
/* Global and local (work-group) sizes used when enqueueing a split kernel. */
class KernelDimensions {
public:
        size_t global_size[2];
        size_t local_size[2];

        KernelDimensions(size_t global_size_[2], size_t local_size_[2])
        {
                /* Array parameters decay to pointers, so copy both pairs
                 * element by element into the member arrays. */
                for(int i = 0; i < 2; i++) {
                        global_size[i] = global_size_[i];
                        local_size[i] = local_size_[i];
                }
        }
};
45
/* Interface wrapping a single stage of the split kernel pipeline; device
 * backends (e.g. OpenCL) implement it around their native kernel object. */
class SplitKernelFunction {
public:
        virtual ~SplitKernelFunction() {}

        /* Enqueue the kernel with the given global/local dimensions;
         * returns false if there is an error.
         * kg: kernel globals buffer, data: kernel data buffer
         * (presumably the same buffers passed to path_trace() — confirm
         * against the backend implementation). */
        virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
};
53
/* Device-agnostic driver for the split path tracing kernel.
 *
 * Owns one SplitKernelFunction per pipeline stage together with the global
 * device buffers through which the stages co-operate.  Device backends
 * subclass this and implement the pure virtual methods to load and enqueue
 * the actual kernels. */
class DeviceSplitKernel {
private:
        /* Device the kernels execute on (not owned by this class). */
        Device *device;

        /* One kernel per stage of the split path tracing pipeline, listed
         * in pipeline order. */
        SplitKernelFunction *kernel_path_init;
        SplitKernelFunction *kernel_scene_intersect;
        SplitKernelFunction *kernel_lamp_emission;
        SplitKernelFunction *kernel_do_volume;
        SplitKernelFunction *kernel_queue_enqueue;
        SplitKernelFunction *kernel_indirect_background;
        SplitKernelFunction *kernel_shader_eval;
        SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
        SplitKernelFunction *kernel_subsurface_scatter;
        SplitKernelFunction *kernel_direct_lighting;
        SplitKernelFunction *kernel_shadow_blocked_ao;
        SplitKernelFunction *kernel_shadow_blocked_dl;
        SplitKernelFunction *kernel_next_iteration_setup;
        SplitKernelFunction *kernel_indirect_subsurface;
        SplitKernelFunction *kernel_buffer_update;

        /* Global memory variables [porting]; this memory is used for
         * co-operation between different kernels; data written by one
         * kernel will be available to another kernel via this global
         * memory.
         */
        device_memory split_data;
        device_vector<uchar> ray_state;
        device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */

        /* Flag to make sceneintersect and lampemission kernel use queues. */
        device_memory use_queues_flag;

        /* Approximate time it takes to complete one sample. */
        double avg_time_per_sample;

        /* Work pool with respect to each work group. */
        device_memory work_pool_wgs;

        /* clos_max value for which the kernels have been loaded currently. */
        int current_max_closure;

        /* Marked true in constructor and marked false at the end of path_trace(). */
        bool first_tile;

public:
        explicit DeviceSplitKernel(Device* device);
        virtual ~DeviceSplitKernel();

        /* Create the per-stage kernel functions for the requested feature
         * set (false on failure). */
        bool load_kernels(const DeviceRequestedFeatures& requested_features);

        /* Render one tile with the split kernel pipeline (false on failure).
         * kgbuffer: kernel globals buffer, kernel_data: kernel data buffer. */
        bool path_trace(DeviceTask *task,
                        RenderTile& rtile,
                        device_memory& kgbuffer,
                        device_memory& kernel_data);

        /* Size in bytes of the state buffer needed for num_threads parallel
         * threads; implemented by the backend. */
        virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;

        /* Largest number of elements whose state fits within
         * max_buffer_size bytes, based on state_buffer_size(). */
        size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);

        /* Enqueue the kernel that initializes the split-kernel buffers
         * (split_data, ray_state, queues, work pool) before the pipeline
         * stages run; implemented by the backend. */
        virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
                                                    RenderTile& rtile,
                                                    int num_global_elements,
                                                    device_memory& kernel_globals,
                                                    device_memory& kernel_data_,
                                                    device_memory& split_data,
                                                    device_memory& ray_state,
                                                    device_memory& queue_index,
                                                    device_memory& use_queues_flag,
                                                    device_memory& work_pool_wgs) = 0;

        /* Create the stage kernel with the given name for the requested
         * features; backend owns the compilation details. */
        virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0;

        /* Local (work-group) size the backend prefers for this device. */
        virtual int2 split_kernel_local_size() = 0;

        /* Global work size to use for the given kernel globals/data and task. */
        virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
};
126
127 CCL_NAMESPACE_END
128
129 #endif /* __DEVICE_SPLIT_KERNEL_H__ */
130
131
132