b3106fd56329725ef13a00792c588decb0b7b187
[blender-staging.git] / intern / cycles / device / device_split_kernel.h
1 /*
2  * Copyright 2011-2016 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __DEVICE_SPLIT_KERNEL_H__
18 #define __DEVICE_SPLIT_KERNEL_H__
19
20 #include "device.h"
21 #include "buffers.h"
22
23 CCL_NAMESPACE_BEGIN
24
/* Safety margin used when global memory is allocated in chunks.
 *
 * A chunked allocation may not be able to use exactly
 * "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes, because some bytes may be needed
 * for aligning the chunks of memory. This is the amount of memory that is
 * dedicated to that purpose.
 */
#define DATA_ALLOCATION_MEM_FACTOR 5000000 /* 5MB */
31
32 /* Types used for split kernel */
33
/* Pair of two-entry size arrays (global and local) handed to
 * SplitKernelFunction::enqueue().
 *
 * The object keeps its own copy of both arrays, so the arrays passed to
 * the constructor do not need to outlive it.
 */
class KernelDimensions {
public:
        size_t global_size[2];
        size_t local_size[2];

        /* Copy the first two entries of each argument.
         *
         * Both arguments must point to at least two elements. They are only
         * read, hence pointers to const: read-only arrays are accepted, and
         * mutable arrays still convert implicitly.
         */
        KernelDimensions(const size_t global_size_[2], const size_t local_size_[2])
        {
                /* Plain element copies, so this header does not depend on
                 * memcpy() being declared by some other include. */
                for(int i = 0; i < 2; i++) {
                        global_size[i] = global_size_[i];
                        local_size[i] = local_size_[i];
                }
        }
};
45
46 class SplitKernelFunction {
47 public:
48         virtual ~SplitKernelFunction() {}
49
50         /* enqueue the kernel, returns false if there is an error */
51         virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
52 };
53
54 class DeviceSplitKernel {
55 private:
56         Device *device;
57
58         SplitKernelFunction *kernel_scene_intersect;
59         SplitKernelFunction *kernel_lamp_emission;
60         SplitKernelFunction *kernel_queue_enqueue;
61         SplitKernelFunction *kernel_background_buffer_update;
62         SplitKernelFunction *kernel_shader_eval;
63         SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
64         SplitKernelFunction *kernel_direct_lighting;
65         SplitKernelFunction *kernel_shadow_blocked;
66         SplitKernelFunction *kernel_next_iteration_setup;
67         SplitKernelFunction *kernel_sum_all_radiance;
68
69         /* Global memory variables [porting]; These memory is used for
70          * co-operation between different kernels; Data written by one
71          * kernel will be available to another kernel via this global
72          * memory.
73          */
74         device_memory split_data;
75         device_vector<uchar> ray_state;
76         device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. */
77
78         /* Flag to make sceneintersect and lampemission kernel use queues. */
79         device_memory use_queues_flag;
80
81         /* Approximate time it takes to complete one sample */
82         double avg_time_per_sample;
83
84         /* Work pool with respect to each work group. */
85         device_memory work_pool_wgs;
86
87         /* clos_max value for which the kernels have been loaded currently. */
88         int current_max_closure;
89
90         /* Marked True in constructor and marked false at the end of path_trace(). */
91         bool first_tile;
92
93 public:
94         explicit DeviceSplitKernel(Device* device);
95         virtual ~DeviceSplitKernel();
96
97         bool load_kernels(const DeviceRequestedFeatures& requested_features);
98         bool path_trace(DeviceTask *task,
99                         RenderTile& rtile,
100                         device_memory& kgbuffer,
101                         device_memory& kernel_data);
102
103         size_t max_elements_for_max_buffer_size(size_t max_buffer_size, size_t passes_size);
104
105         virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
106                                                     RenderTile& rtile,
107                                                     int num_global_elements,
108                                                     device_memory& kernel_globals,
109                                                     device_memory& kernel_data_,
110                                                     device_memory& split_data,
111                                                     device_memory& ray_state,
112                                                     device_memory& queue_index,
113                                                     device_memory& use_queues_flag,
114                                                     device_memory& work_pool_wgs) = 0;
115
116         virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0;
117         virtual int2 split_kernel_local_size() = 0;
118         virtual int2 split_kernel_global_size(DeviceTask *task) = 0;
119 };
120
121 CCL_NAMESPACE_END
122
123 #endif /* __DEVICE_SPLIT_KERNEL_H__ */
124
125
126