5604363dcd98da227d7f8e192309f5840fecd2bb
[blender-staging.git] / intern / cycles / kernel / split / kernel_data_init.h
1 /*
2  * Copyright 2011-2015 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Note on kernel_data_initialization kernel
20  * This kernel initializes structures needed in path-iteration kernels.
21  * This is the first kernel in ray-tracing logic.
22  *
23  * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
24  *
25  * Its input and output are as follows,
26  *
27  * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng
28  * Un-initialized throughput -------|                                  |--- Initialized throughput
29  * Un-initialized L_transparent ----|                                  |--- Initialized L_transparent
30  * Un-initialized PathRadiance -----|                                  |--- Initialized PathRadiance
31  * Un-initialized Ray --------------|                                  |--- Initialized Ray
32  * Un-initialized PathState --------|                                  |--- Initialized PathState
33  * Un-initialized QueueData --------|                                  |--- Initialized QueueData (to QUEUE_EMPTY_SLOT)
34  * Un-initialized QueueIndex -------|                                  |--- Initialized QueueIndex (to 0)
35  * Un-initialized use_queues_flag---|                                  |--- Initialized use_queues_flag (to false)
36  * Un-initialized ray_state --------|                                  |--- Initialized ray_state
37  * parallel_samples --------------- |                                  |--- Initialized per_sample_output_buffers
38  * rng_state -----------------------|                                  |--- Initialized work_array
39  * data ----------------------------|                                  |--- Initialized work_pool_wgs
40  * start_sample --------------------|                                  |
41  * sx ------------------------------|                                  |
42  * sy ------------------------------|                                  |
43  * sw ------------------------------|                                  |
44  * sh ------------------------------|                                  |
45  * stride --------------------------|                                  |
46  * queuesize -----------------------|                                  |
47  * num_samples ---------------------|                                  |
48  *
49  * Note on Queues :
50  * All slots in queues are initialized to queue empty slot;
51  * The number of elements in the queues is initialized to 0;
52  */
53
54 #ifndef __KERNEL_CPU__
55 ccl_device void kernel_data_init(
56 #else
57 void KERNEL_FUNCTION_FULL_NAME(data_init)(
58 #endif
59         KernelGlobals *kg,
60         ccl_constant KernelData *data,
61         ccl_global void *split_data_buffer,
62         int num_elements,
63         ccl_global char *ray_state,
64         ccl_global uint *rng_state,
65
66 #ifdef __KERNEL_OPENCL__
67 #define KERNEL_TEX(type, ttype, name)                                   \
68         ccl_global type *name,
69 #include "../kernel_textures.h"
70 #endif
71
72         int start_sample,
73         int end_sample,
74         int sx, int sy, int sw, int sh, int offset, int stride,
75         ccl_global int *Queue_index,                 /* Tracks the number of elements in queues */
76         int queuesize,                               /* size (capacity) of the queue */
77         ccl_global char *use_queues_flag,            /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
78         ccl_global unsigned int *work_pools,      /* Work pool for each work group */
79         unsigned int num_samples,
80         ccl_global float *buffer)
81 {
82 #ifdef __KERNEL_OPENCL__
83         kg->data = data;
84 #endif
85
86         kernel_split_params.x = sx;
87         kernel_split_params.y = sy;
88         kernel_split_params.w = sw;
89         kernel_split_params.h = sh;
90
91         kernel_split_params.offset = offset;
92         kernel_split_params.stride = stride;
93
94         kernel_split_params.rng_state = rng_state;
95
96         kernel_split_params.start_sample = start_sample;
97         kernel_split_params.end_sample = end_sample;
98
99         kernel_split_params.work_pools = work_pools;
100         kernel_split_params.num_samples = num_samples;
101
102         kernel_split_params.queue_index = Queue_index;
103         kernel_split_params.queue_size = queuesize;
104         kernel_split_params.use_queues_flag = use_queues_flag;
105
106         kernel_split_params.buffer = buffer;
107
108         split_data_init(&kernel_split_state, num_elements, split_data_buffer, ray_state);
109
110 #ifdef __KERNEL_OPENCL__
111 #define KERNEL_TEX(type, ttype, name) \
112         kg->name = name;
113 #include "../kernel_textures.h"
114 #endif
115
116         int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
117
118         /* Initialize queue data and queue index. */
119         if(thread_index < queuesize) {
120                 /* Initialize active ray queue. */
121                 kernel_split_state.queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
122                 /* Initialize background and buffer update queue. */
123                 kernel_split_state.queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
124                 /* Initialize shadow ray cast of AO queue. */
125                 kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
126                 /* Initialize shadow ray cast of direct lighting queue. */
127                 kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
128         }
129
130         if(thread_index == 0) {
131                 Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
132                 Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
133                 Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
134                 Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
135                 /* The scene-intersect kernel should not use the queues very first time.
136                  * since the queue would be empty.
137                  */
138                 *use_queues_flag = 0;
139         }
140
141         int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
142
143         /* This is the first assignment to ray_state;
144          * So we dont use ASSIGN_RAY_STATE macro.
145          */
146         kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
147
148         unsigned int my_sample;
149         unsigned int pixel_x;
150         unsigned int pixel_y;
151         unsigned int tile_x;
152         unsigned int tile_y;
153         unsigned int my_sample_tile;
154
155         unsigned int work_index = 0;
156         /* Get work. */
157         if(!get_next_work(kg, &work_index, ray_index)) {
158                 /* No more work, mark ray as inactive */
159                 kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
160
161                 return;
162         }
163
164         /* Get the sample associated with the work. */
165         my_sample = get_work_sample(kg, work_index, ray_index) + start_sample;
166
167         my_sample_tile = 0;
168
169         /* Get pixel and tile position associated with the work. */
170         get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
171                                 &tile_x, &tile_y,
172                                 work_index,
173                                 ray_index);
174         kernel_split_state.work_array[ray_index] = work_index;
175
176         rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
177
178         ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
179         per_sample_output_buffers += ((tile_x + (tile_y * stride)) + (my_sample_tile)) * kernel_data.film.pass_stride;
180
181         /* Initialize random numbers and ray. */
182         kernel_path_trace_setup(kg,
183                                 rng_state,
184                                 my_sample,
185                                 pixel_x, pixel_y,
186                                 &kernel_split_state.rng[ray_index],
187                                 &kernel_split_state.ray[ray_index]);
188
189         if(kernel_split_state.ray[ray_index].t != 0.0f) {
190                 /* Initialize throughput, L_transparent, Ray, PathState;
191                  * These rays proceed with path-iteration.
192                  */
193                 kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
194                 kernel_split_state.L_transparent[ray_index] = 0.0f;
195                 path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
196                 path_state_init(kg,
197                                 &kernel_split_state.sd_DL_shadow[ray_index],
198                                 &kernel_split_state.path_state[ray_index],
199                                 &kernel_split_state.rng[ray_index],
200                                 my_sample,
201                                 &kernel_split_state.ray[ray_index]);
202 #ifdef __KERNEL_DEBUG__
203                 debug_data_init(&kernel_split_state.debug_data[ray_index]);
204 #endif
205         }
206         else {
207                 /* These rays do not participate in path-iteration. */
208                 float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
209                 /* Accumulate result in output buffer. */
210                 kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
211                 path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
212                 ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
213         }
214 }
215
216 CCL_NAMESPACE_END
217