e02e55b5f186aa92797c2aeaf1e47ffd26458887
[blender.git] / intern / cycles / kernel / split / kernel_background_buffer_update.h
1 /*
2  * Copyright 2011-2015 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "kernel_split_common.h"
18
19 /* Note on kernel_background_buffer_update kernel.
20  * This is the fourth kernel in the ray tracing logic, and the third
21  * of the path iteration kernels. This kernel takes care of rays that hit
22  * the background (sceneintersect kernel), and for the rays of
23  * state RAY_UPDATE_BUFFER it updates the ray's accumulated radiance in
24  * the output buffer. This kernel also takes care of rays that have been determined
25  * to-be-regenerated.
26  *
27  * This kernel empties the QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
28  *
29  * Typically all rays that are in state RAY_HIT_BACKGROUND, RAY_UPDATE_BUFFER
30  * will be eventually set to RAY_TO_REGENERATE state in this kernel. Finally all rays of ray_state
31  * RAY_TO_REGENERATE will be regenerated and put in queue QUEUE_ACTIVE_AND_REGENERATED_RAYS.
32  *
33  * The input and output are as follows,
34  *
35  * rng_coop ---------------------------------------------|--- kernel_background_buffer_update --|--- PathRadiance_coop
36  * throughput_coop --------------------------------------|                                      |--- L_transparent_coop
37  * per_sample_output_buffers ----------------------------|                                      |--- per_sample_output_buffers
38  * Ray_coop ---------------------------------------------|                                      |--- ray_state
39  * PathState_coop ---------------------------------------|                                      |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
40  * L_transparent_coop -----------------------------------|                                      |--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS)
41  * ray_state --------------------------------------------|                                      |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
42  * Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ----|                                      |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS)
43  * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------|                                      |--- work_array
44  * parallel_samples -------------------------------------|                                      |--- PathState_coop
45  * end_sample -------------------------------------------|                                      |--- throughput_coop
46  * kg (globals) -----------------------------------------|                                      |--- rng_coop
47  * rng_state --------------------------------------------|                                      |--- Ray
48  * PathRadiance_coop ------------------------------------|                                      |
49  * sw ---------------------------------------------------|                                      |
50  * sh ---------------------------------------------------|                                      |
51  * sx ---------------------------------------------------|                                      |
52  * sy ---------------------------------------------------|                                      |
53  * stride -----------------------------------------------|                                      |
54  * work_array -------------------------------------------|                                      |--- work_array
55  * queuesize --------------------------------------------|                                      |
56  * start_sample -----------------------------------------|                                      |--- work_pool_wgs
57  * work_pool_wgs ----------------------------------------|                                      |
58  * num_samples ------------------------------------------|                                      |
59  *
60  * Note on sd: the sd argument is neither an input nor an output of this kernel; it is filled and consumed within the kernel itself.
61  * Note on Queues :
62  * This kernel fetches rays from QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
63  *
64  * State of queues when this kernel is called :
65  * At entry,
66  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays
67  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND, RAY_TO_REGENERATE rays
68  * At exit,
69  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
70  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty
71  */
72 ccl_device char kernel_background_buffer_update(
73         KernelGlobals *kg,
74         ShaderData *sd,
75         ccl_global float *per_sample_output_buffers,
76         ccl_global uint *rng_state,
77         ccl_global uint *rng_coop,             /* Required for buffer Update */
78         ccl_global float3 *throughput_coop,    /* Required for background hit processing */
79         PathRadiance *PathRadiance_coop,       /* Required for background hit processing and buffer Update */
80         ccl_global Ray *Ray_coop,              /* Required for background hit processing */
81         ccl_global PathState *PathState_coop,  /* Required for background hit processing */
82         ccl_global float *L_transparent_coop,  /* Required for background hit processing and buffer Update */
83         ccl_global char *ray_state,            /* Stores information on the current state of a ray */
84         int sw, int sh, int sx, int sy, int stride,
85         int rng_state_offset_x,
86         int rng_state_offset_y,
87         int rng_state_stride,
88         ccl_global unsigned int *work_array,   /* Denotes work of each ray */
89         int end_sample,
90         int start_sample,
91 #ifdef __WORK_STEALING__
92         ccl_global unsigned int *work_pool_wgs,
93         unsigned int num_samples,
94 #endif
95 #ifdef __KERNEL_DEBUG__
96         DebugData *debugdata_coop,
97 #endif
98         int parallel_samples,                  /* Number of samples to be processed in parallel */
99         int ray_index)
100 {
101         char enqueue_flag = 0;
102 #ifdef __KERNEL_DEBUG__
103         DebugData *debug_data = &debugdata_coop[ray_index];
104 #endif
105         ccl_global PathState *state = &PathState_coop[ray_index];
106         PathRadiance *L = L = &PathRadiance_coop[ray_index];
107         ccl_global Ray *ray = &Ray_coop[ray_index];
108         ccl_global float3 *throughput = &throughput_coop[ray_index];
109         ccl_global float *L_transparent = &L_transparent_coop[ray_index];
110         ccl_global uint *rng = &rng_coop[ray_index];
111
112 #ifdef __WORK_STEALING__
113         unsigned int my_work;
114         ccl_global float *initial_per_sample_output_buffers;
115         ccl_global uint *initial_rng;
116 #endif
117         unsigned int sample;
118         unsigned int tile_x;
119         unsigned int tile_y;
120         unsigned int pixel_x;
121         unsigned int pixel_y;
122         unsigned int my_sample_tile;
123
124 #ifdef __WORK_STEALING__
125         my_work = work_array[ray_index];
126         sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
127         get_pixel_tile_position(&pixel_x, &pixel_y,
128                                 &tile_x, &tile_y,
129                                 my_work,
130                                 sw, sh, sx, sy,
131                                 parallel_samples,
132                                 ray_index);
133         my_sample_tile = 0;
134         initial_per_sample_output_buffers = per_sample_output_buffers;
135         initial_rng = rng_state;
136 #else  /* __WORK_STEALING__ */
137         sample = work_array[ray_index];
138         int tile_index = ray_index / parallel_samples;
139         /* buffer and rng_state's stride is "stride". Find x and y using ray_index */
140         tile_x = tile_index % sw;
141         tile_y = tile_index / sw;
142         my_sample_tile = ray_index - (tile_index * parallel_samples);
143 #endif  /* __WORK_STEALING__ */
144
145         rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
146         per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
147
148         if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
149                 /* eval background shader if nothing hit */
150                 if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
151                         *L_transparent = (*L_transparent) + average((*throughput));
152 #ifdef __PASSES__
153                         if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
154 #endif
155                                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
156                 }
157
158                 if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
159 #ifdef __BACKGROUND__
160                         /* sample background shader */
161                         float3 L_background = indirect_background(kg, state, ray, sd);
162                         path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
163 #endif
164                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
165                 }
166         }
167
168         if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
169                 float3 L_sum = path_radiance_clamp_and_sum(kg, L);
170                 kernel_write_light_passes(kg, per_sample_output_buffers, L, sample);
171 #ifdef __KERNEL_DEBUG__
172                 kernel_write_debug_passes(kg, per_sample_output_buffers, state, debug_data, sample);
173 #endif
174                 float4 L_rad = make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - (*L_transparent));
175
176                 /* accumulate result in output buffer */
177                 kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
178                 path_rng_end(kg, rng_state, *rng);
179
180                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
181         }
182
183         if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
184 #ifdef __WORK_STEALING__
185                 /* We have completed current work; So get next work */
186                 int valid_work = get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
187                 if(!valid_work) {
188                         /* If work is invalid, this means no more work is available and the thread may exit */
189                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
190                 }
191 #else  /* __WORK_STEALING__ */
192                 if((sample + parallel_samples) >= end_sample) {
193                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
194                 }
195 #endif  /* __WORK_STEALING__ */
196
197                 if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
198 #ifdef __WORK_STEALING__
199                         work_array[ray_index] = my_work;
200                         /* Get the sample associated with the current work */
201                         sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
202                         /* Get pixel and tile position associated with current work */
203                         get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
204                         my_sample_tile = 0;
205
206                         /* Remap rng_state according to the current work */
207                         rng_state = initial_rng + ((rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride);
208                         /* Remap per_sample_output_buffers according to the current work */
209                         per_sample_output_buffers = initial_per_sample_output_buffers
210                                 + (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
211 #else  /* __WORK_STEALING__ */
212                         work_array[ray_index] = sample + parallel_samples;
213                         sample = work_array[ray_index];
214
215                         /* Get ray position from ray index */
216                         pixel_x = sx + ((ray_index / parallel_samples) % sw);
217                         pixel_y = sy + ((ray_index / parallel_samples) / sw);
218 #endif  /* __WORK_STEALING__ */
219
220                         /* Initialize random numbers and ray. */
221                         kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
222
223                         if(ray->t != 0.0f) {
224                                 /* Initialize throughput, L_transparent, Ray, PathState;
225                                  * These rays proceed with path-iteration.
226                                  */
227                                 *throughput = make_float3(1.0f, 1.0f, 1.0f);
228                                 *L_transparent = 0.0f;
229                                 path_radiance_init(L, kernel_data.film.use_light_pass);
230                                 path_state_init(kg, state, rng, sample, ray);
231 #ifdef __KERNEL_DEBUG__
232                                 debug_data_init(debug_data);
233 #endif
234                                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
235                                 enqueue_flag = 1;
236                         } else {
237                                 /* These rays do not participate in path-iteration. */
238                                 float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
239                                 /* Accumulate result in output buffer. */
240                                 kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
241                                 path_rng_end(kg, rng_state, *rng);
242
243                                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
244                         }
245                 }
246         }
247         return enqueue_flag;
248 }