Cycles: Remove few function arguments needed only for the split kernel
[blender.git] / intern / cycles / kernel / split / kernel_background_buffer_update.h
1 /*
2  * Copyright 2011-2015 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "kernel_split_common.h"
18
19 /* Note on kernel_background_buffer_update kernel.
20  * This is the fourth kernel in the ray tracing logic, and the third
21  * of the path iteration kernels. This kernel takes care of rays that hit
22  * the background (sceneintersect kernel), and for the rays of
23  * state RAY_UPDATE_BUFFER it updates the ray's accumulated radiance in
24  * the output buffer. This kernel also takes care of rays that have been determined
25  * to-be-regenerated.
26  *
27  * We will empty QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue in this kernel
28  *
29  * Typically all rays that are in state RAY_HIT_BACKGROUND or RAY_UPDATE_BUFFER
30  * will eventually be set to RAY_TO_REGENERATE state in this kernel. Finally, all rays of ray_state
31  * RAY_TO_REGENERATE will be regenerated and put in queue QUEUE_ACTIVE_AND_REGENERATED_RAYS.
32  *
33  * The input and output are as follows,
34  *
35  * rng_coop ---------------------------------------------|--- kernel_background_buffer_update --|--- PathRadiance_coop
36  * throughput_coop --------------------------------------|                                      |--- L_transparent_coop
37  * per_sample_output_buffers ----------------------------|                                      |--- per_sample_output_buffers
38  * Ray_coop ---------------------------------------------|                                      |--- ray_state
39  * PathState_coop ---------------------------------------|                                      |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
40  * L_transparent_coop -----------------------------------|                                      |--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS)
41  * ray_state --------------------------------------------|                                      |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
42  * Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ----|                                      |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS)
43  * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------|                                      |--- work_array
44  * parallel_samples -------------------------------------|                                      |--- PathState_coop
45  * end_sample -------------------------------------------|                                      |--- throughput_coop
46  * kg (globals) -----------------------------------------|                                      |--- rng_coop
47  * rng_state --------------------------------------------|                                      |--- Ray
48  * PathRadiance_coop ------------------------------------|                                      |
49  * sw ---------------------------------------------------|                                      |
50  * sh ---------------------------------------------------|                                      |
51  * sx ---------------------------------------------------|                                      |
52  * sy ---------------------------------------------------|                                      |
53  * stride -----------------------------------------------|                                      |
54  * work_array -------------------------------------------|                                      |--- work_array
55  * queuesize --------------------------------------------|                                      |
56  * start_sample -----------------------------------------|                                      |--- work_pool_wgs
57  * work_pool_wgs ----------------------------------------|                                      |
58  * num_samples ------------------------------------------|                                      |
59  *
60  * Note on sd : the sd argument is neither an input nor an output of this kernel; it is only filled and consumed within the kernel itself.
61  * Note on Queues :
62  * This kernel fetches rays from QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
63  *
64  * State of queues when this kernel is called :
65  * At entry,
66  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays
67  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND, RAY_TO_REGENERATE rays
68  * At exit,
69  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
70  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty
71  */
72 ccl_device char kernel_background_buffer_update(
73         KernelGlobals *kg,
74         ccl_global float *per_sample_output_buffers,
75         ccl_global uint *rng_state,
76         ccl_global uint *rng_coop,             /* Required for buffer Update */
77         ccl_global float3 *throughput_coop,    /* Required for background hit processing */
78         PathRadiance *PathRadiance_coop,       /* Required for background hit processing and buffer Update */
79         ccl_global Ray *Ray_coop,              /* Required for background hit processing */
80         ccl_global PathState *PathState_coop,  /* Required for background hit processing */
81         ccl_global float *L_transparent_coop,  /* Required for background hit processing and buffer Update */
82         ccl_global char *ray_state,            /* Stores information on the current state of a ray */
83         int sw, int sh, int sx, int sy, int stride,
84         int rng_state_offset_x,
85         int rng_state_offset_y,
86         int rng_state_stride,
87         ccl_global unsigned int *work_array,   /* Denotes work of each ray */
88         int end_sample,
89         int start_sample,
90 #ifdef __WORK_STEALING__
91         ccl_global unsigned int *work_pool_wgs,
92         unsigned int num_samples,
93 #endif
94 #ifdef __KERNEL_DEBUG__
95         DebugData *debugdata_coop,
96 #endif
97         int parallel_samples,                  /* Number of samples to be processed in parallel */
98         int ray_index)
99 {
100         char enqueue_flag = 0;
101 #ifdef __KERNEL_DEBUG__
102         DebugData *debug_data = &debugdata_coop[ray_index];
103 #endif
104         ccl_global PathState *state = &PathState_coop[ray_index];
105         PathRadiance *L = L = &PathRadiance_coop[ray_index];
106         ccl_global Ray *ray = &Ray_coop[ray_index];
107         ccl_global float3 *throughput = &throughput_coop[ray_index];
108         ccl_global float *L_transparent = &L_transparent_coop[ray_index];
109         ccl_global uint *rng = &rng_coop[ray_index];
110
111 #ifdef __WORK_STEALING__
112         unsigned int my_work;
113         ccl_global float *initial_per_sample_output_buffers;
114         ccl_global uint *initial_rng;
115 #endif
116         unsigned int sample;
117         unsigned int tile_x;
118         unsigned int tile_y;
119         unsigned int pixel_x;
120         unsigned int pixel_y;
121         unsigned int my_sample_tile;
122
123 #ifdef __WORK_STEALING__
124         my_work = work_array[ray_index];
125         sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
126         get_pixel_tile_position(&pixel_x, &pixel_y,
127                                 &tile_x, &tile_y,
128                                 my_work,
129                                 sw, sh, sx, sy,
130                                 parallel_samples,
131                                 ray_index);
132         my_sample_tile = 0;
133         initial_per_sample_output_buffers = per_sample_output_buffers;
134         initial_rng = rng_state;
135 #else  /* __WORK_STEALING__ */
136         sample = work_array[ray_index];
137         int tile_index = ray_index / parallel_samples;
138         /* buffer and rng_state's stride is "stride". Find x and y using ray_index */
139         tile_x = tile_index % sw;
140         tile_y = tile_index / sw;
141         my_sample_tile = ray_index - (tile_index * parallel_samples);
142 #endif  /* __WORK_STEALING__ */
143
144         rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
145         per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
146
147         if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
148                 /* eval background shader if nothing hit */
149                 if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
150                         *L_transparent = (*L_transparent) + average((*throughput));
151 #ifdef __PASSES__
152                         if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
153 #endif
154                                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
155                 }
156
157                 if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
158 #ifdef __BACKGROUND__
159                         /* sample background shader */
160                         float3 L_background = indirect_background(kg, state, ray);
161                         path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
162 #endif
163                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
164                 }
165         }
166
167         if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
168                 float3 L_sum = path_radiance_clamp_and_sum(kg, L);
169                 kernel_write_light_passes(kg, per_sample_output_buffers, L, sample);
170 #ifdef __KERNEL_DEBUG__
171                 kernel_write_debug_passes(kg, per_sample_output_buffers, state, debug_data, sample);
172 #endif
173                 float4 L_rad = make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - (*L_transparent));
174
175                 /* accumulate result in output buffer */
176                 kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
177                 path_rng_end(kg, rng_state, *rng);
178
179                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
180         }
181
182         if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
183 #ifdef __WORK_STEALING__
184                 /* We have completed current work; So get next work */
185                 int valid_work = get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
186                 if(!valid_work) {
187                         /* If work is invalid, this means no more work is available and the thread may exit */
188                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
189                 }
190 #else  /* __WORK_STEALING__ */
191                 if((sample + parallel_samples) >= end_sample) {
192                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
193                 }
194 #endif  /* __WORK_STEALING__ */
195
196                 if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
197 #ifdef __WORK_STEALING__
198                         work_array[ray_index] = my_work;
199                         /* Get the sample associated with the current work */
200                         sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
201                         /* Get pixel and tile position associated with current work */
202                         get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
203                         my_sample_tile = 0;
204
205                         /* Remap rng_state according to the current work */
206                         rng_state = initial_rng + ((rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride);
207                         /* Remap per_sample_output_buffers according to the current work */
208                         per_sample_output_buffers = initial_per_sample_output_buffers
209                                 + (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
210 #else  /* __WORK_STEALING__ */
211                         work_array[ray_index] = sample + parallel_samples;
212                         sample = work_array[ray_index];
213
214                         /* Get ray position from ray index */
215                         pixel_x = sx + ((ray_index / parallel_samples) % sw);
216                         pixel_y = sy + ((ray_index / parallel_samples) / sw);
217 #endif  /* __WORK_STEALING__ */
218
219                         /* Initialize random numbers and ray. */
220                         kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
221
222                         if(ray->t != 0.0f) {
223                                 /* Initialize throughput, L_transparent, Ray, PathState;
224                                  * These rays proceed with path-iteration.
225                                  */
226                                 *throughput = make_float3(1.0f, 1.0f, 1.0f);
227                                 *L_transparent = 0.0f;
228                                 path_radiance_init(L, kernel_data.film.use_light_pass);
229                                 path_state_init(kg, state, rng, sample, ray);
230 #ifdef __KERNEL_DEBUG__
231                                 debug_data_init(debug_data);
232 #endif
233                                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
234                                 enqueue_flag = 1;
235                         } else {
236                                 /* These rays do not participate in path-iteration. */
237                                 float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
238                                 /* Accumulate result in output buffer. */
239                                 kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
240                                 path_rng_end(kg, rng_state, *rng);
241
242                                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
243                         }
244                 }
245         }
246         return enqueue_flag;
247 }