3a7f1629e66551b828af28e255be9211f6a7c89c
[blender.git] / intern / cycles / kernel / split / kernel_holdout_emission_blurring_pathtermination_ao.h
1 /*
2  * Copyright 2011-2015 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Note on kernel_holdout_emission_blurring_pathtermination_ao kernel.
20  * This is the sixth kernel in the ray tracing logic. This is the fifth
21  * of the path iteration kernels. This kernel takes care of the logic to process
22  * "material of type holdout", indirect primitive emission, bsdf blurring,
23  * probabilistic path termination and AO.
24  *
25  * This kernel determines the rays for which a shadow_blocked() function associated with AO should be executed.
26  * Those rays for which a shadow_blocked() function for AO must be executed are marked with flag RAY_SHADOW_RAY_CAST_AO and
27  * enqueued into the queue QUEUE_SHADOW_RAY_CAST_AO_RAYS.
28  *
29  * The ray state of rays that are terminated in this kernel is changed to RAY_UPDATE_BUFFER.
30  *
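31  * The inputs and outputs are as follows (the kernel function itself takes only kg; the data it uses is accessed through kernel_split_state and kernel_split_params),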
32  *
33  * rng_coop ---------------------------------------------|--- kernel_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
34  * throughput_coop --------------------------------------|                                                           |--- PathState_coop
35  * PathRadiance_coop ------------------------------------|                                                           |--- throughput_coop
36  * Intersection_coop ------------------------------------|                                                           |--- L_transparent_coop
37  * PathState_coop ---------------------------------------|                                                           |--- per_sample_output_buffers
38  * L_transparent_coop -----------------------------------|                                                           |--- PathRadiance_coop
39  * sd ---------------------------------------------------|                                                           |--- ShaderData
40  * ray_state --------------------------------------------|                                                           |--- ray_state
41  * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------|                                                           |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
42  * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---|                                                           |--- AOAlpha_coop
43  * kg (globals) -----------------------------------------|                                                           |--- AOBSDF_coop
44  * parallel_samples -------------------------------------|                                                           |--- AOLightRay_coop
45  * per_sample_output_buffers ----------------------------|                                                           |
46  * sw ---------------------------------------------------|                                                           |
47  * sh ---------------------------------------------------|                                                           |
48  * sx ---------------------------------------------------|                                                           |
49  * sy ---------------------------------------------------|                                                           |
50  * stride -----------------------------------------------|                                                           |
51  * work_array -------------------------------------------|                                                           |
52  * queuesize --------------------------------------------|                                                           |
53  * start_sample -----------------------------------------|                                                           |
54  *
55  * Note on Queues :
56  * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only
57  * the rays of state RAY_ACTIVE.
58  * There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFFER
59  * state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will
60  * still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been
61  * changed to RAY_UPDATE_BUFFER, there is no problem.
62  *
63  * State of queues when this kernel is called :
64  * At entry,
65  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
66  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays.
67  * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
68  * At exit,
69  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and RAY_UPDATE_BUFFER rays
70  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays
71  * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO
72  */
73 ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobals *kg)
74 {
           /* Two counters declared ccl_local; their addresses are handed to the
            * enqueue_ray_index_local() calls at the end of the kernel, one per
            * destination queue. Only the work-item with local id (0, 0) zeroes
            * them, and the barrier below comes before any later use.
            */
75         ccl_local unsigned int local_queue_atomics_bg;
76         ccl_local unsigned int local_queue_atomics_ao;
77         if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
78                 local_queue_atomics_bg = 0;
79                 local_queue_atomics_ao = 0;
80         }
81         ccl_barrier(CCL_LOCAL_MEM_FENCE);
82
           /* Per-ray decisions collected in the body and consumed by the enqueue
            * calls at the bottom: enqueue_flag is set whenever this kernel switches
            * the ray to RAY_UPDATE_BUFFER, enqueue_flag_AO_SHADOW_RAY_CAST when an
            * AO ray has been prepared.
            */
83         char enqueue_flag = 0;
84         char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
           /* Flatten the 2D global id into a linear index, then pass it to
            * get_ray_index() together with QUEUE_ACTIVE_AND_REGENERATED_RAYS to
            * obtain the ray this work-item handles. The result is compared against
            * QUEUE_EMPTY_SLOT below.
            */
85         int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
86         ray_index = get_ray_index(kg, ray_index,
87                                   QUEUE_ACTIVE_AND_REGENERATED_RAYS,
88                                   kernel_split_state.queue_data,
89                                   kernel_split_params.queue_size,
90                                   0);
91
92 #ifdef __COMPUTE_DEVICE_GPU__
93         /* If we are executing on a GPU device, we exit all threads that are not
94          * required.
95          *
96          * If we are executing on a CPU device, then we need to keep all threads
97          * active since we have barrier() calls later in the kernel. CPU devices,
98          * expect all threads to execute barrier statement.
99          */
100         if(ray_index == QUEUE_EMPTY_SLOT) {
101                 return;
102         }
103 #endif  /* __COMPUTE_DEVICE_GPU__ */
104
            /* Non-GPU builds: instead of returning early, the whole per-ray body is
             * guarded by this condition. The block opened here is closed by the
             * matching #ifndef section right before the enqueue calls.
             */
105 #ifndef __COMPUTE_DEVICE_GPU__
106         if(ray_index != QUEUE_EMPTY_SLOT) {
107 #endif
108
109         int stride = kernel_split_params.stride;
110
111         unsigned int work_index;
112         unsigned int pixel_x;
113         unsigned int pixel_y;
114
115         unsigned int tile_x;
116         unsigned int tile_y;
117         int my_sample_tile;
118         unsigned int sample;
119
            /* rng, state and throughput are only assigned inside the first
             * RAY_ACTIVE block below; every later block that reads them is itself
             * guarded by a RAY_ACTIVE check.
             */
120         ccl_global RNG *rng = 0x0;
121         ccl_global PathState *state = 0x0;
122         float3 throughput;
123
124         ccl_global char *ray_state = kernel_split_state.ray_state;
125         ShaderData *sd = kernel_split_state.sd;
126         ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
127
            /* First block for an active ray: load the per-ray data, work out the
             * sample number and the output buffer position, then handle holdout.
             */
128         if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
129
130                 throughput = kernel_split_state.throughput[ray_index];
131                 state = &kernel_split_state.path_state[ray_index];
132                 rng = &kernel_split_state.rng[ray_index];
133
134                 work_index = kernel_split_state.work_array[ray_index];
135                 sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
136                 get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
137                                         &tile_x, &tile_y,
138                                         work_index,
139                                         ray_index);
140                 my_sample_tile = 0;
141
                    /* my_sample_tile was set to 0 just above, so this advances the
                     * pointer by (tile_x + tile_y * stride) * pass_stride elements.
                     */
142                 per_sample_output_buffers +=
143                     ((tile_x + (tile_y * stride)) + my_sample_tile) *
144                     kernel_data.film.pass_stride;
145
146                 /* holdout */
147 #ifdef __HOLDOUT__
                    /* Taken only when the path state has PATH_RAY_CAMERA set and
                     * either sd's flag has SD_HOLDOUT or its object_flag has
                     * SD_OBJECT_HOLDOUT_MASK.
                     */
148                 if(((ccl_fetch(sd, flag) & SD_HOLDOUT) ||
149                     (ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK)) &&
150                    (state->flag & PATH_RAY_CAMERA))
151                 {
152                         if(kernel_data.background.transparent) {
153                                 float3 holdout_weight;
154                                 if(ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK) {
155                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
156                                 }
157                                 else {
158                                         holdout_weight = shader_holdout_eval(kg, sd);
159                                 }
160                                 /* any throughput is ok, should all be identical here */
161                                 kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput);
162                         }
                            /* Only the object-level holdout mask ends the path here.
                             * With SD_HOLDOUT alone the ray keeps its RAY_ACTIVE state
                             * and continues through the blocks below.
                             */
163                         if(ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK) {
164                                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
165                                 enqueue_flag = 1;
166                         }
167                 }
168 #endif  /* __HOLDOUT__ */
169         }
170
            /* Checked again because the holdout handling above may have switched
             * the ray to RAY_UPDATE_BUFFER.
             */
171         if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
172                 PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
173                 /* Holdout mask objects do not write data passes. */
174                 kernel_write_data_passes(kg,
175                                          per_sample_output_buffers,
176                                          L,
177                                          sd,
178                                          sample,
179                                          state,
180                                          throughput);
181                 /* Blurring of bsdf after bounces, for rays that have a small likelihood
182                  * of following this particular path (diffuse, rough glossy).
183                  */
184                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
185                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
186                         if(blur_pdf < 1.0f) {
187                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
188                                 shader_bsdf_blur(kg, sd, blur_roughness);
189                         }
190                 }
191
192 #ifdef __EMISSION__
193                 /* emission */
194                 if(ccl_fetch(sd, flag) & SD_EMISSION) {
195                         /* TODO(sergey): is isect.t wrong here for transparent surfaces? */
196                         float3 emission = indirect_primitive_emission(
197                                 kg,
198                                 sd,
199                                 kernel_split_state.isect[ray_index].t,
200                                 state->flag,
201                                 state->ray_pdf);
202                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
203                 }
204 #endif  /* __EMISSION__ */
205
206                 /* Path termination. this is a strange place to put the termination, it's
207                  * mainly due to the mixed in MIS that we use. gives too many unneeded
208                  * shader evaluations, only need emission if we are going to terminate.
209                  */
210                 float probability = path_state_terminate_probability(kg, state, throughput);
211
                    /* A probability of exactly 0 ends the path unconditionally. */
212                 if(probability == 0.0f) {
213                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
214                         enqueue_flag = 1;
215                 }
216
                    /* Otherwise, unless the probability is exactly 1, a number is
                     * drawn for PRNG_TERMINATE: the path ends when it is >= probability,
                     * else throughput / probability is stored back into
                     * kernel_split_state.throughput. The local copy `throughput` is not
                     * updated and is not read again after this point.
                     */
217                 if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
218                         if(probability != 1.0f) {
219                                 float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
220                                 if(terminate >= probability) {
221                                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
222                                         enqueue_flag = 1;
223                                 }
224                                 else {
225                                         kernel_split_state.throughput[ray_index] = throughput/probability;
226                                 }
227                         }
228                 }
229         }
230
231 #ifdef __AO__
            /* Runs only for rays that are still RAY_ACTIVE after the termination
             * checks above.
             */
232         if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
233                 /* ambient occlusion */
234                 if(kernel_data.integrator.use_ambient_occlusion ||
235                    (ccl_fetch(sd, flag) & SD_AO))
236                 {
237                         /* todo: solve correlation */
238                         float bsdf_u, bsdf_v;
239                         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
240
                            /* ao_bsdf and ao_alpha are stored for this ray before the
                             * direction test below, i.e. also when no AO ray ends up
                             * being queued.
                             */
241                         float ao_factor = kernel_data.background.ao_factor;
242                         float3 ao_N;
243                         kernel_split_state.ao_bsdf[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
244                         kernel_split_state.ao_alpha[ray_index] = shader_bsdf_alpha(kg, sd);
245
246                         float3 ao_D;
247                         float ao_pdf;
248                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
249
                            /* Build and store the AO ray only when ao_D has a positive
                             * dot product with sd's Ng and ao_pdf is non-zero. The ray is
                             * tagged with RAY_SHADOW_RAY_CAST_AO here; the actual enqueue
                             * happens at the bottom of the kernel.
                             */
250                         if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
251                                 Ray _ray;
252                                 _ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
253                                 _ray.D = ao_D;
254                                 _ray.t = kernel_data.background.ao_distance;
255 #ifdef __OBJECT_MOTION__
256                                 _ray.time = ccl_fetch(sd, time);
257 #endif
258                                 _ray.dP = ccl_fetch(sd, dP);
259                                 _ray.dD = differential3_zero();
260                                 kernel_split_state.ao_light_ray[ray_index] = _ray;
261
262                                 ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
263                                 enqueue_flag_AO_SHADOW_RAY_CAST = 1;
264                         }
265                 }
266         }
267 #endif  /* __AO__ */
268
            /* Closes the non-GPU `ray_index != QUEUE_EMPTY_SLOT` guard opened above. */
269 #ifndef __COMPUTE_DEVICE_GPU__
270         }
271 #endif
272
            /* The enqueue calls sit after the non-GPU guard has been closed, so on
             * non-GPU builds work-items whose slot was QUEUE_EMPTY_SLOT also make
             * them, with both flags still 0.
             * NOTE(review): presumably these calls contain the barrier() that the
             * comment near the top says every CPU thread must reach - confirm in
             * enqueue_ray_index_local().
             */
273         /* Enqueue RAY_UPDATE_BUFFER rays. */
274         enqueue_ray_index_local(ray_index,
275                                 QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
276                                 enqueue_flag,
277                                 kernel_split_params.queue_size,
278                                 &local_queue_atomics_bg,
279                                 kernel_split_state.queue_data,
280                                 kernel_split_params.queue_index);
281
282 #ifdef __AO__
283         /* Enqueue to-shadow-ray-cast rays. */
284         enqueue_ray_index_local(ray_index,
285                                 QUEUE_SHADOW_RAY_CAST_AO_RAYS,
286                                 enqueue_flag_AO_SHADOW_RAY_CAST,
287                                 kernel_split_params.queue_size,
288                                 &local_queue_atomics_ao,
289                                 kernel_split_state.queue_data,
290                                 kernel_split_params.queue_index);
291 #endif
292 }
293
294 CCL_NAMESPACE_END
295