Cycles: Remove sum_all_radiance kernel
[blender.git] / intern / cycles / kernel / split / kernel_holdout_emission_blurring_pathtermination_ao.h
1 /*
2  * Copyright 2011-2015 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Note on kernel_holdout_emission_blurring_pathtermination_ao kernel.
20  * This is the sixth kernel in the ray tracing logic. This is the fifth
21  * of the path iteration kernels. This kernel takes care of the logic to process
22  * "material of type holdout", indirect primitive emission, bsdf blurring,
23  * probabilistic path termination and AO.
24  *
25  * This kernel determines the rays for which a shadow_blocked() function associated with AO should be executed.
26  * Those rays for which a shadow_blocked() function for AO must be executed are marked with flag RAY_SHADOW_RAY_CAST_AO and
27  * enqueued into the queue QUEUE_SHADOW_RAY_CAST_AO_RAYS.
28  *
29  * The ray state of rays that are terminated in this kernel is changed to RAY_UPDATE_BUFFER.
30  *
31  * The input and output are as follows,
32  *
33  * rng_coop ---------------------------------------------|--- kernel_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
34  * throughput_coop --------------------------------------|                                                           |--- PathState_coop
35  * PathRadiance_coop ------------------------------------|                                                           |--- throughput_coop
36  * Intersection_coop ------------------------------------|                                                           |--- L_transparent_coop
37  * PathState_coop ---------------------------------------|                                                           |--- per_sample_output_buffers
38  * L_transparent_coop -----------------------------------|                                                           |--- PathRadiance_coop
39  * sd ---------------------------------------------------|                                                           |--- ShaderData
40  * ray_state --------------------------------------------|                                                           |--- ray_state
41  * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------|                                                           |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
42  * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---|                                                           |--- AOAlpha_coop
43  * kg (globals) -----------------------------------------|                                                           |--- AOBSDF_coop
44  * parallel_samples -------------------------------------|                                                           |--- AOLightRay_coop
45  * per_sample_output_buffers ----------------------------|                                                           |
46  * sw ---------------------------------------------------|                                                           |
47  * sh ---------------------------------------------------|                                                           |
48  * sx ---------------------------------------------------|                                                           |
49  * sy ---------------------------------------------------|                                                           |
50  * stride -----------------------------------------------|                                                           |
51  * work_array -------------------------------------------|                                                           |
52  * queuesize --------------------------------------------|                                                           |
53  * start_sample -----------------------------------------|                                                           |
54  *
55  * Note on Queues :
56  * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only
57  * the rays of state RAY_ACTIVE.
58  * There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFFER
59  * state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will
60  * still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been
61  * changed to RAY_UPDATE_BUFFER, there is no problem.
62  *
63  * State of queues when this kernel is called :
64  * At entry,
65  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
66  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays.
67  * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
68  * At exit,
69  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and RAY_UPDATE_BUFFER rays
70  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays
71  * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO
72  */
73 ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobals *kg)
74 {       /* Per work-item: fetch one ray index from QUEUE_ACTIVE_AND_REGENERATED_RAYS and, if that ray is RAY_ACTIVE, run holdout, data passes, bsdf blur, emission, path termination and AO setup, then enqueue it as flagged. Besides kg, everything is read from kernel_split_state / kernel_split_params / kernel_data. */
75         ccl_local unsigned int local_queue_atomics_bg;  /* counter passed to the QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS enqueue at the end */
76         ccl_local unsigned int local_queue_atomics_ao;  /* counter passed to the QUEUE_SHADOW_RAY_CAST_AO_RAYS enqueue at the end. NOTE(review): ccl_local presumably maps to OpenCL __local, declared here inside a ccl_device function -- confirm all target compilers accept that. */
77         if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {  /* only the work-item with local id (0, 0) resets both counters */
78                 local_queue_atomics_bg = 0;
79                 local_queue_atomics_ao = 0;
80         }
81         ccl_barrier(CCL_LOCAL_MEM_FENCE);  /* presumably keeps the rest of the work-group from continuing before the reset is visible -- confirm against the ccl_barrier definition */
82         /* Per-ray results; set to 1 below when the ray must go into the corresponding output queue. */
83         char enqueue_flag = 0;
84         char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
85         int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);  /* flattened 2D global id, used as the queue slot to look up */
86         ray_index = get_ray_index(kg, ray_index,
87                                   QUEUE_ACTIVE_AND_REGENERATED_RAYS,
88                                   kernel_split_state.queue_data,
89                                   kernel_split_params.queue_size,
90                                   0);  /* NOTE(review): meaning of this last argument is not visible in this file; the result is compared against QUEUE_EMPTY_SLOT below */
91
92 #ifdef __COMPUTE_DEVICE_GPU__
93         /* If we are executing on a GPU device, we exit all threads that are not
94          * required.
95          *
96          * If we are executing on a CPU device, then we need to keep all threads
97          * active since we have barrier() calls later in the kernel. CPU devices,
98          * expect all threads to execute barrier statement.
99          */
100         if(ray_index == QUEUE_EMPTY_SLOT) {
101                 return;
102         }
103 #endif  /* __COMPUTE_DEVICE_GPU__ */
104         /* Non-GPU builds: instead of returning, the per-ray work is wrapped in a conditional that is closed again just before the enqueue calls. */
105 #ifndef __COMPUTE_DEVICE_GPU__
106         if(ray_index != QUEUE_EMPTY_SLOT) {
107 #endif
108
109         int stride = kernel_split_params.stride;
110
111         unsigned int work_index;
112         unsigned int pixel_x;
113         unsigned int pixel_y;
114         /* tile_x / tile_y are only filled in as outputs of get_work_pixel_tile_position(); nothing else in this function reads them. */
115         unsigned int tile_x;
116         unsigned int tile_y;
117         unsigned int sample;
118         /* rng, state and throughput are only assigned inside the first RAY_ACTIVE block below and are only used for rays that are RAY_ACTIVE. */
119         ccl_global RNG *rng = 0x0;
120         ccl_global PathState *state = 0x0;
121         float3 throughput;
122
123         ccl_global char *ray_state = kernel_split_state.ray_state;
124         ShaderData *sd = &kernel_split_state.sd[ray_index];
125         ccl_global float *buffer = kernel_split_params.buffer;
126         /* First pass over an active ray: load its per-ray state, locate its pixel in the buffer and handle holdout. */
127         if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
128
129                 throughput = kernel_split_state.throughput[ray_index];
130                 state = &kernel_split_state.path_state[ray_index];
131                 rng = &kernel_split_state.rng[ray_index];
132                 /* sample is get_work_sample() offset by start_sample; pixel and tile coordinates come from get_work_pixel_tile_position(). */
133                 work_index = kernel_split_state.work_array[ray_index];
134                 sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
135                 get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
136                                         &tile_x, &tile_y,
137                                         work_index,
138                                         ray_index);
139                 /* Advance buffer to this pixel: (offset + x + y * stride) entries of pass_stride floats each. */
140                 buffer += (kernel_split_params.offset + pixel_x + pixel_y * stride) * kernel_data.film.pass_stride;
141
142                 /* holdout: applies when the hit has SD_HOLDOUT or SD_OBJECT_HOLDOUT_MASK set and the path state has PATH_RAY_CAMERA set */
143 #ifdef __HOLDOUT__
144                 if(((sd->flag & SD_HOLDOUT) ||
145                     (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
146                    (state->flag & PATH_RAY_CAMERA))
147                 {
148                         if(kernel_data.background.transparent) {  /* L_transparent is only accumulated when the background is transparent */
149                                 float3 holdout_weight;
150                                 if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
151                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
152                                 }
153                                 else {
154                                         holdout_weight = shader_holdout_eval(kg, sd);
155                                 }
156                                 /* any throughput is ok, should all be identical here */
157                                 kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput);
158                         }
159                         if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {  /* only holdout-mask objects end the path here; a hit with just SD_HOLDOUT stays RAY_ACTIVE */
160                                 ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
161                                 enqueue_flag = 1;
162                         }
163                 }
164 #endif  /* __HOLDOUT__ */
165         }
166         /* Re-test the state: the holdout branch above may have switched this ray to RAY_UPDATE_BUFFER. */
167         if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
168                 PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
169                 /* Holdout mask objects do not write data passes (they are no longer RAY_ACTIVE at this point). */
170                 kernel_write_data_passes(kg,
171                                          buffer,
172                                          L,
173                                          sd,
174                                          sample,
175                                          state,
176                                          throughput);
177                 /* Blurring of bsdf after bounces, for rays that have a small likelihood
178                  * of following this particular path (diffuse, rough glossy).
179                  */
180                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {  /* a filter_glossy of FLT_MAX skips the blur entirely */
181                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
182                         if(blur_pdf < 1.0f) {  /* blur only while the scaled minimum ray pdf is below 1 */
183                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
184                                 shader_bsdf_blur(kg, sd, blur_roughness);
185                         }
186                 }
187
188 #ifdef __EMISSION__
189                 /* emission: accumulated into L for hits that have SD_EMISSION set */
190                 if(sd->flag & SD_EMISSION) {
191                         /* TODO(sergey): is isect.t wrong here for transparent surfaces? */
192                         float3 emission = indirect_primitive_emission(
193                                 kg,
194                                 sd,
195                                 kernel_split_state.isect[ray_index].t,
196                                 state->flag,
197                                 state->ray_pdf);
198                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
199                 }
200 #endif  /* __EMISSION__ */
201
202                 /* Path termination. this is a strange place to put the termination, it's
203                  * mainly due to the mixed in MIS that we use. gives too many unneeded
204                  * shader evaluations, only need emission if we are going to terminate.
205                  */
206                 float probability = path_state_terminate_probability(kg, state, throughput);
207                 /* A probability of exactly zero ends the path without drawing a random number. */
208                 if(probability == 0.0f) {
209                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
210                         enqueue_flag = 1;
211                 }
212                 /* Still active and probability != 1: end the path when the drawn number is >= probability, otherwise rescale the stored throughput. */
213                 if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
214                         if(probability != 1.0f) {
215                                 float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
216                                 if(terminate >= probability) {
217                                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
218                                         enqueue_flag = 1;
219                                 }
220                                 else {
221                                         kernel_split_state.throughput[ray_index] = throughput/probability;  /* only the stored copy is rescaled; the local throughput is not read again below */
222                                 }
223                         }
224                 }
225         }
226         /* AO setup, only for rays that are still RAY_ACTIVE after everything above. */
227 #ifdef __AO__
228         if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
229                 /* ambient occlusion: enabled either by the integrator setting or by SD_AO on this hit */
230                 if(kernel_data.integrator.use_ambient_occlusion ||
231                    (sd->flag & SD_AO))
232                 {
233                         /* todo: solve correlation */
234                         float bsdf_u, bsdf_v;
235                         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
236
237                         float ao_factor = kernel_data.background.ao_factor;
238                         float3 ao_N;
239                         kernel_split_state.ao_bsdf[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
240                         kernel_split_state.ao_alpha[ray_index] = shader_bsdf_alpha(kg, sd);
241                         /* Note: ao_bsdf and ao_alpha are stored even when no AO ray ends up being queued below. */
242                         float3 ao_D;
243                         float ao_pdf;
244                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
245                         /* Queue an AO ray only if the sampled direction has a positive dot product with sd->Ng and a non-zero pdf. */
246                         if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
247                                 Ray _ray;
248                                 _ray.P = ray_offset(sd->P, sd->Ng);
249                                 _ray.D = ao_D;
250                                 _ray.t = kernel_data.background.ao_distance;
251 #ifdef __OBJECT_MOTION__
252                                 _ray.time = sd->time;
253 #endif
254                                 _ray.dP = sd->dP;
255                                 _ray.dD = differential3_zero();
256                                 kernel_split_state.ao_light_ray[ray_index] = _ray;
257                                 /* Flag the ray and remember to enqueue it into QUEUE_SHADOW_RAY_CAST_AO_RAYS. */
258                                 ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
259                                 enqueue_flag_AO_SHADOW_RAY_CAST = 1;
260                         }
261                 }
262         }
263 #endif  /* __AO__ */
264         /* Close the non-GPU "ray_index != QUEUE_EMPTY_SLOT" conditional opened above. */
265 #ifndef __COMPUTE_DEVICE_GPU__
266         }
267 #endif
268         /* Both enqueue calls sit outside that conditional, so on non-GPU builds every work-item reaches them (empty slots arrive with both flags still 0). */
269         /* Enqueue RAY_UPDATE_BUFFER rays. */
270         enqueue_ray_index_local(ray_index,
271                                 QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
272                                 enqueue_flag,
273                                 kernel_split_params.queue_size,
274                                 &local_queue_atomics_bg,
275                                 kernel_split_state.queue_data,
276                                 kernel_split_params.queue_index);
277
278 #ifdef __AO__
279         /* Enqueue to-shadow-ray-cast rays. */
280         enqueue_ray_index_local(ray_index,
281                                 QUEUE_SHADOW_RAY_CAST_AO_RAYS,
282                                 enqueue_flag_AO_SHADOW_RAY_CAST,
283                                 kernel_split_params.queue_size,
284                                 &local_queue_atomics_ao,
285                                 kernel_split_state.queue_data,
286                                 kernel_split_params.queue_index);
287 #endif
288 }
289
290 CCL_NAMESPACE_END
291