/*
 * Copyright 2011-2015 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

CCL_NAMESPACE_BEGIN

/* This kernel handles the logic for processing holdout materials,
 * indirect primitive emission, BSDF blurring, probabilistic path termination
 * and AO.
 *
 * This kernel determines the rays for which a shadow_blocked() call
 * associated with AO should be executed. Rays for which an AO
 * shadow_blocked() call must be executed are marked with the flag
 * RAY_SHADOW_RAY_CAST_AO and enqueued into the queue
 * QUEUE_SHADOW_RAY_CAST_AO_RAYS.
 *
 * The state of rays terminated in this kernel is changed to RAY_UPDATE_BUFFER.
 *
 * Note on queues:
 * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS
 * and processes only the rays in state RAY_ACTIVE.
 * There are several points in this kernel where a ray may terminate and
 * reach the RAY_UPDATE_BUFFER state. Such rays are enqueued into the
 * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. They will still be present
 * in the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray state
 * has been changed to RAY_UPDATE_BUFFER, this causes no problem.
 *
 * State of queues when this kernel is called:
 * At entry,
 *   - QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and
 *     RAY_REGENERATED rays.
 *   - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
 *     RAY_TO_REGENERATE rays.
 *   - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
 * At exit,
 *   - QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE,
 *     RAY_REGENERATED and RAY_UPDATE_BUFFER rays.
 *   - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
 *     RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays.
 *   - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
 *     the flag RAY_SHADOW_RAY_CAST_AO.
 */
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
        KernelGlobals *kg,
        ccl_local_param BackgroundAOLocals *locals)
{
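        /* Reset the per-work-group queue counters used for the local enqueue
         * steps at the end of this kernel; the barrier makes the zeroed values
         * visible to all threads in the group.
         */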
        if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
                locals->queue_atomics_bg = 0;
                locals->queue_atomics_ao = 0;
        }
        ccl_barrier(CCL_LOCAL_MEM_FENCE);

        char enqueue_flag = 0;
        char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
        int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
        ray_index = get_ray_index(kg, ray_index,
                                  QUEUE_ACTIVE_AND_REGENERATED_RAYS,
                                  kernel_split_state.queue_data,
                                  kernel_split_params.queue_size,
                                  0);
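        /* ray_index now addresses this thread's slot in the split kernel state
         * arrays, or is QUEUE_EMPTY_SLOT when there is no work for this thread.
         */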

#ifdef __COMPUTE_DEVICE_GPU__
        /* If we are executing on a GPU device, we exit all threads that are
         * not required.
         *
         * If we are executing on a CPU device, then we need to keep all threads
         * active since we have barrier() calls later in the kernel. CPU devices
         * expect all threads to execute the barrier statement.
         */
        if(ray_index == QUEUE_EMPTY_SLOT) {
                return;
        }
#endif  /* __COMPUTE_DEVICE_GPU__ */

#ifndef __COMPUTE_DEVICE_GPU__
        if(ray_index != QUEUE_EMPTY_SLOT) {
#endif

        int stride = kernel_split_params.stride;

        unsigned int work_index;
        unsigned int pixel_x;
        unsigned int pixel_y;

        unsigned int tile_x;
        unsigned int tile_y;
        unsigned int sample;

        RNG rng = kernel_split_state.rng[ray_index];
        ccl_global PathState *state = 0x0;
        float3 throughput;

        ccl_global char *ray_state = kernel_split_state.ray_state;
        ShaderData *sd = &kernel_split_state.sd[ray_index];
        ccl_global float *buffer = kernel_split_params.buffer;

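        /* First pass over active rays: fetch per-ray state, locate this ray's
         * pixel in the render buffer and handle shadow catcher and holdout
         * surfaces.
         */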
        if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {

                throughput = kernel_split_state.throughput[ray_index];
                state = &kernel_split_state.path_state[ray_index];

                work_index = kernel_split_state.work_array[ray_index];
                sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
                get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
                                        &tile_x, &tile_y,
                                        work_index,
                                        ray_index);

                buffer += (kernel_split_params.offset + pixel_x + pixel_y * stride) * kernel_data.film.pass_stride;

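                /* Shadow catcher setup: if a catcher object is hit directly
                 * from the camera, flag the path and remember the catcher
                 * object; for non-transparent backgrounds the background color
                 * is evaluated and stored in L->shadow_color.
                 */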
#ifdef __SHADOW_TRICKS__
                if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
                        if(state->flag & PATH_RAY_CAMERA) {
                                state->flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_SHADOW_CATCHER_ONLY);
                                state->catcher_object = sd->object;
                                if(!kernel_data.background.transparent) {
                                        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
                                        ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
                                        L->shadow_color = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
                                }
                        }
                }
                else {
                        state->flag &= ~PATH_RAY_SHADOW_CATCHER_ONLY;
                }
#endif  /* __SHADOW_TRICKS__ */

                /* holdout */
#ifdef __HOLDOUT__
                if(((sd->flag & SD_HOLDOUT) ||
                    (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
                   (state->flag & PATH_RAY_CAMERA))
                {
                        if(kernel_data.background.transparent) {
                                float3 holdout_weight;
                                if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
                                        holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
                                }
                                else {
                                        holdout_weight = shader_holdout_eval(kg, sd);
                                }
                                /* any throughput is ok, should all be identical here */
                                kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput);
                        }
                        if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
                                ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
                                enqueue_flag = 1;
                        }
                }
#endif  /* __HOLDOUT__ */
        }

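        /* Second pass: write data passes, blur sharp BSDFs after blurry
         * bounces, accumulate indirect primitive emission and apply
         * probabilistic path termination (Russian roulette).
         */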
        if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
                PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
                /* Holdout mask objects do not write data passes. */
                kernel_write_data_passes(kg,
                                         buffer,
                                         L,
                                         sd,
                                         sample,
                                         state,
                                         throughput);
                /* Blurring of BSDF after bounces, for rays that have a small
                 * likelihood of following this particular path (diffuse, rough
                 * glossy).
                 */
                if(kernel_data.integrator.filter_glossy != FLT_MAX) {
                        float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
                        if(blur_pdf < 1.0f) {
                                float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
                                shader_bsdf_blur(kg, sd, blur_roughness);
                        }
                }

#ifdef __EMISSION__
                /* emission */
                if(sd->flag & SD_EMISSION) {
                        /* TODO(sergey): is isect.t wrong here for transparent surfaces? */
                        float3 emission = indirect_primitive_emission(
                                kg,
                                sd,
                                kernel_split_state.isect[ray_index].t,
                                state->flag,
                                state->ray_pdf);
                        path_radiance_accum_emission(L, throughput, emission, state->bounce);
                }
#endif  /* __EMISSION__ */

                /* Path termination. This is a strange place to put the
                 * termination, it's mainly due to the mixed-in MIS that we use.
                 * It gives too many unneeded shader evaluations; we only need
                 * emission if we are going to terminate.
                 */
                float probability = path_state_terminate_probability(kg, state, throughput);

                if(probability == 0.0f) {
                        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
                        enqueue_flag = 1;
                }

                if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
                        if(probability != 1.0f) {
                                float terminate = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_TERMINATE);
                                if(terminate >= probability) {
                                        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
                                        enqueue_flag = 1;
                                }
                                else {
                                        kernel_split_state.throughput[ray_index] = throughput/probability;
                                }
                        }
                }
        }

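        /* Third pass: for rays that are still active, sample an ambient
         * occlusion direction and queue a shadow ray for the AO
         * shadow_blocked() kernel.
         */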
#ifdef __AO__
        if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion ||
                   (sd->flag & SD_AO))
                {
                        /* todo: solve correlation */
                        float bsdf_u, bsdf_v;
                        path_state_rng_2D(kg, &rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);

                        float ao_factor = kernel_data.background.ao_factor;
                        float3 ao_N;
                        kernel_split_state.ao_bsdf[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
                        kernel_split_state.ao_alpha[ray_index] = shader_bsdf_alpha(kg, sd);

                        float3 ao_D;
                        float ao_pdf;
                        sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);

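                        /* Only cast the AO ray when the sampled direction lies
                         * in front of the geometric normal and its pdf is
                         * non-zero.
                         */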
                        if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
                                Ray _ray;
                                _ray.P = ray_offset(sd->P, sd->Ng);
                                _ray.D = ao_D;
                                _ray.t = kernel_data.background.ao_distance;
#ifdef __OBJECT_MOTION__
                                _ray.time = sd->time;
#endif
                                _ray.dP = sd->dP;
                                _ray.dD = differential3_zero();
                                kernel_split_state.ao_light_ray[ray_index] = _ray;

                                ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
                                enqueue_flag_AO_SHADOW_RAY_CAST = 1;
                        }
                }
        }
#endif  /* __AO__ */
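        /* Store back the updated RNG state; random numbers may have been
         * consumed above for the termination decision and the AO direction
         * sample.
         */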
        kernel_split_state.rng[ray_index] = rng;

#ifndef __COMPUTE_DEVICE_GPU__
        }
#endif

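        /* The per-work-group counters zeroed at the top of the kernel are used
         * by enqueue_ray_index_local() to compact enqueue requests within the
         * work-group before updating the global queue index.
         */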
        /* Enqueue RAY_UPDATE_BUFFER rays. */
        enqueue_ray_index_local(ray_index,
                                QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
                                enqueue_flag,
                                kernel_split_params.queue_size,
                                &locals->queue_atomics_bg,
                                kernel_split_state.queue_data,
                                kernel_split_params.queue_index);

#ifdef __AO__
        /* Enqueue to-shadow-ray-cast rays. */
        enqueue_ray_index_local(ray_index,
                                QUEUE_SHADOW_RAY_CAST_AO_RAYS,
                                enqueue_flag_AO_SHADOW_RAY_CAST,
                                kernel_split_params.queue_size,
                                &locals->queue_atomics_ao,
                                kernel_split_state.queue_data,
                                kernel_split_params.queue_index);
#endif
}

CCL_NAMESPACE_END