ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / split / kernel_holdout_emission_blurring_pathtermination_ao.h
index 174070a..63bc5a8 100644 (file)
  * limitations under the License.
  */
 
-#include "kernel_split_common.h"
+CCL_NAMESPACE_BEGIN
 
-/*
- * Note on kernel_holdout_emission_blurring_pathtermination_ao kernel.
- * This is the sixth kernel in the ray tracing logic. This is the fifth
- * of the path iteration kernels. This kernel takes care of the logic to process
- * "material of type holdout", indirect primitive emission, bsdf blurring,
- * probabilistic path termination and AO.
+/* This kernel takes care of the logic to process "material of type holdout",
+ * indirect primitive emission, bsdf blurring, probabilistic path termination
+ * and AO.
  *
- * This kernels determines the rays for which a shadow_blocked() function associated with AO should be executed.
- * Those rays for which a shadow_blocked() function for AO must be executed are marked with flag RAY_SHADOW_RAY_CAST_ao and
- * enqueued into the queue QUEUE_SHADOW_RAY_CAST_AO_RAYS
+ * This kernel determines the rays for which a shadow_blocked() function
+ * associated with AO should be executed. Such rays are marked with the flag
+ * RAY_SHADOW_RAY_CAST_AO and enqueued into the queue
+ * QUEUE_SHADOW_RAY_CAST_AO_RAYS.
  *
  * The ray state of rays that are terminated in this kernel is changed to RAY_UPDATE_BUFFER
  *
- * The input and output are as follows,
- *
- * rng_coop ---------------------------------------------|--- kernel_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
- * throughput_coop --------------------------------------|                                                           |--- PathState_coop
- * PathRadiance_coop ------------------------------------|                                                           |--- throughput_coop
- * Intersection_coop ------------------------------------|                                                           |--- L_transparent_coop
- * PathState_coop ---------------------------------------|                                                           |--- per_sample_output_buffers
- * L_transparent_coop -----------------------------------|                                                           |--- PathRadiance_coop
- * shader_data ------------------------------------------|                                                           |--- ShaderData
- * ray_state --------------------------------------------|                                                           |--- ray_state
- * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------|                                                           |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
- * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---|                                                           |--- AOAlpha_coop
- * kg (globals + data) ----------------------------------|                                                           |--- AOBSDF_coop
- * parallel_samples -------------------------------------|                                                           |--- AOLightRay_coop
- * per_sample_output_buffers ----------------------------|                                                           |
- * sw ---------------------------------------------------|                                                           |
- * sh ---------------------------------------------------|                                                           |
- * sx ---------------------------------------------------|                                                           |
- * sy ---------------------------------------------------|                                                           |
- * stride -----------------------------------------------|                                                           |
- * work_array -------------------------------------------|                                                           |
- * queuesize --------------------------------------------|                                                           |
- * start_sample -----------------------------------------|                                                           |
+ * Note on Queues:
+ * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS
+ * and processes only the rays of state RAY_ACTIVE.
+ * There are different points in this kernel where a ray may terminate and
+ * reach the RAY_UPDATE_BUFFER state. These rays are enqueued into the
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. They will still be present in
+ * the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has
+ * been changed to RAY_UPDATE_BUFFER, there is no problem.
  *
- * Note on Queues :
- * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only
- * the rays of state RAY_ACTIVE.
- * There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFFER
- * state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will
- * still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been
- * changed to RAY_UPDATE_BUFFER, there is no problem.
- *
- * State of queues when this kernel is called :
+ * State of queues when this kernel is called:
  * At entry,
- * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
- * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays.
- * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
+ *   - QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and
+ *     RAY_REGENERATED rays
+ *   - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
+ *     RAY_TO_REGENERATE rays.
+ *   - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
  * At exit,
- * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and RAY_UPDATE_BUFFER rays
- * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays
- * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO
+ *   - QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE,
+ *     RAY_REGENERATED and RAY_UPDATE_BUFFER rays.
+ *   - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
+ *     RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays.
+ *   - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
+ *     flag RAY_SHADOW_RAY_CAST_AO.
  */
 
-__kernel void kernel_holdout_emission_blurring_pathtermination_ao(
-       ccl_global char *globals,
-       ccl_constant KernelData *data,
-       ccl_global char *shader_data,               /* Required throughout the kernel except probabilistic path termination and AO */
-       ccl_global float *per_sample_output_buffers,
-       ccl_global uint *rng_coop,                  /* Required for "kernel_write_data_passes" and AO */
-       ccl_global float3 *throughput_coop,         /* Required for handling holdout material and AO */
-       ccl_global float *L_transparent_coop,       /* Required for handling holdout material */
-       PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */
-       ccl_global PathState *PathState_coop,       /* Required throughout the kernel and AO */
-       Intersection *Intersection_coop, /* Required for indirect primitive emission */
-       ccl_global float3 *AOAlpha_coop,            /* Required for AO */
-       ccl_global float3 *AOBSDF_coop,             /* Required for AO */
-       ccl_global Ray *AOLightRay_coop,            /* Required for AO */
-       int sw, int sh, int sx, int sy, int stride,
-       ccl_global char *ray_state,                /* Denotes the state of each ray */
-       ccl_global unsigned int *work_array,       /* Denotes the work that each ray belongs to */
-       ccl_global int *Queue_data,                /* Queue memory */
-       ccl_global int *Queue_index,               /* Tracks the number of elements in each queue */
-       int queuesize,                             /* Size (capacity) of each queue */
-#ifdef __WORK_STEALING__
-       unsigned int start_sample,
-#endif
-       int parallel_samples                       /* Number of samples to be processed in parallel */
-       )
+ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
+    KernelGlobals *kg, ccl_local_param BackgroundAOLocals *locals)
 {
-       ccl_local unsigned int local_queue_atomics_bg;
-       ccl_local unsigned int local_queue_atomics_ao;
-       if(get_local_id(0) == 0 && get_local_id(1) == 0) {
-               local_queue_atomics_bg = 0;
-               local_queue_atomics_ao = 0;
-       }
-       barrier(CLK_LOCAL_MEM_FENCE);
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    locals->queue_atomics_bg = 0;
+    locals->queue_atomics_ao = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
-       char enqueue_flag = 0;
-       char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
-       int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
-       ray_index = get_ray_index(ray_index, QUEUE_ACTIVE_AND_REGENERATED_RAYS, Queue_data, queuesize, 0);
+#ifdef __AO__
+  char enqueue_flag = 0;
+#endif
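+  /* Map this thread to a ray slot in QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+   * get_ray_index() returns QUEUE_EMPTY_SLOT when there is no work for it. */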
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
 
 #ifdef __COMPUTE_DEVICE_GPU__
-       /* If we are executing on a GPU device, we exit all threads that are not required
-        * If we are executing on a CPU device, then we need to keep all threads active
-        * since we have barrier() calls later in the kernel. CPU devices
-        * expect all threads to execute barrier statement.
-        */
-       if(ray_index == QUEUE_EMPTY_SLOT)
-               return;
-#endif
+  /* If we are executing on a GPU device, we exit all threads that are not
+   * required.
+   *
+   * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices
+   * expect all threads to execute the barrier statement.
+   */
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
+#endif /* __COMPUTE_DEVICE_GPU__ */
 
 #ifndef __COMPUTE_DEVICE_GPU__
-       if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
-               /* Load kernel globals structure and ShaderData structure */
-               KernelGlobals *kg = (KernelGlobals *)globals;
-               ShaderData *sd = (ShaderData *)shader_data;
-
-#ifdef __WORK_STEALING__
-               unsigned int my_work;
-               unsigned int pixel_x;
-               unsigned int pixel_y;
-#endif
-               unsigned int tile_x;
-               unsigned int tile_y;
-               int my_sample_tile;
-               unsigned int sample;
-
-               ccl_global RNG *rng = 0x0;
-               ccl_global PathState *state = 0x0;
-               float3 throughput;
-
-               if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-
-                       throughput = throughput_coop[ray_index];
-                       state = &PathState_coop[ray_index];
-                       rng = &rng_coop[ray_index];
-#ifdef __WORK_STEALING__
-                       my_work = work_array[ray_index];
-                       sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
-                       get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
-                       my_sample_tile = 0;
-#else // __WORK_STEALING__
-                       sample = work_array[ray_index];
-                       /* buffer's stride is "stride"; Find x and y using ray_index */
-                       int tile_index = ray_index / parallel_samples;
-                       tile_x = tile_index % sw;
-                       tile_y = tile_index / sw;
-                       my_sample_tile = ray_index - (tile_index * parallel_samples);
-#endif // __WORK_STEALING__
-                       per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
-
-                       /* holdout */
-#ifdef __HOLDOUT__
-                       if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state->flag & PATH_RAY_CAMERA)) {
-                               if(kernel_data.background.transparent) {
-                                       float3 holdout_weight;
-
-                                       if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK)
-                                               holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
-                                       else
-                                               holdout_weight = shader_holdout_eval(kg, sd);
-
-                                       /* any throughput is ok, should all be identical here */
-                                       L_transparent_coop[ray_index] += average(holdout_weight*throughput);
-                               }
-
-                               if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK) {
-                                       ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-                                       enqueue_flag = 1;
-                               }
-                       }
-#endif
-               }
-
-               if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-
-                       PathRadiance *L = &PathRadiance_coop[ray_index];
-                       /* holdout mask objects do not write data passes */
-                       kernel_write_data_passes(kg, per_sample_output_buffers, L, sd, sample, state, throughput);
-
-                       /* blurring of bsdf after bounces, for rays that have a small likelihood
-                               * of following this particular path (diffuse, rough glossy) */
-                       if(kernel_data.integrator.filter_glossy != FLT_MAX) {
-                               float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
-
-                               if(blur_pdf < 1.0f) {
-                                       float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
-                                       shader_bsdf_blur(kg, sd, blur_roughness);
-                               }
-                       }
-
-#ifdef __EMISSION__
-                       /* emission */
-                       if(ccl_fetch(sd, flag) & SD_EMISSION) {
-                               /* todo: is isect.t wrong here for transparent surfaces? */
-                               float3 emission = indirect_primitive_emission(kg, sd, Intersection_coop[ray_index].t, state->flag, state->ray_pdf);
-                               path_radiance_accum_emission(L, throughput, emission, state->bounce);
-                       }
+  if (ray_index != QUEUE_EMPTY_SLOT) {
 #endif
 
-                       /* path termination. this is a strange place to put the termination, it's
-                        * mainly due to the mixed in MIS that we use. gives too many unneeded
-                        * shader evaluations, only need emission if we are going to terminate */
-                       float probability = path_state_terminate_probability(kg, state, throughput);
-
-                       if(probability == 0.0f) {
-                               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-                               enqueue_flag = 1;
-                       }
-
-                       if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-                               if(probability != 1.0f) {
-                                       float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
-
-                                       if(terminate >= probability) {
-                                               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-                                               enqueue_flag = 1;
-                                       } else {
-                                               throughput_coop[ray_index] = throughput/probability;
-                                       }
-                               }
-                       }
-               }
+    ccl_global PathState *state = 0x0;
+    float3 throughput;
+
+    ccl_global char *ray_state = kernel_split_state.ray_state;
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+      ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+      ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+      ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+      PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+      throughput = kernel_split_state.throughput[ray_index];
+      state = &kernel_split_state.path_state[ray_index];
+
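+      /* Holdout handling, data passes, BSDF blurring and indirect primitive
+       * emission are all handled inside kernel_path_shader_apply(), which
+       * returns false when the path should end (e.g. a holdout mask was hit). */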
+      if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, buffer)) {
+        kernel_split_path_end(kg, ray_index);
+      }
+    }
+
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      /* Path termination. This is a strange place to put the termination; it's
+       * mainly due to the mixed-in MIS that we use. It gives too many unneeded
+       * shader evaluations; we only need emission if we are going to terminate.
+       */
+      float probability = path_state_continuation_probability(kg, state, throughput);
+
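+      /* Russian roulette: paths that survive the termination test divide their
+       * throughput by the continuation probability, keeping the estimator
+       * unbiased. */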
+      if (probability == 0.0f) {
+        kernel_split_path_end(kg, ray_index);
+      }
+      else if (probability < 1.0f) {
+        float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+        if (terminate >= probability) {
+          kernel_split_path_end(kg, ray_index);
+        }
+        else {
+          kernel_split_state.throughput[ray_index] = throughput / probability;
+        }
+      }
+
+      if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+        kernel_update_denoising_features(kg, sd, state, L);
+      }
+    }
 
 #ifdef __AO__
-               if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-                       /* ambient occlusion */
-                       if(kernel_data.integrator.use_ambient_occlusion || (ccl_fetch(sd, flag) & SD_AO)) {
-                               /* todo: solve correlation */
-                               float bsdf_u, bsdf_v;
-                               path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
-                               float ao_factor = kernel_data.background.ao_factor;
-                               float3 ao_N;
-                               AOBSDF_coop[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
-                               AOAlpha_coop[ray_index] = shader_bsdf_alpha(kg, sd);
-
-                               float3 ao_D;
-                               float ao_pdf;
-                               sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      /* ambient occlusion */
+      if (kernel_data.integrator.use_ambient_occlusion) {
+        enqueue_flag = 1;
+      }
+    }
+#endif /* __AO__ */
 
-                               if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
-                                       Ray _ray;
-                                       _ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
-                                       _ray.D = ao_D;
-                                       _ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
-                                       _ray.time = ccl_fetch(sd, time);
-#endif
-                                       _ray.dP = ccl_fetch(sd, dP);
-                                       _ray.dD = differential3_zero();
-                                       AOLightRay_coop[ray_index] = _ray;
-
-                                       ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
-                                       enqueue_flag_AO_SHADOW_RAY_CAST = 1;
-                               }
-                       }
-               }
-#endif
 #ifndef __COMPUTE_DEVICE_GPU__
-       }
+  }
 #endif
 
-       /* Enqueue RAY_UPDATE_BUFFER rays */
-       enqueue_ray_index_local(ray_index, QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, queuesize, &local_queue_atomics_bg, Queue_data, Queue_index);
 #ifdef __AO__
-       /* Enqueue to-shadow-ray-cast rays */
-       enqueue_ray_index_local(ray_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, enqueue_flag_AO_SHADOW_RAY_CAST, queuesize, &local_queue_atomics_ao, Queue_data, Queue_index);
+  /* Enqueue to-shadow-ray-cast rays. */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          &locals->queue_atomics_ao,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 #endif
 }
+
+CCL_NAMESPACE_END
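
For context, a minimal sketch of how a per-device entry point might drive this
ccl_device function. The wrapper name and signature below are illustrative
assumptions (the real entry points are generated from a shared split-kernel
template), not part of this change:

  /* Hypothetical OpenCL-style wrapper. Each work-group shares one
   * BackgroundAOLocals instance holding the queue_atomics_bg and
   * queue_atomics_ao counters used above. */
  __kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao(
      KernelGlobals *kg)
  {
    ccl_local BackgroundAOLocals locals;
    kernel_holdout_emission_blurring_pathtermination_ao(kg, &locals);
  }

Passing the locals through a ccl_local_param pointer keeps the work-group
shared-memory declaration in the actual kernel entry point (OpenCL only allows
local declarations there), so the same device function can also be compiled
for the CPU split kernel.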