ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / split / kernel_holdout_emission_blurring_pathtermination_ao.h
index 8a7c4e11dcfeaa254a16992a436d0ac838d4354c..63bc5a8e0ce51142e3ff1eb560d2a9088408240c 100644 (file)
  * limitations under the License.
  */
 
  * limitations under the License.
  */
 
-#include "kernel_split_common.h"
+CCL_NAMESPACE_BEGIN
 
 
-/* Note on kernel_holdout_emission_blurring_pathtermination_ao kernel.
- * This is the sixth kernel in the ray tracing logic. This is the fifth
- * of the path iteration kernels. This kernel takes care of the logic to process
- * "material of type holdout", indirect primitive emission, bsdf blurring,
- * probabilistic path termination and AO.
+/* This kernel takes care of the logic to process "material of type holdout",
+ * indirect primitive emission, bsdf blurring, probabilistic path termination
+ * and AO.
  *
  *
- * This kernels determines the rays for which a shadow_blocked() function associated with AO should be executed.
- * Those rays for which a shadow_blocked() function for AO must be executed are marked with flag RAY_SHADOW_RAY_CAST_ao and
- * enqueued into the queue QUEUE_SHADOW_RAY_CAST_AO_RAYS
+ * This kernels determines the rays for which a shadow_blocked() function
+ * associated with AO should be executed. Those rays for which a
+ * shadow_blocked() function for AO must be executed are marked with flag
+ * RAY_SHADOW_RAY_CAST_ao and enqueued into the queue
+ * QUEUE_SHADOW_RAY_CAST_AO_RAYS
  *
  * Ray state of rays that are terminated in this kernel are changed to RAY_UPDATE_BUFFER
  *
  *
  * Ray state of rays that are terminated in this kernel are changed to RAY_UPDATE_BUFFER
  *
- * The input and output are as follows,
+ * Note on Queues:
+ * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS
+ * and processes only the rays of state RAY_ACTIVE.
+ * There are different points in this kernel where a ray may terminate and
+ * reach RAY_UPDATE_BUFFER state. These rays are enqueued into
+ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will still be present
+ * in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has
+ * been changed to RAY_UPDATE_BUFFER, there is no problem.
  *
  *
- * rng_coop ---------------------------------------------|--- kernel_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
- * throughput_coop --------------------------------------|                                                           |--- PathState_coop
- * PathRadiance_coop ------------------------------------|                                                           |--- throughput_coop
- * Intersection_coop ------------------------------------|                                                           |--- L_transparent_coop
- * PathState_coop ---------------------------------------|                                                           |--- per_sample_output_buffers
- * L_transparent_coop -----------------------------------|                                                           |--- PathRadiance_coop
- * shader_data ------------------------------------------|                                                           |--- ShaderData
- * ray_state --------------------------------------------|                                                           |--- ray_state
- * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------|                                                           |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
- * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---|                                                           |--- AOAlpha_coop
- * kg (globals + data) ----------------------------------|                                                           |--- AOBSDF_coop
- * parallel_samples -------------------------------------|                                                           |--- AOLightRay_coop
- * per_sample_output_buffers ----------------------------|                                                           |
- * sw ---------------------------------------------------|                                                           |
- * sh ---------------------------------------------------|                                                           |
- * sx ---------------------------------------------------|                                                           |
- * sy ---------------------------------------------------|                                                           |
- * stride -----------------------------------------------|                                                           |
- * work_array -------------------------------------------|                                                           |
- * queuesize --------------------------------------------|                                                           |
- * start_sample -----------------------------------------|                                                           |
- *
- * Note on Queues :
- * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only
- * the rays of state RAY_ACTIVE.
- * There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFFER
- * state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will
- * still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been
- * changed to RAY_UPDATE_BUFFER, there is no problem.
- *
- * State of queues when this kernel is called :
+ * State of queues when this kernel is called:
  * At entry,
  * At entry,
- * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
- * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays.
- * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
+ *   - QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and
+ *     RAY_REGENERATED rays
+ *   - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
+ *     RAY_TO_REGENERATE rays.
+ *   - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
  * At exit,
  * At exit,
- * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and RAY_UPDATE_BUFFER rays
- * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays
- * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO
+ *   - QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE,
+ *     RAY_REGENERATED and RAY_UPDATE_BUFFER rays.
+ *   - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
+ *     RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays.
+ *   - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
+ *     flag RAY_SHADOW_RAY_CAST_AO
  */
  */
+
 ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
 ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
-        ccl_global char *globals,
-        ccl_constant KernelData *data,
-        ccl_global char *shader_data,          /* Required throughout the kernel except probabilistic path termination and AO */
-        ccl_global float *per_sample_output_buffers,
-        ccl_global uint *rng_coop,             /* Required for "kernel_write_data_passes" and AO */
-        ccl_global float3 *throughput_coop,    /* Required for handling holdout material and AO */
-        ccl_global float *L_transparent_coop,  /* Required for handling holdout material */
-        PathRadiance *PathRadiance_coop,       /* Required for "kernel_write_data_passes" and indirect primitive emission */
-        ccl_global PathState *PathState_coop,  /* Required throughout the kernel and AO */
-        Intersection *Intersection_coop,       /* Required for indirect primitive emission */
-        ccl_global float3 *AOAlpha_coop,       /* Required for AO */
-        ccl_global float3 *AOBSDF_coop,        /* Required for AO */
-        ccl_global Ray *AOLightRay_coop,       /* Required for AO */
-        int sw, int sh, int sx, int sy, int stride,
-        ccl_global char *ray_state,            /* Denotes the state of each ray */
-        ccl_global unsigned int *work_array,   /* Denotes the work that each ray belongs to */
-#ifdef __WORK_STEALING__
-        unsigned int start_sample,
-#endif
-        int parallel_samples,                  /* Number of samples to be processed in parallel */
-        int ray_index,
-        char *enqueue_flag,
-        char *enqueue_flag_AO_SHADOW_RAY_CAST)
+    KernelGlobals *kg, ccl_local_param BackgroundAOLocals *locals)
 {
 {
-       /* Load kernel globals structure and ShaderData structure */
-       KernelGlobals *kg = (KernelGlobals *)globals;
-       ShaderData *sd = (ShaderData *)shader_data;
-
-#ifdef __WORK_STEALING__
-       unsigned int my_work;
-       unsigned int pixel_x;
-       unsigned int pixel_y;
-#endif
-       unsigned int tile_x;
-       unsigned int tile_y;
-       int my_sample_tile;
-       unsigned int sample;
-
-       ccl_global RNG *rng = 0x0;
-       ccl_global PathState *state = 0x0;
-       float3 throughput;
-
-       if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-
-               throughput = throughput_coop[ray_index];
-               state = &PathState_coop[ray_index];
-               rng = &rng_coop[ray_index];
-#ifdef __WORK_STEALING__
-               my_work = work_array[ray_index];
-               sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
-               get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
-               my_sample_tile = 0;
-#else // __WORK_STEALING__
-               sample = work_array[ray_index];
-               /* buffer's stride is "stride"; Find x and y using ray_index */
-               int tile_index = ray_index / parallel_samples;
-               tile_x = tile_index % sw;
-               tile_y = tile_index / sw;
-               my_sample_tile = ray_index - (tile_index * parallel_samples);
-#endif // __WORK_STEALING__
-               per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
-
-               /* holdout */
-#ifdef __HOLDOUT__
-               if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state->flag & PATH_RAY_CAMERA)) {
-                       if(kernel_data.background.transparent) {
-                               float3 holdout_weight;
-
-                               if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK)
-                                       holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
-                               else
-                                       holdout_weight = shader_holdout_eval(kg, sd);
-
-                               /* any throughput is ok, should all be identical here */
-                               L_transparent_coop[ray_index] += average(holdout_weight*throughput);
-                       }
-
-                       if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK) {
-                               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-                               *enqueue_flag = 1;
-                       }
-               }
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    locals->queue_atomics_bg = 0;
+    locals->queue_atomics_ao = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+#ifdef __AO__
+  char enqueue_flag = 0;
 #endif
 #endif
-       }
-
-       if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-
-               PathRadiance *L = &PathRadiance_coop[ray_index];
-               /* holdout mask objects do not write data passes */
-               kernel_write_data_passes(kg, per_sample_output_buffers, L, sd, sample, state, throughput);
-
-               /* blurring of bsdf after bounces, for rays that have a small likelihood
-                * of following this particular path (diffuse, rough glossy) */
-               if(kernel_data.integrator.filter_glossy != FLT_MAX) {
-                       float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
-
-                       if(blur_pdf < 1.0f) {
-                               float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
-                               shader_bsdf_blur(kg, sd, blur_roughness);
-                       }
-               }
-
-#ifdef __EMISSION__
-               /* emission */
-               if(ccl_fetch(sd, flag) & SD_EMISSION) {
-                       /* todo: is isect.t wrong here for transparent surfaces? */
-                       float3 emission = indirect_primitive_emission(kg, sd, Intersection_coop[ray_index].t, state->flag, state->ray_pdf);
-                       path_radiance_accum_emission(L, throughput, emission, state->bounce);
-               }
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+  /* If we are executing on a GPU device, we exit all threads that are not
+   * required.
+   *
+   * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices,
+   * expect all threads to execute barrier statement.
+   */
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
+#endif /* __COMPUTE_DEVICE_GPU__ */
+
+#ifndef __COMPUTE_DEVICE_GPU__
+  if (ray_index != QUEUE_EMPTY_SLOT) {
 #endif
 
 #endif
 
-               /* path termination. this is a strange place to put the termination, it's
-                * mainly due to the mixed in MIS that we use. gives too many unneeded
-                * shader evaluations, only need emission if we are going to terminate */
-               float probability = path_state_terminate_probability(kg, state, throughput);
-
-               if(probability == 0.0f) {
-                       ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-                       *enqueue_flag = 1;
-               }
-
-               if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-                       if(probability != 1.0f) {
-                               float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
-
-                               if(terminate >= probability) {
-                                       ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-                                       *enqueue_flag = 1;
-                               } else {
-                                       throughput_coop[ray_index] = throughput/probability;
-                               }
-                       }
-               }
-       }
+    ccl_global PathState *state = 0x0;
+    float3 throughput;
+
+    ccl_global char *ray_state = kernel_split_state.ray_state;
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+      ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+      ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+      ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+      PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+      throughput = kernel_split_state.throughput[ray_index];
+      state = &kernel_split_state.path_state[ray_index];
+
+      if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, buffer)) {
+        kernel_split_path_end(kg, ray_index);
+      }
+    }
+
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      /* Path termination. this is a strange place to put the termination, it's
+     * mainly due to the mixed in MIS that we use. gives too many unneeded
+     * shader evaluations, only need emission if we are going to terminate.
+     */
+      float probability = path_state_continuation_probability(kg, state, throughput);
+
+      if (probability == 0.0f) {
+        kernel_split_path_end(kg, ray_index);
+      }
+      else if (probability < 1.0f) {
+        float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+        if (terminate >= probability) {
+          kernel_split_path_end(kg, ray_index);
+        }
+        else {
+          kernel_split_state.throughput[ray_index] = throughput / probability;
+        }
+      }
+
+      if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+        PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+        kernel_update_denoising_features(kg, sd, state, L);
+      }
+    }
 
 #ifdef __AO__
 
 #ifdef __AO__
-       if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-               /* ambient occlusion */
-               if(kernel_data.integrator.use_ambient_occlusion || (ccl_fetch(sd, flag) & SD_AO)) {
-                       /* todo: solve correlation */
-                       float bsdf_u, bsdf_v;
-                       path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
-                       float ao_factor = kernel_data.background.ao_factor;
-                       float3 ao_N;
-                       AOBSDF_coop[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
-                       AOAlpha_coop[ray_index] = shader_bsdf_alpha(kg, sd);
-
-                       float3 ao_D;
-                       float ao_pdf;
-                       sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
-                       if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
-                               Ray _ray;
-                               _ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
-                               _ray.D = ao_D;
-                               _ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
-                               _ray.time = ccl_fetch(sd, time);
+    if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+      /* ambient occlusion */
+      if (kernel_data.integrator.use_ambient_occlusion) {
+        enqueue_flag = 1;
+      }
+    }
+#endif /* __AO__ */
+
+#ifndef __COMPUTE_DEVICE_GPU__
+  }
 #endif
 #endif
-                               _ray.dP = ccl_fetch(sd, dP);
-                               _ray.dD = differential3_zero();
-                               AOLightRay_coop[ray_index] = _ray;
-
-                               ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
-                               *enqueue_flag_AO_SHADOW_RAY_CAST = 1;
-                       }
-               }
-       }
+
+#ifdef __AO__
+  /* Enqueue to-shadow-ray-cast rays. */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          &locals->queue_atomics_ao,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 #endif
 }
 #endif
 }
+
+CCL_NAMESPACE_END