ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / split / kernel_subsurface_scatter.h
index 4eaa7f5..ba06ae3 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
+#if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
 
-ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
-                                          ccl_local_param unsigned int* local_queue_atomics)
+/* Prepare the ray at `ray_index` for the branched-path subsurface indirect light loop.
+ *
+ * Runs the generic indirect-loop init first, then zeroes the subsurface closure/sample
+ * cursors and the hit counters kept in the ray's SplitBranchedState, and finally applies
+ * ADD_RAY_FLAG with RAY_BRANCHED_SUBSURFACE_INDIRECT to the ray's state entry. */
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg,
+                                                                                 int ray_index)
 {
+  kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+  SplitBranchedState *branched = &kernel_split_state.branched_state[ray_index];
+
+  /* No hits recorded yet, and no hit to continue from. */
+  branched->next_hit = 0;
+  branched->num_hits = 0;
+
+  /* Begin with the first sample of the first closure. */
+  branched->ss_next_sample = 0;
+  branched->ss_next_closure = 0;
+
+  ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+}
+/* Run, or resume, the branched-path subsurface scattering loop for one ray.
+ *
+ * Three nested loops (BSSRDF closures, samples per closure, hits per sample) start
+ * from the cursors stored in the ray's SplitBranchedState: ss_next_closure,
+ * ss_next_sample and next_hit.
+ *
+ * Returns true when the loop is suspended: either the nested
+ * kernel_split_branched_path_surface_indirect_light_iter() call returned true (the
+ * three cursors are saved first), or shared_sample_count is still above zero once all
+ * closures are done. Returns false after calling
+ * kernel_split_branched_path_indirect_loop_end(). */
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(
+    KernelGlobals *kg, int ray_index)
+{
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+  ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+  /* Closure loop: starts at the saved ss_next_closure; non-BSSRDF closures are skipped. */
+  for (int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+    ShaderClosure *sc = &sd->closure[i];
+
+    if (!CLOSURE_IS_BSSRDF(sc->type))
+      continue;
+
+    /* Closure memory will be overwritten, so read required variables now. */
+    Bssrdf *bssrdf = (Bssrdf *)sc;
+    ClosureType bssrdf_type = sc->type;
+    float bssrdf_roughness = bssrdf->roughness;
+
+    /* Set up the random number generator: the LCG state is seeded only while all four
+     * cursors (ss_next_sample, next_hit, next_closure, next_sample) are still zero. */
+    if (branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+        branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+      branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
+                                                           0x68bc21eb);
+    }
+    int num_samples = kernel_data.integrator.subsurface_samples * 3; /* NOTE(review): why x3? confirm */
+    float num_samples_inv = 1.0f / num_samples;
+    uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+    /* do subsurface scatter step with copy of shader data, this will
+     * replace the BSSRDF with a diffuse BSDF closure.
+     * The sample loop starts at the saved ss_next_sample; every sample first resets
+     * the ray's live path state to the copy kept in branched_state. */
+    for (int j = branched_state->ss_next_sample; j < num_samples; j++) {
+      ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+      *hit_state = branched_state->path_state;
+      hit_state->rng_hash = bssrdf_rng_hash;
+      path_state_branch(hit_state, j, num_samples);
+
+      ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
+      float bssrdf_u, bssrdf_v;
+      path_branched_rng_2D(
+          kg, bssrdf_rng_hash, hit_state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+
+      /* intersection is expensive so avoid doing multiple times for the same input:
+       * it only runs while next_hit, next_closure and next_sample are all zero,
+       * otherwise the num_hits and ss_isect already stored in branched_state are used. */
+      if (branched_state->next_hit == 0 && branched_state->next_closure == 0 &&
+          branched_state->next_sample == 0) {
+        uint lcg_state = branched_state->lcg_state;
+        LocalIntersection ss_isect_private; /* local copy, written back to *ss_isect below */
+
+        branched_state->num_hits = subsurface_scatter_multi_intersect(
+            kg, &ss_isect_private, sd, hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+        branched_state->lcg_state = lcg_state;
+        *ss_isect = ss_isect_private;
+      }
+
+      hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
+#  ifdef __VOLUME__
+      Ray volume_ray = branched_state->ray;
+      bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+                                      sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+#  endif /* __VOLUME__ */
+
+      /* compute lighting with the BSDF closure; the hit loop starts at the saved next_hit */
+      for (int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+        ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
+        *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
+                           * important as the indirect path will write into bssrdf_sd */
+        /* The setup call works on a local copy of the intersection, stored back after. */
+        LocalIntersection ss_isect_private = *ss_isect;
+        subsurface_scatter_multi_setup(
+            kg, &ss_isect_private, hit, bssrdf_sd, hit_state, bssrdf_type, bssrdf_roughness);
+        *ss_isect = ss_isect_private;
+
+#  ifdef __VOLUME__
+        if (need_update_volume_stack) {
+          /* Setup ray from previous surface point to the new one. */
+          float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+          volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+          for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
+            hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
+          }
+
+          kernel_volume_stack_update_for_subsurface(
+              kg, emission_sd, &volume_ray, hit_state->volume_stack);
+        }
+#  endif /* __VOLUME__ */
+
+#  ifdef __EMISSION__
+        if (branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+          /* direct light, only while next_closure and next_sample are both still zero */
+          if (kernel_data.integrator.use_direct_light) {
+            int all = (kernel_data.integrator.sample_all_lights_direct) ||
+                      (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
+            kernel_branched_path_surface_connect_light(kg,
+                                                       bssrdf_sd,
+                                                       emission_sd,
+                                                       hit_state,
+                                                       branched_state->throughput,
+                                                       num_samples_inv,
+                                                       L,
+                                                       all);
+          }
+        }
+#  endif /* __EMISSION__ */
+
+        /* indirect light: when the nested iteration returns true, store the current
+         * closure, sample and hit indices so the loops above start from them on the
+         * next call, and report the loop as suspended. */
+        if (kernel_split_branched_path_surface_indirect_light_iter(
+                kg, ray_index, num_samples_inv, bssrdf_sd, false, false)) {
+          branched_state->ss_next_closure = i;
+          branched_state->ss_next_sample = j;
+          branched_state->next_hit = hit;
+
+          return true;
+        }
+
+        branched_state->next_closure = 0;
+      }
+
+      branched_state->next_hit = 0; /* the next sample starts at its first hit */
+    }
+
+    branched_state->ss_next_sample = 0; /* the next closure starts at its first sample */
+  }
+
+  branched_state->ss_next_closure = sd->num_closure; /* closure loop is skipped on re-entry */
+  /* Report the loop as suspended while shared_sample_count is above zero. */
+  branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+  if (branched_state->waiting_on_shared_samples) {
+    return true;
+  }
+
+  kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+  return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+/* Split-kernel step for subsurface scattering.
+ *
+ * The work item with flat index 0 resets the queue indices of
+ * QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; every
+ * work item then calls get_ray_index() on both queues.
+ *
+ * With __SUBSURFACE__: a RAY_ACTIVE ray whose shader data has SD_BSSRDF set is handled
+ * either by kernel_path_subsurface_scatter() or, with __BRANCHED_PATH__ when the
+ * integrator is branched and the ray is not flagged RAY_BRANCHED_INDIRECT, by the
+ * init/iter pair of the branched subsurface loop. With __BRANCHED_PATH__ the kernel also
+ * continues rays in state RAY_SUBSURFACE_INDIRECT_NEXT_ITER taken from
+ * QUEUE_SUBSURFACE_INDIRECT_ITER. */
+ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
+{
+  int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (thread_index == 0) {
+    /* We will empty both queues in this kernel. */
+    kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+    kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+  }
+  /* Same flat index as thread_index; replaced by the result of get_ray_index(). */
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
+  get_ray_index(kg,
+                thread_index,
+                QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                kernel_split_state.queue_data,
+                kernel_split_params.queue_size,
+                1); /* result unused; this is the second queue emptied per the note above */
+
 #ifdef __SUBSURFACE__
-       if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-               *local_queue_atomics = 0;
-       }
-       ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-       int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-       ray_index = get_ray_index(kg, ray_index,
-                                 QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-                                 kernel_split_state.queue_data,
-                                 kernel_split_params.queue_size,
-                                 0);
-
-#ifdef __COMPUTE_DEVICE_GPU__
-       /* If we are executing on a GPU device, we exit all threads that are not
-        * required.
-        *
-        * If we are executing on a CPU device, then we need to keep all threads
-        * active since we have barrier() calls later in the kernel. CPU devices,
-        * expect all threads to execute barrier statement.
-        */
-       if(ray_index == QUEUE_EMPTY_SLOT) {
-               return;
-       }
-#endif
-
-       char enqueue_flag = 0;
-
-#ifndef __COMPUTE_DEVICE_GPU__
-       if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
-
-       ccl_global char *ray_state = kernel_split_state.ray_state;
-       ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-       PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-       ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
-       ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-       ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-       ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
-       ShaderData *sd = &kernel_split_state.sd[ray_index];
-       ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
-
-       if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
-               if(sd->flag & SD_BSSRDF) {
-                       if(kernel_path_subsurface_scatter(kg,
-                                                         sd,
-                                                         emission_sd,
-                                                         L,
-                                                         state,
-                                                         rng,
-                                                         ray,
-                                                         throughput,
-                                                         ss_indirect)) {
-                               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-                               enqueue_flag = 1;
-                       }
-               }
-       }
-
-#ifndef __COMPUTE_DEVICE_GPU__
-       }
-#endif
-
-       /* Enqueue RAY_UPDATE_BUFFER rays. */
-       enqueue_ray_index_local(ray_index,
-                               QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-                               enqueue_flag,
-                               kernel_split_params.queue_size,
-                               local_queue_atomics,
-                               kernel_split_state.queue_data,
-                               kernel_split_params.queue_index);
-
-#endif  /* __SUBSURFACE__ */
+  ccl_global char *ray_state = kernel_split_state.ray_state;
+
+  if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+    ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+    ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+    ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+    ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+    if (sd->flag & SD_BSSRDF) {
+      /* With __BRANCHED_PATH__, the direct scatter below is used only when the integrator
+       * is not branched or the ray is flagged RAY_BRANCHED_INDIRECT; any other ray
+       * starts the resumable branched subsurface loop instead. */
+#  ifdef __BRANCHED_PATH__
+      if (!kernel_data.integrator.branched ||
+          IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#  endif
+        if (kernel_path_subsurface_scatter(
+                kg, sd, emission_sd, L, state, ray, throughput, ss_indirect)) {
+          kernel_split_path_end(kg, ray_index);
+        }
+#  ifdef __BRANCHED_PATH__
+      }
+      else {
+        kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+        /* A true result means the loop was suspended: set the ray state to
+         * RAY_REGENERATED. */
+        if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+          ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+        }
+      }
+#  endif
+    }
+  }
+
+#  ifdef __BRANCHED_PATH__
+  if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
+  }
+
+  /* iter loop: take a ray index from QUEUE_SUBSURFACE_INDIRECT_ITER and, if that ray is
+   * in state RAY_SUBSURFACE_INDIRECT_NEXT_ITER, continue its subsurface loop */
+  ray_index = get_ray_index(kg,
+                            ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+                            QUEUE_SUBSURFACE_INDIRECT_ITER,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
+
+  if (IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+    /* for render passes, sum and reset indirect light pass variables
+     * for the next samples */
+    path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+    path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+    if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+      ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+    }
+  }
+#  endif /* __BRANCHED_PATH__ */
 
+#endif /* __SUBSURFACE__ */
 }
 
 CCL_NAMESPACE_END