ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / device / device_split_kernel.cpp
index ee566e5..42e597a 100644 (file)
@@ -27,299 +27,304 @@ CCL_NAMESPACE_BEGIN
 static const double alpha = 0.1; /* alpha for rolling average */
 
 DeviceSplitKernel::DeviceSplitKernel(Device *device)
-: device(device),
-  split_data(device, "split_data"),
-  ray_state(device, "ray_state", MEM_READ_WRITE),
-  queue_index(device, "queue_index"),
-  use_queues_flag(device, "use_queues_flag"),
-  work_pool_wgs(device, "work_pool_wgs"),
-  kernel_data_initialized(false)
+    : device(device),
+      split_data(device, "split_data"),
+      ray_state(device, "ray_state", MEM_READ_WRITE),
+      queue_index(device, "queue_index"),
+      use_queues_flag(device, "use_queues_flag"),
+      work_pool_wgs(device, "work_pool_wgs"),
+      kernel_data_initialized(false)
 {
-       avg_time_per_sample = 0.0;
-
-       kernel_path_init = NULL;
-       kernel_scene_intersect = NULL;
-       kernel_lamp_emission = NULL;
-       kernel_do_volume = NULL;
-       kernel_queue_enqueue = NULL;
-       kernel_indirect_background = NULL;
-       kernel_shader_setup = NULL;
-       kernel_shader_sort = NULL;
-       kernel_shader_eval = NULL;
-       kernel_holdout_emission_blurring_pathtermination_ao = NULL;
-       kernel_subsurface_scatter = NULL;
-       kernel_direct_lighting = NULL;
-       kernel_shadow_blocked_ao = NULL;
-       kernel_shadow_blocked_dl = NULL;
-       kernel_enqueue_inactive = NULL;
-       kernel_next_iteration_setup = NULL;
-       kernel_indirect_subsurface = NULL;
-       kernel_buffer_update = NULL;
+  avg_time_per_sample = 0.0;
+
+  kernel_path_init = NULL;
+  kernel_scene_intersect = NULL;
+  kernel_lamp_emission = NULL;
+  kernel_do_volume = NULL;
+  kernel_queue_enqueue = NULL;
+  kernel_indirect_background = NULL;
+  kernel_shader_setup = NULL;
+  kernel_shader_sort = NULL;
+  kernel_shader_eval = NULL;
+  kernel_holdout_emission_blurring_pathtermination_ao = NULL;
+  kernel_subsurface_scatter = NULL;
+  kernel_direct_lighting = NULL;
+  kernel_shadow_blocked_ao = NULL;
+  kernel_shadow_blocked_dl = NULL;
+  kernel_enqueue_inactive = NULL;
+  kernel_next_iteration_setup = NULL;
+  kernel_indirect_subsurface = NULL;
+  kernel_buffer_update = NULL;
 }
 
 DeviceSplitKernel::~DeviceSplitKernel()
 {
-       split_data.free();
-       ray_state.free();
-       use_queues_flag.free();
-       queue_index.free();
-       work_pool_wgs.free();
-
-       delete kernel_path_init;
-       delete kernel_scene_intersect;
-       delete kernel_lamp_emission;
-       delete kernel_do_volume;
-       delete kernel_queue_enqueue;
-       delete kernel_indirect_background;
-       delete kernel_shader_setup;
-       delete kernel_shader_sort;
-       delete kernel_shader_eval;
-       delete kernel_holdout_emission_blurring_pathtermination_ao;
-       delete kernel_subsurface_scatter;
-       delete kernel_direct_lighting;
-       delete kernel_shadow_blocked_ao;
-       delete kernel_shadow_blocked_dl;
-       delete kernel_enqueue_inactive;
-       delete kernel_next_iteration_setup;
-       delete kernel_indirect_subsurface;
-       delete kernel_buffer_update;
+  split_data.free();
+  ray_state.free();
+  use_queues_flag.free();
+  queue_index.free();
+  work_pool_wgs.free();
+
+  delete kernel_path_init;
+  delete kernel_scene_intersect;
+  delete kernel_lamp_emission;
+  delete kernel_do_volume;
+  delete kernel_queue_enqueue;
+  delete kernel_indirect_background;
+  delete kernel_shader_setup;
+  delete kernel_shader_sort;
+  delete kernel_shader_eval;
+  delete kernel_holdout_emission_blurring_pathtermination_ao;
+  delete kernel_subsurface_scatter;
+  delete kernel_direct_lighting;
+  delete kernel_shadow_blocked_ao;
+  delete kernel_shadow_blocked_dl;
+  delete kernel_enqueue_inactive;
+  delete kernel_next_iteration_setup;
+  delete kernel_indirect_subsurface;
+  delete kernel_buffer_update;
 }
 
-bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeaturesrequested_features)
+bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
 {
 #define LOAD_KERNEL(name) \
-               kernel_##name = get_split_kernel_function(#name, requested_features); \
-               if(!kernel_##name) { \
-                       device->set_error(string("Split kernel error: failed to load kernel_") + #name); \
-                       return false; \
-               }
-
-       LOAD_KERNEL(path_init);
-       LOAD_KERNEL(scene_intersect);
-       LOAD_KERNEL(lamp_emission);
-       if (requested_features.use_volume) {
-               LOAD_KERNEL(do_volume);
-       }
-       LOAD_KERNEL(queue_enqueue);
-       LOAD_KERNEL(indirect_background);
-       LOAD_KERNEL(shader_setup);
-       LOAD_KERNEL(shader_sort);
-       LOAD_KERNEL(shader_eval);
-       LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
-       LOAD_KERNEL(subsurface_scatter);
-       LOAD_KERNEL(direct_lighting);
-       LOAD_KERNEL(shadow_blocked_ao);
-       LOAD_KERNEL(shadow_blocked_dl);
-       LOAD_KERNEL(enqueue_inactive);
-       LOAD_KERNEL(next_iteration_setup);
-       LOAD_KERNEL(indirect_subsurface);
-       LOAD_KERNEL(buffer_update);
+  kernel_##name = get_split_kernel_function(#name, requested_features); \
+  if (!kernel_##name) { \
+    device->set_error(string("Split kernel error: failed to load kernel_") + #name); \
+    return false; \
+  }
+
+  LOAD_KERNEL(path_init);
+  LOAD_KERNEL(scene_intersect);
+  LOAD_KERNEL(lamp_emission);
+  if (requested_features.use_volume) {
+    LOAD_KERNEL(do_volume);
+  }
+  LOAD_KERNEL(queue_enqueue);
+  LOAD_KERNEL(indirect_background);
+  LOAD_KERNEL(shader_setup);
+  LOAD_KERNEL(shader_sort);
+  LOAD_KERNEL(shader_eval);
+  LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
+  LOAD_KERNEL(subsurface_scatter);
+  LOAD_KERNEL(direct_lighting);
+  LOAD_KERNEL(shadow_blocked_ao);
+  LOAD_KERNEL(shadow_blocked_dl);
+  LOAD_KERNEL(enqueue_inactive);
+  LOAD_KERNEL(next_iteration_setup);
+  LOAD_KERNEL(indirect_subsurface);
+  LOAD_KERNEL(buffer_update);
 
 #undef LOAD_KERNEL
 
-       /* Re-initialiaze kernel-dependent data when kernels change. */
-       kernel_data_initialized = false;
+  /* Re-initialiaze kernel-dependent data when kernels change. */
+  kernel_data_initialized = false;
 
-       return true;
+  return true;
 }
 
-size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
+size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory &kg,
+                                                           device_memory &data,
+                                                           uint64_t max_buffer_size)
 {
-       uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
-       VLOG(1) << "Split state element size: "
-               << string_human_readable_number(size_per_element) << " bytes. ("
-               << string_human_readable_size(size_per_element) << ").";
-       return max_buffer_size / size_per_element;
+  uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
+  VLOG(1) << "Split state element size: " << string_human_readable_number(size_per_element)
+          << " bytes. (" << string_human_readable_size(size_per_element) << ").";
+  return max_buffer_size / size_per_element;
 }
 
 bool DeviceSplitKernel::path_trace(DeviceTask *task,
-                                   RenderTiletile,
-                                   device_memorykgbuffer,
-                                   device_memorykernel_data)
+                                   RenderTile &tile,
+                                   device_memory &kgbuffer,
+                                   device_memory &kernel_data)
 {
-       if(device->have_error()) {
-               return false;
-       }
+  if (device->have_error()) {
+    return false;
+  }
 
-       /* Allocate all required global memory once. */
-       if(!kernel_data_initialized) {
-               kernel_data_initialized = true;
+  /* Allocate all required global memory once. */
+  if (!kernel_data_initialized) {
+    kernel_data_initialized = true;
 
-               /* Set local size */
-               int2 lsize = split_kernel_local_size();
-               local_size[0] = lsize[0];
-               local_size[1] = lsize[1];
+    /* Set local size */
+    int2 lsize = split_kernel_local_size();
+    local_size[0] = lsize[0];
+    local_size[1] = lsize[1];
 
-               /* Set global size */
-               int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
+    /* Set global size */
+    int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
 
-               /* Make sure that set work size is a multiple of local
-                * work size dimensions.
-                */
-               global_size[0] = round_up(gsize[0], local_size[0]);
-               global_size[1] = round_up(gsize[1], local_size[1]);
+    /* Make sure that set work size is a multiple of local
+     * work size dimensions.
+     */
+    global_size[0] = round_up(gsize[0], local_size[0]);
+    global_size[1] = round_up(gsize[1], local_size[1]);
 
-               int num_global_elements = global_size[0] * global_size[1];
-               assert(num_global_elements % WORK_POOL_SIZE == 0);
+    int num_global_elements = global_size[0] * global_size[1];
+    assert(num_global_elements % WORK_POOL_SIZE == 0);
 
-               /* Calculate max groups */
+    /* Calculate max groups */
 
-               /* Denotes the maximum work groups possible w.r.t. current requested tile size. */
-               unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : WORK_POOL_SIZE_GPU;
-               unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
+    /* Denotes the maximum work groups possible w.r.t. current requested tile size. */
+    unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU :
+                                                                      WORK_POOL_SIZE_GPU;
+    unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
 
-               /* Allocate work_pool_wgs memory. */
-               work_pool_wgs.alloc_to_device(max_work_groups);
-               queue_index.alloc_to_device(NUM_QUEUES);
-               use_queues_flag.alloc_to_device(1);
-               split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
-               ray_state.alloc(num_global_elements);
-       }
+    /* Allocate work_pool_wgs memory. */
+    work_pool_wgs.alloc_to_device(max_work_groups);
+    queue_index.alloc_to_device(NUM_QUEUES);
+    use_queues_flag.alloc_to_device(1);
+    split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
+    ray_state.alloc(num_global_elements);
+  }
 
-       /* Number of elements in the global state buffer */
-       int num_global_elements = global_size[0] * global_size[1];
+  /* Number of elements in the global state buffer */
+  int num_global_elements = global_size[0] * global_size[1];
 
 #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
-               if(device->have_error()) { \
-                       return false; \
-               } \
-               if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
-                       return false; \
-               }
-
-       tile.sample = tile.start_sample;
-
-       /* for exponential increase between tile updates */
-       int time_multiplier = 1;
-
-       while(tile.sample < tile.start_sample + tile.num_samples) {
-               /* to keep track of how long it takes to run a number of samples */
-               double start_time = time_dt();
-
-               /* initial guess to start rolling average */
-               const int initial_num_samples = 1;
-               /* approx number of samples per second */
-               int samples_per_second = (avg_time_per_sample > 0.0) ?
-                                        int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
-
-               RenderTile subtile = tile;
-               subtile.start_sample = tile.sample;
-               subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);
-
-               if(device->have_error()) {
-                       return false;
-               }
-
-               /* reset state memory here as global size for data_init
-                * kernel might not be large enough to do in kernel
-                */
-               work_pool_wgs.zero_to_device();
-               split_data.zero_to_device();
-               ray_state.zero_to_device();
-
-               if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
-                                                  subtile,
-                                                  num_global_elements,
-                                                  kgbuffer,
-                                                  kernel_data,
-                                                  split_data,
-                                                  ray_state,
-                                                  queue_index,
-                                                  use_queues_flag,
-                                                  work_pool_wgs))
-               {
-                       return false;
-               }
-
-               ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
-
-               bool activeRaysAvailable = true;
-               double cancel_time = DBL_MAX;
-
-               while(activeRaysAvailable) {
-                       /* Do path-iteration in host [Enqueue Path-iteration kernels. */
-                       for(int PathIter = 0; PathIter < 16; PathIter++) {
-                               ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
-                               if (kernel_do_volume) {
-                                       ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
-                               }
-                               ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
-
-                               if(task->get_cancel() && cancel_time == DBL_MAX) {
-                                       /* Wait up to twice as many seconds for current samples to finish
-                                        * to avoid artifacts in render result from ending too soon.
-                                        */
-                                       cancel_time = time_dt() + 2.0 * time_multiplier;
-                               }
-
-                               if(time_dt() > cancel_time) {
-                                       return true;
-                               }
-                       }
-
-                       /* Decide if we should exit path-iteration in host. */
-                       ray_state.copy_from_device(0, global_size[0] * global_size[1], 1);
-
-                       activeRaysAvailable = false;
-
-                       for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
-                               if(!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
-                                       if(IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
-                                               /* Something went wrong, abort to avoid looping endlessly. */
-                                               device->set_error("Split kernel error: invalid ray state");
-                                               return false;
-                                       }
-
-                                       /* Not all rays are RAY_INACTIVE. */
-                                       activeRaysAvailable = true;
-                                       break;
-                               }
-                       }
-
-                       if(time_dt() > cancel_time) {
-                               return true;
-                       }
-               }
-
-               double time_per_sample = ((time_dt()-start_time) / subtile.num_samples);
-
-               if(avg_time_per_sample == 0.0) {
-                       /* start rolling average */
-                       avg_time_per_sample = time_per_sample;
-               }
-               else {
-                       avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
-               }
+  if (device->have_error()) { \
+    return false; \
+  } \
+  if (!kernel_##name->enqueue( \
+          KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
+    return false; \
+  }
+
+  tile.sample = tile.start_sample;
+
+  /* for exponential increase between tile updates */
+  int time_multiplier = 1;
+
+  while (tile.sample < tile.start_sample + tile.num_samples) {
+    /* to keep track of how long it takes to run a number of samples */
+    double start_time = time_dt();
+
+    /* initial guess to start rolling average */
+    const int initial_num_samples = 1;
+    /* approx number of samples per second */
+    int samples_per_second = (avg_time_per_sample > 0.0) ?
+                                 int(double(time_multiplier) / avg_time_per_sample) + 1 :
+                                 initial_num_samples;
+
+    RenderTile subtile = tile;
+    subtile.start_sample = tile.sample;
+    subtile.num_samples = min(samples_per_second,
+                              tile.start_sample + tile.num_samples - tile.sample);
+
+    if (device->have_error()) {
+      return false;
+    }
+
+    /* reset state memory here as global size for data_init
+     * kernel might not be large enough to do in kernel
+     */
+    work_pool_wgs.zero_to_device();
+    split_data.zero_to_device();
+    ray_state.zero_to_device();
+
+    if (!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
+                                        subtile,
+                                        num_global_elements,
+                                        kgbuffer,
+                                        kernel_data,
+                                        split_data,
+                                        ray_state,
+                                        queue_index,
+                                        use_queues_flag,
+                                        work_pool_wgs)) {
+      return false;
+    }
+
+    ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
+
+    bool activeRaysAvailable = true;
+    double cancel_time = DBL_MAX;
+
+    while (activeRaysAvailable) {
+      /* Do path-iteration in host [Enqueue Path-iteration kernels. */
+      for (int PathIter = 0; PathIter < 16; PathIter++) {
+        ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
+        if (kernel_do_volume) {
+          ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
+        }
+        ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(
+            holdout_emission_blurring_pathtermination_ao, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
+        ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
+
+        if (task->get_cancel() && cancel_time == DBL_MAX) {
+          /* Wait up to twice as many seconds for current samples to finish
+           * to avoid artifacts in render result from ending too soon.
+           */
+          cancel_time = time_dt() + 2.0 * time_multiplier;
+        }
+
+        if (time_dt() > cancel_time) {
+          return true;
+        }
+      }
+
+      /* Decide if we should exit path-iteration in host. */
+      ray_state.copy_from_device(0, global_size[0] * global_size[1], 1);
+
+      activeRaysAvailable = false;
+
+      for (int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
+        if (!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
+          if (IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
+            /* Something went wrong, abort to avoid looping endlessly. */
+            device->set_error("Split kernel error: invalid ray state");
+            return false;
+          }
+
+          /* Not all rays are RAY_INACTIVE. */
+          activeRaysAvailable = true;
+          break;
+        }
+      }
+
+      if (time_dt() > cancel_time) {
+        return true;
+      }
+    }
+
+    double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);
+
+    if (avg_time_per_sample == 0.0) {
+      /* start rolling average */
+      avg_time_per_sample = time_per_sample;
+    }
+    else {
+      avg_time_per_sample = alpha * time_per_sample + (1.0 - alpha) * avg_time_per_sample;
+    }
 
 #undef ENQUEUE_SPLIT_KERNEL
 
-               tile.sample += subtile.num_samples;
-               task->update_progress(&tile, tile.w*tile.h*subtile.num_samples);
+    tile.sample += subtile.num_samples;
+    task->update_progress(&tile, tile.w * tile.h * subtile.num_samples);
 
-               time_multiplier = min(time_multiplier << 1, 10);
+    time_multiplier = min(time_multiplier << 1, 10);
 
-               if(task->get_cancel()) {
-                       return true;
-               }
-       }
+    if (task->get_cancel()) {
+      return true;
+    }
+  }
 
-       return true;
+  return true;
 }
 
 CCL_NAMESPACE_END