ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / bvh / bvh_sort.cpp
index 8088dcc..4498a75 100644
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "bvh_build.h"
-#include "bvh_sort.h"
 
-#include "util_algorithm.h"
-#include "util_debug.h"
+#include "bvh/bvh_sort.h"
+
+#include "bvh/bvh_build.h"
+
+#include "util/util_algorithm.h"
+#include "util/util_task.h"
 
 CCL_NAMESPACE_BEGIN
 
-/* silly workaround for float extended precision that happens when compiling
- * on x86, due to one float staying in 80 bit precision register and the other
- * not, which causes the strictly weak ordering to break */
-#if !defined(__i386__)
-#define NO_EXTENDED_PRECISION
-#else
-#define NO_EXTENDED_PRECISION volatile
-#endif
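+/* Minimum number of references for which the threaded sort kicks in; smaller
+ * ranges are sorted directly in the calling thread.
+ */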
+static const int BVH_SORT_THRESHOLD = 4096;
 
 struct BVHReferenceCompare {
-public:
-       int dim;
-
-       BVHReferenceCompare(int dim_)
-       {
-               dim = dim_;
-       }
-
-       bool operator()(const BVHReference& ra, const BVHReference& rb)
-       {
-               NO_EXTENDED_PRECISION float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
-               NO_EXTENDED_PRECISION float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
-
-               if(ca < cb) return true;
-               else if(ca > cb) return false;
-               else if(ra.prim_object() < rb.prim_object()) return true;
-               else if(ra.prim_object() > rb.prim_object()) return false;
-               else if(ra.prim_index() < rb.prim_index()) return true;
-               else if(ra.prim_index() > rb.prim_index()) return false;
-               else if(ra.prim_type() < rb.prim_type()) return true;
-               else if(ra.prim_type() > rb.prim_type()) return false;
-
-               return false;
-       }
+ public:
+  int dim;
+  const BVHUnaligned *unaligned_heuristic;
+  const Transform *aligned_space;
+
+  BVHReferenceCompare(int dim,
+                      const BVHUnaligned *unaligned_heuristic,
+                      const Transform *aligned_space)
+      : dim(dim), unaligned_heuristic(unaligned_heuristic), aligned_space(aligned_space)
+  {
+  }
+
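+  /* Bounds used for comparison: when an aligned space is given, the primitive
+   * bounds are computed in that space via the unaligned heuristic, otherwise
+   * the reference's own bounds are used.
+   */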
+  __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
+  {
+    return (aligned_space != NULL) ?
+               unaligned_heuristic->compute_aligned_prim_boundbox(prim, *aligned_space) :
+               prim.bounds();
+  }
+
+  /* Compare two references.
+   *
+   * The return value is similar to that of strcmp().
+   */
+  __forceinline int compare(const BVHReference &ra, const BVHReference &rb) const
+  {
+    BoundBox ra_bounds = get_prim_bounds(ra), rb_bounds = get_prim_bounds(rb);
+    float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
+    float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
+
+    if (ca < cb)
+      return -1;
+    else if (ca > cb)
+      return 1;
+    else if (ra.prim_object() < rb.prim_object())
+      return -1;
+    else if (ra.prim_object() > rb.prim_object())
+      return 1;
+    else if (ra.prim_index() < rb.prim_index())
+      return -1;
+    else if (ra.prim_index() > rb.prim_index())
+      return 1;
+    else if (ra.prim_type() < rb.prim_type())
+      return -1;
+    else if (ra.prim_type() > rb.prim_type())
+      return 1;
+
+    return 0;
+  }
+
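+  /* Strict weak ordering predicate for sort(), implemented on top of the
+   * three-way compare() above.
+   */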
+  bool operator()(const BVHReference &ra, const BVHReference &rb)
+  {
+    return (compare(ra, rb) < 0);
+  }
+};
+
+static void bvh_reference_sort_threaded(TaskPool *task_pool,
+                                        BVHReference *data,
+                                        const int job_start,
+                                        const int job_end,
+                                        const BVHReferenceCompare &compare);
+
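+/* Task which wraps a single sort job, so that recursive jobs can be pushed to
+ * the TaskPool from bvh_reference_sort_threaded().
+ */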
+class BVHSortTask : public Task {
+ public:
+  BVHSortTask(TaskPool *task_pool,
+              BVHReference *data,
+              const int job_start,
+              const int job_end,
+              const BVHReferenceCompare &compare)
+  {
+    run = function_bind(bvh_reference_sort_threaded, task_pool, data, job_start, job_end, compare);
+  }
 };
 
-void bvh_reference_sort(int start, int end, BVHReference *data, int dim)
+/* Multi-threaded reference sort. */
+static void bvh_reference_sort_threaded(TaskPool *task_pool,
+                                        BVHReference *data,
+                                        const int job_start,
+                                        const int job_end,
+                                        const BVHReferenceCompare &compare)
 {
-       BVHReferenceCompare compare(dim);
-       sort(data+start, data+end, compare);
+  int start = job_start, end = job_end;
+  bool have_work = (start < end);
+  while (have_work) {
+    const int count = job_end - job_start;
+    if (count < BVH_SORT_THRESHOLD) {
+      /* The number of references is low enough that it is faster to finish
+       * the job in one thread than to spawn more tasks.
+       */
+      sort(data + job_start, data + job_end + 1, compare);
+      break;
+    }
+    /* Single quicksort partition step.
+     * Use the median-of-three method to pick the pivot point.
+     */
+    int left = start, right = end;
+    int center = (left + right) >> 1;
+    if (compare.compare(data[left], data[center]) > 0) {
+      swap(data[left], data[center]);
+    }
+    if (compare.compare(data[left], data[right]) > 0) {
+      swap(data[left], data[right]);
+    }
+    if (compare.compare(data[center], data[right]) > 0) {
+      swap(data[center], data[right]);
+    }
+    swap(data[center], data[right - 1]);
+    BVHReference median = data[right - 1];
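+    /* Partition: advance the left and right cursors towards each other,
+     * swapping references which are on the wrong side of the median.
+     */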
+    do {
+      while (compare.compare(data[left], median) < 0) {
+        ++left;
+      }
+      while (compare.compare(data[right], median) > 0) {
+        --right;
+      }
+      if (left <= right) {
+        swap(data[left], data[right]);
+        ++left;
+        --right;
+      }
+    } while (left <= right);
+    /* Only create one new task here, to reduce the impact of latency in the
+     * TaskScheduler.
+     * Generally the current thread keeps working on the left part of the
+     * array, and a new task is created for the right side.
+     * However, if there is nothing left to do on the left side of the array,
+     * no task is created and the current thread works on the right side
+     * instead.
+     */
+    have_work = false;
+    if (left < end) {
+      if (start < right) {
+        task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
+      }
+      else {
+        start = left;
+        have_work = true;
+      }
+    }
+    if (start < right) {
+      end = right;
+      have_work = true;
+    }
+  }
 }
 
-struct BVHReferenceCompareIndexed {
-public:
-       BVHReferenceCompareIndexed(int dim, const BVHReference *data, int start)
-       : dim_(dim),
-         start_(start),
-         data_(data)
-       {}
-
-       bool operator()(const int a, const int b)
-       {
-               const BVHReference& ra = data_[start_ + a];
-               const BVHReference& rb = data_[start_ + b];
-               NO_EXTENDED_PRECISION float ca = ra.bounds().min[dim_] + ra.bounds().max[dim_];
-               NO_EXTENDED_PRECISION float cb = rb.bounds().min[dim_] + rb.bounds().max[dim_];
-
-               if(ca < cb) return true;
-               else if(ca > cb) return false;
-               else if(ra.prim_object() < rb.prim_object()) return true;
-               else if(ra.prim_object() > rb.prim_object()) return false;
-               else if(ra.prim_index() < rb.prim_index()) return true;
-               else if(ra.prim_index() > rb.prim_index()) return false;
-               else if(ra.prim_type() < rb.prim_type()) return true;
-               else if(ra.prim_type() > rb.prim_type()) return false;
-
-               return false;
-       }
-
-private:
-       int dim_, start_;
-       const BVHReference *data_;
-};
-
-/* NOTE: indices are always from 0 to count, even in the cases when start is not
- * zero. This is to simplify indexing in the object splitter. Index array is also
- * always zero-based index.
- */
-void bvh_reference_sort_indices(int start,
-                                int end,
-                                const BVHReference *data,
-                                int *indices,
-                                int dim)
+void bvh_reference_sort(int start,
+                        int end,
+                        BVHReference *data,
+                        int dim,
+                        const BVHUnaligned *unaligned_heuristic,
+                        const Transform *aligned_space)
 {
-       const int count = end - start;
-       for(int i = 0; i < count; ++i) {
-               indices[i] = i;
-       }
-       BVHReferenceCompareIndexed compare(dim, data, start);
-       sort(indices, indices+count, compare);
+  const int count = end - start;
+  BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space);
+  if (count < BVH_SORT_THRESHOLD) {
+    /* It is important not to use any mutex if the array is small enough,
+     * otherwise we end up in a situation where threads go to sleep far
+     * too often.
+     */
+    sort(data + start, data + end, compare);
+  }
+  else {
+    TaskPool task_pool;
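+    /* NOTE: The threaded sorter expects an inclusive end index, hence end - 1. */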
+    bvh_reference_sort_threaded(&task_pool, data, start, end - 1, compare);
+    task_pool.wait_work();
+  }
 }
 
 CCL_NAMESPACE_END