Cycles: Add option to split curve motion primitives by time steps
[blender.git] / intern / cycles / bvh / bvh_build.cpp
1 /*
2  * Adapted from code copyright 2009-2010 NVIDIA Corporation
3  * Modifications Copyright 2011, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "bvh_binning.h"
19 #include "bvh_build.h"
20 #include "bvh_node.h"
21 #include "bvh_params.h"
22 #include "bvh_split.h"
23
24 #include "mesh.h"
25 #include "object.h"
26 #include "scene.h"
27 #include "curves.h"
28
29 #include "util_debug.h"
30 #include "util_foreach.h"
31 #include "util_logging.h"
32 #include "util_progress.h"
33 #include "util_stack_allocator.h"
34 #include "util_simd.h"
35 #include "util_time.h"
36 #include "util_queue.h"
37
38 CCL_NAMESPACE_BEGIN
39
40 /* BVH Build Task */
41
42 class BVHBuildTask : public Task {
43 public:
44         BVHBuildTask(BVHBuild *build,
45                      InnerNode *node,
46                      int child,
47                      const BVHObjectBinning& range,
48                      int level)
49         : range_(range)
50         {
51                 run = function_bind(&BVHBuild::thread_build_node,
52                                     build,
53                                     node,
54                                     child,
55                                     &range_,
56                                     level);
57         }
58 private:
59         BVHObjectBinning range_;
60 };
61
62 class BVHSpatialSplitBuildTask : public Task {
63 public:
64         BVHSpatialSplitBuildTask(BVHBuild *build,
65                                  InnerNode *node,
66                                  int child,
67                                  const BVHRange& range,
68                                  const vector<BVHReference>& references,
69                                  int level)
70         : range_(range),
71           references_(references.begin() + range.start(),
72                       references.begin() + range.end())
73         {
74                 range_.set_start(0);
75                 run = function_bind(&BVHBuild::thread_build_spatial_split_node,
76                                     build,
77                                     node,
78                                     child,
79                                     &range_,
80                                     &references_,
81                                     level,
82                                     _1);
83         }
84 private:
85         BVHRange range_;
86         vector<BVHReference> references_;
87 };
88
89 /* Constructor / Destructor */
90
91 BVHBuild::BVHBuild(const vector<Object*>& objects_,
92                    array<int>& prim_type_,
93                    array<int>& prim_index_,
94                    array<int>& prim_object_,
95                    const BVHParams& params_,
96                    Progress& progress_)
97  : objects(objects_),
98    prim_type(prim_type_),
99    prim_index(prim_index_),
100    prim_object(prim_object_),
101    params(params_),
102    progress(progress_),
103    progress_start_time(0.0),
104    unaligned_heuristic(objects_)
105 {
106         spatial_min_overlap = 0.0f;
107 }
108
109 BVHBuild::~BVHBuild()
110 {
111 }
112
113 /* Adding References */
114
115 void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
116 {
117         if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
118                 Attribute *attr_mP = NULL;
119
120                 if(mesh->has_motion_blur())
121                         attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
122
123                 size_t num_triangles = mesh->num_triangles();
124                 for(uint j = 0; j < num_triangles; j++) {
125                         Mesh::Triangle t = mesh->get_triangle(j);
126                         BoundBox bounds = BoundBox::empty;
127                         PrimitiveType type = PRIMITIVE_TRIANGLE;
128
129                         t.bounds_grow(&mesh->verts[0], bounds);
130
131                         /* motion triangles */
132                         if(attr_mP) {
133                                 const size_t num_verts = mesh->verts.size();
134                                 const size_t num_steps = mesh->motion_steps - 1;
135                                 const float3 *vert_steps = attr_mP->data_float3();
136
137                                 for(size_t step = 0; step < num_steps; step++) {
138                                         t.bounds_grow(vert_steps + step*num_verts, bounds);
139                                 }
140
141                                 type = PRIMITIVE_MOTION_TRIANGLE;
142                         }
143
144                         if(bounds.valid()) {
145                                 references.push_back(BVHReference(bounds, j, i, type));
146                                 root.grow(bounds);
147                                 center.grow(bounds.center2());
148                         }
149                 }
150         }
151
152         if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
153                 Attribute *curve_attr_mP = NULL;
154
155                 if(mesh->has_motion_blur())
156                         curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
157
158                 size_t num_curves = mesh->num_curves();
159                 for(uint j = 0; j < num_curves; j++) {
160                         const Mesh::Curve curve = mesh->get_curve(j);
161                         const float *curve_radius = &mesh->curve_radius[0];
162                         for(int k = 0; k < curve.num_keys - 1; k++) {
163                                 if(curve_attr_mP == NULL) {
164                                         /* Really simple logic for static hair. */
165                                         BoundBox bounds = BoundBox::empty;
166                                         curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
167                                         if(bounds.valid()) {
168                                                 int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
169                                                 references.push_back(BVHReference(bounds, j, i, packed_type));
170                                                 root.grow(bounds);
171                                                 center.grow(bounds.center2());
172                                         }
173                                 }
174                                 else if(params.num_motion_curve_steps == 0 || params.use_spatial_split) {
175                                         /* Simple case of motion curves: single node for the while
176                                          * shutter time. Lowest memory usage but less optimal
177                                          * rendering.
178                                          */
179                                         /* TODO(sergey): Support motion steps for spatially split BVH. */
180                                         BoundBox bounds = BoundBox::empty;
181                                         curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
182                                         const size_t num_keys = mesh->curve_keys.size();
183                                         const size_t num_steps = mesh->motion_steps;
184                                         const float3 *key_steps = curve_attr_mP->data_float3();
185                                         for(size_t step = 0; step < num_steps - 1; step++) {
186                                                 curve.bounds_grow(k,
187                                                                   key_steps + step*num_keys,
188                                                                   curve_radius,
189                                                                   bounds);
190                                         }
191                                         if(bounds.valid()) {
192                                                 int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
193                                                 references.push_back(BVHReference(bounds,
194                                                                                   j,
195                                                                                   i,
196                                                                                   packed_type));
197                                                 root.grow(bounds);
198                                                 center.grow(bounds.center2());
199                                         }
200                                 }
201                                 else {
202                                         /* Motion curves, trace optimized case:  we split curve keys
203                                          * primitives into separate nodes for each of the time steps.
204                                          * This way we minimize overlap of neighbor curve primitives.
205                                          */
206                                         const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
207                                         const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
208                                         const size_t num_steps = mesh->motion_steps;
209                                         const float3 *curve_keys = &mesh->curve_keys[0];
210                                         const float3 *key_steps = curve_attr_mP->data_float3();
211                                         const size_t num_keys = mesh->curve_keys.size();
212                                         /* Calculate bounding box of the previous time step.
213                                          * Will be reused later to avoid duplicated work on
214                                          * calculating BVH time step boundbox.
215                                          */
216                                         float4 prev_keys[4];
217                                         curve.cardinal_motion_keys(curve_keys,
218                                                                    curve_radius,
219                                                                    key_steps,
220                                                                    num_keys,
221                                                                    num_steps,
222                                                                    0.0f,
223                                                                    k - 1, k, k + 1, k + 2,
224                                                                    prev_keys);
225                                         BoundBox prev_bounds = BoundBox::empty;
226                                         curve.bounds_grow(prev_keys, prev_bounds);
227                                         for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
228                                                 const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
229                                                 float4 curr_keys[4];
230                                                 curve.cardinal_motion_keys(curve_keys,
231                                                                            curve_radius,
232                                                                            key_steps,
233                                                                            num_keys,
234                                                                            num_steps,
235                                                                            curr_time,
236                                                                            k - 1, k, k + 1, k + 2,
237                                                                            curr_keys);
238                                                 BoundBox curr_bounds = BoundBox::empty;
239                                                 curve.bounds_grow(curr_keys, curr_bounds);
240                                                 BoundBox bounds = prev_bounds;
241                                                 bounds.grow(curr_bounds);
242                                                 if(bounds.valid()) {
243                                                         int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
244                                                         references.push_back(BVHReference(bounds,
245                                                                                           j,
246                                                                                           i,
247                                                                                           packed_type));
248                                                         root.grow(bounds);
249                                                         center.grow(bounds.center2());
250                                                 }
251                                                 /* Current time boundbox becomes previous one for the
252                                                  * next time step.
253                                                  */
254                                                 prev_bounds = curr_bounds;
255                                         }
256                                 }
257                         }
258                 }
259         }
260 }
261
262 void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
263 {
264         references.push_back(BVHReference(ob->bounds, -1, i, 0));
265         root.grow(ob->bounds);
266         center.grow(ob->bounds.center2());
267 }
268
269 static size_t count_curve_segments(Mesh *mesh)
270 {
271         size_t num = 0, num_curves = mesh->num_curves();
272
273         for(size_t i = 0; i < num_curves; i++)
274                 num += mesh->get_curve(i).num_keys - 1;
275
276         return num;
277 }
278
279 void BVHBuild::add_references(BVHRange& root)
280 {
281         /* reserve space for references */
282         size_t num_alloc_references = 0;
283
284         foreach(Object *ob, objects) {
285                 if(params.top_level) {
286                         if(!ob->is_traceable()) {
287                                 continue;
288                         }
289                         if(!ob->mesh->is_instanced()) {
290                                 if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
291                                         num_alloc_references += ob->mesh->num_triangles();
292                                 }
293                                 if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
294                                         num_alloc_references += count_curve_segments(ob->mesh);
295                                 }
296                         }
297                         else
298                                 num_alloc_references++;
299                 }
300                 else {
301                         if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
302                                 num_alloc_references += ob->mesh->num_triangles();
303                         }
304                         if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
305                                 num_alloc_references += count_curve_segments(ob->mesh);
306                         }
307                 }
308         }
309
310         references.reserve(num_alloc_references);
311
312         /* add references from objects */
313         BoundBox bounds = BoundBox::empty, center = BoundBox::empty;
314         int i = 0;
315
316         foreach(Object *ob, objects) {
317                 if(params.top_level) {
318                         if(!ob->is_traceable()) {
319                                 ++i;
320                                 continue;
321                         }
322                         if(!ob->mesh->is_instanced())
323                                 add_reference_mesh(bounds, center, ob->mesh, i);
324                         else
325                                 add_reference_object(bounds, center, ob, i);
326                 }
327                 else
328                         add_reference_mesh(bounds, center, ob->mesh, i);
329
330                 i++;
331
332                 if(progress.get_cancel()) return;
333         }
334
335         /* happens mostly on empty meshes */
336         if(!bounds.valid())
337                 bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
338
339         root = BVHRange(bounds, center, 0, references.size());
340 }
341
342 /* Build */
343
344 BVHNode* BVHBuild::run()
345 {
346         BVHRange root;
347
348         /* add references */
349         add_references(root);
350
351         if(progress.get_cancel())
352                 return NULL;
353
354         /* init spatial splits */
355         if(params.top_level) {
356                 /* NOTE: Technically it is supported by the builder but it's not really
357                  * optimized for speed yet and not really clear yet if it has measurable
358                  * improvement on render time. Needs some extra investigation before
359                  * enabling spatial split for top level BVH.
360                  */
361                 params.use_spatial_split = false;
362         }
363
364         spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
365         if(params.use_spatial_split) {
366                 /* NOTE: The API here tries to be as much ready for multi-threaded build
367                  * as possible, but at the same time it tries not to introduce any
368                  * changes in behavior for until all refactoring needed for threading is
369                  * finished.
370                  *
371                  * So we currently allocate single storage for now, which is only used by
372                  * the only thread working on the spatial BVH build.
373                  */
374                 spatial_storage.resize(TaskScheduler::num_threads() + 1);
375                 size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
376                 foreach(BVHSpatialStorage &storage, spatial_storage) {
377                         storage.right_bounds.clear();
378                 }
379                 spatial_storage[0].right_bounds.resize(num_bins);
380         }
381         spatial_free_index = 0;
382
383         /* init progress updates */
384         double build_start_time;
385         build_start_time = progress_start_time = time_dt();
386         progress_count = 0;
387         progress_total = references.size();
388         progress_original_total = progress_total;
389
390         prim_type.resize(references.size());
391         prim_index.resize(references.size());
392         prim_object.resize(references.size());
393
394         /* build recursively */
395         BVHNode *rootnode;
396
397         if(params.use_spatial_split) {
398                 /* Perform multithreaded spatial split build. */
399                 rootnode = build_node(root, &references, 0, 0);
400                 task_pool.wait_work();
401         }
402         else {
403                 /* Perform multithreaded binning build. */
404                 BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
405                 rootnode = build_node(rootbin, 0);
406                 task_pool.wait_work();
407         }
408
409         /* delete if we canceled */
410         if(rootnode) {
411                 if(progress.get_cancel()) {
412                         rootnode->deleteSubtree();
413                         rootnode = NULL;
414                         VLOG(1) << "BVH build cancelled.";
415                 }
416                 else {
417                         /*rotate(rootnode, 4, 5);*/
418                         rootnode->update_visibility();
419                 }
420                 if(rootnode != NULL) {
421                         VLOG(1) << "BVH build statistics:\n"
422                                 << "  Build time: " << time_dt() - build_start_time << "\n"
423                                 << "  Total number of nodes: "
424                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) << "\n"
425                                 << "  Number of inner nodes: "
426                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) << "\n"
427                                 << "  Number of leaf nodes: "
428                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) << "\n"
429                                 << "  Number of unaligned nodes: "
430                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT))  << "\n"
431                                 << "  Allocation slop factor: "
432                                        << ((prim_type.capacity() != 0)
433                                                ? (float)prim_type.size() / prim_type.capacity()
434                                                : 1.0f) << "\n";
435                 }
436         }
437
438
439         return rootnode;
440 }
441
442 void BVHBuild::progress_update()
443 {
444         if(time_dt() - progress_start_time < 0.25)
445                 return;
446
447         double progress_start = (double)progress_count/(double)progress_total;
448         double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
449
450         string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%",
451                                    progress_start * 100.0, duplicates * 100.0);
452
453         progress.set_substatus(msg);
454         progress_start_time = time_dt();
455 }
456
457 void BVHBuild::thread_build_node(InnerNode *inner,
458                                  int child,
459                                  BVHObjectBinning *range,
460                                  int level)
461 {
462         if(progress.get_cancel())
463                 return;
464
465         /* build nodes */
466         BVHNode *node = build_node(*range, level);
467
468         /* set child in inner node */
469         inner->children[child] = node;
470
471         /* update progress */
472         if(range->size() < THREAD_TASK_SIZE) {
473                 /*rotate(node, INT_MAX, 5);*/
474
475                 thread_scoped_lock lock(build_mutex);
476
477                 progress_count += range->size();
478                 progress_update();
479         }
480 }
481
482 void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
483                                                int child,
484                                                BVHRange *range,
485                                                vector<BVHReference> *references,
486                                                int level,
487                                                int thread_id)
488 {
489         if(progress.get_cancel()) {
490                 return;
491         }
492
493         /* build nodes */
494         BVHNode *node = build_node(*range, references, level, thread_id);
495
496         /* set child in inner node */
497         inner->children[child] = node;
498 }
499
500 bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
501                                           const vector<BVHReference>& references) const
502 {
503         size_t size = range.size();
504         size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
505
506         if(size > max_leaf_size)
507                 return false;
508
509         size_t num_triangles = 0;
510         size_t num_motion_triangles = 0;
511         size_t num_curves = 0;
512         size_t num_motion_curves = 0;
513
514         for(int i = 0; i < size; i++) {
515                 const BVHReference& ref = references[range.start() + i];
516
517                 if(ref.prim_type() & PRIMITIVE_CURVE)
518                         num_curves++;
519                 if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
520                         num_motion_curves++;
521                 else if(ref.prim_type() & PRIMITIVE_TRIANGLE)
522                         num_triangles++;
523                 else if(ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
524                         num_motion_triangles++;
525         }
526
527         return (num_triangles <= params.max_triangle_leaf_size) &&
528                (num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
529                (num_curves <= params.max_curve_leaf_size) &&
530                (num_motion_curves <= params.max_motion_curve_leaf_size);
531 }
532
533 /* multithreaded binning builder */
534 BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
535 {
536         size_t size = range.size();
537         float leafSAH = params.sah_primitive_cost * range.leafSAH;
538         float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH;
539
540         /* Have at least one inner node on top level, for performance and correct
541          * visibility tests, since object instances do not check visibility flag.
542          */
543         if(!(range.size() > 0 && params.top_level && level == 0)) {
544                 /* Make leaf node when threshold reached or SAH tells us. */
545                 if((params.small_enough_for_leaf(size, level)) ||
546                    (range_within_max_leaf_size(range, references) && leafSAH < splitSAH))
547                 {
548                         return create_leaf_node(range, references);
549                 }
550         }
551
552         BVHObjectBinning unaligned_range;
553         float unalignedSplitSAH = FLT_MAX;
554         float unalignedLeafSAH = FLT_MAX;
555         Transform aligned_space;
556         bool do_unalinged_split = false;
557         if(params.use_unaligned_nodes &&
558            splitSAH > params.unaligned_split_threshold*leafSAH)
559         {
560                 aligned_space = unaligned_heuristic.compute_aligned_space(
561                         range, &references[0]);
562                 unaligned_range = BVHObjectBinning(range,
563                                                    &references[0],
564                                                    &unaligned_heuristic,
565                                                    &aligned_space);
566                 unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
567                                     params.sah_primitive_cost * unaligned_range.splitSAH;
568                 unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
569                 if(!(range.size() > 0 && params.top_level && level == 0)) {
570                         if(unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
571                            range_within_max_leaf_size(range, references))
572                         {
573                                 return create_leaf_node(range, references);
574                         }
575                 }
576                 /* Check whether unaligned split is better than the regulat one. */
577                 if(unalignedSplitSAH < splitSAH) {
578                         do_unalinged_split = true;
579                 }
580         }
581
582         /* Perform split. */
583         BVHObjectBinning left, right;
584         if(do_unalinged_split) {
585                 unaligned_range.split(&references[0], left, right);
586         }
587         else {
588                 range.split(&references[0], left, right);
589         }
590
591         BoundBox bounds;
592         if(do_unalinged_split) {
593                 bounds = unaligned_heuristic.compute_aligned_boundbox(
594                         range, &references[0], aligned_space);
595         }
596         else {
597                 bounds = range.bounds();
598         }
599
600         /* Create inner node. */
601         InnerNode *inner;
602         if(range.size() < THREAD_TASK_SIZE) {
603                 /* local build */
604                 BVHNode *leftnode = build_node(left, level + 1);
605                 BVHNode *rightnode = build_node(right, level + 1);
606
607                 inner = new InnerNode(bounds, leftnode, rightnode);
608         }
609         else {
610                 /* Threaded build */
611                 inner = new InnerNode(bounds);
612
613                 task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
614                 task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
615         }
616
617         if(do_unalinged_split) {
618                 inner->set_aligned_space(aligned_space);
619         }
620
621         return inner;
622 }
623
624 /* multithreaded spatial split builder */
625 BVHNode* BVHBuild::build_node(const BVHRange& range,
626                               vector<BVHReference> *references,
627                               int level,
628                               int thread_id)
629 {
630         /* Update progress.
631          *
632          * TODO(sergey): Currently it matches old behavior, but we can move it to the
633          * task thread (which will mimic non=split builder) and save some CPU ticks
634          * on checking cancel status.
635          */
636         progress_update();
637         if(progress.get_cancel()) {
638                 return NULL;
639         }
640
641         /* Small enough or too deep => create leaf. */
642         if(!(range.size() > 0 && params.top_level && level == 0)) {
643                 if(params.small_enough_for_leaf(range.size(), level)) {
644                         progress_count += range.size();
645                         return create_leaf_node(range, *references);
646                 }
647         }
648
649         /* Perform splitting test. */
650         BVHSpatialStorage *storage = &spatial_storage[thread_id];
651         BVHMixedSplit split(this, storage, range, references, level);
652
653         if(!(range.size() > 0 && params.top_level && level == 0)) {
654                 if(split.no_split) {
655                         progress_count += range.size();
656                         return create_leaf_node(range, *references);
657                 }
658         }
659         float leafSAH = params.sah_primitive_cost * split.leafSAH;
660         float splitSAH = params.sah_node_cost * range.bounds().half_area() +
661                          params.sah_primitive_cost * split.nodeSAH;
662
663         BVHMixedSplit unaligned_split;
664         float unalignedSplitSAH = FLT_MAX;
665         /* float unalignedLeafSAH = FLT_MAX; */
666         Transform aligned_space;
667         bool do_unalinged_split = false;
668         if(params.use_unaligned_nodes &&
669            splitSAH > params.unaligned_split_threshold*leafSAH)
670         {
671                 aligned_space =
672                         unaligned_heuristic.compute_aligned_space(range, &references->at(0));
673                 unaligned_split = BVHMixedSplit(this,
674                                                 storage,
675                                                 range,
676                                                 references,
677                                                 level,
678                                                 &unaligned_heuristic,
679                                                 &aligned_space);
680                 /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
681                 unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
682                                     params.sah_primitive_cost * unaligned_split.nodeSAH;
683                 /* TOOD(sergey): Check we can create leaf already. */
684                 /* Check whether unaligned split is better than the regulat one. */
685                 if(unalignedSplitSAH < splitSAH) {
686                         do_unalinged_split = true;
687                 }
688         }
689
690         /* Do split. */
691         BVHRange left, right;
692         if(do_unalinged_split) {
693                 unaligned_split.split(this, left, right, range);
694         }
695         else {
696                 split.split(this, left, right, range);
697         }
698
699         progress_total += left.size() + right.size() - range.size();
700
701         BoundBox bounds;
702         if(do_unalinged_split) {
703                 bounds = unaligned_heuristic.compute_aligned_boundbox(
704                         range, &references->at(0), aligned_space);
705         }
706         else {
707                 bounds = range.bounds();
708         }
709
710         /* Create inner node. */
711         InnerNode *inner;
712         if(range.size() < THREAD_TASK_SIZE) {
713                 /* Local build. */
714
715                 /* Build left node. */
716                 vector<BVHReference> copy(references->begin() + right.start(),
717                                           references->begin() + right.end());
718                 right.set_start(0);
719
720                 BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
721
722                 /* Build right node. */
723                 BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
724
725                 inner = new InnerNode(bounds, leftnode, rightnode);
726         }
727         else {
728                 /* Threaded build. */
729                 inner = new InnerNode(bounds);
730                 task_pool.push(new BVHSpatialSplitBuildTask(this,
731                                                             inner,
732                                                             0,
733                                                             left,
734                                                             *references,
735                                                             level + 1),
736                                true);
737                 task_pool.push(new BVHSpatialSplitBuildTask(this,
738                                                             inner,
739                                                             1,
740                                                             right,
741                                                             *references,
742                                                             level + 1),
743                                true);
744         }
745
746         if(do_unalinged_split) {
747                 inner->set_aligned_space(aligned_space);
748         }
749
750         return inner;
751 }
752
753 /* Create Nodes */
754
755 BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num)
756 {
757         if(num == 0) {
758                 BoundBox bounds = BoundBox::empty;
759                 return new LeafNode(bounds, 0, 0, 0);
760         }
761         else if(num == 1) {
762                 assert(start < prim_type.size());
763                 prim_type[start] = ref->prim_type();
764                 prim_index[start] = ref->prim_index();
765                 prim_object[start] = ref->prim_object();
766
767                 uint visibility = objects[ref->prim_object()]->visibility;
768                 return new LeafNode(ref->bounds(), visibility, start, start+1);
769         }
770         else {
771                 int mid = num/2;
772                 BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
773                 BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
774
775                 BoundBox bounds = BoundBox::empty;
776                 bounds.grow(leaf0->m_bounds);
777                 bounds.grow(leaf1->m_bounds);
778
779                 return new InnerNode(bounds, leaf0, leaf1);
780         }
781 }
782
783 BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
784                                     const vector<BVHReference>& references)
785 {
786         /* This is a bit overallocating here (considering leaf size into account),
787          * but chunk-based re-allocation in vector makes it difficult to use small
788          * size of stack storage here. Some tweaks are possible tho.
789          *
790          * NOTES:
791          *  - If the size is too big, we'll have inefficient stack usage,
792          *    and lots of cache misses.
793          *  - If the size is too small, then we can run out of memory
794          *    allowed to be used by vector.
795          *    In practice it wouldn't mean crash, just allocator will fallback
796          *    to heap which is slower.
797          *  - Optimistic re-allocation in STL could jump us out of stack usage
798          *    because re-allocation happens in chunks and size of those chunks we
799          *    can not control.
800          */
801         typedef StackAllocator<256, int> LeafStackAllocator;
802         typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
803
804         vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
805         vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
806         vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
807         vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
808
809         /* TODO(sergey): In theory we should be able to store references. */
810         vector<BVHReference, LeafReferenceStackAllocator> object_references;
811
812         uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
813         /* NOTE: Keep initializtion in sync with actual number of primitives. */
814         BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {BoundBox::empty,
815                                                 BoundBox::empty,
816                                                 BoundBox::empty,
817                                                 BoundBox::empty};
818         int ob_num = 0;
819         int num_new_prims = 0;
820         /* Fill in per-type type/index array. */
821         for(int i = 0; i < range.size(); i++) {
822                 const BVHReference& ref = references[range.start() + i];
823                 if(ref.prim_index() != -1) {
824                         int type_index = bitscan(ref.prim_type() & PRIMITIVE_ALL);
825                         p_ref[type_index].push_back(ref);
826                         p_type[type_index].push_back(ref.prim_type());
827                         p_index[type_index].push_back(ref.prim_index());
828                         p_object[type_index].push_back(ref.prim_object());
829
830                         bounds[type_index].grow(ref.bounds());
831                         visibility[type_index] |= objects[ref.prim_object()]->visibility;
832                         if(ref.prim_type() & PRIMITIVE_ALL_CURVE) {
833                                 visibility[type_index] |= PATH_RAY_CURVE;
834                         }
835                         ++num_new_prims;
836                 }
837                 else {
838                         object_references.push_back(ref);
839                         ++ob_num;
840                 }
841         }
842
843         /* Create leaf nodes for every existing primitive.
844          *
845          * Here we write primitive types, indices and objects to a temporary array.
846          * This way we keep all the heavy memory allocation code outside of the
847          * thread lock in the case of spatial split building.
848          *
849          * TODO(sergey): With some pointer trickery we can write directly to the
850          * destination buffers for the non-spatial split BVH.
851          */
852         BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
853         int num_leaves = 0;
854         size_t start_index = 0;
855         vector<int, LeafStackAllocator> local_prim_type,
856                                         local_prim_index,
857                                         local_prim_object;
858         local_prim_type.resize(num_new_prims);
859         local_prim_index.resize(num_new_prims);
860         local_prim_object.resize(num_new_prims);
861         for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
862                 int num = (int)p_type[i].size();
863                 if(num != 0) {
864                         assert(p_type[i].size() == p_index[i].size());
865                         assert(p_type[i].size() == p_object[i].size());
866                         Transform aligned_space;
867                         bool alignment_found = false;
868                         for(int j = 0; j < num; ++j) {
869                                 const int index = start_index + j;
870                                 local_prim_type[index] = p_type[i][j];
871                                 local_prim_index[index] = p_index[i][j];
872                                 local_prim_object[index] = p_object[i][j];
873                                 if(params.use_unaligned_nodes && !alignment_found) {
874                                         alignment_found =
875                                                 unaligned_heuristic.compute_aligned_space(p_ref[i][j],
876                                                                                           &aligned_space);
877                                 }
878                         }
879                         LeafNode *leaf_node = new LeafNode(bounds[i],
880                                                            visibility[i],
881                                                            start_index,
882                                                            start_index + num);
883                         if(alignment_found) {
884                                 /* Need to recalculate leaf bounds with new alignment. */
885                                 leaf_node->m_bounds = BoundBox::empty;
886                                 for(int j = 0; j < num; ++j) {
887                                         const BVHReference &ref = p_ref[i][j];
888                                         BoundBox ref_bounds =
889                                                 unaligned_heuristic.compute_aligned_prim_boundbox(
890                                                         ref,
891                                                         aligned_space);
892                                         leaf_node->m_bounds.grow(ref_bounds);
893                                 }
894                                 /* Set alignment space. */
895                                 leaf_node->set_aligned_space(aligned_space);
896                         }
897                         leaves[num_leaves++] = leaf_node;
898                         start_index += num;
899                 }
900         }
901         /* Get size of new data to be copied to the packed arrays. */
902         const int num_new_leaf_data = start_index;
903         const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data;
904         /* Copy actual data to the packed array. */
905         if(params.use_spatial_split) {
906                 spatial_spin_lock.lock();
907                 /* We use first free index in the packed arrays and mode pointer to the
908                  * end of the current range.
909                  *
910                  * This doesn't give deterministic packed arrays, but it shouldn't really
911                  * matter because order of children in BVH is deterministic.
912                  */
913                 start_index = spatial_free_index;
914                 spatial_free_index += range.size();
915
916                 /* Extend an array when needed. */
917                 const size_t range_end = start_index + range.size();
918                 if(prim_type.size() < range_end) {
919                         /* Avoid extra re-allocations by pre-allocating bigger array in an
920                          * advance.
921                          */
922                         if(range_end >= prim_type.capacity()) {
923                                 float progress = (float)progress_count/(float)progress_total;
924                                 float factor = (1.0f - progress);
925                                 const size_t reserve = (size_t)(range_end + (float)range_end*factor);
926                                 prim_type.reserve(reserve);
927                                 prim_index.reserve(reserve);
928                                 prim_object.reserve(reserve);
929                         }
930
931                         prim_type.resize(range_end);
932                         prim_index.resize(range_end);
933                         prim_object.resize(range_end);
934                 }
935                 spatial_spin_lock.unlock();
936
937                 /* Perform actual data copy. */
938                 if(new_leaf_data_size > 0) {
939                         memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
940                         memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
941                         memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
942                 }
943         }
944         else {
945                 /* For the regular BVH builder we simply copy new data starting at the
946                  * range start. This is totally thread-safe, all threads are living
947                  * inside of their own range.
948                  */
949                 start_index = range.start();
950                 if(new_leaf_data_size > 0) {
951                         memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
952                         memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
953                         memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
954                 }
955         }
956
957         /* So far leaves were created with the zero-based index in an arrays,
958          * here we modify the indices to correspond to actual packed array start
959          * index.
960          */
961         for(int i = 0; i < num_leaves; ++i) {
962                 LeafNode *leaf = (LeafNode *)leaves[i];
963                 leaf->m_lo += start_index;
964                 leaf->m_hi += start_index;
965         }
966
967         /* Create leaf node for object. */
968         if(num_leaves == 0 || ob_num) {
969                 /* Only create object leaf nodes if there are objects or no other
970                  * nodes created.
971                  */
972                 const BVHReference *ref = (ob_num)? &object_references[0]: NULL;
973                 leaves[num_leaves] = create_object_leaf_nodes(ref,
974                                                               start_index + num_new_leaf_data,
975                                                               ob_num);
976                 ++num_leaves;
977         }
978
979         /* TODO(sergey): Need to take care of alignment when number of leaves
980          * is more than 1.
981          */
982         if(num_leaves == 1) {
983                 /* Simplest case: single leaf, just return it.
984                  * In all the rest cases we'll be creating intermediate inner node with
985                  * an appropriate bounding box.
986                  */
987                 return leaves[0];
988         }
989         else if(num_leaves == 2) {
990                 return new InnerNode(range.bounds(), leaves[0], leaves[1]);
991         }
992         else if(num_leaves == 3) {
993                 BoundBox inner_bounds = merge(leaves[1]->m_bounds, leaves[2]->m_bounds);
994                 BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
995                 return new InnerNode(range.bounds(), leaves[0], inner);
996         } else {
997                 /* Should be doing more branches if more primitive types added. */
998                 assert(num_leaves <= 5);
999                 BoundBox inner_bounds_a = merge(leaves[0]->m_bounds, leaves[1]->m_bounds);
1000                 BoundBox inner_bounds_b = merge(leaves[2]->m_bounds, leaves[3]->m_bounds);
1001                 BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
1002                 BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
1003                 BoundBox inner_bounds_c = merge(inner_a->m_bounds, inner_b->m_bounds);
1004                 BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
1005                 if(num_leaves == 5) {
1006                         return new InnerNode(range.bounds(), inner_c, leaves[4]);
1007                 }
1008                 return inner_c;
1009         }
1010
1011 #undef MAX_ITEMS_PER_LEAF
1012 }
1013
1014 /* Tree Rotations */
1015
1016 void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations)
1017 {
1018         /* in tested scenes, this resulted in slightly slower raytracing, so disabled
1019          * it for now. could be implementation bug, or depend on the scene */
1020         if(node)
1021                 for(int i = 0; i < iterations; i++)
1022                         rotate(node, max_depth);
1023 }
1024
1025 void BVHBuild::rotate(BVHNode *node, int max_depth)
1026 {
1027         /* nothing to rotate if we reached a leaf node. */
1028         if(node->is_leaf() || max_depth < 0)
1029                 return;
1030
1031         InnerNode *parent = (InnerNode*)node;
1032
1033         /* rotate all children first */
1034         for(size_t c = 0; c < 2; c++)
1035                 rotate(parent->children[c], max_depth-1);
1036
1037         /* compute current area of all children */
1038         BoundBox bounds0 = parent->children[0]->m_bounds;
1039         BoundBox bounds1 = parent->children[1]->m_bounds;
1040
1041         float area0 = bounds0.half_area();
1042         float area1 = bounds1.half_area();
1043         float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);
1044
1045         /* find best rotation. we pick a target child of a first child, and swap
1046          * this with an other child. we perform the best such swap. */
1047         float best_cost = FLT_MAX;
1048         int best_child = -1, best_target = -1, best_other = -1;
1049
1050         for(size_t c = 0; c < 2; c++) {
1051                 /* ignore leaf nodes as we cannot descent into */
1052                 if(parent->children[c]->is_leaf())
1053                         continue;
1054
1055                 InnerNode *child = (InnerNode*)parent->children[c];
1056                 BoundBox& other = (c == 0)? bounds1: bounds0;
1057
1058                 /* transpose child bounds */
1059                 BoundBox target0 = child->children[0]->m_bounds;
1060                 BoundBox target1 = child->children[1]->m_bounds;
1061
1062                 /* compute cost for both possible swaps */
1063                 float cost0 = merge(other, target1).half_area() - child_area[c];
1064                 float cost1 = merge(target0, other).half_area() - child_area[c];
1065
1066                 if(min(cost0,cost1) < best_cost) {
1067                         best_child = (int)c;
1068                         best_other = (int)(1-c);
1069
1070                         if(cost0 < cost1) {
1071                                 best_cost = cost0;
1072                                 best_target = 0;
1073                         }
1074                         else {
1075                                 best_cost = cost0;
1076                                 best_target = 1;
1077                         }
1078                 }
1079         }
1080
1081         /* if we did not find a swap that improves the SAH then do nothing */
1082         if(best_cost >= 0)
1083                 return;
1084
1085         assert(best_child == 0 || best_child == 1);
1086         assert(best_target != -1);
1087
1088         /* perform the best found tree rotation */
1089         InnerNode *child = (InnerNode*)parent->children[best_child];
1090
1091         swap(parent->children[best_other], child->children[best_target]);
1092         child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds);
1093 }
1094
1095 CCL_NAMESPACE_END