Cycles: Remove odd definition from CMake file
[blender.git] / intern / cycles / bvh / bvh_build.cpp
1 /*
2  * Adapted from code copyright 2009-2010 NVIDIA Corporation
3  * Modifications Copyright 2011, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "bvh_binning.h"
19 #include "bvh_build.h"
20 #include "bvh_node.h"
21 #include "bvh_params.h"
22 #include "bvh_split.h"
23
24 #include "mesh.h"
25 #include "object.h"
26 #include "scene.h"
27 #include "curves.h"
28
29 #include "util_debug.h"
30 #include "util_foreach.h"
31 #include "util_logging.h"
32 #include "util_progress.h"
33 #include "util_stack_allocator.h"
34 #include "util_simd.h"
35 #include "util_time.h"
36 #include "util_queue.h"
37
38 CCL_NAMESPACE_BEGIN
39
40 /* BVH Build Task */
41
42 class BVHBuildTask : public Task {
43 public:
44         BVHBuildTask(BVHBuild *build,
45                      InnerNode *node,
46                      int child,
47                      const BVHObjectBinning& range,
48                      int level)
49         : range_(range)
50         {
51                 run = function_bind(&BVHBuild::thread_build_node,
52                                     build,
53                                     node,
54                                     child,
55                                     &range_,
56                                     level);
57         }
58 private:
59         BVHObjectBinning range_;
60 };
61
62 class BVHSpatialSplitBuildTask : public Task {
63 public:
64         BVHSpatialSplitBuildTask(BVHBuild *build,
65                                  InnerNode *node,
66                                  int child,
67                                  const BVHRange& range,
68                                  const vector<BVHReference>& references,
69                                  int level)
70         : range_(range),
71           references_(references.begin() + range.start(),
72                       references.begin() + range.end())
73         {
74                 range_.set_start(0);
75                 run = function_bind(&BVHBuild::thread_build_spatial_split_node,
76                                     build,
77                                     node,
78                                     child,
79                                     &range_,
80                                     &references_,
81                                     level,
82                                     _1);
83         }
84 private:
85         BVHRange range_;
86         vector<BVHReference> references_;
87 };
88
89 /* Constructor / Destructor */
90
/* Construct a BVH builder.
 *
 * The objects vector and the prim_* output arrays are referenced, not
 * copied; they must outlive the builder. The prim_* arrays are resized
 * and filled during run(). */
BVHBuild::BVHBuild(const vector<Object*>& objects_,
                   array<int>& prim_type_,
                   array<int>& prim_index_,
                   array<int>& prim_object_,
                   const BVHParams& params_,
                   Progress& progress_)
 : objects(objects_),
   prim_type(prim_type_),
   prim_index(prim_index_),
   prim_object(prim_object_),
   params(params_),
   progress(progress_),
   progress_start_time(0.0),
   unaligned_heuristic(objects_)
{
	/* Real overlap threshold is derived from the root bounds in run();
	 * start out with no overlap allowed. */
	spatial_min_overlap = 0.0f;
}
108
/* Nothing to release explicitly: referenced arrays are owned by the
 * caller, and the resulting node tree is returned from run(). */
BVHBuild::~BVHBuild()
{
}
112
113 /* Adding References */
114
115 void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
116 {
117         if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
118                 Attribute *attr_mP = NULL;
119
120                 if(mesh->has_motion_blur())
121                         attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
122
123                 size_t num_triangles = mesh->num_triangles();
124                 for(uint j = 0; j < num_triangles; j++) {
125                         Mesh::Triangle t = mesh->get_triangle(j);
126                         BoundBox bounds = BoundBox::empty;
127                         PrimitiveType type = PRIMITIVE_TRIANGLE;
128
129                         t.bounds_grow(&mesh->verts[0], bounds);
130
131                         /* motion triangles */
132                         if(attr_mP) {
133                                 size_t mesh_size = mesh->verts.size();
134                                 size_t steps = mesh->motion_steps - 1;
135                                 float3 *vert_steps = attr_mP->data_float3();
136
137                                 for(size_t i = 0; i < steps; i++)
138                                         t.bounds_grow(vert_steps + i*mesh_size, bounds);
139
140                                 type = PRIMITIVE_MOTION_TRIANGLE;
141                         }
142
143                         if(bounds.valid()) {
144                                 references.push_back(BVHReference(bounds, j, i, type));
145                                 root.grow(bounds);
146                                 center.grow(bounds.center2());
147                         }
148                 }
149         }
150
151         if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
152                 Attribute *curve_attr_mP = NULL;
153
154                 if(mesh->has_motion_blur())
155                         curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
156
157                 size_t num_curves = mesh->num_curves();
158                 for(uint j = 0; j < num_curves; j++) {
159                         Mesh::Curve curve = mesh->get_curve(j);
160                         PrimitiveType type = PRIMITIVE_CURVE;
161
162                         for(int k = 0; k < curve.num_keys - 1; k++) {
163                                 BoundBox bounds = BoundBox::empty;
164                                 curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bounds);
165
166                                 /* motion curve */
167                                 if(curve_attr_mP) {
168                                         size_t mesh_size = mesh->curve_keys.size();
169                                         size_t steps = mesh->motion_steps - 1;
170                                         float3 *key_steps = curve_attr_mP->data_float3();
171
172                                         for(size_t i = 0; i < steps; i++)
173                                                 curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bounds);
174
175                                         type = PRIMITIVE_MOTION_CURVE;
176                                 }
177
178                                 if(bounds.valid()) {
179                                         int packed_type = PRIMITIVE_PACK_SEGMENT(type, k);
180
181                                         references.push_back(BVHReference(bounds, j, i, packed_type));
182                                         root.grow(bounds);
183                                         center.grow(bounds.center2());
184                                 }
185                         }
186                 }
187         }
188 }
189
190 void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
191 {
192         references.push_back(BVHReference(ob->bounds, -1, i, 0));
193         root.grow(ob->bounds);
194         center.grow(ob->bounds.center2());
195 }
196
197 static size_t count_curve_segments(Mesh *mesh)
198 {
199         size_t num = 0, num_curves = mesh->num_curves();
200
201         for(size_t i = 0; i < num_curves; i++)
202                 num += mesh->get_curve(i).num_keys - 1;
203         
204         return num;
205 }
206
207 void BVHBuild::add_references(BVHRange& root)
208 {
209         /* reserve space for references */
210         size_t num_alloc_references = 0;
211
212         foreach(Object *ob, objects) {
213                 if(params.top_level) {
214                         if(!ob->is_traceable()) {
215                                 continue;
216                         }
217                         if(!ob->mesh->is_instanced()) {
218                                 if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
219                                         num_alloc_references += ob->mesh->num_triangles();
220                                 }
221                                 if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
222                                         num_alloc_references += count_curve_segments(ob->mesh);
223                                 }
224                         }
225                         else
226                                 num_alloc_references++;
227                 }
228                 else {
229                         if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
230                                 num_alloc_references += ob->mesh->num_triangles();
231                         }
232                         if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
233                                 num_alloc_references += count_curve_segments(ob->mesh);
234                         }
235                 }
236         }
237
238         references.reserve(num_alloc_references);
239
240         /* add references from objects */
241         BoundBox bounds = BoundBox::empty, center = BoundBox::empty;
242         int i = 0;
243
244         foreach(Object *ob, objects) {
245                 if(params.top_level) {
246                         if(!ob->is_traceable()) {
247                                 ++i;
248                                 continue;
249                         }
250                         if(!ob->mesh->is_instanced())
251                                 add_reference_mesh(bounds, center, ob->mesh, i);
252                         else
253                                 add_reference_object(bounds, center, ob, i);
254                 }
255                 else
256                         add_reference_mesh(bounds, center, ob->mesh, i);
257
258                 i++;
259
260                 if(progress.get_cancel()) return;
261         }
262
263         /* happens mostly on empty meshes */
264         if(!bounds.valid())
265                 bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
266
267         root = BVHRange(bounds, center, 0, references.size());
268 }
269
270 /* Build */
271
/* Main build entry point.
 *
 * Gathers references, sets up spatial-split storage and progress
 * tracking, then builds the tree recursively (binning or spatial split
 * depending on params.use_spatial_split). Returns the root node, or
 * NULL when the build was cancelled. */
BVHNode* BVHBuild::run()
{
	BVHRange root;

	/* add references */
	add_references(root);

	if(progress.get_cancel())
		return NULL;

	/* init spatial splits */
	if(params.top_level) {
		/* NOTE: Technically it is supported by the builder but it's not really
		 * optimized for speed yet and not really clear yet if it has measurable
		 * improvement on render time. Needs some extra investigation before
		 * enabling spatial split for top level BVH.
		 */
		params.use_spatial_split = false;
	}

	/* Overlap threshold below which spatial splits are not attempted,
	 * scaled from the root surface area. */
	spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
	if(params.use_spatial_split) {
		/* NOTE: The API here tries to be as much ready for multi-threaded build
		 * as possible, but at the same time it tries not to introduce any
		 * changes in behavior for until all refactoring needed for threading is
		 * finished.
		 *
		 * So we currently allocate single storage for now, which is only used by
		 * the only thread working on the spatial BVH build.
		 */
		spatial_storage.resize(TaskScheduler::num_threads() + 1);
		/* Right-bounds scratch needs one slot fewer than the larger of the
		 * root size and the number of spatial bins. */
		size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
		foreach(BVHSpatialStorage &storage, spatial_storage) {
			storage.right_bounds.clear();
		}
		spatial_storage[0].right_bounds.resize(num_bins);
	}
	spatial_free_index = 0;

	/* init progress updates */
	double build_start_time;
	build_start_time = progress_start_time = time_dt();
	progress_count = 0;
	progress_total = references.size();
	/* Original total is kept to report the duplicate ratio introduced by
	 * spatial splits. */
	progress_original_total = progress_total;

	prim_type.resize(references.size());
	prim_index.resize(references.size());
	prim_object.resize(references.size());

	/* build recursively */
	BVHNode *rootnode;

	if(params.use_spatial_split) {
		/* Perform multithreaded spatial split build. */
		rootnode = build_node(root, &references, 0, 0);
		task_pool.wait_work();
	}
	else {
		/* Perform multithreaded binning build. */
		BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
		rootnode = build_node(rootbin, 0);
		task_pool.wait_work();
	}

	/* delete if we canceled */
	if(rootnode) {
		if(progress.get_cancel()) {
			rootnode->deleteSubtree();
			rootnode = NULL;
			VLOG(1) << "BVH build cancelled.";
		}
		else {
			/*rotate(rootnode, 4, 5);*/
			rootnode->update_visibility();
		}
		if(rootnode != NULL) {
			VLOG(1) << "BVH build statistics:\n"
			        << "  Build time: " << time_dt() - build_start_time << "\n"
			        << "  Total number of nodes: "
			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) << "\n"
			        << "  Number of inner nodes: "
			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) << "\n"
			        << "  Number of leaf nodes: "
			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) << "\n"
			        << "  Number of unaligned nodes: "
			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT))  << "\n"
			        << "  Allocation slop factor: "
			               << ((prim_type.capacity() != 0)
			                       ? (float)prim_type.size() / prim_type.capacity()
			                       : 1.0f) << "\n";
		}
	}


	return rootnode;
}
369
370 void BVHBuild::progress_update()
371 {
372         if(time_dt() - progress_start_time < 0.25)
373                 return;
374         
375         double progress_start = (double)progress_count/(double)progress_total;
376         double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
377
378         string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%",
379                                    progress_start * 100.0, duplicates * 100.0);
380
381         progress.set_substatus(msg);
382         progress_start_time = time_dt(); 
383 }
384
385 void BVHBuild::thread_build_node(InnerNode *inner,
386                                  int child,
387                                  BVHObjectBinning *range,
388                                  int level)
389 {
390         if(progress.get_cancel())
391                 return;
392
393         /* build nodes */
394         BVHNode *node = build_node(*range, level);
395
396         /* set child in inner node */
397         inner->children[child] = node;
398
399         /* update progress */
400         if(range->size() < THREAD_TASK_SIZE) {
401                 /*rotate(node, INT_MAX, 5);*/
402
403                 thread_scoped_lock lock(build_mutex);
404
405                 progress_count += range->size();
406                 progress_update();
407         }
408 }
409
410 void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
411                                                int child,
412                                                BVHRange *range,
413                                                vector<BVHReference> *references,
414                                                int level,
415                                                int thread_id)
416 {
417         if(progress.get_cancel()) {
418                 return;
419         }
420
421         /* build nodes */
422         BVHNode *node = build_node(*range, references, level, thread_id);
423
424         /* set child in inner node */
425         inner->children[child] = node;
426 }
427
428 bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
429                                           const vector<BVHReference>& references) const
430 {
431         size_t size = range.size();
432         size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
433
434         if(size > max_leaf_size)
435                 return false;
436
437         size_t num_triangles = 0;
438         size_t num_curves = 0;
439         size_t num_motion_curves = 0;
440
441         for(int i = 0; i < size; i++) {
442                 const BVHReference& ref = references[range.start() + i];
443
444                 if(ref.prim_type() & PRIMITIVE_CURVE)
445                         num_curves++;
446                 if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
447                         num_motion_curves++;
448                 else if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE)
449                         num_triangles++;
450         }
451
452         return (num_triangles < params.max_triangle_leaf_size) &&
453                (num_curves < params.max_curve_leaf_size) &&
454                (num_motion_curves < params.max_curve_leaf_size);
455 }
456
457 /* multithreaded binning builder */
/* Build a subtree for a binning range.
 *
 * Decides between creating a leaf and splitting based on SAH costs,
 * optionally considering an unaligned (oriented) split space. Recursion
 * happens locally for small ranges and via the task pool otherwise. */
BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
{
	size_t size = range.size();
	/* SAH cost of storing everything in one leaf vs. cost of splitting
	 * (node traversal plus children primitive cost). */
	float leafSAH = params.sah_primitive_cost * range.leafSAH;
	float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH;

	/* Have at least one inner node on top level, for performance and correct
	 * visibility tests, since object instances do not check visibility flag.
	 */
	if(!(range.size() > 0 && params.top_level && level == 0)) {
		/* Make leaf node when threshold reached or SAH tells us. */
		if((params.small_enough_for_leaf(size, level)) ||
		   (range_within_max_leaf_size(range, references) && leafSAH < splitSAH))
		{
			return create_leaf_node(range, references);
		}
	}

	/* Consider an unaligned split space when the aligned split looks
	 * expensive relative to the leaf cost. */
	BVHObjectBinning unaligned_range;
	float unalignedSplitSAH = FLT_MAX;
	float unalignedLeafSAH = FLT_MAX;
	Transform aligned_space;
	if(params.use_unaligned_nodes &&
	   splitSAH > params.unaligned_split_threshold*leafSAH)
	{
		aligned_space = unaligned_heuristic.compute_aligned_space(
		        range, &references[0]);
		unaligned_range = BVHObjectBinning(range,
		                                   &references[0],
		                                   &unaligned_heuristic,
		                                   &aligned_space);
		unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
		                    params.sah_primitive_cost * unaligned_range.splitSAH;
		unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
		/* Same top-level restriction as above: no leaf at level 0. */
		if(!(range.size() > 0 && params.top_level && level == 0)) {
			if(unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
			   range_within_max_leaf_size(range, references))
			{
				return create_leaf_node(range, references);
			}
		}
	}

	/* Perform split. */
	BVHObjectBinning left, right;
	if(unalignedSplitSAH < splitSAH) {
		unaligned_range.split(&references[0], left, right);
	}
	else {
		range.split(&references[0], left, right);
	}

	/* Node bounds: recomputed in the aligned space for unaligned splits. */
	BoundBox bounds;
	if(unalignedSplitSAH < splitSAH) {
		bounds = unaligned_heuristic.compute_aligned_boundbox(
		        range, &references[0], aligned_space);
	}
	else {
		bounds = range.bounds();
	}

	/* Create inner node. */
	InnerNode *inner;
	if(range.size() < THREAD_TASK_SIZE) {
		/* local build */
		BVHNode *leftnode = build_node(left, level + 1);
		BVHNode *rightnode = build_node(right, level + 1);

		inner = new InnerNode(bounds, leftnode, rightnode);
	}
	else {
		/* Threaded build */
		inner = new InnerNode(bounds);

		task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
		task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
	}

	if(unalignedSplitSAH < splitSAH) {
		inner->set_aligned_space(aligned_space);
	}

	return inner;
}
542
543 /* multithreaded spatial split builder */
/* Build a subtree for a range using the mixed object/spatial split
 * builder. Spatial splits may duplicate references, so the references
 * vector is passed along (and copied for deferred tasks). Returns NULL
 * when the build was cancelled. */
BVHNode* BVHBuild::build_node(const BVHRange& range,
                              vector<BVHReference> *references,
                              int level,
                              int thread_id)
{
	/* Update progress.
	 *
	 * TODO(sergey): Currently it matches old behavior, but we can move it to the
	 * task thread (which will mimic non-split builder) and save some CPU ticks
	 * on checking cancel status.
	 */
	progress_update();
	if(progress.get_cancel()) {
		return NULL;
	}

	/* Small enough or too deep => create leaf. */
	if(!(range.size() > 0 && params.top_level && level == 0)) {
		if(params.small_enough_for_leaf(range.size(), level)) {
			progress_count += range.size();
			return create_leaf_node(range, *references);
		}
	}

	/* Perform splitting test. */
	BVHSpatialStorage *storage = &spatial_storage[thread_id];
	BVHMixedSplit split(this, storage, range, references, level);

	/* No usable split found => create leaf (again not allowed at the
	 * very top level of a top-level BVH). */
	if(!(range.size() > 0 && params.top_level && level == 0)) {
		if(split.no_split) {
			progress_count += range.size();
			return create_leaf_node(range, *references);
		}
	}
	float leafSAH = params.sah_primitive_cost * split.leafSAH;
	float splitSAH = params.sah_node_cost * range.bounds().half_area() +
	                 params.sah_primitive_cost * split.nodeSAH;

	/* Consider an unaligned split space when the aligned split looks
	 * expensive relative to the leaf cost. */
	BVHMixedSplit unaligned_split;
	float unalignedSplitSAH = FLT_MAX;
	/* float unalignedLeafSAH = FLT_MAX; */
	Transform aligned_space;
	if(params.use_unaligned_nodes &&
	   splitSAH > params.unaligned_split_threshold*leafSAH)
	{
		aligned_space =
		        unaligned_heuristic.compute_aligned_space(range, &references->at(0));
		unaligned_split = BVHMixedSplit(this,
		                                storage,
		                                range,
		                                references,
		                                level,
		                                &unaligned_heuristic,
		                                &aligned_space);
		/* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
		unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
		                    params.sah_primitive_cost * unaligned_split.nodeSAH;
		/* TODO(sergey): Check we can create leaf already. */
	}

	/* Do split. */
	BVHRange left, right;
	if(unalignedSplitSAH < splitSAH) {
		unaligned_split.split(this, left, right, range);
	}
	else {
		split.split(this, left, right, range);
	}

	/* Spatial splits may duplicate references, so the children can cover
	 * more primitives than the parent; account for that in the total. */
	progress_total += left.size() + right.size() - range.size();

	/* Node bounds: recomputed in the aligned space for unaligned splits. */
	BoundBox bounds;
	if(unalignedSplitSAH < splitSAH) {
		bounds = unaligned_heuristic.compute_aligned_boundbox(
		        range, &references->at(0), aligned_space);
	}
	else {
		bounds = range.bounds();
	}

	/* Create inner node. */
	InnerNode *inner;
	if(range.size() < THREAD_TASK_SIZE) {
		/* Local build. */

		/* Build left node.
		 * The right child's references are copied first, since building
		 * the left child may reuse/overwrite the shared references
		 * vector beyond its own range. */
		vector<BVHReference> copy(references->begin() + right.start(),
		                          references->begin() + right.end());
		right.set_start(0);

		BVHNode *leftnode = build_node(left, references, level + 1, thread_id);

		/* Build right node. */
		BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);

		inner = new InnerNode(bounds, leftnode, rightnode);
	}
	else {
		/* Threaded build. */
		inner = new InnerNode(bounds);
		task_pool.push(new BVHSpatialSplitBuildTask(this,
		                                            inner,
		                                            0,
		                                            left,
		                                            *references,
		                                            level + 1),
		               true);
		task_pool.push(new BVHSpatialSplitBuildTask(this,
		                                            inner,
		                                            1,
		                                            right,
		                                            *references,
		                                            level + 1),
		               true);
	}

	if(unalignedSplitSAH < splitSAH) {
		inner->set_aligned_space(aligned_space);
	}

	return inner;
}
666
667 /* Create Nodes */
668
669 BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num)
670 {
671         if(num == 0) {
672                 BoundBox bounds = BoundBox::empty;
673                 return new LeafNode(bounds, 0, 0, 0);
674         }
675         else if(num == 1) {
676                 assert(start < prim_type.size());
677                 prim_type[start] = ref->prim_type();
678                 prim_index[start] = ref->prim_index();
679                 prim_object[start] = ref->prim_object();
680
681                 uint visibility = objects[ref->prim_object()]->visibility;
682                 return new LeafNode(ref->bounds(), visibility, start, start+1);
683         }
684         else {
685                 int mid = num/2;
686                 BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid); 
687                 BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid); 
688
689                 BoundBox bounds = BoundBox::empty;
690                 bounds.grow(leaf0->m_bounds);
691                 bounds.grow(leaf1->m_bounds);
692
693                 return new InnerNode(bounds, leaf0, leaf1);
694         }
695 }
696
697 BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
698                                     const vector<BVHReference>& references)
699 {
700         /* This is a bit overallocating here (considering leaf size into account),
701          * but chunk-based re-allocation in vector makes it difficult to use small
702          * size of stack storage here. Some tweaks are possible tho.
703          *
704          * NOTES:
705          *  - If the size is too big, we'll have inefficient stack usage,
706          *    and lots of cache misses.
707          *  - If the size is too small, then we can run out of memory
708          *    allowed to be used by vector.
709          *    In practice it wouldn't mean crash, just allocator will fallback
710          *    to heap which is slower.
711          *  - Optimistic re-allocation in STL could jump us out of stack usage
712          *    because re-allocation happens in chunks and size of those chunks we
713          *    can not control.
714          */
715         typedef StackAllocator<256, int> LeafStackAllocator;
716
717         vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
718         vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
719         vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
720         vector<BVHReference, LeafStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
721
722         /* TODO(sergey): In theory we should be able to store references. */
723         typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
724         vector<BVHReference, LeafReferenceStackAllocator> object_references;
725
726         uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
727         /* NOTE: Keep initializtion in sync with actual number of primitives. */
728         BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {BoundBox::empty,
729                                                 BoundBox::empty,
730                                                 BoundBox::empty,
731                                                 BoundBox::empty};
732         int ob_num = 0;
733         int num_new_prims = 0;
734         /* Fill in per-type type/index array. */
735         for(int i = 0; i < range.size(); i++) {
736                 const BVHReference& ref = references[range.start() + i];
737                 if(ref.prim_index() != -1) {
738                         int type_index = bitscan(ref.prim_type() & PRIMITIVE_ALL);
739                         p_ref[type_index].push_back(ref);
740                         p_type[type_index].push_back(ref.prim_type());
741                         p_index[type_index].push_back(ref.prim_index());
742                         p_object[type_index].push_back(ref.prim_object());
743
744                         bounds[type_index].grow(ref.bounds());
745                         visibility[type_index] |= objects[ref.prim_object()]->visibility;
746                         if(ref.prim_type() & PRIMITIVE_ALL_CURVE) {
747                                 visibility[type_index] |= PATH_RAY_CURVE;
748                         }
749                         ++num_new_prims;
750                 }
751                 else {
752                         object_references.push_back(ref);
753                         ++ob_num;
754                 }
755         }
756
757         /* Create leaf nodes for every existing primitive.
758          *
759          * Here we write primitive types, indices and objects to a temporary array.
760          * This way we keep all the heavy memory allocation code outside of the
761          * thread lock in the case of spatial split building.
762          *
763          * TODO(sergey): With some pointer trickery we can write directly to the
764          * destination buffers for the non-spatial split BVH.
765          */
766         BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
767         int num_leaves = 0;
768         size_t start_index = 0;
769         vector<int, LeafStackAllocator> local_prim_type,
770                                         local_prim_index,
771                                         local_prim_object;
772         local_prim_type.resize(num_new_prims);
773         local_prim_index.resize(num_new_prims);
774         local_prim_object.resize(num_new_prims);
775         for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
776                 int num = (int)p_type[i].size();
777                 if(num != 0) {
778                         assert(p_type[i].size() == p_index[i].size());
779                         assert(p_type[i].size() == p_object[i].size());
780                         Transform aligned_space;
781                         bool alignment_found = false;
782                         for(int j = 0; j < num; ++j) {
783                                 const int index = start_index + j;
784                                 local_prim_type[index] = p_type[i][j];
785                                 local_prim_index[index] = p_index[i][j];
786                                 local_prim_object[index] = p_object[i][j];
787                                 if(params.use_unaligned_nodes && !alignment_found) {
788                                         alignment_found =
789                                                 unaligned_heuristic.compute_aligned_space(p_ref[i][j],
790                                                                                            &aligned_space);
791                                 }
792                         }
793                         LeafNode *leaf_node = new LeafNode(bounds[i],
794                                                            visibility[i],
795                                                            start_index,
796                                                            start_index + num);
797                         if(alignment_found) {
798                                 /* Need to recalculate leaf bounds with new alignment. */
799                                 leaf_node->m_bounds = BoundBox::empty;
800                                 for(int j = 0; j < num; ++j) {
801                                         const BVHReference &ref = p_ref[i][j];
802                                         BoundBox ref_bounds =
803                                                 unaligned_heuristic.compute_aligned_prim_boundbox(
804                                                         ref,
805                                                         aligned_space);
806                                         leaf_node->m_bounds.grow(ref_bounds);
807                                 }
808                                 /* Set alignment space. */
809                                 leaf_node->set_aligned_space(aligned_space);
810                         }
811                         leaves[num_leaves++] = leaf_node;
812                         start_index += num;
813                 }
814         }
815         /* Get size of new data to be copied to the packed arrays. */
816         const int num_new_leaf_data = start_index;
817         const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data;
818         /* Copy actual data to the packed array. */
819         if(params.use_spatial_split) {
820                 spatial_spin_lock.lock();
		/* We use the first free index in the packed arrays and move the
		 * pointer to the end of the current range.
823                  *
824                  * This doesn't give deterministic packed arrays, but it shouldn't really
825                  * matter because order of children in BVH is deterministic.
826                  */
827                 start_index = spatial_free_index;
828                 spatial_free_index += range.size();
829
830                 /* Extend an array when needed. */
831                 const size_t range_end = start_index + range.size();
832                 if(prim_type.size() < range_end) {
			/* Avoid extra re-allocations by pre-allocating a bigger array in
			 * advance.
835                          */
836                         if(range_end >= prim_type.capacity()) {
837                                 float progress = (float)progress_count/(float)progress_total;
838                                 float factor = (1.0f - progress);
839                                 const size_t reserve = (size_t)(range_end + (float)range_end*factor);
840                                 prim_type.reserve(reserve);
841                                 prim_index.reserve(reserve);
842                                 prim_object.reserve(reserve);
843                         }
844
845                         prim_type.resize(range_end);
846                         prim_index.resize(range_end);
847                         prim_object.resize(range_end);
848                 }
849                 spatial_spin_lock.unlock();
850
851                 /* Perform actual data copy. */
852                 if(new_leaf_data_size > 0) {
853                         memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
854                         memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
855                         memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
856                 }
857         }
858         else {
859                 /* For the regular BVH builder we simply copy new data starting at the
860                  * range start. This is totally thread-safe, all threads are living
861                  * inside of their own range.
862                  */
863                 start_index = range.start();
864                 if(new_leaf_data_size > 0) {
865                         memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
866                         memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
867                         memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
868                 }
869         }
870
871         /* So far leaves were created with the zero-based index in an arrays,
872          * here we modify the indices to correspond to actual packed array start
873          * index.
874          */
875         for(int i = 0; i < num_leaves; ++i) {
876                 LeafNode *leaf = (LeafNode *)leaves[i];
877                 leaf->m_lo += start_index;
878                 leaf->m_hi += start_index;
879         }
880
881         /* Create leaf node for object. */
882         if(num_leaves == 0 || ob_num) {
883                 /* Only create object leaf nodes if there are objects or no other
884                  * nodes created.
885                  */
886                 const BVHReference *ref = (ob_num)? &object_references[0]: NULL;
887                 leaves[num_leaves] = create_object_leaf_nodes(ref,
888                                                               start_index + num_new_leaf_data,
889                                                               ob_num);
890                 ++num_leaves;
891         }
892
893         /* TODO(sergey): Need to take care of alignment when number of leaves
894          * is more than 1.
895          */
896         if(num_leaves == 1) {
897                 /* Simplest case: single leaf, just return it.
898                  * In all the rest cases we'll be creating intermediate inner node with
899                  * an appropriate bounding box.
900                  */
901                 return leaves[0];
902         }
903         else if(num_leaves == 2) {
904                 return new InnerNode(range.bounds(), leaves[0], leaves[1]);
905         }
906         else if(num_leaves == 3) {
907                 BoundBox inner_bounds = merge(leaves[1]->m_bounds, leaves[2]->m_bounds);
908                 BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
909                 return new InnerNode(range.bounds(), leaves[0], inner);
910         } else {
		/* Should be doing more branches if more primitive types are added. */
912                 assert(num_leaves <= 5);
913                 BoundBox inner_bounds_a = merge(leaves[0]->m_bounds, leaves[1]->m_bounds);
914                 BoundBox inner_bounds_b = merge(leaves[2]->m_bounds, leaves[3]->m_bounds);
915                 BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
916                 BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
917                 BoundBox inner_bounds_c = merge(inner_a->m_bounds, inner_b->m_bounds);
918                 BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
919                 if(num_leaves == 5) {
920                         return new InnerNode(range.bounds(), inner_c, leaves[4]);
921                 }
922                 return inner_c;
923         }
924
925 #undef MAX_ITEMS_PER_LEAF
926 }
927
928 /* Tree Rotations */
929
930 void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations)
931 {
932         /* in tested scenes, this resulted in slightly slower raytracing, so disabled
933          * it for now. could be implementation bug, or depend on the scene */
934         if(node)
935                 for(int i = 0; i < iterations; i++)
936                         rotate(node, max_depth);
937 }
938
939 void BVHBuild::rotate(BVHNode *node, int max_depth)
940 {
941         /* nothing to rotate if we reached a leaf node. */
942         if(node->is_leaf() || max_depth < 0)
943                 return;
944         
945         InnerNode *parent = (InnerNode*)node;
946
947         /* rotate all children first */
948         for(size_t c = 0; c < 2; c++)
949                 rotate(parent->children[c], max_depth-1);
950
951         /* compute current area of all children */
952         BoundBox bounds0 = parent->children[0]->m_bounds;
953         BoundBox bounds1 = parent->children[1]->m_bounds;
954
955         float area0 = bounds0.half_area();
956         float area1 = bounds1.half_area();
957         float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);
958
959         /* find best rotation. we pick a target child of a first child, and swap
960          * this with an other child. we perform the best such swap. */
961         float best_cost = FLT_MAX;
962         int best_child = -1, best_target = -1, best_other = -1;
963
964         for(size_t c = 0; c < 2; c++) {
965                 /* ignore leaf nodes as we cannot descent into */
966                 if(parent->children[c]->is_leaf())
967                         continue;
968
969                 InnerNode *child = (InnerNode*)parent->children[c];
970                 BoundBox& other = (c == 0)? bounds1: bounds0;
971
972                 /* transpose child bounds */
973                 BoundBox target0 = child->children[0]->m_bounds;
974                 BoundBox target1 = child->children[1]->m_bounds;
975
976                 /* compute cost for both possible swaps */
977                 float cost0 = merge(other, target1).half_area() - child_area[c];
978                 float cost1 = merge(target0, other).half_area() - child_area[c];
979
980                 if(min(cost0,cost1) < best_cost) {
981                         best_child = (int)c;
982                         best_other = (int)(1-c);
983
984                         if(cost0 < cost1) {
985                                 best_cost = cost0;
986                                 best_target = 0;
987                         }
988                         else {
989                                 best_cost = cost0;
990                                 best_target = 1;
991                         }
992                 }
993         }
994
995         /* if we did not find a swap that improves the SAH then do nothing */
996         if(best_cost >= 0)
997                 return;
998
999         assert(best_child == 0 || best_child == 1);
1000         assert(best_target != -1);
1001
1002         /* perform the best found tree rotation */
1003         InnerNode *child = (InnerNode*)parent->children[best_child];
1004
1005         swap(parent->children[best_other], child->children[best_target]);
1006         child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds);
1007 }
1008
1009 CCL_NAMESPACE_END