39e14dba3ee0eae6d40215acd50bbf8f13d3e8be
[blender-staging.git] / intern / cycles / bvh / bvh_build.cpp
1 /*
2  * Adapted from code copyright 2009-2010 NVIDIA Corporation
3  * Modifications Copyright 2011, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "bvh/bvh_build.h"
19
20 #include "bvh/bvh_binning.h"
21 #include "bvh/bvh_node.h"
22 #include "bvh/bvh_params.h"
23 #include "bvh_split.h"
24
25 #include "render/mesh.h"
26 #include "render/object.h"
27 #include "render/scene.h"
28 #include "render/curves.h"
29
30 #include "util/util_algorithm.h"
31 #include "util/util_debug.h"
32 #include "util/util_foreach.h"
33 #include "util/util_logging.h"
34 #include "util/util_progress.h"
35 #include "util/util_stack_allocator.h"
36 #include "util/util_simd.h"
37 #include "util/util_time.h"
38 #include "util/util_queue.h"
39
40 CCL_NAMESPACE_BEGIN
41
42 /* BVH Build Task */
43
44 class BVHBuildTask : public Task {
45 public:
46         BVHBuildTask(BVHBuild *build,
47                      InnerNode *node,
48                      int child,
49                      const BVHObjectBinning& range,
50                      int level)
51         : range_(range)
52         {
53                 run = function_bind(&BVHBuild::thread_build_node,
54                                     build,
55                                     node,
56                                     child,
57                                     &range_,
58                                     level);
59         }
60 private:
61         BVHObjectBinning range_;
62 };
63
64 class BVHSpatialSplitBuildTask : public Task {
65 public:
66         BVHSpatialSplitBuildTask(BVHBuild *build,
67                                  InnerNode *node,
68                                  int child,
69                                  const BVHRange& range,
70                                  const vector<BVHReference>& references,
71                                  int level)
72         : range_(range),
73           references_(references.begin() + range.start(),
74                       references.begin() + range.end())
75         {
76                 range_.set_start(0);
77                 run = function_bind(&BVHBuild::thread_build_spatial_split_node,
78                                     build,
79                                     node,
80                                     child,
81                                     &range_,
82                                     &references_,
83                                     level,
84                                     _1);
85         }
86 private:
87         BVHRange range_;
88         vector<BVHReference> references_;
89 };
90
91 /* Constructor / Destructor */
92
93 BVHBuild::BVHBuild(const vector<Object*>& objects_,
94                    array<int>& prim_type_,
95                    array<int>& prim_index_,
96                    array<int>& prim_object_,
97                    array<float2>& prim_time_,
98                    const BVHParams& params_,
99                    Progress& progress_)
100  : objects(objects_),
101    prim_type(prim_type_),
102    prim_index(prim_index_),
103    prim_object(prim_object_),
104    prim_time(prim_time_),
105    params(params_),
106    progress(progress_),
107    progress_start_time(0.0),
108    unaligned_heuristic(objects_)
109 {
110         spatial_min_overlap = 0.0f;
111 }
112
113 BVHBuild::~BVHBuild()
114 {
115 }
116
117 /* Adding References */
118
119 void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
120 {
121         const Attribute *attr_mP = NULL;
122         if(mesh->has_motion_blur()) {
123                 attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
124         }
125         const size_t num_triangles = mesh->num_triangles();
126         for(uint j = 0; j < num_triangles; j++) {
127                 Mesh::Triangle t = mesh->get_triangle(j);
128                 const float3 *verts = &mesh->verts[0];
129                 if(attr_mP == NULL) {
130                         BoundBox bounds = BoundBox::empty;
131                         t.bounds_grow(verts, bounds);
132                         if(bounds.valid() && t.valid(verts)) {
133                                 references.push_back(BVHReference(bounds,
134                                                                   j,
135                                                                   i,
136                                                                   PRIMITIVE_TRIANGLE));
137                                 root.grow(bounds);
138                                 center.grow(bounds.center2());
139                         }
140                 }
141                 else if(params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
142                         /* Motion triangles, simple case: single node for the whole
143                          * primitive. Lowest memory footprint and faster BVH build but
144                          * least optimal ray-tracing.
145                          */
146                         /* TODO(sergey): Support motion steps for spatially split BVH. */
147                         const size_t num_verts = mesh->verts.size();
148                         const size_t num_steps = mesh->motion_steps;
149                         const float3 *vert_steps = attr_mP->data_float3();
150                         BoundBox bounds = BoundBox::empty;
151                         t.bounds_grow(verts, bounds);
152                         for(size_t step = 0; step < num_steps - 1; step++) {
153                                 t.bounds_grow(vert_steps + step*num_verts, bounds);
154                         }
155                         if(bounds.valid()) {
156                                 references.push_back(
157                                         BVHReference(bounds,
158                                                      j,
159                                                      i,
160                                                      PRIMITIVE_MOTION_TRIANGLE));
161                                 root.grow(bounds);
162                                 center.grow(bounds.center2());
163                         }
164                 }
165                 else {
166                         /* Motion triangles, trace optimized case:  we split triangle
167                          * primitives into separate nodes for each of the time steps.
168                          * This way we minimize overlap of neighbor curve primitives.
169                          */
170                         const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
171                         const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
172                         const size_t num_verts = mesh->verts.size();
173                         const size_t num_steps = mesh->motion_steps;
174                         const float3 *vert_steps = attr_mP->data_float3();
175                         /* Calculate bounding box of the previous time step.
176                          * Will be reused later to avoid duplicated work on
177                          * calculating BVH time step boundbox.
178                          */
179                         float3 prev_verts[3];
180                         t.motion_verts(verts,
181                                        vert_steps,
182                                        num_verts,
183                                        num_steps,
184                                        0.0f,
185                                        prev_verts);
186                         BoundBox prev_bounds = BoundBox::empty;
187                         prev_bounds.grow(prev_verts[0]);
188                         prev_bounds.grow(prev_verts[1]);
189                         prev_bounds.grow(prev_verts[2]);
190                         /* Create all primitive time steps, */
191                         for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
192                                 const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
193                                 float3 curr_verts[3];
194                                 t.motion_verts(verts,
195                                                vert_steps,
196                                                num_verts,
197                                                num_steps,
198                                                curr_time,
199                                                curr_verts);
200                                 BoundBox curr_bounds = BoundBox::empty;
201                                 curr_bounds.grow(curr_verts[0]);
202                                 curr_bounds.grow(curr_verts[1]);
203                                 curr_bounds.grow(curr_verts[2]);
204                                 BoundBox bounds = prev_bounds;
205                                 bounds.grow(curr_bounds);
206                                 if(bounds.valid()) {
207                                         const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
208                                         references.push_back(
209                                                 BVHReference(bounds,
210                                                              j,
211                                                              i,
212                                                              PRIMITIVE_MOTION_TRIANGLE,
213                                                              prev_time,
214                                                              curr_time));
215                                         root.grow(bounds);
216                                         center.grow(bounds.center2());
217                                 }
218                                 /* Current time boundbox becomes previous one for the
219                                  * next time step.
220                                  */
221                                 prev_bounds = curr_bounds;
222                         }
223                 }
224         }
225 }
226
227 void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
228 {
229         const Attribute *curve_attr_mP = NULL;
230         if(mesh->has_motion_blur()) {
231                 curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
232         }
233         const size_t num_curves = mesh->num_curves();
234         for(uint j = 0; j < num_curves; j++) {
235                 const Mesh::Curve curve = mesh->get_curve(j);
236                 const float *curve_radius = &mesh->curve_radius[0];
237                 for(int k = 0; k < curve.num_keys - 1; k++) {
238                         if(curve_attr_mP == NULL) {
239                                 /* Really simple logic for static hair. */
240                                 BoundBox bounds = BoundBox::empty;
241                                 curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
242                                 if(bounds.valid()) {
243                                         int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
244                                         references.push_back(BVHReference(bounds, j, i, packed_type));
245                                         root.grow(bounds);
246                                         center.grow(bounds.center2());
247                                 }
248                         }
249                         else if(params.num_motion_curve_steps == 0 || params.use_spatial_split) {
250                                 /* Simple case of motion curves: single node for the while
251                                  * shutter time. Lowest memory usage but less optimal
252                                  * rendering.
253                                  */
254                                 /* TODO(sergey): Support motion steps for spatially split BVH. */
255                                 BoundBox bounds = BoundBox::empty;
256                                 curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
257                                 const size_t num_keys = mesh->curve_keys.size();
258                                 const size_t num_steps = mesh->motion_steps;
259                                 const float3 *key_steps = curve_attr_mP->data_float3();
260                                 for(size_t step = 0; step < num_steps - 1; step++) {
261                                         curve.bounds_grow(k,
262                                                           key_steps + step*num_keys,
263                                                           curve_radius,
264                                                           bounds);
265                                 }
266                                 if(bounds.valid()) {
267                                         int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
268                                         references.push_back(BVHReference(bounds,
269                                                                           j,
270                                                                           i,
271                                                                           packed_type));
272                                         root.grow(bounds);
273                                         center.grow(bounds.center2());
274                                 }
275                         }
276                         else {
277                                 /* Motion curves, trace optimized case:  we split curve keys
278                                  * primitives into separate nodes for each of the time steps.
279                                  * This way we minimize overlap of neighbor curve primitives.
280                                  */
281                                 const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
282                                 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
283                                 const size_t num_steps = mesh->motion_steps;
284                                 const float3 *curve_keys = &mesh->curve_keys[0];
285                                 const float3 *key_steps = curve_attr_mP->data_float3();
286                                 const size_t num_keys = mesh->curve_keys.size();
287                                 /* Calculate bounding box of the previous time step.
288                                  * Will be reused later to avoid duplicated work on
289                                  * calculating BVH time step boundbox.
290                                  */
291                                 float4 prev_keys[4];
292                                 curve.cardinal_motion_keys(curve_keys,
293                                                            curve_radius,
294                                                            key_steps,
295                                                            num_keys,
296                                                            num_steps,
297                                                            0.0f,
298                                                            k - 1, k, k + 1, k + 2,
299                                                            prev_keys);
300                                 BoundBox prev_bounds = BoundBox::empty;
301                                 curve.bounds_grow(prev_keys, prev_bounds);
302                                 /* Create all primitive time steps, */
303                                 for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
304                                         const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
305                                         float4 curr_keys[4];
306                                         curve.cardinal_motion_keys(curve_keys,
307                                                                    curve_radius,
308                                                                    key_steps,
309                                                                    num_keys,
310                                                                    num_steps,
311                                                                    curr_time,
312                                                                    k - 1, k, k + 1, k + 2,
313                                                                    curr_keys);
314                                         BoundBox curr_bounds = BoundBox::empty;
315                                         curve.bounds_grow(curr_keys, curr_bounds);
316                                         BoundBox bounds = prev_bounds;
317                                         bounds.grow(curr_bounds);
318                                         if(bounds.valid()) {
319                                                 const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
320                                                 int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
321                                                 references.push_back(BVHReference(bounds,
322                                                                                   j,
323                                                                                   i,
324                                                                                   packed_type,
325                                                                                   prev_time,
326                                                                                   curr_time));
327                                                 root.grow(bounds);
328                                                 center.grow(bounds.center2());
329                                         }
330                                         /* Current time boundbox becomes previous one for the
331                                          * next time step.
332                                          */
333                                         prev_bounds = curr_bounds;
334                                 }
335                         }
336                 }
337         }
338 }
339
340 void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
341 {
342         if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
343                 add_reference_triangles(root, center, mesh, i);
344         }
345         if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
346                 add_reference_curves(root, center, mesh, i);
347         }
348 }
349
350 void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
351 {
352         references.push_back(BVHReference(ob->bounds, -1, i, 0));
353         root.grow(ob->bounds);
354         center.grow(ob->bounds.center2());
355 }
356
357 static size_t count_curve_segments(Mesh *mesh)
358 {
359         size_t num = 0, num_curves = mesh->num_curves();
360
361         for(size_t i = 0; i < num_curves; i++)
362                 num += mesh->get_curve(i).num_keys - 1;
363
364         return num;
365 }
366
367 void BVHBuild::add_references(BVHRange& root)
368 {
369         /* reserve space for references */
370         size_t num_alloc_references = 0;
371
372         foreach(Object *ob, objects) {
373                 if(params.top_level) {
374                         if(!ob->is_traceable()) {
375                                 continue;
376                         }
377                         if(!ob->mesh->is_instanced()) {
378                                 if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
379                                         num_alloc_references += ob->mesh->num_triangles();
380                                 }
381                                 if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
382                                         num_alloc_references += count_curve_segments(ob->mesh);
383                                 }
384                         }
385                         else
386                                 num_alloc_references++;
387                 }
388                 else {
389                         if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
390                                 num_alloc_references += ob->mesh->num_triangles();
391                         }
392                         if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
393                                 num_alloc_references += count_curve_segments(ob->mesh);
394                         }
395                 }
396         }
397
398         references.reserve(num_alloc_references);
399
400         /* add references from objects */
401         BoundBox bounds = BoundBox::empty, center = BoundBox::empty;
402         int i = 0;
403
404         foreach(Object *ob, objects) {
405                 if(params.top_level) {
406                         if(!ob->is_traceable()) {
407                                 ++i;
408                                 continue;
409                         }
410                         if(!ob->mesh->is_instanced())
411                                 add_reference_mesh(bounds, center, ob->mesh, i);
412                         else
413                                 add_reference_object(bounds, center, ob, i);
414                 }
415                 else
416                         add_reference_mesh(bounds, center, ob->mesh, i);
417
418                 i++;
419
420                 if(progress.get_cancel()) return;
421         }
422
423         /* happens mostly on empty meshes */
424         if(!bounds.valid())
425                 bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
426
427         root = BVHRange(bounds, center, 0, references.size());
428 }
429
430 /* Build */
431
432 BVHNode* BVHBuild::run()
433 {
434         BVHRange root;
435
436         /* add references */
437         add_references(root);
438
439         if(progress.get_cancel())
440                 return NULL;
441
442         /* init spatial splits */
443         if(params.top_level) {
444                 /* NOTE: Technically it is supported by the builder but it's not really
445                  * optimized for speed yet and not really clear yet if it has measurable
446                  * improvement on render time. Needs some extra investigation before
447                  * enabling spatial split for top level BVH.
448                  */
449                 params.use_spatial_split = false;
450         }
451
452         spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
453         if(params.use_spatial_split) {
454                 /* NOTE: The API here tries to be as much ready for multi-threaded build
455                  * as possible, but at the same time it tries not to introduce any
456                  * changes in behavior for until all refactoring needed for threading is
457                  * finished.
458                  *
459                  * So we currently allocate single storage for now, which is only used by
460                  * the only thread working on the spatial BVH build.
461                  */
462                 spatial_storage.resize(TaskScheduler::num_threads() + 1);
463                 size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
464                 foreach(BVHSpatialStorage &storage, spatial_storage) {
465                         storage.right_bounds.clear();
466                 }
467                 spatial_storage[0].right_bounds.resize(num_bins);
468         }
469         spatial_free_index = 0;
470
471         need_prim_time = params.num_motion_curve_steps > 0 ||
472                          params.num_motion_triangle_steps > 0;
473
474         /* init progress updates */
475         double build_start_time;
476         build_start_time = progress_start_time = time_dt();
477         progress_count = 0;
478         progress_total = references.size();
479         progress_original_total = progress_total;
480
481         prim_type.resize(references.size());
482         prim_index.resize(references.size());
483         prim_object.resize(references.size());
484         if(need_prim_time) {
485                 prim_time.resize(references.size());
486         }
487         else {
488                 prim_time.resize(0);
489         }
490
491         /* build recursively */
492         BVHNode *rootnode;
493
494         if(params.use_spatial_split) {
495                 /* Perform multithreaded spatial split build. */
496                 rootnode = build_node(root, &references, 0, 0);
497                 task_pool.wait_work();
498         }
499         else {
500                 /* Perform multithreaded binning build. */
501                 BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
502                 rootnode = build_node(rootbin, 0);
503                 task_pool.wait_work();
504         }
505
506         /* delete if we canceled */
507         if(rootnode) {
508                 if(progress.get_cancel()) {
509                         rootnode->deleteSubtree();
510                         rootnode = NULL;
511                         VLOG(1) << "BVH build cancelled.";
512                 }
513                 else {
514                         /*rotate(rootnode, 4, 5);*/
515                         rootnode->update_visibility();
516                         rootnode->update_time();
517                 }
518                 if(rootnode != NULL) {
519                         VLOG(1) << "BVH build statistics:\n"
520                                 << "  Build time: " << time_dt() - build_start_time << "\n"
521                                 << "  Total number of nodes: "
522                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) << "\n"
523                                 << "  Number of inner nodes: "
524                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) << "\n"
525                                 << "  Number of leaf nodes: "
526                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) << "\n"
527                                 << "  Number of unaligned nodes: "
528                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT))  << "\n"
529                                 << "  Allocation slop factor: "
530                                        << ((prim_type.capacity() != 0)
531                                                ? (float)prim_type.size() / prim_type.capacity()
532                                                : 1.0f) << "\n"
533                                 << "  Maximum depth: "
534                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_DEPTH))  << "\n";
535                 }
536         }
537
538
539         return rootnode;
540 }
541
542 void BVHBuild::progress_update()
543 {
544         if(time_dt() - progress_start_time < 0.25)
545                 return;
546
547         double progress_start = (double)progress_count/(double)progress_total;
548         double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
549
550         string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%",
551                                    progress_start * 100.0, duplicates * 100.0);
552
553         progress.set_substatus(msg);
554         progress_start_time = time_dt();
555 }
556
557 void BVHBuild::thread_build_node(InnerNode *inner,
558                                  int child,
559                                  BVHObjectBinning *range,
560                                  int level)
561 {
562         if(progress.get_cancel())
563                 return;
564
565         /* build nodes */
566         BVHNode *node = build_node(*range, level);
567
568         /* set child in inner node */
569         inner->children[child] = node;
570
571         /* update progress */
572         if(range->size() < THREAD_TASK_SIZE) {
573                 /*rotate(node, INT_MAX, 5);*/
574
575                 thread_scoped_lock lock(build_mutex);
576
577                 progress_count += range->size();
578                 progress_update();
579         }
580 }
581
582 void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
583                                                int child,
584                                                BVHRange *range,
585                                                vector<BVHReference> *references,
586                                                int level,
587                                                int thread_id)
588 {
589         if(progress.get_cancel()) {
590                 return;
591         }
592
593         /* build nodes */
594         BVHNode *node = build_node(*range, references, level, thread_id);
595
596         /* set child in inner node */
597         inner->children[child] = node;
598 }
599
600 bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
601                                           const vector<BVHReference>& references) const
602 {
603         size_t size = range.size();
604         size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
605
606         if(size > max_leaf_size)
607                 return false;
608
609         size_t num_triangles = 0;
610         size_t num_motion_triangles = 0;
611         size_t num_curves = 0;
612         size_t num_motion_curves = 0;
613
614         for(int i = 0; i < size; i++) {
615                 const BVHReference& ref = references[range.start() + i];
616
617                 if(ref.prim_type() & PRIMITIVE_CURVE)
618                         num_curves++;
619                 if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
620                         num_motion_curves++;
621                 else if(ref.prim_type() & PRIMITIVE_TRIANGLE)
622                         num_triangles++;
623                 else if(ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
624                         num_motion_triangles++;
625         }
626
627         return (num_triangles <= params.max_triangle_leaf_size) &&
628                (num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
629                (num_curves <= params.max_curve_leaf_size) &&
630                (num_motion_curves <= params.max_motion_curve_leaf_size);
631 }
632
633 /* multithreaded binning builder */
634 BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
635 {
636         size_t size = range.size();
637         float leafSAH = params.sah_primitive_cost * range.leafSAH;
638         float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH;
639
640         /* Have at least one inner node on top level, for performance and correct
641          * visibility tests, since object instances do not check visibility flag.
642          */
643         if(!(range.size() > 0 && params.top_level && level == 0)) {
644                 /* Make leaf node when threshold reached or SAH tells us. */
645                 if((params.small_enough_for_leaf(size, level)) ||
646                    (range_within_max_leaf_size(range, references) && leafSAH < splitSAH))
647                 {
648                         return create_leaf_node(range, references);
649                 }
650         }
651
652         BVHObjectBinning unaligned_range;
653         float unalignedSplitSAH = FLT_MAX;
654         float unalignedLeafSAH = FLT_MAX;
655         Transform aligned_space;
656         bool do_unalinged_split = false;
657         if(params.use_unaligned_nodes &&
658            splitSAH > params.unaligned_split_threshold*leafSAH)
659         {
660                 aligned_space = unaligned_heuristic.compute_aligned_space(
661                         range, &references[0]);
662                 unaligned_range = BVHObjectBinning(range,
663                                                    &references[0],
664                                                    &unaligned_heuristic,
665                                                    &aligned_space);
666                 unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
667                                     params.sah_primitive_cost * unaligned_range.splitSAH;
668                 unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
669                 if(!(range.size() > 0 && params.top_level && level == 0)) {
670                         if(unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
671                            range_within_max_leaf_size(range, references))
672                         {
673                                 return create_leaf_node(range, references);
674                         }
675                 }
676                 /* Check whether unaligned split is better than the regular one. */
677                 if(unalignedSplitSAH < splitSAH) {
678                         do_unalinged_split = true;
679                 }
680         }
681
682         /* Perform split. */
683         BVHObjectBinning left, right;
684         if(do_unalinged_split) {
685                 unaligned_range.split(&references[0], left, right);
686         }
687         else {
688                 range.split(&references[0], left, right);
689         }
690
691         BoundBox bounds;
692         if(do_unalinged_split) {
693                 bounds = unaligned_heuristic.compute_aligned_boundbox(
694                         range, &references[0], aligned_space);
695         }
696         else {
697                 bounds = range.bounds();
698         }
699
700         /* Create inner node. */
701         InnerNode *inner;
702         if(range.size() < THREAD_TASK_SIZE) {
703                 /* local build */
704                 BVHNode *leftnode = build_node(left, level + 1);
705                 BVHNode *rightnode = build_node(right, level + 1);
706
707                 inner = new InnerNode(bounds, leftnode, rightnode);
708         }
709         else {
710                 /* Threaded build */
711                 inner = new InnerNode(bounds);
712
713                 task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
714                 task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
715         }
716
717         if(do_unalinged_split) {
718                 inner->set_aligned_space(aligned_space);
719         }
720
721         return inner;
722 }
723
724 /* multithreaded spatial split builder */
725 BVHNode* BVHBuild::build_node(const BVHRange& range,
726                               vector<BVHReference> *references,
727                               int level,
728                               int thread_id)
729 {
730         /* Update progress.
731          *
732          * TODO(sergey): Currently it matches old behavior, but we can move it to the
733          * task thread (which will mimic non=split builder) and save some CPU ticks
734          * on checking cancel status.
735          */
736         progress_update();
737         if(progress.get_cancel()) {
738                 return NULL;
739         }
740
741         /* Small enough or too deep => create leaf. */
742         if(!(range.size() > 0 && params.top_level && level == 0)) {
743                 if(params.small_enough_for_leaf(range.size(), level)) {
744                         progress_count += range.size();
745                         return create_leaf_node(range, *references);
746                 }
747         }
748
749         /* Perform splitting test. */
750         BVHSpatialStorage *storage = &spatial_storage[thread_id];
751         BVHMixedSplit split(this, storage, range, references, level);
752
753         if(!(range.size() > 0 && params.top_level && level == 0)) {
754                 if(split.no_split) {
755                         progress_count += range.size();
756                         return create_leaf_node(range, *references);
757                 }
758         }
759         float leafSAH = params.sah_primitive_cost * split.leafSAH;
760         float splitSAH = params.sah_node_cost * range.bounds().half_area() +
761                          params.sah_primitive_cost * split.nodeSAH;
762
763         BVHMixedSplit unaligned_split;
764         float unalignedSplitSAH = FLT_MAX;
765         /* float unalignedLeafSAH = FLT_MAX; */
766         Transform aligned_space;
767         bool do_unalinged_split = false;
768         if(params.use_unaligned_nodes &&
769            splitSAH > params.unaligned_split_threshold*leafSAH)
770         {
771                 aligned_space =
772                         unaligned_heuristic.compute_aligned_space(range, &references->at(0));
773                 unaligned_split = BVHMixedSplit(this,
774                                                 storage,
775                                                 range,
776                                                 references,
777                                                 level,
778                                                 &unaligned_heuristic,
779                                                 &aligned_space);
780                 /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
781                 unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
782                                     params.sah_primitive_cost * unaligned_split.nodeSAH;
783                 /* TOOD(sergey): Check we can create leaf already. */
784                 /* Check whether unaligned split is better than the regulat one. */
785                 if(unalignedSplitSAH < splitSAH) {
786                         do_unalinged_split = true;
787                 }
788         }
789
790         /* Do split. */
791         BVHRange left, right;
792         if(do_unalinged_split) {
793                 unaligned_split.split(this, left, right, range);
794         }
795         else {
796                 split.split(this, left, right, range);
797         }
798
799         progress_total += left.size() + right.size() - range.size();
800
801         BoundBox bounds;
802         if(do_unalinged_split) {
803                 bounds = unaligned_heuristic.compute_aligned_boundbox(
804                         range, &references->at(0), aligned_space);
805         }
806         else {
807                 bounds = range.bounds();
808         }
809
810         /* Create inner node. */
811         InnerNode *inner;
812         if(range.size() < THREAD_TASK_SIZE) {
813                 /* Local build. */
814
815                 /* Build left node. */
816                 vector<BVHReference> copy(references->begin() + right.start(),
817                                           references->begin() + right.end());
818                 right.set_start(0);
819
820                 BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
821
822                 /* Build right node. */
823                 BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
824
825                 inner = new InnerNode(bounds, leftnode, rightnode);
826         }
827         else {
828                 /* Threaded build. */
829                 inner = new InnerNode(bounds);
830                 task_pool.push(new BVHSpatialSplitBuildTask(this,
831                                                             inner,
832                                                             0,
833                                                             left,
834                                                             *references,
835                                                             level + 1),
836                                true);
837                 task_pool.push(new BVHSpatialSplitBuildTask(this,
838                                                             inner,
839                                                             1,
840                                                             right,
841                                                             *references,
842                                                             level + 1),
843                                true);
844         }
845
846         if(do_unalinged_split) {
847                 inner->set_aligned_space(aligned_space);
848         }
849
850         return inner;
851 }
852
853 /* Create Nodes */
854
855 BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num)
856 {
857         if(num == 0) {
858                 BoundBox bounds = BoundBox::empty;
859                 return new LeafNode(bounds, 0, 0, 0);
860         }
861         else if(num == 1) {
862                 assert(start < prim_type.size());
863                 prim_type[start] = ref->prim_type();
864                 prim_index[start] = ref->prim_index();
865                 prim_object[start] = ref->prim_object();
866                 if(need_prim_time) {
867                         prim_time[start] = make_float2(ref->time_from(), ref->time_to());
868                 }
869
870                 const uint visibility = objects[ref->prim_object()]->visibility_for_tracing();
871                 BVHNode *leaf_node =  new LeafNode(ref->bounds(), visibility, start, start+1);
872                 leaf_node->time_from = ref->time_from();
873                 leaf_node->time_to = ref->time_to();
874                 return leaf_node;
875         }
876         else {
877                 int mid = num/2;
878                 BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
879                 BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
880
881                 BoundBox bounds = BoundBox::empty;
882                 bounds.grow(leaf0->bounds);
883                 bounds.grow(leaf1->bounds);
884
885                 BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
886                 inner_node->time_from = min(leaf0->time_from, leaf1->time_from);
887                 inner_node->time_to = max(leaf0->time_to, leaf1->time_to);
888                 return inner_node;
889         }
890 }
891
892 BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
893                                     const vector<BVHReference>& references)
894 {
895         /* This is a bit overallocating here (considering leaf size into account),
896          * but chunk-based re-allocation in vector makes it difficult to use small
897          * size of stack storage here. Some tweaks are possible tho.
898          *
899          * NOTES:
900          *  - If the size is too big, we'll have inefficient stack usage,
901          *    and lots of cache misses.
902          *  - If the size is too small, then we can run out of memory
903          *    allowed to be used by vector.
904          *    In practice it wouldn't mean crash, just allocator will fallback
905          *    to heap which is slower.
906          *  - Optimistic re-allocation in STL could jump us out of stack usage
907          *    because re-allocation happens in chunks and size of those chunks we
908          *    can not control.
909          */
910         typedef StackAllocator<256, int> LeafStackAllocator;
911         typedef StackAllocator<256, float2> LeafTimeStackAllocator;
912         typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
913
914         vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
915         vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
916         vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
917         vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
918         vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
919
920         /* TODO(sergey): In theory we should be able to store references. */
921         vector<BVHReference, LeafReferenceStackAllocator> object_references;
922
923         uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
924         /* NOTE: Keep initializtion in sync with actual number of primitives. */
925         BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {BoundBox::empty,
926                                                 BoundBox::empty,
927                                                 BoundBox::empty,
928                                                 BoundBox::empty};
929         int ob_num = 0;
930         int num_new_prims = 0;
931         /* Fill in per-type type/index array. */
932         for(int i = 0; i < range.size(); i++) {
933                 const BVHReference& ref = references[range.start() + i];
934                 if(ref.prim_index() != -1) {
935                         int type_index = bitscan(ref.prim_type() & PRIMITIVE_ALL);
936                         p_ref[type_index].push_back(ref);
937                         p_type[type_index].push_back(ref.prim_type());
938                         p_index[type_index].push_back(ref.prim_index());
939                         p_object[type_index].push_back(ref.prim_object());
940                         p_time[type_index].push_back(make_float2(ref.time_from(),
941                                                                  ref.time_to()));
942
943                         bounds[type_index].grow(ref.bounds());
944                         visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing();
945                         if(ref.prim_type() & PRIMITIVE_ALL_CURVE) {
946                                 visibility[type_index] |= PATH_RAY_CURVE;
947                         }
948                         ++num_new_prims;
949                 }
950                 else {
951                         object_references.push_back(ref);
952                         ++ob_num;
953                 }
954         }
955
956         /* Create leaf nodes for every existing primitive.
957          *
958          * Here we write primitive types, indices and objects to a temporary array.
959          * This way we keep all the heavy memory allocation code outside of the
960          * thread lock in the case of spatial split building.
961          *
962          * TODO(sergey): With some pointer trickery we can write directly to the
963          * destination buffers for the non-spatial split BVH.
964          */
965         BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
966         int num_leaves = 0;
967         size_t start_index = 0;
968         vector<int, LeafStackAllocator> local_prim_type,
969                                         local_prim_index,
970                                         local_prim_object;
971         vector<float2, LeafTimeStackAllocator> local_prim_time;
972         local_prim_type.resize(num_new_prims);
973         local_prim_index.resize(num_new_prims);
974         local_prim_object.resize(num_new_prims);
975         if(need_prim_time) {
976                 local_prim_time.resize(num_new_prims);
977         }
978         for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
979                 int num = (int)p_type[i].size();
980                 if(num != 0) {
981                         assert(p_type[i].size() == p_index[i].size());
982                         assert(p_type[i].size() == p_object[i].size());
983                         Transform aligned_space;
984                         bool alignment_found = false;
985                         for(int j = 0; j < num; ++j) {
986                                 const int index = start_index + j;
987                                 local_prim_type[index] = p_type[i][j];
988                                 local_prim_index[index] = p_index[i][j];
989                                 local_prim_object[index] = p_object[i][j];
990                                 if(need_prim_time) {
991                                         local_prim_time[index] = p_time[i][j];
992                                 }
993                                 if(params.use_unaligned_nodes && !alignment_found) {
994                                         alignment_found =
995                                                 unaligned_heuristic.compute_aligned_space(p_ref[i][j],
996                                                                                           &aligned_space);
997                                 }
998                         }
999                         LeafNode *leaf_node = new LeafNode(bounds[i],
1000                                                            visibility[i],
1001                                                            start_index,
1002                                                            start_index + num);
1003                         if(true) {
1004                                 float time_from = 1.0f, time_to = 0.0f;
1005                                 for(int j = 0; j < num; ++j) {
1006                                         const BVHReference &ref = p_ref[i][j];
1007                                         time_from = min(time_from, ref.time_from());
1008                                         time_to = max(time_to, ref.time_to());
1009                                 }
1010                                 leaf_node->time_from = time_from;
1011                                 leaf_node->time_to = time_to;
1012                         }
1013                         if(alignment_found) {
1014                                 /* Need to recalculate leaf bounds with new alignment. */
1015                                 leaf_node->bounds = BoundBox::empty;
1016                                 for(int j = 0; j < num; ++j) {
1017                                         const BVHReference &ref = p_ref[i][j];
1018                                         BoundBox ref_bounds =
1019                                                 unaligned_heuristic.compute_aligned_prim_boundbox(
1020                                                         ref,
1021                                                         aligned_space);
1022                                         leaf_node->bounds.grow(ref_bounds);
1023                                 }
1024                                 /* Set alignment space. */
1025                                 leaf_node->set_aligned_space(aligned_space);
1026                         }
1027                         leaves[num_leaves++] = leaf_node;
1028                         start_index += num;
1029                 }
1030         }
1031         /* Get size of new data to be copied to the packed arrays. */
1032         const int num_new_leaf_data = start_index;
1033         const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data;
1034         /* Copy actual data to the packed array. */
1035         if(params.use_spatial_split) {
1036                 spatial_spin_lock.lock();
1037                 /* We use first free index in the packed arrays and mode pointer to the
1038                  * end of the current range.
1039                  *
1040                  * This doesn't give deterministic packed arrays, but it shouldn't really
1041                  * matter because order of children in BVH is deterministic.
1042                  */
1043                 start_index = spatial_free_index;
1044                 spatial_free_index += range.size();
1045                 /* Extend an array when needed. */
1046                 const size_t range_end = start_index + range.size();
1047                 if(prim_type.size() < range_end) {
1048                         /* Avoid extra re-allocations by pre-allocating bigger array in an
1049                          * advance.
1050                          */
1051                         if(range_end >= prim_type.capacity()) {
1052                                 float progress = (float)progress_count/(float)progress_total;
1053                                 float factor = (1.0f - progress);
1054                                 const size_t reserve = (size_t)(range_end + (float)range_end*factor);
1055                                 prim_type.reserve(reserve);
1056                                 prim_index.reserve(reserve);
1057                                 prim_object.reserve(reserve);
1058                                 if(need_prim_time) {
1059                                         prim_time.reserve(reserve);
1060                                 }
1061                         }
1062
1063                         prim_type.resize(range_end);
1064                         prim_index.resize(range_end);
1065                         prim_object.resize(range_end);
1066                         if(need_prim_time) {
1067                                 prim_time.resize(range_end);
1068                         }
1069                 }
1070                 /* Perform actual data copy. */
1071                 if(new_leaf_data_size > 0) {
1072                         memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
1073                         memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
1074                         memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
1075                         if(need_prim_time) {
1076                                 memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
1077                         }
1078                 }
1079                 spatial_spin_lock.unlock();
1080         }
1081         else {
1082                 /* For the regular BVH builder we simply copy new data starting at the
1083                  * range start. This is totally thread-safe, all threads are living
1084                  * inside of their own range.
1085                  */
1086                 start_index = range.start();
1087                 if(new_leaf_data_size > 0) {
1088                         memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
1089                         memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
1090                         memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
1091                         if(need_prim_time) {
1092                                 memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
1093                         }
1094                 }
1095         }
1096
1097         /* So far leaves were created with the zero-based index in an arrays,
1098          * here we modify the indices to correspond to actual packed array start
1099          * index.
1100          */
1101         for(int i = 0; i < num_leaves; ++i) {
1102                 LeafNode *leaf = (LeafNode *)leaves[i];
1103                 leaf->lo += start_index;
1104                 leaf->hi += start_index;
1105         }
1106
1107         /* Create leaf node for object. */
1108         if(num_leaves == 0 || ob_num) {
1109                 /* Only create object leaf nodes if there are objects or no other
1110                  * nodes created.
1111                  */
1112                 const BVHReference *ref = (ob_num)? &object_references[0]: NULL;
1113                 leaves[num_leaves] = create_object_leaf_nodes(ref,
1114                                                               start_index + num_new_leaf_data,
1115                                                               ob_num);
1116                 ++num_leaves;
1117         }
1118
1119         /* TODO(sergey): Need to take care of alignment when number of leaves
1120          * is more than 1.
1121          */
1122         if(num_leaves == 1) {
1123                 /* Simplest case: single leaf, just return it.
1124                  * In all the rest cases we'll be creating intermediate inner node with
1125                  * an appropriate bounding box.
1126                  */
1127                 return leaves[0];
1128         }
1129         else if(num_leaves == 2) {
1130                 return new InnerNode(range.bounds(), leaves[0], leaves[1]);
1131         }
1132         else if(num_leaves == 3) {
1133                 BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds);
1134                 BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
1135                 return new InnerNode(range.bounds(), leaves[0], inner);
1136         } else {
1137                 /* Should be doing more branches if more primitive types added. */
1138                 assert(num_leaves <= 5);
1139                 BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds);
1140                 BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds);
1141                 BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
1142                 BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
1143                 BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds);
1144                 BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
1145                 if(num_leaves == 5) {
1146                         return new InnerNode(range.bounds(), inner_c, leaves[4]);
1147                 }
1148                 return inner_c;
1149         }
1150
1151 #undef MAX_ITEMS_PER_LEAF
1152 }
1153
1154 /* Tree Rotations */
1155
1156 void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations)
1157 {
1158         /* in tested scenes, this resulted in slightly slower raytracing, so disabled
1159          * it for now. could be implementation bug, or depend on the scene */
1160         if(node)
1161                 for(int i = 0; i < iterations; i++)
1162                         rotate(node, max_depth);
1163 }
1164
1165 void BVHBuild::rotate(BVHNode *node, int max_depth)
1166 {
1167         /* nothing to rotate if we reached a leaf node. */
1168         if(node->is_leaf() || max_depth < 0)
1169                 return;
1170
1171         InnerNode *parent = (InnerNode*)node;
1172
1173         /* rotate all children first */
1174         for(size_t c = 0; c < 2; c++)
1175                 rotate(parent->children[c], max_depth-1);
1176
1177         /* compute current area of all children */
1178         BoundBox bounds0 = parent->children[0]->bounds;
1179         BoundBox bounds1 = parent->children[1]->bounds;
1180
1181         float area0 = bounds0.half_area();
1182         float area1 = bounds1.half_area();
1183         float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);
1184
1185         /* find best rotation. we pick a target child of a first child, and swap
1186          * this with an other child. we perform the best such swap. */
1187         float best_cost = FLT_MAX;
1188         int best_child = -1, best_target = -1, best_other = -1;
1189
1190         for(size_t c = 0; c < 2; c++) {
1191                 /* ignore leaf nodes as we cannot descent into */
1192                 if(parent->children[c]->is_leaf())
1193                         continue;
1194
1195                 InnerNode *child = (InnerNode*)parent->children[c];
1196                 BoundBox& other = (c == 0)? bounds1: bounds0;
1197
1198                 /* transpose child bounds */
1199                 BoundBox target0 = child->children[0]->bounds;
1200                 BoundBox target1 = child->children[1]->bounds;
1201
1202                 /* compute cost for both possible swaps */
1203                 float cost0 = merge(other, target1).half_area() - child_area[c];
1204                 float cost1 = merge(target0, other).half_area() - child_area[c];
1205
1206                 if(min(cost0,cost1) < best_cost) {
1207                         best_child = (int)c;
1208                         best_other = (int)(1-c);
1209
1210                         if(cost0 < cost1) {
1211                                 best_cost = cost0;
1212                                 best_target = 0;
1213                         }
1214                         else {
1215                                 best_cost = cost0;
1216                                 best_target = 1;
1217                         }
1218                 }
1219         }
1220
1221         /* if we did not find a swap that improves the SAH then do nothing */
1222         if(best_cost >= 0)
1223                 return;
1224
1225         assert(best_child == 0 || best_child == 1);
1226         assert(best_target != -1);
1227
1228         /* perform the best found tree rotation */
1229         InnerNode *child = (InnerNode*)parent->children[best_child];
1230
1231         swap(parent->children[best_other], child->children[best_target]);
1232         child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds);
1233 }
1234
1235 CCL_NAMESPACE_END