Cycles: Remove unneeded include statements
[blender-staging.git] / intern / cycles / bvh / bvh_build.cpp
1 /*
2  * Adapted from code copyright 2009-2010 NVIDIA Corporation
3  * Modifications Copyright 2011, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "bvh/bvh_build.h"
19
20 #include "bvh/bvh_binning.h"
21 #include "bvh/bvh_node.h"
22 #include "bvh/bvh_params.h"
23 #include "bvh_split.h"
24
25 #include "render/mesh.h"
26 #include "render/object.h"
27 #include "render/scene.h"
28 #include "render/curves.h"
29
30 #include "util/util_algorithm.h"
31 #include "util/util_foreach.h"
32 #include "util/util_logging.h"
33 #include "util/util_progress.h"
34 #include "util/util_stack_allocator.h"
35 #include "util/util_simd.h"
36 #include "util/util_time.h"
37 #include "util/util_queue.h"
38
39 CCL_NAMESPACE_BEGIN
40
41 /* BVH Build Task */
42
43 class BVHBuildTask : public Task {
44 public:
45         BVHBuildTask(BVHBuild *build,
46                      InnerNode *node,
47                      int child,
48                      const BVHObjectBinning& range,
49                      int level)
50         : range_(range)
51         {
52                 run = function_bind(&BVHBuild::thread_build_node,
53                                     build,
54                                     node,
55                                     child,
56                                     &range_,
57                                     level);
58         }
59 private:
60         BVHObjectBinning range_;
61 };
62
63 class BVHSpatialSplitBuildTask : public Task {
64 public:
65         BVHSpatialSplitBuildTask(BVHBuild *build,
66                                  InnerNode *node,
67                                  int child,
68                                  const BVHRange& range,
69                                  const vector<BVHReference>& references,
70                                  int level)
71         : range_(range),
72           references_(references.begin() + range.start(),
73                       references.begin() + range.end())
74         {
75                 range_.set_start(0);
76                 run = function_bind(&BVHBuild::thread_build_spatial_split_node,
77                                     build,
78                                     node,
79                                     child,
80                                     &range_,
81                                     &references_,
82                                     level,
83                                     _1);
84         }
85 private:
86         BVHRange range_;
87         vector<BVHReference> references_;
88 };
89
90 /* Constructor / Destructor */
91
92 BVHBuild::BVHBuild(const vector<Object*>& objects_,
93                    array<int>& prim_type_,
94                    array<int>& prim_index_,
95                    array<int>& prim_object_,
96                    array<float2>& prim_time_,
97                    const BVHParams& params_,
98                    Progress& progress_)
99  : objects(objects_),
100    prim_type(prim_type_),
101    prim_index(prim_index_),
102    prim_object(prim_object_),
103    prim_time(prim_time_),
104    params(params_),
105    progress(progress_),
106    progress_start_time(0.0),
107    unaligned_heuristic(objects_)
108 {
109         spatial_min_overlap = 0.0f;
110 }
111
112 BVHBuild::~BVHBuild()
113 {
114 }
115
116 /* Adding References */
117
118 void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
119 {
120         const Attribute *attr_mP = NULL;
121         if(mesh->has_motion_blur()) {
122                 attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
123         }
124         const size_t num_triangles = mesh->num_triangles();
125         for(uint j = 0; j < num_triangles; j++) {
126                 Mesh::Triangle t = mesh->get_triangle(j);
127                 const float3 *verts = &mesh->verts[0];
128                 if(attr_mP == NULL) {
129                         BoundBox bounds = BoundBox::empty;
130                         t.bounds_grow(verts, bounds);
131                         if(bounds.valid() && t.valid(verts)) {
132                                 references.push_back(BVHReference(bounds,
133                                                                   j,
134                                                                   i,
135                                                                   PRIMITIVE_TRIANGLE));
136                                 root.grow(bounds);
137                                 center.grow(bounds.center2());
138                         }
139                 }
140                 else if(params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
141                         /* Motion triangles, simple case: single node for the whole
142                          * primitive. Lowest memory footprint and faster BVH build but
143                          * least optimal ray-tracing.
144                          */
145                         /* TODO(sergey): Support motion steps for spatially split BVH. */
146                         const size_t num_verts = mesh->verts.size();
147                         const size_t num_steps = mesh->motion_steps;
148                         const float3 *vert_steps = attr_mP->data_float3();
149                         BoundBox bounds = BoundBox::empty;
150                         t.bounds_grow(verts, bounds);
151                         for(size_t step = 0; step < num_steps - 1; step++) {
152                                 t.bounds_grow(vert_steps + step*num_verts, bounds);
153                         }
154                         if(bounds.valid()) {
155                                 references.push_back(
156                                         BVHReference(bounds,
157                                                      j,
158                                                      i,
159                                                      PRIMITIVE_MOTION_TRIANGLE));
160                                 root.grow(bounds);
161                                 center.grow(bounds.center2());
162                         }
163                 }
164                 else {
165                         /* Motion triangles, trace optimized case:  we split triangle
166                          * primitives into separate nodes for each of the time steps.
167                          * This way we minimize overlap of neighbor curve primitives.
168                          */
169                         const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
170                         const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
171                         const size_t num_verts = mesh->verts.size();
172                         const size_t num_steps = mesh->motion_steps;
173                         const float3 *vert_steps = attr_mP->data_float3();
174                         /* Calculate bounding box of the previous time step.
175                          * Will be reused later to avoid duplicated work on
176                          * calculating BVH time step boundbox.
177                          */
178                         float3 prev_verts[3];
179                         t.motion_verts(verts,
180                                        vert_steps,
181                                        num_verts,
182                                        num_steps,
183                                        0.0f,
184                                        prev_verts);
185                         BoundBox prev_bounds = BoundBox::empty;
186                         prev_bounds.grow(prev_verts[0]);
187                         prev_bounds.grow(prev_verts[1]);
188                         prev_bounds.grow(prev_verts[2]);
189                         /* Create all primitive time steps, */
190                         for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
191                                 const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
192                                 float3 curr_verts[3];
193                                 t.motion_verts(verts,
194                                                vert_steps,
195                                                num_verts,
196                                                num_steps,
197                                                curr_time,
198                                                curr_verts);
199                                 BoundBox curr_bounds = BoundBox::empty;
200                                 curr_bounds.grow(curr_verts[0]);
201                                 curr_bounds.grow(curr_verts[1]);
202                                 curr_bounds.grow(curr_verts[2]);
203                                 BoundBox bounds = prev_bounds;
204                                 bounds.grow(curr_bounds);
205                                 if(bounds.valid()) {
206                                         const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
207                                         references.push_back(
208                                                 BVHReference(bounds,
209                                                              j,
210                                                              i,
211                                                              PRIMITIVE_MOTION_TRIANGLE,
212                                                              prev_time,
213                                                              curr_time));
214                                         root.grow(bounds);
215                                         center.grow(bounds.center2());
216                                 }
217                                 /* Current time boundbox becomes previous one for the
218                                  * next time step.
219                                  */
220                                 prev_bounds = curr_bounds;
221                         }
222                 }
223         }
224 }
225
226 void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
227 {
228         const Attribute *curve_attr_mP = NULL;
229         if(mesh->has_motion_blur()) {
230                 curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
231         }
232         const size_t num_curves = mesh->num_curves();
233         for(uint j = 0; j < num_curves; j++) {
234                 const Mesh::Curve curve = mesh->get_curve(j);
235                 const float *curve_radius = &mesh->curve_radius[0];
236                 for(int k = 0; k < curve.num_keys - 1; k++) {
237                         if(curve_attr_mP == NULL) {
238                                 /* Really simple logic for static hair. */
239                                 BoundBox bounds = BoundBox::empty;
240                                 curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
241                                 if(bounds.valid()) {
242                                         int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
243                                         references.push_back(BVHReference(bounds, j, i, packed_type));
244                                         root.grow(bounds);
245                                         center.grow(bounds.center2());
246                                 }
247                         }
248                         else if(params.num_motion_curve_steps == 0 || params.use_spatial_split) {
249                                 /* Simple case of motion curves: single node for the while
250                                  * shutter time. Lowest memory usage but less optimal
251                                  * rendering.
252                                  */
253                                 /* TODO(sergey): Support motion steps for spatially split BVH. */
254                                 BoundBox bounds = BoundBox::empty;
255                                 curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
256                                 const size_t num_keys = mesh->curve_keys.size();
257                                 const size_t num_steps = mesh->motion_steps;
258                                 const float3 *key_steps = curve_attr_mP->data_float3();
259                                 for(size_t step = 0; step < num_steps - 1; step++) {
260                                         curve.bounds_grow(k,
261                                                           key_steps + step*num_keys,
262                                                           curve_radius,
263                                                           bounds);
264                                 }
265                                 if(bounds.valid()) {
266                                         int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
267                                         references.push_back(BVHReference(bounds,
268                                                                           j,
269                                                                           i,
270                                                                           packed_type));
271                                         root.grow(bounds);
272                                         center.grow(bounds.center2());
273                                 }
274                         }
275                         else {
276                                 /* Motion curves, trace optimized case:  we split curve keys
277                                  * primitives into separate nodes for each of the time steps.
278                                  * This way we minimize overlap of neighbor curve primitives.
279                                  */
280                                 const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
281                                 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
282                                 const size_t num_steps = mesh->motion_steps;
283                                 const float3 *curve_keys = &mesh->curve_keys[0];
284                                 const float3 *key_steps = curve_attr_mP->data_float3();
285                                 const size_t num_keys = mesh->curve_keys.size();
286                                 /* Calculate bounding box of the previous time step.
287                                  * Will be reused later to avoid duplicated work on
288                                  * calculating BVH time step boundbox.
289                                  */
290                                 float4 prev_keys[4];
291                                 curve.cardinal_motion_keys(curve_keys,
292                                                            curve_radius,
293                                                            key_steps,
294                                                            num_keys,
295                                                            num_steps,
296                                                            0.0f,
297                                                            k - 1, k, k + 1, k + 2,
298                                                            prev_keys);
299                                 BoundBox prev_bounds = BoundBox::empty;
300                                 curve.bounds_grow(prev_keys, prev_bounds);
301                                 /* Create all primitive time steps, */
302                                 for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
303                                         const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
304                                         float4 curr_keys[4];
305                                         curve.cardinal_motion_keys(curve_keys,
306                                                                    curve_radius,
307                                                                    key_steps,
308                                                                    num_keys,
309                                                                    num_steps,
310                                                                    curr_time,
311                                                                    k - 1, k, k + 1, k + 2,
312                                                                    curr_keys);
313                                         BoundBox curr_bounds = BoundBox::empty;
314                                         curve.bounds_grow(curr_keys, curr_bounds);
315                                         BoundBox bounds = prev_bounds;
316                                         bounds.grow(curr_bounds);
317                                         if(bounds.valid()) {
318                                                 const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
319                                                 int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
320                                                 references.push_back(BVHReference(bounds,
321                                                                                   j,
322                                                                                   i,
323                                                                                   packed_type,
324                                                                                   prev_time,
325                                                                                   curr_time));
326                                                 root.grow(bounds);
327                                                 center.grow(bounds.center2());
328                                         }
329                                         /* Current time boundbox becomes previous one for the
330                                          * next time step.
331                                          */
332                                         prev_bounds = curr_bounds;
333                                 }
334                         }
335                 }
336         }
337 }
338
339 void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
340 {
341         if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
342                 add_reference_triangles(root, center, mesh, i);
343         }
344         if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
345                 add_reference_curves(root, center, mesh, i);
346         }
347 }
348
349 void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
350 {
351         references.push_back(BVHReference(ob->bounds, -1, i, 0));
352         root.grow(ob->bounds);
353         center.grow(ob->bounds.center2());
354 }
355
356 static size_t count_curve_segments(Mesh *mesh)
357 {
358         size_t num = 0, num_curves = mesh->num_curves();
359
360         for(size_t i = 0; i < num_curves; i++)
361                 num += mesh->get_curve(i).num_keys - 1;
362
363         return num;
364 }
365
366 void BVHBuild::add_references(BVHRange& root)
367 {
368         /* reserve space for references */
369         size_t num_alloc_references = 0;
370
371         foreach(Object *ob, objects) {
372                 if(params.top_level) {
373                         if(!ob->is_traceable()) {
374                                 continue;
375                         }
376                         if(!ob->mesh->is_instanced()) {
377                                 if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
378                                         num_alloc_references += ob->mesh->num_triangles();
379                                 }
380                                 if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
381                                         num_alloc_references += count_curve_segments(ob->mesh);
382                                 }
383                         }
384                         else
385                                 num_alloc_references++;
386                 }
387                 else {
388                         if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
389                                 num_alloc_references += ob->mesh->num_triangles();
390                         }
391                         if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
392                                 num_alloc_references += count_curve_segments(ob->mesh);
393                         }
394                 }
395         }
396
397         references.reserve(num_alloc_references);
398
399         /* add references from objects */
400         BoundBox bounds = BoundBox::empty, center = BoundBox::empty;
401         int i = 0;
402
403         foreach(Object *ob, objects) {
404                 if(params.top_level) {
405                         if(!ob->is_traceable()) {
406                                 ++i;
407                                 continue;
408                         }
409                         if(!ob->mesh->is_instanced())
410                                 add_reference_mesh(bounds, center, ob->mesh, i);
411                         else
412                                 add_reference_object(bounds, center, ob, i);
413                 }
414                 else
415                         add_reference_mesh(bounds, center, ob->mesh, i);
416
417                 i++;
418
419                 if(progress.get_cancel()) return;
420         }
421
422         /* happens mostly on empty meshes */
423         if(!bounds.valid())
424                 bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
425
426         root = BVHRange(bounds, center, 0, references.size());
427 }
428
429 /* Build */
430
431 BVHNode* BVHBuild::run()
432 {
433         BVHRange root;
434
435         /* add references */
436         add_references(root);
437
438         if(progress.get_cancel())
439                 return NULL;
440
441         /* init spatial splits */
442         if(params.top_level) {
443                 /* NOTE: Technically it is supported by the builder but it's not really
444                  * optimized for speed yet and not really clear yet if it has measurable
445                  * improvement on render time. Needs some extra investigation before
446                  * enabling spatial split for top level BVH.
447                  */
448                 params.use_spatial_split = false;
449         }
450
451         spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
452         if(params.use_spatial_split) {
453                 /* NOTE: The API here tries to be as much ready for multi-threaded build
454                  * as possible, but at the same time it tries not to introduce any
455                  * changes in behavior for until all refactoring needed for threading is
456                  * finished.
457                  *
458                  * So we currently allocate single storage for now, which is only used by
459                  * the only thread working on the spatial BVH build.
460                  */
461                 spatial_storage.resize(TaskScheduler::num_threads() + 1);
462                 size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
463                 foreach(BVHSpatialStorage &storage, spatial_storage) {
464                         storage.right_bounds.clear();
465                 }
466                 spatial_storage[0].right_bounds.resize(num_bins);
467         }
468         spatial_free_index = 0;
469
470         need_prim_time = params.num_motion_curve_steps > 0 ||
471                          params.num_motion_triangle_steps > 0;
472
473         /* init progress updates */
474         double build_start_time;
475         build_start_time = progress_start_time = time_dt();
476         progress_count = 0;
477         progress_total = references.size();
478         progress_original_total = progress_total;
479
480         prim_type.resize(references.size());
481         prim_index.resize(references.size());
482         prim_object.resize(references.size());
483         if(need_prim_time) {
484                 prim_time.resize(references.size());
485         }
486         else {
487                 prim_time.resize(0);
488         }
489
490         /* build recursively */
491         BVHNode *rootnode;
492
493         if(params.use_spatial_split) {
494                 /* Perform multithreaded spatial split build. */
495                 rootnode = build_node(root, &references, 0, 0);
496                 task_pool.wait_work();
497         }
498         else {
499                 /* Perform multithreaded binning build. */
500                 BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
501                 rootnode = build_node(rootbin, 0);
502                 task_pool.wait_work();
503         }
504
505         /* delete if we canceled */
506         if(rootnode) {
507                 if(progress.get_cancel()) {
508                         rootnode->deleteSubtree();
509                         rootnode = NULL;
510                         VLOG(1) << "BVH build cancelled.";
511                 }
512                 else {
513                         /*rotate(rootnode, 4, 5);*/
514                         rootnode->update_visibility();
515                         rootnode->update_time();
516                 }
517                 if(rootnode != NULL) {
518                         VLOG(1) << "BVH build statistics:\n"
519                                 << "  Build time: " << time_dt() - build_start_time << "\n"
520                                 << "  Total number of nodes: "
521                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) << "\n"
522                                 << "  Number of inner nodes: "
523                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) << "\n"
524                                 << "  Number of leaf nodes: "
525                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) << "\n"
526                                 << "  Number of unaligned nodes: "
527                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT))  << "\n"
528                                 << "  Allocation slop factor: "
529                                        << ((prim_type.capacity() != 0)
530                                                ? (float)prim_type.size() / prim_type.capacity()
531                                                : 1.0f) << "\n"
532                                 << "  Maximum depth: "
533                                 << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_DEPTH))  << "\n";
534                 }
535         }
536
537
538         return rootnode;
539 }
540
541 void BVHBuild::progress_update()
542 {
543         if(time_dt() - progress_start_time < 0.25)
544                 return;
545
546         double progress_start = (double)progress_count/(double)progress_total;
547         double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
548
549         string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%",
550                                    progress_start * 100.0, duplicates * 100.0);
551
552         progress.set_substatus(msg);
553         progress_start_time = time_dt();
554 }
555
556 void BVHBuild::thread_build_node(InnerNode *inner,
557                                  int child,
558                                  BVHObjectBinning *range,
559                                  int level)
560 {
561         if(progress.get_cancel())
562                 return;
563
564         /* build nodes */
565         BVHNode *node = build_node(*range, level);
566
567         /* set child in inner node */
568         inner->children[child] = node;
569
570         /* update progress */
571         if(range->size() < THREAD_TASK_SIZE) {
572                 /*rotate(node, INT_MAX, 5);*/
573
574                 thread_scoped_lock lock(build_mutex);
575
576                 progress_count += range->size();
577                 progress_update();
578         }
579 }
580
581 void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
582                                                int child,
583                                                BVHRange *range,
584                                                vector<BVHReference> *references,
585                                                int level,
586                                                int thread_id)
587 {
588         if(progress.get_cancel()) {
589                 return;
590         }
591
592         /* build nodes */
593         BVHNode *node = build_node(*range, references, level, thread_id);
594
595         /* set child in inner node */
596         inner->children[child] = node;
597 }
598
599 bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
600                                           const vector<BVHReference>& references) const
601 {
602         size_t size = range.size();
603         size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
604
605         if(size > max_leaf_size)
606                 return false;
607
608         size_t num_triangles = 0;
609         size_t num_motion_triangles = 0;
610         size_t num_curves = 0;
611         size_t num_motion_curves = 0;
612
613         for(int i = 0; i < size; i++) {
614                 const BVHReference& ref = references[range.start() + i];
615
616                 if(ref.prim_type() & PRIMITIVE_CURVE)
617                         num_curves++;
618                 if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
619                         num_motion_curves++;
620                 else if(ref.prim_type() & PRIMITIVE_TRIANGLE)
621                         num_triangles++;
622                 else if(ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
623                         num_motion_triangles++;
624         }
625
626         return (num_triangles <= params.max_triangle_leaf_size) &&
627                (num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
628                (num_curves <= params.max_curve_leaf_size) &&
629                (num_motion_curves <= params.max_motion_curve_leaf_size);
630 }
631
632 /* multithreaded binning builder */
633 BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
634 {
635         size_t size = range.size();
636         float leafSAH = params.sah_primitive_cost * range.leafSAH;
637         float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH;
638
639         /* Have at least one inner node on top level, for performance and correct
640          * visibility tests, since object instances do not check visibility flag.
641          */
642         if(!(range.size() > 0 && params.top_level && level == 0)) {
643                 /* Make leaf node when threshold reached or SAH tells us. */
644                 if((params.small_enough_for_leaf(size, level)) ||
645                    (range_within_max_leaf_size(range, references) && leafSAH < splitSAH))
646                 {
647                         return create_leaf_node(range, references);
648                 }
649         }
650
651         BVHObjectBinning unaligned_range;
652         float unalignedSplitSAH = FLT_MAX;
653         float unalignedLeafSAH = FLT_MAX;
654         Transform aligned_space;
655         bool do_unalinged_split = false;
656         if(params.use_unaligned_nodes &&
657            splitSAH > params.unaligned_split_threshold*leafSAH)
658         {
659                 aligned_space = unaligned_heuristic.compute_aligned_space(
660                         range, &references[0]);
661                 unaligned_range = BVHObjectBinning(range,
662                                                    &references[0],
663                                                    &unaligned_heuristic,
664                                                    &aligned_space);
665                 unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
666                                     params.sah_primitive_cost * unaligned_range.splitSAH;
667                 unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
668                 if(!(range.size() > 0 && params.top_level && level == 0)) {
669                         if(unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
670                            range_within_max_leaf_size(range, references))
671                         {
672                                 return create_leaf_node(range, references);
673                         }
674                 }
675                 /* Check whether unaligned split is better than the regular one. */
676                 if(unalignedSplitSAH < splitSAH) {
677                         do_unalinged_split = true;
678                 }
679         }
680
681         /* Perform split. */
682         BVHObjectBinning left, right;
683         if(do_unalinged_split) {
684                 unaligned_range.split(&references[0], left, right);
685         }
686         else {
687                 range.split(&references[0], left, right);
688         }
689
690         BoundBox bounds;
691         if(do_unalinged_split) {
692                 bounds = unaligned_heuristic.compute_aligned_boundbox(
693                         range, &references[0], aligned_space);
694         }
695         else {
696                 bounds = range.bounds();
697         }
698
699         /* Create inner node. */
700         InnerNode *inner;
701         if(range.size() < THREAD_TASK_SIZE) {
702                 /* local build */
703                 BVHNode *leftnode = build_node(left, level + 1);
704                 BVHNode *rightnode = build_node(right, level + 1);
705
706                 inner = new InnerNode(bounds, leftnode, rightnode);
707         }
708         else {
709                 /* Threaded build */
710                 inner = new InnerNode(bounds);
711
712                 task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
713                 task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
714         }
715
716         if(do_unalinged_split) {
717                 inner->set_aligned_space(aligned_space);
718         }
719
720         return inner;
721 }
722
723 /* multithreaded spatial split builder */
724 BVHNode* BVHBuild::build_node(const BVHRange& range,
725                               vector<BVHReference> *references,
726                               int level,
727                               int thread_id)
728 {
729         /* Update progress.
730          *
731          * TODO(sergey): Currently it matches old behavior, but we can move it to the
732          * task thread (which will mimic non=split builder) and save some CPU ticks
733          * on checking cancel status.
734          */
735         progress_update();
736         if(progress.get_cancel()) {
737                 return NULL;
738         }
739
740         /* Small enough or too deep => create leaf. */
741         if(!(range.size() > 0 && params.top_level && level == 0)) {
742                 if(params.small_enough_for_leaf(range.size(), level)) {
743                         progress_count += range.size();
744                         return create_leaf_node(range, *references);
745                 }
746         }
747
748         /* Perform splitting test. */
749         BVHSpatialStorage *storage = &spatial_storage[thread_id];
750         BVHMixedSplit split(this, storage, range, references, level);
751
752         if(!(range.size() > 0 && params.top_level && level == 0)) {
753                 if(split.no_split) {
754                         progress_count += range.size();
755                         return create_leaf_node(range, *references);
756                 }
757         }
758         float leafSAH = params.sah_primitive_cost * split.leafSAH;
759         float splitSAH = params.sah_node_cost * range.bounds().half_area() +
760                          params.sah_primitive_cost * split.nodeSAH;
761
762         BVHMixedSplit unaligned_split;
763         float unalignedSplitSAH = FLT_MAX;
764         /* float unalignedLeafSAH = FLT_MAX; */
765         Transform aligned_space;
766         bool do_unalinged_split = false;
767         if(params.use_unaligned_nodes &&
768            splitSAH > params.unaligned_split_threshold*leafSAH)
769         {
770                 aligned_space =
771                         unaligned_heuristic.compute_aligned_space(range, &references->at(0));
772                 unaligned_split = BVHMixedSplit(this,
773                                                 storage,
774                                                 range,
775                                                 references,
776                                                 level,
777                                                 &unaligned_heuristic,
778                                                 &aligned_space);
779                 /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
780                 unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
781                                     params.sah_primitive_cost * unaligned_split.nodeSAH;
782                 /* TOOD(sergey): Check we can create leaf already. */
783                 /* Check whether unaligned split is better than the regulat one. */
784                 if(unalignedSplitSAH < splitSAH) {
785                         do_unalinged_split = true;
786                 }
787         }
788
789         /* Do split. */
790         BVHRange left, right;
791         if(do_unalinged_split) {
792                 unaligned_split.split(this, left, right, range);
793         }
794         else {
795                 split.split(this, left, right, range);
796         }
797
798         progress_total += left.size() + right.size() - range.size();
799
800         BoundBox bounds;
801         if(do_unalinged_split) {
802                 bounds = unaligned_heuristic.compute_aligned_boundbox(
803                         range, &references->at(0), aligned_space);
804         }
805         else {
806                 bounds = range.bounds();
807         }
808
809         /* Create inner node. */
810         InnerNode *inner;
811         if(range.size() < THREAD_TASK_SIZE) {
812                 /* Local build. */
813
814                 /* Build left node. */
815                 vector<BVHReference> copy(references->begin() + right.start(),
816                                           references->begin() + right.end());
817                 right.set_start(0);
818
819                 BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
820
821                 /* Build right node. */
822                 BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
823
824                 inner = new InnerNode(bounds, leftnode, rightnode);
825         }
826         else {
827                 /* Threaded build. */
828                 inner = new InnerNode(bounds);
829                 task_pool.push(new BVHSpatialSplitBuildTask(this,
830                                                             inner,
831                                                             0,
832                                                             left,
833                                                             *references,
834                                                             level + 1),
835                                true);
836                 task_pool.push(new BVHSpatialSplitBuildTask(this,
837                                                             inner,
838                                                             1,
839                                                             right,
840                                                             *references,
841                                                             level + 1),
842                                true);
843         }
844
845         if(do_unalinged_split) {
846                 inner->set_aligned_space(aligned_space);
847         }
848
849         return inner;
850 }
851
852 /* Create Nodes */
853
854 BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num)
855 {
856         if(num == 0) {
857                 BoundBox bounds = BoundBox::empty;
858                 return new LeafNode(bounds, 0, 0, 0);
859         }
860         else if(num == 1) {
861                 assert(start < prim_type.size());
862                 prim_type[start] = ref->prim_type();
863                 prim_index[start] = ref->prim_index();
864                 prim_object[start] = ref->prim_object();
865                 if(need_prim_time) {
866                         prim_time[start] = make_float2(ref->time_from(), ref->time_to());
867                 }
868
869                 const uint visibility = objects[ref->prim_object()]->visibility_for_tracing();
870                 BVHNode *leaf_node =  new LeafNode(ref->bounds(), visibility, start, start+1);
871                 leaf_node->time_from = ref->time_from();
872                 leaf_node->time_to = ref->time_to();
873                 return leaf_node;
874         }
875         else {
876                 int mid = num/2;
877                 BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
878                 BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
879
880                 BoundBox bounds = BoundBox::empty;
881                 bounds.grow(leaf0->bounds);
882                 bounds.grow(leaf1->bounds);
883
884                 BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
885                 inner_node->time_from = min(leaf0->time_from, leaf1->time_from);
886                 inner_node->time_to = max(leaf0->time_to, leaf1->time_to);
887                 return inner_node;
888         }
889 }
890
891 BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
892                                     const vector<BVHReference>& references)
893 {
894         /* This is a bit overallocating here (considering leaf size into account),
895          * but chunk-based re-allocation in vector makes it difficult to use small
896          * size of stack storage here. Some tweaks are possible tho.
897          *
898          * NOTES:
899          *  - If the size is too big, we'll have inefficient stack usage,
900          *    and lots of cache misses.
901          *  - If the size is too small, then we can run out of memory
902          *    allowed to be used by vector.
903          *    In practice it wouldn't mean crash, just allocator will fallback
904          *    to heap which is slower.
905          *  - Optimistic re-allocation in STL could jump us out of stack usage
906          *    because re-allocation happens in chunks and size of those chunks we
907          *    can not control.
908          */
909         typedef StackAllocator<256, int> LeafStackAllocator;
910         typedef StackAllocator<256, float2> LeafTimeStackAllocator;
911         typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
912
913         vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
914         vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
915         vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
916         vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
917         vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
918
919         /* TODO(sergey): In theory we should be able to store references. */
920         vector<BVHReference, LeafReferenceStackAllocator> object_references;
921
922         uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
923         /* NOTE: Keep initializtion in sync with actual number of primitives. */
924         BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {BoundBox::empty,
925                                                 BoundBox::empty,
926                                                 BoundBox::empty,
927                                                 BoundBox::empty};
928         int ob_num = 0;
929         int num_new_prims = 0;
930         /* Fill in per-type type/index array. */
931         for(int i = 0; i < range.size(); i++) {
932                 const BVHReference& ref = references[range.start() + i];
933                 if(ref.prim_index() != -1) {
934                         int type_index = bitscan(ref.prim_type() & PRIMITIVE_ALL);
935                         p_ref[type_index].push_back(ref);
936                         p_type[type_index].push_back(ref.prim_type());
937                         p_index[type_index].push_back(ref.prim_index());
938                         p_object[type_index].push_back(ref.prim_object());
939                         p_time[type_index].push_back(make_float2(ref.time_from(),
940                                                                  ref.time_to()));
941
942                         bounds[type_index].grow(ref.bounds());
943                         visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing();
944                         if(ref.prim_type() & PRIMITIVE_ALL_CURVE) {
945                                 visibility[type_index] |= PATH_RAY_CURVE;
946                         }
947                         ++num_new_prims;
948                 }
949                 else {
950                         object_references.push_back(ref);
951                         ++ob_num;
952                 }
953         }
954
955         /* Create leaf nodes for every existing primitive.
956          *
957          * Here we write primitive types, indices and objects to a temporary array.
958          * This way we keep all the heavy memory allocation code outside of the
959          * thread lock in the case of spatial split building.
960          *
961          * TODO(sergey): With some pointer trickery we can write directly to the
962          * destination buffers for the non-spatial split BVH.
963          */
964         BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
965         int num_leaves = 0;
966         size_t start_index = 0;
967         vector<int, LeafStackAllocator> local_prim_type,
968                                         local_prim_index,
969                                         local_prim_object;
970         vector<float2, LeafTimeStackAllocator> local_prim_time;
971         local_prim_type.resize(num_new_prims);
972         local_prim_index.resize(num_new_prims);
973         local_prim_object.resize(num_new_prims);
974         if(need_prim_time) {
975                 local_prim_time.resize(num_new_prims);
976         }
977         for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
978                 int num = (int)p_type[i].size();
979                 if(num != 0) {
980                         assert(p_type[i].size() == p_index[i].size());
981                         assert(p_type[i].size() == p_object[i].size());
982                         Transform aligned_space;
983                         bool alignment_found = false;
984                         for(int j = 0; j < num; ++j) {
985                                 const int index = start_index + j;
986                                 local_prim_type[index] = p_type[i][j];
987                                 local_prim_index[index] = p_index[i][j];
988                                 local_prim_object[index] = p_object[i][j];
989                                 if(need_prim_time) {
990                                         local_prim_time[index] = p_time[i][j];
991                                 }
992                                 if(params.use_unaligned_nodes && !alignment_found) {
993                                         alignment_found =
994                                                 unaligned_heuristic.compute_aligned_space(p_ref[i][j],
995                                                                                           &aligned_space);
996                                 }
997                         }
998                         LeafNode *leaf_node = new LeafNode(bounds[i],
999                                                            visibility[i],
1000                                                            start_index,
1001                                                            start_index + num);
1002                         if(true) {
1003                                 float time_from = 1.0f, time_to = 0.0f;
1004                                 for(int j = 0; j < num; ++j) {
1005                                         const BVHReference &ref = p_ref[i][j];
1006                                         time_from = min(time_from, ref.time_from());
1007                                         time_to = max(time_to, ref.time_to());
1008                                 }
1009                                 leaf_node->time_from = time_from;
1010                                 leaf_node->time_to = time_to;
1011                         }
1012                         if(alignment_found) {
1013                                 /* Need to recalculate leaf bounds with new alignment. */
1014                                 leaf_node->bounds = BoundBox::empty;
1015                                 for(int j = 0; j < num; ++j) {
1016                                         const BVHReference &ref = p_ref[i][j];
1017                                         BoundBox ref_bounds =
1018                                                 unaligned_heuristic.compute_aligned_prim_boundbox(
1019                                                         ref,
1020                                                         aligned_space);
1021                                         leaf_node->bounds.grow(ref_bounds);
1022                                 }
1023                                 /* Set alignment space. */
1024                                 leaf_node->set_aligned_space(aligned_space);
1025                         }
1026                         leaves[num_leaves++] = leaf_node;
1027                         start_index += num;
1028                 }
1029         }
1030         /* Get size of new data to be copied to the packed arrays. */
1031         const int num_new_leaf_data = start_index;
1032         const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data;
1033         /* Copy actual data to the packed array. */
1034         if(params.use_spatial_split) {
1035                 spatial_spin_lock.lock();
1036                 /* We use first free index in the packed arrays and mode pointer to the
1037                  * end of the current range.
1038                  *
1039                  * This doesn't give deterministic packed arrays, but it shouldn't really
1040                  * matter because order of children in BVH is deterministic.
1041                  */
1042                 start_index = spatial_free_index;
1043                 spatial_free_index += range.size();
1044                 /* Extend an array when needed. */
1045                 const size_t range_end = start_index + range.size();
1046                 if(prim_type.size() < range_end) {
1047                         /* Avoid extra re-allocations by pre-allocating bigger array in an
1048                          * advance.
1049                          */
1050                         if(range_end >= prim_type.capacity()) {
1051                                 float progress = (float)progress_count/(float)progress_total;
1052                                 float factor = (1.0f - progress);
1053                                 const size_t reserve = (size_t)(range_end + (float)range_end*factor);
1054                                 prim_type.reserve(reserve);
1055                                 prim_index.reserve(reserve);
1056                                 prim_object.reserve(reserve);
1057                                 if(need_prim_time) {
1058                                         prim_time.reserve(reserve);
1059                                 }
1060                         }
1061
1062                         prim_type.resize(range_end);
1063                         prim_index.resize(range_end);
1064                         prim_object.resize(range_end);
1065                         if(need_prim_time) {
1066                                 prim_time.resize(range_end);
1067                         }
1068                 }
1069                 /* Perform actual data copy. */
1070                 if(new_leaf_data_size > 0) {
1071                         memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
1072                         memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
1073                         memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
1074                         if(need_prim_time) {
1075                                 memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
1076                         }
1077                 }
1078                 spatial_spin_lock.unlock();
1079         }
1080         else {
1081                 /* For the regular BVH builder we simply copy new data starting at the
1082                  * range start. This is totally thread-safe, all threads are living
1083                  * inside of their own range.
1084                  */
1085                 start_index = range.start();
1086                 if(new_leaf_data_size > 0) {
1087                         memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
1088                         memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
1089                         memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
1090                         if(need_prim_time) {
1091                                 memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
1092                         }
1093                 }
1094         }
1095
1096         /* So far leaves were created with the zero-based index in an arrays,
1097          * here we modify the indices to correspond to actual packed array start
1098          * index.
1099          */
1100         for(int i = 0; i < num_leaves; ++i) {
1101                 LeafNode *leaf = (LeafNode *)leaves[i];
1102                 leaf->lo += start_index;
1103                 leaf->hi += start_index;
1104         }
1105
1106         /* Create leaf node for object. */
1107         if(num_leaves == 0 || ob_num) {
1108                 /* Only create object leaf nodes if there are objects or no other
1109                  * nodes created.
1110                  */
1111                 const BVHReference *ref = (ob_num)? &object_references[0]: NULL;
1112                 leaves[num_leaves] = create_object_leaf_nodes(ref,
1113                                                               start_index + num_new_leaf_data,
1114                                                               ob_num);
1115                 ++num_leaves;
1116         }
1117
1118         /* TODO(sergey): Need to take care of alignment when number of leaves
1119          * is more than 1.
1120          */
1121         if(num_leaves == 1) {
1122                 /* Simplest case: single leaf, just return it.
1123                  * In all the rest cases we'll be creating intermediate inner node with
1124                  * an appropriate bounding box.
1125                  */
1126                 return leaves[0];
1127         }
1128         else if(num_leaves == 2) {
1129                 return new InnerNode(range.bounds(), leaves[0], leaves[1]);
1130         }
1131         else if(num_leaves == 3) {
1132                 BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds);
1133                 BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
1134                 return new InnerNode(range.bounds(), leaves[0], inner);
1135         } else {
1136                 /* Should be doing more branches if more primitive types added. */
1137                 assert(num_leaves <= 5);
1138                 BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds);
1139                 BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds);
1140                 BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
1141                 BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
1142                 BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds);
1143                 BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
1144                 if(num_leaves == 5) {
1145                         return new InnerNode(range.bounds(), inner_c, leaves[4]);
1146                 }
1147                 return inner_c;
1148         }
1149
1150 #undef MAX_ITEMS_PER_LEAF
1151 }
1152
1153 /* Tree Rotations */
1154
1155 void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations)
1156 {
1157         /* in tested scenes, this resulted in slightly slower raytracing, so disabled
1158          * it for now. could be implementation bug, or depend on the scene */
1159         if(node)
1160                 for(int i = 0; i < iterations; i++)
1161                         rotate(node, max_depth);
1162 }
1163
1164 void BVHBuild::rotate(BVHNode *node, int max_depth)
1165 {
1166         /* nothing to rotate if we reached a leaf node. */
1167         if(node->is_leaf() || max_depth < 0)
1168                 return;
1169
1170         InnerNode *parent = (InnerNode*)node;
1171
1172         /* rotate all children first */
1173         for(size_t c = 0; c < 2; c++)
1174                 rotate(parent->children[c], max_depth-1);
1175
1176         /* compute current area of all children */
1177         BoundBox bounds0 = parent->children[0]->bounds;
1178         BoundBox bounds1 = parent->children[1]->bounds;
1179
1180         float area0 = bounds0.half_area();
1181         float area1 = bounds1.half_area();
1182         float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);
1183
1184         /* find best rotation. we pick a target child of a first child, and swap
1185          * this with an other child. we perform the best such swap. */
1186         float best_cost = FLT_MAX;
1187         int best_child = -1, best_target = -1, best_other = -1;
1188
1189         for(size_t c = 0; c < 2; c++) {
1190                 /* ignore leaf nodes as we cannot descent into */
1191                 if(parent->children[c]->is_leaf())
1192                         continue;
1193
1194                 InnerNode *child = (InnerNode*)parent->children[c];
1195                 BoundBox& other = (c == 0)? bounds1: bounds0;
1196
1197                 /* transpose child bounds */
1198                 BoundBox target0 = child->children[0]->bounds;
1199                 BoundBox target1 = child->children[1]->bounds;
1200
1201                 /* compute cost for both possible swaps */
1202                 float cost0 = merge(other, target1).half_area() - child_area[c];
1203                 float cost1 = merge(target0, other).half_area() - child_area[c];
1204
1205                 if(min(cost0,cost1) < best_cost) {
1206                         best_child = (int)c;
1207                         best_other = (int)(1-c);
1208
1209                         if(cost0 < cost1) {
1210                                 best_cost = cost0;
1211                                 best_target = 0;
1212                         }
1213                         else {
1214                                 best_cost = cost0;
1215                                 best_target = 1;
1216                         }
1217                 }
1218         }
1219
1220         /* if we did not find a swap that improves the SAH then do nothing */
1221         if(best_cost >= 0)
1222                 return;
1223
1224         assert(best_child == 0 || best_child == 1);
1225         assert(best_target != -1);
1226
1227         /* perform the best found tree rotation */
1228         InnerNode *child = (InnerNode*)parent->children[best_child];
1229
1230         swap(parent->children[best_other], child->children[best_target]);
1231         child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds);
1232 }
1233
1234 CCL_NAMESPACE_END