Cycles: Make BVH wider prior to packing
[blender.git] / intern / cycles / bvh / bvh4.cpp
1 /*
2  * Adapted from code copyright 2009-2010 NVIDIA Corporation
3  * Modifications Copyright 2011, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "bvh/bvh4.h"
19
20 #include "render/mesh.h"
21 #include "render/object.h"
22
23 #include "bvh/bvh_node.h"
24 #include "bvh/bvh_unaligned.h"
25
26 CCL_NAMESPACE_BEGIN
27
28 /* Can we avoid this somehow or make more generic?
29  *
30  * Perhaps we can merge nodes in actual tree and make our
31  * life easier all over the place.
32  */
33
34 BVH4::BVH4(const BVHParams& params_, const vector<Object*>& objects_)
35 : BVH(params_, objects_)
36 {
37         params.bvh_layout = BVH_LAYOUT_BVH4;
38 }
39
40 namespace {
41
42 BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
43 {
44         if(node->is_leaf()) {
45                 return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
46         }
47         /* Collect nodes of one layer deeper, allowing us to have more childrem in
48          * an inner layer. */
49         assert(node->num_children() <= 2);
50         const BVHNode *children[4];
51         const BVHNode *child0 = node->get_child(0);
52         const BVHNode *child1 = node->get_child(1);
53         int num_children = 0;
54         if(child0->is_leaf()) {
55                 children[num_children++] = child0;
56         }
57         else {
58                 children[num_children++] = child0->get_child(0);
59                 children[num_children++] = child0->get_child(1);
60         }
61         if(child1->is_leaf()) {
62                 children[num_children++] = child1;
63         }
64         else {
65                 children[num_children++] = child1->get_child(0);
66                 children[num_children++] = child1->get_child(1);
67         }
68         /* Merge children in subtrees. */
69         BVHNode *children4[4];
70         for(int i = 0; i < num_children; ++i) {
71                 children4[i] = bvh_node_merge_children_recursively(children[i]);
72         }
73         /* Allocate new node. */
74         BVHNode *node4 = new InnerNode(node->bounds, children4, num_children);
75         /* TODO(sergey): Consider doing this from the InnerNode() constructor.
76          * But in order to do this nicely need to think of how to pass all the
77          * parameters there. */
78         if(node->is_unaligned) {
79                 node4->is_unaligned = true;
80                 node4->aligned_space = new Transform();
81                 *node4->aligned_space = *node->aligned_space;
82         }
83         return node4;
84 }
85
86 }  // namespace
87
88 BVHNode *BVH4::widen_children_nodes(const BVHNode *root)
89 {
90         if(root == NULL) {
91                 return NULL;
92         }
93         if(root->is_leaf()) {
94                 return const_cast<BVHNode *>(root);
95         }
96         BVHNode *root4 = bvh_node_merge_children_recursively(root);
97         /* TODO(sergey): Pack children nodes to parents which has less that 4
98          * children. */
99         return root4;
100 }
101
102 void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
103 {
104         float4 data[BVH_QNODE_LEAF_SIZE];
105         memset(data, 0, sizeof(data));
106         if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
107                 /* object */
108                 data[0].x = __int_as_float(~(leaf->lo));
109                 data[0].y = __int_as_float(0);
110         }
111         else {
112                 /* triangle */
113                 data[0].x = __int_as_float(leaf->lo);
114                 data[0].y = __int_as_float(leaf->hi);
115         }
116         data[0].z = __uint_as_float(leaf->visibility);
117         if(leaf->num_triangles() != 0) {
118                 data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
119         }
120
121         memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
122 }
123
124 void BVH4::pack_inner(const BVHStackEntry& e,
125                       const BVHStackEntry *en,
126                       int num)
127 {
128         bool has_unaligned = false;
129         /* Check whether we have to create unaligned node or all nodes are aligned
130          * and we can cut some corner here.
131          */
132         if(params.use_unaligned_nodes) {
133                 for(int i = 0; i < num; i++) {
134                         if(en[i].node->is_unaligned) {
135                                 has_unaligned = true;
136                                 break;
137                         }
138                 }
139         }
140         if(has_unaligned) {
141                 /* There's no unaligned children, pack into AABB node. */
142                 pack_unaligned_inner(e, en, num);
143         }
144         else {
145                 /* Create unaligned node with orientation transform for each of the
146                  * children.
147                  */
148                 pack_aligned_inner(e, en, num);
149         }
150 }
151
152 void BVH4::pack_aligned_inner(const BVHStackEntry& e,
153                               const BVHStackEntry *en,
154                               int num)
155 {
156         BoundBox bounds[4];
157         int child[4];
158         for(int i = 0; i < num; ++i) {
159                 bounds[i] = en[i].node->bounds;
160                 child[i] = en[i].encodeIdx();
161         }
162         pack_aligned_node(e.idx,
163                           bounds,
164                           child,
165                           e.node->visibility,
166                           e.node->time_from,
167                           e.node->time_to,
168                           num);
169 }
170
171 void BVH4::pack_aligned_node(int idx,
172                              const BoundBox *bounds,
173                              const int *child,
174                              const uint visibility,
175                              const float time_from,
176                              const float time_to,
177                              const int num)
178 {
179         float4 data[BVH_QNODE_SIZE];
180         memset(data, 0, sizeof(data));
181
182         data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
183         data[0].y = time_from;
184         data[0].z = time_to;
185
186         for(int i = 0; i < num; i++) {
187                 float3 bb_min = bounds[i].min;
188                 float3 bb_max = bounds[i].max;
189
190                 data[1][i] = bb_min.x;
191                 data[2][i] = bb_max.x;
192                 data[3][i] = bb_min.y;
193                 data[4][i] = bb_max.y;
194                 data[5][i] = bb_min.z;
195                 data[6][i] = bb_max.z;
196
197                 data[7][i] = __int_as_float(child[i]);
198         }
199
200         for(int i = num; i < 4; i++) {
201                 /* We store BB which would never be recorded as intersection
202                  * so kernel might safely assume there are always 4 child nodes.
203                  */
204                 data[1][i] = FLT_MAX;
205                 data[2][i] = -FLT_MAX;
206
207                 data[3][i] = FLT_MAX;
208                 data[4][i] = -FLT_MAX;
209
210                 data[5][i] = FLT_MAX;
211                 data[6][i] = -FLT_MAX;
212
213                 data[7][i] = __int_as_float(0);
214         }
215
216         memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE);
217 }
218
219 void BVH4::pack_unaligned_inner(const BVHStackEntry& e,
220                                 const BVHStackEntry *en,
221                                 int num)
222 {
223         Transform aligned_space[4];
224         BoundBox bounds[4];
225         int child[4];
226         for(int i = 0; i < num; ++i) {
227                 aligned_space[i] = en[i].node->get_aligned_space();
228                 bounds[i] = en[i].node->bounds;
229                 child[i] = en[i].encodeIdx();
230         }
231         pack_unaligned_node(e.idx,
232                             aligned_space,
233                             bounds,
234                             child,
235                             e.node->visibility,
236                             e.node->time_from,
237                             e.node->time_to,
238                             num);
239 }
240
241 void BVH4::pack_unaligned_node(int idx,
242                                const Transform *aligned_space,
243                                const BoundBox *bounds,
244                                const int *child,
245                                const uint visibility,
246                                const float time_from,
247                                const float time_to,
248                                const int num)
249 {
250         float4 data[BVH_UNALIGNED_QNODE_SIZE];
251         memset(data, 0, sizeof(data));
252
253         data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
254         data[0].y = time_from;
255         data[0].z = time_to;
256
257         for(int i = 0; i < num; i++) {
258                 Transform space = BVHUnaligned::compute_node_transform(
259                         bounds[i],
260                         aligned_space[i]);
261
262                 data[1][i] = space.x.x;
263                 data[2][i] = space.x.y;
264                 data[3][i] = space.x.z;
265
266                 data[4][i] = space.y.x;
267                 data[5][i] = space.y.y;
268                 data[6][i] = space.y.z;
269
270                 data[7][i] = space.z.x;
271                 data[8][i] = space.z.y;
272                 data[9][i] = space.z.z;
273
274                 data[10][i] = space.x.w;
275                 data[11][i] = space.y.w;
276                 data[12][i] = space.z.w;
277
278                 data[13][i] = __int_as_float(child[i]);
279         }
280
281         for(int i = num; i < 4; i++) {
282                 /* We store BB which would never be recorded as intersection
283                  * so kernel might safely assume there are always 4 child nodes.
284                  */
285
286                 data[1][i] = NAN;
287                 data[2][i] = NAN;
288                 data[3][i] = NAN;
289
290                 data[4][i] = NAN;
291                 data[5][i] = NAN;
292                 data[6][i] = NAN;
293
294                 data[7][i] = NAN;
295                 data[8][i] = NAN;
296                 data[9][i] = NAN;
297
298                 data[10][i] = NAN;
299                 data[11][i] = NAN;
300                 data[12][i] = NAN;
301
302                 data[13][i] = __int_as_float(0);
303         }
304
305         memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
306 }
307
308 /* Quad SIMD Nodes */
309
310 void BVH4::pack_nodes(const BVHNode *root)
311 {
312         /* Calculate size of the arrays required. */
313         const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
314         const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
315         assert(num_leaf_nodes <= num_nodes);
316         const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
317         size_t node_size;
318         if(params.use_unaligned_nodes) {
319                 const size_t num_unaligned_nodes =
320                         root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
321                 node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
322                             (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
323         }
324         else {
325                 node_size = num_inner_nodes * BVH_QNODE_SIZE;
326         }
327         /* Resize arrays. */
328         pack.nodes.clear();
329         pack.leaf_nodes.clear();
330         /* For top level BVH, first merge existing BVH's so we know the offsets. */
331         if(params.top_level) {
332                 pack_instances(node_size, num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
333         }
334         else {
335                 pack.nodes.resize(node_size);
336                 pack.leaf_nodes.resize(num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
337         }
338
339         int nextNodeIdx = 0, nextLeafNodeIdx = 0;
340
341         vector<BVHStackEntry> stack;
342         stack.reserve(BVHParams::MAX_DEPTH*2);
343         if(root->is_leaf()) {
344                 stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
345         }
346         else {
347                 stack.push_back(BVHStackEntry(root, nextNodeIdx));
348                 nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE
349                                                      : BVH_QNODE_SIZE;
350         }
351
352         while(stack.size()) {
353                 BVHStackEntry e = stack.back();
354                 stack.pop_back();
355
356                 if(e.node->is_leaf()) {
357                         /* leaf node */
358                         const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
359                         pack_leaf(e, leaf);
360                 }
361                 else {
362                         /* Inner node. */
363                         /* Collect nodes. */
364                         const BVHNode *children[4];
365                         const int num_children = e.node->num_children();
366                         /* Push entries on the stack. */
367                         for(int i = 0; i < num_children; ++i) {
368                                 int idx;
369                                 children[i] = e.node->get_child(i);
370                                 assert(children[i] != NULL);
371                                 if(children[i]->is_leaf()) {
372                                         idx = nextLeafNodeIdx++;
373                                 }
374                                 else {
375                                         idx = nextNodeIdx;
376                                         nextNodeIdx += children[i]->has_unaligned()
377                                                                ? BVH_UNALIGNED_QNODE_SIZE
378                                                                : BVH_QNODE_SIZE;
379                                 }
380                                 stack.push_back(BVHStackEntry(children[i], idx));
381                         }
382                         /* Set node. */
383                         pack_inner(e, &stack[stack.size() - num_children], num_children);
384                 }
385         }
386
387         assert(node_size == nextNodeIdx);
388         /* Root index to start traversal at, to handle case of single leaf node. */
389         pack.root_index = (root->is_leaf())? -1: 0;
390 }
391
392 void BVH4::refit_nodes()
393 {
394         assert(!params.top_level);
395
396         BoundBox bbox = BoundBox::empty;
397         uint visibility = 0;
398         refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
399 }
400
401 void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
402 {
403         if(leaf) {
404                 /* Refit leaf node. */
405                 int4 *data = &pack.leaf_nodes[idx];
406                 int4 c = data[0];
407
408                 BVH::refit_primitives(c.x, c.y, bbox, visibility);
409
410                 /* TODO(sergey): This is actually a copy of pack_leaf(),
411                  * but this chunk of code only knows actual data and has
412                  * no idea about BVHNode.
413                  *
414                  * Would be nice to de-duplicate code, but trying to make
415                  * making code more general ends up in much nastier code
416                  * in my opinion so far.
417                  *
418                  * Same applies to the inner nodes case below.
419                  */
420                 float4 leaf_data[BVH_QNODE_LEAF_SIZE];
421                 leaf_data[0].x = __int_as_float(c.x);
422                 leaf_data[0].y = __int_as_float(c.y);
423                 leaf_data[0].z = __uint_as_float(visibility);
424                 leaf_data[0].w = __uint_as_float(c.w);
425                 memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
426         }
427         else {
428                 int4 *data = &pack.nodes[idx];
429                 bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
430                 int4 c;
431                 if(is_unaligned) {
432                         c = data[13];
433                 }
434                 else {
435                         c = data[7];
436                 }
437                 /* Refit inner node, set bbox from children. */
438                 BoundBox child_bbox[4] = {BoundBox::empty,
439                                           BoundBox::empty,
440                                           BoundBox::empty,
441                                           BoundBox::empty};
442                 uint child_visibility[4] = {0};
443                 int num_nodes = 0;
444
445                 for(int i = 0; i < 4; ++i) {
446                         if(c[i] != 0) {
447                                 refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
448                                            child_bbox[i], child_visibility[i]);
449                                 ++num_nodes;
450                                 bbox.grow(child_bbox[i]);
451                                 visibility |= child_visibility[i];
452                         }
453                 }
454
455                 if(is_unaligned) {
456                         Transform aligned_space[4] = {transform_identity(),
457                                                       transform_identity(),
458                                                       transform_identity(),
459                                                       transform_identity()};
460                         pack_unaligned_node(idx,
461                                             aligned_space,
462                                             child_bbox,
463                                             &c[0],
464                                             visibility,
465                                             0.0f,
466                                             1.0f,
467                                             num_nodes);
468                 }
469                 else {
470                         pack_aligned_node(idx,
471                                           child_bbox,
472                                           &c[0],
473                                           visibility,
474                                           0.0f,
475                                           1.0f,
476                                           num_nodes);
477                 }
478         }
479 }
480
481 CCL_NAMESPACE_END