Cycles: Add BVH8 and packeted triangle intersection
[blender.git] / intern / cycles / bvh / bvh4.cpp
1 /*
2  * Adapted from code copyright 2009-2010 NVIDIA Corporation
3  * Modifications Copyright 2011, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "bvh/bvh4.h"
19
20 #include "render/mesh.h"
21 #include "render/object.h"
22
23 #include "bvh/bvh_node.h"
24 #include "bvh/bvh_unaligned.h"
25
26 CCL_NAMESPACE_BEGIN
27
28 /* Can we avoid this somehow or make more generic?
29  *
30  * Perhaps we can merge nodes in actual tree and make our
31  * life easier all over the place.
32  */
33
34 BVH4::BVH4(const BVHParams& params_, const vector<Object*>& objects_)
35 : BVH(params_, objects_)
36 {
37         params.bvh_layout = BVH_LAYOUT_BVH4;
38 }
39
40 void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
41 {
42         float4 data[BVH_QNODE_LEAF_SIZE];
43         memset(data, 0, sizeof(data));
44         if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
45                 /* object */
46                 data[0].x = __int_as_float(~(leaf->lo));
47                 data[0].y = __int_as_float(0);
48         }
49         else {
50                 /* triangle */
51                 data[0].x = __int_as_float(leaf->lo);
52                 data[0].y = __int_as_float(leaf->hi);
53         }
54         data[0].z = __uint_as_float(leaf->visibility);
55         if(leaf->num_triangles() != 0) {
56                 data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
57         }
58
59         memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
60 }
61
62 void BVH4::pack_inner(const BVHStackEntry& e,
63                       const BVHStackEntry *en,
64                       int num)
65 {
66         bool has_unaligned = false;
67         /* Check whether we have to create unaligned node or all nodes are aligned
68          * and we can cut some corner here.
69          */
70         if(params.use_unaligned_nodes) {
71                 for(int i = 0; i < num; i++) {
72                         if(en[i].node->is_unaligned) {
73                                 has_unaligned = true;
74                                 break;
75                         }
76                 }
77         }
78         if(has_unaligned) {
79                 /* There's no unaligned children, pack into AABB node. */
80                 pack_unaligned_inner(e, en, num);
81         }
82         else {
83                 /* Create unaligned node with orientation transform for each of the
84                  * children.
85                  */
86                 pack_aligned_inner(e, en, num);
87         }
88 }
89
90 void BVH4::pack_aligned_inner(const BVHStackEntry& e,
91                               const BVHStackEntry *en,
92                               int num)
93 {
94         BoundBox bounds[4];
95         int child[4];
96         for(int i = 0; i < num; ++i) {
97                 bounds[i] = en[i].node->bounds;
98                 child[i] = en[i].encodeIdx();
99         }
100         pack_aligned_node(e.idx,
101                           bounds,
102                           child,
103                           e.node->visibility,
104                           e.node->time_from,
105                           e.node->time_to,
106                           num);
107 }
108
109 void BVH4::pack_aligned_node(int idx,
110                              const BoundBox *bounds,
111                              const int *child,
112                              const uint visibility,
113                              const float time_from,
114                              const float time_to,
115                              const int num)
116 {
117         float4 data[BVH_QNODE_SIZE];
118         memset(data, 0, sizeof(data));
119
120         data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
121         data[0].y = time_from;
122         data[0].z = time_to;
123
124         for(int i = 0; i < num; i++) {
125                 float3 bb_min = bounds[i].min;
126                 float3 bb_max = bounds[i].max;
127
128                 data[1][i] = bb_min.x;
129                 data[2][i] = bb_max.x;
130                 data[3][i] = bb_min.y;
131                 data[4][i] = bb_max.y;
132                 data[5][i] = bb_min.z;
133                 data[6][i] = bb_max.z;
134
135                 data[7][i] = __int_as_float(child[i]);
136         }
137
138         for(int i = num; i < 4; i++) {
139                 /* We store BB which would never be recorded as intersection
140                  * so kernel might safely assume there are always 4 child nodes.
141                  */
142                 data[1][i] = FLT_MAX;
143                 data[2][i] = -FLT_MAX;
144
145                 data[3][i] = FLT_MAX;
146                 data[4][i] = -FLT_MAX;
147
148                 data[5][i] = FLT_MAX;
149                 data[6][i] = -FLT_MAX;
150
151                 data[7][i] = __int_as_float(0);
152         }
153
154         memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE);
155 }
156
157 void BVH4::pack_unaligned_inner(const BVHStackEntry& e,
158                                 const BVHStackEntry *en,
159                                 int num)
160 {
161         Transform aligned_space[4];
162         BoundBox bounds[4];
163         int child[4];
164         for(int i = 0; i < num; ++i) {
165                 aligned_space[i] = en[i].node->get_aligned_space();
166                 bounds[i] = en[i].node->bounds;
167                 child[i] = en[i].encodeIdx();
168         }
169         pack_unaligned_node(e.idx,
170                             aligned_space,
171                             bounds,
172                             child,
173                             e.node->visibility,
174                             e.node->time_from,
175                             e.node->time_to,
176                             num);
177 }
178
179 void BVH4::pack_unaligned_node(int idx,
180                                const Transform *aligned_space,
181                                const BoundBox *bounds,
182                                const int *child,
183                                const uint visibility,
184                                const float time_from,
185                                const float time_to,
186                                const int num)
187 {
188         float4 data[BVH_UNALIGNED_QNODE_SIZE];
189         memset(data, 0, sizeof(data));
190
191         data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
192         data[0].y = time_from;
193         data[0].z = time_to;
194
195         for(int i = 0; i < num; i++) {
196                 Transform space = BVHUnaligned::compute_node_transform(
197                         bounds[i],
198                         aligned_space[i]);
199
200                 data[1][i] = space.x.x;
201                 data[2][i] = space.x.y;
202                 data[3][i] = space.x.z;
203
204                 data[4][i] = space.y.x;
205                 data[5][i] = space.y.y;
206                 data[6][i] = space.y.z;
207
208                 data[7][i] = space.z.x;
209                 data[8][i] = space.z.y;
210                 data[9][i] = space.z.z;
211
212                 data[10][i] = space.x.w;
213                 data[11][i] = space.y.w;
214                 data[12][i] = space.z.w;
215
216                 data[13][i] = __int_as_float(child[i]);
217         }
218
219         for(int i = num; i < 4; i++) {
220                 /* We store BB which would never be recorded as intersection
221                  * so kernel might safely assume there are always 4 child nodes.
222                  */
223
224                 data[1][i] = NAN;
225                 data[2][i] = NAN;
226                 data[3][i] = NAN;
227
228                 data[4][i] = NAN;
229                 data[5][i] = NAN;
230                 data[6][i] = NAN;
231
232                 data[7][i] = NAN;
233                 data[8][i] = NAN;
234                 data[9][i] = NAN;
235
236                 data[10][i] = NAN;
237                 data[11][i] = NAN;
238                 data[12][i] = NAN;
239
240                 data[13][i] = __int_as_float(0);
241         }
242
243         memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
244 }
245
246 /* Quad SIMD Nodes */
247
248 void BVH4::pack_nodes(const BVHNode *root)
249 {
250         /* Calculate size of the arrays required. */
251         const size_t num_nodes = root->getSubtreeSize(BVH_STAT_QNODE_COUNT);
252         const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
253         assert(num_leaf_nodes <= num_nodes);
254         const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
255         size_t node_size;
256         if(params.use_unaligned_nodes) {
257                 const size_t num_unaligned_nodes =
258                         root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_QNODE_COUNT);
259                 node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
260                             (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
261         }
262         else {
263                 node_size = num_inner_nodes * BVH_QNODE_SIZE;
264         }
265         /* Resize arrays. */
266         pack.nodes.clear();
267         pack.leaf_nodes.clear();
268         /* For top level BVH, first merge existing BVH's so we know the offsets. */
269         if(params.top_level) {
270                 pack_instances(node_size, num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
271         }
272         else {
273                 pack.nodes.resize(node_size);
274                 pack.leaf_nodes.resize(num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
275         }
276
277         int nextNodeIdx = 0, nextLeafNodeIdx = 0;
278
279         vector<BVHStackEntry> stack;
280         stack.reserve(BVHParams::MAX_DEPTH*2);
281         if(root->is_leaf()) {
282                 stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
283         }
284         else {
285                 stack.push_back(BVHStackEntry(root, nextNodeIdx));
286                 nextNodeIdx += node_is_unaligned(root, bvh4)
287                                        ? BVH_UNALIGNED_QNODE_SIZE
288                                        : BVH_QNODE_SIZE;
289         }
290
291         while(stack.size()) {
292                 BVHStackEntry e = stack.back();
293                 stack.pop_back();
294
295                 if(e.node->is_leaf()) {
296                         /* leaf node */
297                         const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
298                         pack_leaf(e, leaf);
299                 }
300                 else {
301                         /* Inner node. */
302                         const BVHNode *node = e.node;
303                         const BVHNode *node0 = node->get_child(0);
304                         const BVHNode *node1 = node->get_child(1);
305                         /* Collect nodes. */
306                         const BVHNode *nodes[4];
307                         int numnodes = 0;
308                         if(node0->is_leaf()) {
309                                 nodes[numnodes++] = node0;
310                         }
311                         else {
312                                 nodes[numnodes++] = node0->get_child(0);
313                                 nodes[numnodes++] = node0->get_child(1);
314                         }
315                         if(node1->is_leaf()) {
316                                 nodes[numnodes++] = node1;
317                         }
318                         else {
319                                 nodes[numnodes++] = node1->get_child(0);
320                                 nodes[numnodes++] = node1->get_child(1);
321                         }
322                         /* Push entries on the stack. */
323                         for(int i = 0; i < numnodes; ++i) {
324                                 int idx;
325                                 if(nodes[i]->is_leaf()) {
326                                         idx = nextLeafNodeIdx++;
327                                 }
328                                 else {
329                                         idx = nextNodeIdx;
330                                         nextNodeIdx += node_is_unaligned(nodes[i], bvh4)
331                                                                ? BVH_UNALIGNED_QNODE_SIZE
332                                                                : BVH_QNODE_SIZE;
333                                 }
334                                 stack.push_back(BVHStackEntry(nodes[i], idx));
335                         }
336                         /* Set node. */
337                         pack_inner(e, &stack[stack.size()-numnodes], numnodes);
338                 }
339         }
340         assert(node_size == nextNodeIdx);
341         /* Root index to start traversal at, to handle case of single leaf node. */
342         pack.root_index = (root->is_leaf())? -1: 0;
343 }
344
345 void BVH4::refit_nodes()
346 {
347         assert(!params.top_level);
348
349         BoundBox bbox = BoundBox::empty;
350         uint visibility = 0;
351         refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
352 }
353
354 void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
355 {
356         if(leaf) {
357                 /* Refit leaf node. */
358                 int4 *data = &pack.leaf_nodes[idx];
359                 int4 c = data[0];
360
361                 BVH::refit_primitives(c.x, c.y, bbox, visibility);
362
363                 /* TODO(sergey): This is actually a copy of pack_leaf(),
364                  * but this chunk of code only knows actual data and has
365                  * no idea about BVHNode.
366                  *
367                  * Would be nice to de-duplicate code, but trying to make
368                  * making code more general ends up in much nastier code
369                  * in my opinion so far.
370                  *
371                  * Same applies to the inner nodes case below.
372                  */
373                 float4 leaf_data[BVH_QNODE_LEAF_SIZE];
374                 leaf_data[0].x = __int_as_float(c.x);
375                 leaf_data[0].y = __int_as_float(c.y);
376                 leaf_data[0].z = __uint_as_float(visibility);
377                 leaf_data[0].w = __uint_as_float(c.w);
378                 memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
379         }
380         else {
381                 int4 *data = &pack.nodes[idx];
382                 bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
383                 int4 c;
384                 if(is_unaligned) {
385                         c = data[13];
386                 }
387                 else {
388                         c = data[7];
389                 }
390                 /* Refit inner node, set bbox from children. */
391                 BoundBox child_bbox[4] = {BoundBox::empty,
392                                           BoundBox::empty,
393                                           BoundBox::empty,
394                                           BoundBox::empty};
395                 uint child_visibility[4] = {0};
396                 int num_nodes = 0;
397
398                 for(int i = 0; i < 4; ++i) {
399                         if(c[i] != 0) {
400                                 refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
401                                            child_bbox[i], child_visibility[i]);
402                                 ++num_nodes;
403                                 bbox.grow(child_bbox[i]);
404                                 visibility |= child_visibility[i];
405                         }
406                 }
407
408                 if(is_unaligned) {
409                         Transform aligned_space[4] = {transform_identity(),
410                                                       transform_identity(),
411                                                       transform_identity(),
412                                                       transform_identity()};
413                         pack_unaligned_node(idx,
414                                             aligned_space,
415                                             child_bbox,
416                                             &c[0],
417                                             visibility,
418                                             0.0f,
419                                             1.0f,
420                                             num_nodes);
421                 }
422                 else {
423                         pack_aligned_node(idx,
424                                           child_bbox,
425                                           &c[0],
426                                           visibility,
427                                           0.0f,
428                                           1.0f,
429                                           num_nodes);
430                 }
431         }
432 }
433
434 CCL_NAMESPACE_END