Cycles: Make all #include statements relative to cycles source directory
[blender.git] / intern / cycles / bvh / bvh_binning.cpp
1 /*
2  * Adapted from code copyright 2009-2011 Intel Corporation
3  * Modifications Copyright 2012, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 //#define __KERNEL_SSE__
19
20 #include <stdlib.h>
21
22 #include "bvh/bvh_binning.h"
23
24 #include "util/util_algorithm.h"
25 #include "util/util_boundbox.h"
26 #include "util/util_types.h"
27
28 CCL_NAMESPACE_BEGIN
29
30 /* SSE replacements */
31
32 __forceinline void prefetch_L1 (const void* /*ptr*/) { }
33 __forceinline void prefetch_L2 (const void* /*ptr*/) { }
34 __forceinline void prefetch_L3 (const void* /*ptr*/) { }
35 __forceinline void prefetch_NTA(const void* /*ptr*/) { }
36
37 template<size_t src> __forceinline float extract(const int4& b)
38 { return b[src]; }
39 template<size_t dst> __forceinline const float4 insert(const float4& a, const float b)
40 { float4 r = a; r[dst] = b; return r; }
41
42 __forceinline int get_best_dimension(const float4& bestSAH)
43 {
44         // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH));
45
46         float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z));
47
48         if(bestSAH.x == minSAH) return 0;
49         else if(bestSAH.y == minSAH) return 1;
50         else return 2;
51 }
52
53 /* BVH Object Binning */
54
55 BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
56                                    BVHReference *prims,
57                                    const BVHUnaligned *unaligned_heuristic,
58                                    const Transform *aligned_space)
59 : BVHRange(job),
60   splitSAH(FLT_MAX),
61   dim(0),
62   pos(0),
63   unaligned_heuristic_(unaligned_heuristic),
64   aligned_space_(aligned_space)
65 {
66         if(aligned_space_ == NULL) {
67                 bounds_ = bounds();
68                 cent_bounds_ = cent_bounds();
69         }
70         else {
71                 /* TODO(sergey): With some additional storage we can avoid
72                  * need in re-calculating this.
73                  */
74                 bounds_ = unaligned_heuristic->compute_aligned_boundbox(
75                         *this,
76                         prims,
77                         *aligned_space,
78                         &cent_bounds_);
79         }
80
81         /* compute number of bins to use and precompute scaling factor for binning */
82         num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f*size()));
83         scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);
84
85         /* initialize binning counter and bounds */
86         BoundBox bin_bounds[MAX_BINS][4];       /* bounds for every bin in every dimension */
87         int4 bin_count[MAX_BINS];                       /* number of primitives mapped to bin */
88
89         for(size_t i = 0; i < num_bins; i++) {
90                 bin_count[i] = make_int4(0);
91                 bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty;
92         }
93
94         /* map geometry to bins, unrolled once */
95         {
96                 ssize_t i;
97
98                 for(i = 0; i < ssize_t(size()) - 1; i += 2) {
99                         prefetch_L2(&prims[start() + i + 8]);
100
101                         /* map even and odd primitive to bin */
102                         const BVHReference& prim0 = prims[start() + i + 0];
103                         const BVHReference& prim1 = prims[start() + i + 1];
104
105                         BoundBox bounds0 = get_prim_bounds(prim0);
106                         BoundBox bounds1 = get_prim_bounds(prim1);
107
108                         int4 bin0 = get_bin(bounds0);
109                         int4 bin1 = get_bin(bounds1);
110
111                         /* increase bounds for bins for even primitive */
112                         int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(bounds0);
113                         int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(bounds0);
114                         int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(bounds0);
115
116                         /* increase bounds of bins for odd primitive */
117                         int b10 = (int)extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(bounds1);
118                         int b11 = (int)extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(bounds1);
119                         int b12 = (int)extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(bounds1);
120                 }
121
122                 /* for uneven number of primitives */
123                 if(i < ssize_t(size())) {
124                         /* map primitive to bin */
125                         const BVHReference& prim0 = prims[start() + i];
126                         BoundBox bounds0 = get_prim_bounds(prim0);
127                         int4 bin0 = get_bin(bounds0);
128
129                         /* increase bounds of bins */
130                         int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(bounds0);
131                         int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(bounds0);
132                         int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(bounds0);
133                 }
134         }
135
136         /* sweep from right to left and compute parallel prefix of merged bounds */
137         float4 r_area[MAX_BINS];        /* area of bounds of primitives on the right */
138         float4 r_count[MAX_BINS];       /* number of primitives on the right */
139         int4 count = make_int4(0);
140
141         BoundBox bx = BoundBox::empty;
142         BoundBox by = BoundBox::empty;
143         BoundBox bz = BoundBox::empty;
144
145         for(size_t i = num_bins - 1; i > 0; i--) {
146                 count = count + bin_count[i];
147                 r_count[i] = blocks(count);
148
149                 bx = merge(bx,bin_bounds[i][0]); r_area[i][0] = bx.half_area();
150                 by = merge(by,bin_bounds[i][1]); r_area[i][1] = by.half_area();
151                 bz = merge(bz,bin_bounds[i][2]); r_area[i][2] = bz.half_area();
152                 r_area[i][3] = r_area[i][2];
153         }
154
155         /* sweep from left to right and compute SAH */
156         int4 ii = make_int4(1);
157         float4 bestSAH = make_float4(FLT_MAX);
158         int4 bestSplit = make_int4(-1);
159
160         count = make_int4(0);
161
162         bx = BoundBox::empty;
163         by = BoundBox::empty;
164         bz = BoundBox::empty;
165
166         for(size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
167                 count = count + bin_count[i-1];
168
169                 bx = merge(bx,bin_bounds[i-1][0]); float Ax = bx.half_area();
170                 by = merge(by,bin_bounds[i-1][1]); float Ay = by.half_area();
171                 bz = merge(bz,bin_bounds[i-1][2]); float Az = bz.half_area();
172
173                 float4 lCount = blocks(count);
174                 float4 lArea = make_float4(Ax,Ay,Az,Az);
175                 float4 sah = lArea*lCount + r_area[i]*r_count[i];
176
177                 bestSplit = select(sah < bestSAH,ii,bestSplit);
178                 bestSAH = min(sah,bestSAH);
179         }
180
181         int4 mask = float3_to_float4(cent_bounds_.size()) <= make_float4(0.0f);
182         bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX);
183
184         /* find best dimension */
185         dim = get_best_dimension(bestSAH);
186         splitSAH = bestSAH[dim];
187         pos = bestSplit[dim];
188         leafSAH = bounds_.half_area() * blocks(size());
189 }
190
191 void BVHObjectBinning::split(BVHReference* prims,
192                              BVHObjectBinning& left_o,
193                              BVHObjectBinning& right_o) const
194 {
195         size_t N = size();
196
197         BoundBox lgeom_bounds = BoundBox::empty;
198         BoundBox rgeom_bounds = BoundBox::empty;
199         BoundBox lcent_bounds = BoundBox::empty;
200         BoundBox rcent_bounds = BoundBox::empty;
201
202         ssize_t l = 0, r = N-1;
203
204         while(l <= r) {
205                 prefetch_L2(&prims[start() + l + 8]);
206                 prefetch_L2(&prims[start() + r - 8]);
207
208                 BVHReference prim = prims[start() + l];
209                 BoundBox unaligned_bounds = get_prim_bounds(prim);
210                 float3 unaligned_center = unaligned_bounds.center2();
211                 float3 center = prim.bounds().center2();
212
213                 if(get_bin(unaligned_center)[dim] < pos) {
214                         lgeom_bounds.grow(prim.bounds());
215                         lcent_bounds.grow(center);
216                         l++;
217                 }
218                 else {
219                         rgeom_bounds.grow(prim.bounds());
220                         rcent_bounds.grow(center);
221                         swap(prims[start()+l],prims[start()+r]);
222                         r--;
223                 }
224         }
225         /* finish */
226         if(l != 0 && N-1-r != 0) {
227                 right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N-1-r), prims);
228                 left_o  = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
229                 return;
230         }
231
232         /* object medium split if we did not make progress, can happen when all
233          * primitives have same centroid */
234         lgeom_bounds = BoundBox::empty;
235         rgeom_bounds = BoundBox::empty;
236         lcent_bounds = BoundBox::empty;
237         rcent_bounds = BoundBox::empty;
238
239         for(size_t i = 0; i < N/2; i++) {
240                 lgeom_bounds.grow(prims[start()+i].bounds());
241                 lcent_bounds.grow(prims[start()+i].bounds().center2());
242         }
243
244         for(size_t i = N/2; i < N; i++) {
245                 rgeom_bounds.grow(prims[start()+i].bounds());
246                 rcent_bounds.grow(prims[start()+i].bounds().center2());
247         }
248
249         right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N/2, N/2 + N%2), prims);
250         left_o  = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N/2), prims);
251 }
252
253 CCL_NAMESPACE_END
254