9d9711eee56929614aa61b462009faf3b8073beb
[blender-staging.git] / source / blender / render / intern / raytrace / reorganize.h
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version. 
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2009 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): AndrĂ© Pinto.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/render/intern/raytrace/reorganize.h
29  *  \ingroup render
30  */
31
32
33 #include <float.h>
34 #include <math.h>
35 #include <stdio.h>
36
37 #include <algorithm>
38 #include <queue>
39 #include <vector>
40
41 #include "BKE_global.h"
42
43 #ifdef _WIN32
44 #  ifdef INFINITY
45 #    undef INFINITY
46 #  endif
47 #  define INFINITY FLT_MAX // in mingw math.h: (1.0F/0.0F). This generates compile error, though.
48 #endif
49
50 extern int tot_pushup;
51 extern int tot_pushdown;
52
53 #if !defined(INFINITY) && defined(HUGE_VAL)
54 #define INFINITY HUGE_VAL
55 #endif
56
57 template<class Node>
58 bool node_fits_inside(Node *a, Node *b)
59 {
60         return bb_fits_inside(b->bb, b->bb + 3, a->bb, a->bb + 3);
61 }
62
63 template<class Node>
64 void reorganize_find_fittest_parent(Node *tree, Node *node, std::pair<float, Node *> &cost)
65 {
66         std::queue<Node *> q;
67         q.push(tree);
68         
69         while (!q.empty()) {
70                 Node *parent = q.front();
71                 q.pop();
72                 
73                 if (parent == node) continue;
74                 if (node_fits_inside(node, parent) && RE_rayobject_isAligned(parent->child) ) {
75                         float pcost = bb_area(parent->bb, parent->bb + 3);
76                         cost = std::min(cost, std::make_pair(pcost, parent) );
77                         for (Node *child = parent->child; child; child = child->sibling)
78                                 q.push(child);
79                 }
80         }
81 }
82
83 static int tot_moves = 0;
84 template<class Node>
85 void reorganize(Node *root)
86 {
87         std::queue<Node *> q;
88
89         q.push(root);
90         while (!q.empty()) {
91                 Node *node = q.front();
92                 q.pop();
93                 
94                 if (RE_rayobject_isAligned(node->child)) {
95                         for (Node **prev = &node->child; *prev; ) {
96                                 assert(RE_rayobject_isAligned(*prev));
97                                 q.push(*prev);
98
99                                 std::pair<float, Node *> best(FLT_MAX, root);
100                                 reorganize_find_fittest_parent(root, *prev, best);
101
102                                 if (best.second == node) {
103                                         //Already inside the fitnest BB
104                                         prev = &(*prev)->sibling;
105                                 }
106                                 else {
107                                         Node *tmp = *prev;
108                                         *prev = (*prev)->sibling;
109                                         
110                                         tmp->sibling =  best.second->child;
111                                         best.second->child = tmp;
112                                         
113                                         tot_moves++;
114                                 }
115                         
116                         
117                         }
118                 }
119                 if (node != root) {
120                 }
121         }
122 }
123
124 /*
125  * Prunes useless nodes from trees:
126  *  erases nodes with total amount of primitives = 0
127  *  prunes nodes with only one child (except if that child is a primitive)
128  */
129 template<class Node>
130 void remove_useless(Node *node, Node **new_node)
131 {
132         if (RE_rayobject_isAligned(node->child) ) {
133
134                 for (Node **prev = &node->child; *prev; ) {
135                         Node *next = (*prev)->sibling;
136                         remove_useless(*prev, prev);
137                         if (*prev == NULL)
138                                 *prev = next;
139                         else {
140                                 (*prev)->sibling = next;
141                                 prev = &((*prev)->sibling);
142                         }
143                 }
144         }
145         if (node->child) {
146                 if (RE_rayobject_isAligned(node->child) && node->child->sibling == 0)
147                         *new_node = node->child;
148         }
149         else if (node->child == NULL) {
150                 *new_node = NULL;
151         }
152 }
153
154 /*
155  * Minimizes expected number of BBtest by colapsing nodes
156  * it uses surface area heuristic for determining whether a node should be colapsed
157  */
158 template<class Node>
159 void pushup(Node *parent)
160 {
161         if (is_leaf(parent)) return;
162         
163         float p_area = bb_area(parent->bb, parent->bb + 3);
164         Node **prev = &parent->child;
165         for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) {
166                 const float c_area = bb_area(child->bb, child->bb + 3);
167                 const int nchilds = count_childs(child);
168                 float original_cost = ((p_area != 0.0f) ? (c_area / p_area) * nchilds : 1.0f) + 1;
169                 float flatten_cost = nchilds;
170                 if (flatten_cost < original_cost && nchilds >= 2) {
171                         append_sibling(child, child->child);
172                         child = child->sibling;
173                         *prev = child;
174
175 //                      *prev = child->child;
176 //                      append_sibling( *prev, child->sibling );
177 //                      child = *prev;
178                         tot_pushup++;
179                 }
180                 else {
181                         *prev = child;
182                         prev = &(*prev)->sibling;
183                         child = *prev;
184                 }
185         }
186         
187         for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling)
188                 pushup(child);
189 }
190
191 /*
192  * try to optimize number of childs to be a multiple of SSize
193  */
194 template<class Node, int SSize>
195 void pushup_simd(Node *parent)
196 {
197         if (is_leaf(parent)) return;
198         
199         int n = count_childs(parent);
200                 
201         Node **prev = &parent->child;
202         for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; ) {
203                 int cn = count_childs(child);
204                 if (cn - 1 <= (SSize - (n % SSize) ) % SSize && RE_rayobject_isAligned(child->child) ) {
205                         n += (cn - 1);
206                         append_sibling(child, child->child);
207                         child = child->sibling;
208                         *prev = child;
209                 }
210                 else {
211                         *prev = child;
212                         prev = &(*prev)->sibling;
213                         child = *prev;
214                 }
215         }
216                 
217         for (Node *child = parent->child; RE_rayobject_isAligned(child) && child; child = child->sibling)
218                 pushup_simd<Node, SSize>(child);
219 }
220
221
222 /*
223  * Pushdown
224  *      makes sure no child fits inside any of its sibling
225  */
226 template<class Node>
227 void pushdown(Node *parent)
228 {
229         Node **s_child = &parent->child;
230         Node *child = parent->child;
231         
232         while (child && RE_rayobject_isAligned(child)) {
233                 Node *next = child->sibling;
234                 Node **next_s_child = &child->sibling;
235                 
236                 //assert(bb_fits_inside(parent->bb, parent->bb+3, child->bb, child->bb+3));
237                 
238                 for (Node *i = parent->child; RE_rayobject_isAligned(i) && i; i = i->sibling)
239                         if (child != i && bb_fits_inside(i->bb, i->bb + 3, child->bb, child->bb + 3) && RE_rayobject_isAligned(i->child)) {
240 //                      todo optimize (should the one with the smallest area?)
241 //                      float ia = bb_area(i->bb, i->bb+3)
242 //                      if (child->i)
243                                 *s_child = child->sibling;
244                                 child->sibling = i->child;
245                                 i->child = child;
246                                 next_s_child = s_child;
247                         
248                                 tot_pushdown++;
249                                 break;
250                         }
251                 child = next;
252                 s_child = next_s_child;
253         }
254         
255         for (Node *i = parent->child; RE_rayobject_isAligned(i) && i; i = i->sibling) {
256                 pushdown(i);
257         }
258 }
259
260
261 /*
262  * BVH refit
263  * readjust nodes BB (useful if nodes childs where modified)
264  */
265 template<class Node>
266 float bvh_refit(Node *node)
267 {
268         if (is_leaf(node)) return 0;
269         if (is_leaf(node->child)) return 0;
270         
271         float total = 0;
272         
273         for (Node *child = node->child; child; child = child->sibling)
274                 total += bvh_refit(child);
275                 
276         float old_area = bb_area(node->bb, node->bb + 3);
277         INIT_MINMAX(node->bb, node->bb + 3);
278         for (Node *child = node->child; child; child = child->sibling) {
279                 DO_MIN(child->bb, node->bb);
280                 DO_MAX(child->bb + 3, node->bb + 3);
281         }
282         total += old_area - bb_area(node->bb, node->bb + 3);
283         return total;
284 }
285
286
287 /*
288  * this finds the best way to packing a tree according to a given test cost function
289  * with the purpose to reduce the expected cost (eg.: number of BB tests).
290  */
291 #include <vector>
292 #define MAX_CUT_SIZE         4               /* svbvh assumes max 4 children! */
293 #define MAX_OPTIMIZE_CHILDS  MAX_CUT_SIZE
294
295 struct OVBVHNode {
296         float bb[6];
297
298         OVBVHNode *child;
299         OVBVHNode *sibling;
300         
301         /*
302          * Returns min cost to represent the subtree starting at the given node,
303          * allowing it to have a given cutsize
304          */
305         float cut_cost[MAX_CUT_SIZE];
306         float get_cost(int cutsize)
307         {
308                 return cut_cost[cutsize - 1];
309         }
310         
311         /*
312          * This saves the cut size of this child, when parent is reaching
313          * its minimum cut with the given cut size
314          */
315         int cut_size[MAX_CUT_SIZE];
316         int get_cut_size(int parent_cut_size)
317         {
318                 return cut_size[parent_cut_size - 1];
319         }
320         
321         /*
322          * Reorganize the node based on calculated cut costs
323          */
324         int best_cutsize;
325         void set_cut(int cutsize, OVBVHNode ***cut)
326         {
327                 if (cutsize == 1) {
328                         **cut = this;
329                         *cut = &(**cut)->sibling;
330                 }
331                 else {
332                         if (cutsize > MAX_CUT_SIZE) {
333                                 for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
334                                         child->set_cut(1, cut);
335                                         cutsize--;
336                                 }
337                                 assert(cutsize == 0);
338                         }
339                         else {
340                                 for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
341                                         child->set_cut(child->get_cut_size(cutsize), cut);
342                                 }
343                         }
344                 }
345         }
346
347         void optimize()
348         {
349                 if (RE_rayobject_isAligned(this->child)) {
350                         //Calc new childs
351                         {
352                                 OVBVHNode **cut = &(this->child);
353                                 set_cut(best_cutsize, &cut);
354                                 *cut = NULL;
355                         }
356
357                         //Optimize new childs
358                         for (OVBVHNode *child = this->child; child && RE_rayobject_isAligned(child); child = child->sibling)
359                                 child->optimize();
360                 }
361         }
362 };
363
364 /*
365  * Calculates an optimal SIMD packing
366  *
367  */
368 template<class Node, class TestCost>
369 struct VBVH_optimalPackSIMD {
370         TestCost testcost;
371         
372         VBVH_optimalPackSIMD(TestCost testcost)
373         {
374                 this->testcost = testcost;
375         }
376         
377         /*
378          * calc best cut on a node
379          */
380         struct calc_best {
381                 Node *child[MAX_OPTIMIZE_CHILDS];
382                 float child_hit_prob[MAX_OPTIMIZE_CHILDS];
383                 
384                 calc_best(Node *node)
385                 {
386                         int nchilds = 0;
387                         //Fetch childs and needed data
388                         {
389                                 float parent_area = bb_area(node->bb, node->bb + 3);
390                                 for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
391                                         this->child[nchilds] = child;
392                                         this->child_hit_prob[nchilds] = (parent_area != 0.0f) ? bb_area(child->bb, child->bb + 3) / parent_area : 1.0f;
393                                         nchilds++;
394                                 }
395
396                                 assert(nchilds >= 2 && nchilds <= MAX_OPTIMIZE_CHILDS);
397                         }
398                         
399                         
400                         //Build DP table to find minimum cost to represent this node with a given cutsize
401                         int     bt[MAX_OPTIMIZE_CHILDS + 1][MAX_CUT_SIZE + 1];     //backtrace table
402                         float cost[MAX_OPTIMIZE_CHILDS + 1][MAX_CUT_SIZE + 1]; //cost table (can be reduced to float[2][MAX_CUT_COST])
403                         
404                         for (int i = 0; i <= nchilds; i++) {
405                                 for (int j = 0; j <= MAX_CUT_SIZE; j++) {
406                                         cost[i][j] = INFINITY;
407                                 }
408                         }
409
410                         cost[0][0] = 0;
411                         
412                         for (int i = 1; i <= nchilds; i++) {
413                                 for (int size = i - 1; size /*+(nchilds-i)*/ <= MAX_CUT_SIZE; size++) {
414                                         for (int cut = 1; cut + size /*+(nchilds-i)*/ <= MAX_CUT_SIZE; cut++) {
415                                                 float new_cost = cost[i - 1][size] + child_hit_prob[i - 1] * child[i - 1]->get_cost(cut);
416                                                 if (new_cost < cost[i][size + cut]) {
417                                                         cost[i][size + cut] = new_cost;
418                                                         bt[i][size + cut] = cut;
419                                                 }
420                                         }
421                                 }
422                         }
423                         
424                         /* Save the ways to archive the minimum cost with a given cutsize */
425                         for (int i = nchilds; i <= MAX_CUT_SIZE; i++) {
426                                 node->cut_cost[i - 1] = cost[nchilds][i];
427                                 if (cost[nchilds][i] < INFINITY) {
428                                         int current_size = i;
429                                         for (int j = nchilds; j > 0; j--) {
430                                                 child[j - 1]->cut_size[i - 1] = bt[j][current_size];
431                                                 current_size -= bt[j][current_size];
432                                         }
433                                 }
434                         }
435                 }
436         };
437         
438         void calc_costs(Node *node)
439         {
440                 
441                 if (RE_rayobject_isAligned(node->child) ) {
442                         int nchilds = 0;
443                         for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
444                                 calc_costs(child);
445                                 nchilds++;
446                         }
447
448                         for (int i = 0; i < MAX_CUT_SIZE; i++)
449                                 node->cut_cost[i] = INFINITY;
450
451                         //We are not allowed to look on nodes with with so many childs
452                         if (nchilds > MAX_CUT_SIZE) {
453                                 float cost = 0;
454
455                                 float parent_area = bb_area(node->bb, node->bb + 3);
456                                 for (Node *child = node->child; child && RE_rayobject_isAligned(child); child = child->sibling) {
457                                         cost += ((parent_area != 0.0f) ? (bb_area(child->bb, child->bb + 3) / parent_area) : 1.0f) * child->get_cost(1);
458                                 }
459                                 
460                                 cost += testcost(nchilds);
461                                 node->cut_cost[0] = cost;
462                                 node->best_cutsize = nchilds;
463                         }
464                         else {
465                                 calc_best calc(node);
466                 
467                                 //calc expected cost if we optimaly pack this node
468                                 for (int cutsize = nchilds; cutsize <= MAX_CUT_SIZE; cutsize++) {
469                                         float m = node->get_cost(cutsize) + testcost(cutsize);
470                                         if (m < node->cut_cost[0]) {
471                                                 node->cut_cost[0] = m;
472                                                 node->best_cutsize = cutsize;
473                                         }
474                                 }
475                         }
476                         assert(node->cut_cost[0] != INFINITY);
477                 }
478                 else {
479                         node->cut_cost[0] = 1.0f;
480                         for (int i = 1; i < MAX_CUT_SIZE; i++)
481                                 node->cut_cost[i] = INFINITY;
482                 }
483         }
484
485         Node *transform(Node *node)
486         {
487                 if (RE_rayobject_isAligned(node->child)) {
488                         static int num = 0;
489                         bool first = false;
490                         if (num == 0) { num++; first = true; }
491                         
492                         calc_costs(node);
493                         if ((G.debug & G_DEBUG) && first) printf("expected cost = %f (%d)\n", node->cut_cost[0], node->best_cutsize);
494                         node->optimize();
495                 }
496                 return node;
497         }
498 };