Bugfix for approximate AO to deal better with degenerate geometry.
[blender.git] / source / blender / render / intern / source / occlusion.c
1 /* 
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 2008 Blender Foundation.
21  * All rights reserved.
22  *
23  * The Original Code is: all of this file.
24  *
25  * Contributor(s): Brecht Van Lommel.
26  *
27  * ***** END GPL LICENSE BLOCK *****
28  */
29
30 #include <math.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34
35 #include "MEM_guardedalloc.h"
36
37 #include "DNA_material_types.h"
38 #include "DNA_meshdata_types.h"
39
40 #include "BLI_arithb.h"
41 #include "BLI_blenlib.h"
42 #include "BLI_memarena.h"
43 #include "BLI_threads.h"
44
45 #include "BKE_DerivedMesh.h"
46 #include "BKE_global.h"
47 #include "BKE_utildefines.h"
48
49 #include "RE_shader_ext.h"
50
51 /* local includes */
52 #include "occlusion.h"
53 #include "render_types.h"
54 #include "rendercore.h"
55 #include "renderdatabase.h"
56 #include "pixelshading.h"
57 #include "shading.h"
58 #include "zbuf.h"
59
60 /* ------------------------- Declarations --------------------------- */
61
62 #define INVALID_INDEX ((int)(~0))
63 #define INVPI 0.31830988618379069f
64 #define TOTCHILD 8
65 #define CACHE_STEP 3
66
67 typedef struct OcclusionCacheSample {
68         float co[3], n[3], col[3], intensity, dist2;
69         int x, y, filled;
70 } OcclusionCacheSample;
71
72 typedef struct OcclusionCache {
73         OcclusionCacheSample *sample;
74         int x, y, w, h, step;
75 } OcclusionCache;
76
77 typedef struct OcclusionCacheMesh {
78         struct OcclusionCacheMesh *next, *prev;
79         ObjectRen *obr;
80         int (*face)[4];
81         float (*co)[3];
82         float (*col)[3];
83         int totvert, totface;
84 } OcclusionCacheMesh;
85
86 typedef struct OccFace {
87         int obi;
88         int facenr;
89 } OccFace;
90
91 typedef struct OccNode {
92         float co[3], area;
93         float sh[9], dco;
94         float occlusion;
95         int childflag;
96         union {
97                 //OccFace face;
98                 int face;
99                 struct OccNode *node;
100         } child[TOTCHILD];
101 } OccNode;
102
103 typedef struct OcclusionTree {
104         MemArena *arena;
105
106         float (*co)[3];         /* temporary during build */
107
108         OccFace *face;          /* instance and face indices */
109         float *occlusion;       /* occlusion for faces */
110         
111         OccNode *root;
112
113         OccNode **stack[BLENDER_MAX_THREADS];
114         int maxdepth;
115
116         int totface;
117
118         float error;
119         float distfac;
120
121         OcclusionCache *cache;
122 } OcclusionTree;
123
124 /* ------------------------- Shading --------------------------- */
125
126 extern Render R; // meh
127
128 #if 0
129 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
130 {
131         ShadeInput *shi= ssamp->shi;
132         ShadeResult *shr= ssamp->shr;
133         float l, u, v, *v1, *v2, *v3;
134         
135         /* init */
136         if(vlr->v4) {
137                 shi->u= u= 0.5f;
138                 shi->v= v= 0.5f;
139         }
140         else {
141                 shi->u= u= 1.0f/3.0f;
142                 shi->v= v= 1.0f/3.0f;
143         }
144
145         /* setup render coordinates */
146         v1= vlr->v1->co;
147         v2= vlr->v2->co;
148         v3= vlr->v3->co;
149         
150         /* renderco */
151         l= 1.0f-u-v;
152         
153         shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0];
154         shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1];
155         shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2];
156         
157         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
158
159         /* set up view vector */
160         VECCOPY(shi->view, shi->co);
161         Normalize(shi->view);
162         
163         /* cache for shadow */
164         shi->samplenr++;
165         
166         shi->xs= 0; // TODO
167         shi->ys= 0;
168         
169         shade_input_set_normals(shi);
170
171         /* no normal flip */
172         if(shi->flippednor)
173                 shade_input_flip_normals(shi);
174
175         /* not a pretty solution, but fixes common cases */
176         if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
177                 VecMulf(shi->vn, -1.0f);
178                 VecMulf(shi->vno, -1.0f);
179         }
180
181         /* init material vars */
182         // note, keep this synced with render_types.h
183         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));
184         shi->har= shi->mat->har;
185         
186         /* render */
187         shade_input_set_shade_texco(shi);
188         shade_material_loop(shi, shr); /* todo: nodes */
189         
190         VECCOPY(rad, shr->combined);
191 }
192
193 static void occ_build_shade(Render *re, OcclusionTree *tree)
194 {
195         ShadeSample ssamp;
196         ObjectInstanceRen *obi;
197         VlakRen *vlr;
198         int a;
199
200         R= *re;
201
202         /* setup shade sample with correct passes */
203         memset(&ssamp, 0, sizeof(ShadeSample));
204         ssamp.shi[0].lay= re->scene->lay;
205         ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA;
206         ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC);
207         ssamp.tot= 1;
208
209         for(a=0; a<tree->totface; a++) {
210                 obi= &R.objectinstance[tree->face[a].obi];
211                 vlr= RE_findOrAddVlak(obi->obr, tree->face[a].vlr);
212
213                 occ_shade(&ssamp, obi, vlr, tree->rad[a]);
214         }
215 }
216 #endif
217
218 /* ------------------------- Spherical Harmonics --------------------------- */
219
220 /* Use 2nd order SH => 9 coefficients, stored in this order:
221    0 = (0,0),
222    1 = (1,-1), 2 = (1,0), 3 = (1,1),
223    4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
224
225 static void sh_copy(float *shresult, float *sh)
226 {
227         memcpy(shresult, sh, sizeof(float)*9);
228 }
229
230 static void sh_mul(float *sh, float f)
231 {
232         int i;
233
234         for(i=0; i<9; i++)
235                 sh[i] *= f;
236 }
237
238 static void sh_add(float *shresult, float *sh1, float *sh2)
239 {
240         int i;
241
242         for(i=0; i<9; i++)
243                 shresult[i]= sh1[i] + sh2[i];
244 }
245
246 static void sh_from_disc(float *n, float area, float *shresult)
247 {
248         /* See formula (3) in:
249            "An Efficient Representation for Irradiance Environment Maps" */
250         float sh[9], x, y, z;
251
252         x= n[0];
253         y= n[1];
254         z= n[2];
255
256         sh[0]= 0.282095f;
257
258         sh[1]= 0.488603f*y;
259         sh[2]= 0.488603f*z;
260         sh[3]= 0.488603f*x;
261         
262         sh[4]= 1.092548f*x*y;
263         sh[5]= 1.092548f*y*z;
264         sh[6]= 0.315392f*(3.0f*z*z - 1.0f);
265         sh[7]= 1.092548f*x*z;
266         sh[8]= 0.546274f*(x*x - y*y);
267
268         sh_mul(sh, area);
269         sh_copy(shresult, sh);
270 }
271
272 static float sh_eval(float *sh, float *v)
273 {
274         /* See formula (13) in:
275            "An Efficient Representation for Irradiance Environment Maps" */
276         static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
277         static const float c4 = 0.886227f, c5 = 0.247708f;
278         float x, y, z, sum;
279
280         x= v[0];
281         y= v[1];
282         z= v[2];
283
284         sum= c1*sh[8]*(x*x - y*y);
285         sum += c3*sh[6]*z*z;
286         sum += c4*sh[0];
287         sum += -c5*sh[6];
288         sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z);
289         sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z);
290
291         return sum;
292 }
293
294 /* ------------------------------ Building --------------------------------- */
295
296 static void occ_face(const OccFace *face, float *co, float *normal, float *area)
297 {
298         ObjectInstanceRen *obi;
299         VlakRen *vlr;
300         float v1[3], v2[3], v3[3], v4[3];
301
302         obi= &R.objectinstance[face->obi];
303         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
304         
305         if(co) {
306                 if(vlr->v4)
307                         VecLerpf(co, vlr->v1->co, vlr->v3->co, 0.5f);
308                 else
309                         CalcCent3f(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
310
311                 if(obi->flag & R_TRANSFORMED)
312                         Mat4MulVecfl(obi->mat, co);
313         }
314         
315         if(normal) {
316                 normal[0]= -vlr->n[0];
317                 normal[1]= -vlr->n[1];
318                 normal[2]= -vlr->n[2];
319
320                 if(obi->flag & R_TRANSFORMED) {
321                         float xn, yn, zn, (*imat)[3]= obi->imat;
322
323                         xn= normal[0];
324                         yn= normal[1];
325                         zn= normal[2];
326
327                         normal[0]= imat[0][0]*xn + imat[0][1]*yn + imat[0][2]*zn;
328                         normal[1]= imat[1][0]*xn + imat[1][1]*yn + imat[1][2]*zn;
329                         normal[2]= imat[2][0]*xn + imat[2][1]*yn + imat[2][2]*zn;
330                 }
331         }
332
333         if(area) {
334                 VECCOPY(v1, vlr->v1->co);
335                 VECCOPY(v2, vlr->v2->co);
336                 VECCOPY(v3, vlr->v3->co);
337                 if(vlr->v4) VECCOPY(v4, vlr->v4->co);
338
339                 if(obi->flag & R_TRANSFORMED) {
340                         Mat4MulVecfl(obi->mat, v1);
341                         Mat4MulVecfl(obi->mat, v2);
342                         Mat4MulVecfl(obi->mat, v3);
343                         if(vlr->v4) Mat4MulVecfl(obi->mat, v4);
344                 }
345
346                 /* todo: correct area for instances */
347                 if(vlr->v4)
348                         *area= AreaQ3Dfl(v1, v2, v3, v4);
349                 else
350                         *area= AreaT3Dfl(v1, v2, v3);
351         }
352 }
353
354 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
355 {
356         OccNode *child;
357         float occ, area, totarea;
358         int a, b;
359
360         occ= 0.0f;
361         totarea= 0.0f;
362
363         for(b=0; b<TOTCHILD; b++) {
364                 if(node->childflag & (1<<b)) {
365                         a= node->child[b].face;
366                         occ_face(&tree->face[a], 0, 0, &area);
367                         occ += area*tree->occlusion[a];
368                         totarea += area;
369                 }
370                 else if(node->child[b].node) {
371                         child= node->child[b].node;
372                         occ_sum_occlusion(tree, child);
373
374                         occ += child->area*child->occlusion;
375                         totarea += child->area;
376                 }
377         }
378
379         if(totarea != 0.0f)
380                 occ /= totarea;
381         
382         node->occlusion= occ;
383 }
384
385 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
386 {
387         float len, maxlen= -1.0f;
388         int a, axis = 0;
389
390         INIT_MINMAX(min, max);
391
392         for(a=begin; a<end; a++)
393                 DO_MINMAX(tree->co[a], min, max)
394
395         for(a=0; a<3; a++) {
396                 len= max[a] - min[a];
397
398                 if(len > maxlen) {
399                         maxlen= len;
400                         axis= a;
401                 }
402         }
403
404         return axis;
405 }
406
407 void occ_node_from_face(OccFace *face, OccNode *node)
408 {
409         float n[3];
410
411         occ_face(face, node->co, n, &node->area);
412         node->dco= 0.0f;
413         sh_from_disc(n, node->area, node->sh);
414 }
415
416 static void occ_build_dco(OcclusionTree *tree, OccNode *node, float *co, float *dco)
417 {
418         OccNode *child;
419         float dist, d[3], nco[3];
420         int b;
421
422         for(b=0; b<TOTCHILD; b++) {
423                 if(node->childflag & (1<<b)) {
424                         occ_face(tree->face+node->child[b].face, nco, 0, 0);
425                 }
426                 else if(node->child[b].node) {
427                         child= node->child[b].node;
428                         occ_build_dco(tree, child, co, dco);
429                         VECCOPY(nco, child->co);
430                 }
431
432                 VECSUB(d, nco, co);
433                 dist= INPR(d, d);
434                 if(dist > *dco)
435                         *dco= dist;
436         }
437 }
438
439 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
440 {
441         float min[3], max[3], mid;
442         int axis, a, enda;
443
444         /* split in middle of boundbox. this seems faster than median split
445          * on complex scenes, possibly since it avoids two distant faces to
446          * be in the same node better? */
447         axis= occ_find_bbox_axis(tree, begin, end, min, max);
448         mid= 0.5f*(min[axis]+max[axis]);
449
450         a= begin;
451         enda= end;
452         while(a<enda) {
453                 if(tree->co[a][axis] > mid) {
454                         enda--;
455                         SWAP(OccFace, tree->face[a], tree->face[enda]);
456                         SWAP(float, tree->co[a][0], tree->co[enda][0]);
457                         SWAP(float, tree->co[a][1], tree->co[enda][1]);
458                         SWAP(float, tree->co[a][2], tree->co[enda][2]);
459                 }
460                 else
461                         a++;
462         }
463
464         *split= enda;
465 }
466
467 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
468 {
469         /* split faces into eight groups */
470         int b, splitx, splity[2], splitz[4];
471
472         occ_build_split(tree, begin, end, &splitx);
473
474         /* force split if none found, to deal with degenerate geometry */
475         if(splitx == begin || splitx == end)
476                 splitx= (begin+end)/2;
477
478         occ_build_split(tree, begin, splitx, &splity[0]);
479         occ_build_split(tree, splitx, end, &splity[1]);
480
481         occ_build_split(tree, begin, splity[0], &splitz[0]);
482         occ_build_split(tree, splity[0], splitx, &splitz[1]);
483         occ_build_split(tree, splitx, splity[1], &splitz[2]);
484         occ_build_split(tree, splity[1], end, &splitz[3]);
485
486         offset[0]= begin;
487         offset[1]= splitz[0];
488         offset[2]= splity[0];
489         offset[3]= splitz[1];
490         offset[4]= splitx;
491         offset[5]= splitz[2];
492         offset[6]= splity[1];
493         offset[7]= splitz[3];
494
495         for(b=0; b<7; b++)
496                 count[b]= offset[b+1] - offset[b];
497         count[7]= end - offset[7];
498 }
499
500 static OccNode *occ_build_recursive(OcclusionTree *tree, int begin, int end, int depth)
501 {
502         OccNode *node, *child, tmpnode;
503         OccFace *face;
504         int a, b, offset[TOTCHILD], count[TOTCHILD];
505
506         /* keep track of maximum depth for stack */
507         if(depth > tree->maxdepth)
508                 tree->maxdepth= depth;
509
510         /* add a new node */
511         node= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
512         node->occlusion= 1.0f;
513
514         /* leaf node with only children */
515         if(end - begin <= TOTCHILD) {
516                 for(a=begin, b=0; a<end; a++, b++) {
517                         face= &tree->face[a];
518                         node->child[b].face= a;
519                         node->childflag |= (1<<b);
520                 }
521         }
522         else {
523                 /* order faces */
524                 occ_build_8_split(tree, begin, end, offset, count);
525
526                 for(b=0; b<TOTCHILD; b++) {
527                         if(count[b] == 0) {
528                                 node->child[b].node= NULL;
529                         }
530                         else if(count[b] == 1) {
531                                 face= &tree->face[offset[b]];
532                                 node->child[b].face= offset[b];
533                                 node->childflag |= (1<<b);
534                         }
535                         else {
536                                 child= occ_build_recursive(tree, offset[b], offset[b]+count[b], depth+1);
537                                 node->child[b].node= child;
538                         }
539                 }
540         }
541
542         /* combine area, position and sh */
543         for(b=0; b<TOTCHILD; b++) {
544                 if(node->childflag & (1<<b)) {
545                         child= &tmpnode;
546                         occ_node_from_face(tree->face+node->child[b].face, &tmpnode);
547                 }
548                 else {
549                         child= node->child[b].node;
550                 }
551
552                 if(child) {
553                         node->area += child->area;
554                         sh_add(node->sh, node->sh, child->sh);
555                         VECADDFAC(node->co, node->co, child->co, child->area);
556                 }
557         }
558
559         if(node->area != 0.0f)
560                 VecMulf(node->co, 1.0f/node->area);
561
562         /* compute maximum distance from center */
563         node->dco= 0.0f;
564         occ_build_dco(tree, node, node->co, &node->dco);
565
566         return node;
567 }
568
569 static void occ_build_sh_normalize(OccNode *node)
570 {
571         /* normalize spherical harmonics to not include area, so
572          * we can clamp the dot product and then mutliply by area */
573         int b;
574
575         if(node->area != 0.0f)
576                 sh_mul(node->sh, 1.0f/node->area);
577
578         for(b=0; b<TOTCHILD; b++) {
579                 if(node->childflag & (1<<b));
580                 else if(node->child[b].node)
581                         occ_build_sh_normalize(node->child[b].node);
582         }
583 }
584
585 static OcclusionTree *occ_tree_build(Render *re)
586 {
587         OcclusionTree *tree;
588         ObjectInstanceRen *obi;
589         ObjectRen *obr;
590         VlakRen *vlr= NULL;
591         int a, b, c, totface;
592
593         /* count */
594         totface= 0;
595         for(obi=re->instancetable.first; obi; obi=obi->next) {
596                 obr= obi->obr;
597                 for(a=0; a<obr->totvlak; a++) {
598                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
599                         else vlr++;
600
601                         if(vlr->mat->mode & MA_TRACEBLE)
602                                 totface++;
603                 }
604         }
605
606         if(totface == 0)
607                 return NULL;
608         
609         tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
610         tree->totface= totface;
611
612         /* parameters */
613         tree->error= re->wrld.ao_approx_error;
614         tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f;
615
616         /* allocation */
617         tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode));
618         BLI_memarena_use_calloc(tree->arena);
619
620         if(re->wrld.aomode & WO_AOCACHE)
621                 tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache");
622
623         tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace");
624         tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo");
625         tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion");
626
627         /* make array of face pointers */
628         for(b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) {
629                 obr= obi->obr;
630                 for(a=0; a<obr->totvlak; a++) {
631                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
632                         else vlr++;
633
634                         if(vlr->mat->mode & MA_TRACEBLE) {
635                                 tree->face[b].obi= c;
636                                 tree->face[b].facenr= a;
637                                 tree->occlusion[b]= 1.0f;
638                                 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 
639                                 b++;
640                         }
641                 }
642         }
643
644         /* recurse */
645         tree->root= occ_build_recursive(tree, 0, totface, 1);
646
647 #if 0
648         if(tree->doindirect) {
649                 occ_build_shade(re, tree);
650                 occ_sum_occlusion(tree, tree->root);
651         }
652 #endif
653         
654         MEM_freeN(tree->co);
655         tree->co= NULL;
656
657         occ_build_sh_normalize(tree->root);
658
659         for(a=0; a<BLENDER_MAX_THREADS; a++)
660                 tree->stack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack");
661
662         return tree;
663 }
664
665 static void occ_free_tree(OcclusionTree *tree)
666 {
667         int a;
668
669         if(tree) {
670                 if(tree->arena) BLI_memarena_free(tree->arena);
671                 for(a=0; a<BLENDER_MAX_THREADS; a++)
672                         if(tree->stack[a])
673                                 MEM_freeN(tree->stack[a]);
674                 if(tree->occlusion) MEM_freeN(tree->occlusion);
675                 if(tree->face) MEM_freeN(tree->face);
676                 if(tree->cache) MEM_freeN(tree->cache);
677                 MEM_freeN(tree);
678         }
679 }
680
681 /* ------------------------- Traversal --------------------------- */
682
683 static float occ_solid_angle(OccNode *node, float *v, float d2, float *receivenormal)
684 {
685         float dotreceive, dotemit, invd2 = 1.0f/sqrtf(d2);
686         float ev[3];
687
688         ev[0]= -v[0]*invd2;
689         ev[1]= -v[1]*invd2;
690         ev[2]= -v[2]*invd2;
691         dotemit= sh_eval(node->sh, ev);
692         dotreceive= INPR(receivenormal, v)*invd2;
693
694         CLAMP(dotemit, 0.0f, 1.0f);
695         CLAMP(dotreceive, 0.0f, 1.0f);
696         
697         return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI;
698 }
699
700 static void VecAddDir(float *result, float *v1, float *v2, float fac)
701 {
702         result[0]= v1[0] + fac*(v2[0] - v1[0]);
703         result[1]= v1[1] + fac*(v2[1] - v1[1]);
704         result[2]= v1[2] + fac*(v2[2] - v1[2]);
705 }
706
707 static int occ_visible_quad(float *p, float *n, float *v0, float *v1, float *v2, float *q0, float *q1, float *q2, float *q3)
708 {
709         static const float epsilon = 1e-6f;
710         float c, sd[3];
711         
712         c= INPR(n, p);
713
714         /* signed distances from the vertices to the plane. */
715         sd[0]= INPR(n, v0) - c;
716         sd[1]= INPR(n, v1) - c;
717         sd[2]= INPR(n, v2) - c;
718
719         if(fabs(sd[0]) < epsilon) sd[0] = 0.0f;
720         if(fabs(sd[1]) < epsilon) sd[1] = 0.0f;
721         if(fabs(sd[2]) < epsilon) sd[2] = 0.0f;
722
723         if(sd[0] > 0) {
724                 if(sd[1] > 0) {
725                         if(sd[2] > 0) {
726                                 // +++
727                                 VECCOPY(q0, v0);
728                                 VECCOPY(q1, v1);
729                                 VECCOPY(q2, v2);
730                                 VECCOPY(q3, q2);
731                         }
732                         else if(sd[2] < 0) {
733                                 // ++-
734                                 VECCOPY(q0, v0);
735                                 VECCOPY(q1, v1);
736                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
737                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
738                         }
739                         else {
740                                 // ++0
741                                 VECCOPY(q0, v0);
742                                 VECCOPY(q1, v1);
743                                 VECCOPY(q2, v2);
744                                 VECCOPY(q3, q2);
745                         }
746                 }
747                 else if(sd[1] < 0) {
748                         if(sd[2] > 0) {
749                                 // +-+
750                                 VECCOPY(q0, v0);
751                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
752                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
753                                 VECCOPY(q3, v2);
754                         }
755                         else if(sd[2] < 0) {
756                                 // +--
757                                 VECCOPY(q0, v0);
758                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
759                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
760                                 VECCOPY(q3, q2);
761                         }
762                         else {
763                                 // +-0
764                                 VECCOPY(q0, v0);
765                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
766                                 VECCOPY(q2, v2);
767                                 VECCOPY(q3, q2);
768                         }
769                 }
770                 else {
771                         if(sd[2] > 0) {
772                                 // +0+
773                                 VECCOPY(q0, v0);
774                                 VECCOPY(q1, v1);
775                                 VECCOPY(q2, v2);
776                                 VECCOPY(q3, q2);
777                         }
778                         else if(sd[2] < 0) {
779                                 // +0-
780                                 VECCOPY(q0, v0);
781                                 VECCOPY(q1, v1);
782                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
783                                 VECCOPY(q3, q2);
784                         }
785                         else {
786                                 // +00
787                                 VECCOPY(q0, v0);
788                                 VECCOPY(q1, v1);
789                                 VECCOPY(q2, v2);
790                                 VECCOPY(q3, q2);
791                         }
792                 }
793         }
794         else if(sd[0] < 0) {
795                 if(sd[1] > 0) {
796                         if(sd[2] > 0) {
797                                 // -++
798                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
799                                 VECCOPY(q1, v1);
800                                 VECCOPY(q2, v2);
801                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
802                         }
803                         else if(sd[2] < 0) {
804                                 // -+-
805                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
806                                 VECCOPY(q1, v1);
807                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
808                                 VECCOPY(q3, q2);
809                         }
810                         else {
811                                 // -+0
812                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
813                                 VECCOPY(q1, v1);
814                                 VECCOPY(q2, v2);
815                                 VECCOPY(q3, q2);
816                         }
817                 }
818                 else if(sd[1] < 0) {
819                         if(sd[2] > 0) {
820                                 // --+
821                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
822                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
823                                 VECCOPY(q2, v2);
824                                 VECCOPY(q3, q2);
825                         }
826                         else if(sd[2] < 0) {
827                                 // ---
828                                 return 0;
829                         }
830                         else {
831                                 // --0
832                                 return 0;
833                         }
834                 }
835                 else {
836                         if(sd[2] > 0) {
837                                 // -0+
838                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
839                                 VECCOPY(q1, v1);
840                                 VECCOPY(q2, v2);
841                                 VECCOPY(q3, q2);
842                         }
843                         else if(sd[2] < 0) {
844                                 // -0-
845                                 return 0;
846                         }
847                         else {
848                                 // -00
849                                 return 0;
850                         }
851                 }
852         }
853         else {
854                 if(sd[1] > 0) {
855                         if(sd[2] > 0) {
856                                 // 0++
857                                 VECCOPY(q0, v0);
858                                 VECCOPY(q1, v1);
859                                 VECCOPY(q2, v2);
860                                 VECCOPY(q3, q2);
861                         }
862                         else if(sd[2] < 0) {
863                                 // 0+-
864                                 VECCOPY(q0, v0);
865                                 VECCOPY(q1, v1);
866                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
867                                 VECCOPY(q3, q2);
868                         }
869                         else {
870                                 // 0+0
871                                 VECCOPY(q0, v0);
872                                 VECCOPY(q1, v1);
873                                 VECCOPY(q2, v2);
874                                 VECCOPY(q3, q2);
875                         }
876                 }
877                 else if(sd[1] < 0) {
878                         if(sd[2] > 0) {
879                                 // 0-+
880                                 VECCOPY(q0, v0);
881                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
882                                 VECCOPY(q2, v2);
883                                 VECCOPY(q3, q2);
884                         }
885                         else if(sd[2] < 0) {
886                                 // 0--
887                                 return 0;
888                         }
889                         else {
890                                 // 0-0
891                                 return 0;
892                         }
893                 }
894                 else {
895                         if(sd[2] > 0) {
896                                 // 00+
897                                 VECCOPY(q0, v0);
898                                 VECCOPY(q1, v1);
899                                 VECCOPY(q2, v2);
900                                 VECCOPY(q3, q2);
901                         }
902                         else if(sd[2] < 0) {
903                                 // 00-
904                                 return 0;
905                         }
906                         else {
907                                 // 000
908                                 return 0;
909                         }
910                 }
911         }
912
913         return 1;
914 }
915
916 /* altivec optimization, this works, but is unused */
917
918 #if 0
919 #include <Accelerate/Accelerate.h>
920
921 typedef union {
922         vFloat v;
923         float f[4];
924 } vFloatResult;
925
926 static vFloat vec_splat_float(float val)
927 {
928         return (vFloat){val, val, val, val};
929 }
930
931 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
932 {
933         vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle;
934         vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3};
935         vFloatResult vresult;
936         float result;
937
938         /* compute r* */
939         vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]);
940         vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]);
941         vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]);
942
943         /* normalize r* */
944         rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f));
945         vrx = vrx*rlen;
946         vry = vry*rlen;
947         vrz = vrz*rlen;
948
949         /* rotate r* for cross and dot */
950         vsrx= vec_perm(vrx, vrx, rotate);
951         vsry= vec_perm(vry, vry, rotate);
952         vsrz= vec_perm(vrz, vrz, rotate);
953
954         /* cross product */
955         gx = vsry*vrz - vsrz*vry;
956         gy = vsrz*vrx - vsrx*vrz;
957         gz = vsrx*vry - vsry*vrx;
958
959         /* normalize */
960         rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f));
961         gx = gx*rlen;
962         gy = gy*rlen;
963         gz = gz*rlen;
964
965         /* angle */
966         vcos = vrx*vsrx + vry*vsry + vrz*vsrz;
967         vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f));
968         vangle= vacosf(vcos);
969
970         /* dot */
971         vresult.v = (vec_splat_float(n[0])*gx +
972                      vec_splat_float(n[1])*gy +
973                      vec_splat_float(n[2])*gz)*vangle;
974
975         result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI);
976         result= MAX2(result, 0.0f);
977
978         return result;
979 }
980
981 #endif
982
983 /* SSE optimization, acos code doesn't work */
984
985 #if 0
986
987 #include <xmmintrin.h>
988
989 static __m128 sse_approx_acos(__m128 x)
990 {
991         /* needs a better approximation than taylor expansion of acos, since that
992          * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work
993          * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */
994
995         return _mm_set_ps1(1.0f);
996 }
997
998 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
999 {
1000         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1001         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1002         float fresult[4] __attribute__((aligned(16)));
1003         __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult;
1004
1005         /* compute r */
1006         qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]);
1007         qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]);
1008         qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]);
1009
1010         rx = qx - _mm_set_ps1(p[0]);
1011         ry = qy - _mm_set_ps1(p[1]);
1012         rz = qz - _mm_set_ps1(p[2]);
1013
1014         /* normalize r */
1015         rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f));
1016         rx = rx*rlen;
1017         ry = ry*rlen;
1018         rz = rz*rlen;
1019
1020         /* cross product */
1021         srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1));
1022         sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1));
1023         srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1));
1024
1025         gx = sry*rz - srz*ry;
1026         gy = srz*rx - srx*rz;
1027         gz = srx*ry - sry*rx;
1028
1029         /* normalize g */
1030         glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f));
1031         gx = gx*glen;
1032         gy = gy*glen;
1033         gz = gz*glen;
1034
1035         /* compute angle */
1036         rcos = rx*srx + ry*sry + rz*srz;
1037         rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f));
1038
1039         angle = sse_approx_cos(rcos);
1040         aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle;
1041
1042         /* sum together */
1043     result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI);
1044         result= MAX2(result, 0.0f);
1045
1046         return result;
1047 }
1048
1049 #endif
1050
1051 static float saacosf(float fac)
1052 {
1053         if(fac<= -1.0f) return (float)M_PI;
1054         else if(fac>=1.0f) return 0.0f;
1055         else return acos(fac); /* acosf(fac) */
1056 }
1057
1058 static void normalizef(float *n)
1059 {
1060         float d;
1061         
1062         d= INPR(n, n);
1063
1064         if(d > 1.0e-35F) {
1065                 d= 1.0f/sqrt(d); /* sqrtf(d) */
1066
1067                 n[0] *= d; 
1068                 n[1] *= d; 
1069                 n[2] *= d;
1070         } 
1071 }
1072
1073 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
1074 {
1075         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1076         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1077
1078         VECSUB(r0, q0, p);
1079         VECSUB(r1, q1, p);
1080         VECSUB(r2, q2, p);
1081         VECSUB(r3, q3, p);
1082
1083         normalizef(r0);
1084         normalizef(r1);
1085         normalizef(r2);
1086         normalizef(r3);
1087
1088         Crossf(g0, r1, r0); normalizef(g0);
1089         Crossf(g1, r2, r1); normalizef(g1);
1090         Crossf(g2, r3, r2); normalizef(g2);
1091         Crossf(g3, r0, r3); normalizef(g3);
1092
1093         a1= saacosf(INPR(r0, r1));
1094         a2= saacosf(INPR(r1, r2));
1095         a3= saacosf(INPR(r2, r3));
1096         a4= saacosf(INPR(r3, r0));
1097
1098         dot1= INPR(n, g0);
1099         dot2= INPR(n, g1);
1100         dot3= INPR(n, g2);
1101         dot4= INPR(n, g3);
1102
1103         result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI;
1104         result= MAX2(result, 0.0f);
1105
1106         return result;
1107 }
1108
1109 float occ_form_factor(OccFace *face, float *p, float *n)
1110 {
1111         ObjectInstanceRen *obi;
1112         VlakRen *vlr;
1113         float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f;
1114
1115         obi= &R.objectinstance[face->obi];
1116         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
1117
1118         VECCOPY(v1, vlr->v1->co);
1119         VECCOPY(v2, vlr->v2->co);
1120         VECCOPY(v3, vlr->v3->co);
1121
1122         if(obi->flag & R_TRANSFORMED) {
1123                 Mat4MulVecfl(obi->mat, v1);
1124                 Mat4MulVecfl(obi->mat, v2);
1125                 Mat4MulVecfl(obi->mat, v3);
1126         }
1127
1128         if(occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
1129                 contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1130
1131         if(vlr->v4) {
1132                 VECCOPY(v4, vlr->v4->co);
1133                 if(obi->flag & R_TRANSFORMED)
1134                         Mat4MulVecfl(obi->mat, v4);
1135
1136                 if(occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
1137                         contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1138         }
1139
1140         return contrib;
1141 }
1142
1143 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float *bentn)
1144 {
1145         OccNode *node, **stack;
1146         OccFace *face;
1147         float resultocc, v[3], p[3], n[3], co[3];
1148         float distfac, error, d2, weight, emitarea;
1149         int b, totstack;
1150
1151         /* init variables */
1152         VECCOPY(p, pp);
1153         VECCOPY(n, pn);
1154         VECADDFAC(p, p, n, 1e-4f);
1155
1156         if(bentn)
1157                 VECCOPY(bentn, n);
1158         
1159         error= tree->error;
1160         distfac= tree->distfac;
1161
1162         resultocc= 0.0f;
1163
1164         /* init stack */
1165         stack= tree->stack[thread];
1166         stack[0]= tree->root;
1167         totstack= 1;
1168
1169         while(totstack) {
1170                 /* pop point off the stack */
1171                 node= stack[--totstack];
1172
1173                 VECSUB(v, node->co, p);
1174                 d2= INPR(v, v) + 1e-16f;
1175                 emitarea= MAX2(node->area, node->dco);
1176
1177                 if(d2*error > emitarea) {
1178                         /* accumulate occlusion from spherical harmonics */
1179                         weight= occ_solid_angle(node, v, d2, n);
1180                         weight *= node->occlusion;
1181
1182                         if(bentn) {
1183                                 Normalize(v);
1184                                 bentn[0] -= weight*v[0];
1185                                 bentn[1] -= weight*v[1];
1186                                 bentn[2] -= weight*v[2];
1187                         }
1188
1189                         if(distfac != 0.0f)
1190                                 weight /= (1.0 + distfac*d2);
1191
1192                         resultocc += weight;
1193                 }
1194                 else {
1195                         /* traverse into children */
1196                         for(b=0; b<TOTCHILD; b++) {
1197                                 if(node->childflag & (1<<b)) {
1198                                         face= tree->face+node->child[b].face;
1199
1200                                         /* accumulate occlusion with face form factor */
1201                                         if(!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
1202                                                 weight= occ_form_factor(face, p, n);
1203                                                 weight *= tree->occlusion[node->child[b].face];
1204
1205                                                 if(bentn || distfac != 0.0f) {
1206                                                         occ_face(face, co, NULL, NULL); 
1207                                                         VECSUB(v, co, p);
1208
1209                                                         if(bentn) {
1210                                                                 Normalize(v);
1211                                                                 bentn[0] -= weight*v[0];
1212                                                                 bentn[1] -= weight*v[1];
1213                                                                 bentn[2] -= weight*v[2];
1214                                                         }
1215
1216                                                         if(distfac != 0.0f) {
1217                                                                 d2= INPR(v, v) + 1e-16f;
1218                                                                 weight /= (1.0 + distfac*d2);
1219                                                         }
1220                                                 }
1221
1222                                                 resultocc += weight;
1223
1224                                         }
1225                                 }
1226                                 else if(node->child[b].node) {
1227                                         /* push child on the stack */
1228                                         stack[totstack++]= node->child[b].node;
1229                                 }
1230                         }
1231                 }
1232         }
1233
1234         if(occ) *occ= resultocc;
1235 }
1236
1237 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
1238 {
1239         float *occ, co[3], n[3];
1240         int pass, i;
1241         
1242         occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc");
1243
1244         for(pass=0; pass<totpass; pass++) {
1245                 for(i=0; i<tree->totface; i++) {
1246                         occ_face(&tree->face[i], co, n, NULL);
1247                         VecMulf(n, -1.0f);
1248                         VECADDFAC(co, co, n, 1e-8f);
1249
1250                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL);
1251                         if(re->test_break())
1252                                 break;
1253                 }
1254
1255                 if(re->test_break())
1256                         break;
1257
1258                 for(i=0; i<tree->totface; i++) {
1259                         tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
1260                         if(tree->occlusion[i] < 0.0f)
1261                                 tree->occlusion[i]= 0.0f;
1262                 }
1263
1264                 occ_sum_occlusion(tree, tree->root);
1265         }
1266
1267         MEM_freeN(occ);
1268 }
1269
1270 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *skycol)
1271 {
1272         float nn[3], bn[3], dxyview[3], fac, occ, occlusion, correction;
1273         int aocolor;
1274
1275         aocolor= re->wrld.aocolor;
1276         if(onlyshadow)
1277                 aocolor= WO_AOPLAIN;
1278
1279         VECCOPY(nn, n);
1280         VecMulf(nn, -1.0f);
1281
1282         occ_lookup(tree, thread, exclude, co, nn, &occ, (aocolor)? bn: NULL);
1283
1284         correction= re->wrld.ao_approx_correction;
1285
1286         occlusion= (1.0f-correction)*(1.0f-occ);
1287         CLAMP(occlusion, 0.0f, 1.0f);
1288         if(correction != 0.0f)
1289                 occlusion += correction*exp(-occ);
1290
1291         if(aocolor) {
1292                 /* sky shading using bent normal */
1293                 if(aocolor==WO_AOSKYCOL) {
1294                         fac= 0.5*(1.0f+bn[0]*re->grvec[0]+ bn[1]*re->grvec[1]+ bn[2]*re->grvec[2]);
1295                         skycol[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr;
1296                         skycol[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng;
1297                         skycol[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb;
1298                 }
1299                 else {  /* WO_AOSKYTEX */
1300                         dxyview[0]= 1.0f;
1301                         dxyview[1]= 1.0f;
1302                         dxyview[2]= 0.0f;
1303                         shadeSkyView(skycol, co, bn, dxyview);
1304                 }
1305
1306                 VecMulf(skycol, occlusion);
1307         }
1308         else {
1309                 skycol[0]= occlusion;
1310                 skycol[1]= occlusion;
1311                 skycol[2]= occlusion;
1312         }
1313 }
1314
1315 /* ---------------------------- Caching ------------------------------- */
1316
1317 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
1318 {
1319         x -= cache->x;
1320         y -= cache->y;
1321
1322         x /= cache->step;
1323         y /= cache->step;
1324         x *= cache->step;
1325         y *= cache->step;
1326
1327         if(x < 0 || x >= cache->w || y < 0 || y >= cache->h)
1328                 return NULL;
1329         else
1330                 return &cache->sample[y*cache->w + x];
1331 }
1332
1333 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *col)
1334 {
1335         OcclusionCache *cache;
1336         OcclusionCacheSample *samples[4], *sample;
1337         float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
1338         float d[3], dist2;
1339         int i, x1, y1, x2, y2;
1340
1341         if(!tree->cache)
1342                 return 0;
1343         
1344         /* first try to find a sample in the same pixel */
1345         cache= &tree->cache[thread];
1346
1347         if(cache->sample && cache->step) {
1348                 sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)];
1349                 if(sample->filled) {
1350                         VECSUB(d, sample->co, co);
1351                         dist2= INPR(d, d);
1352                         if(dist2 < 0.5f*sample->dist2 && INPR(sample->n, n) > 0.98f) {
1353                                 VECCOPY(col, sample->col);
1354                                 return 1;
1355                         }
1356                 }
1357         }
1358         else
1359                 return 0;
1360
1361         /* try to interpolate between 4 neighbouring pixels */
1362         samples[0]= find_occ_sample(cache, x, y);
1363         samples[1]= find_occ_sample(cache, x+cache->step, y);
1364         samples[2]= find_occ_sample(cache, x, y+cache->step);
1365         samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step);
1366
1367         for(i=0; i<4; i++)
1368                 if(!samples[i] || !samples[i]->filled)
1369                         return 0;
1370
1371         /* require intensities not being too different */
1372         mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1373         maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1374
1375         if(maxo - mino > 0.05f)
1376                 return 0;
1377
1378         /* compute weighted interpolation between samples */
1379         col[0]= col[1]= col[2]= 0.0f;
1380         totw= 0.0f;
1381
1382         x1= samples[0]->x;
1383         y1= samples[0]->y;
1384         x2= samples[3]->x;
1385         y2= samples[3]->y;
1386
1387         tx= (float)(x2 - x)/(float)(x2 - x1);
1388         ty= (float)(y2 - y)/(float)(y2 - y1);
1389
1390         wb[3]= (1.0f-tx)*(1.0f-ty);
1391         wb[2]= (tx)*(1.0f-ty);
1392         wb[1]= (1.0f-tx)*(ty);
1393         wb[0]= tx*ty;
1394
1395         for(i=0; i<4; i++) {
1396                 VECSUB(d, samples[i]->co, co);
1397                 dist2= INPR(d, d);
1398
1399                 wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
1400                 wn[i]= pow(INPR(samples[i]->n, n), 32.0f);
1401
1402                 w= wb[i]*wn[i]*wz[i];
1403
1404                 totw += w;
1405                 col[0] += w*samples[i]->col[0];
1406                 col[1] += w*samples[i]->col[1];
1407                 col[2] += w*samples[i]->col[2];
1408         }
1409
1410         if(totw >= 0.9f) {
1411                 totw= 1.0f/totw;
1412                 col[0] *= totw;
1413                 col[1] *= totw;
1414                 col[2] *= totw;
1415                 return 1;
1416         }
1417
1418         return 0;
1419 }
1420
1421 static void sample_occ_cache_mesh(ShadeInput *shi)
1422 {
1423         StrandRen *strand= shi->strand;
1424         OcclusionCacheMesh *mesh= strand->buffer->occlusionmesh;
1425         int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
1426         float w[4], *co1, *co2, *co3, *co4;
1427
1428         if(mesh && index) {
1429                 face= mesh->face[*index];
1430
1431                 co1= mesh->co[face[0]];
1432                 co2= mesh->co[face[1]];
1433                 co3= mesh->co[face[2]];
1434                 co4= (face[3])? mesh->co[face[3]]: NULL;
1435
1436                 InterpWeightsQ3Dfl(co1, co2, co3, co4, strand->vert->co, w);
1437
1438                 shi->ao[0]= shi->ao[1]= shi->ao[2]= 0.0f;
1439                 VECADDFAC(shi->ao, shi->ao, mesh->col[face[0]], w[0]);
1440                 VECADDFAC(shi->ao, shi->ao, mesh->col[face[1]], w[1]);
1441                 VECADDFAC(shi->ao, shi->ao, mesh->col[face[2]], w[2]);
1442                 if(face[3])
1443                         VECADDFAC(shi->ao, shi->ao, mesh->col[face[3]], w[3]);
1444         }
1445         else {
1446                 shi->ao[0]= 1.0f;
1447                 shi->ao[1]= 1.0f;
1448                 shi->ao[2]= 1.0f;
1449         }
1450 }
1451
1452 void *cache_occ_mesh(Render *re, ObjectRen *obr, DerivedMesh *dm, float mat[][4])
1453 {
1454         OcclusionCacheMesh *mesh;
1455         MFace *mface;
1456         MVert *mvert;
1457         int a, totvert, totface;
1458
1459         totvert= dm->getNumVerts(dm);
1460         totface= dm->getNumFaces(dm);
1461
1462         mesh= re->occlusionmesh.last;
1463         if(mesh && mesh->obr->ob == obr->ob && mesh->obr->par == obr->par
1464                 && mesh->totvert==totvert && mesh->totface==totface)
1465                 return mesh;
1466
1467         mesh= MEM_callocN(sizeof(OcclusionCacheMesh), "OcclusionCacheMesh");
1468         mesh->obr= obr;
1469         mesh->totvert= totvert;
1470         mesh->totface= totface;
1471         mesh->co= MEM_callocN(sizeof(float)*3*mesh->totvert, "OcclusionMeshCo");
1472         mesh->face= MEM_callocN(sizeof(int)*4*mesh->totface, "OcclusionMeshFaces");
1473         mesh->col= MEM_callocN(sizeof(float)*3*mesh->totvert, "OcclusionMeshCol");
1474
1475         mvert= dm->getVertArray(dm);
1476         for(a=0; a<mesh->totvert; a++, mvert++) {
1477                 VECCOPY(mesh->co[a], mvert->co);
1478                 Mat4MulVecfl(mat, mesh->co[a]);
1479         }
1480
1481         mface= dm->getFaceArray(dm);
1482         for(a=0; a<mesh->totface; a++, mface++) {
1483                 mesh->face[a][0]= mface->v1;
1484                 mesh->face[a][1]= mface->v2;
1485                 mesh->face[a][2]= mface->v3;
1486                 mesh->face[a][3]= mface->v4;
1487         }
1488
1489         BLI_addtail(&re->occlusionmesh, mesh);
1490
1491         return mesh;
1492 }
1493
1494 /* ------------------------- External Functions --------------------------- */
1495
1496 void make_occ_tree(Render *re)
1497 {
1498         OcclusionCacheMesh *mesh;
1499         float col[3], co[3], n[3], *co1, *co2, *co3, *co4;
1500         int a, *face, *count;
1501
1502         /* ugly, needed for occ_face */
1503         R= *re;
1504
1505         re->i.infostr= "Occlusion preprocessing";
1506         re->stats_draw(&re->i);
1507         
1508         re->occlusiontree= occ_tree_build(re);
1509         
1510         if(re->occlusiontree) {
1511                 if(re->wrld.ao_approx_passes)
1512                         occ_compute_passes(re, re->occlusiontree, re->wrld.ao_approx_passes);
1513
1514                 for(mesh=re->occlusionmesh.first; mesh; mesh=mesh->next) {
1515                         count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount");
1516
1517                         for(a=0; a<mesh->totface; a++) {
1518                                 face= mesh->face[a];
1519                                 co1= mesh->co[face[0]];
1520                                 co2= mesh->co[face[1]];
1521                                 co3= mesh->co[face[2]];
1522
1523                                 if(face[3]) {
1524                                         co4= mesh->co[face[3]];
1525
1526                                         VecLerpf(co, co1, co3, 0.5f);
1527                                         CalcNormFloat4(co1, co2, co3, co4, n);
1528                                 }
1529                                 else {
1530                                         CalcCent3f(co, co1, co2, co3);
1531                                         CalcNormFloat(co1, co2, co3, n);
1532                                 }
1533                                 VecMulf(n, -1.0f);
1534
1535                                 sample_occ_tree(re, re->occlusiontree, NULL, co, n, 0, 0, col);
1536
1537                                 VECADD(mesh->col[face[0]], mesh->col[face[0]], col);
1538                                 count[face[0]]++;
1539                                 VECADD(mesh->col[face[1]], mesh->col[face[1]], col);
1540                                 count[face[1]]++;
1541                                 VECADD(mesh->col[face[2]], mesh->col[face[2]], col);
1542                                 count[face[2]]++;
1543
1544                                 if(face[3]) {
1545                                         VECADD(mesh->col[face[3]], mesh->col[face[3]], col);
1546                                         count[face[3]]++;
1547                                 }
1548                         }
1549
1550                         for(a=0; a<mesh->totvert; a++)
1551                                 if(count[a])
1552                                         VecMulf(mesh->col[a], 1.0f/count[a]);
1553
1554                         MEM_freeN(count);
1555                 }
1556         }
1557 }
1558
1559 void free_occ(Render *re)
1560 {
1561         OcclusionCacheMesh *mesh;
1562
1563         if(re->occlusiontree) {
1564                 occ_free_tree(re->occlusiontree);
1565                 re->occlusiontree = NULL;
1566         }
1567
1568         for(mesh=re->occlusionmesh.first; mesh; mesh=mesh->next) {
1569                 if(mesh->co) MEM_freeN(mesh->co);
1570                 if(mesh->col) MEM_freeN(mesh->col);
1571                 if(mesh->face) MEM_freeN(mesh->face);
1572         }
1573
1574         BLI_freelistN(&re->occlusionmesh);
1575 }
1576
1577 void sample_occ(Render *re, ShadeInput *shi)
1578 {
1579         OcclusionTree *tree= re->occlusiontree;
1580         OcclusionCache *cache;
1581         OcclusionCacheSample *sample;
1582         OccFace exclude;
1583         int onlyshadow;
1584
1585         if(tree) {
1586                 if(shi->strand) {
1587                         sample_occ_cache_mesh(shi);
1588                 }
1589                 /* try to get result from the cache if possible */
1590                 else if(!sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao)) {
1591                         /* no luck, let's sample the occlusion */
1592                         exclude.obi= shi->obi - re->objectinstance;
1593                         exclude.facenr= shi->vlr->index;
1594                         onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
1595                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao);
1596
1597                         if(G.rt & 32)
1598                                 shi->ao[2] *= 2.0f;
1599
1600                         /* fill result into sample, each time */
1601                         if(tree->cache) {
1602                                 cache= &tree->cache[shi->thread];
1603
1604                                 if(cache->sample && cache->step) {
1605                                         sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)];
1606                                         VECCOPY(sample->co, shi->co);
1607                                         VECCOPY(sample->n, shi->vno);
1608                                         VECCOPY(sample->col, shi->ao);
1609                                         sample->intensity= MAX3(sample->col[0], sample->col[1], sample->col[2]);
1610                                         sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
1611                                         sample->filled= 1;
1612                                 }
1613                         }
1614                 }
1615         }
1616         else {
1617                 shi->ao[0]= 1.0f;
1618                 shi->ao[1]= 1.0f;
1619                 shi->ao[2]= 1.0f;
1620         }
1621 }
1622
1623 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
1624 {
1625         OcclusionTree *tree= re->occlusiontree;
1626         PixStr ps;
1627         OcclusionCache *cache;
1628         OcclusionCacheSample *sample;
1629         OccFace exclude;
1630         ShadeInput *shi;
1631         int *ro, *rp, *rz, onlyshadow;
1632         int x, y, step = CACHE_STEP;
1633
1634         if(!tree->cache)
1635                 return;
1636
1637         cache= &tree->cache[pa->thread];
1638         cache->w= pa->rectx;
1639         cache->h= pa->recty;
1640         cache->x= pa->disprect.xmin;
1641         cache->y= pa->disprect.ymin;
1642         cache->step= step;
1643         cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample");
1644         sample= cache->sample;
1645
1646         ps.next= NULL;
1647         ps.mask= 0xFFFF;
1648
1649         ro= pa->recto;
1650         rp= pa->rectp;
1651         rz= pa->rectz;
1652
1653         /* compute a sample at every step pixels */
1654         for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
1655                 for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, ro++, rp++, rz++, sample++) {
1656                         if((((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1) && (((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1)) {
1657                                 if(*rp) {
1658                                         ps.obi= *ro;
1659                                         ps.facenr= *rp;
1660                                         ps.z= *rz;
1661
1662                                         shade_samples_fill_with_ps(ssamp, &ps, x, y);
1663                                         shi= ssamp->shi;
1664                                         if(!shi->vlr)
1665                                                 continue;
1666
1667                                         onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
1668                                         exclude.obi= shi->obi - re->objectinstance;
1669                                         exclude.facenr= shi->vlr->index;
1670                                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao);
1671
1672                                         VECCOPY(sample->co, shi->co);
1673                                         VECCOPY(sample->n, shi->vno);
1674                                         VECCOPY(sample->col, shi->ao);
1675                                         sample->intensity= MAX3(sample->col[0], sample->col[1], sample->col[2]);
1676                                         sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
1677                                         sample->x= shi->xs;
1678                                         sample->y= shi->ys;
1679                                         sample->filled= 1;
1680                                 }
1681
1682                                 if(re->test_break())
1683                                         break;
1684                         }
1685                 }
1686         }
1687 }
1688
1689 void free_occ_samples(Render *re, RenderPart *pa)
1690 {
1691         OcclusionTree *tree= re->occlusiontree;
1692         OcclusionCache *cache;
1693
1694         if(tree->cache) {
1695                 cache= &tree->cache[pa->thread];
1696
1697                 if(cache->sample)
1698                         MEM_freeN(cache->sample);
1699
1700                 cache->w= 0;
1701                 cache->h= 0;
1702                 cache->step= 0;
1703         }
1704 }
1705