bcd6ed3aa0da9825e9cad81351dac0b1d8aa91a5
[blender.git] / source / blender / render / intern / source / occlusion.c
1 /* 
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 2008 Blender Foundation.
21  * All rights reserved.
22  *
23  * The Original Code is: all of this file.
24  *
25  * Contributor(s): Brecht Van Lommel.
26  *
27  * ***** END GPL LICENSE BLOCK *****
28  */
29
30 #include <math.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34
35 #include "MEM_guardedalloc.h"
36
37 #include "DNA_material_types.h"
38 #include "DNA_meshdata_types.h"
39
40 #include "BLI_arithb.h"
41 #include "BLI_blenlib.h"
42 #include "BLI_memarena.h"
43 #include "BLI_threads.h"
44
45 #include "BKE_DerivedMesh.h"
46 #include "BKE_global.h"
47 #include "BKE_utildefines.h"
48
49 #include "RE_shader_ext.h"
50
51 /* local includes */
52 #include "occlusion.h"
53 #include "render_types.h"
54 #include "rendercore.h"
55 #include "renderdatabase.h"
56 #include "pixelshading.h"
57 #include "shading.h"
58 #include "zbuf.h"
59
60 /* ------------------------- Declarations --------------------------- */
61
62 #define INVALID_INDEX ((int)(~0))
63 #define INVPI 0.31830988618379069f
64 #define TOTCHILD 8
65 #define CACHE_STEP 3
66
67 typedef struct OcclusionCacheSample {
68         float co[3], n[3], col[3], intensity, dist2;
69         int x, y, filled;
70 } OcclusionCacheSample;
71
72 typedef struct OcclusionCache {
73         OcclusionCacheSample *sample;
74         int x, y, w, h, step;
75 } OcclusionCache;
76
77 typedef struct OcclusionCacheMesh {
78         struct OcclusionCacheMesh *next, *prev;
79         ObjectRen *obr;
80         int (*face)[4];
81         float (*co)[3];
82         float (*col)[3];
83         int totvert, totface;
84 } OcclusionCacheMesh;
85
86 typedef struct OccFace {
87         int obi;
88         int facenr;
89 } OccFace;
90
91 typedef struct OccNode {
92         float co[3], area;
93         float sh[9], dco;
94         float occlusion;
95         int childflag;
96         union {
97                 //OccFace face;
98                 int face;
99                 struct OccNode *node;
100         } child[TOTCHILD];
101 } OccNode;
102
103 typedef struct OcclusionTree {
104         MemArena *arena;
105
106         float (*co)[3];         /* temporary during build */
107
108         OccFace *face;          /* instance and face indices */
109         float *occlusion;       /* occlusion for faces */
110         
111         OccNode *root;
112
113         OccNode **stack[BLENDER_MAX_THREADS];
114         int maxdepth;
115
116         int totface;
117
118         float error;
119         float distfac;
120
121         OcclusionCache *cache;
122 } OcclusionTree;
123
124 /* ------------------------- Shading --------------------------- */
125
126 extern Render R; // meh
127
128 #if 0
129 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
130 {
131         ShadeInput *shi= ssamp->shi;
132         ShadeResult *shr= ssamp->shr;
133         float l, u, v, *v1, *v2, *v3;
134         
135         /* init */
136         if(vlr->v4) {
137                 shi->u= u= 0.5f;
138                 shi->v= v= 0.5f;
139         }
140         else {
141                 shi->u= u= 1.0f/3.0f;
142                 shi->v= v= 1.0f/3.0f;
143         }
144
145         /* setup render coordinates */
146         v1= vlr->v1->co;
147         v2= vlr->v2->co;
148         v3= vlr->v3->co;
149         
150         /* renderco */
151         l= 1.0f-u-v;
152         
153         shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0];
154         shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1];
155         shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2];
156         
157         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
158
159         /* set up view vector */
160         VECCOPY(shi->view, shi->co);
161         Normalize(shi->view);
162         
163         /* cache for shadow */
164         shi->samplenr++;
165         
166         shi->xs= 0; // TODO
167         shi->ys= 0;
168         
169         shade_input_set_normals(shi);
170
171         /* no normal flip */
172         if(shi->flippednor)
173                 shade_input_flip_normals(shi);
174
175         /* not a pretty solution, but fixes common cases */
176         if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
177                 VecMulf(shi->vn, -1.0f);
178                 VecMulf(shi->vno, -1.0f);
179         }
180
181         /* init material vars */
182         // note, keep this synced with render_types.h
183         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));
184         shi->har= shi->mat->har;
185         
186         /* render */
187         shade_input_set_shade_texco(shi);
188         shade_material_loop(shi, shr); /* todo: nodes */
189         
190         VECCOPY(rad, shr->combined);
191 }
192
193 static void occ_build_shade(Render *re, OcclusionTree *tree)
194 {
195         ShadeSample ssamp;
196         ObjectInstanceRen *obi;
197         VlakRen *vlr;
198         int a;
199
200         R= *re;
201
202         /* setup shade sample with correct passes */
203         memset(&ssamp, 0, sizeof(ShadeSample));
204         ssamp.shi[0].lay= re->scene->lay;
205         ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA;
206         ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC);
207         ssamp.tot= 1;
208
209         for(a=0; a<tree->totface; a++) {
210                 obi= &R.objectinstance[tree->face[a].obi];
211                 vlr= RE_findOrAddVlak(obi->obr, tree->face[a].vlr);
212
213                 occ_shade(&ssamp, obi, vlr, tree->rad[a]);
214         }
215 }
216 #endif
217
218 /* ------------------------- Spherical Harmonics --------------------------- */
219
220 /* Use 2nd order SH => 9 coefficients, stored in this order:
221    0 = (0,0),
222    1 = (1,-1), 2 = (1,0), 3 = (1,1),
223    4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
224
225 static void sh_copy(float *shresult, float *sh)
226 {
227         memcpy(shresult, sh, sizeof(float)*9);
228 }
229
230 static void sh_mul(float *sh, float f)
231 {
232         int i;
233
234         for(i=0; i<9; i++)
235                 sh[i] *= f;
236 }
237
238 static void sh_add(float *shresult, float *sh1, float *sh2)
239 {
240         int i;
241
242         for(i=0; i<9; i++)
243                 shresult[i]= sh1[i] + sh2[i];
244 }
245
246 static void sh_from_disc(float *n, float area, float *shresult)
247 {
248         /* See formula (3) in:
249            "An Efficient Representation for Irradiance Environment Maps" */
250         float sh[9], x, y, z;
251
252         x= n[0];
253         y= n[1];
254         z= n[2];
255
256         sh[0]= 0.282095f;
257
258         sh[1]= 0.488603f*y;
259         sh[2]= 0.488603f*z;
260         sh[3]= 0.488603f*x;
261         
262         sh[4]= 1.092548f*x*y;
263         sh[5]= 1.092548f*y*z;
264         sh[6]= 0.315392f*(3.0f*z*z - 1.0f);
265         sh[7]= 1.092548f*x*z;
266         sh[8]= 0.546274f*(x*x - y*y);
267
268         sh_mul(sh, area);
269         sh_copy(shresult, sh);
270 }
271
272 static float sh_eval(float *sh, float *v)
273 {
274         /* See formula (13) in:
275            "An Efficient Representation for Irradiance Environment Maps" */
276         static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
277         static const float c4 = 0.886227f, c5 = 0.247708f;
278         float x, y, z, sum;
279
280         x= v[0];
281         y= v[1];
282         z= v[2];
283
284         sum= c1*sh[8]*(x*x - y*y);
285         sum += c3*sh[6]*z*z;
286         sum += c4*sh[0];
287         sum += -c5*sh[6];
288         sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z);
289         sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z);
290
291         return sum;
292 }
293
294 /* ------------------------------ Building --------------------------------- */
295
296 static void occ_face(const OccFace *face, float *co, float *normal, float *area)
297 {
298         ObjectInstanceRen *obi;
299         VlakRen *vlr;
300
301         obi= &R.objectinstance[face->obi];
302         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
303         
304         if(co) {
305                 if(vlr->v4)
306                         VecLerpf(co, vlr->v1->co, vlr->v3->co, 0.5f);
307                 else
308                         CalcCent3f(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
309
310                 if(obi->flag & R_TRANSFORMED)
311                         Mat4MulVecfl(obi->mat, co);
312         }
313         
314         if(normal) {
315                 normal[0]= -vlr->n[0];
316                 normal[1]= -vlr->n[1];
317                 normal[2]= -vlr->n[2];
318
319                 if(obi->flag & R_TRANSFORMED) {
320                         float xn, yn, zn, (*imat)[3]= obi->imat;
321
322                         xn= normal[0];
323                         yn= normal[1];
324                         zn= normal[2];
325
326                         normal[0]= imat[0][0]*xn + imat[0][1]*yn + imat[0][2]*zn;
327                         normal[1]= imat[1][0]*xn + imat[1][1]*yn + imat[1][2]*zn;
328                         normal[2]= imat[2][0]*xn + imat[2][1]*yn + imat[2][2]*zn;
329                 }
330         }
331
332         if(area) {
333                 /* todo: correct area for instances */
334                 if(vlr->v4)
335                         *area= AreaQ3Dfl(vlr->v1->co, vlr->v2->co, vlr->v3->co, vlr->v4->co);
336                 else
337                         *area= AreaT3Dfl(vlr->v1->co, vlr->v2->co, vlr->v3->co);
338         }
339 }
340
341 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
342 {
343         OccNode *child;
344         float occ, area, totarea;
345         int a, b;
346
347         occ= 0.0f;
348         totarea= 0.0f;
349
350         for(b=0; b<TOTCHILD; b++) {
351                 if(node->childflag & (1<<b)) {
352                         a= node->child[b].face;
353                         occ_face(&tree->face[a], 0, 0, &area);
354                         occ += area*tree->occlusion[a];
355                         totarea += area;
356                 }
357                 else if(node->child[b].node) {
358                         child= node->child[b].node;
359                         occ_sum_occlusion(tree, child);
360
361                         occ += child->area*child->occlusion;
362                         totarea += child->area;
363                 }
364         }
365
366         if(totarea != 0.0f)
367                 occ /= totarea;
368         
369         node->occlusion= occ;
370 }
371
372 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
373 {
374         float len, maxlen= -1.0f;
375         int a, axis = 0;
376
377         INIT_MINMAX(min, max);
378
379         for(a=begin; a<end; a++)
380                 DO_MINMAX(tree->co[a], min, max)
381
382         for(a=0; a<3; a++) {
383                 len= max[a] - min[a];
384
385                 if(len > maxlen) {
386                         maxlen= len;
387                         axis= a;
388                 }
389         }
390
391         return axis;
392 }
393
394 void occ_node_from_face(OccFace *face, OccNode *node)
395 {
396         float n[3];
397
398         occ_face(face, node->co, n, &node->area);
399         node->dco= 0.0f;
400         sh_from_disc(n, node->area, node->sh);
401 }
402
403 static void occ_build_dco(OcclusionTree *tree, OccNode *node, float *co, float *dco)
404 {
405         OccNode *child;
406         float dist, d[3], nco[3];
407         int b;
408
409         for(b=0; b<TOTCHILD; b++) {
410                 if(node->childflag & (1<<b)) {
411                         occ_face(tree->face+node->child[b].face, nco, 0, 0);
412                 }
413                 else if(node->child[b].node) {
414                         child= node->child[b].node;
415                         occ_build_dco(tree, child, co, dco);
416                         VECCOPY(nco, child->co);
417                 }
418
419                 VECSUB(d, nco, co);
420                 dist= INPR(d, d);
421                 if(dist > *dco)
422                         *dco= dist;
423         }
424 }
425
426 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
427 {
428         float min[3], max[3], mid;
429         int axis, a, enda;
430
431         /* split in middle of boundbox. this seems faster than median split
432          * on complex scenes, possibly since it avoids two distant faces to
433          * be in the same node better? */
434         axis= occ_find_bbox_axis(tree, begin, end, min, max);
435         mid= 0.5f*(min[axis]+max[axis]);
436
437         a= begin;
438         enda= end;
439         while(a<enda) {
440                 if(tree->co[a][axis] > mid) {
441                         enda--;
442                         SWAP(OccFace, tree->face[a], tree->face[enda]);
443                         SWAP(float, tree->co[a][0], tree->co[enda][0]);
444                         SWAP(float, tree->co[a][1], tree->co[enda][1]);
445                         SWAP(float, tree->co[a][2], tree->co[enda][2]);
446                 }
447                 else
448                         a++;
449         }
450
451         *split= enda;
452 }
453
454 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
455 {
456         /* split faces into eight groups */
457         int b, splitx, splity[2], splitz[4];
458
459         occ_build_split(tree, begin, end, &splitx);
460
461         occ_build_split(tree, begin, splitx, &splity[0]);
462         occ_build_split(tree, splitx, end, &splity[1]);
463
464         occ_build_split(tree, begin, splity[0], &splitz[0]);
465         occ_build_split(tree, splity[0], splitx, &splitz[1]);
466         occ_build_split(tree, splitx, splity[1], &splitz[2]);
467         occ_build_split(tree, splity[1], end, &splitz[3]);
468
469         offset[0]= begin;
470         offset[1]= splitz[0];
471         offset[2]= splity[0];
472         offset[3]= splitz[1];
473         offset[4]= splitx;
474         offset[5]= splitz[2];
475         offset[6]= splity[1];
476         offset[7]= splitz[3];
477
478         for(b=0; b<7; b++)
479                 count[b]= offset[b+1] - offset[b];
480         count[7]= end - offset[7];
481 }
482
483 static OccNode *occ_build_recursive(OcclusionTree *tree, int begin, int end, int depth)
484 {
485         OccNode *node, *child, tmpnode;
486         OccFace *face;
487         int a, b, offset[TOTCHILD], count[TOTCHILD];
488
489         /* keep track of maximum depth for stack */
490         if(depth > tree->maxdepth)
491                 tree->maxdepth= depth;
492
493         /* add a new node */
494         node= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
495         node->occlusion= 1.0f;
496
497         /* leaf node with only children */
498         if(end - begin <= TOTCHILD) {
499                 for(a=begin, b=0; a<end; a++, b++) {
500                         face= &tree->face[a];
501                         node->child[b].face= a;
502                         node->childflag |= (1<<b);
503                 }
504         }
505         else {
506                 /* order faces */
507                 occ_build_8_split(tree, begin, end, offset, count);
508
509                 for(b=0; b<TOTCHILD; b++) {
510                         if(count[b] == 0) {
511                                 node->child[b].node= NULL;
512                         }
513                         else if(count[b] == 1) {
514                                 face= &tree->face[offset[b]];
515                                 node->child[b].face= offset[b];
516                                 node->childflag |= (1<<b);
517                         }
518                         else {
519                                 child= occ_build_recursive(tree, offset[b], offset[b]+count[b], depth+1);
520                                 node->child[b].node= child;
521                         }
522                 }
523         }
524
525         /* combine area, position and sh */
526         for(b=0; b<TOTCHILD; b++) {
527                 if(node->childflag & (1<<b)) {
528                         child= &tmpnode;
529                         occ_node_from_face(tree->face+node->child[b].face, &tmpnode);
530                 }
531                 else {
532                         child= node->child[b].node;
533                 }
534
535                 if(child) {
536                         node->area += child->area;
537                         sh_add(node->sh, node->sh, child->sh);
538                         VECADDFAC(node->co, node->co, child->co, child->area);
539                 }
540         }
541
542         if(node->area != 0.0f)
543                 VecMulf(node->co, 1.0f/node->area);
544
545         /* compute maximum distance from center */
546         node->dco= 0.0f;
547         occ_build_dco(tree, node, node->co, &node->dco);
548
549         return node;
550 }
551
552 static void occ_build_sh_normalize(OccNode *node)
553 {
554         /* normalize spherical harmonics to not include area, so
555          * we can clamp the dot product and then mutliply by area */
556         int b;
557
558         if(node->area != 0.0f)
559                 sh_mul(node->sh, 1.0f/node->area);
560
561         for(b=0; b<TOTCHILD; b++) {
562                 if(node->childflag & (1<<b));
563                 else if(node->child[b].node)
564                         occ_build_sh_normalize(node->child[b].node);
565         }
566 }
567
568 static OcclusionTree *occ_tree_build(Render *re)
569 {
570         OcclusionTree *tree;
571         ObjectInstanceRen *obi;
572         ObjectRen *obr;
573         VlakRen *vlr= NULL;
574         int a, b, c, totface;
575
576         /* count */
577         totface= 0;
578         for(obi=re->instancetable.first; obi; obi=obi->next) {
579                 if(obi->flag & R_TRANSFORMED)
580                         continue;
581
582                 obr= obi->obr;
583                 for(a=0; a<obr->totvlak; a++) {
584                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
585                         else vlr++;
586
587                         if(vlr->mat->mode & MA_TRACEBLE)
588                                 totface++;
589                 }
590         }
591
592         if(totface == 0)
593                 return NULL;
594         
595         tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
596         tree->totface= totface;
597
598         /* parameters */
599         tree->error= re->wrld.ao_approx_error;
600         tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f;
601
602         /* allocation */
603         tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode));
604         BLI_memarena_use_calloc(tree->arena);
605
606         if(re->wrld.aomode & WO_AOCACHE)
607                 tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache");
608
609         tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace");
610         tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo");
611         tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion");
612
613         /* make array of face pointers */
614         for(b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) {
615                 if(obi->flag & R_TRANSFORMED)
616                         continue; /* temporary to avoid slow renders with loads of duplis */
617
618                 obr= obi->obr;
619                 for(a=0; a<obr->totvlak; a++) {
620                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
621                         else vlr++;
622
623                         if(vlr->mat->mode & MA_TRACEBLE) {
624                                 tree->face[b].obi= c;
625                                 tree->face[b].facenr= a;
626                                 tree->occlusion[b]= 1.0f;
627                                 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 
628                                 b++;
629                         }
630                 }
631         }
632
633         /* recurse */
634         tree->root= occ_build_recursive(tree, 0, totface, 1);
635
636 #if 0
637         if(tree->doindirect) {
638                 occ_build_shade(re, tree);
639                 occ_sum_occlusion(tree, tree->root);
640         }
641 #endif
642         
643         MEM_freeN(tree->co);
644         tree->co= NULL;
645
646         occ_build_sh_normalize(tree->root);
647
648         for(a=0; a<BLENDER_MAX_THREADS; a++)
649                 tree->stack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack");
650
651         return tree;
652 }
653
654 static void occ_free_tree(OcclusionTree *tree)
655 {
656         int a;
657
658         if(tree) {
659                 if(tree->arena) BLI_memarena_free(tree->arena);
660                 for(a=0; a<BLENDER_MAX_THREADS; a++)
661                         if(tree->stack[a])
662                                 MEM_freeN(tree->stack[a]);
663                 if(tree->occlusion) MEM_freeN(tree->occlusion);
664                 if(tree->face) MEM_freeN(tree->face);
665                 if(tree->cache) MEM_freeN(tree->cache);
666                 MEM_freeN(tree);
667         }
668 }
669
670 /* ------------------------- Traversal --------------------------- */
671
672 static float occ_solid_angle(OccNode *node, float *v, float d2, float *receivenormal)
673 {
674         float dotreceive, dotemit, invd2 = 1.0f/sqrtf(d2);
675         float ev[3];
676
677         ev[0]= -v[0]*invd2;
678         ev[1]= -v[1]*invd2;
679         ev[2]= -v[2]*invd2;
680         dotemit= sh_eval(node->sh, ev);
681         dotreceive= INPR(receivenormal, v)*invd2;
682
683         CLAMP(dotemit, 0.0f, 1.0f);
684         CLAMP(dotreceive, 0.0f, 1.0f);
685         
686         return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI;
687 }
688
689 static void VecAddDir(float *result, float *v1, float *v2, float fac)
690 {
691         result[0]= v1[0] + fac*(v2[0] - v1[0]);
692         result[1]= v1[1] + fac*(v2[1] - v1[1]);
693         result[2]= v1[2] + fac*(v2[2] - v1[2]);
694 }
695
696 static int occ_visible_quad(float *p, float *n, float *v0, float *v1, float *v2, float *q0, float *q1, float *q2, float *q3)
697 {
698         static const float epsilon = 1e-6f;
699         float c, sd[3];
700         
701         c= INPR(n, p);
702
703         /* signed distances from the vertices to the plane. */
704         sd[0]= INPR(n, v0) - c;
705         sd[1]= INPR(n, v1) - c;
706         sd[2]= INPR(n, v2) - c;
707
708         if(fabs(sd[0]) < epsilon) sd[0] = 0.0f;
709         if(fabs(sd[1]) < epsilon) sd[1] = 0.0f;
710         if(fabs(sd[2]) < epsilon) sd[2] = 0.0f;
711
712         if(sd[0] > 0) {
713                 if(sd[1] > 0) {
714                         if(sd[2] > 0) {
715                                 // +++
716                                 VECCOPY(q0, v0);
717                                 VECCOPY(q1, v1);
718                                 VECCOPY(q2, v2);
719                                 VECCOPY(q3, q2);
720                         }
721                         else if(sd[2] < 0) {
722                                 // ++-
723                                 VECCOPY(q0, v0);
724                                 VECCOPY(q1, v1);
725                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
726                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
727                         }
728                         else {
729                                 // ++0
730                                 VECCOPY(q0, v0);
731                                 VECCOPY(q1, v1);
732                                 VECCOPY(q2, v2);
733                                 VECCOPY(q3, q2);
734                         }
735                 }
736                 else if(sd[1] < 0) {
737                         if(sd[2] > 0) {
738                                 // +-+
739                                 VECCOPY(q0, v0);
740                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
741                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
742                                 VECCOPY(q3, v2);
743                         }
744                         else if(sd[2] < 0) {
745                                 // +--
746                                 VECCOPY(q0, v0);
747                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
748                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
749                                 VECCOPY(q3, q2);
750                         }
751                         else {
752                                 // +-0
753                                 VECCOPY(q0, v0);
754                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
755                                 VECCOPY(q2, v2);
756                                 VECCOPY(q3, q2);
757                         }
758                 }
759                 else {
760                         if(sd[2] > 0) {
761                                 // +0+
762                                 VECCOPY(q0, v0);
763                                 VECCOPY(q1, v1);
764                                 VECCOPY(q2, v2);
765                                 VECCOPY(q3, q2);
766                         }
767                         else if(sd[2] < 0) {
768                                 // +0-
769                                 VECCOPY(q0, v0);
770                                 VECCOPY(q1, v1);
771                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
772                                 VECCOPY(q3, q2);
773                         }
774                         else {
775                                 // +00
776                                 VECCOPY(q0, v0);
777                                 VECCOPY(q1, v1);
778                                 VECCOPY(q2, v2);
779                                 VECCOPY(q3, q2);
780                         }
781                 }
782         }
783         else if(sd[0] < 0) {
784                 if(sd[1] > 0) {
785                         if(sd[2] > 0) {
786                                 // -++
787                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
788                                 VECCOPY(q1, v1);
789                                 VECCOPY(q2, v2);
790                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
791                         }
792                         else if(sd[2] < 0) {
793                                 // -+-
794                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
795                                 VECCOPY(q1, v1);
796                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
797                                 VECCOPY(q3, q2);
798                         }
799                         else {
800                                 // -+0
801                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
802                                 VECCOPY(q1, v1);
803                                 VECCOPY(q2, v2);
804                                 VECCOPY(q3, q2);
805                         }
806                 }
807                 else if(sd[1] < 0) {
808                         if(sd[2] > 0) {
809                                 // --+
810                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
811                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
812                                 VECCOPY(q2, v2);
813                                 VECCOPY(q3, q2);
814                         }
815                         else if(sd[2] < 0) {
816                                 // ---
817                                 return 0;
818                         }
819                         else {
820                                 // --0
821                                 return 0;
822                         }
823                 }
824                 else {
825                         if(sd[2] > 0) {
826                                 // -0+
827                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
828                                 VECCOPY(q1, v1);
829                                 VECCOPY(q2, v2);
830                                 VECCOPY(q3, q2);
831                         }
832                         else if(sd[2] < 0) {
833                                 // -0-
834                                 return 0;
835                         }
836                         else {
837                                 // -00
838                                 return 0;
839                         }
840                 }
841         }
842         else {
843                 if(sd[1] > 0) {
844                         if(sd[2] > 0) {
845                                 // 0++
846                                 VECCOPY(q0, v0);
847                                 VECCOPY(q1, v1);
848                                 VECCOPY(q2, v2);
849                                 VECCOPY(q3, q2);
850                         }
851                         else if(sd[2] < 0) {
852                                 // 0+-
853                                 VECCOPY(q0, v0);
854                                 VECCOPY(q1, v1);
855                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
856                                 VECCOPY(q3, q2);
857                         }
858                         else {
859                                 // 0+0
860                                 VECCOPY(q0, v0);
861                                 VECCOPY(q1, v1);
862                                 VECCOPY(q2, v2);
863                                 VECCOPY(q3, q2);
864                         }
865                 }
866                 else if(sd[1] < 0) {
867                         if(sd[2] > 0) {
868                                 // 0-+
869                                 VECCOPY(q0, v0);
870                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
871                                 VECCOPY(q2, v2);
872                                 VECCOPY(q3, q2);
873                         }
874                         else if(sd[2] < 0) {
875                                 // 0--
876                                 return 0;
877                         }
878                         else {
879                                 // 0-0
880                                 return 0;
881                         }
882                 }
883                 else {
884                         if(sd[2] > 0) {
885                                 // 00+
886                                 VECCOPY(q0, v0);
887                                 VECCOPY(q1, v1);
888                                 VECCOPY(q2, v2);
889                                 VECCOPY(q3, q2);
890                         }
891                         else if(sd[2] < 0) {
892                                 // 00-
893                                 return 0;
894                         }
895                         else {
896                                 // 000
897                                 return 0;
898                         }
899                 }
900         }
901
902         return 1;
903 }
904
905 /* altivec optimization, this works, but is unused */
906
907 #if 0
908 #include <Accelerate/Accelerate.h>
909
910 typedef union {
911         vFloat v;
912         float f[4];
913 } vFloatResult;
914
915 static vFloat vec_splat_float(float val)
916 {
917         return (vFloat){val, val, val, val};
918 }
919
920 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
921 {
922         vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle;
923         vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3};
924         vFloatResult vresult;
925         float result;
926
927         /* compute r* */
928         vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]);
929         vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]);
930         vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]);
931
932         /* normalize r* */
933         rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f));
934         vrx = vrx*rlen;
935         vry = vry*rlen;
936         vrz = vrz*rlen;
937
938         /* rotate r* for cross and dot */
939         vsrx= vec_perm(vrx, vrx, rotate);
940         vsry= vec_perm(vry, vry, rotate);
941         vsrz= vec_perm(vrz, vrz, rotate);
942
943         /* cross product */
944         gx = vsry*vrz - vsrz*vry;
945         gy = vsrz*vrx - vsrx*vrz;
946         gz = vsrx*vry - vsry*vrx;
947
948         /* normalize */
949         rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f));
950         gx = gx*rlen;
951         gy = gy*rlen;
952         gz = gz*rlen;
953
954         /* angle */
955         vcos = vrx*vsrx + vry*vsry + vrz*vsrz;
956         vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f));
957         vangle= vacosf(vcos);
958
959         /* dot */
960         vresult.v = (vec_splat_float(n[0])*gx +
961                      vec_splat_float(n[1])*gy +
962                      vec_splat_float(n[2])*gz)*vangle;
963
964         result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI);
965         result= MAX2(result, 0.0f);
966
967         return result;
968 }
969
970 #endif
971
972 /* SSE optimization, acos code doesn't work */
973
974 #if 0
975
976 #include <xmmintrin.h>
977
978 static __m128 sse_approx_acos(__m128 x)
979 {
980         /* needs a better approximation than taylor expansion of acos, since that
981          * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work
982          * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */
983
984         return _mm_set_ps1(1.0f);
985 }
986
987 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
988 {
989         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
990         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
991         float fresult[4] __attribute__((aligned(16)));
992         __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult;
993
994         /* compute r */
995         qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]);
996         qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]);
997         qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]);
998
999         rx = qx - _mm_set_ps1(p[0]);
1000         ry = qy - _mm_set_ps1(p[1]);
1001         rz = qz - _mm_set_ps1(p[2]);
1002
1003         /* normalize r */
1004         rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f));
1005         rx = rx*rlen;
1006         ry = ry*rlen;
1007         rz = rz*rlen;
1008
1009         /* cross product */
1010         srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1));
1011         sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1));
1012         srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1));
1013
1014         gx = sry*rz - srz*ry;
1015         gy = srz*rx - srx*rz;
1016         gz = srx*ry - sry*rx;
1017
1018         /* normalize g */
1019         glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f));
1020         gx = gx*glen;
1021         gy = gy*glen;
1022         gz = gz*glen;
1023
1024         /* compute angle */
1025         rcos = rx*srx + ry*sry + rz*srz;
1026         rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f));
1027
1028         angle = sse_approx_cos(rcos);
1029         aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle;
1030
1031         /* sum together */
1032     result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI);
1033         result= MAX2(result, 0.0f);
1034
1035         return result;
1036 }
1037
1038 #endif
1039
1040 static float saacosf(float fac)
1041 {
1042         if(fac<= -1.0f) return (float)M_PI;
1043         else if(fac>=1.0f) return 0.0f;
1044         else return acos(fac); /* acosf(fac) */
1045 }
1046
1047 static void normalizef(float *n)
1048 {
1049         float d;
1050         
1051         d= INPR(n, n);
1052
1053         if(d > 1.0e-35F) {
1054                 d= 1.0f/sqrt(d); /* sqrtf(d) */
1055
1056                 n[0] *= d; 
1057                 n[1] *= d; 
1058                 n[2] *= d;
1059         } 
1060 }
1061
1062 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
1063 {
1064         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1065         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1066
1067         VECSUB(r0, q0, p);
1068         VECSUB(r1, q1, p);
1069         VECSUB(r2, q2, p);
1070         VECSUB(r3, q3, p);
1071
1072         normalizef(r0);
1073         normalizef(r1);
1074         normalizef(r2);
1075         normalizef(r3);
1076
1077         Crossf(g0, r1, r0); normalizef(g0);
1078         Crossf(g1, r2, r1); normalizef(g1);
1079         Crossf(g2, r3, r2); normalizef(g2);
1080         Crossf(g3, r0, r3); normalizef(g3);
1081
1082         a1= saacosf(INPR(r0, r1));
1083         a2= saacosf(INPR(r1, r2));
1084         a3= saacosf(INPR(r2, r3));
1085         a4= saacosf(INPR(r3, r0));
1086
1087         dot1= INPR(n, g0);
1088         dot2= INPR(n, g1);
1089         dot3= INPR(n, g2);
1090         dot4= INPR(n, g3);
1091
1092         result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI;
1093         result= MAX2(result, 0.0f);
1094
1095         return result;
1096 }
1097
1098 float occ_form_factor(OccFace *face, float *p, float *n)
1099 {
1100         ObjectInstanceRen *obi;
1101         VlakRen *vlr;
1102         float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f;
1103
1104         obi= &R.objectinstance[face->obi];
1105         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
1106
1107         VECCOPY(v1, vlr->v1->co);
1108         VECCOPY(v2, vlr->v2->co);
1109         VECCOPY(v3, vlr->v3->co);
1110
1111         if(obi->flag & R_TRANSFORMED) {
1112                 Mat4MulVecfl(obi->mat, v1);
1113                 Mat4MulVecfl(obi->mat, v2);
1114                 Mat4MulVecfl(obi->mat, v3);
1115         }
1116
1117         if(occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
1118                 contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1119
1120         if(vlr->v4) {
1121                 VECCOPY(v4, vlr->v4->co);
1122                 if(obi->flag & R_TRANSFORMED)
1123                         Mat4MulVecfl(obi->mat, v4);
1124
1125                 if(occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
1126                         contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1127         }
1128
1129         return contrib;
1130 }
1131
1132 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float *bentn)
1133 {
1134         OccNode *node, **stack;
1135         OccFace *face;
1136         float resultocc, v[3], p[3], n[3], co[3];
1137         float distfac, error, d2, weight, emitarea;
1138         int b, totstack;
1139
1140         /* init variables */
1141         VECCOPY(p, pp);
1142         VECCOPY(n, pn);
1143         VECADDFAC(p, p, n, 1e-4f);
1144
1145         if(bentn)
1146                 VECCOPY(bentn, n);
1147         
1148         error= tree->error;
1149         distfac= tree->distfac;
1150
1151         resultocc= 0.0f;
1152
1153         /* init stack */
1154         stack= tree->stack[thread];
1155         stack[0]= tree->root;
1156         totstack= 1;
1157
1158         while(totstack) {
1159                 /* pop point off the stack */
1160                 node= stack[--totstack];
1161
1162                 VECSUB(v, node->co, p);
1163                 d2= INPR(v, v) + 1e-16f;
1164                 emitarea= MAX2(node->area, node->dco);
1165
1166                 if(d2*error > emitarea) {
1167                         /* accumulate occlusion from spherical harmonics */
1168                         weight= occ_solid_angle(node, v, d2, n);
1169                         weight *= node->occlusion;
1170
1171                         if(bentn) {
1172                                 Normalize(v);
1173                                 bentn[0] -= weight*v[0];
1174                                 bentn[1] -= weight*v[1];
1175                                 bentn[2] -= weight*v[2];
1176                         }
1177
1178                         if(distfac != 0.0f)
1179                                 weight /= (1.0 + distfac*d2);
1180
1181                         resultocc += weight;
1182                 }
1183                 else {
1184                         /* traverse into children */
1185                         for(b=0; b<TOTCHILD; b++) {
1186                                 if(node->childflag & (1<<b)) {
1187                                         face= tree->face+node->child[b].face;
1188
1189                                         /* accumulate occlusion with face form factor */
1190                                         if(!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
1191                                                 weight= occ_form_factor(face, p, n);
1192                                                 weight *= tree->occlusion[node->child[b].face];
1193
1194                                                 if(bentn || distfac != 0.0f) {
1195                                                         occ_face(face, co, NULL, NULL); 
1196                                                         VECSUB(v, co, p);
1197
1198                                                         if(bentn) {
1199                                                                 Normalize(v);
1200                                                                 bentn[0] -= weight*v[0];
1201                                                                 bentn[1] -= weight*v[1];
1202                                                                 bentn[2] -= weight*v[2];
1203                                                         }
1204
1205                                                         if(distfac != 0.0f) {
1206                                                                 d2= INPR(v, v) + 1e-16f;
1207                                                                 weight /= (1.0 + distfac*d2);
1208                                                         }
1209                                                 }
1210
1211                                                 resultocc += weight;
1212
1213                                         }
1214                                 }
1215                                 else if(node->child[b].node) {
1216                                         /* push child on the stack */
1217                                         stack[totstack++]= node->child[b].node;
1218                                 }
1219                         }
1220                 }
1221         }
1222
1223         if(occ) *occ= resultocc;
1224 }
1225
1226 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
1227 {
1228         float *occ, co[3], n[3];
1229         int pass, i;
1230         
1231         occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc");
1232
1233         for(pass=0; pass<totpass; pass++) {
1234                 for(i=0; i<tree->totface; i++) {
1235                         occ_face(&tree->face[i], co, n, NULL);
1236                         VecMulf(n, -1.0f);
1237                         VECADDFAC(co, co, n, 1e-8f);
1238
1239                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL);
1240                         if(re->test_break())
1241                                 break;
1242                 }
1243
1244                 if(re->test_break())
1245                         break;
1246
1247                 for(i=0; i<tree->totface; i++) {
1248                         tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
1249                         if(tree->occlusion[i] < 0.0f)
1250                                 tree->occlusion[i]= 0.0f;
1251                 }
1252
1253                 occ_sum_occlusion(tree, tree->root);
1254         }
1255
1256         MEM_freeN(occ);
1257 }
1258
1259 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *skycol)
1260 {
1261         float nn[3], bn[3], dxyview[3], fac, occ, occlusion, correction;
1262         int aocolor;
1263
1264         aocolor= re->wrld.aocolor;
1265         if(onlyshadow)
1266                 aocolor= WO_AOPLAIN;
1267
1268         VECCOPY(nn, n);
1269         VecMulf(nn, -1.0f);
1270
1271         occ_lookup(tree, thread, exclude, co, nn, &occ, (aocolor)? bn: NULL);
1272
1273         correction= re->wrld.ao_approx_correction;
1274
1275         occlusion= (1.0f-correction)*(1.0f-occ);
1276         CLAMP(occlusion, 0.0f, 1.0f);
1277         if(correction != 0.0f)
1278                 occlusion += correction*exp(-occ);
1279
1280         if(aocolor) {
1281                 /* sky shading using bent normal */
1282                 if(aocolor==WO_AOSKYCOL) {
1283                         fac= 0.5*(1.0f+bn[0]*re->grvec[0]+ bn[1]*re->grvec[1]+ bn[2]*re->grvec[2]);
1284                         skycol[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr;
1285                         skycol[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng;
1286                         skycol[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb;
1287                 }
1288                 else {  /* WO_AOSKYTEX */
1289                         dxyview[0]= 1.0f;
1290                         dxyview[1]= 1.0f;
1291                         dxyview[2]= 0.0f;
1292                         shadeSkyView(skycol, co, bn, dxyview);
1293                 }
1294
1295                 VecMulf(skycol, occlusion);
1296         }
1297         else {
1298                 skycol[0]= occlusion;
1299                 skycol[1]= occlusion;
1300                 skycol[2]= occlusion;
1301         }
1302 }
1303
1304 /* ---------------------------- Caching ------------------------------- */
1305
1306 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
1307 {
1308         x -= cache->x;
1309         y -= cache->y;
1310
1311         x /= cache->step;
1312         y /= cache->step;
1313         x *= cache->step;
1314         y *= cache->step;
1315
1316         if(x < 0 || x >= cache->w || y < 0 || y >= cache->h)
1317                 return NULL;
1318         else
1319                 return &cache->sample[y*cache->w + x];
1320 }
1321
1322 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *col)
1323 {
1324         OcclusionCache *cache;
1325         OcclusionCacheSample *samples[4], *sample;
1326         float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
1327         float d[3], dist2;
1328         int i, x1, y1, x2, y2;
1329
1330         if(!tree->cache)
1331                 return 0;
1332         
1333         /* first try to find a sample in the same pixel */
1334         cache= &tree->cache[thread];
1335
1336         if(cache->sample && cache->step) {
1337                 sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)];
1338                 if(sample->filled) {
1339                         VECSUB(d, sample->co, co);
1340                         dist2= INPR(d, d);
1341                         if(dist2 < 0.5f*sample->dist2 && INPR(sample->n, n) > 0.98f) {
1342                                 VECCOPY(col, sample->col);
1343                                 return 1;
1344                         }
1345                 }
1346         }
1347         else
1348                 return 0;
1349
1350         /* try to interpolate between 4 neighbouring pixels */
1351         samples[0]= find_occ_sample(cache, x, y);
1352         samples[1]= find_occ_sample(cache, x+cache->step, y);
1353         samples[2]= find_occ_sample(cache, x, y+cache->step);
1354         samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step);
1355
1356         for(i=0; i<4; i++)
1357                 if(!samples[i] || !samples[i]->filled)
1358                         return 0;
1359
1360         /* require intensities not being too different */
1361         mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1362         maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1363
1364         if(maxo - mino > 0.05f)
1365                 return 0;
1366
1367         /* compute weighted interpolation between samples */
1368         col[0]= col[1]= col[2]= 0.0f;
1369         totw= 0.0f;
1370
1371         x1= samples[0]->x;
1372         y1= samples[0]->y;
1373         x2= samples[3]->x;
1374         y2= samples[3]->y;
1375
1376         tx= (float)(x2 - x)/(float)(x2 - x1);
1377         ty= (float)(y2 - y)/(float)(y2 - y1);
1378
1379         wb[3]= (1.0f-tx)*(1.0f-ty);
1380         wb[2]= (tx)*(1.0f-ty);
1381         wb[1]= (1.0f-tx)*(ty);
1382         wb[0]= tx*ty;
1383
1384         for(i=0; i<4; i++) {
1385                 VECSUB(d, samples[i]->co, co);
1386                 dist2= INPR(d, d);
1387
1388                 wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
1389                 wn[i]= pow(INPR(samples[i]->n, n), 32.0f);
1390
1391                 w= wb[i]*wn[i]*wz[i];
1392
1393                 totw += w;
1394                 col[0] += w*samples[i]->col[0];
1395                 col[1] += w*samples[i]->col[1];
1396                 col[2] += w*samples[i]->col[2];
1397         }
1398
1399         if(totw >= 0.9f) {
1400                 totw= 1.0f/totw;
1401                 col[0] *= totw;
1402                 col[1] *= totw;
1403                 col[2] *= totw;
1404                 return 1;
1405         }
1406
1407         return 0;
1408 }
1409
1410 static void sample_occ_cache_mesh(ShadeInput *shi)
1411 {
1412         StrandRen *strand= shi->strand;
1413         OcclusionCacheMesh *mesh= strand->buffer->occlusionmesh;
1414         int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
1415         float w[4], *co1, *co2, *co3, *co4;
1416
1417         if(mesh && index) {
1418                 face= mesh->face[*index];
1419
1420                 co1= mesh->co[face[0]];
1421                 co2= mesh->co[face[1]];
1422                 co3= mesh->co[face[2]];
1423                 co4= (face[3])? mesh->co[face[3]]: NULL;
1424
1425                 InterpWeightsQ3Dfl(co1, co2, co3, co4, strand->vert->co, w);
1426
1427                 shi->ao[0]= shi->ao[1]= shi->ao[2]= 0.0f;
1428                 VECADDFAC(shi->ao, shi->ao, mesh->col[face[0]], w[0]);
1429                 VECADDFAC(shi->ao, shi->ao, mesh->col[face[1]], w[1]);
1430                 VECADDFAC(shi->ao, shi->ao, mesh->col[face[2]], w[2]);
1431                 if(face[3])
1432                         VECADDFAC(shi->ao, shi->ao, mesh->col[face[3]], w[3]);
1433         }
1434         else {
1435                 shi->ao[0]= 1.0f;
1436                 shi->ao[1]= 1.0f;
1437                 shi->ao[2]= 1.0f;
1438         }
1439 }
1440
1441 void *cache_occ_mesh(Render *re, ObjectRen *obr, DerivedMesh *dm, float mat[][4])
1442 {
1443         OcclusionCacheMesh *mesh;
1444         MFace *mface;
1445         MVert *mvert;
1446         int a, totvert, totface;
1447
1448         totvert= dm->getNumVerts(dm);
1449         totface= dm->getNumFaces(dm);
1450
1451         mesh= re->occlusionmesh.last;
1452         if(mesh && mesh->obr->ob == obr->ob && mesh->obr->par == obr->par
1453                 && mesh->totvert==totvert && mesh->totface==totface)
1454                 return mesh;
1455
1456         mesh= MEM_callocN(sizeof(OcclusionCacheMesh), "OcclusionCacheMesh");
1457         mesh->obr= obr;
1458         mesh->totvert= totvert;
1459         mesh->totface= totface;
1460         mesh->co= MEM_callocN(sizeof(float)*3*mesh->totvert, "OcclusionMeshCo");
1461         mesh->face= MEM_callocN(sizeof(int)*4*mesh->totface, "OcclusionMeshFaces");
1462         mesh->col= MEM_callocN(sizeof(float)*3*mesh->totvert, "OcclusionMeshCol");
1463
1464         mvert= dm->getVertArray(dm);
1465         for(a=0; a<mesh->totvert; a++, mvert++) {
1466                 VECCOPY(mesh->co[a], mvert->co);
1467                 Mat4MulVecfl(mat, mesh->co[a]);
1468         }
1469
1470         mface= dm->getFaceArray(dm);
1471         for(a=0; a<mesh->totface; a++, mface++) {
1472                 mesh->face[a][0]= mface->v1;
1473                 mesh->face[a][1]= mface->v2;
1474                 mesh->face[a][2]= mface->v3;
1475                 mesh->face[a][3]= mface->v4;
1476         }
1477
1478         BLI_addtail(&re->occlusionmesh, mesh);
1479
1480         return mesh;
1481 }
1482
1483 /* ------------------------- External Functions --------------------------- */
1484
1485 void make_occ_tree(Render *re)
1486 {
1487         OcclusionCacheMesh *mesh;
1488         float col[3], co[3], n[3], *co1, *co2, *co3, *co4;
1489         int a, *face, *count;
1490
1491         /* ugly, needed for occ_face */
1492         R= *re;
1493
1494         re->i.infostr= "Occlusion preprocessing";
1495         re->stats_draw(&re->i);
1496         
1497         re->occlusiontree= occ_tree_build(re);
1498         
1499         if(re->occlusiontree) {
1500                 if(re->wrld.ao_approx_passes)
1501                         occ_compute_passes(re, re->occlusiontree, re->wrld.ao_approx_passes);
1502
1503                 for(mesh=re->occlusionmesh.first; mesh; mesh=mesh->next) {
1504                         count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount");
1505
1506                         for(a=0; a<mesh->totface; a++) {
1507                                 face= mesh->face[a];
1508                                 co1= mesh->co[face[0]];
1509                                 co2= mesh->co[face[1]];
1510                                 co3= mesh->co[face[2]];
1511
1512                                 if(face[3]) {
1513                                         co4= mesh->co[face[3]];
1514
1515                                         VecLerpf(co, co1, co3, 0.5f);
1516                                         CalcNormFloat4(co1, co2, co3, co4, n);
1517                                 }
1518                                 else {
1519                                         CalcCent3f(co, co1, co2, co3);
1520                                         CalcNormFloat(co1, co2, co3, n);
1521                                 }
1522                                 VecMulf(n, -1.0f);
1523
1524                                 sample_occ_tree(re, re->occlusiontree, NULL, co, n, 0, 0, col);
1525
1526                                 VECADD(mesh->col[face[0]], mesh->col[face[0]], col);
1527                                 count[face[0]]++;
1528                                 VECADD(mesh->col[face[1]], mesh->col[face[1]], col);
1529                                 count[face[1]]++;
1530                                 VECADD(mesh->col[face[2]], mesh->col[face[2]], col);
1531                                 count[face[2]]++;
1532
1533                                 if(face[3]) {
1534                                         VECADD(mesh->col[face[3]], mesh->col[face[3]], col);
1535                                         count[face[3]]++;
1536                                 }
1537                         }
1538
1539                         for(a=0; a<mesh->totvert; a++)
1540                                 if(count[a])
1541                                         VecMulf(mesh->col[a], 1.0f/count[a]);
1542
1543                         MEM_freeN(count);
1544                 }
1545         }
1546 }
1547
1548 void free_occ(Render *re)
1549 {
1550         OcclusionCacheMesh *mesh;
1551
1552         if(re->occlusiontree) {
1553                 occ_free_tree(re->occlusiontree);
1554                 re->occlusiontree = NULL;
1555         }
1556
1557         for(mesh=re->occlusionmesh.first; mesh; mesh=mesh->next) {
1558                 if(mesh->co) MEM_freeN(mesh->co);
1559                 if(mesh->col) MEM_freeN(mesh->col);
1560                 if(mesh->face) MEM_freeN(mesh->face);
1561         }
1562
1563         BLI_freelistN(&re->occlusionmesh);
1564 }
1565
1566 void sample_occ(Render *re, ShadeInput *shi)
1567 {
1568         OcclusionTree *tree= re->occlusiontree;
1569         OcclusionCache *cache;
1570         OcclusionCacheSample *sample;
1571         OccFace exclude;
1572         int onlyshadow;
1573
1574         if(tree) {
1575                 if(shi->strand) {
1576                         sample_occ_cache_mesh(shi);
1577                 }
1578                 /* try to get result from the cache if possible */
1579                 else if(!sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao)) {
1580                         /* no luck, let's sample the occlusion */
1581                         exclude.obi= shi->obi - re->objectinstance;
1582                         exclude.facenr= shi->vlr->index;
1583                         onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
1584                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao);
1585
1586                         if(G.rt & 32)
1587                                 shi->ao[2] *= 2.0f;
1588
1589                         /* fill result into sample, each time */
1590                         if(tree->cache) {
1591                                 cache= &tree->cache[shi->thread];
1592
1593                                 if(cache->sample && cache->step) {
1594                                         sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)];
1595                                         VECCOPY(sample->co, shi->co);
1596                                         VECCOPY(sample->n, shi->vno);
1597                                         VECCOPY(sample->col, shi->ao);
1598                                         sample->intensity= MAX3(sample->col[0], sample->col[1], sample->col[2]);
1599                                         sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
1600                                         sample->filled= 1;
1601                                 }
1602                         }
1603                 }
1604         }
1605         else {
1606                 shi->ao[0]= 1.0f;
1607                 shi->ao[1]= 1.0f;
1608                 shi->ao[2]= 1.0f;
1609         }
1610 }
1611
1612 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
1613 {
1614         OcclusionTree *tree= re->occlusiontree;
1615         PixStr ps;
1616         OcclusionCache *cache;
1617         OcclusionCacheSample *sample;
1618         OccFace exclude;
1619         ShadeInput *shi;
1620         int *ro, *rp, *rz, onlyshadow;
1621         int x, y, step = CACHE_STEP;
1622
1623         if(!tree->cache)
1624                 return;
1625
1626         cache= &tree->cache[pa->thread];
1627         cache->w= pa->rectx;
1628         cache->h= pa->recty;
1629         cache->x= pa->disprect.xmin;
1630         cache->y= pa->disprect.ymin;
1631         cache->step= step;
1632         cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample");
1633         sample= cache->sample;
1634
1635         ps.next= NULL;
1636         ps.mask= 0xFFFF;
1637
1638         ro= pa->recto;
1639         rp= pa->rectp;
1640         rz= pa->rectz;
1641
1642         /* compute a sample at every step pixels */
1643         for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
1644                 for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, ro++, rp++, rz++, sample++) {
1645                         if((((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1) && (((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1)) {
1646                                 if(*rp) {
1647                                         ps.obi= *ro;
1648                                         ps.facenr= *rp;
1649                                         ps.z= *rz;
1650
1651                                         shade_samples_fill_with_ps(ssamp, &ps, x, y);
1652                                         shi= ssamp->shi;
1653                                         if(!shi->vlr)
1654                                                 continue;
1655
1656                                         onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
1657                                         exclude.obi= shi->obi - re->objectinstance;
1658                                         exclude.facenr= shi->vlr->index;
1659                                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao);
1660
1661                                         VECCOPY(sample->co, shi->co);
1662                                         VECCOPY(sample->n, shi->vno);
1663                                         VECCOPY(sample->col, shi->ao);
1664                                         sample->intensity= MAX3(sample->col[0], sample->col[1], sample->col[2]);
1665                                         sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
1666                                         sample->x= shi->xs;
1667                                         sample->y= shi->ys;
1668                                         sample->filled= 1;
1669                                 }
1670
1671                                 if(re->test_break())
1672                                         break;
1673                         }
1674                 }
1675         }
1676 }
1677
1678 void free_occ_samples(Render *re, RenderPart *pa)
1679 {
1680         OcclusionTree *tree= re->occlusiontree;
1681         OcclusionCache *cache;
1682
1683         if(tree->cache) {
1684                 cache= &tree->cache[pa->thread];
1685
1686                 if(cache->sample)
1687                         MEM_freeN(cache->sample);
1688
1689                 cache->w= 0;
1690                 cache->h= 0;
1691                 cache->step= 0;
1692         }
1693 }
1694