replace macros VECCOPY and QUATCOPY with inline math functions no functional changes
[blender.git] / source / blender / render / intern / source / occlusion.c
1 /* 
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19  *
20  * The Original Code is Copyright (C) 2008 Blender Foundation.
21  * All rights reserved.
22  *
23  * The Original Code is: all of this file.
24  *
25  * Contributor(s): Brecht Van Lommel.
26  *
27  * ***** END GPL LICENSE BLOCK *****
28  */
29
30 #include <math.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34
35 #include "MEM_guardedalloc.h"
36
37 #include "DNA_material_types.h"
38
39 #include "BLI_math.h"
40 #include "BLI_blenlib.h"
41 #include "BLI_memarena.h"
42 #include "BLI_threads.h"
43
44 #include "BKE_global.h"
45 #include "BKE_scene.h"
46 #include "BKE_utildefines.h"
47
48 #include "RE_shader_ext.h"
49
50 /* local includes */
51 #include "occlusion.h"
52 #include "render_types.h"
53 #include "rendercore.h"
54 #include "renderdatabase.h"
55 #include "pixelshading.h"
56 #include "shading.h"
57 #include "zbuf.h"
58
59 /* ------------------------- Declarations --------------------------- */
60
61 #define INVALID_INDEX ((int)(~0))
62 #define INVPI 0.31830988618379069f
63 #define TOTCHILD 8
64 #define CACHE_STEP 3
65
66 typedef struct OcclusionCacheSample {
67         float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2;
68         int x, y, filled;
69 } OcclusionCacheSample;
70
71 typedef struct OcclusionCache {
72         OcclusionCacheSample *sample;
73         int x, y, w, h, step;
74 } OcclusionCache;
75
76 typedef struct OccFace {
77         int obi;
78         int facenr;
79 } OccFace;
80
81 typedef struct OccNode {
82         float co[3], area;
83         float sh[9], dco;
84         float occlusion, rad[3];
85         int childflag;
86         union {
87                 //OccFace face;
88                 int face;
89                 struct OccNode *node;
90         } child[TOTCHILD];
91 } OccNode;
92
93 typedef struct OcclusionTree {
94         MemArena *arena;
95
96         float (*co)[3];         /* temporary during build */
97
98         OccFace *face;          /* instance and face indices */
99         float *occlusion;       /* occlusion for faces */
100         float (*rad)[3];        /* radiance for faces */
101         
102         OccNode *root;
103
104         OccNode **stack[BLENDER_MAX_THREADS];
105         int maxdepth;
106
107         int totface;
108
109         float error;
110         float distfac;
111
112         int dothreadedbuild;
113         int totbuildthread;
114         int doindirect;
115
116         OcclusionCache *cache;
117 } OcclusionTree;
118
119 typedef struct OcclusionThread {
120         Render *re;
121         StrandSurface *mesh;
122         float (*faceao)[3];
123         float (*faceenv)[3];
124         float (*faceindirect)[3];
125         int begin, end;
126         int thread;
127 } OcclusionThread;
128
129 typedef struct OcclusionBuildThread {
130         OcclusionTree *tree;
131         int begin, end, depth;
132         OccNode *node;
133 } OcclusionBuildThread;
134
135 /* ------------------------- Shading --------------------------- */
136
137 extern Render R; // meh
138
139 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
140 {
141         ShadeInput *shi= ssamp->shi;
142         ShadeResult *shr= ssamp->shr;
143         float l, u, v, *v1, *v2, *v3;
144         
145         /* init */
146         if(vlr->v4) {
147                 shi->u= u= 0.5f;
148                 shi->v= v= 0.5f;
149         }
150         else {
151                 shi->u= u= 1.0f/3.0f;
152                 shi->v= v= 1.0f/3.0f;
153         }
154
155         /* setup render coordinates */
156         v1= vlr->v1->co;
157         v2= vlr->v2->co;
158         v3= vlr->v3->co;
159         
160         /* renderco */
161         l= 1.0f-u-v;
162         
163         shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0];
164         shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1];
165         shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2];
166
167         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
168
169         /* set up view vector */
170         VECCOPY(shi->view, shi->co);
171         normalize_v3(shi->view);
172         
173         /* cache for shadow */
174         shi->samplenr++;
175         
176         shi->xs= 0; // TODO
177         shi->ys= 0;
178         
179         shade_input_set_normals(shi);
180
181         /* no normal flip */
182         if(shi->flippednor)
183                 shade_input_flip_normals(shi);
184
185         madd_v3_v3fl(shi->co, shi->vn, 0.0001f); /* ugly.. */
186
187         /* not a pretty solution, but fixes common cases */
188         if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
189                 negate_v3(shi->vn);
190                 negate_v3(shi->vno);
191         }
192
193         /* init material vars */
194         // note, keep this synced with render_types.h
195         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));
196         shi->har= shi->mat->har;
197         
198         /* render */
199         shade_input_set_shade_texco(shi);
200         shade_material_loop(shi, shr); /* todo: nodes */
201         
202         VECCOPY(rad, shr->combined);
203 }
204
205 static void occ_build_shade(Render *re, OcclusionTree *tree)
206 {
207         ShadeSample ssamp;
208         ObjectInstanceRen *obi;
209         VlakRen *vlr;
210         int a;
211
212         R= *re;
213
214         /* setup shade sample with correct passes */
215         memset(&ssamp, 0, sizeof(ShadeSample));
216         ssamp.shi[0].lay= re->lay;
217         ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA;
218         ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC);
219         ssamp.tot= 1;
220
221         for(a=0; a<tree->totface; a++) {
222                 obi= &R.objectinstance[tree->face[a].obi];
223                 vlr= RE_findOrAddVlak(obi->obr, tree->face[a].facenr);
224
225                 occ_shade(&ssamp, obi, vlr, tree->rad[a]);
226         }
227 }
228
229 /* ------------------------- Spherical Harmonics --------------------------- */
230
231 /* Use 2nd order SH => 9 coefficients, stored in this order:
232    0 = (0,0),
233    1 = (1,-1), 2 = (1,0), 3 = (1,1),
234    4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
235
236 static void sh_copy(float *shresult, float *sh)
237 {
238         memcpy(shresult, sh, sizeof(float)*9);
239 }
240
241 static void sh_mul(float *sh, float f)
242 {
243         int i;
244
245         for(i=0; i<9; i++)
246                 sh[i] *= f;
247 }
248
249 static void sh_add(float *shresult, float *sh1, float *sh2)
250 {
251         int i;
252
253         for(i=0; i<9; i++)
254                 shresult[i]= sh1[i] + sh2[i];
255 }
256
257 static void sh_from_disc(float *n, float area, float *shresult)
258 {
259         /* See formula (3) in:
260            "An Efficient Representation for Irradiance Environment Maps" */
261         float sh[9], x, y, z;
262
263         x= n[0];
264         y= n[1];
265         z= n[2];
266
267         sh[0]= 0.282095f;
268
269         sh[1]= 0.488603f*y;
270         sh[2]= 0.488603f*z;
271         sh[3]= 0.488603f*x;
272         
273         sh[4]= 1.092548f*x*y;
274         sh[5]= 1.092548f*y*z;
275         sh[6]= 0.315392f*(3.0f*z*z - 1.0f);
276         sh[7]= 1.092548f*x*z;
277         sh[8]= 0.546274f*(x*x - y*y);
278
279         sh_mul(sh, area);
280         sh_copy(shresult, sh);
281 }
282
283 static float sh_eval(float *sh, float *v)
284 {
285         /* See formula (13) in:
286            "An Efficient Representation for Irradiance Environment Maps" */
287         static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
288         static const float c4 = 0.886227f, c5 = 0.247708f;
289         float x, y, z, sum;
290
291         x= v[0];
292         y= v[1];
293         z= v[2];
294
295         sum= c1*sh[8]*(x*x - y*y);
296         sum += c3*sh[6]*z*z;
297         sum += c4*sh[0];
298         sum += -c5*sh[6];
299         sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z);
300         sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z);
301
302         return sum;
303 }
304
305 /* ------------------------------ Building --------------------------------- */
306
307 static void occ_face(const OccFace *face, float *co, float *normal, float *area)
308 {
309         ObjectInstanceRen *obi;
310         VlakRen *vlr;
311         float v1[3], v2[3], v3[3], v4[3];
312
313         obi= &R.objectinstance[face->obi];
314         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
315         
316         if(co) {
317                 if(vlr->v4)
318                         interp_v3_v3v3(co, vlr->v1->co, vlr->v3->co, 0.5f);
319                 else
320                         cent_tri_v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
321
322                 if(obi->flag & R_TRANSFORMED)
323                         mul_m4_v3(obi->mat, co);
324         }
325         
326         if(normal) {
327                 normal[0]= -vlr->n[0];
328                 normal[1]= -vlr->n[1];
329                 normal[2]= -vlr->n[2];
330
331                 if(obi->flag & R_TRANSFORMED)
332                         mul_m3_v3(obi->nmat, normal);
333         }
334
335         if(area) {
336                 VECCOPY(v1, vlr->v1->co);
337                 VECCOPY(v2, vlr->v2->co);
338                 VECCOPY(v3, vlr->v3->co);
339                 if(vlr->v4) VECCOPY(v4, vlr->v4->co);
340
341                 if(obi->flag & R_TRANSFORMED) {
342                         mul_m4_v3(obi->mat, v1);
343                         mul_m4_v3(obi->mat, v2);
344                         mul_m4_v3(obi->mat, v3);
345                         if(vlr->v4) mul_m4_v3(obi->mat, v4);
346                 }
347
348                 /* todo: correct area for instances */
349                 if(vlr->v4)
350                         *area= area_quad_v3(v1, v2, v3, v4);
351                 else
352                         *area= area_tri_v3(v1, v2, v3);
353         }
354 }
355
356 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
357 {
358         OccNode *child;
359         float occ, area, totarea, rad[3];
360         int a, b, indirect= tree->doindirect;
361
362         occ= 0.0f;
363         totarea= 0.0f;
364         if(indirect) zero_v3(rad);
365
366         for(b=0; b<TOTCHILD; b++) {
367                 if(node->childflag & (1<<b)) {
368                         a= node->child[b].face;
369                         occ_face(&tree->face[a], 0, 0, &area);
370                         occ += area*tree->occlusion[a];
371                         if(indirect) madd_v3_v3fl(rad, tree->rad[a], area);
372                         totarea += area;
373                 }
374                 else if(node->child[b].node) {
375                         child= node->child[b].node;
376                         occ_sum_occlusion(tree, child);
377
378                         occ += child->area*child->occlusion;
379                         if(indirect) madd_v3_v3fl(rad, child->rad, child->area);
380                         totarea += child->area;
381                 }
382         }
383
384         if(totarea != 0.0f) {
385                 occ /= totarea;
386                 if(indirect) mul_v3_fl(rad, 1.0f/totarea);
387         }
388         
389         node->occlusion= occ;
390         if(indirect) copy_v3_v3(node->rad, rad);
391 }
392
393 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
394 {
395         float len, maxlen= -1.0f;
396         int a, axis = 0;
397
398         INIT_MINMAX(min, max);
399
400         for(a=begin; a<end; a++)
401                 DO_MINMAX(tree->co[a], min, max)
402
403         for(a=0; a<3; a++) {
404                 len= max[a] - min[a];
405
406                 if(len > maxlen) {
407                         maxlen= len;
408                         axis= a;
409                 }
410         }
411
412         return axis;
413 }
414
415 static void occ_node_from_face(OccFace *face, OccNode *node)
416 {
417         float n[3];
418
419         occ_face(face, node->co, n, &node->area);
420         node->dco= 0.0f;
421         sh_from_disc(n, node->area, node->sh);
422 }
423
424 static void occ_build_dco(OcclusionTree *tree, OccNode *node, float *co, float *dco)
425 {
426         OccNode *child;
427         float dist, d[3], nco[3];
428         int b;
429
430         for(b=0; b<TOTCHILD; b++) {
431                 if(node->childflag & (1<<b)) {
432                         occ_face(tree->face+node->child[b].face, nco, 0, 0);
433                 }
434                 else if(node->child[b].node) {
435                         child= node->child[b].node;
436                         occ_build_dco(tree, child, co, dco);
437                         VECCOPY(nco, child->co);
438                 }
439
440                 VECSUB(d, nco, co);
441                 dist= INPR(d, d);
442                 if(dist > *dco)
443                         *dco= dist;
444         }
445 }
446
447 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
448 {
449         float min[3], max[3], mid;
450         int axis, a, enda;
451
452         /* split in middle of boundbox. this seems faster than median split
453          * on complex scenes, possibly since it avoids two distant faces to
454          * be in the same node better? */
455         axis= occ_find_bbox_axis(tree, begin, end, min, max);
456         mid= 0.5f*(min[axis]+max[axis]);
457
458         a= begin;
459         enda= end;
460         while(a<enda) {
461                 if(tree->co[a][axis] > mid) {
462                         enda--;
463                         SWAP(OccFace, tree->face[a], tree->face[enda]);
464                         SWAP(float, tree->co[a][0], tree->co[enda][0]);
465                         SWAP(float, tree->co[a][1], tree->co[enda][1]);
466                         SWAP(float, tree->co[a][2], tree->co[enda][2]);
467                 }
468                 else
469                         a++;
470         }
471
472         *split= enda;
473 }
474
475 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
476 {
477         /* split faces into eight groups */
478         int b, splitx, splity[2], splitz[4];
479
480         occ_build_split(tree, begin, end, &splitx);
481
482         /* force split if none found, to deal with degenerate geometry */
483         if(splitx == begin || splitx == end)
484                 splitx= (begin+end)/2;
485
486         occ_build_split(tree, begin, splitx, &splity[0]);
487         occ_build_split(tree, splitx, end, &splity[1]);
488
489         occ_build_split(tree, begin, splity[0], &splitz[0]);
490         occ_build_split(tree, splity[0], splitx, &splitz[1]);
491         occ_build_split(tree, splitx, splity[1], &splitz[2]);
492         occ_build_split(tree, splity[1], end, &splitz[3]);
493
494         offset[0]= begin;
495         offset[1]= splitz[0];
496         offset[2]= splity[0];
497         offset[3]= splitz[1];
498         offset[4]= splitx;
499         offset[5]= splitz[2];
500         offset[6]= splity[1];
501         offset[7]= splitz[3];
502
503         for(b=0; b<7; b++)
504                 count[b]= offset[b+1] - offset[b];
505         count[7]= end - offset[7];
506 }
507
508 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth);
509
510 static void *exec_occ_build(void *data)
511 {
512         OcclusionBuildThread *othread= (OcclusionBuildThread*)data;
513
514         occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth);
515
516         return 0;
517 }
518
519 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth)
520 {
521         ListBase threads;
522         OcclusionBuildThread othreads[BLENDER_MAX_THREADS];
523         OccNode *child, tmpnode;
524         OccFace *face;
525         int a, b, totthread=0, offset[TOTCHILD], count[TOTCHILD];
526
527         /* add a new node */
528         node->occlusion= 1.0f;
529
530         /* leaf node with only children */
531         if(end - begin <= TOTCHILD) {
532                 for(a=begin, b=0; a<end; a++, b++) {
533                         face= &tree->face[a];
534                         node->child[b].face= a;
535                         node->childflag |= (1<<b);
536                 }
537         }
538         else {
539                 /* order faces */
540                 occ_build_8_split(tree, begin, end, offset, count);
541
542                 if(depth == 1 && tree->dothreadedbuild)
543                         BLI_init_threads(&threads, exec_occ_build, tree->totbuildthread);
544
545                 for(b=0; b<TOTCHILD; b++) {
546                         if(count[b] == 0) {
547                                 node->child[b].node= NULL;
548                         }
549                         else if(count[b] == 1) {
550                                 face= &tree->face[offset[b]];
551                                 node->child[b].face= offset[b];
552                                 node->childflag |= (1<<b);
553                         }
554                         else {
555                                 if(tree->dothreadedbuild)
556                                         BLI_lock_thread(LOCK_CUSTOM1);
557
558                                 child= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
559                                 node->child[b].node= child;
560
561                                 /* keep track of maximum depth for stack */
562                                 if(depth+1 > tree->maxdepth)
563                                         tree->maxdepth= depth+1;
564
565                                 if(tree->dothreadedbuild)
566                                         BLI_unlock_thread(LOCK_CUSTOM1);
567
568                                 if(depth == 1 && tree->dothreadedbuild) {
569                                         othreads[totthread].tree= tree;
570                                         othreads[totthread].node= child;
571                                         othreads[totthread].begin= offset[b];
572                                         othreads[totthread].end= offset[b]+count[b];
573                                         othreads[totthread].depth= depth+1;
574                                         BLI_insert_thread(&threads, &othreads[totthread]);
575                                         totthread++;
576                                 }
577                                 else
578                                         occ_build_recursive(tree, child, offset[b], offset[b]+count[b], depth+1);
579                         }
580                 }
581
582                 if(depth == 1 && tree->dothreadedbuild)
583                         BLI_end_threads(&threads);
584         }
585
586         /* combine area, position and sh */
587         for(b=0; b<TOTCHILD; b++) {
588                 if(node->childflag & (1<<b)) {
589                         child= &tmpnode;
590                         occ_node_from_face(tree->face+node->child[b].face, &tmpnode);
591                 }
592                 else {
593                         child= node->child[b].node;
594                 }
595
596                 if(child) {
597                         node->area += child->area;
598                         sh_add(node->sh, node->sh, child->sh);
599                         VECADDFAC(node->co, node->co, child->co, child->area);
600                 }
601         }
602
603         if(node->area != 0.0f)
604                 mul_v3_fl(node->co, 1.0f/node->area);
605
606         /* compute maximum distance from center */
607         node->dco= 0.0f;
608         occ_build_dco(tree, node, node->co, &node->dco);
609 }
610
611 static void occ_build_sh_normalize(OccNode *node)
612 {
613         /* normalize spherical harmonics to not include area, so
614          * we can clamp the dot product and then mutliply by area */
615         int b;
616
617         if(node->area != 0.0f)
618                 sh_mul(node->sh, 1.0f/node->area);
619
620         for(b=0; b<TOTCHILD; b++) {
621                 if(node->childflag & (1<<b));
622                 else if(node->child[b].node)
623                         occ_build_sh_normalize(node->child[b].node);
624         }
625 }
626
627 static OcclusionTree *occ_tree_build(Render *re)
628 {
629         OcclusionTree *tree;
630         ObjectInstanceRen *obi;
631         ObjectRen *obr;
632         Material *ma;
633         VlakRen *vlr= NULL;
634         int a, b, c, totface;
635
636         /* count */
637         totface= 0;
638         for(obi=re->instancetable.first; obi; obi=obi->next) {
639                 obr= obi->obr;
640                 for(a=0; a<obr->totvlak; a++) {
641                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
642                         else vlr++;
643
644                         ma= vlr->mat;
645
646                         if((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE))
647                                 totface++;
648                 }
649         }
650
651         if(totface == 0)
652                 return NULL;
653         
654         tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
655         tree->totface= totface;
656
657         /* parameters */
658         tree->error= get_render_aosss_error(&re->r, re->wrld.ao_approx_error);
659         tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f;
660         tree->doindirect= (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0);
661
662         /* allocation */
663         tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena");
664         BLI_memarena_use_calloc(tree->arena);
665
666         if(re->wrld.aomode & WO_AOCACHE)
667                 tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache");
668
669         tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace");
670         tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo");
671         tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion");
672
673         if(tree->doindirect)
674                 tree->rad= MEM_callocN(sizeof(float)*3*totface, "OcclusionRad");
675
676         /* make array of face pointers */
677         for(b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) {
678                 obr= obi->obr;
679                 for(a=0; a<obr->totvlak; a++) {
680                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
681                         else vlr++;
682
683                         ma= vlr->mat;
684
685                         if((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) {
686                                 tree->face[b].obi= c;
687                                 tree->face[b].facenr= a;
688                                 tree->occlusion[b]= 1.0f;
689                                 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 
690                                 b++;
691                         }
692                 }
693         }
694
695         /* threads */
696         tree->totbuildthread= (re->r.threads > 1 && totface > 10000)? 8: 1;
697         tree->dothreadedbuild= (tree->totbuildthread > 1);
698
699         /* recurse */
700         tree->root= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
701         tree->maxdepth= 1;
702         occ_build_recursive(tree, tree->root, 0, totface, 1);
703
704         if(tree->doindirect) {
705                 occ_build_shade(re, tree);
706                 occ_sum_occlusion(tree, tree->root);
707         }
708         
709         MEM_freeN(tree->co);
710         tree->co= NULL;
711
712         occ_build_sh_normalize(tree->root);
713
714         for(a=0; a<BLENDER_MAX_THREADS; a++)
715                 tree->stack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack");
716
717         return tree;
718 }
719
720 static void occ_free_tree(OcclusionTree *tree)
721 {
722         int a;
723
724         if(tree) {
725                 if(tree->arena) BLI_memarena_free(tree->arena);
726                 for(a=0; a<BLENDER_MAX_THREADS; a++)
727                         if(tree->stack[a])
728                                 MEM_freeN(tree->stack[a]);
729                 if(tree->occlusion) MEM_freeN(tree->occlusion);
730                 if(tree->cache) MEM_freeN(tree->cache);
731                 if(tree->face) MEM_freeN(tree->face);
732                 if(tree->rad) MEM_freeN(tree->rad);
733                 MEM_freeN(tree);
734         }
735 }
736
737 /* ------------------------- Traversal --------------------------- */
738
739 static float occ_solid_angle(OccNode *node, float *v, float d2, float invd2, float *receivenormal)
740 {
741         float dotreceive, dotemit;
742         float ev[3];
743
744         ev[0]= -v[0]*invd2;
745         ev[1]= -v[1]*invd2;
746         ev[2]= -v[2]*invd2;
747         dotemit= sh_eval(node->sh, ev);
748         dotreceive= INPR(receivenormal, v)*invd2;
749
750         CLAMP(dotemit, 0.0f, 1.0f);
751         CLAMP(dotreceive, 0.0f, 1.0f);
752         
753         return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI;
754 }
755
756 static void VecAddDir(float *result, float *v1, float *v2, float fac)
757 {
758         result[0]= v1[0] + fac*(v2[0] - v1[0]);
759         result[1]= v1[1] + fac*(v2[1] - v1[1]);
760         result[2]= v1[2] + fac*(v2[2] - v1[2]);
761 }
762
763 static int occ_visible_quad(float *p, float *n, float *v0, float *v1, float *v2, float *q0, float *q1, float *q2, float *q3)
764 {
765         static const float epsilon = 1e-6f;
766         float c, sd[3];
767         
768         c= INPR(n, p);
769
770         /* signed distances from the vertices to the plane. */
771         sd[0]= INPR(n, v0) - c;
772         sd[1]= INPR(n, v1) - c;
773         sd[2]= INPR(n, v2) - c;
774
775         if(fabsf(sd[0]) < epsilon) sd[0] = 0.0f;
776         if(fabsf(sd[1]) < epsilon) sd[1] = 0.0f;
777         if(fabsf(sd[2]) < epsilon) sd[2] = 0.0f;
778
779         if(sd[0] > 0) {
780                 if(sd[1] > 0) {
781                         if(sd[2] > 0) {
782                                 // +++
783                                 VECCOPY(q0, v0);
784                                 VECCOPY(q1, v1);
785                                 VECCOPY(q2, v2);
786                                 VECCOPY(q3, q2);
787                         }
788                         else if(sd[2] < 0) {
789                                 // ++-
790                                 VECCOPY(q0, v0);
791                                 VECCOPY(q1, v1);
792                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
793                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
794                         }
795                         else {
796                                 // ++0
797                                 VECCOPY(q0, v0);
798                                 VECCOPY(q1, v1);
799                                 VECCOPY(q2, v2);
800                                 VECCOPY(q3, q2);
801                         }
802                 }
803                 else if(sd[1] < 0) {
804                         if(sd[2] > 0) {
805                                 // +-+
806                                 VECCOPY(q0, v0);
807                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
808                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
809                                 VECCOPY(q3, v2);
810                         }
811                         else if(sd[2] < 0) {
812                                 // +--
813                                 VECCOPY(q0, v0);
814                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
815                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
816                                 VECCOPY(q3, q2);
817                         }
818                         else {
819                                 // +-0
820                                 VECCOPY(q0, v0);
821                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
822                                 VECCOPY(q2, v2);
823                                 VECCOPY(q3, q2);
824                         }
825                 }
826                 else {
827                         if(sd[2] > 0) {
828                                 // +0+
829                                 VECCOPY(q0, v0);
830                                 VECCOPY(q1, v1);
831                                 VECCOPY(q2, v2);
832                                 VECCOPY(q3, q2);
833                         }
834                         else if(sd[2] < 0) {
835                                 // +0-
836                                 VECCOPY(q0, v0);
837                                 VECCOPY(q1, v1);
838                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
839                                 VECCOPY(q3, q2);
840                         }
841                         else {
842                                 // +00
843                                 VECCOPY(q0, v0);
844                                 VECCOPY(q1, v1);
845                                 VECCOPY(q2, v2);
846                                 VECCOPY(q3, q2);
847                         }
848                 }
849         }
850         else if(sd[0] < 0) {
851                 if(sd[1] > 0) {
852                         if(sd[2] > 0) {
853                                 // -++
854                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
855                                 VECCOPY(q1, v1);
856                                 VECCOPY(q2, v2);
857                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
858                         }
859                         else if(sd[2] < 0) {
860                                 // -+-
861                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
862                                 VECCOPY(q1, v1);
863                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
864                                 VECCOPY(q3, q2);
865                         }
866                         else {
867                                 // -+0
868                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
869                                 VECCOPY(q1, v1);
870                                 VECCOPY(q2, v2);
871                                 VECCOPY(q3, q2);
872                         }
873                 }
874                 else if(sd[1] < 0) {
875                         if(sd[2] > 0) {
876                                 // --+
877                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
878                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
879                                 VECCOPY(q2, v2);
880                                 VECCOPY(q3, q2);
881                         }
882                         else if(sd[2] < 0) {
883                                 // ---
884                                 return 0;
885                         }
886                         else {
887                                 // --0
888                                 return 0;
889                         }
890                 }
891                 else {
892                         if(sd[2] > 0) {
893                                 // -0+
894                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
895                                 VECCOPY(q1, v1);
896                                 VECCOPY(q2, v2);
897                                 VECCOPY(q3, q2);
898                         }
899                         else if(sd[2] < 0) {
900                                 // -0-
901                                 return 0;
902                         }
903                         else {
904                                 // -00
905                                 return 0;
906                         }
907                 }
908         }
909         else {
910                 if(sd[1] > 0) {
911                         if(sd[2] > 0) {
912                                 // 0++
913                                 VECCOPY(q0, v0);
914                                 VECCOPY(q1, v1);
915                                 VECCOPY(q2, v2);
916                                 VECCOPY(q3, q2);
917                         }
918                         else if(sd[2] < 0) {
919                                 // 0+-
920                                 VECCOPY(q0, v0);
921                                 VECCOPY(q1, v1);
922                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
923                                 VECCOPY(q3, q2);
924                         }
925                         else {
926                                 // 0+0
927                                 VECCOPY(q0, v0);
928                                 VECCOPY(q1, v1);
929                                 VECCOPY(q2, v2);
930                                 VECCOPY(q3, q2);
931                         }
932                 }
933                 else if(sd[1] < 0) {
934                         if(sd[2] > 0) {
935                                 // 0-+
936                                 VECCOPY(q0, v0);
937                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
938                                 VECCOPY(q2, v2);
939                                 VECCOPY(q3, q2);
940                         }
941                         else if(sd[2] < 0) {
942                                 // 0--
943                                 return 0;
944                         }
945                         else {
946                                 // 0-0
947                                 return 0;
948                         }
949                 }
950                 else {
951                         if(sd[2] > 0) {
952                                 // 00+
953                                 VECCOPY(q0, v0);
954                                 VECCOPY(q1, v1);
955                                 VECCOPY(q2, v2);
956                                 VECCOPY(q3, q2);
957                         }
958                         else if(sd[2] < 0) {
959                                 // 00-
960                                 return 0;
961                         }
962                         else {
963                                 // 000
964                                 return 0;
965                         }
966                 }
967         }
968
969         return 1;
970 }
971
972 /* altivec optimization, this works, but is unused */
973
974 #if 0
975 #include <Accelerate/Accelerate.h>
976
977 typedef union {
978         vFloat v;
979         float f[4];
980 } vFloatResult;
981
982 static vFloat vec_splat_float(float val)
983 {
984         return (vFloat){val, val, val, val};
985 }
986
987 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
988 {
989         vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle;
990         vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3};
991         vFloatResult vresult;
992         float result;
993
994         /* compute r* */
995         vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]);
996         vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]);
997         vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]);
998
999         /* normalize r* */
1000         rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f));
1001         vrx = vrx*rlen;
1002         vry = vry*rlen;
1003         vrz = vrz*rlen;
1004
1005         /* rotate r* for cross and dot */
1006         vsrx= vec_perm(vrx, vrx, rotate);
1007         vsry= vec_perm(vry, vry, rotate);
1008         vsrz= vec_perm(vrz, vrz, rotate);
1009
1010         /* cross product */
1011         gx = vsry*vrz - vsrz*vry;
1012         gy = vsrz*vrx - vsrx*vrz;
1013         gz = vsrx*vry - vsry*vrx;
1014
1015         /* normalize */
1016         rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f));
1017         gx = gx*rlen;
1018         gy = gy*rlen;
1019         gz = gz*rlen;
1020
1021         /* angle */
1022         vcos = vrx*vsrx + vry*vsry + vrz*vsrz;
1023         vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f));
1024         vangle= vacosf(vcos);
1025
1026         /* dot */
1027         vresult.v = (vec_splat_float(n[0])*gx +
1028                                  vec_splat_float(n[1])*gy +
1029                                  vec_splat_float(n[2])*gz)*vangle;
1030
1031         result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI);
1032         result= MAX2(result, 0.0f);
1033
1034         return result;
1035 }
1036
1037 #endif
1038
1039 /* SSE optimization, acos code doesn't work */
1040
1041 #if 0
1042
1043 #include <xmmintrin.h>
1044
1045 static __m128 sse_approx_acos(__m128 x)
1046 {
1047         /* needs a better approximation than taylor expansion of acos, since that
1048          * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work
1049          * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */
1050
1051         return _mm_set_ps1(1.0f);
1052 }
1053
1054 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
1055 {
1056         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1057         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1058         float fresult[4] __attribute__((aligned(16)));
1059         __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult;
1060
1061         /* compute r */
1062         qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]);
1063         qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]);
1064         qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]);
1065
1066         rx = qx - _mm_set_ps1(p[0]);
1067         ry = qy - _mm_set_ps1(p[1]);
1068         rz = qz - _mm_set_ps1(p[2]);
1069
1070         /* normalize r */
1071         rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f));
1072         rx = rx*rlen;
1073         ry = ry*rlen;
1074         rz = rz*rlen;
1075
1076         /* cross product */
1077         srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1));
1078         sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1));
1079         srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1));
1080
1081         gx = sry*rz - srz*ry;
1082         gy = srz*rx - srx*rz;
1083         gz = srx*ry - sry*rx;
1084
1085         /* normalize g */
1086         glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f));
1087         gx = gx*glen;
1088         gy = gy*glen;
1089         gz = gz*glen;
1090
1091         /* compute angle */
1092         rcos = rx*srx + ry*sry + rz*srz;
1093         rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f));
1094
1095         angle = sse_approx_cos(rcos);
1096         aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle;
1097
1098         /* sum together */
1099         result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI);
1100         result= MAX2(result, 0.0f);
1101
1102         return result;
1103 }
1104
1105 #endif
1106
1107 static void normalizef(float *n)
1108 {
1109         float d;
1110         
1111         d= INPR(n, n);
1112
1113         if(d > 1.0e-35F) {
1114                 d= 1.0f/sqrtf(d);
1115
1116                 n[0] *= d; 
1117                 n[1] *= d; 
1118                 n[2] *= d;
1119         } 
1120 }
1121
1122 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
1123 {
1124         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1125         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1126
1127         VECSUB(r0, q0, p);
1128         VECSUB(r1, q1, p);
1129         VECSUB(r2, q2, p);
1130         VECSUB(r3, q3, p);
1131
1132         normalizef(r0);
1133         normalizef(r1);
1134         normalizef(r2);
1135         normalizef(r3);
1136
1137         cross_v3_v3v3(g0, r1, r0); normalizef(g0);
1138         cross_v3_v3v3(g1, r2, r1); normalizef(g1);
1139         cross_v3_v3v3(g2, r3, r2); normalizef(g2);
1140         cross_v3_v3v3(g3, r0, r3); normalizef(g3);
1141
1142         a1= saacosf(INPR(r0, r1));
1143         a2= saacosf(INPR(r1, r2));
1144         a3= saacosf(INPR(r2, r3));
1145         a4= saacosf(INPR(r3, r0));
1146
1147         dot1= INPR(n, g0);
1148         dot2= INPR(n, g1);
1149         dot3= INPR(n, g2);
1150         dot4= INPR(n, g3);
1151
1152         result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI;
1153         result= MAX2(result, 0.0f);
1154
1155         return result;
1156 }
1157
1158 static float occ_form_factor(OccFace *face, float *p, float *n)
1159 {
1160         ObjectInstanceRen *obi;
1161         VlakRen *vlr;
1162         float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f;
1163
1164         obi= &R.objectinstance[face->obi];
1165         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
1166
1167         VECCOPY(v1, vlr->v1->co);
1168         VECCOPY(v2, vlr->v2->co);
1169         VECCOPY(v3, vlr->v3->co);
1170
1171         if(obi->flag & R_TRANSFORMED) {
1172                 mul_m4_v3(obi->mat, v1);
1173                 mul_m4_v3(obi->mat, v2);
1174                 mul_m4_v3(obi->mat, v3);
1175         }
1176
1177         if(occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
1178                 contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1179
1180         if(vlr->v4) {
1181                 VECCOPY(v4, vlr->v4->co);
1182                 if(obi->flag & R_TRANSFORMED)
1183                         mul_m4_v3(obi->mat, v4);
1184
1185                 if(occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
1186                         contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1187         }
1188
1189         return contrib;
1190 }
1191
1192 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float rad[3], float bentn[3])
1193 {
1194         OccNode *node, **stack;
1195         OccFace *face;
1196         float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2;
1197         float distfac, fac, error, d2, weight, emitarea;
1198         int b, f, totstack;
1199
1200         /* init variables */
1201         VECCOPY(p, pp);
1202         VECCOPY(n, pn);
1203         VECADDFAC(p, p, n, 1e-4f);
1204
1205         if(bentn)
1206                 copy_v3_v3(bentn, n);
1207         
1208         error= tree->error;
1209         distfac= tree->distfac;
1210
1211         resultocc= 0.0f;
1212         zero_v3(resultrad);
1213
1214         /* init stack */
1215         stack= tree->stack[thread];
1216         stack[0]= tree->root;
1217         totstack= 1;
1218
1219         while(totstack) {
1220                 /* pop point off the stack */
1221                 node= stack[--totstack];
1222
1223                 VECSUB(v, node->co, p);
1224                 d2= INPR(v, v) + 1e-16f;
1225                 emitarea= MAX2(node->area, node->dco);
1226
1227                 if(d2*error > emitarea) {
1228                         if(distfac != 0.0f) {
1229                                 fac= 1.0f/(1.0f + distfac*d2);
1230                                 if(fac < 0.01f)
1231                                         continue;
1232                         }
1233                         else
1234                                 fac= 1.0f;
1235
1236                         /* accumulate occlusion from spherical harmonics */
1237                         invd2 = 1.0f/sqrtf(d2);
1238                         weight= occ_solid_angle(node, v, d2, invd2, n);
1239
1240                         if(rad)
1241                                 madd_v3_v3fl(resultrad, node->rad, weight*fac);
1242
1243                         weight *= node->occlusion;
1244
1245                         if(bentn) {
1246                                 bentn[0] -= weight*invd2*v[0];
1247                                 bentn[1] -= weight*invd2*v[1];
1248                                 bentn[2] -= weight*invd2*v[2];
1249                         }
1250
1251                         resultocc += weight*fac;
1252                 }
1253                 else {
1254                         /* traverse into children */
1255                         for(b=0; b<TOTCHILD; b++) {
1256                                 if(node->childflag & (1<<b)) {
1257                                         f= node->child[b].face;
1258                                         face= &tree->face[f];
1259
1260                                         /* accumulate occlusion with face form factor */
1261                                         if(!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
1262                                                 if(bentn || distfac != 0.0f) {
1263                                                         occ_face(face, co, NULL, NULL); 
1264                                                         VECSUB(v, co, p);
1265                                                         d2= INPR(v, v) + 1e-16f;
1266
1267                                                         fac= (distfac == 0.0f)? 1.0f: 1.0f/(1.0f + distfac*d2);
1268                                                         if(fac < 0.01f)
1269                                                                 continue;
1270                                                 }
1271                                                 else
1272                                                         fac= 1.0f;
1273
1274                                                 weight= occ_form_factor(face, p, n);
1275
1276                                                 if(rad)
1277                                                         madd_v3_v3fl(resultrad, tree->rad[f], weight*fac);
1278
1279                                                 weight *= tree->occlusion[f];
1280
1281                                                 if(bentn) {
1282                                                         invd2= 1.0f/sqrtf(d2);
1283                                                         bentn[0] -= weight*invd2*v[0];
1284                                                         bentn[1] -= weight*invd2*v[1];
1285                                                         bentn[2] -= weight*invd2*v[2];
1286                                                 }
1287
1288                                                 resultocc += weight*fac;
1289                                         }
1290                                 }
1291                                 else if(node->child[b].node) {
1292                                         /* push child on the stack */
1293                                         stack[totstack++]= node->child[b].node;
1294                                 }
1295                         }
1296                 }
1297         }
1298
1299         if(occ) *occ= resultocc;
1300         if(rad) copy_v3_v3(rad, resultrad);
1301         /*if(rad && exclude) {
1302                 int a;
1303                 for(a=0; a<tree->totface; a++)
1304                         if((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr))
1305                                 copy_v3_v3(rad, tree->rad[a]);
1306         }*/
1307         if(bentn) normalize_v3(bentn);
1308 }
1309
1310 static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce)
1311 {
1312         float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ;
1313         int bounce, i;
1314
1315         rad= MEM_callocN(sizeof(float)*3*tree->totface, "OcclusionBounceRad");
1316         sum= MEM_dupallocN(tree->rad);
1317
1318         for(bounce=1; bounce<totbounce; bounce++) {
1319                 for(i=0; i<tree->totface; i++) {
1320                         occ_face(&tree->face[i], co, n, NULL);
1321                         madd_v3_v3fl(co, n, 1e-8f);
1322
1323                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL);
1324                         rad[i][0]= MAX2(rad[i][0], 0.0f);
1325                         rad[i][1]= MAX2(rad[i][1], 0.0f);
1326                         rad[i][2]= MAX2(rad[i][2], 0.0f);
1327                         add_v3_v3(sum[i], rad[i]);
1328
1329                         if(re->test_break(re->tbh))
1330                                 break;
1331                 }
1332
1333                 if(re->test_break(re->tbh))
1334                         break;
1335
1336                 tmp= tree->rad;
1337                 tree->rad= rad;
1338                 rad= tmp;
1339
1340                 occ_sum_occlusion(tree, tree->root);
1341         }
1342
1343         MEM_freeN(rad);
1344         MEM_freeN(tree->rad);
1345         tree->rad= sum;
1346
1347         if(!re->test_break(re->tbh))
1348                 occ_sum_occlusion(tree, tree->root);
1349 }
1350
1351 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
1352 {
1353         float *occ, co[3], n[3];
1354         int pass, i;
1355         
1356         occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc");
1357
1358         for(pass=0; pass<totpass; pass++) {
1359                 for(i=0; i<tree->totface; i++) {
1360                         occ_face(&tree->face[i], co, n, NULL);
1361                         negate_v3(n);
1362                         VECADDFAC(co, co, n, 1e-8f);
1363
1364                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL);
1365                         if(re->test_break(re->tbh))
1366                                 break;
1367                 }
1368
1369                 if(re->test_break(re->tbh))
1370                         break;
1371
1372                 for(i=0; i<tree->totface; i++) {
1373                         tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
1374                         if(tree->occlusion[i] < 0.0f)
1375                                 tree->occlusion[i]= 0.0f;
1376                 }
1377
1378                 occ_sum_occlusion(tree, tree->root);
1379         }
1380
1381         MEM_freeN(occ);
1382 }
1383
1384 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *ao, float *env, float *indirect)
1385 {
1386         float nn[3], bn[3], fac, occ, occlusion, correction, rad[3];
1387         int envcolor;
1388
1389         envcolor= re->wrld.aocolor;
1390         if(onlyshadow)
1391                 envcolor= WO_AOPLAIN;
1392
1393         negate_v3_v3(nn, n);
1394
1395         occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect)? rad: NULL, (env && envcolor)? bn: NULL);
1396
1397         correction= re->wrld.ao_approx_correction;
1398
1399         occlusion= (1.0f-correction)*(1.0f-occ);
1400         CLAMP(occlusion, 0.0f, 1.0f);
1401         if(correction != 0.0f)
1402                 occlusion += correction*exp(-occ);
1403
1404         if(env) {
1405                 /* sky shading using bent normal */
1406                 if(ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) {
1407                         fac= 0.5*(1.0f+bn[0]*re->grvec[0]+ bn[1]*re->grvec[1]+ bn[2]*re->grvec[2]);
1408                         env[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr;
1409                         env[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng;
1410                         env[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb;
1411
1412                         mul_v3_fl(env, occlusion);
1413                 }
1414                 else {
1415                         env[0]= occlusion;
1416                         env[1]= occlusion;
1417                         env[2]= occlusion;
1418                 }
1419 #if 0
1420                 else {  /* WO_AOSKYTEX */
1421                         float dxyview[3];
1422                         bn[0]= -bn[0];
1423                         bn[1]= -bn[1];
1424                         bn[2]= -bn[2];
1425                         dxyview[0]= 1.0f;
1426                         dxyview[1]= 1.0f;
1427                         dxyview[2]= 0.0f;
1428                         shadeSkyView(ao, co, bn, dxyview);
1429                 }
1430 #endif
1431         }
1432
1433         if(ao) {
1434                 ao[0]= occlusion;
1435                 ao[1]= occlusion;
1436                 ao[2]= occlusion;
1437         }
1438
1439         if(tree->doindirect) copy_v3_v3(indirect, rad);
1440         else zero_v3(indirect);
1441 }
1442
1443 /* ---------------------------- Caching ------------------------------- */
1444
1445 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
1446 {
1447         x -= cache->x;
1448         y -= cache->y;
1449
1450         x /= cache->step;
1451         y /= cache->step;
1452         x *= cache->step;
1453         y *= cache->step;
1454
1455         if(x < 0 || x >= cache->w || y < 0 || y >= cache->h)
1456                 return NULL;
1457         else
1458                 return &cache->sample[y*cache->w + x];
1459 }
1460
1461 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect)
1462 {
1463         OcclusionCache *cache;
1464         OcclusionCacheSample *samples[4], *sample;
1465         float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
1466         float d[3], dist2;
1467         int i, x1, y1, x2, y2;
1468
1469         if(!tree->cache)
1470                 return 0;
1471         
1472         /* first try to find a sample in the same pixel */
1473         cache= &tree->cache[thread];
1474
1475         if(cache->sample && cache->step) {
1476                 sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)];
1477                 if(sample->filled) {
1478                         VECSUB(d, sample->co, co);
1479                         dist2= INPR(d, d);
1480                         if(dist2 < 0.5f*sample->dist2 && INPR(sample->n, n) > 0.98f) {
1481                                 VECCOPY(ao, sample->ao);
1482                                 VECCOPY(env, sample->env);
1483                                 VECCOPY(indirect, sample->indirect);
1484                                 return 1;
1485                         }
1486                 }
1487         }
1488         else
1489                 return 0;
1490
1491         /* try to interpolate between 4 neighbouring pixels */
1492         samples[0]= find_occ_sample(cache, x, y);
1493         samples[1]= find_occ_sample(cache, x+cache->step, y);
1494         samples[2]= find_occ_sample(cache, x, y+cache->step);
1495         samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step);
1496
1497         for(i=0; i<4; i++)
1498                 if(!samples[i] || !samples[i]->filled)
1499                         return 0;
1500
1501         /* require intensities not being too different */
1502         mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1503         maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1504
1505         if(maxo - mino > 0.05f)
1506                 return 0;
1507
1508         /* compute weighted interpolation between samples */
1509         zero_v3(ao);
1510         zero_v3(env);
1511         zero_v3(indirect);
1512         totw= 0.0f;
1513
1514         x1= samples[0]->x;
1515         y1= samples[0]->y;
1516         x2= samples[3]->x;
1517         y2= samples[3]->y;
1518
1519         tx= (float)(x2 - x)/(float)(x2 - x1);
1520         ty= (float)(y2 - y)/(float)(y2 - y1);
1521
1522         wb[3]= (1.0f-tx)*(1.0f-ty);
1523         wb[2]= (tx)*(1.0f-ty);
1524         wb[1]= (1.0f-tx)*(ty);
1525         wb[0]= tx*ty;
1526
1527         for(i=0; i<4; i++) {
1528                 VECSUB(d, samples[i]->co, co);
1529                 dist2= INPR(d, d);
1530
1531                 wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
1532                 wn[i]= pow(INPR(samples[i]->n, n), 32.0f);
1533
1534                 w= wb[i]*wn[i]*wz[i];
1535
1536                 totw += w;
1537                 madd_v3_v3fl(ao, samples[i]->ao, w);
1538                 madd_v3_v3fl(env, samples[i]->env, w);
1539                 madd_v3_v3fl(indirect, samples[i]->indirect, w);
1540         }
1541
1542         if(totw >= 0.9f) {
1543                 totw= 1.0f/totw;
1544                 mul_v3_fl(ao, totw);
1545                 mul_v3_fl(env, totw);
1546                 mul_v3_fl(indirect, totw);
1547                 return 1;
1548         }
1549
1550         return 0;
1551 }
1552
1553 static void sample_occ_surface(ShadeInput *shi)
1554 {
1555         StrandRen *strand= shi->strand;
1556         StrandSurface *mesh= strand->buffer->surface;
1557         int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
1558         float w[4], *co1, *co2, *co3, *co4;
1559
1560         if(mesh && mesh->face && mesh->co && mesh->ao && index) {
1561                 face= mesh->face[*index];
1562
1563                 co1= mesh->co[face[0]];
1564                 co2= mesh->co[face[1]];
1565                 co3= mesh->co[face[2]];
1566                 co4= (face[3])? mesh->co[face[3]]: NULL;
1567
1568                 interp_weights_face_v3(w, co1, co2, co3, co4, strand->vert->co);
1569
1570                 zero_v3(shi->ao);
1571                 zero_v3(shi->env);
1572                 zero_v3(shi->indirect);
1573
1574                 madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]);
1575                 madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]);
1576                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]);
1577                 madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]);
1578                 madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]);
1579                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]);
1580                 madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]);
1581                 madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]);
1582                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]);
1583                 if(face[3]) {
1584                         madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]);
1585                         madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]);
1586                         madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]);
1587                 }
1588         }
1589         else {
1590                 shi->ao[0]= 1.0f;
1591                 shi->ao[1]= 1.0f;
1592                 shi->ao[2]= 1.0f;
1593                 zero_v3(shi->env);
1594                 zero_v3(shi->indirect);
1595         }
1596 }
1597
1598 /* ------------------------- External Functions --------------------------- */
1599
1600 static void *exec_strandsurface_sample(void *data)
1601 {
1602         OcclusionThread *othread= (OcclusionThread*)data;
1603         Render *re= othread->re;
1604         StrandSurface *mesh= othread->mesh;
1605         float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4;
1606         int a, *face;
1607
1608         for(a=othread->begin; a<othread->end; a++) {
1609                 face= mesh->face[a];
1610                 co1= mesh->co[face[0]];
1611                 co2= mesh->co[face[1]];
1612                 co3= mesh->co[face[2]];
1613
1614                 if(face[3]) {
1615                         co4= mesh->co[face[3]];
1616
1617                         interp_v3_v3v3(co, co1, co3, 0.5f);
1618                         normal_quad_v3( n,co1, co2, co3, co4);
1619                 }
1620                 else {
1621                         cent_tri_v3(co, co1, co2, co3);
1622                         normal_tri_v3( n,co1, co2, co3);
1623                 }
1624                 negate_v3(n);
1625
1626                 sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect);
1627                 VECCOPY(othread->faceao[a], ao);
1628                 VECCOPY(othread->faceenv[a], env);
1629                 VECCOPY(othread->faceindirect[a], indirect);
1630         }
1631
1632         return 0;
1633 }
1634
1635 void make_occ_tree(Render *re)
1636 {
1637         OcclusionThread othreads[BLENDER_MAX_THREADS];
1638         OcclusionTree *tree;
1639         StrandSurface *mesh;
1640         ListBase threads;
1641         float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3];
1642         int a, totface, totthread, *face, *count;
1643
1644         /* ugly, needed for occ_face */
1645         R= *re;
1646
1647         re->i.infostr= "Occlusion preprocessing";
1648         re->stats_draw(re->sdh, &re->i);
1649         
1650         re->occlusiontree= tree= occ_tree_build(re);
1651         
1652         if(tree) {
1653                 if(re->wrld.ao_approx_passes > 0)
1654                         occ_compute_passes(re, tree, re->wrld.ao_approx_passes);
1655                 if(tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT))
1656                         occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces);
1657
1658                 for(mesh=re->strandsurface.first; mesh; mesh=mesh->next) {
1659                         if(!mesh->face || !mesh->co || !mesh->ao)
1660                                 continue;
1661
1662                         count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount");
1663                         faceao= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceAO");
1664                         faceenv= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceEnv");
1665                         faceindirect= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceIndirect");
1666
1667                         totthread= (mesh->totface > 10000)? re->r.threads: 1;
1668                         totface= mesh->totface/totthread;
1669                         for(a=0; a<totthread; a++) {
1670                                 othreads[a].re= re;
1671                                 othreads[a].faceao= faceao;
1672                                 othreads[a].faceenv= faceenv;
1673                                 othreads[a].faceindirect= faceindirect;
1674                                 othreads[a].thread= a;
1675                                 othreads[a].mesh= mesh;
1676                                 othreads[a].begin= a*totface;
1677                                 othreads[a].end= (a == totthread-1)? mesh->totface: (a+1)*totface;
1678                         }
1679
1680                         if(totthread == 1) {
1681                                 exec_strandsurface_sample(&othreads[0]);
1682                         }
1683                         else {
1684                                 BLI_init_threads(&threads, exec_strandsurface_sample, totthread);
1685
1686                                 for(a=0; a<totthread; a++)
1687                                         BLI_insert_thread(&threads, &othreads[a]);
1688
1689                                 BLI_end_threads(&threads);
1690                         }
1691
1692                         for(a=0; a<mesh->totface; a++) {
1693                                 face= mesh->face[a];
1694
1695                                 VECCOPY(ao, faceao[a]);
1696                                 VECCOPY(env, faceenv[a]);
1697                                 VECCOPY(indirect, faceindirect[a]);
1698
1699                                 VECADD(mesh->ao[face[0]], mesh->ao[face[0]], ao);
1700                                 VECADD(mesh->env[face[0]], mesh->env[face[0]], env);
1701                                 VECADD(mesh->indirect[face[0]], mesh->indirect[face[0]], indirect);
1702                                 count[face[0]]++;
1703                                 VECADD(mesh->ao[face[1]], mesh->ao[face[1]], ao);
1704                                 VECADD(mesh->env[face[1]], mesh->env[face[1]], env);
1705                                 VECADD(mesh->indirect[face[1]], mesh->indirect[face[1]], indirect);
1706                                 count[face[1]]++;
1707                                 VECADD(mesh->ao[face[2]], mesh->ao[face[2]], ao);
1708                                 VECADD(mesh->env[face[2]], mesh->env[face[2]], env);
1709                                 VECADD(mesh->indirect[face[2]], mesh->indirect[face[2]], indirect);
1710                                 count[face[2]]++;
1711
1712                                 if(face[3]) {
1713                                         VECADD(mesh->ao[face[3]], mesh->ao[face[3]], ao);
1714                                         VECADD(mesh->env[face[3]], mesh->env[face[3]], env);
1715                                         VECADD(mesh->indirect[face[3]], mesh->indirect[face[3]], indirect);
1716                                         count[face[3]]++;
1717                                 }
1718                         }
1719
1720                         for(a=0; a<mesh->totvert; a++) {
1721                                 if(count[a]) {
1722                                         mul_v3_fl(mesh->ao[a], 1.0f/count[a]);
1723                                         mul_v3_fl(mesh->env[a], 1.0f/count[a]);
1724                                         mul_v3_fl(mesh->indirect[a], 1.0f/count[a]);
1725                                 }
1726                         }
1727
1728                         MEM_freeN(count);
1729                         MEM_freeN(faceao);
1730                         MEM_freeN(faceenv);
1731                         MEM_freeN(faceindirect);
1732                 }
1733         }
1734 }
1735
1736 void free_occ(Render *re)
1737 {
1738         if(re->occlusiontree) {
1739                 occ_free_tree(re->occlusiontree);
1740                 re->occlusiontree = NULL;
1741         }
1742 }
1743
1744 void sample_occ(Render *re, ShadeInput *shi)
1745 {
1746         OcclusionTree *tree= re->occlusiontree;
1747         OcclusionCache *cache;
1748         OcclusionCacheSample *sample;
1749         OccFace exclude;
1750         int onlyshadow;
1751
1752         if(tree) {
1753                 if(shi->strand) {
1754                         sample_occ_surface(shi);
1755                 }
1756                 /* try to get result from the cache if possible */
1757                 else if(shi->depth!=0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) {
1758                         /* no luck, let's sample the occlusion */
1759                         exclude.obi= shi->obi - re->objectinstance;
1760                         exclude.facenr= shi->vlr->index;
1761                         onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
1762                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1763
1764                         /* fill result into sample, each time */
1765                         if(tree->cache) {
1766                                 cache= &tree->cache[shi->thread];
1767
1768                                 if(cache->sample && cache->step) {
1769                                         sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)];
1770                                         VECCOPY(sample->co, shi->co);
1771                                         VECCOPY(sample->n, shi->vno);
1772                                         VECCOPY(sample->ao, shi->ao);
1773                                         VECCOPY(sample->env, shi->env);
1774                                         VECCOPY(sample->indirect, shi->indirect);
1775                                         sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]);
1776                                         sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2]));
1777                                         sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1778                                         sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
1779                                         sample->filled= 1;
1780                                 }
1781                         }
1782                 }
1783         }
1784         else {
1785                 shi->ao[0]= 1.0f;
1786                 shi->ao[1]= 1.0f;
1787                 shi->ao[2]= 1.0f;
1788
1789                 shi->env[0]= 0.0f;
1790                 shi->env[1]= 0.0f;
1791                 shi->env[2]= 0.0f;
1792
1793                 shi->indirect[0]= 0.0f;
1794                 shi->indirect[1]= 0.0f;
1795                 shi->indirect[2]= 0.0f;
1796         }
1797 }
1798
1799 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
1800 {
1801         OcclusionTree *tree= re->occlusiontree;
1802         PixStr ps;
1803         OcclusionCache *cache;
1804         OcclusionCacheSample *sample;
1805         OccFace exclude;
1806         ShadeInput *shi;
1807         intptr_t *rd=NULL;
1808         int *ro=NULL, *rp=NULL, *rz=NULL, onlyshadow;
1809         int x, y, step = CACHE_STEP;
1810
1811         if(!tree->cache)
1812                 return;
1813
1814         cache= &tree->cache[pa->thread];
1815         cache->w= pa->rectx;
1816         cache->h= pa->recty;
1817         cache->x= pa->disprect.xmin;
1818         cache->y= pa->disprect.ymin;
1819         cache->step= step;
1820         cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample");
1821         sample= cache->sample;
1822
1823         if(re->osa) {
1824                 rd= pa->rectdaps;
1825         }
1826         else {
1827                 /* fake pixel struct for non-osa */
1828                 ps.next= NULL;
1829                 ps.mask= 0xFFFF;
1830
1831                 ro= pa->recto;
1832                 rp= pa->rectp;
1833                 rz= pa->rectz;
1834         }
1835
1836         /* compute a sample at every step pixels */
1837         for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
1838                 for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) {
1839                         if(!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1))
1840                                 continue;
1841                         if(!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1))
1842                                 continue;
1843
1844                         if(re->osa) {
1845                                 if(!*rd) continue;
1846
1847                                 shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y);
1848                         }
1849                         else {
1850                                 if(!*rp) continue;
1851
1852                                 ps.obi= *ro;
1853                                 ps.facenr= *rp;
1854                                 ps.z= *rz;
1855                                 shade_samples_fill_with_ps(ssamp, &ps, x, y);
1856                         }
1857
1858                         shi= ssamp->shi;
1859                         if(shi->vlr) {
1860                                 onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
1861                                 exclude.obi= shi->obi - re->objectinstance;
1862                                 exclude.facenr= shi->vlr->index;
1863                                 sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1864
1865                                 VECCOPY(sample->co, shi->co);
1866                                 VECCOPY(sample->n, shi->vno);
1867                                 VECCOPY(sample->ao, shi->ao);
1868                                 VECCOPY(sample->env, shi->env);
1869                                 VECCOPY(sample->indirect, shi->indirect);
1870                                 sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]);
1871                                 sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2]));
1872                                 sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1873                                 sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
1874                                 sample->x= shi->xs;
1875                                 sample->y= shi->ys;
1876                                 sample->filled= 1;
1877                         }
1878
1879                         if(re->test_break(re->tbh))
1880                                 break;
1881                 }
1882         }
1883 }
1884
1885 void free_occ_samples(Render *re, RenderPart *pa)
1886 {
1887         OcclusionTree *tree= re->occlusiontree;
1888         OcclusionCache *cache;
1889
1890         if(tree->cache) {
1891                 cache= &tree->cache[pa->thread];
1892
1893                 if(cache->sample)
1894                         MEM_freeN(cache->sample);
1895
1896                 cache->w= 0;
1897                 cache->h= 0;
1898                 cache->step= 0;
1899         }
1900 }
1901