style cleanup: follow style guide for formatting of if/for/while loops, and else...
[blender.git] / source / blender / render / intern / source / occlusion.c
1 /* 
2  *
3  * ***** BEGIN GPL LICENSE BLOCK *****
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  *
19  * The Original Code is Copyright (C) 2008 Blender Foundation.
20  * All rights reserved.
21  *
22  * The Original Code is: all of this file.
23  *
24  * Contributor(s): Brecht Van Lommel.
25  *
26  * ***** END GPL LICENSE BLOCK *****
27  */
28
29 /** \file blender/render/intern/source/occlusion.c
30  *  \ingroup render
31  */
32
33
34 #include <math.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38
39 #include "MEM_guardedalloc.h"
40
41 #include "DNA_material_types.h"
42
43 #include "BLI_math.h"
44 #include "BLI_blenlib.h"
45 #include "BLI_memarena.h"
46 #include "BLI_threads.h"
47 #include "BLI_utildefines.h"
48
49 #include "BKE_global.h"
50 #include "BKE_scene.h"
51
52
53 #include "RE_shader_ext.h"
54
55 /* local includes */
56 #include "occlusion.h"
57 #include "render_types.h"
58 #include "rendercore.h"
59 #include "renderdatabase.h"
60 #include "pixelshading.h"
61 #include "shading.h"
62 #include "zbuf.h"
63
64 /* ------------------------- Declarations --------------------------- */
65
66 #define INVALID_INDEX ((int)(~0))
67 #define INVPI 0.31830988618379069f
68 #define TOTCHILD 8
69 #define CACHE_STEP 3
70
71 typedef struct OcclusionCacheSample {
72         float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2;
73         int x, y, filled;
74 } OcclusionCacheSample;
75
76 typedef struct OcclusionCache {
77         OcclusionCacheSample *sample;
78         int x, y, w, h, step;
79 } OcclusionCache;
80
81 typedef struct OccFace {
82         int obi;
83         int facenr;
84 } OccFace;
85
86 typedef struct OccNode {
87         float co[3], area;
88         float sh[9], dco;
89         float occlusion, rad[3];
90         int childflag;
91         union {
92                 //OccFace face;
93                 int face;
94                 struct OccNode *node;
95         } child[TOTCHILD];
96 } OccNode;
97
98 typedef struct OcclusionTree {
99         MemArena *arena;
100
101         float (*co)[3];         /* temporary during build */
102
103         OccFace *face;          /* instance and face indices */
104         float *occlusion;       /* occlusion for faces */
105         float (*rad)[3];        /* radiance for faces */
106         
107         OccNode *root;
108
109         OccNode **stack[BLENDER_MAX_THREADS];
110         int maxdepth;
111
112         int totface;
113
114         float error;
115         float distfac;
116
117         int dothreadedbuild;
118         int totbuildthread;
119         int doindirect;
120
121         OcclusionCache *cache;
122 } OcclusionTree;
123
124 typedef struct OcclusionThread {
125         Render *re;
126         StrandSurface *mesh;
127         float (*faceao)[3];
128         float (*faceenv)[3];
129         float (*faceindirect)[3];
130         int begin, end;
131         int thread;
132 } OcclusionThread;
133
134 typedef struct OcclusionBuildThread {
135         OcclusionTree *tree;
136         int begin, end, depth;
137         OccNode *node;
138 } OcclusionBuildThread;
139
140 /* ------------------------- Shading --------------------------- */
141
142 extern Render R; // meh
143
144 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
145 {
146         ShadeInput *shi= ssamp->shi;
147         ShadeResult *shr= ssamp->shr;
148         float l, u, v, *v1, *v2, *v3;
149         
150         /* init */
151         if (vlr->v4) {
152                 shi->u= u= 0.5f;
153                 shi->v= v= 0.5f;
154         }
155         else {
156                 shi->u= u= 1.0f/3.0f;
157                 shi->v= v= 1.0f/3.0f;
158         }
159
160         /* setup render coordinates */
161         v1= vlr->v1->co;
162         v2= vlr->v2->co;
163         v3= vlr->v3->co;
164         
165         /* renderco */
166         l= 1.0f-u-v;
167         
168         shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0];
169         shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1];
170         shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2];
171
172         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
173
174         /* set up view vector */
175         copy_v3_v3(shi->view, shi->co);
176         normalize_v3(shi->view);
177         
178         /* cache for shadow */
179         shi->samplenr++;
180         
181         shi->xs= 0; // TODO
182         shi->ys= 0;
183         
184         shade_input_set_normals(shi);
185
186         /* no normal flip */
187         if (shi->flippednor)
188                 shade_input_flip_normals(shi);
189
190         madd_v3_v3fl(shi->co, shi->vn, 0.0001f); /* ugly.. */
191
192         /* not a pretty solution, but fixes common cases */
193         if (shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
194                 negate_v3(shi->vn);
195                 negate_v3(shi->vno);
196                 negate_v3(shi->nmapnorm);
197         }
198
199         /* init material vars */
200         // note, keep this synced with render_types.h
201         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));
202         shi->har= shi->mat->har;
203         
204         /* render */
205         shade_input_set_shade_texco(shi);
206         shade_material_loop(shi, shr); /* todo: nodes */
207         
208         copy_v3_v3(rad, shr->combined);
209 }
210
211 static void occ_build_shade(Render *re, OcclusionTree *tree)
212 {
213         ShadeSample ssamp;
214         ObjectInstanceRen *obi;
215         VlakRen *vlr;
216         int a;
217
218         R= *re;
219
220         /* setup shade sample with correct passes */
221         memset(&ssamp, 0, sizeof(ShadeSample));
222         ssamp.shi[0].lay= re->lay;
223         ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA;
224         ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC);
225         ssamp.tot= 1;
226
227         for (a=0; a<tree->totface; a++) {
228                 obi= &R.objectinstance[tree->face[a].obi];
229                 vlr= RE_findOrAddVlak(obi->obr, tree->face[a].facenr);
230
231                 occ_shade(&ssamp, obi, vlr, tree->rad[a]);
232         }
233 }
234
235 /* ------------------------- Spherical Harmonics --------------------------- */
236
237 /* Use 2nd order SH => 9 coefficients, stored in this order:
238  * 0 = (0,0),
239  * 1 = (1,-1), 2 = (1,0), 3 = (1,1),
240  * 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
241
242 static void sh_copy(float *shresult, float *sh)
243 {
244         memcpy(shresult, sh, sizeof(float)*9);
245 }
246
247 static void sh_mul(float *sh, float f)
248 {
249         int i;
250
251         for (i=0; i<9; i++)
252                 sh[i] *= f;
253 }
254
255 static void sh_add(float *shresult, float *sh1, float *sh2)
256 {
257         int i;
258
259         for (i=0; i<9; i++)
260                 shresult[i]= sh1[i] + sh2[i];
261 }
262
263 static void sh_from_disc(float *n, float area, float *shresult)
264 {
265         /* See formula (3) in:
266          * "An Efficient Representation for Irradiance Environment Maps" */
267         float sh[9], x, y, z;
268
269         x= n[0];
270         y= n[1];
271         z= n[2];
272
273         sh[0]= 0.282095f;
274
275         sh[1]= 0.488603f*y;
276         sh[2]= 0.488603f*z;
277         sh[3]= 0.488603f*x;
278         
279         sh[4]= 1.092548f*x*y;
280         sh[5]= 1.092548f*y*z;
281         sh[6]= 0.315392f*(3.0f*z*z - 1.0f);
282         sh[7]= 1.092548f*x*z;
283         sh[8]= 0.546274f*(x*x - y*y);
284
285         sh_mul(sh, area);
286         sh_copy(shresult, sh);
287 }
288
289 static float sh_eval(float *sh, float *v)
290 {
291         /* See formula (13) in:
292          * "An Efficient Representation for Irradiance Environment Maps" */
293         static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
294         static const float c4 = 0.886227f, c5 = 0.247708f;
295         float x, y, z, sum;
296
297         x= v[0];
298         y= v[1];
299         z= v[2];
300
301         sum= c1*sh[8]*(x*x - y*y);
302         sum += c3*sh[6]*z*z;
303         sum += c4*sh[0];
304         sum += -c5*sh[6];
305         sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z);
306         sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z);
307
308         return sum;
309 }
310
311 /* ------------------------------ Building --------------------------------- */
312
313 static void occ_face(const OccFace *face, float co[3], float normal[3], float *area)
314 {
315         ObjectInstanceRen *obi;
316         VlakRen *vlr;
317         float v1[3], v2[3], v3[3], v4[3];
318
319         obi= &R.objectinstance[face->obi];
320         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
321         
322         if (co) {
323                 if (vlr->v4)
324                         mid_v3_v3v3(co, vlr->v1->co, vlr->v3->co);
325                 else
326                         cent_tri_v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
327
328                 if (obi->flag & R_TRANSFORMED)
329                         mul_m4_v3(obi->mat, co);
330         }
331         
332         if (normal) {
333                 normal[0]= -vlr->n[0];
334                 normal[1]= -vlr->n[1];
335                 normal[2]= -vlr->n[2];
336
337                 if (obi->flag & R_TRANSFORMED)
338                         mul_m3_v3(obi->nmat, normal);
339         }
340
341         if (area) {
342                 copy_v3_v3(v1, vlr->v1->co);
343                 copy_v3_v3(v2, vlr->v2->co);
344                 copy_v3_v3(v3, vlr->v3->co);
345                 if (vlr->v4) copy_v3_v3(v4, vlr->v4->co);
346
347                 if (obi->flag & R_TRANSFORMED) {
348                         mul_m4_v3(obi->mat, v1);
349                         mul_m4_v3(obi->mat, v2);
350                         mul_m4_v3(obi->mat, v3);
351                         if (vlr->v4) mul_m4_v3(obi->mat, v4);
352                 }
353
354                 /* todo: correct area for instances */
355                 if (vlr->v4)
356                         *area= area_quad_v3(v1, v2, v3, v4);
357                 else
358                         *area= area_tri_v3(v1, v2, v3);
359         }
360 }
361
362 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
363 {
364         OccNode *child;
365         float occ, area, totarea, rad[3];
366         int a, b, indirect= tree->doindirect;
367
368         occ= 0.0f;
369         totarea= 0.0f;
370         if (indirect) zero_v3(rad);
371
372         for (b=0; b<TOTCHILD; b++) {
373                 if (node->childflag & (1<<b)) {
374                         a= node->child[b].face;
375                         occ_face(&tree->face[a], 0, 0, &area);
376                         occ += area*tree->occlusion[a];
377                         if (indirect) madd_v3_v3fl(rad, tree->rad[a], area);
378                         totarea += area;
379                 }
380                 else if (node->child[b].node) {
381                         child= node->child[b].node;
382                         occ_sum_occlusion(tree, child);
383
384                         occ += child->area*child->occlusion;
385                         if (indirect) madd_v3_v3fl(rad, child->rad, child->area);
386                         totarea += child->area;
387                 }
388         }
389
390         if (totarea != 0.0f) {
391                 occ /= totarea;
392                 if (indirect) mul_v3_fl(rad, 1.0f/totarea);
393         }
394         
395         node->occlusion= occ;
396         if (indirect) copy_v3_v3(node->rad, rad);
397 }
398
399 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
400 {
401         float len, maxlen= -1.0f;
402         int a, axis = 0;
403
404         INIT_MINMAX(min, max);
405
406         for (a = begin; a < end; a++) {
407                 DO_MINMAX(tree->co[a], min, max);
408         }
409
410         for (a=0; a<3; a++) {
411                 len= max[a] - min[a];
412
413                 if (len > maxlen) {
414                         maxlen= len;
415                         axis= a;
416                 }
417         }
418
419         return axis;
420 }
421
422 static void occ_node_from_face(OccFace *face, OccNode *node)
423 {
424         float n[3];
425
426         occ_face(face, node->co, n, &node->area);
427         node->dco= 0.0f;
428         sh_from_disc(n, node->area, node->sh);
429 }
430
431 static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco)
432 {
433         int b;
434         for (b=0; b<TOTCHILD; b++) {
435                 float dist, d[3], nco[3];
436
437                 if (node->childflag & (1<<b)) {
438                         occ_face(tree->face+node->child[b].face, nco, NULL, NULL);
439                 }
440                 else if (node->child[b].node) {
441                         OccNode *child= node->child[b].node;
442                         occ_build_dco(tree, child, co, dco);
443                         copy_v3_v3(nco, child->co);
444                 }
445                 else {
446                         continue;
447                 }
448
449                 sub_v3_v3v3(d, nco, co);
450                 dist= dot_v3v3(d, d);
451                 if (dist > *dco)
452                         *dco= dist;
453         }
454 }
455
456 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
457 {
458         float min[3], max[3], mid;
459         int axis, a, enda;
460
461         /* split in middle of boundbox. this seems faster than median split
462          * on complex scenes, possibly since it avoids two distant faces to
463          * be in the same node better? */
464         axis= occ_find_bbox_axis(tree, begin, end, min, max);
465         mid= 0.5f*(min[axis]+max[axis]);
466
467         a= begin;
468         enda= end;
469         while(a<enda) {
470                 if (tree->co[a][axis] > mid) {
471                         enda--;
472                         SWAP(OccFace, tree->face[a], tree->face[enda]);
473                         SWAP(float, tree->co[a][0], tree->co[enda][0]);
474                         SWAP(float, tree->co[a][1], tree->co[enda][1]);
475                         SWAP(float, tree->co[a][2], tree->co[enda][2]);
476                 }
477                 else
478                         a++;
479         }
480
481         *split= enda;
482 }
483
484 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
485 {
486         /* split faces into eight groups */
487         int b, splitx, splity[2], splitz[4];
488
489         occ_build_split(tree, begin, end, &splitx);
490
491         /* force split if none found, to deal with degenerate geometry */
492         if (splitx == begin || splitx == end)
493                 splitx= (begin+end)/2;
494
495         occ_build_split(tree, begin, splitx, &splity[0]);
496         occ_build_split(tree, splitx, end, &splity[1]);
497
498         occ_build_split(tree, begin, splity[0], &splitz[0]);
499         occ_build_split(tree, splity[0], splitx, &splitz[1]);
500         occ_build_split(tree, splitx, splity[1], &splitz[2]);
501         occ_build_split(tree, splity[1], end, &splitz[3]);
502
503         offset[0]= begin;
504         offset[1]= splitz[0];
505         offset[2]= splity[0];
506         offset[3]= splitz[1];
507         offset[4]= splitx;
508         offset[5]= splitz[2];
509         offset[6]= splity[1];
510         offset[7]= splitz[3];
511
512         for (b=0; b<7; b++)
513                 count[b]= offset[b+1] - offset[b];
514         count[7]= end - offset[7];
515 }
516
517 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth);
518
519 static void *exec_occ_build(void *data)
520 {
521         OcclusionBuildThread *othread= (OcclusionBuildThread*)data;
522
523         occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth);
524
525         return 0;
526 }
527
528 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth)
529 {
530         ListBase threads;
531         OcclusionBuildThread othreads[BLENDER_MAX_THREADS];
532         OccNode *child, tmpnode;
533         /* OccFace *face; */
534         int a, b, totthread=0, offset[TOTCHILD], count[TOTCHILD];
535
536         /* add a new node */
537         node->occlusion= 1.0f;
538
539         /* leaf node with only children */
540         if (end - begin <= TOTCHILD) {
541                 for (a=begin, b=0; a<end; a++, b++) {
542                         /* face= &tree->face[a]; */
543                         node->child[b].face= a;
544                         node->childflag |= (1<<b);
545                 }
546         }
547         else {
548                 /* order faces */
549                 occ_build_8_split(tree, begin, end, offset, count);
550
551                 if (depth == 1 && tree->dothreadedbuild)
552                         BLI_init_threads(&threads, exec_occ_build, tree->totbuildthread);
553
554                 for (b=0; b<TOTCHILD; b++) {
555                         if (count[b] == 0) {
556                                 node->child[b].node= NULL;
557                         }
558                         else if (count[b] == 1) {
559                                 /* face= &tree->face[offset[b]]; */
560                                 node->child[b].face= offset[b];
561                                 node->childflag |= (1<<b);
562                         }
563                         else {
564                                 if (tree->dothreadedbuild)
565                                         BLI_lock_thread(LOCK_CUSTOM1);
566
567                                 child= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
568                                 node->child[b].node= child;
569
570                                 /* keep track of maximum depth for stack */
571                                 if (depth+1 > tree->maxdepth)
572                                         tree->maxdepth= depth+1;
573
574                                 if (tree->dothreadedbuild)
575                                         BLI_unlock_thread(LOCK_CUSTOM1);
576
577                                 if (depth == 1 && tree->dothreadedbuild) {
578                                         othreads[totthread].tree= tree;
579                                         othreads[totthread].node= child;
580                                         othreads[totthread].begin= offset[b];
581                                         othreads[totthread].end= offset[b]+count[b];
582                                         othreads[totthread].depth= depth+1;
583                                         BLI_insert_thread(&threads, &othreads[totthread]);
584                                         totthread++;
585                                 }
586                                 else
587                                         occ_build_recursive(tree, child, offset[b], offset[b]+count[b], depth+1);
588                         }
589                 }
590
591                 if (depth == 1 && tree->dothreadedbuild)
592                         BLI_end_threads(&threads);
593         }
594
595         /* combine area, position and sh */
596         for (b=0; b<TOTCHILD; b++) {
597                 if (node->childflag & (1<<b)) {
598                         child= &tmpnode;
599                         occ_node_from_face(tree->face+node->child[b].face, &tmpnode);
600                 }
601                 else {
602                         child= node->child[b].node;
603                 }
604
605                 if (child) {
606                         node->area += child->area;
607                         sh_add(node->sh, node->sh, child->sh);
608                         madd_v3_v3fl(node->co, child->co, child->area);
609                 }
610         }
611
612         if (node->area != 0.0f)
613                 mul_v3_fl(node->co, 1.0f/node->area);
614
615         /* compute maximum distance from center */
616         node->dco= 0.0f;
617         if (node->area > 0.0f)
618                 occ_build_dco(tree, node, node->co, &node->dco);
619 }
620
621 static void occ_build_sh_normalize(OccNode *node)
622 {
623         /* normalize spherical harmonics to not include area, so
624          * we can clamp the dot product and then mutliply by area */
625         int b;
626
627         if (node->area != 0.0f)
628                 sh_mul(node->sh, 1.0f/node->area);
629
630         for (b=0; b<TOTCHILD; b++) {
631                 if (node->childflag & (1<<b));
632                 else if (node->child[b].node)
633                         occ_build_sh_normalize(node->child[b].node);
634         }
635 }
636
637 static OcclusionTree *occ_tree_build(Render *re)
638 {
639         OcclusionTree *tree;
640         ObjectInstanceRen *obi;
641         ObjectRen *obr;
642         Material *ma;
643         VlakRen *vlr= NULL;
644         int a, b, c, totface;
645
646         /* count */
647         totface= 0;
648         for (obi=re->instancetable.first; obi; obi=obi->next) {
649                 obr= obi->obr;
650                 for (a=0; a<obr->totvlak; a++) {
651                         if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
652                         else vlr++;
653
654                         ma= vlr->mat;
655
656                         if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE))
657                                 totface++;
658                 }
659         }
660
661         if (totface == 0)
662                 return NULL;
663         
664         tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
665         tree->totface= totface;
666
667         /* parameters */
668         tree->error= get_render_aosss_error(&re->r, re->wrld.ao_approx_error);
669         tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f;
670         tree->doindirect= (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0);
671
672         /* allocation */
673         tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena");
674         BLI_memarena_use_calloc(tree->arena);
675
676         if (re->wrld.aomode & WO_AOCACHE)
677                 tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache");
678
679         tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace");
680         tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo");
681         tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion");
682
683         if (tree->doindirect)
684                 tree->rad= MEM_callocN(sizeof(float)*3*totface, "OcclusionRad");
685
686         /* make array of face pointers */
687         for (b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) {
688                 obr= obi->obr;
689                 for (a=0; a<obr->totvlak; a++) {
690                         if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
691                         else vlr++;
692
693                         ma= vlr->mat;
694
695                         if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) {
696                                 tree->face[b].obi= c;
697                                 tree->face[b].facenr= a;
698                                 tree->occlusion[b]= 1.0f;
699                                 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 
700                                 b++;
701                         }
702                 }
703         }
704
705         /* threads */
706         tree->totbuildthread= (re->r.threads > 1 && totface > 10000)? 8: 1;
707         tree->dothreadedbuild= (tree->totbuildthread > 1);
708
709         /* recurse */
710         tree->root= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
711         tree->maxdepth= 1;
712         occ_build_recursive(tree, tree->root, 0, totface, 1);
713
714         if (tree->doindirect) {
715                 occ_build_shade(re, tree);
716                 occ_sum_occlusion(tree, tree->root);
717         }
718         
719         MEM_freeN(tree->co);
720         tree->co= NULL;
721
722         occ_build_sh_normalize(tree->root);
723
724         for (a=0; a<BLENDER_MAX_THREADS; a++)
725                 tree->stack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack");
726
727         return tree;
728 }
729
730 static void occ_free_tree(OcclusionTree *tree)
731 {
732         int a;
733
734         if (tree) {
735                 if (tree->arena) BLI_memarena_free(tree->arena);
736                 for (a=0; a<BLENDER_MAX_THREADS; a++)
737                         if (tree->stack[a])
738                                 MEM_freeN(tree->stack[a]);
739                 if (tree->occlusion) MEM_freeN(tree->occlusion);
740                 if (tree->cache) MEM_freeN(tree->cache);
741                 if (tree->face) MEM_freeN(tree->face);
742                 if (tree->rad) MEM_freeN(tree->rad);
743                 MEM_freeN(tree);
744         }
745 }
746
747 /* ------------------------- Traversal --------------------------- */
748
749 static float occ_solid_angle(OccNode *node, const float v[3], float d2, float invd2, const float receivenormal[3])
750 {
751         float dotreceive, dotemit;
752         float ev[3];
753
754         ev[0]= -v[0]*invd2;
755         ev[1]= -v[1]*invd2;
756         ev[2]= -v[2]*invd2;
757         dotemit= sh_eval(node->sh, ev);
758         dotreceive= dot_v3v3(receivenormal, v)*invd2;
759
760         CLAMP(dotemit, 0.0f, 1.0f);
761         CLAMP(dotreceive, 0.0f, 1.0f);
762         
763         return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI;
764 }
765
766 static void VecAddDir(float result[3], const float v1[3], const float v2[3], const float fac)
767 {
768         result[0]= v1[0] + fac*(v2[0] - v1[0]);
769         result[1]= v1[1] + fac*(v2[1] - v1[1]);
770         result[2]= v1[2] + fac*(v2[2] - v1[2]);
771 }
772
773 static int occ_visible_quad(float *p, const float n[3], const float v0[3], const float *v1, const float *v2, float q0[3], float q1[3], float q2[3], float q3[3])
774 {
775         static const float epsilon = 1e-6f;
776         float c, sd[3];
777         
778         c= dot_v3v3(n, p);
779
780         /* signed distances from the vertices to the plane. */
781         sd[0]= dot_v3v3(n, v0) - c;
782         sd[1]= dot_v3v3(n, v1) - c;
783         sd[2]= dot_v3v3(n, v2) - c;
784
785         if (fabsf(sd[0]) < epsilon) sd[0] = 0.0f;
786         if (fabsf(sd[1]) < epsilon) sd[1] = 0.0f;
787         if (fabsf(sd[2]) < epsilon) sd[2] = 0.0f;
788
789         if (sd[0] > 0) {
790                 if (sd[1] > 0) {
791                         if (sd[2] > 0) {
792                                 // +++
793                                 copy_v3_v3(q0, v0);
794                                 copy_v3_v3(q1, v1);
795                                 copy_v3_v3(q2, v2);
796                                 copy_v3_v3(q3, q2);
797                         }
798                         else if (sd[2] < 0) {
799                                 // ++-
800                                 copy_v3_v3(q0, v0);
801                                 copy_v3_v3(q1, v1);
802                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
803                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
804                         }
805                         else {
806                                 // ++0
807                                 copy_v3_v3(q0, v0);
808                                 copy_v3_v3(q1, v1);
809                                 copy_v3_v3(q2, v2);
810                                 copy_v3_v3(q3, q2);
811                         }
812                 }
813                 else if (sd[1] < 0) {
814                         if (sd[2] > 0) {
815                                 // +-+
816                                 copy_v3_v3(q0, v0);
817                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
818                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
819                                 copy_v3_v3(q3, v2);
820                         }
821                         else if (sd[2] < 0) {
822                                 // +--
823                                 copy_v3_v3(q0, v0);
824                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
825                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
826                                 copy_v3_v3(q3, q2);
827                         }
828                         else {
829                                 // +-0
830                                 copy_v3_v3(q0, v0);
831                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
832                                 copy_v3_v3(q2, v2);
833                                 copy_v3_v3(q3, q2);
834                         }
835                 }
836                 else {
837                         if (sd[2] > 0) {
838                                 // +0+
839                                 copy_v3_v3(q0, v0);
840                                 copy_v3_v3(q1, v1);
841                                 copy_v3_v3(q2, v2);
842                                 copy_v3_v3(q3, q2);
843                         }
844                         else if (sd[2] < 0) {
845                                 // +0-
846                                 copy_v3_v3(q0, v0);
847                                 copy_v3_v3(q1, v1);
848                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
849                                 copy_v3_v3(q3, q2);
850                         }
851                         else {
852                                 // +00
853                                 copy_v3_v3(q0, v0);
854                                 copy_v3_v3(q1, v1);
855                                 copy_v3_v3(q2, v2);
856                                 copy_v3_v3(q3, q2);
857                         }
858                 }
859         }
860         else if (sd[0] < 0) {
861                 if (sd[1] > 0) {
862                         if (sd[2] > 0) {
863                                 // -++
864                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
865                                 copy_v3_v3(q1, v1);
866                                 copy_v3_v3(q2, v2);
867                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
868                         }
869                         else if (sd[2] < 0) {
870                                 // -+-
871                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
872                                 copy_v3_v3(q1, v1);
873                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
874                                 copy_v3_v3(q3, q2);
875                         }
876                         else {
877                                 // -+0
878                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
879                                 copy_v3_v3(q1, v1);
880                                 copy_v3_v3(q2, v2);
881                                 copy_v3_v3(q3, q2);
882                         }
883                 }
884                 else if (sd[1] < 0) {
885                         if (sd[2] > 0) {
886                                 // --+
887                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
888                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
889                                 copy_v3_v3(q2, v2);
890                                 copy_v3_v3(q3, q2);
891                         }
892                         else if (sd[2] < 0) {
893                                 // ---
894                                 return 0;
895                         }
896                         else {
897                                 // --0
898                                 return 0;
899                         }
900                 }
901                 else {
902                         if (sd[2] > 0) {
903                                 // -0+
904                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
905                                 copy_v3_v3(q1, v1);
906                                 copy_v3_v3(q2, v2);
907                                 copy_v3_v3(q3, q2);
908                         }
909                         else if (sd[2] < 0) {
910                                 // -0-
911                                 return 0;
912                         }
913                         else {
914                                 // -00
915                                 return 0;
916                         }
917                 }
918         }
919         else {
920                 if (sd[1] > 0) {
921                         if (sd[2] > 0) {
922                                 // 0++
923                                 copy_v3_v3(q0, v0);
924                                 copy_v3_v3(q1, v1);
925                                 copy_v3_v3(q2, v2);
926                                 copy_v3_v3(q3, q2);
927                         }
928                         else if (sd[2] < 0) {
929                                 // 0+-
930                                 copy_v3_v3(q0, v0);
931                                 copy_v3_v3(q1, v1);
932                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
933                                 copy_v3_v3(q3, q2);
934                         }
935                         else {
936                                 // 0+0
937                                 copy_v3_v3(q0, v0);
938                                 copy_v3_v3(q1, v1);
939                                 copy_v3_v3(q2, v2);
940                                 copy_v3_v3(q3, q2);
941                         }
942                 }
943                 else if (sd[1] < 0) {
944                         if (sd[2] > 0) {
945                                 // 0-+
946                                 copy_v3_v3(q0, v0);
947                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
948                                 copy_v3_v3(q2, v2);
949                                 copy_v3_v3(q3, q2);
950                         }
951                         else if (sd[2] < 0) {
952                                 // 0--
953                                 return 0;
954                         }
955                         else {
956                                 // 0-0
957                                 return 0;
958                         }
959                 }
960                 else {
961                         if (sd[2] > 0) {
962                                 // 00+
963                                 copy_v3_v3(q0, v0);
964                                 copy_v3_v3(q1, v1);
965                                 copy_v3_v3(q2, v2);
966                                 copy_v3_v3(q3, q2);
967                         }
968                         else if (sd[2] < 0) {
969                                 // 00-
970                                 return 0;
971                         }
972                         else {
973                                 // 000
974                                 return 0;
975                         }
976                 }
977         }
978
979         return 1;
980 }
981
982 /* altivec optimization, this works, but is unused */
983
984 #if 0
985 #include <Accelerate/Accelerate.h>
986
987 typedef union {
988         vFloat v;
989         float f[4];
990 } vFloatResult;
991
992 static vFloat vec_splat_float(float val)
993 {
994         return (vFloat) {val, val, val, val};
995 }
996
997 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
998 {
999         vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle;
1000         vUInt8 rotate = (vUInt8) {4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3};
1001         vFloatResult vresult;
1002         float result;
1003
1004         /* compute r* */
1005         vrx = (vFloat) {q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]);
1006         vry = (vFloat) {q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]);
1007         vrz = (vFloat) {q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]);
1008
1009         /* normalize r* */
1010         rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f));
1011         vrx = vrx*rlen;
1012         vry = vry*rlen;
1013         vrz = vrz*rlen;
1014
1015         /* rotate r* for cross and dot */
1016         vsrx= vec_perm(vrx, vrx, rotate);
1017         vsry= vec_perm(vry, vry, rotate);
1018         vsrz= vec_perm(vrz, vrz, rotate);
1019
1020         /* cross product */
1021         gx = vsry*vrz - vsrz*vry;
1022         gy = vsrz*vrx - vsrx*vrz;
1023         gz = vsrx*vry - vsry*vrx;
1024
1025         /* normalize */
1026         rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f));
1027         gx = gx*rlen;
1028         gy = gy*rlen;
1029         gz = gz*rlen;
1030
1031         /* angle */
1032         vcos = vrx*vsrx + vry*vsry + vrz*vsrz;
1033         vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f));
1034         vangle= vacosf(vcos);
1035
1036         /* dot */
1037         vresult.v = (vec_splat_float(n[0])*gx +
1038                      vec_splat_float(n[1])*gy +
1039                      vec_splat_float(n[2])*gz)*vangle;
1040
1041         result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI);
1042         result= MAX2(result, 0.0f);
1043
1044         return result;
1045 }
1046
1047 #endif
1048
1049 /* SSE optimization, acos code doesn't work */
1050
1051 #if 0
1052
1053 #include <xmmintrin.h>
1054
1055 static __m128 sse_approx_acos(__m128 x)
1056 {
1057         /* needs a better approximation than taylor expansion of acos, since that
1058          * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work
1059          * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */
1060
1061         return _mm_set_ps1(1.0f);
1062 }
1063
1064 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
1065 {
1066         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1067         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1068         float fresult[4] __attribute__((aligned(16)));
1069         __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult;
1070
1071         /* compute r */
1072         qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]);
1073         qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]);
1074         qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]);
1075
1076         rx = qx - _mm_set_ps1(p[0]);
1077         ry = qy - _mm_set_ps1(p[1]);
1078         rz = qz - _mm_set_ps1(p[2]);
1079
1080         /* normalize r */
1081         rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f));
1082         rx = rx*rlen;
1083         ry = ry*rlen;
1084         rz = rz*rlen;
1085
1086         /* cross product */
1087         srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1));
1088         sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1));
1089         srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1));
1090
1091         gx = sry*rz - srz*ry;
1092         gy = srz*rx - srx*rz;
1093         gz = srx*ry - sry*rx;
1094
1095         /* normalize g */
1096         glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f));
1097         gx = gx*glen;
1098         gy = gy*glen;
1099         gz = gz*glen;
1100
1101         /* compute angle */
1102         rcos = rx*srx + ry*sry + rz*srz;
1103         rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f));
1104
1105         angle = sse_approx_cos(rcos);
1106         aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle;
1107
1108         /* sum together */
1109         result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI);
1110         result= MAX2(result, 0.0f);
1111
1112         return result;
1113 }
1114
1115 #endif
1116
1117 static void normalizef(float *n)
1118 {
1119         float d;
1120         
1121         d= dot_v3v3(n, n);
1122
1123         if (d > 1.0e-35F) {
1124                 d= 1.0f/sqrtf(d);
1125
1126                 n[0] *= d; 
1127                 n[1] *= d; 
1128                 n[2] *= d;
1129         } 
1130 }
1131
1132 static float occ_quad_form_factor(const float p[3], const float n[3], const float q0[3], const float q1[3], const float q2[3], const float q3[3])
1133 {
1134         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1135         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1136
1137         sub_v3_v3v3(r0, q0, p);
1138         sub_v3_v3v3(r1, q1, p);
1139         sub_v3_v3v3(r2, q2, p);
1140         sub_v3_v3v3(r3, q3, p);
1141
1142         normalizef(r0);
1143         normalizef(r1);
1144         normalizef(r2);
1145         normalizef(r3);
1146
1147         cross_v3_v3v3(g0, r1, r0); normalizef(g0);
1148         cross_v3_v3v3(g1, r2, r1); normalizef(g1);
1149         cross_v3_v3v3(g2, r3, r2); normalizef(g2);
1150         cross_v3_v3v3(g3, r0, r3); normalizef(g3);
1151
1152         a1= saacosf(dot_v3v3(r0, r1));
1153         a2= saacosf(dot_v3v3(r1, r2));
1154         a3= saacosf(dot_v3v3(r2, r3));
1155         a4= saacosf(dot_v3v3(r3, r0));
1156
1157         dot1= dot_v3v3(n, g0);
1158         dot2= dot_v3v3(n, g1);
1159         dot3= dot_v3v3(n, g2);
1160         dot4= dot_v3v3(n, g3);
1161
1162         result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI;
1163         result= MAX2(result, 0.0f);
1164
1165         return result;
1166 }
1167
1168 static float occ_form_factor(OccFace *face, float *p, float *n)
1169 {
1170         ObjectInstanceRen *obi;
1171         VlakRen *vlr;
1172         float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f;
1173
1174         obi= &R.objectinstance[face->obi];
1175         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
1176
1177         copy_v3_v3(v1, vlr->v1->co);
1178         copy_v3_v3(v2, vlr->v2->co);
1179         copy_v3_v3(v3, vlr->v3->co);
1180
1181         if (obi->flag & R_TRANSFORMED) {
1182                 mul_m4_v3(obi->mat, v1);
1183                 mul_m4_v3(obi->mat, v2);
1184                 mul_m4_v3(obi->mat, v3);
1185         }
1186
1187         if (occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
1188                 contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1189
1190         if (vlr->v4) {
1191                 copy_v3_v3(v4, vlr->v4->co);
1192                 if (obi->flag & R_TRANSFORMED)
1193                         mul_m4_v3(obi->mat, v4);
1194
1195                 if (occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
1196                         contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1197         }
1198
1199         return contrib;
1200 }
1201
1202 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float rad[3], float bentn[3])
1203 {
1204         OccNode *node, **stack;
1205         OccFace *face;
1206         float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2;
1207         float distfac, fac, error, d2, weight, emitarea;
1208         int b, f, totstack;
1209
1210         /* init variables */
1211         copy_v3_v3(p, pp);
1212         copy_v3_v3(n, pn);
1213         madd_v3_v3fl(p, n, 1e-4f);
1214
1215         if (bentn)
1216                 copy_v3_v3(bentn, n);
1217         
1218         error= tree->error;
1219         distfac= tree->distfac;
1220
1221         resultocc= 0.0f;
1222         zero_v3(resultrad);
1223
1224         /* init stack */
1225         stack= tree->stack[thread];
1226         stack[0]= tree->root;
1227         totstack= 1;
1228
1229         while(totstack) {
1230                 /* pop point off the stack */
1231                 node= stack[--totstack];
1232
1233                 sub_v3_v3v3(v, node->co, p);
1234                 d2= dot_v3v3(v, v) + 1e-16f;
1235                 emitarea= MAX2(node->area, node->dco);
1236
1237                 if (d2*error > emitarea) {
1238                         if (distfac != 0.0f) {
1239                                 fac= 1.0f/(1.0f + distfac*d2);
1240                                 if (fac < 0.01f)
1241                                         continue;
1242                         }
1243                         else
1244                                 fac= 1.0f;
1245
1246                         /* accumulate occlusion from spherical harmonics */
1247                         invd2 = 1.0f/sqrtf(d2);
1248                         weight= occ_solid_angle(node, v, d2, invd2, n);
1249
1250                         if (rad)
1251                                 madd_v3_v3fl(resultrad, node->rad, weight*fac);
1252
1253                         weight *= node->occlusion;
1254
1255                         if (bentn) {
1256                                 bentn[0] -= weight*invd2*v[0];
1257                                 bentn[1] -= weight*invd2*v[1];
1258                                 bentn[2] -= weight*invd2*v[2];
1259                         }
1260
1261                         resultocc += weight*fac;
1262                 }
1263                 else {
1264                         /* traverse into children */
1265                         for (b=0; b<TOTCHILD; b++) {
1266                                 if (node->childflag & (1<<b)) {
1267                                         f= node->child[b].face;
1268                                         face= &tree->face[f];
1269
1270                                         /* accumulate occlusion with face form factor */
1271                                         if (!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
1272                                                 if (bentn || distfac != 0.0f) {
1273                                                         occ_face(face, co, NULL, NULL); 
1274                                                         sub_v3_v3v3(v, co, p);
1275                                                         d2= dot_v3v3(v, v) + 1e-16f;
1276
1277                                                         fac= (distfac == 0.0f)? 1.0f: 1.0f/(1.0f + distfac*d2);
1278                                                         if (fac < 0.01f)
1279                                                                 continue;
1280                                                 }
1281                                                 else
1282                                                         fac= 1.0f;
1283
1284                                                 weight= occ_form_factor(face, p, n);
1285
1286                                                 if (rad)
1287                                                         madd_v3_v3fl(resultrad, tree->rad[f], weight*fac);
1288
1289                                                 weight *= tree->occlusion[f];
1290
1291                                                 if (bentn) {
1292                                                         invd2= 1.0f/sqrtf(d2);
1293                                                         bentn[0] -= weight*invd2*v[0];
1294                                                         bentn[1] -= weight*invd2*v[1];
1295                                                         bentn[2] -= weight*invd2*v[2];
1296                                                 }
1297
1298                                                 resultocc += weight*fac;
1299                                         }
1300                                 }
1301                                 else if (node->child[b].node) {
1302                                         /* push child on the stack */
1303                                         stack[totstack++]= node->child[b].node;
1304                                 }
1305                         }
1306                 }
1307         }
1308
1309         if (occ) *occ= resultocc;
1310         if (rad) copy_v3_v3(rad, resultrad);
1311 #if 0
1312         if (rad && exclude) {
1313                 int a;
1314                 for (a=0; a<tree->totface; a++)
1315                         if ((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr))
1316                                 copy_v3_v3(rad, tree->rad[a]);
1317         }
1318 #endif
1319         if (bentn) normalize_v3(bentn);
1320 }
1321
1322 static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce)
1323 {
1324         float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ;
1325         int bounce, i;
1326
1327         rad= MEM_callocN(sizeof(float)*3*tree->totface, "OcclusionBounceRad");
1328         sum= MEM_dupallocN(tree->rad);
1329
1330         for (bounce=1; bounce<totbounce; bounce++) {
1331                 for (i=0; i<tree->totface; i++) {
1332                         occ_face(&tree->face[i], co, n, NULL);
1333                         madd_v3_v3fl(co, n, 1e-8f);
1334
1335                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL);
1336                         rad[i][0]= MAX2(rad[i][0], 0.0f);
1337                         rad[i][1]= MAX2(rad[i][1], 0.0f);
1338                         rad[i][2]= MAX2(rad[i][2], 0.0f);
1339                         add_v3_v3(sum[i], rad[i]);
1340
1341                         if (re->test_break(re->tbh))
1342                                 break;
1343                 }
1344
1345                 if (re->test_break(re->tbh))
1346                         break;
1347
1348                 tmp= tree->rad;
1349                 tree->rad= rad;
1350                 rad= tmp;
1351
1352                 occ_sum_occlusion(tree, tree->root);
1353         }
1354
1355         MEM_freeN(rad);
1356         MEM_freeN(tree->rad);
1357         tree->rad= sum;
1358
1359         if (!re->test_break(re->tbh))
1360                 occ_sum_occlusion(tree, tree->root);
1361 }
1362
1363 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
1364 {
1365         float *occ, co[3], n[3];
1366         int pass, i;
1367         
1368         occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc");
1369
1370         for (pass=0; pass<totpass; pass++) {
1371                 for (i=0; i<tree->totface; i++) {
1372                         occ_face(&tree->face[i], co, n, NULL);
1373                         negate_v3(n);
1374                         madd_v3_v3fl(co, n, 1e-8f);
1375
1376                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL);
1377                         if (re->test_break(re->tbh))
1378                                 break;
1379                 }
1380
1381                 if (re->test_break(re->tbh))
1382                         break;
1383
1384                 for (i=0; i<tree->totface; i++) {
1385                         tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
1386                         if (tree->occlusion[i] < 0.0f)
1387                                 tree->occlusion[i]= 0.0f;
1388                 }
1389
1390                 occ_sum_occlusion(tree, tree->root);
1391         }
1392
1393         MEM_freeN(occ);
1394 }
1395
1396 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *ao, float *env, float *indirect)
1397 {
1398         float nn[3], bn[3], fac, occ, occlusion, correction, rad[3];
1399         int envcolor;
1400
1401         envcolor= re->wrld.aocolor;
1402         if (onlyshadow)
1403                 envcolor= WO_AOPLAIN;
1404
1405         negate_v3_v3(nn, n);
1406
1407         occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect)? rad: NULL, (env && envcolor)? bn: NULL);
1408
1409         correction= re->wrld.ao_approx_correction;
1410
1411         occlusion= (1.0f-correction)*(1.0f-occ);
1412         CLAMP(occlusion, 0.0f, 1.0f);
1413         if (correction != 0.0f)
1414                 occlusion += correction*expf(-occ);
1415
1416         if (env) {
1417                 /* sky shading using bent normal */
1418                 if (ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) {
1419                         fac= 0.5f * (1.0f + dot_v3v3(bn, re->grvec));
1420                         env[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr;
1421                         env[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng;
1422                         env[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb;
1423
1424                         mul_v3_fl(env, occlusion);
1425                 }
1426                 else {
1427                         env[0]= occlusion;
1428                         env[1]= occlusion;
1429                         env[2]= occlusion;
1430                 }
1431 #if 0
1432                 else {  /* WO_AOSKYTEX */
1433                         float dxyview[3];
1434                         bn[0]= -bn[0];
1435                         bn[1]= -bn[1];
1436                         bn[2]= -bn[2];
1437                         dxyview[0]= 1.0f;
1438                         dxyview[1]= 1.0f;
1439                         dxyview[2]= 0.0f;
1440                         shadeSkyView(ao, co, bn, dxyview);
1441                 }
1442 #endif
1443         }
1444
1445         if (ao) {
1446                 ao[0]= occlusion;
1447                 ao[1]= occlusion;
1448                 ao[2]= occlusion;
1449         }
1450
1451         if (tree->doindirect) copy_v3_v3(indirect, rad);
1452         else zero_v3(indirect);
1453 }
1454
1455 /* ---------------------------- Caching ------------------------------- */
1456
1457 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
1458 {
1459         x -= cache->x;
1460         y -= cache->y;
1461
1462         x /= cache->step;
1463         y /= cache->step;
1464         x *= cache->step;
1465         y *= cache->step;
1466
1467         if (x < 0 || x >= cache->w || y < 0 || y >= cache->h)
1468                 return NULL;
1469         else
1470                 return &cache->sample[y*cache->w + x];
1471 }
1472
1473 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect)
1474 {
1475         OcclusionCache *cache;
1476         OcclusionCacheSample *samples[4], *sample;
1477         float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
1478         float d[3], dist2;
1479         int i, x1, y1, x2, y2;
1480
1481         if (!tree->cache)
1482                 return 0;
1483         
1484         /* first try to find a sample in the same pixel */
1485         cache= &tree->cache[thread];
1486
1487         if (cache->sample && cache->step) {
1488                 sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)];
1489                 if (sample->filled) {
1490                         sub_v3_v3v3(d, sample->co, co);
1491                         dist2= dot_v3v3(d, d);
1492                         if (dist2 < 0.5f*sample->dist2 && dot_v3v3(sample->n, n) > 0.98f) {
1493                                 copy_v3_v3(ao, sample->ao);
1494                                 copy_v3_v3(env, sample->env);
1495                                 copy_v3_v3(indirect, sample->indirect);
1496                                 return 1;
1497                         }
1498                 }
1499         }
1500         else
1501                 return 0;
1502
1503         /* try to interpolate between 4 neighboring pixels */
1504         samples[0]= find_occ_sample(cache, x, y);
1505         samples[1]= find_occ_sample(cache, x+cache->step, y);
1506         samples[2]= find_occ_sample(cache, x, y+cache->step);
1507         samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step);
1508
1509         for (i=0; i<4; i++)
1510                 if (!samples[i] || !samples[i]->filled)
1511                         return 0;
1512
1513         /* require intensities not being too different */
1514         mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1515         maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1516
1517         if (maxo - mino > 0.05f)
1518                 return 0;
1519
1520         /* compute weighted interpolation between samples */
1521         zero_v3(ao);
1522         zero_v3(env);
1523         zero_v3(indirect);
1524         totw= 0.0f;
1525
1526         x1= samples[0]->x;
1527         y1= samples[0]->y;
1528         x2= samples[3]->x;
1529         y2= samples[3]->y;
1530
1531         tx= (float)(x2 - x)/(float)(x2 - x1);
1532         ty= (float)(y2 - y)/(float)(y2 - y1);
1533
1534         wb[3]= (1.0f-tx)*(1.0f-ty);
1535         wb[2]= (tx)*(1.0f-ty);
1536         wb[1]= (1.0f-tx)*(ty);
1537         wb[0]= tx*ty;
1538
1539         for (i=0; i<4; i++) {
1540                 sub_v3_v3v3(d, samples[i]->co, co);
1541                 //dist2= dot_v3v3(d, d);
1542
1543                 wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
1544                 wn[i]= pow(dot_v3v3(samples[i]->n, n), 32.0f);
1545
1546                 w= wb[i]*wn[i]*wz[i];
1547
1548                 totw += w;
1549                 madd_v3_v3fl(ao, samples[i]->ao, w);
1550                 madd_v3_v3fl(env, samples[i]->env, w);
1551                 madd_v3_v3fl(indirect, samples[i]->indirect, w);
1552         }
1553
1554         if (totw >= 0.9f) {
1555                 totw= 1.0f/totw;
1556                 mul_v3_fl(ao, totw);
1557                 mul_v3_fl(env, totw);
1558                 mul_v3_fl(indirect, totw);
1559                 return 1;
1560         }
1561
1562         return 0;
1563 }
1564
1565 static void sample_occ_surface(ShadeInput *shi)
1566 {
1567         StrandRen *strand= shi->strand;
1568         StrandSurface *mesh= strand->buffer->surface;
1569         int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
1570         float w[4], *co1, *co2, *co3, *co4;
1571
1572         if (mesh && mesh->face && mesh->co && mesh->ao && index) {
1573                 face= mesh->face[*index];
1574
1575                 co1= mesh->co[face[0]];
1576                 co2= mesh->co[face[1]];
1577                 co3= mesh->co[face[2]];
1578                 co4= (face[3])? mesh->co[face[3]]: NULL;
1579
1580                 interp_weights_face_v3(w, co1, co2, co3, co4, strand->vert->co);
1581
1582                 zero_v3(shi->ao);
1583                 zero_v3(shi->env);
1584                 zero_v3(shi->indirect);
1585
1586                 madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]);
1587                 madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]);
1588                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]);
1589                 madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]);
1590                 madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]);
1591                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]);
1592                 madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]);
1593                 madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]);
1594                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]);
1595                 if (face[3]) {
1596                         madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]);
1597                         madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]);
1598                         madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]);
1599                 }
1600         }
1601         else {
1602                 shi->ao[0]= 1.0f;
1603                 shi->ao[1]= 1.0f;
1604                 shi->ao[2]= 1.0f;
1605                 zero_v3(shi->env);
1606                 zero_v3(shi->indirect);
1607         }
1608 }
1609
1610 /* ------------------------- External Functions --------------------------- */
1611
1612 static void *exec_strandsurface_sample(void *data)
1613 {
1614         OcclusionThread *othread= (OcclusionThread*)data;
1615         Render *re= othread->re;
1616         StrandSurface *mesh= othread->mesh;
1617         float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4;
1618         int a, *face;
1619
1620         for (a=othread->begin; a<othread->end; a++) {
1621                 face= mesh->face[a];
1622                 co1= mesh->co[face[0]];
1623                 co2= mesh->co[face[1]];
1624                 co3= mesh->co[face[2]];
1625
1626                 if (face[3]) {
1627                         co4= mesh->co[face[3]];
1628
1629                         mid_v3_v3v3(co, co1, co3);
1630                         normal_quad_v3( n,co1, co2, co3, co4);
1631                 }
1632                 else {
1633                         cent_tri_v3(co, co1, co2, co3);
1634                         normal_tri_v3( n,co1, co2, co3);
1635                 }
1636                 negate_v3(n);
1637
1638                 sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect);
1639                 copy_v3_v3(othread->faceao[a], ao);
1640                 copy_v3_v3(othread->faceenv[a], env);
1641                 copy_v3_v3(othread->faceindirect[a], indirect);
1642         }
1643
1644         return 0;
1645 }
1646
1647 void make_occ_tree(Render *re)
1648 {
1649         OcclusionThread othreads[BLENDER_MAX_THREADS];
1650         OcclusionTree *tree;
1651         StrandSurface *mesh;
1652         ListBase threads;
1653         float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3];
1654         int a, totface, totthread, *face, *count;
1655
1656         /* ugly, needed for occ_face */
1657         R= *re;
1658
1659         re->i.infostr= "Occlusion preprocessing";
1660         re->stats_draw(re->sdh, &re->i);
1661         
1662         re->occlusiontree= tree= occ_tree_build(re);
1663         
1664         if (tree) {
1665                 if (re->wrld.ao_approx_passes > 0)
1666                         occ_compute_passes(re, tree, re->wrld.ao_approx_passes);
1667                 if (tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT))
1668                         occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces);
1669
1670                 for (mesh=re->strandsurface.first; mesh; mesh=mesh->next) {
1671                         if (!mesh->face || !mesh->co || !mesh->ao)
1672                                 continue;
1673
1674                         count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount");
1675                         faceao= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceAO");
1676                         faceenv= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceEnv");
1677                         faceindirect= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceIndirect");
1678
1679                         totthread= (mesh->totface > 10000)? re->r.threads: 1;
1680                         totface= mesh->totface/totthread;
1681                         for (a=0; a<totthread; a++) {
1682                                 othreads[a].re= re;
1683                                 othreads[a].faceao= faceao;
1684                                 othreads[a].faceenv= faceenv;
1685                                 othreads[a].faceindirect= faceindirect;
1686                                 othreads[a].thread= a;
1687                                 othreads[a].mesh= mesh;
1688                                 othreads[a].begin= a*totface;
1689                                 othreads[a].end= (a == totthread-1)? mesh->totface: (a+1)*totface;
1690                         }
1691
1692                         if (totthread == 1) {
1693                                 exec_strandsurface_sample(&othreads[0]);
1694                         }
1695                         else {
1696                                 BLI_init_threads(&threads, exec_strandsurface_sample, totthread);
1697
1698                                 for (a=0; a<totthread; a++)
1699                                         BLI_insert_thread(&threads, &othreads[a]);
1700
1701                                 BLI_end_threads(&threads);
1702                         }
1703
1704                         for (a=0; a<mesh->totface; a++) {
1705                                 face= mesh->face[a];
1706
1707                                 copy_v3_v3(ao, faceao[a]);
1708                                 copy_v3_v3(env, faceenv[a]);
1709                                 copy_v3_v3(indirect, faceindirect[a]);
1710
1711                                 add_v3_v3(mesh->ao[face[0]], ao);
1712                                 add_v3_v3(mesh->env[face[0]], env);
1713                                 add_v3_v3(mesh->indirect[face[0]], indirect);
1714                                 count[face[0]]++;
1715                                 add_v3_v3(mesh->ao[face[1]], ao);
1716                                 add_v3_v3(mesh->env[face[1]], env);
1717                                 add_v3_v3(mesh->indirect[face[1]], indirect);
1718                                 count[face[1]]++;
1719                                 add_v3_v3(mesh->ao[face[2]], ao);
1720                                 add_v3_v3(mesh->env[face[2]], env);
1721                                 add_v3_v3(mesh->indirect[face[2]], indirect);
1722                                 count[face[2]]++;
1723
1724                                 if (face[3]) {
1725                                         add_v3_v3(mesh->ao[face[3]], ao);
1726                                         add_v3_v3(mesh->env[face[3]], env);
1727                                         add_v3_v3(mesh->indirect[face[3]], indirect);
1728                                         count[face[3]]++;
1729                                 }
1730                         }
1731
1732                         for (a=0; a<mesh->totvert; a++) {
1733                                 if (count[a]) {
1734                                         mul_v3_fl(mesh->ao[a], 1.0f/count[a]);
1735                                         mul_v3_fl(mesh->env[a], 1.0f/count[a]);
1736                                         mul_v3_fl(mesh->indirect[a], 1.0f/count[a]);
1737                                 }
1738                         }
1739
1740                         MEM_freeN(count);
1741                         MEM_freeN(faceao);
1742                         MEM_freeN(faceenv);
1743                         MEM_freeN(faceindirect);
1744                 }
1745         }
1746 }
1747
1748 void free_occ(Render *re)
1749 {
1750         if (re->occlusiontree) {
1751                 occ_free_tree(re->occlusiontree);
1752                 re->occlusiontree = NULL;
1753         }
1754 }
1755
1756 void sample_occ(Render *re, ShadeInput *shi)
1757 {
1758         OcclusionTree *tree= re->occlusiontree;
1759         OcclusionCache *cache;
1760         OcclusionCacheSample *sample;
1761         OccFace exclude;
1762         int onlyshadow;
1763
1764         if (tree) {
1765                 if (shi->strand) {
1766                         sample_occ_surface(shi);
1767                 }
1768                 /* try to get result from the cache if possible */
1769                 else if (shi->depth!=0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) {
1770                         /* no luck, let's sample the occlusion */
1771                         exclude.obi= shi->obi - re->objectinstance;
1772                         exclude.facenr= shi->vlr->index;
1773                         onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
1774                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1775
1776                         /* fill result into sample, each time */
1777                         if (tree->cache) {
1778                                 cache= &tree->cache[shi->thread];
1779
1780                                 if (cache->sample && cache->step) {
1781                                         sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)];
1782                                         copy_v3_v3(sample->co, shi->co);
1783                                         copy_v3_v3(sample->n, shi->vno);
1784                                         copy_v3_v3(sample->ao, shi->ao);
1785                                         copy_v3_v3(sample->env, shi->env);
1786                                         copy_v3_v3(sample->indirect, shi->indirect);
1787                                         sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]);
1788                                         sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2]));
1789                                         sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1790                                         sample->dist2= dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
1791                                         sample->filled= 1;
1792                                 }
1793                         }
1794                 }
1795         }
1796         else {
1797                 shi->ao[0]= 1.0f;
1798                 shi->ao[1]= 1.0f;
1799                 shi->ao[2]= 1.0f;
1800
1801                 shi->env[0]= 0.0f;
1802                 shi->env[1]= 0.0f;
1803                 shi->env[2]= 0.0f;
1804
1805                 shi->indirect[0]= 0.0f;
1806                 shi->indirect[1]= 0.0f;
1807                 shi->indirect[2]= 0.0f;
1808         }
1809 }
1810
1811 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
1812 {
1813         OcclusionTree *tree= re->occlusiontree;
1814         PixStr ps;
1815         OcclusionCache *cache;
1816         OcclusionCacheSample *sample;
1817         OccFace exclude;
1818         ShadeInput *shi;
1819         intptr_t *rd=NULL;
1820         int *ro=NULL, *rp=NULL, *rz=NULL, onlyshadow;
1821         int x, y, step = CACHE_STEP;
1822
1823         if (!tree->cache)
1824                 return;
1825
1826         cache= &tree->cache[pa->thread];
1827         cache->w= pa->rectx;
1828         cache->h= pa->recty;
1829         cache->x= pa->disprect.xmin;
1830         cache->y= pa->disprect.ymin;
1831         cache->step= step;
1832         cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample");
1833         sample= cache->sample;
1834
1835         if (re->osa) {
1836                 rd= pa->rectdaps;
1837         }
1838         else {
1839                 /* fake pixel struct for non-osa */
1840                 ps.next= NULL;
1841                 ps.mask= 0xFFFF;
1842
1843                 ro= pa->recto;
1844                 rp= pa->rectp;
1845                 rz= pa->rectz;
1846         }
1847
1848         /* compute a sample at every step pixels */
1849         for (y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
1850                 for (x=pa->disprect.xmin; x<pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) {
1851                         if (!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1))
1852                                 continue;
1853                         if (!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1))
1854                                 continue;
1855
1856                         if (re->osa) {
1857                                 if (!*rd) continue;
1858
1859                                 shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y);
1860                         }
1861                         else {
1862                                 if (!*rp) continue;
1863
1864                                 ps.obi= *ro;
1865                                 ps.facenr= *rp;
1866                                 ps.z= *rz;
1867                                 shade_samples_fill_with_ps(ssamp, &ps, x, y);
1868                         }
1869
1870                         shi= ssamp->shi;
1871                         if (shi->vlr) {
1872                                 onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
1873                                 exclude.obi= shi->obi - re->objectinstance;
1874                                 exclude.facenr= shi->vlr->index;
1875                                 sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1876
1877                                 copy_v3_v3(sample->co, shi->co);
1878                                 copy_v3_v3(sample->n, shi->vno);
1879                                 copy_v3_v3(sample->ao, shi->ao);
1880                                 copy_v3_v3(sample->env, shi->env);
1881                                 copy_v3_v3(sample->indirect, shi->indirect);
1882                                 sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]);
1883                                 sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2]));
1884                                 sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1885                                 sample->dist2= dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
1886                                 sample->x= shi->xs;
1887                                 sample->y= shi->ys;
1888                                 sample->filled= 1;
1889                         }
1890
1891                         if (re->test_break(re->tbh))
1892                                 break;
1893                 }
1894         }
1895 }
1896
1897 void free_occ_samples(Render *re, RenderPart *pa)
1898 {
1899         OcclusionTree *tree= re->occlusiontree;
1900         OcclusionCache *cache;
1901
1902         if (tree->cache) {
1903                 cache= &tree->cache[pa->thread];
1904
1905                 if (cache->sample)
1906                         MEM_freeN(cache->sample);
1907
1908                 cache->w= 0;
1909                 cache->h= 0;
1910                 cache->step= 0;
1911         }
1912 }
1913