replace MIN/MAX 3,4 with inline functions
[blender-staging.git] / source / blender / render / intern / source / occlusion.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2008 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Brecht Van Lommel.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/render/intern/source/occlusion.c
29  *  \ingroup render
30  */
31
32 #include <math.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36
37 #include "MEM_guardedalloc.h"
38
39 #include "DNA_material_types.h"
40
41 #include "BLI_math.h"
42 #include "BLI_blenlib.h"
43 #include "BLI_memarena.h"
44 #include "BLI_threads.h"
45 #include "BLI_utildefines.h"
46
47 #include "BKE_global.h"
48 #include "BKE_scene.h"
49
50
51 #include "RE_shader_ext.h"
52
53 /* local includes */
54 #include "occlusion.h"
55 #include "render_types.h"
56 #include "rendercore.h"
57 #include "renderdatabase.h"
58 #include "pixelshading.h"
59 #include "shading.h"
60 #include "zbuf.h"
61
62 /* ------------------------- Declarations --------------------------- */
63
64 #define INVPI ((float)M_1_PI)
65 #define TOTCHILD 8
66 #define CACHE_STEP 3
67
68 typedef struct OcclusionCacheSample {
69         float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2;
70         int x, y, filled;
71 } OcclusionCacheSample;
72
73 typedef struct OcclusionCache {
74         OcclusionCacheSample *sample;
75         int x, y, w, h, step;
76 } OcclusionCache;
77
78 typedef struct OccFace {
79         int obi;
80         int facenr;
81 } OccFace;
82
83 typedef struct OccNode {
84         float co[3], area;
85         float sh[9], dco;
86         float occlusion, rad[3];
87         int childflag;
88         union {
89                 //OccFace face;
90                 int face;
91                 struct OccNode *node;
92         } child[TOTCHILD];
93 } OccNode;
94
95 typedef struct OcclusionTree {
96         MemArena *arena;
97
98         float (*co)[3];     /* temporary during build */
99
100         OccFace *face;      /* instance and face indices */
101         float *occlusion;   /* occlusion for faces */
102         float (*rad)[3];    /* radiance for faces */
103         
104         OccNode *root;
105
106         OccNode **stack[BLENDER_MAX_THREADS];
107         int maxdepth;
108
109         int totface;
110
111         float error;
112         float distfac;
113
114         int dothreadedbuild;
115         int totbuildthread;
116         int doindirect;
117
118         OcclusionCache *cache;
119 } OcclusionTree;
120
121 typedef struct OcclusionThread {
122         Render *re;
123         StrandSurface *mesh;
124         float (*faceao)[3];
125         float (*faceenv)[3];
126         float (*faceindirect)[3];
127         int begin, end;
128         int thread;
129 } OcclusionThread;
130
131 typedef struct OcclusionBuildThread {
132         OcclusionTree *tree;
133         int begin, end, depth;
134         OccNode *node;
135 } OcclusionBuildThread;
136
137 /* ------------------------- Shading --------------------------- */
138
139 extern Render R; /* meh */
140
141 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
142 {
143         ShadeInput *shi = ssamp->shi;
144         ShadeResult *shr = ssamp->shr;
145         float l, u, v, *v1, *v2, *v3;
146         
147         /* init */
148         if (vlr->v4) {
149                 shi->u = u = 0.5f;
150                 shi->v = v = 0.5f;
151         }
152         else {
153                 shi->u = u = 1.0f / 3.0f;
154                 shi->v = v = 1.0f / 3.0f;
155         }
156
157         /* setup render coordinates */
158         v1 = vlr->v1->co;
159         v2 = vlr->v2->co;
160         v3 = vlr->v3->co;
161         
162         /* renderco */
163         l = 1.0f - u - v;
164         
165         shi->co[0] = l * v3[0] + u * v1[0] + v * v2[0];
166         shi->co[1] = l * v3[1] + u * v1[1] + v * v2[1];
167         shi->co[2] = l * v3[2] + u * v1[2] + v * v2[2];
168
169         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
170
171         /* set up view vector */
172         copy_v3_v3(shi->view, shi->co);
173         normalize_v3(shi->view);
174         
175         /* cache for shadow */
176         shi->samplenr++;
177
178         shi->xs = 0; /* TODO */
179         shi->ys = 0;
180         
181         shade_input_set_normals(shi);
182
183         /* no normal flip */
184         if (shi->flippednor)
185                 shade_input_flip_normals(shi);
186
187         madd_v3_v3fl(shi->co, shi->facenor, -0.0001f); /* ugly.. */
188
189         /* not a pretty solution, but fixes common cases */
190         if (shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
191                 negate_v3(shi->vn);
192                 negate_v3(shi->vno);
193                 negate_v3(shi->nmapnorm);
194         }
195
196         /* init material vars */
197         /* note, keep this synced with render_types.h */
198         memcpy(&shi->r, &shi->mat->r, 23 * sizeof(float));
199         shi->har = shi->mat->har;
200         
201         /* render */
202         shade_input_set_shade_texco(shi);
203         shade_material_loop(shi, shr); /* todo: nodes */
204         
205         copy_v3_v3(rad, shr->combined);
206 }
207
208 static void occ_build_shade(Render *re, OcclusionTree *tree)
209 {
210         ShadeSample ssamp;
211         ObjectInstanceRen *obi;
212         VlakRen *vlr;
213         int a;
214
215         R = *re;
216
217         /* setup shade sample with correct passes */
218         memset(&ssamp, 0, sizeof(ShadeSample));
219         ssamp.shi[0].lay = re->lay;
220         ssamp.shi[0].passflag = SCE_PASS_DIFFUSE | SCE_PASS_RGBA;
221         ssamp.shi[0].combinedflag = ~(SCE_PASS_SPEC);
222         ssamp.tot = 1;
223
224         for (a = 0; a < tree->totface; a++) {
225                 obi = &R.objectinstance[tree->face[a].obi];
226                 vlr = RE_findOrAddVlak(obi->obr, tree->face[a].facenr);
227
228                 occ_shade(&ssamp, obi, vlr, tree->rad[a]);
229         }
230 }
231
232 /* ------------------------- Spherical Harmonics --------------------------- */
233
234 /* Use 2nd order SH => 9 coefficients, stored in this order:
235  * 0 = (0,0),
236  * 1 = (1,-1), 2 = (1,0), 3 = (1,1),
237  * 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
238
239 static void sh_copy(float *shresult, float *sh)
240 {
241         memcpy(shresult, sh, sizeof(float) * 9);
242 }
243
244 static void sh_mul(float *sh, float f)
245 {
246         int i;
247
248         for (i = 0; i < 9; i++)
249                 sh[i] *= f;
250 }
251
252 static void sh_add(float *shresult, float *sh1, float *sh2)
253 {
254         int i;
255
256         for (i = 0; i < 9; i++)
257                 shresult[i] = sh1[i] + sh2[i];
258 }
259
260 static void sh_from_disc(float *n, float area, float *shresult)
261 {
262         /* See formula (3) in:
263          * "An Efficient Representation for Irradiance Environment Maps" */
264         float sh[9], x, y, z;
265
266         x = n[0];
267         y = n[1];
268         z = n[2];
269
270         sh[0] = 0.282095f;
271
272         sh[1] = 0.488603f * y;
273         sh[2] = 0.488603f * z;
274         sh[3] = 0.488603f * x;
275
276         sh[4] = 1.092548f * x * y;
277         sh[5] = 1.092548f * y * z;
278         sh[6] = 0.315392f * (3.0f * z * z - 1.0f);
279         sh[7] = 1.092548f * x * z;
280         sh[8] = 0.546274f * (x * x - y * y);
281
282         sh_mul(sh, area);
283         sh_copy(shresult, sh);
284 }
285
286 static float sh_eval(float *sh, float *v)
287 {
288         /* See formula (13) in:
289          * "An Efficient Representation for Irradiance Environment Maps" */
290         static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
291         static const float c4 = 0.886227f, c5 = 0.247708f;
292         float x, y, z, sum;
293
294         x = v[0];
295         y = v[1];
296         z = v[2];
297
298         sum = c1 * sh[8] * (x * x - y * y);
299         sum += c3 * sh[6] * z * z;
300         sum += c4 * sh[0];
301         sum += -c5 * sh[6];
302         sum += 2.0f * c1 * (sh[4] * x * y + sh[7] * x * z + sh[5] * y * z);
303         sum += 2.0f * c2 * (sh[3] * x + sh[1] * y + sh[2] * z);
304
305         return sum;
306 }
307
308 /* ------------------------------ Building --------------------------------- */
309
310 static void occ_face(const OccFace *face, float co[3], float normal[3], float *area)
311 {
312         ObjectInstanceRen *obi;
313         VlakRen *vlr;
314         float v1[3], v2[3], v3[3], v4[3];
315
316         obi = &R.objectinstance[face->obi];
317         vlr = RE_findOrAddVlak(obi->obr, face->facenr);
318         
319         if (co) {
320                 if (vlr->v4)
321                         mid_v3_v3v3(co, vlr->v1->co, vlr->v3->co);
322                 else
323                         cent_tri_v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
324
325                 if (obi->flag & R_TRANSFORMED)
326                         mul_m4_v3(obi->mat, co);
327         }
328         
329         if (normal) {
330                 normal[0] = -vlr->n[0];
331                 normal[1] = -vlr->n[1];
332                 normal[2] = -vlr->n[2];
333
334                 if (obi->flag & R_TRANSFORMED)
335                         mul_m3_v3(obi->nmat, normal);
336         }
337
338         if (area) {
339                 copy_v3_v3(v1, vlr->v1->co);
340                 copy_v3_v3(v2, vlr->v2->co);
341                 copy_v3_v3(v3, vlr->v3->co);
342                 if (vlr->v4) copy_v3_v3(v4, vlr->v4->co);
343
344                 if (obi->flag & R_TRANSFORMED) {
345                         mul_m4_v3(obi->mat, v1);
346                         mul_m4_v3(obi->mat, v2);
347                         mul_m4_v3(obi->mat, v3);
348                         if (vlr->v4) mul_m4_v3(obi->mat, v4);
349                 }
350
351                 /* todo: correct area for instances */
352                 if (vlr->v4)
353                         *area = area_quad_v3(v1, v2, v3, v4);
354                 else
355                         *area = area_tri_v3(v1, v2, v3);
356         }
357 }
358
359 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
360 {
361         OccNode *child;
362         float occ, area, totarea, rad[3];
363         int a, b, indirect = tree->doindirect;
364
365         occ = 0.0f;
366         totarea = 0.0f;
367         if (indirect) zero_v3(rad);
368
369         for (b = 0; b < TOTCHILD; b++) {
370                 if (node->childflag & (1 << b)) {
371                         a = node->child[b].face;
372                         occ_face(&tree->face[a], 0, 0, &area);
373                         occ += area * tree->occlusion[a];
374                         if (indirect) madd_v3_v3fl(rad, tree->rad[a], area);
375                         totarea += area;
376                 }
377                 else if (node->child[b].node) {
378                         child = node->child[b].node;
379                         occ_sum_occlusion(tree, child);
380
381                         occ += child->area * child->occlusion;
382                         if (indirect) madd_v3_v3fl(rad, child->rad, child->area);
383                         totarea += child->area;
384                 }
385         }
386
387         if (totarea != 0.0f) {
388                 occ /= totarea;
389                 if (indirect) mul_v3_fl(rad, 1.0f / totarea);
390         }
391         
392         node->occlusion = occ;
393         if (indirect) copy_v3_v3(node->rad, rad);
394 }
395
396 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
397 {
398         float len, maxlen = -1.0f;
399         int a, axis = 0;
400
401         INIT_MINMAX(min, max);
402
403         for (a = begin; a < end; a++) {
404                 minmax_v3v3_v3(min, max, tree->co[a]);
405         }
406
407         for (a = 0; a < 3; a++) {
408                 len = max[a] - min[a];
409
410                 if (len > maxlen) {
411                         maxlen = len;
412                         axis = a;
413                 }
414         }
415
416         return axis;
417 }
418
419 static void occ_node_from_face(OccFace *face, OccNode *node)
420 {
421         float n[3];
422
423         occ_face(face, node->co, n, &node->area);
424         node->dco = 0.0f;
425         sh_from_disc(n, node->area, node->sh);
426 }
427
428 static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco)
429 {
430         int b;
431         for (b = 0; b < TOTCHILD; b++) {
432                 float dist, d[3], nco[3];
433
434                 if (node->childflag & (1 << b)) {
435                         occ_face(tree->face + node->child[b].face, nco, NULL, NULL);
436                 }
437                 else if (node->child[b].node) {
438                         OccNode *child = node->child[b].node;
439                         occ_build_dco(tree, child, co, dco);
440                         copy_v3_v3(nco, child->co);
441                 }
442                 else {
443                         continue;
444                 }
445
446                 sub_v3_v3v3(d, nco, co);
447                 dist = dot_v3v3(d, d);
448                 if (dist > *dco)
449                         *dco = dist;
450         }
451 }
452
453 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
454 {
455         float min[3], max[3], mid;
456         int axis, a, enda;
457
458         /* split in middle of boundbox. this seems faster than median split
459          * on complex scenes, possibly since it avoids two distant faces to
460          * be in the same node better? */
461         axis = occ_find_bbox_axis(tree, begin, end, min, max);
462         mid = 0.5f * (min[axis] + max[axis]);
463
464         a = begin;
465         enda = end;
466         while (a < enda) {
467                 if (tree->co[a][axis] > mid) {
468                         enda--;
469                         SWAP(OccFace, tree->face[a], tree->face[enda]);
470                         SWAP(float, tree->co[a][0], tree->co[enda][0]);
471                         SWAP(float, tree->co[a][1], tree->co[enda][1]);
472                         SWAP(float, tree->co[a][2], tree->co[enda][2]);
473                 }
474                 else
475                         a++;
476         }
477
478         *split = enda;
479 }
480
481 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
482 {
483         /* split faces into eight groups */
484         int b, splitx, splity[2], splitz[4];
485
486         occ_build_split(tree, begin, end, &splitx);
487
488         /* force split if none found, to deal with degenerate geometry */
489         if (splitx == begin || splitx == end)
490                 splitx = (begin + end) / 2;
491
492         occ_build_split(tree, begin, splitx, &splity[0]);
493         occ_build_split(tree, splitx, end, &splity[1]);
494
495         occ_build_split(tree, begin, splity[0], &splitz[0]);
496         occ_build_split(tree, splity[0], splitx, &splitz[1]);
497         occ_build_split(tree, splitx, splity[1], &splitz[2]);
498         occ_build_split(tree, splity[1], end, &splitz[3]);
499
500         offset[0] = begin;
501         offset[1] = splitz[0];
502         offset[2] = splity[0];
503         offset[3] = splitz[1];
504         offset[4] = splitx;
505         offset[5] = splitz[2];
506         offset[6] = splity[1];
507         offset[7] = splitz[3];
508
509         for (b = 0; b < 7; b++)
510                 count[b] = offset[b + 1] - offset[b];
511         count[7] = end - offset[7];
512 }
513
514 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth);
515
516 static void *exec_occ_build(void *data)
517 {
518         OcclusionBuildThread *othread = (OcclusionBuildThread *)data;
519
520         occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth);
521
522         return 0;
523 }
524
525 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth)
526 {
527         ListBase threads;
528         OcclusionBuildThread othreads[BLENDER_MAX_THREADS];
529         OccNode *child, tmpnode;
530         /* OccFace *face; */
531         int a, b, totthread = 0, offset[TOTCHILD], count[TOTCHILD];
532
533         /* add a new node */
534         node->occlusion = 1.0f;
535
536         /* leaf node with only children */
537         if (end - begin <= TOTCHILD) {
538                 for (a = begin, b = 0; a < end; a++, b++) {
539                         /* face= &tree->face[a]; */
540                         node->child[b].face = a;
541                         node->childflag |= (1 << b);
542                 }
543         }
544         else {
545                 /* order faces */
546                 occ_build_8_split(tree, begin, end, offset, count);
547
548                 if (depth == 1 && tree->dothreadedbuild)
549                         BLI_init_threads(&threads, exec_occ_build, tree->totbuildthread);
550
551                 for (b = 0; b < TOTCHILD; b++) {
552                         if (count[b] == 0) {
553                                 node->child[b].node = NULL;
554                         }
555                         else if (count[b] == 1) {
556                                 /* face= &tree->face[offset[b]]; */
557                                 node->child[b].face = offset[b];
558                                 node->childflag |= (1 << b);
559                         }
560                         else {
561                                 if (tree->dothreadedbuild)
562                                         BLI_lock_thread(LOCK_CUSTOM1);
563
564                                 child = BLI_memarena_alloc(tree->arena, sizeof(OccNode));
565                                 node->child[b].node = child;
566
567                                 /* keep track of maximum depth for stack */
568                                 if (depth + 1 > tree->maxdepth)
569                                         tree->maxdepth = depth + 1;
570
571                                 if (tree->dothreadedbuild)
572                                         BLI_unlock_thread(LOCK_CUSTOM1);
573
574                                 if (depth == 1 && tree->dothreadedbuild) {
575                                         othreads[totthread].tree = tree;
576                                         othreads[totthread].node = child;
577                                         othreads[totthread].begin = offset[b];
578                                         othreads[totthread].end = offset[b] + count[b];
579                                         othreads[totthread].depth = depth + 1;
580                                         BLI_insert_thread(&threads, &othreads[totthread]);
581                                         totthread++;
582                                 }
583                                 else
584                                         occ_build_recursive(tree, child, offset[b], offset[b] + count[b], depth + 1);
585                         }
586                 }
587
588                 if (depth == 1 && tree->dothreadedbuild)
589                         BLI_end_threads(&threads);
590         }
591
592         /* combine area, position and sh */
593         for (b = 0; b < TOTCHILD; b++) {
594                 if (node->childflag & (1 << b)) {
595                         child = &tmpnode;
596                         occ_node_from_face(tree->face + node->child[b].face, &tmpnode);
597                 }
598                 else {
599                         child = node->child[b].node;
600                 }
601
602                 if (child) {
603                         node->area += child->area;
604                         sh_add(node->sh, node->sh, child->sh);
605                         madd_v3_v3fl(node->co, child->co, child->area);
606                 }
607         }
608
609         if (node->area != 0.0f)
610                 mul_v3_fl(node->co, 1.0f / node->area);
611
612         /* compute maximum distance from center */
613         node->dco = 0.0f;
614         if (node->area > 0.0f)
615                 occ_build_dco(tree, node, node->co, &node->dco);
616 }
617
618 static void occ_build_sh_normalize(OccNode *node)
619 {
620         /* normalize spherical harmonics to not include area, so
621          * we can clamp the dot product and then mutliply by area */
622         int b;
623
624         if (node->area != 0.0f)
625                 sh_mul(node->sh, 1.0f / node->area);
626
627         for (b = 0; b < TOTCHILD; b++) {
628                 if (node->childflag & (1 << b)) {
629                         /* pass */
630                 }
631                 else if (node->child[b].node) {
632                         occ_build_sh_normalize(node->child[b].node);
633                 }
634         }
635 }
636
637 static OcclusionTree *occ_tree_build(Render *re)
638 {
639         OcclusionTree *tree;
640         ObjectInstanceRen *obi;
641         ObjectRen *obr;
642         Material *ma;
643         VlakRen *vlr = NULL;
644         int a, b, c, totface;
645
646         /* count */
647         totface = 0;
648         for (obi = re->instancetable.first; obi; obi = obi->next) {
649                 obr = obi->obr;
650                 for (a = 0; a < obr->totvlak; a++) {
651                         if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak;
652                         else vlr++;
653
654                         ma = vlr->mat;
655
656                         if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE))
657                                 totface++;
658                 }
659         }
660
661         if (totface == 0)
662                 return NULL;
663         
664         tree = MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
665         tree->totface = totface;
666
667         /* parameters */
668         tree->error = get_render_aosss_error(&re->r, re->wrld.ao_approx_error);
669         tree->distfac = (re->wrld.aomode & WO_AODIST) ? re->wrld.aodistfac : 0.0f;
670         tree->doindirect = (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0);
671
672         /* allocation */
673         tree->arena = BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena");
674         BLI_memarena_use_calloc(tree->arena);
675
676         if (re->wrld.aomode & WO_AOCACHE)
677                 tree->cache = MEM_callocN(sizeof(OcclusionCache) * BLENDER_MAX_THREADS, "OcclusionCache");
678
679         tree->face = MEM_callocN(sizeof(OccFace) * totface, "OcclusionFace");
680         tree->co = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionCo");
681         tree->occlusion = MEM_callocN(sizeof(float) * totface, "OcclusionOcclusion");
682
683         if (tree->doindirect)
684                 tree->rad = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionRad");
685
686         /* make array of face pointers */
687         for (b = 0, c = 0, obi = re->instancetable.first; obi; obi = obi->next, c++) {
688                 obr = obi->obr;
689                 for (a = 0; a < obr->totvlak; a++) {
690                         if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak;
691                         else vlr++;
692
693                         ma = vlr->mat;
694
695                         if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) {
696                                 tree->face[b].obi = c;
697                                 tree->face[b].facenr = a;
698                                 tree->occlusion[b] = 1.0f;
699                                 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 
700                                 b++;
701                         }
702                 }
703         }
704
705         /* threads */
706         tree->totbuildthread = (re->r.threads > 1 && totface > 10000) ? 8 : 1;
707         tree->dothreadedbuild = (tree->totbuildthread > 1);
708
709         /* recurse */
710         tree->root = BLI_memarena_alloc(tree->arena, sizeof(OccNode));
711         tree->maxdepth = 1;
712         occ_build_recursive(tree, tree->root, 0, totface, 1);
713
714         if (tree->doindirect) {
715                 occ_build_shade(re, tree);
716                 occ_sum_occlusion(tree, tree->root);
717         }
718         
719         MEM_freeN(tree->co);
720         tree->co = NULL;
721
722         occ_build_sh_normalize(tree->root);
723
724         for (a = 0; a < BLENDER_MAX_THREADS; a++)
725                 tree->stack[a] = MEM_callocN(sizeof(OccNode) * TOTCHILD * (tree->maxdepth + 1), "OccStack");
726
727         return tree;
728 }
729
730 static void occ_free_tree(OcclusionTree *tree)
731 {
732         int a;
733
734         if (tree) {
735                 if (tree->arena) BLI_memarena_free(tree->arena);
736                 for (a = 0; a < BLENDER_MAX_THREADS; a++)
737                         if (tree->stack[a])
738                                 MEM_freeN(tree->stack[a]);
739                 if (tree->occlusion) MEM_freeN(tree->occlusion);
740                 if (tree->cache) MEM_freeN(tree->cache);
741                 if (tree->face) MEM_freeN(tree->face);
742                 if (tree->rad) MEM_freeN(tree->rad);
743                 MEM_freeN(tree);
744         }
745 }
746
747 /* ------------------------- Traversal --------------------------- */
748
749 static float occ_solid_angle(OccNode *node, const float v[3], float d2, float invd2, const float receivenormal[3])
750 {
751         float dotreceive, dotemit;
752         float ev[3];
753
754         ev[0] = -v[0] * invd2;
755         ev[1] = -v[1] * invd2;
756         ev[2] = -v[2] * invd2;
757         dotemit = sh_eval(node->sh, ev);
758         dotreceive = dot_v3v3(receivenormal, v) * invd2;
759
760         CLAMP(dotemit, 0.0f, 1.0f);
761         CLAMP(dotreceive, 0.0f, 1.0f);
762         
763         return ((node->area * dotemit * dotreceive) / (d2 + node->area * INVPI)) * INVPI;
764 }
765
766 static void vec_add_dir(float r[3], const float v1[3], const float v2[3], const float fac)
767 {
768         r[0] = v1[0] + fac * (v2[0] - v1[0]);
769         r[1] = v1[1] + fac * (v2[1] - v1[1]);
770         r[2] = v1[2] + fac * (v2[2] - v1[2]);
771 }
772
773 /* TODO: exact duplicate of ff_visible_quad() in math_geom.c
774  *       why not de-duplicate? (also above helper func) */
775 static int occ_visible_quad(const float p[3], const float n[3],
776                             const float v0[3], const float v1[3], const float v2[3],
777                             float q0[3], float q1[3], float q2[3], float q3[3])
778 {
779         static const float epsilon = 1e-6f;
780         float c, sd[3];
781         
782         c = dot_v3v3(n, p);
783
784         /* signed distances from the vertices to the plane. */
785         sd[0] = dot_v3v3(n, v0) - c;
786         sd[1] = dot_v3v3(n, v1) - c;
787         sd[2] = dot_v3v3(n, v2) - c;
788
789         if (fabsf(sd[0]) < epsilon) sd[0] = 0.0f;
790         if (fabsf(sd[1]) < epsilon) sd[1] = 0.0f;
791         if (fabsf(sd[2]) < epsilon) sd[2] = 0.0f;
792
793         if (sd[0] > 0) {
794                 if (sd[1] > 0) {
795                         if (sd[2] > 0) {
796                                 /* +++ */
797                                 copy_v3_v3(q0, v0);
798                                 copy_v3_v3(q1, v1);
799                                 copy_v3_v3(q2, v2);
800                                 copy_v3_v3(q3, q2);
801                         }
802                         else if (sd[2] < 0) {
803                                 /* ++- */
804                                 copy_v3_v3(q0, v0);
805                                 copy_v3_v3(q1, v1);
806                                 vec_add_dir(q2, v1, v2, (sd[1] / (sd[1] - sd[2])));
807                                 vec_add_dir(q3, v0, v2, (sd[0] / (sd[0] - sd[2])));
808                         }
809                         else {
810                                 /* ++0 */
811                                 copy_v3_v3(q0, v0);
812                                 copy_v3_v3(q1, v1);
813                                 copy_v3_v3(q2, v2);
814                                 copy_v3_v3(q3, q2);
815                         }
816                 }
817                 else if (sd[1] < 0) {
818                         if (sd[2] > 0) {
819                                 /* +-+ */
820                                 copy_v3_v3(q0, v0);
821                                 vec_add_dir(q1, v0, v1, (sd[0] / (sd[0] - sd[1])));
822                                 vec_add_dir(q2, v1, v2, (sd[1] / (sd[1] - sd[2])));
823                                 copy_v3_v3(q3, v2);
824                         }
825                         else if (sd[2] < 0) {
826                                 /* +-- */
827                                 copy_v3_v3(q0, v0);
828                                 vec_add_dir(q1, v0, v1, (sd[0] / (sd[0] - sd[1])));
829                                 vec_add_dir(q2, v0, v2, (sd[0] / (sd[0] - sd[2])));
830                                 copy_v3_v3(q3, q2);
831                         }
832                         else {
833                                 /* +-0 */
834                                 copy_v3_v3(q0, v0);
835                                 vec_add_dir(q1, v0, v1, (sd[0] / (sd[0] - sd[1])));
836                                 copy_v3_v3(q2, v2);
837                                 copy_v3_v3(q3, q2);
838                         }
839                 }
840                 else {
841                         if (sd[2] > 0) {
842                                 /* +0+ */
843                                 copy_v3_v3(q0, v0);
844                                 copy_v3_v3(q1, v1);
845                                 copy_v3_v3(q2, v2);
846                                 copy_v3_v3(q3, q2);
847                         }
848                         else if (sd[2] < 0) {
849                                 /* +0- */
850                                 copy_v3_v3(q0, v0);
851                                 copy_v3_v3(q1, v1);
852                                 vec_add_dir(q2, v0, v2, (sd[0] / (sd[0] - sd[2])));
853                                 copy_v3_v3(q3, q2);
854                         }
855                         else {
856                                 /* +00 */
857                                 copy_v3_v3(q0, v0);
858                                 copy_v3_v3(q1, v1);
859                                 copy_v3_v3(q2, v2);
860                                 copy_v3_v3(q3, q2);
861                         }
862                 }
863         }
864         else if (sd[0] < 0) {
865                 if (sd[1] > 0) {
866                         if (sd[2] > 0) {
867                                 /* -++ */
868                                 vec_add_dir(q0, v0, v1, (sd[0] / (sd[0] - sd[1])));
869                                 copy_v3_v3(q1, v1);
870                                 copy_v3_v3(q2, v2);
871                                 vec_add_dir(q3, v0, v2, (sd[0] / (sd[0] - sd[2])));
872                         }
873                         else if (sd[2] < 0) {
874                                 /* -+- */
875                                 vec_add_dir(q0, v0, v1, (sd[0] / (sd[0] - sd[1])));
876                                 copy_v3_v3(q1, v1);
877                                 vec_add_dir(q2, v1, v2, (sd[1] / (sd[1] - sd[2])));
878                                 copy_v3_v3(q3, q2);
879                         }
880                         else {
881                                 /* -+0 */
882                                 vec_add_dir(q0, v0, v1, (sd[0] / (sd[0] - sd[1])));
883                                 copy_v3_v3(q1, v1);
884                                 copy_v3_v3(q2, v2);
885                                 copy_v3_v3(q3, q2);
886                         }
887                 }
888                 else if (sd[1] < 0) {
889                         if (sd[2] > 0) {
890                                 /* --+ */
891                                 vec_add_dir(q0, v0, v2, (sd[0] / (sd[0] - sd[2])));
892                                 vec_add_dir(q1, v1, v2, (sd[1] / (sd[1] - sd[2])));
893                                 copy_v3_v3(q2, v2);
894                                 copy_v3_v3(q3, q2);
895                         }
896                         else if (sd[2] < 0) {
897                                 /* --- */
898                                 return 0;
899                         }
900                         else {
901                                 /* --0 */
902                                 return 0;
903                         }
904                 }
905                 else {
906                         if (sd[2] > 0) {
907                                 /* -0+ */
908                                 vec_add_dir(q0, v0, v2, (sd[0] / (sd[0] - sd[2])));
909                                 copy_v3_v3(q1, v1);
910                                 copy_v3_v3(q2, v2);
911                                 copy_v3_v3(q3, q2);
912                         }
913                         else if (sd[2] < 0) {
914                                 /* -0- */
915                                 return 0;
916                         }
917                         else {
918                                 /* -00 */
919                                 return 0;
920                         }
921                 }
922         }
923         else {
924                 if (sd[1] > 0) {
925                         if (sd[2] > 0) {
926                                 /* 0++ */
927                                 copy_v3_v3(q0, v0);
928                                 copy_v3_v3(q1, v1);
929                                 copy_v3_v3(q2, v2);
930                                 copy_v3_v3(q3, q2);
931                         }
932                         else if (sd[2] < 0) {
933                                 /* 0+- */
934                                 copy_v3_v3(q0, v0);
935                                 copy_v3_v3(q1, v1);
936                                 vec_add_dir(q2, v1, v2, (sd[1] / (sd[1] - sd[2])));
937                                 copy_v3_v3(q3, q2);
938                         }
939                         else {
940                                 /* 0+0 */
941                                 copy_v3_v3(q0, v0);
942                                 copy_v3_v3(q1, v1);
943                                 copy_v3_v3(q2, v2);
944                                 copy_v3_v3(q3, q2);
945                         }
946                 }
947                 else if (sd[1] < 0) {
948                         if (sd[2] > 0) {
949                                 /* 0-+ */
950                                 copy_v3_v3(q0, v0);
951                                 vec_add_dir(q1, v1, v2, (sd[1] / (sd[1] - sd[2])));
952                                 copy_v3_v3(q2, v2);
953                                 copy_v3_v3(q3, q2);
954                         }
955                         else if (sd[2] < 0) {
956                                 /* 0-- */
957                                 return 0;
958                         }
959                         else {
960                                 /* 0-0 */
961                                 return 0;
962                         }
963                 }
964                 else {
965                         if (sd[2] > 0) {
966                                 /* 00+ */
967                                 copy_v3_v3(q0, v0);
968                                 copy_v3_v3(q1, v1);
969                                 copy_v3_v3(q2, v2);
970                                 copy_v3_v3(q3, q2);
971                         }
972                         else if (sd[2] < 0) {
973                                 /* 00- */
974                                 return 0;
975                         }
976                         else {
977                                 /* 000 */
978                                 return 0;
979                         }
980                 }
981         }
982
983         return 1;
984 }
985
986 /* altivec optimization, this works, but is unused */
987
988 #if 0
989 #include <Accelerate/Accelerate.h>
990
991 typedef union {
992         vFloat v;
993         float f[4];
994 } vFloatResult;
995
996 static vFloat vec_splat_float(float val)
997 {
998         return (vFloat) {val, val, val, val};
999 }
1000
1001 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
1002 {
1003         vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle;
1004         vUInt8 rotate = (vUInt8) {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3};
1005         vFloatResult vresult;
1006         float result;
1007
1008         /* compute r* */
1009         vrx = (vFloat) {q0[0], q1[0], q2[0], q3[0]} -vec_splat_float(p[0]);
1010         vry = (vFloat) {q0[1], q1[1], q2[1], q3[1]} -vec_splat_float(p[1]);
1011         vrz = (vFloat) {q0[2], q1[2], q2[2], q3[2]} -vec_splat_float(p[2]);
1012
1013         /* normalize r* */
1014         rlen = vec_rsqrte(vrx * vrx + vry * vry + vrz * vrz + vec_splat_float(1e-16f));
1015         vrx = vrx * rlen;
1016         vry = vry * rlen;
1017         vrz = vrz * rlen;
1018
1019         /* rotate r* for cross and dot */
1020         vsrx = vec_perm(vrx, vrx, rotate);
1021         vsry = vec_perm(vry, vry, rotate);
1022         vsrz = vec_perm(vrz, vrz, rotate);
1023
1024         /* cross product */
1025         gx = vsry * vrz - vsrz * vry;
1026         gy = vsrz * vrx - vsrx * vrz;
1027         gz = vsrx * vry - vsry * vrx;
1028
1029         /* normalize */
1030         rlen = vec_rsqrte(gx * gx + gy * gy + gz * gz + vec_splat_float(1e-16f));
1031         gx = gx * rlen;
1032         gy = gy * rlen;
1033         gz = gz * rlen;
1034
1035         /* angle */
1036         vcos = vrx * vsrx + vry * vsry + vrz * vsrz;
1037         vcos = vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f));
1038         vangle = vacosf(vcos);
1039
1040         /* dot */
1041         vresult.v = (vec_splat_float(n[0]) * gx +
1042                      vec_splat_float(n[1]) * gy +
1043                      vec_splat_float(n[2]) * gz) * vangle;
1044
1045         result = (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3]) * (0.5f / (float)M_PI);
1046         result = MAX2(result, 0.0f);
1047
1048         return result;
1049 }
1050
1051 #endif
1052
1053 /* SSE optimization, acos code doesn't work */
1054
1055 #if 0
1056
1057 #include <xmmintrin.h>
1058
1059 static __m128 sse_approx_acos(__m128 x)
1060 {
1061         /* needs a better approximation than taylor expansion of acos, since that
1062          * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work
1063          * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */
1064
1065         return _mm_set_ps1(1.0f);
1066 }
1067
1068 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
1069 {
1070         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1071         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1072         float fresult[4] __attribute__((aligned(16)));
1073         __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult;
1074
1075         /* compute r */
1076         qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]);
1077         qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]);
1078         qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]);
1079
1080         rx = qx - _mm_set_ps1(p[0]);
1081         ry = qy - _mm_set_ps1(p[1]);
1082         rz = qz - _mm_set_ps1(p[2]);
1083
1084         /* normalize r */
1085         rlen = _mm_rsqrt_ps(rx * rx + ry * ry + rz * rz + _mm_set_ps1(1e-16f));
1086         rx = rx * rlen;
1087         ry = ry * rlen;
1088         rz = rz * rlen;
1089
1090         /* cross product */
1091         srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0, 3, 2, 1));
1092         sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0, 3, 2, 1));
1093         srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0, 3, 2, 1));
1094
1095         gx = sry * rz - srz * ry;
1096         gy = srz * rx - srx * rz;
1097         gz = srx * ry - sry * rx;
1098
1099         /* normalize g */
1100         glen = _mm_rsqrt_ps(gx * gx + gy * gy + gz * gz + _mm_set_ps1(1e-16f));
1101         gx = gx * glen;
1102         gy = gy * glen;
1103         gz = gz * glen;
1104
1105         /* compute angle */
1106         rcos = rx * srx + ry * sry + rz * srz;
1107         rcos = _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f));
1108
1109         angle = sse_approx_cos(rcos);
1110         aresult = (_mm_set_ps1(n[0]) * gx + _mm_set_ps1(n[1]) * gy + _mm_set_ps1(n[2]) * gz) * angle;
1111
1112         /* sum together */
1113         result = (fresult[0] + fresult[1] + fresult[2] + fresult[3]) * (0.5f / (float)M_PI);
1114         result = MAX2(result, 0.0f);
1115
1116         return result;
1117 }
1118
1119 #endif
1120
1121 static void normalizef(float *n)
1122 {
1123         float d;
1124         
1125         d = dot_v3v3(n, n);
1126
1127         if (d > 1.0e-35F) {
1128                 d = 1.0f / sqrtf(d);
1129
1130                 n[0] *= d; 
1131                 n[1] *= d; 
1132                 n[2] *= d;
1133         }
1134 }
1135
1136 /* TODO: exact duplicate of ff_quad_form_factor() in math_geom.c
1137  *       why not de-duplicate? (also above helper func) */
1138 static float occ_quad_form_factor(const float p[3], const float n[3], const float q0[3], const float q1[3], const float q2[3], const float q3[3])
1139 {
1140         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
1141         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
1142
1143         sub_v3_v3v3(r0, q0, p);
1144         sub_v3_v3v3(r1, q1, p);
1145         sub_v3_v3v3(r2, q2, p);
1146         sub_v3_v3v3(r3, q3, p);
1147
1148         normalizef(r0);
1149         normalizef(r1);
1150         normalizef(r2);
1151         normalizef(r3);
1152
1153         cross_v3_v3v3(g0, r1, r0); normalizef(g0);
1154         cross_v3_v3v3(g1, r2, r1); normalizef(g1);
1155         cross_v3_v3v3(g2, r3, r2); normalizef(g2);
1156         cross_v3_v3v3(g3, r0, r3); normalizef(g3);
1157
1158         a1 = saacosf(dot_v3v3(r0, r1));
1159         a2 = saacosf(dot_v3v3(r1, r2));
1160         a3 = saacosf(dot_v3v3(r2, r3));
1161         a4 = saacosf(dot_v3v3(r3, r0));
1162
1163         dot1 = dot_v3v3(n, g0);
1164         dot2 = dot_v3v3(n, g1);
1165         dot3 = dot_v3v3(n, g2);
1166         dot4 = dot_v3v3(n, g3);
1167
1168         result = (a1 * dot1 + a2 * dot2 + a3 * dot3 + a4 * dot4) * 0.5f / (float)M_PI;
1169         result = MAX2(result, 0.0f);
1170
1171         return result;
1172 }
1173
1174 static float occ_form_factor(OccFace *face, float *p, float *n)
1175 {
1176         ObjectInstanceRen *obi;
1177         VlakRen *vlr;
1178         float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib = 0.0f;
1179
1180         obi = &R.objectinstance[face->obi];
1181         vlr = RE_findOrAddVlak(obi->obr, face->facenr);
1182
1183         copy_v3_v3(v1, vlr->v1->co);
1184         copy_v3_v3(v2, vlr->v2->co);
1185         copy_v3_v3(v3, vlr->v3->co);
1186
1187         if (obi->flag & R_TRANSFORMED) {
1188                 mul_m4_v3(obi->mat, v1);
1189                 mul_m4_v3(obi->mat, v2);
1190                 mul_m4_v3(obi->mat, v3);
1191         }
1192
1193         if (occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
1194                 contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1195
1196         if (vlr->v4) {
1197                 copy_v3_v3(v4, vlr->v4->co);
1198                 if (obi->flag & R_TRANSFORMED)
1199                         mul_m4_v3(obi->mat, v4);
1200
1201                 if (occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
1202                         contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
1203         }
1204
1205         return contrib;
1206 }
1207
1208 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude,
1209                        const float pp[3], const float pn[3], float *occ, float rad[3], float bentn[3])
1210 {
1211         OccNode *node, **stack;
1212         OccFace *face;
1213         float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2;
1214         float distfac, fac, error, d2, weight, emitarea;
1215         int b, f, totstack;
1216
1217         /* init variables */
1218         copy_v3_v3(p, pp);
1219         copy_v3_v3(n, pn);
1220         madd_v3_v3fl(p, n, 1e-4f);
1221
1222         if (bentn)
1223                 copy_v3_v3(bentn, n);
1224         
1225         error = tree->error;
1226         distfac = tree->distfac;
1227
1228         resultocc = 0.0f;
1229         zero_v3(resultrad);
1230
1231         /* init stack */
1232         stack = tree->stack[thread];
1233         stack[0] = tree->root;
1234         totstack = 1;
1235
1236         while (totstack) {
1237                 /* pop point off the stack */
1238                 node = stack[--totstack];
1239
1240                 sub_v3_v3v3(v, node->co, p);
1241                 d2 = dot_v3v3(v, v) + 1e-16f;
1242                 emitarea = MAX2(node->area, node->dco);
1243
1244                 if (d2 * error > emitarea) {
1245                         if (distfac != 0.0f) {
1246                                 fac = 1.0f / (1.0f + distfac * d2);
1247                                 if (fac < 0.01f)
1248                                         continue;
1249                         }
1250                         else
1251                                 fac = 1.0f;
1252
1253                         /* accumulate occlusion from spherical harmonics */
1254                         invd2 = 1.0f / sqrtf(d2);
1255                         weight = occ_solid_angle(node, v, d2, invd2, n);
1256
1257                         if (rad)
1258                                 madd_v3_v3fl(resultrad, node->rad, weight * fac);
1259
1260                         weight *= node->occlusion;
1261
1262                         if (bentn) {
1263                                 bentn[0] -= weight * invd2 * v[0];
1264                                 bentn[1] -= weight * invd2 * v[1];
1265                                 bentn[2] -= weight * invd2 * v[2];
1266                         }
1267
1268                         resultocc += weight * fac;
1269                 }
1270                 else {
1271                         /* traverse into children */
1272                         for (b = 0; b < TOTCHILD; b++) {
1273                                 if (node->childflag & (1 << b)) {
1274                                         f = node->child[b].face;
1275                                         face = &tree->face[f];
1276
1277                                         /* accumulate occlusion with face form factor */
1278                                         if (!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
1279                                                 if (bentn || distfac != 0.0f) {
1280                                                         occ_face(face, co, NULL, NULL); 
1281                                                         sub_v3_v3v3(v, co, p);
1282                                                         d2 = dot_v3v3(v, v) + 1e-16f;
1283
1284                                                         fac = (distfac == 0.0f) ? 1.0f : 1.0f / (1.0f + distfac * d2);
1285                                                         if (fac < 0.01f)
1286                                                                 continue;
1287                                                 }
1288                                                 else
1289                                                         fac = 1.0f;
1290
1291                                                 weight = occ_form_factor(face, p, n);
1292
1293                                                 if (rad)
1294                                                         madd_v3_v3fl(resultrad, tree->rad[f], weight * fac);
1295
1296                                                 weight *= tree->occlusion[f];
1297
1298                                                 if (bentn) {
1299                                                         invd2 = 1.0f / sqrtf(d2);
1300                                                         bentn[0] -= weight * invd2 * v[0];
1301                                                         bentn[1] -= weight * invd2 * v[1];
1302                                                         bentn[2] -= weight * invd2 * v[2];
1303                                                 }
1304
1305                                                 resultocc += weight * fac;
1306                                         }
1307                                 }
1308                                 else if (node->child[b].node) {
1309                                         /* push child on the stack */
1310                                         stack[totstack++] = node->child[b].node;
1311                                 }
1312                         }
1313                 }
1314         }
1315
1316         if (occ) *occ = resultocc;
1317         if (rad) copy_v3_v3(rad, resultrad);
1318 #if 0
1319         if (rad && exclude) {
1320                 int a;
1321                 for (a = 0; a < tree->totface; a++)
1322                         if ((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr))
1323                                 copy_v3_v3(rad, tree->rad[a]);
1324         }
1325 #endif
1326         if (bentn) normalize_v3(bentn);
1327 }
1328
1329 static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce)
1330 {
1331         float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ;
1332         int bounce, i;
1333
1334         rad = MEM_callocN(sizeof(float) * 3 * tree->totface, "OcclusionBounceRad");
1335         sum = MEM_dupallocN(tree->rad);
1336
1337         for (bounce = 1; bounce < totbounce; bounce++) {
1338                 for (i = 0; i < tree->totface; i++) {
1339                         occ_face(&tree->face[i], co, n, NULL);
1340                         madd_v3_v3fl(co, n, 1e-8f);
1341
1342                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL);
1343                         rad[i][0] = MAX2(rad[i][0], 0.0f);
1344                         rad[i][1] = MAX2(rad[i][1], 0.0f);
1345                         rad[i][2] = MAX2(rad[i][2], 0.0f);
1346                         add_v3_v3(sum[i], rad[i]);
1347
1348                         if (re->test_break(re->tbh))
1349                                 break;
1350                 }
1351
1352                 if (re->test_break(re->tbh))
1353                         break;
1354
1355                 tmp = tree->rad;
1356                 tree->rad = rad;
1357                 rad = tmp;
1358
1359                 occ_sum_occlusion(tree, tree->root);
1360         }
1361
1362         MEM_freeN(rad);
1363         MEM_freeN(tree->rad);
1364         tree->rad = sum;
1365
1366         if (!re->test_break(re->tbh))
1367                 occ_sum_occlusion(tree, tree->root);
1368 }
1369
1370 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
1371 {
1372         float *occ, co[3], n[3];
1373         int pass, i;
1374         
1375         occ = MEM_callocN(sizeof(float) * tree->totface, "OcclusionPassOcc");
1376
1377         for (pass = 0; pass < totpass; pass++) {
1378                 for (i = 0; i < tree->totface; i++) {
1379                         occ_face(&tree->face[i], co, n, NULL);
1380                         negate_v3(n);
1381                         madd_v3_v3fl(co, n, 1e-8f);
1382
1383                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL);
1384                         if (re->test_break(re->tbh))
1385                                 break;
1386                 }
1387
1388                 if (re->test_break(re->tbh))
1389                         break;
1390
1391                 for (i = 0; i < tree->totface; i++) {
1392                         tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
1393                         if (tree->occlusion[i] < 0.0f)
1394                                 tree->occlusion[i] = 0.0f;
1395                 }
1396
1397                 occ_sum_occlusion(tree, tree->root);
1398         }
1399
1400         MEM_freeN(occ);
1401 }
1402
1403 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude,
1404                             const float co[3], const float n[3], int thread, int onlyshadow,
1405                             float *ao, float *env, float *indirect)
1406 {
1407         float nn[3], bn[3], fac, occ, occlusion, correction, rad[3];
1408         int envcolor;
1409
1410         envcolor = re->wrld.aocolor;
1411         if (onlyshadow)
1412                 envcolor = WO_AOPLAIN;
1413
1414         negate_v3_v3(nn, n);
1415
1416         occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect) ? rad : NULL, (env && envcolor) ? bn : NULL);
1417
1418         correction = re->wrld.ao_approx_correction;
1419
1420         occlusion = (1.0f - correction) * (1.0f - occ);
1421         CLAMP(occlusion, 0.0f, 1.0f);
1422         if (correction != 0.0f)
1423                 occlusion += correction * expf(-occ);
1424
1425         if (env) {
1426                 /* sky shading using bent normal */
1427                 if (ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) {
1428                         fac = 0.5f * (1.0f + dot_v3v3(bn, re->grvec));
1429                         env[0] = (1.0f - fac) * re->wrld.horr + fac * re->wrld.zenr;
1430                         env[1] = (1.0f - fac) * re->wrld.horg + fac * re->wrld.zeng;
1431                         env[2] = (1.0f - fac) * re->wrld.horb + fac * re->wrld.zenb;
1432
1433                         mul_v3_fl(env, occlusion);
1434                 }
1435                 else {
1436                         env[0] = occlusion;
1437                         env[1] = occlusion;
1438                         env[2] = occlusion;
1439                 }
1440 #if 0
1441                 else {  /* WO_AOSKYTEX */
1442                         float dxyview[3];
1443                         bn[0] = -bn[0];
1444                         bn[1] = -bn[1];
1445                         bn[2] = -bn[2];
1446                         dxyview[0] = 1.0f;
1447                         dxyview[1] = 1.0f;
1448                         dxyview[2] = 0.0f;
1449                         shadeSkyView(ao, co, bn, dxyview);
1450                 }
1451 #endif
1452         }
1453
1454         if (ao) {
1455                 ao[0] = occlusion;
1456                 ao[1] = occlusion;
1457                 ao[2] = occlusion;
1458         }
1459
1460         if (tree->doindirect) copy_v3_v3(indirect, rad);
1461         else zero_v3(indirect);
1462 }
1463
1464 /* ---------------------------- Caching ------------------------------- */
1465
1466 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
1467 {
1468         x -= cache->x;
1469         y -= cache->y;
1470
1471         x /= cache->step;
1472         y /= cache->step;
1473         x *= cache->step;
1474         y *= cache->step;
1475
1476         if (x < 0 || x >= cache->w || y < 0 || y >= cache->h)
1477                 return NULL;
1478         else
1479                 return &cache->sample[y * cache->w + x];
1480 }
1481
1482 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect)
1483 {
1484         OcclusionCache *cache;
1485         OcclusionCacheSample *samples[4], *sample;
1486         float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
1487         float d[3], dist2;
1488         int i, x1, y1, x2, y2;
1489
1490         if (!tree->cache)
1491                 return 0;
1492         
1493         /* first try to find a sample in the same pixel */
1494         cache = &tree->cache[thread];
1495
1496         if (cache->sample && cache->step) {
1497                 sample = &cache->sample[(y - cache->y) * cache->w + (x - cache->x)];
1498                 if (sample->filled) {
1499                         sub_v3_v3v3(d, sample->co, co);
1500                         dist2 = dot_v3v3(d, d);
1501                         if (dist2 < 0.5f * sample->dist2 && dot_v3v3(sample->n, n) > 0.98f) {
1502                                 copy_v3_v3(ao, sample->ao);
1503                                 copy_v3_v3(env, sample->env);
1504                                 copy_v3_v3(indirect, sample->indirect);
1505                                 return 1;
1506                         }
1507                 }
1508         }
1509         else
1510                 return 0;
1511
1512         /* try to interpolate between 4 neighboring pixels */
1513         samples[0] = find_occ_sample(cache, x, y);
1514         samples[1] = find_occ_sample(cache, x + cache->step, y);
1515         samples[2] = find_occ_sample(cache, x, y + cache->step);
1516         samples[3] = find_occ_sample(cache, x + cache->step, y + cache->step);
1517
1518         for (i = 0; i < 4; i++)
1519                 if (!samples[i] || !samples[i]->filled)
1520                         return 0;
1521
1522         /* require intensities not being too different */
1523         mino = min_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1524         maxo = max_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1525
1526         if (maxo - mino > 0.05f)
1527                 return 0;
1528
1529         /* compute weighted interpolation between samples */
1530         zero_v3(ao);
1531         zero_v3(env);
1532         zero_v3(indirect);
1533         totw = 0.0f;
1534
1535         x1 = samples[0]->x;
1536         y1 = samples[0]->y;
1537         x2 = samples[3]->x;
1538         y2 = samples[3]->y;
1539
1540         tx = (float)(x2 - x) / (float)(x2 - x1);
1541         ty = (float)(y2 - y) / (float)(y2 - y1);
1542
1543         wb[3] = (1.0f - tx) * (1.0f - ty);
1544         wb[2] = (tx) * (1.0f - ty);
1545         wb[1] = (1.0f - tx) * (ty);
1546         wb[0] = tx * ty;
1547
1548         for (i = 0; i < 4; i++) {
1549                 sub_v3_v3v3(d, samples[i]->co, co);
1550                 //dist2 = dot_v3v3(d, d);
1551
1552                 wz[i] = 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
1553                 wn[i] = pow(dot_v3v3(samples[i]->n, n), 32.0f);
1554
1555                 w = wb[i] * wn[i] * wz[i];
1556
1557                 totw += w;
1558                 madd_v3_v3fl(ao, samples[i]->ao, w);
1559                 madd_v3_v3fl(env, samples[i]->env, w);
1560                 madd_v3_v3fl(indirect, samples[i]->indirect, w);
1561         }
1562
1563         if (totw >= 0.9f) {
1564                 totw = 1.0f / totw;
1565                 mul_v3_fl(ao, totw);
1566                 mul_v3_fl(env, totw);
1567                 mul_v3_fl(indirect, totw);
1568                 return 1;
1569         }
1570
1571         return 0;
1572 }
1573
1574 static void sample_occ_surface(ShadeInput *shi)
1575 {
1576         StrandRen *strand = shi->strand;
1577         StrandSurface *mesh = strand->buffer->surface;
1578         int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
1579         float w[4], *co1, *co2, *co3, *co4;
1580
1581         if (mesh && mesh->face && mesh->co && mesh->ao && index) {
1582                 face = mesh->face[*index];
1583
1584                 co1 = mesh->co[face[0]];
1585                 co2 = mesh->co[face[1]];
1586                 co3 = mesh->co[face[2]];
1587                 co4 = (face[3]) ? mesh->co[face[3]] : NULL;
1588
1589                 interp_weights_face_v3(w, co1, co2, co3, co4, strand->vert->co);
1590
1591                 zero_v3(shi->ao);
1592                 zero_v3(shi->env);
1593                 zero_v3(shi->indirect);
1594
1595                 madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]);
1596                 madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]);
1597                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]);
1598                 madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]);
1599                 madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]);
1600                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]);
1601                 madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]);
1602                 madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]);
1603                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]);
1604                 if (face[3]) {
1605                         madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]);
1606                         madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]);
1607                         madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]);
1608                 }
1609         }
1610         else {
1611                 shi->ao[0] = 1.0f;
1612                 shi->ao[1] = 1.0f;
1613                 shi->ao[2] = 1.0f;
1614                 zero_v3(shi->env);
1615                 zero_v3(shi->indirect);
1616         }
1617 }
1618
1619 /* ------------------------- External Functions --------------------------- */
1620
1621 static void *exec_strandsurface_sample(void *data)
1622 {
1623         OcclusionThread *othread = (OcclusionThread *)data;
1624         Render *re = othread->re;
1625         StrandSurface *mesh = othread->mesh;
1626         float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4;
1627         int a, *face;
1628
1629         for (a = othread->begin; a < othread->end; a++) {
1630                 face = mesh->face[a];
1631                 co1 = mesh->co[face[0]];
1632                 co2 = mesh->co[face[1]];
1633                 co3 = mesh->co[face[2]];
1634
1635                 if (face[3]) {
1636                         co4 = mesh->co[face[3]];
1637
1638                         mid_v3_v3v3(co, co1, co3);
1639                         normal_quad_v3(n, co1, co2, co3, co4);
1640                 }
1641                 else {
1642                         cent_tri_v3(co, co1, co2, co3);
1643                         normal_tri_v3(n, co1, co2, co3);
1644                 }
1645                 negate_v3(n);
1646
1647                 sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect);
1648                 copy_v3_v3(othread->faceao[a], ao);
1649                 copy_v3_v3(othread->faceenv[a], env);
1650                 copy_v3_v3(othread->faceindirect[a], indirect);
1651         }
1652
1653         return 0;
1654 }
1655
1656 void make_occ_tree(Render *re)
1657 {
1658         OcclusionThread othreads[BLENDER_MAX_THREADS];
1659         OcclusionTree *tree;
1660         StrandSurface *mesh;
1661         ListBase threads;
1662         float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3];
1663         int a, totface, totthread, *face, *count;
1664
1665         /* ugly, needed for occ_face */
1666         R = *re;
1667
1668         re->i.infostr = "Occlusion preprocessing";
1669         re->stats_draw(re->sdh, &re->i);
1670         
1671         re->occlusiontree = tree = occ_tree_build(re);
1672         
1673         if (tree) {
1674                 if (re->wrld.ao_approx_passes > 0)
1675                         occ_compute_passes(re, tree, re->wrld.ao_approx_passes);
1676                 if (tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT))
1677                         occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces);
1678
1679                 for (mesh = re->strandsurface.first; mesh; mesh = mesh->next) {
1680                         if (!mesh->face || !mesh->co || !mesh->ao)
1681                                 continue;
1682
1683                         count = MEM_callocN(sizeof(int) * mesh->totvert, "OcclusionCount");
1684                         faceao = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceAO");
1685                         faceenv = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceEnv");
1686                         faceindirect = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceIndirect");
1687
1688                         totthread = (mesh->totface > 10000) ? re->r.threads : 1;
1689                         totface = mesh->totface / totthread;
1690                         for (a = 0; a < totthread; a++) {
1691                                 othreads[a].re = re;
1692                                 othreads[a].faceao = faceao;
1693                                 othreads[a].faceenv = faceenv;
1694                                 othreads[a].faceindirect = faceindirect;
1695                                 othreads[a].thread = a;
1696                                 othreads[a].mesh = mesh;
1697                                 othreads[a].begin = a * totface;
1698                                 othreads[a].end = (a == totthread - 1) ? mesh->totface : (a + 1) * totface;
1699                         }
1700
1701                         if (totthread == 1) {
1702                                 exec_strandsurface_sample(&othreads[0]);
1703                         }
1704                         else {
1705                                 BLI_init_threads(&threads, exec_strandsurface_sample, totthread);
1706
1707                                 for (a = 0; a < totthread; a++)
1708                                         BLI_insert_thread(&threads, &othreads[a]);
1709
1710                                 BLI_end_threads(&threads);
1711                         }
1712
1713                         for (a = 0; a < mesh->totface; a++) {
1714                                 face = mesh->face[a];
1715
1716                                 copy_v3_v3(ao, faceao[a]);
1717                                 copy_v3_v3(env, faceenv[a]);
1718                                 copy_v3_v3(indirect, faceindirect[a]);
1719
1720                                 add_v3_v3(mesh->ao[face[0]], ao);
1721                                 add_v3_v3(mesh->env[face[0]], env);
1722                                 add_v3_v3(mesh->indirect[face[0]], indirect);
1723                                 count[face[0]]++;
1724                                 add_v3_v3(mesh->ao[face[1]], ao);
1725                                 add_v3_v3(mesh->env[face[1]], env);
1726                                 add_v3_v3(mesh->indirect[face[1]], indirect);
1727                                 count[face[1]]++;
1728                                 add_v3_v3(mesh->ao[face[2]], ao);
1729                                 add_v3_v3(mesh->env[face[2]], env);
1730                                 add_v3_v3(mesh->indirect[face[2]], indirect);
1731                                 count[face[2]]++;
1732
1733                                 if (face[3]) {
1734                                         add_v3_v3(mesh->ao[face[3]], ao);
1735                                         add_v3_v3(mesh->env[face[3]], env);
1736                                         add_v3_v3(mesh->indirect[face[3]], indirect);
1737                                         count[face[3]]++;
1738                                 }
1739                         }
1740
1741                         for (a = 0; a < mesh->totvert; a++) {
1742                                 if (count[a]) {
1743                                         mul_v3_fl(mesh->ao[a], 1.0f / count[a]);
1744                                         mul_v3_fl(mesh->env[a], 1.0f / count[a]);
1745                                         mul_v3_fl(mesh->indirect[a], 1.0f / count[a]);
1746                                 }
1747                         }
1748
1749                         MEM_freeN(count);
1750                         MEM_freeN(faceao);
1751                         MEM_freeN(faceenv);
1752                         MEM_freeN(faceindirect);
1753                 }
1754         }
1755 }
1756
1757 void free_occ(Render *re)
1758 {
1759         if (re->occlusiontree) {
1760                 occ_free_tree(re->occlusiontree);
1761                 re->occlusiontree = NULL;
1762         }
1763 }
1764
1765 void sample_occ(Render *re, ShadeInput *shi)
1766 {
1767         OcclusionTree *tree = re->occlusiontree;
1768         OcclusionCache *cache;
1769         OcclusionCacheSample *sample;
1770         OccFace exclude;
1771         int onlyshadow;
1772
1773         if (tree) {
1774                 if (shi->strand) {
1775                         sample_occ_surface(shi);
1776                 }
1777                 /* try to get result from the cache if possible */
1778                 else if (shi->depth != 0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) {
1779                         /* no luck, let's sample the occlusion */
1780                         exclude.obi = shi->obi - re->objectinstance;
1781                         exclude.facenr = shi->vlr->index;
1782                         onlyshadow = (shi->mat->mode & MA_ONLYSHADOW);
1783                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1784
1785                         /* fill result into sample, each time */
1786                         if (tree->cache) {
1787                                 cache = &tree->cache[shi->thread];
1788
1789                                 if (cache->sample && cache->step) {
1790                                         sample = &cache->sample[(shi->ys - cache->y) * cache->w + (shi->xs - cache->x)];
1791                                         copy_v3_v3(sample->co, shi->co);
1792                                         copy_v3_v3(sample->n, shi->vno);
1793                                         copy_v3_v3(sample->ao, shi->ao);
1794                                         copy_v3_v3(sample->env, shi->env);
1795                                         copy_v3_v3(sample->indirect, shi->indirect);
1796                                         sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]);
1797                                         sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2]));
1798                                         sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1799                                         sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
1800                                         sample->filled = 1;
1801                                 }
1802                         }
1803                 }
1804         }
1805         else {
1806                 shi->ao[0] = 1.0f;
1807                 shi->ao[1] = 1.0f;
1808                 shi->ao[2] = 1.0f;
1809
1810                 shi->env[0] = 0.0f;
1811                 shi->env[1] = 0.0f;
1812                 shi->env[2] = 0.0f;
1813
1814                 shi->indirect[0] = 0.0f;
1815                 shi->indirect[1] = 0.0f;
1816                 shi->indirect[2] = 0.0f;
1817         }
1818 }
1819
1820 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
1821 {
1822         OcclusionTree *tree = re->occlusiontree;
1823         PixStr ps;
1824         OcclusionCache *cache;
1825         OcclusionCacheSample *sample;
1826         OccFace exclude;
1827         ShadeInput *shi;
1828         intptr_t *rd = NULL;
1829         int *ro = NULL, *rp = NULL, *rz = NULL, onlyshadow;
1830         int x, y, step = CACHE_STEP;
1831
1832         if (!tree->cache)
1833                 return;
1834
1835         cache = &tree->cache[pa->thread];
1836         cache->w = pa->rectx;
1837         cache->h = pa->recty;
1838         cache->x = pa->disprect.xmin;
1839         cache->y = pa->disprect.ymin;
1840         cache->step = step;
1841         cache->sample = MEM_callocN(sizeof(OcclusionCacheSample) * cache->w * cache->h, "OcclusionCacheSample");
1842         sample = cache->sample;
1843
1844         if (re->osa) {
1845                 rd = pa->rectdaps;
1846         }
1847         else {
1848                 /* fake pixel struct for non-osa */
1849                 ps.next = NULL;
1850                 ps.mask = 0xFFFF;
1851
1852                 ro = pa->recto;
1853                 rp = pa->rectp;
1854                 rz = pa->rectz;
1855         }
1856
1857         /* compute a sample at every step pixels */
1858         for (y = pa->disprect.ymin; y < pa->disprect.ymax; y++) {
1859                 for (x = pa->disprect.xmin; x < pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) {
1860                         if (!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax - 1))
1861                                 continue;
1862                         if (!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax - 1))
1863                                 continue;
1864
1865                         if (re->osa) {
1866                                 if (!*rd) continue;
1867
1868                                 shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y);
1869                         }
1870                         else {
1871                                 if (!*rp) continue;
1872
1873                                 ps.obi = *ro;
1874                                 ps.facenr = *rp;
1875                                 ps.z = *rz;
1876                                 shade_samples_fill_with_ps(ssamp, &ps, x, y);
1877                         }
1878
1879                         shi = ssamp->shi;
1880                         if (shi->vlr) {
1881                                 onlyshadow = (shi->mat->mode & MA_ONLYSHADOW);
1882                                 exclude.obi = shi->obi - re->objectinstance;
1883                                 exclude.facenr = shi->vlr->index;
1884                                 sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1885
1886                                 copy_v3_v3(sample->co, shi->co);
1887                                 copy_v3_v3(sample->n, shi->vno);
1888                                 copy_v3_v3(sample->ao, shi->ao);
1889                                 copy_v3_v3(sample->env, shi->env);
1890                                 copy_v3_v3(sample->indirect, shi->indirect);
1891                                 sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]);
1892                                 sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2]));
1893                                 sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1894                                 sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
1895                                 sample->x = shi->xs;
1896                                 sample->y = shi->ys;
1897                                 sample->filled = 1;
1898                         }
1899
1900                         if (re->test_break(re->tbh))
1901                                 break;
1902                 }
1903         }
1904 }
1905
1906 void free_occ_samples(Render *re, RenderPart *pa)
1907 {
1908         OcclusionTree *tree = re->occlusiontree;
1909         OcclusionCache *cache;
1910
1911         if (tree->cache) {
1912                 cache = &tree->cache[pa->thread];
1913
1914                 if (cache->sample)
1915                         MEM_freeN(cache->sample);
1916
1917                 cache->w = 0;
1918                 cache->h = 0;
1919                 cache->step = 0;
1920         }
1921 }
1922