Fix build error on Windows 32 bit.
[blender-staging.git] / source / blender / render / intern / source / occlusion.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2008 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Brecht Van Lommel.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/render/intern/source/occlusion.c
29  *  \ingroup render
30  */
31
32 #include <math.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36
37 #include "MEM_guardedalloc.h"
38
39 #include "DNA_material_types.h"
40
41 #include "BLI_math.h"
42 #include "BLI_memarena.h"
43 #include "BLI_threads.h"
44 #include "BLI_utildefines.h"
45
46 #include "BLT_translation.h"
47
48 #include "BKE_node.h"
49 #include "BKE_scene.h"
50
51
52 #include "RE_shader_ext.h"
53
54 /* local includes */
55 #include "occlusion.h"
56 #include "render_types.h"
57 #include "rendercore.h"
58 #include "renderdatabase.h"
59 #include "shading.h"
60
61 /* ------------------------- Declarations --------------------------- */
62
63 #define INVPI ((float)M_1_PI)
64 #define TOTCHILD 8
65 #define CACHE_STEP 3
66
67 typedef struct OcclusionCacheSample {
68         float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2;
69         int x, y, filled;
70 } OcclusionCacheSample;
71
72 typedef struct OcclusionCache {
73         OcclusionCacheSample *sample;
74         int x, y, w, h, step;
75 } OcclusionCache;
76
77 typedef struct OccFace {
78         int obi;
79         int facenr;
80 } OccFace;
81
82 typedef struct OccNode {
83         float co[3], area;
84         float sh[9], dco;
85         float occlusion, rad[3];
86         int childflag;
87         union {
88                 //OccFace face;
89                 int face;
90                 struct OccNode *node;
91         } child[TOTCHILD];
92 } OccNode;
93
94 typedef struct OcclusionTree {
95         MemArena *arena;
96
97         float (*co)[3];     /* temporary during build */
98
99         OccFace *face;      /* instance and face indices */
100         float *occlusion;   /* occlusion for faces */
101         float (*rad)[3];    /* radiance for faces */
102         
103         OccNode *root;
104
105         OccNode **stack[BLENDER_MAX_THREADS];
106         int maxdepth;
107
108         int totface;
109
110         float error;
111         float distfac;
112
113         int dothreadedbuild;
114         int totbuildthread;
115         int doindirect;
116
117         OcclusionCache *cache;
118
119         int num_threads;
120 } OcclusionTree;
121
122 typedef struct OcclusionThread {
123         Render *re;
124         StrandSurface *mesh;
125         float (*faceao)[3];
126         float (*faceenv)[3];
127         float (*faceindirect)[3];
128         int begin, end;
129         int thread;
130 } OcclusionThread;
131
132 typedef struct OcclusionBuildThread {
133         OcclusionTree *tree;
134         int begin, end, depth;
135         OccNode *node;
136 } OcclusionBuildThread;
137
138 /* ------------------------- Shading --------------------------- */
139
140 extern Render R; /* meh */
141
142 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
143 {
144         ShadeInput *shi = ssamp->shi;
145         ShadeResult *shr = ssamp->shr;
146         float l, u, v, *v1, *v2, *v3;
147         
148         /* init */
149         if (vlr->v4) {
150                 shi->u = u = 0.5f;
151                 shi->v = v = 0.5f;
152         }
153         else {
154                 shi->u = u = 1.0f / 3.0f;
155                 shi->v = v = 1.0f / 3.0f;
156         }
157
158         /* setup render coordinates */
159         v1 = vlr->v1->co;
160         v2 = vlr->v2->co;
161         v3 = vlr->v3->co;
162         
163         /* renderco */
164         l = 1.0f - u - v;
165         
166         shi->co[0] = l * v3[0] + u * v1[0] + v * v2[0];
167         shi->co[1] = l * v3[1] + u * v1[1] + v * v2[1];
168         shi->co[2] = l * v3[2] + u * v1[2] + v * v2[2];
169
170         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
171
172         /* set up view vector */
173         copy_v3_v3(shi->view, shi->co);
174         normalize_v3(shi->view);
175         
176         /* cache for shadow */
177         shi->samplenr++;
178
179         shi->xs = 0; /* TODO */
180         shi->ys = 0;
181         
182         shade_input_set_normals(shi);
183
184         /* no normal flip */
185         if (shi->flippednor)
186                 shade_input_flip_normals(shi);
187
188         madd_v3_v3fl(shi->co, shi->facenor, -0.0001f); /* ugly.. */
189
190         /* not a pretty solution, but fixes common cases */
191         if (shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
192                 negate_v3(shi->vn);
193                 negate_v3(shi->vno);
194                 negate_v3(shi->nmapnorm);
195         }
196
197         /* init material vars */
198         shade_input_init_material(shi);
199
200         /* render */
201         shade_input_set_shade_texco(shi);
202
203         if (shi->mat->nodetree && shi->mat->use_nodes) {
204                 ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
205                 shi->mat = vlr->mat;  /* shi->mat is being set in nodetree */
206         }
207         else {
208                 shade_material_loop(shi, shr);
209         }
210
211         copy_v3_v3(rad, shr->combined);
212 }
213
214 static void occ_build_shade(Render *re, OcclusionTree *tree)
215 {
216         ShadeSample ssamp;
217         ObjectInstanceRen *obi;
218         VlakRen *vlr;
219         int a;
220
221         R = *re;
222
223         /* setup shade sample with correct passes */
224         memset(&ssamp, 0, sizeof(ShadeSample));
225         ssamp.shi[0].lay = re->lay;
226         ssamp.shi[0].passflag = SCE_PASS_DIFFUSE | SCE_PASS_RGBA;
227         ssamp.shi[0].combinedflag = ~(SCE_PASS_SPEC);
228         ssamp.tot = 1;
229
230         for (a = 0; a < tree->totface; a++) {
231                 obi = &R.objectinstance[tree->face[a].obi];
232                 vlr = RE_findOrAddVlak(obi->obr, tree->face[a].facenr);
233
234                 occ_shade(&ssamp, obi, vlr, tree->rad[a]);
235
236                 if (re->test_break(re->tbh))
237                         break;
238         }
239 }
240
241 /* ------------------------- Spherical Harmonics --------------------------- */
242
243 /* Use 2nd order SH => 9 coefficients, stored in this order:
244  * 0 = (0,0),
245  * 1 = (1,-1), 2 = (1,0), 3 = (1,1),
246  * 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
247
248 static void sh_copy(float *shresult, float *sh)
249 {
250         memcpy(shresult, sh, sizeof(float) * 9);
251 }
252
253 static void sh_mul(float *sh, float f)
254 {
255         int i;
256
257         for (i = 0; i < 9; i++)
258                 sh[i] *= f;
259 }
260
261 static void sh_add(float *shresult, float *sh1, float *sh2)
262 {
263         int i;
264
265         for (i = 0; i < 9; i++)
266                 shresult[i] = sh1[i] + sh2[i];
267 }
268
269 static void sh_from_disc(float *n, float area, float *shresult)
270 {
271         /* See formula (3) in:
272          * "An Efficient Representation for Irradiance Environment Maps" */
273         float sh[9], x, y, z;
274
275         x = n[0];
276         y = n[1];
277         z = n[2];
278
279         sh[0] = 0.282095f;
280
281         sh[1] = 0.488603f * y;
282         sh[2] = 0.488603f * z;
283         sh[3] = 0.488603f * x;
284
285         sh[4] = 1.092548f * x * y;
286         sh[5] = 1.092548f * y * z;
287         sh[6] = 0.315392f * (3.0f * z * z - 1.0f);
288         sh[7] = 1.092548f * x * z;
289         sh[8] = 0.546274f * (x * x - y * y);
290
291         sh_mul(sh, area);
292         sh_copy(shresult, sh);
293 }
294
295 static float sh_eval(float *sh, float *v)
296 {
297         /* See formula (13) in:
298          * "An Efficient Representation for Irradiance Environment Maps" */
299         static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
300         static const float c4 = 0.886227f, c5 = 0.247708f;
301         float x, y, z, sum;
302
303         x = v[0];
304         y = v[1];
305         z = v[2];
306
307         sum = c1 * sh[8] * (x * x - y * y);
308         sum += c3 * sh[6] * z * z;
309         sum += c4 * sh[0];
310         sum += -c5 * sh[6];
311         sum += 2.0f * c1 * (sh[4] * x * y + sh[7] * x * z + sh[5] * y * z);
312         sum += 2.0f * c2 * (sh[3] * x + sh[1] * y + sh[2] * z);
313
314         return sum;
315 }
316
317 /* ------------------------------ Building --------------------------------- */
318
319 static void occ_face(const OccFace *face, float co[3], float normal[3], float *area)
320 {
321         ObjectInstanceRen *obi;
322         VlakRen *vlr;
323         float v1[3], v2[3], v3[3], v4[3];
324
325         obi = &R.objectinstance[face->obi];
326         vlr = RE_findOrAddVlak(obi->obr, face->facenr);
327         
328         if (co) {
329                 if (vlr->v4)
330                         mid_v3_v3v3(co, vlr->v1->co, vlr->v3->co);
331                 else
332                         mid_v3_v3v3v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
333
334                 if (obi->flag & R_TRANSFORMED)
335                         mul_m4_v3(obi->mat, co);
336         }
337         
338         if (normal) {
339                 normal[0] = -vlr->n[0];
340                 normal[1] = -vlr->n[1];
341                 normal[2] = -vlr->n[2];
342
343                 if (obi->flag & R_TRANSFORMED)
344                         mul_m3_v3(obi->nmat, normal);
345         }
346
347         if (area) {
348                 copy_v3_v3(v1, vlr->v1->co);
349                 copy_v3_v3(v2, vlr->v2->co);
350                 copy_v3_v3(v3, vlr->v3->co);
351                 if (vlr->v4) copy_v3_v3(v4, vlr->v4->co);
352
353                 if (obi->flag & R_TRANSFORMED) {
354                         mul_m4_v3(obi->mat, v1);
355                         mul_m4_v3(obi->mat, v2);
356                         mul_m4_v3(obi->mat, v3);
357                         if (vlr->v4) mul_m4_v3(obi->mat, v4);
358                 }
359
360                 /* todo: correct area for instances */
361                 if (vlr->v4)
362                         *area = area_quad_v3(v1, v2, v3, v4);
363                 else
364                         *area = area_tri_v3(v1, v2, v3);
365         }
366 }
367
368 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
369 {
370         OccNode *child;
371         float occ, area, totarea, rad[3];
372         int a, b, indirect = tree->doindirect;
373
374         occ = 0.0f;
375         totarea = 0.0f;
376         if (indirect) zero_v3(rad);
377
378         for (b = 0; b < TOTCHILD; b++) {
379                 if (node->childflag & (1 << b)) {
380                         a = node->child[b].face;
381                         occ_face(&tree->face[a], NULL, NULL, &area);
382                         occ += area * tree->occlusion[a];
383                         if (indirect) madd_v3_v3fl(rad, tree->rad[a], area);
384                         totarea += area;
385                 }
386                 else if (node->child[b].node) {
387                         child = node->child[b].node;
388                         occ_sum_occlusion(tree, child);
389
390                         occ += child->area * child->occlusion;
391                         if (indirect) madd_v3_v3fl(rad, child->rad, child->area);
392                         totarea += child->area;
393                 }
394         }
395
396         if (totarea != 0.0f) {
397                 occ /= totarea;
398                 if (indirect) mul_v3_fl(rad, 1.0f / totarea);
399         }
400         
401         node->occlusion = occ;
402         if (indirect) copy_v3_v3(node->rad, rad);
403 }
404
405 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
406 {
407         float len, maxlen = -1.0f;
408         int a, axis = 0;
409
410         INIT_MINMAX(min, max);
411
412         for (a = begin; a < end; a++) {
413                 minmax_v3v3_v3(min, max, tree->co[a]);
414         }
415
416         for (a = 0; a < 3; a++) {
417                 len = max[a] - min[a];
418
419                 if (len > maxlen) {
420                         maxlen = len;
421                         axis = a;
422                 }
423         }
424
425         return axis;
426 }
427
428 static void occ_node_from_face(OccFace *face, OccNode *node)
429 {
430         float n[3];
431
432         occ_face(face, node->co, n, &node->area);
433         node->dco = 0.0f;
434         sh_from_disc(n, node->area, node->sh);
435 }
436
437 static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco)
438 {
439         int b;
440         for (b = 0; b < TOTCHILD; b++) {
441                 float dist, d[3], nco[3];
442
443                 if (node->childflag & (1 << b)) {
444                         occ_face(tree->face + node->child[b].face, nco, NULL, NULL);
445                 }
446                 else if (node->child[b].node) {
447                         OccNode *child = node->child[b].node;
448                         occ_build_dco(tree, child, co, dco);
449                         copy_v3_v3(nco, child->co);
450                 }
451                 else {
452                         continue;
453                 }
454
455                 sub_v3_v3v3(d, nco, co);
456                 dist = dot_v3v3(d, d);
457                 if (dist > *dco)
458                         *dco = dist;
459         }
460 }
461
462 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
463 {
464         float min[3], max[3], mid;
465         int axis, a, enda;
466
467         /* split in middle of boundbox. this seems faster than median split
468          * on complex scenes, possibly since it avoids two distant faces to
469          * be in the same node better? */
470         axis = occ_find_bbox_axis(tree, begin, end, min, max);
471         mid = 0.5f * (min[axis] + max[axis]);
472
473         a = begin;
474         enda = end;
475         while (a < enda) {
476                 if (tree->co[a][axis] > mid) {
477                         enda--;
478                         SWAP(OccFace, tree->face[a], tree->face[enda]);
479                         swap_v3_v3(tree->co[a], tree->co[enda]);
480                 }
481                 else
482                         a++;
483         }
484
485         *split = enda;
486 }
487
488 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
489 {
490         /* split faces into eight groups */
491         int b, splitx, splity[2], splitz[4];
492
493         occ_build_split(tree, begin, end, &splitx);
494
495         /* force split if none found, to deal with degenerate geometry */
496         if (splitx == begin || splitx == end)
497                 splitx = (begin + end) / 2;
498
499         occ_build_split(tree, begin, splitx, &splity[0]);
500         occ_build_split(tree, splitx, end, &splity[1]);
501
502         occ_build_split(tree, begin, splity[0], &splitz[0]);
503         occ_build_split(tree, splity[0], splitx, &splitz[1]);
504         occ_build_split(tree, splitx, splity[1], &splitz[2]);
505         occ_build_split(tree, splity[1], end, &splitz[3]);
506
507         offset[0] = begin;
508         offset[1] = splitz[0];
509         offset[2] = splity[0];
510         offset[3] = splitz[1];
511         offset[4] = splitx;
512         offset[5] = splitz[2];
513         offset[6] = splity[1];
514         offset[7] = splitz[3];
515
516         for (b = 0; b < 7; b++)
517                 count[b] = offset[b + 1] - offset[b];
518         count[7] = end - offset[7];
519 }
520
521 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth);
522
523 static void *exec_occ_build(void *data)
524 {
525         OcclusionBuildThread *othread = (OcclusionBuildThread *)data;
526
527         occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth);
528
529         return NULL;
530 }
531
532 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth)
533 {
534         ListBase threads;
535         OcclusionBuildThread othreads[BLENDER_MAX_THREADS];
536         OccNode *child, tmpnode;
537         /* OccFace *face; */
538         int a, b, totthread = 0, offset[TOTCHILD], count[TOTCHILD];
539
540         /* add a new node */
541         node->occlusion = 1.0f;
542
543         /* leaf node with only children */
544         if (end - begin <= TOTCHILD) {
545                 for (a = begin, b = 0; a < end; a++, b++) {
546                         /* face= &tree->face[a]; */
547                         node->child[b].face = a;
548                         node->childflag |= (1 << b);
549                 }
550         }
551         else {
552                 /* order faces */
553                 occ_build_8_split(tree, begin, end, offset, count);
554
555                 if (depth == 1 && tree->dothreadedbuild)
556                         BLI_threadpool_init(&threads, exec_occ_build, tree->totbuildthread);
557
558                 for (b = 0; b < TOTCHILD; b++) {
559                         if (count[b] == 0) {
560                                 node->child[b].node = NULL;
561                         }
562                         else if (count[b] == 1) {
563                                 /* face= &tree->face[offset[b]]; */
564                                 node->child[b].face = offset[b];
565                                 node->childflag |= (1 << b);
566                         }
567                         else {
568                                 if (tree->dothreadedbuild)
569                                         BLI_thread_lock(LOCK_CUSTOM1);
570
571                                 child = BLI_memarena_alloc(tree->arena, sizeof(OccNode));
572                                 node->child[b].node = child;
573
574                                 /* keep track of maximum depth for stack */
575                                 if (depth >= tree->maxdepth)
576                                         tree->maxdepth = depth + 1;
577
578                                 if (tree->dothreadedbuild)
579                                         BLI_thread_unlock(LOCK_CUSTOM1);
580
581                                 if (depth == 1 && tree->dothreadedbuild) {
582                                         othreads[totthread].tree = tree;
583                                         othreads[totthread].node = child;
584                                         othreads[totthread].begin = offset[b];
585                                         othreads[totthread].end = offset[b] + count[b];
586                                         othreads[totthread].depth = depth + 1;
587                                         BLI_threadpool_insert(&threads, &othreads[totthread]);
588                                         totthread++;
589                                 }
590                                 else
591                                         occ_build_recursive(tree, child, offset[b], offset[b] + count[b], depth + 1);
592                         }
593                 }
594
595                 if (depth == 1 && tree->dothreadedbuild)
596                         BLI_threadpool_end(&threads);
597         }
598
599         /* combine area, position and sh */
600         for (b = 0; b < TOTCHILD; b++) {
601                 if (node->childflag & (1 << b)) {
602                         child = &tmpnode;
603                         occ_node_from_face(tree->face + node->child[b].face, &tmpnode);
604                 }
605                 else {
606                         child = node->child[b].node;
607                 }
608
609                 if (child) {
610                         node->area += child->area;
611                         sh_add(node->sh, node->sh, child->sh);
612                         madd_v3_v3fl(node->co, child->co, child->area);
613                 }
614         }
615
616         if (node->area != 0.0f)
617                 mul_v3_fl(node->co, 1.0f / node->area);
618
619         /* compute maximum distance from center */
620         node->dco = 0.0f;
621         if (node->area > 0.0f)
622                 occ_build_dco(tree, node, node->co, &node->dco);
623 }
624
625 static void occ_build_sh_normalize(OccNode *node)
626 {
627         /* normalize spherical harmonics to not include area, so
628          * we can clamp the dot product and then multiply by area */
629         int b;
630
631         if (node->area != 0.0f)
632                 sh_mul(node->sh, 1.0f / node->area);
633
634         for (b = 0; b < TOTCHILD; b++) {
635                 if (node->childflag & (1 << b)) {
636                         /* pass */
637                 }
638                 else if (node->child[b].node) {
639                         occ_build_sh_normalize(node->child[b].node);
640                 }
641         }
642 }
643
644 static OcclusionTree *occ_tree_build(Render *re)
645 {
646         const int num_threads = re->r.threads;
647         OcclusionTree *tree;
648         ObjectInstanceRen *obi;
649         ObjectRen *obr;
650         Material *ma;
651         VlakRen *vlr = NULL;
652         int a, b, c, totface;
653
654         /* count */
655         totface = 0;
656         for (obi = re->instancetable.first; obi; obi = obi->next) {
657                 obr = obi->obr;
658                 for (a = 0; a < obr->totvlak; a++) {
659                         if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak;
660                         else vlr++;
661
662                         ma = vlr->mat;
663
664                         if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE))
665                                 totface++;
666                 }
667         }
668
669         if (totface == 0)
670                 return NULL;
671         
672         tree = MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
673         tree->totface = totface;
674
675         /* parameters */
676         tree->error = get_render_aosss_error(&re->r, re->wrld.ao_approx_error);
677         tree->distfac = (re->wrld.aomode & WO_AODIST) ? re->wrld.aodistfac : 0.0f;
678         tree->doindirect = (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0);
679
680         /* allocation */
681         tree->arena = BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena");
682         BLI_memarena_use_calloc(tree->arena);
683
684         if (re->wrld.aomode & WO_AOCACHE)
685                 tree->cache = MEM_callocN(sizeof(OcclusionCache) * num_threads, "OcclusionCache");
686
687         tree->face = MEM_callocN(sizeof(OccFace) * totface, "OcclusionFace");
688         tree->co = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionCo");
689         tree->occlusion = MEM_callocN(sizeof(float) * totface, "OcclusionOcclusion");
690
691         if (tree->doindirect)
692                 tree->rad = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionRad");
693
694         /* make array of face pointers */
695         for (b = 0, c = 0, obi = re->instancetable.first; obi; obi = obi->next, c++) {
696                 obr = obi->obr;
697                 for (a = 0; a < obr->totvlak; a++) {
698                         if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak;
699                         else vlr++;
700
701                         ma = vlr->mat;
702
703                         if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) {
704                                 tree->face[b].obi = c;
705                                 tree->face[b].facenr = a;
706                                 tree->occlusion[b] = 1.0f;
707                                 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 
708                                 b++;
709                         }
710                 }
711         }
712
713         /* threads */
714         tree->totbuildthread = (re->r.threads > 1 && totface > 10000) ? 8 : 1;
715         tree->dothreadedbuild = (tree->totbuildthread > 1);
716
717         /* recurse */
718         tree->root = BLI_memarena_alloc(tree->arena, sizeof(OccNode));
719         tree->maxdepth = 1;
720         occ_build_recursive(tree, tree->root, 0, totface, 1);
721
722         if (tree->doindirect) {
723                 if (!(re->test_break(re->tbh)))
724                         occ_build_shade(re, tree);
725
726                 if (!(re->test_break(re->tbh)))
727                         occ_sum_occlusion(tree, tree->root);
728         }
729         
730         MEM_freeN(tree->co);
731         tree->co = NULL;
732
733         if (!(re->test_break(re->tbh)))
734                 occ_build_sh_normalize(tree->root);
735
736         for (a = 0; a < num_threads; a++)
737                 tree->stack[a] = MEM_callocN(sizeof(OccNode) * TOTCHILD * (tree->maxdepth + 1), "OccStack");
738
739         tree->num_threads = num_threads;
740
741         return tree;
742 }
743
744 static void occ_free_tree(OcclusionTree *tree)
745 {
746         int a;
747
748         if (tree) {
749                 if (tree->arena) BLI_memarena_free(tree->arena);
750                 for (a = 0; a < tree->num_threads; a++)
751                         if (tree->stack[a])
752                                 MEM_freeN(tree->stack[a]);
753                 if (tree->occlusion) MEM_freeN(tree->occlusion);
754                 if (tree->cache) MEM_freeN(tree->cache);
755                 if (tree->face) MEM_freeN(tree->face);
756                 if (tree->rad) MEM_freeN(tree->rad);
757                 MEM_freeN(tree);
758         }
759 }
760
761 /* ------------------------- Traversal --------------------------- */
762
763 static float occ_solid_angle(OccNode *node, const float v[3], float d2, float invd2, const float receivenormal[3])
764 {
765         float dotreceive, dotemit;
766         float ev[3];
767
768         ev[0] = -v[0] * invd2;
769         ev[1] = -v[1] * invd2;
770         ev[2] = -v[2] * invd2;
771         dotemit = sh_eval(node->sh, ev);
772         dotreceive = dot_v3v3(receivenormal, v) * invd2;
773
774         CLAMP(dotemit, 0.0f, 1.0f);
775         CLAMP(dotreceive, 0.0f, 1.0f);
776         
777         return ((node->area * dotemit * dotreceive) / (d2 + node->area * INVPI)) * INVPI;
778 }
779
780 static float occ_form_factor(OccFace *face, float *p, float *n)
781 {
782         ObjectInstanceRen *obi;
783         VlakRen *vlr;
784         float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib = 0.0f;
785
786         obi = &R.objectinstance[face->obi];
787         vlr = RE_findOrAddVlak(obi->obr, face->facenr);
788
789         copy_v3_v3(v1, vlr->v1->co);
790         copy_v3_v3(v2, vlr->v2->co);
791         copy_v3_v3(v3, vlr->v3->co);
792
793         if (obi->flag & R_TRANSFORMED) {
794                 mul_m4_v3(obi->mat, v1);
795                 mul_m4_v3(obi->mat, v2);
796                 mul_m4_v3(obi->mat, v3);
797         }
798
799         if (form_factor_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
800                 contrib += form_factor_quad(p, n, q0, q1, q2, q3);
801
802         if (vlr->v4) {
803                 copy_v3_v3(v4, vlr->v4->co);
804                 if (obi->flag & R_TRANSFORMED)
805                         mul_m4_v3(obi->mat, v4);
806
807                 if (form_factor_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
808                         contrib += form_factor_quad(p, n, q0, q1, q2, q3);
809         }
810
811         return contrib;
812 }
813
814 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude,
815                        const float pp[3], const float pn[3], float *occ, float rad[3], float bentn[3])
816 {
817         OccNode *node, **stack;
818         OccFace *face;
819         float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2;
820         float distfac, fac, error, d2, weight, emitarea;
821         int b, f, totstack;
822
823         /* init variables */
824         copy_v3_v3(p, pp);
825         copy_v3_v3(n, pn);
826         madd_v3_v3fl(p, n, 1e-4f);
827
828         if (bentn)
829                 copy_v3_v3(bentn, n);
830         
831         error = tree->error;
832         distfac = tree->distfac;
833
834         resultocc = 0.0f;
835         zero_v3(resultrad);
836
837         /* init stack */
838         stack = tree->stack[thread];
839         stack[0] = tree->root;
840         totstack = 1;
841
842         while (totstack) {
843                 /* pop point off the stack */
844                 node = stack[--totstack];
845
846                 sub_v3_v3v3(v, node->co, p);
847                 d2 = dot_v3v3(v, v) + 1e-16f;
848                 emitarea = MAX2(node->area, node->dco);
849
850                 if (d2 * error > emitarea) {
851                         if (distfac != 0.0f) {
852                                 fac = 1.0f / (1.0f + distfac * d2);
853                                 if (fac < 0.01f)
854                                         continue;
855                         }
856                         else
857                                 fac = 1.0f;
858
859                         /* accumulate occlusion from spherical harmonics */
860                         invd2 = 1.0f / sqrtf(d2);
861                         weight = occ_solid_angle(node, v, d2, invd2, n);
862
863                         if (rad)
864                                 madd_v3_v3fl(resultrad, node->rad, weight * fac);
865
866                         weight *= node->occlusion;
867
868                         if (bentn) {
869                                 bentn[0] -= weight * invd2 * v[0];
870                                 bentn[1] -= weight * invd2 * v[1];
871                                 bentn[2] -= weight * invd2 * v[2];
872                         }
873
874                         resultocc += weight * fac;
875                 }
876                 else {
877                         /* traverse into children */
878                         for (b = 0; b < TOTCHILD; b++) {
879                                 if (node->childflag & (1 << b)) {
880                                         f = node->child[b].face;
881                                         face = &tree->face[f];
882
883                                         /* accumulate occlusion with face form factor */
884                                         if (!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
885                                                 if (bentn || distfac != 0.0f) {
886                                                         occ_face(face, co, NULL, NULL); 
887                                                         sub_v3_v3v3(v, co, p);
888                                                         d2 = dot_v3v3(v, v) + 1e-16f;
889
890                                                         fac = (distfac == 0.0f) ? 1.0f : 1.0f / (1.0f + distfac * d2);
891                                                         if (fac < 0.01f)
892                                                                 continue;
893                                                 }
894                                                 else
895                                                         fac = 1.0f;
896
897                                                 weight = occ_form_factor(face, p, n);
898
899                                                 if (rad)
900                                                         madd_v3_v3fl(resultrad, tree->rad[f], weight * fac);
901
902                                                 weight *= tree->occlusion[f];
903
904                                                 if (bentn) {
905                                                         invd2 = 1.0f / sqrtf(d2);
906                                                         bentn[0] -= weight * invd2 * v[0];
907                                                         bentn[1] -= weight * invd2 * v[1];
908                                                         bentn[2] -= weight * invd2 * v[2];
909                                                 }
910
911                                                 resultocc += weight * fac;
912                                         }
913                                 }
914                                 else if (node->child[b].node) {
915                                         /* push child on the stack */
916                                         stack[totstack++] = node->child[b].node;
917                                 }
918                         }
919                 }
920         }
921
922         if (occ) *occ = resultocc;
923         if (rad) copy_v3_v3(rad, resultrad);
924 #if 0
925         if (rad && exclude) {
926                 int a;
927                 for (a = 0; a < tree->totface; a++)
928                         if ((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr))
929                                 copy_v3_v3(rad, tree->rad[a]);
930         }
931 #endif
932         if (bentn) normalize_v3(bentn);
933 }
934
935 static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce)
936 {
937         float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ;
938         int bounce, i;
939
940         rad = MEM_callocN(sizeof(float) * 3 * tree->totface, "OcclusionBounceRad");
941         sum = MEM_dupallocN(tree->rad);
942
943         for (bounce = 1; bounce < totbounce; bounce++) {
944                 for (i = 0; i < tree->totface; i++) {
945                         occ_face(&tree->face[i], co, n, NULL);
946                         madd_v3_v3fl(co, n, 1e-8f);
947
948                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL);
949                         rad[i][0] = MAX2(rad[i][0], 0.0f);
950                         rad[i][1] = MAX2(rad[i][1], 0.0f);
951                         rad[i][2] = MAX2(rad[i][2], 0.0f);
952                         add_v3_v3(sum[i], rad[i]);
953
954                         if (re->test_break(re->tbh))
955                                 break;
956                 }
957
958                 if (re->test_break(re->tbh))
959                         break;
960
961                 tmp = tree->rad;
962                 tree->rad = rad;
963                 rad = tmp;
964
965                 occ_sum_occlusion(tree, tree->root);
966         }
967
968         MEM_freeN(rad);
969         MEM_freeN(tree->rad);
970         tree->rad = sum;
971
972         if (!re->test_break(re->tbh))
973                 occ_sum_occlusion(tree, tree->root);
974 }
975
976 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
977 {
978         float *occ, co[3], n[3];
979         int pass, i;
980         
981         occ = MEM_callocN(sizeof(float) * tree->totface, "OcclusionPassOcc");
982
983         for (pass = 0; pass < totpass; pass++) {
984                 for (i = 0; i < tree->totface; i++) {
985                         occ_face(&tree->face[i], co, n, NULL);
986                         negate_v3(n);
987                         madd_v3_v3fl(co, n, 1e-8f);
988
989                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL);
990                         if (re->test_break(re->tbh))
991                                 break;
992                 }
993
994                 if (re->test_break(re->tbh))
995                         break;
996
997                 for (i = 0; i < tree->totface; i++) {
998                         tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
999                         if (tree->occlusion[i] < 0.0f)
1000                                 tree->occlusion[i] = 0.0f;
1001                 }
1002
1003                 occ_sum_occlusion(tree, tree->root);
1004         }
1005
1006         MEM_freeN(occ);
1007 }
1008
1009 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude,
1010                             const float co[3], const float n[3], int thread, int onlyshadow,
1011                             float *ao, float *env, float *indirect)
1012 {
1013         float nn[3], bn[3], fac, occ, occlusion, correction, rad[3];
1014         int envcolor;
1015
1016         envcolor = re->wrld.aocolor;
1017         if (onlyshadow)
1018                 envcolor = WO_AOPLAIN;
1019
1020         negate_v3_v3(nn, n);
1021
1022         occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect) ? rad : NULL, (env && envcolor) ? bn : NULL);
1023
1024         correction = re->wrld.ao_approx_correction;
1025
1026         occlusion = (1.0f - correction) * (1.0f - occ);
1027         CLAMP(occlusion, 0.0f, 1.0f);
1028         if (correction != 0.0f)
1029                 occlusion += correction * expf(-occ);
1030
1031         if (env) {
1032                 /* sky shading using bent normal */
1033                 if (ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) {
1034                         fac = 0.5f * (1.0f + dot_v3v3(bn, re->grvec));
1035                         env[0] = (1.0f - fac) * re->wrld.horr + fac * re->wrld.zenr;
1036                         env[1] = (1.0f - fac) * re->wrld.horg + fac * re->wrld.zeng;
1037                         env[2] = (1.0f - fac) * re->wrld.horb + fac * re->wrld.zenb;
1038
1039                         mul_v3_fl(env, occlusion);
1040                 }
1041                 else {
1042                         env[0] = occlusion;
1043                         env[1] = occlusion;
1044                         env[2] = occlusion;
1045                 }
1046 #if 0
1047                 else {  /* WO_AOSKYTEX */
1048                         float dxyview[3];
1049                         bn[0] = -bn[0];
1050                         bn[1] = -bn[1];
1051                         bn[2] = -bn[2];
1052                         dxyview[0] = 1.0f;
1053                         dxyview[1] = 1.0f;
1054                         dxyview[2] = 0.0f;
1055                         shadeSkyView(ao, co, bn, dxyview);
1056                 }
1057 #endif
1058         }
1059
1060         if (ao) {
1061                 ao[0] = occlusion;
1062                 ao[1] = occlusion;
1063                 ao[2] = occlusion;
1064         }
1065
1066         if (tree->doindirect) copy_v3_v3(indirect, rad);
1067         else zero_v3(indirect);
1068 }
1069
1070 /* ---------------------------- Caching ------------------------------- */
1071
1072 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
1073 {
1074         x -= cache->x;
1075         y -= cache->y;
1076
1077         x /= cache->step;
1078         y /= cache->step;
1079         x *= cache->step;
1080         y *= cache->step;
1081
1082         if (x < 0 || x >= cache->w || y < 0 || y >= cache->h)
1083                 return NULL;
1084         else
1085                 return &cache->sample[y * cache->w + x];
1086 }
1087
1088 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect)
1089 {
1090         OcclusionCache *cache;
1091         OcclusionCacheSample *samples[4], *sample;
1092         float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
1093         float d[3], dist2;
1094         int i, x1, y1, x2, y2;
1095
1096         if (!tree->cache)
1097                 return 0;
1098         
1099         /* first try to find a sample in the same pixel */
1100         cache = &tree->cache[thread];
1101
1102         if (cache->sample && cache->step) {
1103                 sample = &cache->sample[(y - cache->y) * cache->w + (x - cache->x)];
1104                 if (sample->filled) {
1105                         sub_v3_v3v3(d, sample->co, co);
1106                         dist2 = dot_v3v3(d, d);
1107                         if (dist2 < 0.5f * sample->dist2 && dot_v3v3(sample->n, n) > 0.98f) {
1108                                 copy_v3_v3(ao, sample->ao);
1109                                 copy_v3_v3(env, sample->env);
1110                                 copy_v3_v3(indirect, sample->indirect);
1111                                 return 1;
1112                         }
1113                 }
1114         }
1115         else
1116                 return 0;
1117
1118         /* try to interpolate between 4 neighboring pixels */
1119         samples[0] = find_occ_sample(cache, x, y);
1120         samples[1] = find_occ_sample(cache, x + cache->step, y);
1121         samples[2] = find_occ_sample(cache, x, y + cache->step);
1122         samples[3] = find_occ_sample(cache, x + cache->step, y + cache->step);
1123
1124         for (i = 0; i < 4; i++)
1125                 if (!samples[i] || !samples[i]->filled)
1126                         return 0;
1127
1128         /* require intensities not being too different */
1129         mino = min_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1130         maxo = max_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1131
1132         if (maxo - mino > 0.05f)
1133                 return 0;
1134
1135         /* compute weighted interpolation between samples */
1136         zero_v3(ao);
1137         zero_v3(env);
1138         zero_v3(indirect);
1139         totw = 0.0f;
1140
1141         x1 = samples[0]->x;
1142         y1 = samples[0]->y;
1143         x2 = samples[3]->x;
1144         y2 = samples[3]->y;
1145
1146         tx = (float)(x2 - x) / (float)(x2 - x1);
1147         ty = (float)(y2 - y) / (float)(y2 - y1);
1148
1149         wb[3] = (1.0f - tx) * (1.0f - ty);
1150         wb[2] = (tx) * (1.0f - ty);
1151         wb[1] = (1.0f - tx) * (ty);
1152         wb[0] = tx * ty;
1153
1154         for (i = 0; i < 4; i++) {
1155                 sub_v3_v3v3(d, samples[i]->co, co);
1156                 //dist2 = dot_v3v3(d, d);
1157
1158                 wz[i] = 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
1159                 wn[i] = pow(dot_v3v3(samples[i]->n, n), 32.0f);
1160
1161                 w = wb[i] * wn[i] * wz[i];
1162
1163                 totw += w;
1164                 madd_v3_v3fl(ao, samples[i]->ao, w);
1165                 madd_v3_v3fl(env, samples[i]->env, w);
1166                 madd_v3_v3fl(indirect, samples[i]->indirect, w);
1167         }
1168
1169         if (totw >= 0.9f) {
1170                 totw = 1.0f / totw;
1171                 mul_v3_fl(ao, totw);
1172                 mul_v3_fl(env, totw);
1173                 mul_v3_fl(indirect, totw);
1174                 return 1;
1175         }
1176
1177         return 0;
1178 }
1179
1180 static void sample_occ_surface(ShadeInput *shi)
1181 {
1182         StrandRen *strand = shi->strand;
1183         StrandSurface *mesh = strand->buffer->surface;
1184         const int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
1185         float w[4], *co1, *co2, *co3, *co4;
1186
1187         if (mesh && mesh->face && mesh->co && mesh->ao && index) {
1188                 face = mesh->face[*index];
1189
1190                 co1 = mesh->co[face[0]];
1191                 co2 = mesh->co[face[1]];
1192                 co3 = mesh->co[face[2]];
1193
1194                 if (face[3]) {
1195                         co4 = mesh->co[face[3]];
1196                         interp_weights_quad_v3(w, co1, co2, co3, co4, strand->vert->co);
1197                 }
1198                 else {
1199                         interp_weights_tri_v3(w, co1, co2, co3, strand->vert->co);
1200                 }
1201
1202                 zero_v3(shi->ao);
1203                 zero_v3(shi->env);
1204                 zero_v3(shi->indirect);
1205
1206                 madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]);
1207                 madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]);
1208                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]);
1209                 madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]);
1210                 madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]);
1211                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]);
1212                 madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]);
1213                 madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]);
1214                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]);
1215                 if (face[3]) {
1216                         madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]);
1217                         madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]);
1218                         madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]);
1219                 }
1220         }
1221         else {
1222                 shi->ao[0] = 1.0f;
1223                 shi->ao[1] = 1.0f;
1224                 shi->ao[2] = 1.0f;
1225                 zero_v3(shi->env);
1226                 zero_v3(shi->indirect);
1227         }
1228 }
1229
1230 /* ------------------------- External Functions --------------------------- */
1231
1232 static void *exec_strandsurface_sample(void *data)
1233 {
1234         OcclusionThread *othread = (OcclusionThread *)data;
1235         Render *re = othread->re;
1236         StrandSurface *mesh = othread->mesh;
1237         float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4;
1238         int a, *face;
1239
1240         for (a = othread->begin; a < othread->end; a++) {
1241                 face = mesh->face[a];
1242                 co1 = mesh->co[face[0]];
1243                 co2 = mesh->co[face[1]];
1244                 co3 = mesh->co[face[2]];
1245
1246                 if (face[3]) {
1247                         co4 = mesh->co[face[3]];
1248
1249                         mid_v3_v3v3(co, co1, co3);
1250                         normal_quad_v3(n, co1, co2, co3, co4);
1251                 }
1252                 else {
1253                         mid_v3_v3v3v3(co, co1, co2, co3);
1254                         normal_tri_v3(n, co1, co2, co3);
1255                 }
1256                 negate_v3(n);
1257
1258                 sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect);
1259                 copy_v3_v3(othread->faceao[a], ao);
1260                 copy_v3_v3(othread->faceenv[a], env);
1261                 copy_v3_v3(othread->faceindirect[a], indirect);
1262         }
1263
1264         return NULL;
1265 }
1266
1267 void make_occ_tree(Render *re)
1268 {
1269         OcclusionThread othreads[BLENDER_MAX_THREADS];
1270         OcclusionTree *tree;
1271         StrandSurface *mesh;
1272         ListBase threads;
1273         float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3];
1274         int a, totface, totthread, *face, *count;
1275
1276         /* ugly, needed for occ_face */
1277         R = *re;
1278
1279         re->i.infostr = IFACE_("Occlusion preprocessing");
1280         re->stats_draw(re->sdh, &re->i);
1281         
1282         re->occlusiontree = tree = occ_tree_build(re);
1283         
1284         if (tree && !re->test_break(re->tbh)) {
1285                 if (re->wrld.ao_approx_passes > 0)
1286                         occ_compute_passes(re, tree, re->wrld.ao_approx_passes);
1287                 if (tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT))
1288                         occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces);
1289
1290                 for (mesh = re->strandsurface.first; mesh; mesh = mesh->next) {
1291                         if (!mesh->face || !mesh->co || !mesh->ao)
1292                                 continue;
1293
1294                         count = MEM_callocN(sizeof(int) * mesh->totvert, "OcclusionCount");
1295                         faceao = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceAO");
1296                         faceenv = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceEnv");
1297                         faceindirect = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceIndirect");
1298
1299                         totthread = (mesh->totface > 10000) ? re->r.threads : 1;
1300                         totface = mesh->totface / totthread;
1301                         for (a = 0; a < totthread; a++) {
1302                                 othreads[a].re = re;
1303                                 othreads[a].faceao = faceao;
1304                                 othreads[a].faceenv = faceenv;
1305                                 othreads[a].faceindirect = faceindirect;
1306                                 othreads[a].thread = a;
1307                                 othreads[a].mesh = mesh;
1308                                 othreads[a].begin = a * totface;
1309                                 othreads[a].end = (a == totthread - 1) ? mesh->totface : (a + 1) * totface;
1310                         }
1311
1312                         if (totthread == 1) {
1313                                 exec_strandsurface_sample(&othreads[0]);
1314                         }
1315                         else {
1316                                 BLI_threadpool_init(&threads, exec_strandsurface_sample, totthread);
1317
1318                                 for (a = 0; a < totthread; a++)
1319                                         BLI_threadpool_insert(&threads, &othreads[a]);
1320
1321                                 BLI_threadpool_end(&threads);
1322                         }
1323
1324                         for (a = 0; a < mesh->totface; a++) {
1325                                 face = mesh->face[a];
1326
1327                                 copy_v3_v3(ao, faceao[a]);
1328                                 copy_v3_v3(env, faceenv[a]);
1329                                 copy_v3_v3(indirect, faceindirect[a]);
1330
1331                                 add_v3_v3(mesh->ao[face[0]], ao);
1332                                 add_v3_v3(mesh->env[face[0]], env);
1333                                 add_v3_v3(mesh->indirect[face[0]], indirect);
1334                                 count[face[0]]++;
1335                                 add_v3_v3(mesh->ao[face[1]], ao);
1336                                 add_v3_v3(mesh->env[face[1]], env);
1337                                 add_v3_v3(mesh->indirect[face[1]], indirect);
1338                                 count[face[1]]++;
1339                                 add_v3_v3(mesh->ao[face[2]], ao);
1340                                 add_v3_v3(mesh->env[face[2]], env);
1341                                 add_v3_v3(mesh->indirect[face[2]], indirect);
1342                                 count[face[2]]++;
1343
1344                                 if (face[3]) {
1345                                         add_v3_v3(mesh->ao[face[3]], ao);
1346                                         add_v3_v3(mesh->env[face[3]], env);
1347                                         add_v3_v3(mesh->indirect[face[3]], indirect);
1348                                         count[face[3]]++;
1349                                 }
1350                         }
1351
1352                         for (a = 0; a < mesh->totvert; a++) {
1353                                 if (count[a]) {
1354                                         mul_v3_fl(mesh->ao[a], 1.0f / count[a]);
1355                                         mul_v3_fl(mesh->env[a], 1.0f / count[a]);
1356                                         mul_v3_fl(mesh->indirect[a], 1.0f / count[a]);
1357                                 }
1358                         }
1359
1360                         MEM_freeN(count);
1361                         MEM_freeN(faceao);
1362                         MEM_freeN(faceenv);
1363                         MEM_freeN(faceindirect);
1364                 }
1365         }
1366 }
1367
1368 void free_occ(Render *re)
1369 {
1370         if (re->occlusiontree) {
1371                 occ_free_tree(re->occlusiontree);
1372                 re->occlusiontree = NULL;
1373         }
1374 }
1375
1376 void sample_occ(Render *re, ShadeInput *shi)
1377 {
1378         OcclusionTree *tree = re->occlusiontree;
1379         OcclusionCache *cache;
1380         OcclusionCacheSample *sample;
1381         OccFace exclude;
1382         int onlyshadow;
1383
1384         if (tree) {
1385                 if (shi->strand) {
1386                         sample_occ_surface(shi);
1387                 }
1388                 /* try to get result from the cache if possible */
1389                 else if (shi->depth != 0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) {
1390                         /* no luck, let's sample the occlusion */
1391                         exclude.obi = shi->obi - re->objectinstance;
1392                         exclude.facenr = shi->vlr->index;
1393                         onlyshadow = (shi->mat->mode & MA_ONLYSHADOW);
1394                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1395
1396                         /* fill result into sample, each time */
1397                         if (tree->cache) {
1398                                 cache = &tree->cache[shi->thread];
1399
1400                                 if (cache->sample && cache->step) {
1401                                         sample = &cache->sample[(shi->ys - cache->y) * cache->w + (shi->xs - cache->x)];
1402                                         copy_v3_v3(sample->co, shi->co);
1403                                         copy_v3_v3(sample->n, shi->vno);
1404                                         copy_v3_v3(sample->ao, shi->ao);
1405                                         copy_v3_v3(sample->env, shi->env);
1406                                         copy_v3_v3(sample->indirect, shi->indirect);
1407                                         sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]);
1408                                         sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2]));
1409                                         sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1410                                         sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
1411                                         sample->filled = 1;
1412                                 }
1413                         }
1414                 }
1415         }
1416         else {
1417                 shi->ao[0] = 1.0f;
1418                 shi->ao[1] = 1.0f;
1419                 shi->ao[2] = 1.0f;
1420
1421                 shi->env[0] = 0.0f;
1422                 shi->env[1] = 0.0f;
1423                 shi->env[2] = 0.0f;
1424
1425                 shi->indirect[0] = 0.0f;
1426                 shi->indirect[1] = 0.0f;
1427                 shi->indirect[2] = 0.0f;
1428         }
1429 }
1430
1431 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
1432 {
1433         OcclusionTree *tree = re->occlusiontree;
1434         PixStr ps;
1435         OcclusionCache *cache;
1436         OcclusionCacheSample *sample;
1437         OccFace exclude;
1438         ShadeInput *shi;
1439         intptr_t *rd = NULL;
1440         int *ro = NULL, *rp = NULL, *rz = NULL, onlyshadow;
1441         int x, y, step = CACHE_STEP;
1442
1443         if (!tree->cache)
1444                 return;
1445
1446         cache = &tree->cache[pa->thread];
1447         cache->w = pa->rectx;
1448         cache->h = pa->recty;
1449         cache->x = pa->disprect.xmin;
1450         cache->y = pa->disprect.ymin;
1451         cache->step = step;
1452         cache->sample = MEM_callocN(sizeof(OcclusionCacheSample) * cache->w * cache->h, "OcclusionCacheSample");
1453         sample = cache->sample;
1454
1455         if (re->osa) {
1456                 rd = pa->rectdaps;
1457         }
1458         else {
1459                 /* fake pixel struct for non-osa */
1460                 ps.next = NULL;
1461                 ps.mask = 0xFFFF;
1462
1463                 ro = pa->recto;
1464                 rp = pa->rectp;
1465                 rz = pa->rectz;
1466         }
1467
1468         /* compute a sample at every step pixels */
1469         for (y = pa->disprect.ymin; y < pa->disprect.ymax; y++) {
1470                 for (x = pa->disprect.xmin; x < pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) {
1471                         if (!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax - 1))
1472                                 continue;
1473                         if (!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax - 1))
1474                                 continue;
1475
1476                         if (re->osa) {
1477                                 if (!*rd) continue;
1478
1479                                 shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y);
1480                         }
1481                         else {
1482                                 if (!*rp) continue;
1483
1484                                 ps.obi = *ro;
1485                                 ps.facenr = *rp;
1486                                 ps.z = *rz;
1487                                 shade_samples_fill_with_ps(ssamp, &ps, x, y);
1488                         }
1489
1490                         shi = ssamp->shi;
1491                         if (shi->vlr) {
1492                                 onlyshadow = (shi->mat->mode & MA_ONLYSHADOW);
1493                                 exclude.obi = shi->obi - re->objectinstance;
1494                                 exclude.facenr = shi->vlr->index;
1495                                 sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1496
1497                                 copy_v3_v3(sample->co, shi->co);
1498                                 copy_v3_v3(sample->n, shi->vno);
1499                                 copy_v3_v3(sample->ao, shi->ao);
1500                                 copy_v3_v3(sample->env, shi->env);
1501                                 copy_v3_v3(sample->indirect, shi->indirect);
1502                                 sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]);
1503                                 sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2]));
1504                                 sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1505                                 sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
1506                                 sample->x = shi->xs;
1507                                 sample->y = shi->ys;
1508                                 sample->filled = 1;
1509                         }
1510
1511                         if (re->test_break(re->tbh))
1512                                 break;
1513                 }
1514         }
1515 }
1516
1517 void free_occ_samples(Render *re, RenderPart *pa)
1518 {
1519         OcclusionTree *tree = re->occlusiontree;
1520         OcclusionCache *cache;
1521
1522         if (tree->cache) {
1523                 cache = &tree->cache[pa->thread];
1524
1525                 if (cache->sample)
1526                         MEM_freeN(cache->sample);
1527
1528                 cache->w = 0;
1529                 cache->h = 0;
1530                 cache->step = 0;
1531         }
1532 }
1533