More UI message i18n fixes and improvements...
[blender.git] / source / blender / render / intern / source / occlusion.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2008 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Brecht Van Lommel.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/render/intern/source/occlusion.c
29  *  \ingroup render
30  */
31
32 #include <math.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36
37 #include "MEM_guardedalloc.h"
38
39 #include "DNA_material_types.h"
40
41 #include "BLI_math.h"
42 #include "BLI_blenlib.h"
43 #include "BLI_memarena.h"
44 #include "BLI_threads.h"
45 #include "BLI_utildefines.h"
46
47 #include "BLF_translation.h"
48
49 #include "BKE_global.h"
50 #include "BKE_scene.h"
51
52
53 #include "RE_shader_ext.h"
54
55 /* local includes */
56 #include "occlusion.h"
57 #include "render_types.h"
58 #include "rendercore.h"
59 #include "renderdatabase.h"
60 #include "pixelshading.h"
61 #include "shading.h"
62 #include "zbuf.h"
63
64 /* ------------------------- Declarations --------------------------- */
65
66 #define INVPI ((float)M_1_PI)
67 #define TOTCHILD 8
68 #define CACHE_STEP 3
69
70 typedef struct OcclusionCacheSample {
71         float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2;
72         int x, y, filled;
73 } OcclusionCacheSample;
74
75 typedef struct OcclusionCache {
76         OcclusionCacheSample *sample;
77         int x, y, w, h, step;
78 } OcclusionCache;
79
80 typedef struct OccFace {
81         int obi;
82         int facenr;
83 } OccFace;
84
85 typedef struct OccNode {
86         float co[3], area;
87         float sh[9], dco;
88         float occlusion, rad[3];
89         int childflag;
90         union {
91                 //OccFace face;
92                 int face;
93                 struct OccNode *node;
94         } child[TOTCHILD];
95 } OccNode;
96
97 typedef struct OcclusionTree {
98         MemArena *arena;
99
100         float (*co)[3];     /* temporary during build */
101
102         OccFace *face;      /* instance and face indices */
103         float *occlusion;   /* occlusion for faces */
104         float (*rad)[3];    /* radiance for faces */
105         
106         OccNode *root;
107
108         OccNode **stack[BLENDER_MAX_THREADS];
109         int maxdepth;
110
111         int totface;
112
113         float error;
114         float distfac;
115
116         int dothreadedbuild;
117         int totbuildthread;
118         int doindirect;
119
120         OcclusionCache *cache;
121 } OcclusionTree;
122
123 typedef struct OcclusionThread {
124         Render *re;
125         StrandSurface *mesh;
126         float (*faceao)[3];
127         float (*faceenv)[3];
128         float (*faceindirect)[3];
129         int begin, end;
130         int thread;
131 } OcclusionThread;
132
133 typedef struct OcclusionBuildThread {
134         OcclusionTree *tree;
135         int begin, end, depth;
136         OccNode *node;
137 } OcclusionBuildThread;
138
139 /* ------------------------- Shading --------------------------- */
140
141 extern Render R; /* meh */
142
143 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
144 {
145         ShadeInput *shi = ssamp->shi;
146         ShadeResult *shr = ssamp->shr;
147         float l, u, v, *v1, *v2, *v3;
148         
149         /* init */
150         if (vlr->v4) {
151                 shi->u = u = 0.5f;
152                 shi->v = v = 0.5f;
153         }
154         else {
155                 shi->u = u = 1.0f / 3.0f;
156                 shi->v = v = 1.0f / 3.0f;
157         }
158
159         /* setup render coordinates */
160         v1 = vlr->v1->co;
161         v2 = vlr->v2->co;
162         v3 = vlr->v3->co;
163         
164         /* renderco */
165         l = 1.0f - u - v;
166         
167         shi->co[0] = l * v3[0] + u * v1[0] + v * v2[0];
168         shi->co[1] = l * v3[1] + u * v1[1] + v * v2[1];
169         shi->co[2] = l * v3[2] + u * v1[2] + v * v2[2];
170
171         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
172
173         /* set up view vector */
174         copy_v3_v3(shi->view, shi->co);
175         normalize_v3(shi->view);
176         
177         /* cache for shadow */
178         shi->samplenr++;
179
180         shi->xs = 0; /* TODO */
181         shi->ys = 0;
182         
183         shade_input_set_normals(shi);
184
185         /* no normal flip */
186         if (shi->flippednor)
187                 shade_input_flip_normals(shi);
188
189         madd_v3_v3fl(shi->co, shi->facenor, -0.0001f); /* ugly.. */
190
191         /* not a pretty solution, but fixes common cases */
192         if (shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
193                 negate_v3(shi->vn);
194                 negate_v3(shi->vno);
195                 negate_v3(shi->nmapnorm);
196         }
197
198         /* init material vars */
199         /* note, keep this synced with render_types.h */
200         memcpy(&shi->r, &shi->mat->r, 23 * sizeof(float));
201         shi->har = shi->mat->har;
202         
203         /* render */
204         shade_input_set_shade_texco(shi);
205         shade_material_loop(shi, shr); /* todo: nodes */
206         
207         copy_v3_v3(rad, shr->combined);
208 }
209
210 static void occ_build_shade(Render *re, OcclusionTree *tree)
211 {
212         ShadeSample ssamp;
213         ObjectInstanceRen *obi;
214         VlakRen *vlr;
215         int a;
216
217         R = *re;
218
219         /* setup shade sample with correct passes */
220         memset(&ssamp, 0, sizeof(ShadeSample));
221         ssamp.shi[0].lay = re->lay;
222         ssamp.shi[0].passflag = SCE_PASS_DIFFUSE | SCE_PASS_RGBA;
223         ssamp.shi[0].combinedflag = ~(SCE_PASS_SPEC);
224         ssamp.tot = 1;
225
226         for (a = 0; a < tree->totface; a++) {
227                 obi = &R.objectinstance[tree->face[a].obi];
228                 vlr = RE_findOrAddVlak(obi->obr, tree->face[a].facenr);
229
230                 occ_shade(&ssamp, obi, vlr, tree->rad[a]);
231         }
232 }
233
234 /* ------------------------- Spherical Harmonics --------------------------- */
235
236 /* Use 2nd order SH => 9 coefficients, stored in this order:
237  * 0 = (0,0),
238  * 1 = (1,-1), 2 = (1,0), 3 = (1,1),
239  * 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
240
241 static void sh_copy(float *shresult, float *sh)
242 {
243         memcpy(shresult, sh, sizeof(float) * 9);
244 }
245
246 static void sh_mul(float *sh, float f)
247 {
248         int i;
249
250         for (i = 0; i < 9; i++)
251                 sh[i] *= f;
252 }
253
254 static void sh_add(float *shresult, float *sh1, float *sh2)
255 {
256         int i;
257
258         for (i = 0; i < 9; i++)
259                 shresult[i] = sh1[i] + sh2[i];
260 }
261
262 static void sh_from_disc(float *n, float area, float *shresult)
263 {
264         /* See formula (3) in:
265          * "An Efficient Representation for Irradiance Environment Maps" */
266         float sh[9], x, y, z;
267
268         x = n[0];
269         y = n[1];
270         z = n[2];
271
272         sh[0] = 0.282095f;
273
274         sh[1] = 0.488603f * y;
275         sh[2] = 0.488603f * z;
276         sh[3] = 0.488603f * x;
277
278         sh[4] = 1.092548f * x * y;
279         sh[5] = 1.092548f * y * z;
280         sh[6] = 0.315392f * (3.0f * z * z - 1.0f);
281         sh[7] = 1.092548f * x * z;
282         sh[8] = 0.546274f * (x * x - y * y);
283
284         sh_mul(sh, area);
285         sh_copy(shresult, sh);
286 }
287
288 static float sh_eval(float *sh, float *v)
289 {
290         /* See formula (13) in:
291          * "An Efficient Representation for Irradiance Environment Maps" */
292         static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
293         static const float c4 = 0.886227f, c5 = 0.247708f;
294         float x, y, z, sum;
295
296         x = v[0];
297         y = v[1];
298         z = v[2];
299
300         sum = c1 * sh[8] * (x * x - y * y);
301         sum += c3 * sh[6] * z * z;
302         sum += c4 * sh[0];
303         sum += -c5 * sh[6];
304         sum += 2.0f * c1 * (sh[4] * x * y + sh[7] * x * z + sh[5] * y * z);
305         sum += 2.0f * c2 * (sh[3] * x + sh[1] * y + sh[2] * z);
306
307         return sum;
308 }
309
310 /* ------------------------------ Building --------------------------------- */
311
312 static void occ_face(const OccFace *face, float co[3], float normal[3], float *area)
313 {
314         ObjectInstanceRen *obi;
315         VlakRen *vlr;
316         float v1[3], v2[3], v3[3], v4[3];
317
318         obi = &R.objectinstance[face->obi];
319         vlr = RE_findOrAddVlak(obi->obr, face->facenr);
320         
321         if (co) {
322                 if (vlr->v4)
323                         mid_v3_v3v3(co, vlr->v1->co, vlr->v3->co);
324                 else
325                         cent_tri_v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
326
327                 if (obi->flag & R_TRANSFORMED)
328                         mul_m4_v3(obi->mat, co);
329         }
330         
331         if (normal) {
332                 normal[0] = -vlr->n[0];
333                 normal[1] = -vlr->n[1];
334                 normal[2] = -vlr->n[2];
335
336                 if (obi->flag & R_TRANSFORMED)
337                         mul_m3_v3(obi->nmat, normal);
338         }
339
340         if (area) {
341                 copy_v3_v3(v1, vlr->v1->co);
342                 copy_v3_v3(v2, vlr->v2->co);
343                 copy_v3_v3(v3, vlr->v3->co);
344                 if (vlr->v4) copy_v3_v3(v4, vlr->v4->co);
345
346                 if (obi->flag & R_TRANSFORMED) {
347                         mul_m4_v3(obi->mat, v1);
348                         mul_m4_v3(obi->mat, v2);
349                         mul_m4_v3(obi->mat, v3);
350                         if (vlr->v4) mul_m4_v3(obi->mat, v4);
351                 }
352
353                 /* todo: correct area for instances */
354                 if (vlr->v4)
355                         *area = area_quad_v3(v1, v2, v3, v4);
356                 else
357                         *area = area_tri_v3(v1, v2, v3);
358         }
359 }
360
361 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
362 {
363         OccNode *child;
364         float occ, area, totarea, rad[3];
365         int a, b, indirect = tree->doindirect;
366
367         occ = 0.0f;
368         totarea = 0.0f;
369         if (indirect) zero_v3(rad);
370
371         for (b = 0; b < TOTCHILD; b++) {
372                 if (node->childflag & (1 << b)) {
373                         a = node->child[b].face;
374                         occ_face(&tree->face[a], 0, 0, &area);
375                         occ += area * tree->occlusion[a];
376                         if (indirect) madd_v3_v3fl(rad, tree->rad[a], area);
377                         totarea += area;
378                 }
379                 else if (node->child[b].node) {
380                         child = node->child[b].node;
381                         occ_sum_occlusion(tree, child);
382
383                         occ += child->area * child->occlusion;
384                         if (indirect) madd_v3_v3fl(rad, child->rad, child->area);
385                         totarea += child->area;
386                 }
387         }
388
389         if (totarea != 0.0f) {
390                 occ /= totarea;
391                 if (indirect) mul_v3_fl(rad, 1.0f / totarea);
392         }
393         
394         node->occlusion = occ;
395         if (indirect) copy_v3_v3(node->rad, rad);
396 }
397
398 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
399 {
400         float len, maxlen = -1.0f;
401         int a, axis = 0;
402
403         INIT_MINMAX(min, max);
404
405         for (a = begin; a < end; a++) {
406                 minmax_v3v3_v3(min, max, tree->co[a]);
407         }
408
409         for (a = 0; a < 3; a++) {
410                 len = max[a] - min[a];
411
412                 if (len > maxlen) {
413                         maxlen = len;
414                         axis = a;
415                 }
416         }
417
418         return axis;
419 }
420
421 static void occ_node_from_face(OccFace *face, OccNode *node)
422 {
423         float n[3];
424
425         occ_face(face, node->co, n, &node->area);
426         node->dco = 0.0f;
427         sh_from_disc(n, node->area, node->sh);
428 }
429
430 static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco)
431 {
432         int b;
433         for (b = 0; b < TOTCHILD; b++) {
434                 float dist, d[3], nco[3];
435
436                 if (node->childflag & (1 << b)) {
437                         occ_face(tree->face + node->child[b].face, nco, NULL, NULL);
438                 }
439                 else if (node->child[b].node) {
440                         OccNode *child = node->child[b].node;
441                         occ_build_dco(tree, child, co, dco);
442                         copy_v3_v3(nco, child->co);
443                 }
444                 else {
445                         continue;
446                 }
447
448                 sub_v3_v3v3(d, nco, co);
449                 dist = dot_v3v3(d, d);
450                 if (dist > *dco)
451                         *dco = dist;
452         }
453 }
454
455 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
456 {
457         float min[3], max[3], mid;
458         int axis, a, enda;
459
460         /* split in middle of boundbox. this seems faster than median split
461          * on complex scenes, possibly since it avoids two distant faces to
462          * be in the same node better? */
463         axis = occ_find_bbox_axis(tree, begin, end, min, max);
464         mid = 0.5f * (min[axis] + max[axis]);
465
466         a = begin;
467         enda = end;
468         while (a < enda) {
469                 if (tree->co[a][axis] > mid) {
470                         enda--;
471                         SWAP(OccFace, tree->face[a], tree->face[enda]);
472                         SWAP(float, tree->co[a][0], tree->co[enda][0]);
473                         SWAP(float, tree->co[a][1], tree->co[enda][1]);
474                         SWAP(float, tree->co[a][2], tree->co[enda][2]);
475                 }
476                 else
477                         a++;
478         }
479
480         *split = enda;
481 }
482
483 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
484 {
485         /* split faces into eight groups */
486         int b, splitx, splity[2], splitz[4];
487
488         occ_build_split(tree, begin, end, &splitx);
489
490         /* force split if none found, to deal with degenerate geometry */
491         if (splitx == begin || splitx == end)
492                 splitx = (begin + end) / 2;
493
494         occ_build_split(tree, begin, splitx, &splity[0]);
495         occ_build_split(tree, splitx, end, &splity[1]);
496
497         occ_build_split(tree, begin, splity[0], &splitz[0]);
498         occ_build_split(tree, splity[0], splitx, &splitz[1]);
499         occ_build_split(tree, splitx, splity[1], &splitz[2]);
500         occ_build_split(tree, splity[1], end, &splitz[3]);
501
502         offset[0] = begin;
503         offset[1] = splitz[0];
504         offset[2] = splity[0];
505         offset[3] = splitz[1];
506         offset[4] = splitx;
507         offset[5] = splitz[2];
508         offset[6] = splity[1];
509         offset[7] = splitz[3];
510
511         for (b = 0; b < 7; b++)
512                 count[b] = offset[b + 1] - offset[b];
513         count[7] = end - offset[7];
514 }
515
516 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth);
517
518 static void *exec_occ_build(void *data)
519 {
520         OcclusionBuildThread *othread = (OcclusionBuildThread *)data;
521
522         occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth);
523
524         return 0;
525 }
526
527 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth)
528 {
529         ListBase threads;
530         OcclusionBuildThread othreads[BLENDER_MAX_THREADS];
531         OccNode *child, tmpnode;
532         /* OccFace *face; */
533         int a, b, totthread = 0, offset[TOTCHILD], count[TOTCHILD];
534
535         /* add a new node */
536         node->occlusion = 1.0f;
537
538         /* leaf node with only children */
539         if (end - begin <= TOTCHILD) {
540                 for (a = begin, b = 0; a < end; a++, b++) {
541                         /* face= &tree->face[a]; */
542                         node->child[b].face = a;
543                         node->childflag |= (1 << b);
544                 }
545         }
546         else {
547                 /* order faces */
548                 occ_build_8_split(tree, begin, end, offset, count);
549
550                 if (depth == 1 && tree->dothreadedbuild)
551                         BLI_init_threads(&threads, exec_occ_build, tree->totbuildthread);
552
553                 for (b = 0; b < TOTCHILD; b++) {
554                         if (count[b] == 0) {
555                                 node->child[b].node = NULL;
556                         }
557                         else if (count[b] == 1) {
558                                 /* face= &tree->face[offset[b]]; */
559                                 node->child[b].face = offset[b];
560                                 node->childflag |= (1 << b);
561                         }
562                         else {
563                                 if (tree->dothreadedbuild)
564                                         BLI_lock_thread(LOCK_CUSTOM1);
565
566                                 child = BLI_memarena_alloc(tree->arena, sizeof(OccNode));
567                                 node->child[b].node = child;
568
569                                 /* keep track of maximum depth for stack */
570                                 if (depth + 1 > tree->maxdepth)
571                                         tree->maxdepth = depth + 1;
572
573                                 if (tree->dothreadedbuild)
574                                         BLI_unlock_thread(LOCK_CUSTOM1);
575
576                                 if (depth == 1 && tree->dothreadedbuild) {
577                                         othreads[totthread].tree = tree;
578                                         othreads[totthread].node = child;
579                                         othreads[totthread].begin = offset[b];
580                                         othreads[totthread].end = offset[b] + count[b];
581                                         othreads[totthread].depth = depth + 1;
582                                         BLI_insert_thread(&threads, &othreads[totthread]);
583                                         totthread++;
584                                 }
585                                 else
586                                         occ_build_recursive(tree, child, offset[b], offset[b] + count[b], depth + 1);
587                         }
588                 }
589
590                 if (depth == 1 && tree->dothreadedbuild)
591                         BLI_end_threads(&threads);
592         }
593
594         /* combine area, position and sh */
595         for (b = 0; b < TOTCHILD; b++) {
596                 if (node->childflag & (1 << b)) {
597                         child = &tmpnode;
598                         occ_node_from_face(tree->face + node->child[b].face, &tmpnode);
599                 }
600                 else {
601                         child = node->child[b].node;
602                 }
603
604                 if (child) {
605                         node->area += child->area;
606                         sh_add(node->sh, node->sh, child->sh);
607                         madd_v3_v3fl(node->co, child->co, child->area);
608                 }
609         }
610
611         if (node->area != 0.0f)
612                 mul_v3_fl(node->co, 1.0f / node->area);
613
614         /* compute maximum distance from center */
615         node->dco = 0.0f;
616         if (node->area > 0.0f)
617                 occ_build_dco(tree, node, node->co, &node->dco);
618 }
619
620 static void occ_build_sh_normalize(OccNode *node)
621 {
622         /* normalize spherical harmonics to not include area, so
623          * we can clamp the dot product and then mutliply by area */
624         int b;
625
626         if (node->area != 0.0f)
627                 sh_mul(node->sh, 1.0f / node->area);
628
629         for (b = 0; b < TOTCHILD; b++) {
630                 if (node->childflag & (1 << b)) {
631                         /* pass */
632                 }
633                 else if (node->child[b].node) {
634                         occ_build_sh_normalize(node->child[b].node);
635                 }
636         }
637 }
638
639 static OcclusionTree *occ_tree_build(Render *re)
640 {
641         OcclusionTree *tree;
642         ObjectInstanceRen *obi;
643         ObjectRen *obr;
644         Material *ma;
645         VlakRen *vlr = NULL;
646         int a, b, c, totface;
647
648         /* count */
649         totface = 0;
650         for (obi = re->instancetable.first; obi; obi = obi->next) {
651                 obr = obi->obr;
652                 for (a = 0; a < obr->totvlak; a++) {
653                         if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak;
654                         else vlr++;
655
656                         ma = vlr->mat;
657
658                         if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE))
659                                 totface++;
660                 }
661         }
662
663         if (totface == 0)
664                 return NULL;
665         
666         tree = MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
667         tree->totface = totface;
668
669         /* parameters */
670         tree->error = get_render_aosss_error(&re->r, re->wrld.ao_approx_error);
671         tree->distfac = (re->wrld.aomode & WO_AODIST) ? re->wrld.aodistfac : 0.0f;
672         tree->doindirect = (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0);
673
674         /* allocation */
675         tree->arena = BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena");
676         BLI_memarena_use_calloc(tree->arena);
677
678         if (re->wrld.aomode & WO_AOCACHE)
679                 tree->cache = MEM_callocN(sizeof(OcclusionCache) * BLENDER_MAX_THREADS, "OcclusionCache");
680
681         tree->face = MEM_callocN(sizeof(OccFace) * totface, "OcclusionFace");
682         tree->co = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionCo");
683         tree->occlusion = MEM_callocN(sizeof(float) * totface, "OcclusionOcclusion");
684
685         if (tree->doindirect)
686                 tree->rad = MEM_callocN(sizeof(float) * 3 * totface, "OcclusionRad");
687
688         /* make array of face pointers */
689         for (b = 0, c = 0, obi = re->instancetable.first; obi; obi = obi->next, c++) {
690                 obr = obi->obr;
691                 for (a = 0; a < obr->totvlak; a++) {
692                         if ((a & 255) == 0) vlr = obr->vlaknodes[a >> 8].vlak;
693                         else vlr++;
694
695                         ma = vlr->mat;
696
697                         if ((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) {
698                                 tree->face[b].obi = c;
699                                 tree->face[b].facenr = a;
700                                 tree->occlusion[b] = 1.0f;
701                                 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 
702                                 b++;
703                         }
704                 }
705         }
706
707         /* threads */
708         tree->totbuildthread = (re->r.threads > 1 && totface > 10000) ? 8 : 1;
709         tree->dothreadedbuild = (tree->totbuildthread > 1);
710
711         /* recurse */
712         tree->root = BLI_memarena_alloc(tree->arena, sizeof(OccNode));
713         tree->maxdepth = 1;
714         occ_build_recursive(tree, tree->root, 0, totface, 1);
715
716         if (tree->doindirect) {
717                 occ_build_shade(re, tree);
718                 occ_sum_occlusion(tree, tree->root);
719         }
720         
721         MEM_freeN(tree->co);
722         tree->co = NULL;
723
724         occ_build_sh_normalize(tree->root);
725
726         for (a = 0; a < BLENDER_MAX_THREADS; a++)
727                 tree->stack[a] = MEM_callocN(sizeof(OccNode) * TOTCHILD * (tree->maxdepth + 1), "OccStack");
728
729         return tree;
730 }
731
732 static void occ_free_tree(OcclusionTree *tree)
733 {
734         int a;
735
736         if (tree) {
737                 if (tree->arena) BLI_memarena_free(tree->arena);
738                 for (a = 0; a < BLENDER_MAX_THREADS; a++)
739                         if (tree->stack[a])
740                                 MEM_freeN(tree->stack[a]);
741                 if (tree->occlusion) MEM_freeN(tree->occlusion);
742                 if (tree->cache) MEM_freeN(tree->cache);
743                 if (tree->face) MEM_freeN(tree->face);
744                 if (tree->rad) MEM_freeN(tree->rad);
745                 MEM_freeN(tree);
746         }
747 }
748
749 /* ------------------------- Traversal --------------------------- */
750
751 static float occ_solid_angle(OccNode *node, const float v[3], float d2, float invd2, const float receivenormal[3])
752 {
753         float dotreceive, dotemit;
754         float ev[3];
755
756         ev[0] = -v[0] * invd2;
757         ev[1] = -v[1] * invd2;
758         ev[2] = -v[2] * invd2;
759         dotemit = sh_eval(node->sh, ev);
760         dotreceive = dot_v3v3(receivenormal, v) * invd2;
761
762         CLAMP(dotemit, 0.0f, 1.0f);
763         CLAMP(dotreceive, 0.0f, 1.0f);
764         
765         return ((node->area * dotemit * dotreceive) / (d2 + node->area * INVPI)) * INVPI;
766 }
767
768 static float occ_form_factor(OccFace *face, float *p, float *n)
769 {
770         ObjectInstanceRen *obi;
771         VlakRen *vlr;
772         float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib = 0.0f;
773
774         obi = &R.objectinstance[face->obi];
775         vlr = RE_findOrAddVlak(obi->obr, face->facenr);
776
777         copy_v3_v3(v1, vlr->v1->co);
778         copy_v3_v3(v2, vlr->v2->co);
779         copy_v3_v3(v3, vlr->v3->co);
780
781         if (obi->flag & R_TRANSFORMED) {
782                 mul_m4_v3(obi->mat, v1);
783                 mul_m4_v3(obi->mat, v2);
784                 mul_m4_v3(obi->mat, v3);
785         }
786
787         if (form_factor_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
788                 contrib += form_factor_quad(p, n, q0, q1, q2, q3);
789
790         if (vlr->v4) {
791                 copy_v3_v3(v4, vlr->v4->co);
792                 if (obi->flag & R_TRANSFORMED)
793                         mul_m4_v3(obi->mat, v4);
794
795                 if (form_factor_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
796                         contrib += form_factor_quad(p, n, q0, q1, q2, q3);
797         }
798
799         return contrib;
800 }
801
802 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude,
803                        const float pp[3], const float pn[3], float *occ, float rad[3], float bentn[3])
804 {
805         OccNode *node, **stack;
806         OccFace *face;
807         float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2;
808         float distfac, fac, error, d2, weight, emitarea;
809         int b, f, totstack;
810
811         /* init variables */
812         copy_v3_v3(p, pp);
813         copy_v3_v3(n, pn);
814         madd_v3_v3fl(p, n, 1e-4f);
815
816         if (bentn)
817                 copy_v3_v3(bentn, n);
818         
819         error = tree->error;
820         distfac = tree->distfac;
821
822         resultocc = 0.0f;
823         zero_v3(resultrad);
824
825         /* init stack */
826         stack = tree->stack[thread];
827         stack[0] = tree->root;
828         totstack = 1;
829
830         while (totstack) {
831                 /* pop point off the stack */
832                 node = stack[--totstack];
833
834                 sub_v3_v3v3(v, node->co, p);
835                 d2 = dot_v3v3(v, v) + 1e-16f;
836                 emitarea = MAX2(node->area, node->dco);
837
838                 if (d2 * error > emitarea) {
839                         if (distfac != 0.0f) {
840                                 fac = 1.0f / (1.0f + distfac * d2);
841                                 if (fac < 0.01f)
842                                         continue;
843                         }
844                         else
845                                 fac = 1.0f;
846
847                         /* accumulate occlusion from spherical harmonics */
848                         invd2 = 1.0f / sqrtf(d2);
849                         weight = occ_solid_angle(node, v, d2, invd2, n);
850
851                         if (rad)
852                                 madd_v3_v3fl(resultrad, node->rad, weight * fac);
853
854                         weight *= node->occlusion;
855
856                         if (bentn) {
857                                 bentn[0] -= weight * invd2 * v[0];
858                                 bentn[1] -= weight * invd2 * v[1];
859                                 bentn[2] -= weight * invd2 * v[2];
860                         }
861
862                         resultocc += weight * fac;
863                 }
864                 else {
865                         /* traverse into children */
866                         for (b = 0; b < TOTCHILD; b++) {
867                                 if (node->childflag & (1 << b)) {
868                                         f = node->child[b].face;
869                                         face = &tree->face[f];
870
871                                         /* accumulate occlusion with face form factor */
872                                         if (!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
873                                                 if (bentn || distfac != 0.0f) {
874                                                         occ_face(face, co, NULL, NULL); 
875                                                         sub_v3_v3v3(v, co, p);
876                                                         d2 = dot_v3v3(v, v) + 1e-16f;
877
878                                                         fac = (distfac == 0.0f) ? 1.0f : 1.0f / (1.0f + distfac * d2);
879                                                         if (fac < 0.01f)
880                                                                 continue;
881                                                 }
882                                                 else
883                                                         fac = 1.0f;
884
885                                                 weight = occ_form_factor(face, p, n);
886
887                                                 if (rad)
888                                                         madd_v3_v3fl(resultrad, tree->rad[f], weight * fac);
889
890                                                 weight *= tree->occlusion[f];
891
892                                                 if (bentn) {
893                                                         invd2 = 1.0f / sqrtf(d2);
894                                                         bentn[0] -= weight * invd2 * v[0];
895                                                         bentn[1] -= weight * invd2 * v[1];
896                                                         bentn[2] -= weight * invd2 * v[2];
897                                                 }
898
899                                                 resultocc += weight * fac;
900                                         }
901                                 }
902                                 else if (node->child[b].node) {
903                                         /* push child on the stack */
904                                         stack[totstack++] = node->child[b].node;
905                                 }
906                         }
907                 }
908         }
909
910         if (occ) *occ = resultocc;
911         if (rad) copy_v3_v3(rad, resultrad);
912 #if 0
913         if (rad && exclude) {
914                 int a;
915                 for (a = 0; a < tree->totface; a++)
916                         if ((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr))
917                                 copy_v3_v3(rad, tree->rad[a]);
918         }
919 #endif
920         if (bentn) normalize_v3(bentn);
921 }
922
923 static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce)
924 {
925         float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ;
926         int bounce, i;
927
928         rad = MEM_callocN(sizeof(float) * 3 * tree->totface, "OcclusionBounceRad");
929         sum = MEM_dupallocN(tree->rad);
930
931         for (bounce = 1; bounce < totbounce; bounce++) {
932                 for (i = 0; i < tree->totface; i++) {
933                         occ_face(&tree->face[i], co, n, NULL);
934                         madd_v3_v3fl(co, n, 1e-8f);
935
936                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL);
937                         rad[i][0] = MAX2(rad[i][0], 0.0f);
938                         rad[i][1] = MAX2(rad[i][1], 0.0f);
939                         rad[i][2] = MAX2(rad[i][2], 0.0f);
940                         add_v3_v3(sum[i], rad[i]);
941
942                         if (re->test_break(re->tbh))
943                                 break;
944                 }
945
946                 if (re->test_break(re->tbh))
947                         break;
948
949                 tmp = tree->rad;
950                 tree->rad = rad;
951                 rad = tmp;
952
953                 occ_sum_occlusion(tree, tree->root);
954         }
955
956         MEM_freeN(rad);
957         MEM_freeN(tree->rad);
958         tree->rad = sum;
959
960         if (!re->test_break(re->tbh))
961                 occ_sum_occlusion(tree, tree->root);
962 }
963
964 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
965 {
966         float *occ, co[3], n[3];
967         int pass, i;
968         
969         occ = MEM_callocN(sizeof(float) * tree->totface, "OcclusionPassOcc");
970
971         for (pass = 0; pass < totpass; pass++) {
972                 for (i = 0; i < tree->totface; i++) {
973                         occ_face(&tree->face[i], co, n, NULL);
974                         negate_v3(n);
975                         madd_v3_v3fl(co, n, 1e-8f);
976
977                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL);
978                         if (re->test_break(re->tbh))
979                                 break;
980                 }
981
982                 if (re->test_break(re->tbh))
983                         break;
984
985                 for (i = 0; i < tree->totface; i++) {
986                         tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
987                         if (tree->occlusion[i] < 0.0f)
988                                 tree->occlusion[i] = 0.0f;
989                 }
990
991                 occ_sum_occlusion(tree, tree->root);
992         }
993
994         MEM_freeN(occ);
995 }
996
997 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude,
998                             const float co[3], const float n[3], int thread, int onlyshadow,
999                             float *ao, float *env, float *indirect)
1000 {
1001         float nn[3], bn[3], fac, occ, occlusion, correction, rad[3];
1002         int envcolor;
1003
1004         envcolor = re->wrld.aocolor;
1005         if (onlyshadow)
1006                 envcolor = WO_AOPLAIN;
1007
1008         negate_v3_v3(nn, n);
1009
1010         occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect) ? rad : NULL, (env && envcolor) ? bn : NULL);
1011
1012         correction = re->wrld.ao_approx_correction;
1013
1014         occlusion = (1.0f - correction) * (1.0f - occ);
1015         CLAMP(occlusion, 0.0f, 1.0f);
1016         if (correction != 0.0f)
1017                 occlusion += correction * expf(-occ);
1018
1019         if (env) {
1020                 /* sky shading using bent normal */
1021                 if (ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) {
1022                         fac = 0.5f * (1.0f + dot_v3v3(bn, re->grvec));
1023                         env[0] = (1.0f - fac) * re->wrld.horr + fac * re->wrld.zenr;
1024                         env[1] = (1.0f - fac) * re->wrld.horg + fac * re->wrld.zeng;
1025                         env[2] = (1.0f - fac) * re->wrld.horb + fac * re->wrld.zenb;
1026
1027                         mul_v3_fl(env, occlusion);
1028                 }
1029                 else {
1030                         env[0] = occlusion;
1031                         env[1] = occlusion;
1032                         env[2] = occlusion;
1033                 }
1034 #if 0
1035                 else {  /* WO_AOSKYTEX */
1036                         float dxyview[3];
1037                         bn[0] = -bn[0];
1038                         bn[1] = -bn[1];
1039                         bn[2] = -bn[2];
1040                         dxyview[0] = 1.0f;
1041                         dxyview[1] = 1.0f;
1042                         dxyview[2] = 0.0f;
1043                         shadeSkyView(ao, co, bn, dxyview);
1044                 }
1045 #endif
1046         }
1047
1048         if (ao) {
1049                 ao[0] = occlusion;
1050                 ao[1] = occlusion;
1051                 ao[2] = occlusion;
1052         }
1053
1054         if (tree->doindirect) copy_v3_v3(indirect, rad);
1055         else zero_v3(indirect);
1056 }
1057
1058 /* ---------------------------- Caching ------------------------------- */
1059
1060 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
1061 {
1062         x -= cache->x;
1063         y -= cache->y;
1064
1065         x /= cache->step;
1066         y /= cache->step;
1067         x *= cache->step;
1068         y *= cache->step;
1069
1070         if (x < 0 || x >= cache->w || y < 0 || y >= cache->h)
1071                 return NULL;
1072         else
1073                 return &cache->sample[y * cache->w + x];
1074 }
1075
1076 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect)
1077 {
1078         OcclusionCache *cache;
1079         OcclusionCacheSample *samples[4], *sample;
1080         float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
1081         float d[3], dist2;
1082         int i, x1, y1, x2, y2;
1083
1084         if (!tree->cache)
1085                 return 0;
1086         
1087         /* first try to find a sample in the same pixel */
1088         cache = &tree->cache[thread];
1089
1090         if (cache->sample && cache->step) {
1091                 sample = &cache->sample[(y - cache->y) * cache->w + (x - cache->x)];
1092                 if (sample->filled) {
1093                         sub_v3_v3v3(d, sample->co, co);
1094                         dist2 = dot_v3v3(d, d);
1095                         if (dist2 < 0.5f * sample->dist2 && dot_v3v3(sample->n, n) > 0.98f) {
1096                                 copy_v3_v3(ao, sample->ao);
1097                                 copy_v3_v3(env, sample->env);
1098                                 copy_v3_v3(indirect, sample->indirect);
1099                                 return 1;
1100                         }
1101                 }
1102         }
1103         else
1104                 return 0;
1105
1106         /* try to interpolate between 4 neighboring pixels */
1107         samples[0] = find_occ_sample(cache, x, y);
1108         samples[1] = find_occ_sample(cache, x + cache->step, y);
1109         samples[2] = find_occ_sample(cache, x, y + cache->step);
1110         samples[3] = find_occ_sample(cache, x + cache->step, y + cache->step);
1111
1112         for (i = 0; i < 4; i++)
1113                 if (!samples[i] || !samples[i]->filled)
1114                         return 0;
1115
1116         /* require intensities not being too different */
1117         mino = min_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1118         maxo = max_ffff(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
1119
1120         if (maxo - mino > 0.05f)
1121                 return 0;
1122
1123         /* compute weighted interpolation between samples */
1124         zero_v3(ao);
1125         zero_v3(env);
1126         zero_v3(indirect);
1127         totw = 0.0f;
1128
1129         x1 = samples[0]->x;
1130         y1 = samples[0]->y;
1131         x2 = samples[3]->x;
1132         y2 = samples[3]->y;
1133
1134         tx = (float)(x2 - x) / (float)(x2 - x1);
1135         ty = (float)(y2 - y) / (float)(y2 - y1);
1136
1137         wb[3] = (1.0f - tx) * (1.0f - ty);
1138         wb[2] = (tx) * (1.0f - ty);
1139         wb[1] = (1.0f - tx) * (ty);
1140         wb[0] = tx * ty;
1141
1142         for (i = 0; i < 4; i++) {
1143                 sub_v3_v3v3(d, samples[i]->co, co);
1144                 //dist2 = dot_v3v3(d, d);
1145
1146                 wz[i] = 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
1147                 wn[i] = pow(dot_v3v3(samples[i]->n, n), 32.0f);
1148
1149                 w = wb[i] * wn[i] * wz[i];
1150
1151                 totw += w;
1152                 madd_v3_v3fl(ao, samples[i]->ao, w);
1153                 madd_v3_v3fl(env, samples[i]->env, w);
1154                 madd_v3_v3fl(indirect, samples[i]->indirect, w);
1155         }
1156
1157         if (totw >= 0.9f) {
1158                 totw = 1.0f / totw;
1159                 mul_v3_fl(ao, totw);
1160                 mul_v3_fl(env, totw);
1161                 mul_v3_fl(indirect, totw);
1162                 return 1;
1163         }
1164
1165         return 0;
1166 }
1167
1168 static void sample_occ_surface(ShadeInput *shi)
1169 {
1170         StrandRen *strand = shi->strand;
1171         StrandSurface *mesh = strand->buffer->surface;
1172         int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
1173         float w[4], *co1, *co2, *co3, *co4;
1174
1175         if (mesh && mesh->face && mesh->co && mesh->ao && index) {
1176                 face = mesh->face[*index];
1177
1178                 co1 = mesh->co[face[0]];
1179                 co2 = mesh->co[face[1]];
1180                 co3 = mesh->co[face[2]];
1181                 co4 = (face[3]) ? mesh->co[face[3]] : NULL;
1182
1183                 interp_weights_face_v3(w, co1, co2, co3, co4, strand->vert->co);
1184
1185                 zero_v3(shi->ao);
1186                 zero_v3(shi->env);
1187                 zero_v3(shi->indirect);
1188
1189                 madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]);
1190                 madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]);
1191                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]);
1192                 madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]);
1193                 madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]);
1194                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]);
1195                 madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]);
1196                 madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]);
1197                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]);
1198                 if (face[3]) {
1199                         madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]);
1200                         madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]);
1201                         madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]);
1202                 }
1203         }
1204         else {
1205                 shi->ao[0] = 1.0f;
1206                 shi->ao[1] = 1.0f;
1207                 shi->ao[2] = 1.0f;
1208                 zero_v3(shi->env);
1209                 zero_v3(shi->indirect);
1210         }
1211 }
1212
1213 /* ------------------------- External Functions --------------------------- */
1214
1215 static void *exec_strandsurface_sample(void *data)
1216 {
1217         OcclusionThread *othread = (OcclusionThread *)data;
1218         Render *re = othread->re;
1219         StrandSurface *mesh = othread->mesh;
1220         float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4;
1221         int a, *face;
1222
1223         for (a = othread->begin; a < othread->end; a++) {
1224                 face = mesh->face[a];
1225                 co1 = mesh->co[face[0]];
1226                 co2 = mesh->co[face[1]];
1227                 co3 = mesh->co[face[2]];
1228
1229                 if (face[3]) {
1230                         co4 = mesh->co[face[3]];
1231
1232                         mid_v3_v3v3(co, co1, co3);
1233                         normal_quad_v3(n, co1, co2, co3, co4);
1234                 }
1235                 else {
1236                         cent_tri_v3(co, co1, co2, co3);
1237                         normal_tri_v3(n, co1, co2, co3);
1238                 }
1239                 negate_v3(n);
1240
1241                 sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect);
1242                 copy_v3_v3(othread->faceao[a], ao);
1243                 copy_v3_v3(othread->faceenv[a], env);
1244                 copy_v3_v3(othread->faceindirect[a], indirect);
1245         }
1246
1247         return 0;
1248 }
1249
1250 void make_occ_tree(Render *re)
1251 {
1252         OcclusionThread othreads[BLENDER_MAX_THREADS];
1253         OcclusionTree *tree;
1254         StrandSurface *mesh;
1255         ListBase threads;
1256         float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3];
1257         int a, totface, totthread, *face, *count;
1258
1259         /* ugly, needed for occ_face */
1260         R = *re;
1261
1262         re->i.infostr = IFACE_("Occlusion preprocessing");
1263         re->stats_draw(re->sdh, &re->i);
1264         
1265         re->occlusiontree = tree = occ_tree_build(re);
1266         
1267         if (tree) {
1268                 if (re->wrld.ao_approx_passes > 0)
1269                         occ_compute_passes(re, tree, re->wrld.ao_approx_passes);
1270                 if (tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT))
1271                         occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces);
1272
1273                 for (mesh = re->strandsurface.first; mesh; mesh = mesh->next) {
1274                         if (!mesh->face || !mesh->co || !mesh->ao)
1275                                 continue;
1276
1277                         count = MEM_callocN(sizeof(int) * mesh->totvert, "OcclusionCount");
1278                         faceao = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceAO");
1279                         faceenv = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceEnv");
1280                         faceindirect = MEM_callocN(sizeof(float) * 3 * mesh->totface, "StrandSurfFaceIndirect");
1281
1282                         totthread = (mesh->totface > 10000) ? re->r.threads : 1;
1283                         totface = mesh->totface / totthread;
1284                         for (a = 0; a < totthread; a++) {
1285                                 othreads[a].re = re;
1286                                 othreads[a].faceao = faceao;
1287                                 othreads[a].faceenv = faceenv;
1288                                 othreads[a].faceindirect = faceindirect;
1289                                 othreads[a].thread = a;
1290                                 othreads[a].mesh = mesh;
1291                                 othreads[a].begin = a * totface;
1292                                 othreads[a].end = (a == totthread - 1) ? mesh->totface : (a + 1) * totface;
1293                         }
1294
1295                         if (totthread == 1) {
1296                                 exec_strandsurface_sample(&othreads[0]);
1297                         }
1298                         else {
1299                                 BLI_init_threads(&threads, exec_strandsurface_sample, totthread);
1300
1301                                 for (a = 0; a < totthread; a++)
1302                                         BLI_insert_thread(&threads, &othreads[a]);
1303
1304                                 BLI_end_threads(&threads);
1305                         }
1306
1307                         for (a = 0; a < mesh->totface; a++) {
1308                                 face = mesh->face[a];
1309
1310                                 copy_v3_v3(ao, faceao[a]);
1311                                 copy_v3_v3(env, faceenv[a]);
1312                                 copy_v3_v3(indirect, faceindirect[a]);
1313
1314                                 add_v3_v3(mesh->ao[face[0]], ao);
1315                                 add_v3_v3(mesh->env[face[0]], env);
1316                                 add_v3_v3(mesh->indirect[face[0]], indirect);
1317                                 count[face[0]]++;
1318                                 add_v3_v3(mesh->ao[face[1]], ao);
1319                                 add_v3_v3(mesh->env[face[1]], env);
1320                                 add_v3_v3(mesh->indirect[face[1]], indirect);
1321                                 count[face[1]]++;
1322                                 add_v3_v3(mesh->ao[face[2]], ao);
1323                                 add_v3_v3(mesh->env[face[2]], env);
1324                                 add_v3_v3(mesh->indirect[face[2]], indirect);
1325                                 count[face[2]]++;
1326
1327                                 if (face[3]) {
1328                                         add_v3_v3(mesh->ao[face[3]], ao);
1329                                         add_v3_v3(mesh->env[face[3]], env);
1330                                         add_v3_v3(mesh->indirect[face[3]], indirect);
1331                                         count[face[3]]++;
1332                                 }
1333                         }
1334
1335                         for (a = 0; a < mesh->totvert; a++) {
1336                                 if (count[a]) {
1337                                         mul_v3_fl(mesh->ao[a], 1.0f / count[a]);
1338                                         mul_v3_fl(mesh->env[a], 1.0f / count[a]);
1339                                         mul_v3_fl(mesh->indirect[a], 1.0f / count[a]);
1340                                 }
1341                         }
1342
1343                         MEM_freeN(count);
1344                         MEM_freeN(faceao);
1345                         MEM_freeN(faceenv);
1346                         MEM_freeN(faceindirect);
1347                 }
1348         }
1349 }
1350
1351 void free_occ(Render *re)
1352 {
1353         if (re->occlusiontree) {
1354                 occ_free_tree(re->occlusiontree);
1355                 re->occlusiontree = NULL;
1356         }
1357 }
1358
1359 void sample_occ(Render *re, ShadeInput *shi)
1360 {
1361         OcclusionTree *tree = re->occlusiontree;
1362         OcclusionCache *cache;
1363         OcclusionCacheSample *sample;
1364         OccFace exclude;
1365         int onlyshadow;
1366
1367         if (tree) {
1368                 if (shi->strand) {
1369                         sample_occ_surface(shi);
1370                 }
1371                 /* try to get result from the cache if possible */
1372                 else if (shi->depth != 0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) {
1373                         /* no luck, let's sample the occlusion */
1374                         exclude.obi = shi->obi - re->objectinstance;
1375                         exclude.facenr = shi->vlr->index;
1376                         onlyshadow = (shi->mat->mode & MA_ONLYSHADOW);
1377                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1378
1379                         /* fill result into sample, each time */
1380                         if (tree->cache) {
1381                                 cache = &tree->cache[shi->thread];
1382
1383                                 if (cache->sample && cache->step) {
1384                                         sample = &cache->sample[(shi->ys - cache->y) * cache->w + (shi->xs - cache->x)];
1385                                         copy_v3_v3(sample->co, shi->co);
1386                                         copy_v3_v3(sample->n, shi->vno);
1387                                         copy_v3_v3(sample->ao, shi->ao);
1388                                         copy_v3_v3(sample->env, shi->env);
1389                                         copy_v3_v3(sample->indirect, shi->indirect);
1390                                         sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]);
1391                                         sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2]));
1392                                         sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1393                                         sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
1394                                         sample->filled = 1;
1395                                 }
1396                         }
1397                 }
1398         }
1399         else {
1400                 shi->ao[0] = 1.0f;
1401                 shi->ao[1] = 1.0f;
1402                 shi->ao[2] = 1.0f;
1403
1404                 shi->env[0] = 0.0f;
1405                 shi->env[1] = 0.0f;
1406                 shi->env[2] = 0.0f;
1407
1408                 shi->indirect[0] = 0.0f;
1409                 shi->indirect[1] = 0.0f;
1410                 shi->indirect[2] = 0.0f;
1411         }
1412 }
1413
1414 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
1415 {
1416         OcclusionTree *tree = re->occlusiontree;
1417         PixStr ps;
1418         OcclusionCache *cache;
1419         OcclusionCacheSample *sample;
1420         OccFace exclude;
1421         ShadeInput *shi;
1422         intptr_t *rd = NULL;
1423         int *ro = NULL, *rp = NULL, *rz = NULL, onlyshadow;
1424         int x, y, step = CACHE_STEP;
1425
1426         if (!tree->cache)
1427                 return;
1428
1429         cache = &tree->cache[pa->thread];
1430         cache->w = pa->rectx;
1431         cache->h = pa->recty;
1432         cache->x = pa->disprect.xmin;
1433         cache->y = pa->disprect.ymin;
1434         cache->step = step;
1435         cache->sample = MEM_callocN(sizeof(OcclusionCacheSample) * cache->w * cache->h, "OcclusionCacheSample");
1436         sample = cache->sample;
1437
1438         if (re->osa) {
1439                 rd = pa->rectdaps;
1440         }
1441         else {
1442                 /* fake pixel struct for non-osa */
1443                 ps.next = NULL;
1444                 ps.mask = 0xFFFF;
1445
1446                 ro = pa->recto;
1447                 rp = pa->rectp;
1448                 rz = pa->rectz;
1449         }
1450
1451         /* compute a sample at every step pixels */
1452         for (y = pa->disprect.ymin; y < pa->disprect.ymax; y++) {
1453                 for (x = pa->disprect.xmin; x < pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) {
1454                         if (!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax - 1))
1455                                 continue;
1456                         if (!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax - 1))
1457                                 continue;
1458
1459                         if (re->osa) {
1460                                 if (!*rd) continue;
1461
1462                                 shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y);
1463                         }
1464                         else {
1465                                 if (!*rp) continue;
1466
1467                                 ps.obi = *ro;
1468                                 ps.facenr = *rp;
1469                                 ps.z = *rz;
1470                                 shade_samples_fill_with_ps(ssamp, &ps, x, y);
1471                         }
1472
1473                         shi = ssamp->shi;
1474                         if (shi->vlr) {
1475                                 onlyshadow = (shi->mat->mode & MA_ONLYSHADOW);
1476                                 exclude.obi = shi->obi - re->objectinstance;
1477                                 exclude.facenr = shi->vlr->index;
1478                                 sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
1479
1480                                 copy_v3_v3(sample->co, shi->co);
1481                                 copy_v3_v3(sample->n, shi->vno);
1482                                 copy_v3_v3(sample->ao, shi->ao);
1483                                 copy_v3_v3(sample->env, shi->env);
1484                                 copy_v3_v3(sample->indirect, shi->indirect);
1485                                 sample->intensity = max_fff(sample->ao[0], sample->ao[1], sample->ao[2]);
1486                                 sample->intensity = max_ff(sample->intensity, max_fff(sample->env[0], sample->env[1], sample->env[2]));
1487                                 sample->intensity = max_ff(sample->intensity, max_fff(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
1488                                 sample->dist2 = dot_v3v3(shi->dxco, shi->dxco) + dot_v3v3(shi->dyco, shi->dyco);
1489                                 sample->x = shi->xs;
1490                                 sample->y = shi->ys;
1491                                 sample->filled = 1;
1492                         }
1493
1494                         if (re->test_break(re->tbh))
1495                                 break;
1496                 }
1497         }
1498 }
1499
1500 void free_occ_samples(Render *re, RenderPart *pa)
1501 {
1502         OcclusionTree *tree = re->occlusiontree;
1503         OcclusionCache *cache;
1504
1505         if (tree->cache) {
1506                 cache = &tree->cache[pa->thread];
1507
1508                 if (cache->sample)
1509                         MEM_freeN(cache->sample);
1510
1511                 cache->w = 0;
1512                 cache->h = 0;
1513                 cache->step = 0;
1514         }
1515 }
1516