Merging r46111 through r46136 from trunk into soc-2011-tomato
[blender-staging.git] / source / blender / render / intern / source / sss.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2007 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): none yet.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/render/intern/source/sss.c
29  *  \ingroup render
30  */
31
32 /* Possible Improvements:
33  * - add fresnel terms
34  * - adapt Rd table to scale, now with small scale there are a lot of misses?
35  * - possible interesting method: perform sss on all samples in the tree,
36  *   and then use those values interpolated somehow later. can also do this
37  *   filtering on demand for speed. since we are doing things in screen
38  *   space now there is an exact correspondence
39  * - avoid duplicate shading (filtering points in advance, irradiance cache
40  *   like lookup?)
41  * - lower resolution samples
42  */
43
44 #include <math.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <string.h>
48
49 /* external modules: */
50 #include "MEM_guardedalloc.h"
51
52 #include "BLI_math.h"
53 #include "BLI_blenlib.h"
54 #include "BLI_utildefines.h"
55 #include "BLI_ghash.h"
56 #include "BLI_memarena.h"
57
58 #include "PIL_time.h"
59
60 #include "DNA_material_types.h"
61
62 #include "BKE_colortools.h"
63 #include "BKE_global.h"
64 #include "BKE_main.h"
65 #include "BKE_material.h"
66 #include "BKE_node.h"
67 #include "BKE_scene.h"
68
69
70 /* this module */
71 #include "render_types.h"
72 #include "rendercore.h"
73 #include "renderdatabase.h" 
74 #include "shading.h"
75 #include "sss.h"
76 #include "zbuf.h"
77
78 /* Generic Multiple Scattering API */
79
80 /* Relevant papers:
81  * [1] A Practical Model for Subsurface Light Transport
82  * [2] A Rapid Hierarchical Rendering Technique for Translucent Materials
83  * [3] Efficient Rendering of Local Subsurface Scattering
84  * [4] Implementing a skin BSSRDF (or several...)
85  */
86
87 /* Defines */
88
89 #define RD_TABLE_RANGE          100.0f
90 #define RD_TABLE_RANGE_2        10000.0f
91 #define RD_TABLE_SIZE           10000
92
93 #define MAX_OCTREE_NODE_POINTS  8
94 #define MAX_OCTREE_DEPTH                15
95
96 /* Struct Definitions */
97
98 struct ScatterSettings {
99         float eta;              /* index of refraction */
100         float sigma_a;  /* absorption coefficient */
101         float sigma_s_; /* reduced scattering coefficient */
102         float sigma_t_; /* reduced extinction coefficient */
103         float sigma;    /* effective extinction coefficient */
104         float Fdr;              /* diffuse fresnel reflectance */
105         float D;                /* diffusion constant */
106         float A;
107         float alpha_;   /* reduced albedo */
108         float zr;               /* distance of virtual lightsource above surface */
109         float zv;               /* distance of virtual lightsource below surface */
110         float ld;               /* mean free path */
111         float ro;               /* diffuse reflectance */
112         float color;
113         float invsigma_t_;
114         float frontweight;
115         float backweight;
116
117         float *tableRd;  /* lookup table to avoid computing Rd */
118         float *tableRd2; /* lookup table to avoid computing Rd for bigger values */
119 };
120
121 typedef struct ScatterPoint {
122         float co[3];
123         float rad[3];
124         float area;
125         int back;
126 } ScatterPoint;
127
128 typedef struct ScatterNode {
129         float co[3];
130         float rad[3];
131         float backrad[3];
132         float area, backarea;
133
134         int totpoint;
135         ScatterPoint *points;
136
137         float split[3];
138         struct ScatterNode *child[8];
139 } ScatterNode;
140
141 struct ScatterTree {
142         MemArena *arena;
143
144         ScatterSettings *ss[3];
145         float error, scale;
146
147         ScatterNode *root;
148         ScatterPoint *points;
149         ScatterPoint **refpoints;
150         ScatterPoint **tmppoints;
151         int totpoint;
152         float min[3], max[3];
153 };
154
155 typedef struct ScatterResult {
156         float rad[3];
157         float backrad[3];
158         float rdsum[3];
159         float backrdsum[3];
160 } ScatterResult;
161
162 /* Functions for BSSRDF reparametrization in to more intuitive parameters,
163  * see [2] section 4 for more info. */
164
165 static float f_Rd(float alpha_, float A, float ro)
166 {
167         float sq;
168
169         sq= sqrt(3.0f*(1.0f - alpha_));
170         return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro;
171 }
172
173 static float compute_reduced_albedo(ScatterSettings *ss)
174 {
175         const float tolerance= 1e-8;
176         const int max_iteration_count= 20;
177         float d, fsub, xn_1= 0.0f, xn= 1.0f, fxn, fxn_1;
178         int i;
179
180         /* use secant method to compute reduced albedo using Rd function inverse
181          * with a given reflectance */
182         fxn= f_Rd(xn, ss->A, ss->ro);
183         fxn_1= f_Rd(xn_1, ss->A, ss->ro);
184
185         for (i= 0; i < max_iteration_count; i++) {
186                 fsub= (fxn - fxn_1);
187                 if (fabsf(fsub) < tolerance)
188                         break;
189                 d= ((xn - xn_1)/fsub)*fxn;
190                 if (fabsf(d) < tolerance)
191                         break;
192
193                 xn_1= xn;
194                 fxn_1= fxn;
195                 xn= xn - d;
196
197                 if (xn > 1.0f) xn= 1.0f;
198                 if (xn_1 > 1.0f) xn_1= 1.0f;
199                 
200                 fxn= f_Rd(xn, ss->A, ss->ro);
201         }
202
203         /* avoid division by zero later */
204         if (xn <= 0.0f)
205                 xn= 0.00001f;
206
207         return xn;
208 }
209
210 /* Exponential falloff functions */
211
212 static float Rd_rsquare(ScatterSettings *ss, float rr)
213 {
214         float sr, sv, Rdr, Rdv;
215
216         sr= sqrt(rr + ss->zr*ss->zr);
217         sv= sqrt(rr + ss->zv*ss->zv);
218
219         Rdr= ss->zr*(1.0f + ss->sigma*sr)*expf(-ss->sigma*sr)/(sr*sr*sr);
220         Rdv= ss->zv*(1.0f + ss->sigma*sv)*expf(-ss->sigma*sv)/(sv*sv*sv);
221
222         return /*ss->alpha_*/(1.0f/(4.0f*(float)M_PI))*(Rdr + Rdv);
223 }
224
225 static float Rd(ScatterSettings *ss, float r)
226 {
227         return Rd_rsquare(ss, r*r);
228 }
229
230 /* table lookups for Rd. this avoids expensive exp calls. we use two
231  * separate tables as well for lower and higher numbers to improve
232  * precision, since the number are poorly distributed because we do
233  * a lookup with the squared distance for smaller distances, saving
234  * another sqrt. */
235
236 static void approximate_Rd_rgb(ScatterSettings **ss, float rr, float *rd)
237 {
238         float indexf, t, idxf;
239         int index;
240
241         if (rr > (RD_TABLE_RANGE_2*RD_TABLE_RANGE_2));
242         else if (rr > RD_TABLE_RANGE) {
243                 rr= sqrt(rr);
244                 indexf= rr*(RD_TABLE_SIZE/RD_TABLE_RANGE_2);
245                 index= (int)indexf;
246                 idxf= (float)index;
247                 t= indexf - idxf;
248
249                 if (index >= 0 && index < RD_TABLE_SIZE) {
250                         rd[0]= (ss[0]->tableRd2[index]*(1-t) + ss[0]->tableRd2[index+1]*t);
251                         rd[1]= (ss[1]->tableRd2[index]*(1-t) + ss[1]->tableRd2[index+1]*t);
252                         rd[2]= (ss[2]->tableRd2[index]*(1-t) + ss[2]->tableRd2[index+1]*t);
253                         return;
254                 }
255         }
256         else {
257                 indexf= rr*(RD_TABLE_SIZE/RD_TABLE_RANGE);
258                 index= (int)indexf;
259                 idxf= (float)index;
260                 t= indexf - idxf;
261
262                 if (index >= 0 && index < RD_TABLE_SIZE) {
263                         rd[0]= (ss[0]->tableRd[index]*(1-t) + ss[0]->tableRd[index+1]*t);
264                         rd[1]= (ss[1]->tableRd[index]*(1-t) + ss[1]->tableRd[index+1]*t);
265                         rd[2]= (ss[2]->tableRd[index]*(1-t) + ss[2]->tableRd[index+1]*t);
266                         return;
267                 }
268         }
269
270         /* fallback to slow Rd computation */
271         rd[0]= Rd_rsquare(ss[0], rr);
272         rd[1]= Rd_rsquare(ss[1], rr);
273         rd[2]= Rd_rsquare(ss[2], rr);
274 }
275
276 static void build_Rd_table(ScatterSettings *ss)
277 {
278         float r;
279         int i, size = RD_TABLE_SIZE+1;
280
281         ss->tableRd= MEM_mallocN(sizeof(float)*size, "scatterTableRd");
282         ss->tableRd2= MEM_mallocN(sizeof(float)*size, "scatterTableRd");
283
284         for (i= 0; i < size; i++) {
285                 r= i*(RD_TABLE_RANGE/RD_TABLE_SIZE);
286                 /*if (r < ss->invsigma_t_*ss->invsigma_t_)
287                         r= ss->invsigma_t_*ss->invsigma_t_;*/
288                 ss->tableRd[i]= Rd(ss, sqrt(r));
289
290                 r= i*(RD_TABLE_RANGE_2/RD_TABLE_SIZE);
291                 /*if (r < ss->invsigma_t_)
292                         r= ss->invsigma_t_;*/
293                 ss->tableRd2[i]= Rd(ss, r);
294         }
295 }
296
297 ScatterSettings *scatter_settings_new(float refl, float radius, float ior, float reflfac, float frontweight, float backweight)
298 {
299         ScatterSettings *ss;
300         
301         ss= MEM_callocN(sizeof(ScatterSettings), "ScatterSettings");
302
303         /* see [1] and [3] for these formulas */
304         ss->eta= ior;
305         ss->Fdr= -1.440f/ior*ior + 0.710f/ior + 0.668f + 0.0636f*ior;
306         ss->A= (1.0f + ss->Fdr)/(1.0f - ss->Fdr);
307         ss->ld= radius;
308         ss->ro= MIN2(refl, 0.999f);
309         ss->color= ss->ro*reflfac + (1.0f-reflfac);
310
311         ss->alpha_= compute_reduced_albedo(ss);
312
313         ss->sigma= 1.0f/ss->ld;
314         ss->sigma_t_= ss->sigma/sqrtf(3.0f*(1.0f - ss->alpha_));
315         ss->sigma_s_= ss->alpha_*ss->sigma_t_;
316         ss->sigma_a= ss->sigma_t_ - ss->sigma_s_;
317
318         ss->D= 1.0f/(3.0f*ss->sigma_t_);
319
320         ss->zr= 1.0f/ss->sigma_t_;
321         ss->zv= ss->zr + 4.0f*ss->A*ss->D;
322
323         ss->invsigma_t_= 1.0f/ss->sigma_t_;
324
325         ss->frontweight= frontweight;
326         ss->backweight= backweight;
327
328         /* precompute a table of Rd values for quick lookup */
329         build_Rd_table(ss);
330
331         return ss;
332 }
333
334 void scatter_settings_free(ScatterSettings *ss)
335 {
336         MEM_freeN(ss->tableRd);
337         MEM_freeN(ss->tableRd2);
338         MEM_freeN(ss);
339 }
340
341 /* Hierarchical method as in [2]. */
342
343 /* traversal */
344
345 #define SUBNODE_INDEX(co, split) \
346         ((co[0]>=split[0]) + (co[1]>=split[1])*2 + (co[2]>=split[2])*4)
347         
348 static void add_radiance(ScatterTree *tree, float *frontrad, float *backrad, float area, float backarea, float rr, ScatterResult *result)
349 {
350         float rd[3], frontrd[3], backrd[3];
351
352         approximate_Rd_rgb(tree->ss, rr, rd);
353
354         if (frontrad && area) {
355                 frontrd[0] = rd[0]*area;
356                 frontrd[1] = rd[1]*area;
357                 frontrd[2] = rd[2]*area;
358
359                 result->rad[0] += frontrad[0]*frontrd[0];
360                 result->rad[1] += frontrad[1]*frontrd[1];
361                 result->rad[2] += frontrad[2]*frontrd[2];
362
363                 result->rdsum[0] += frontrd[0];
364                 result->rdsum[1] += frontrd[1];
365                 result->rdsum[2] += frontrd[2];
366         }
367         if (backrad && backarea) {
368                 backrd[0] = rd[0]*backarea;
369                 backrd[1] = rd[1]*backarea;
370                 backrd[2] = rd[2]*backarea;
371
372                 result->backrad[0] += backrad[0]*backrd[0];
373                 result->backrad[1] += backrad[1]*backrd[1];
374                 result->backrad[2] += backrad[2]*backrd[2];
375
376                 result->backrdsum[0] += backrd[0];
377                 result->backrdsum[1] += backrd[1];
378                 result->backrdsum[2] += backrd[2];
379         }
380 }
381
382 static void traverse_octree(ScatterTree *tree, ScatterNode *node, float *co, int self, ScatterResult *result)
383 {
384         float sub[3], dist;
385         int i, index = 0;
386
387         if (node->totpoint > 0) {
388                 /* leaf - add radiance from all samples */
389                 for (i=0; i<node->totpoint; i++) {
390                         ScatterPoint *p= &node->points[i];
391
392                         sub_v3_v3v3(sub, co, p->co);
393                         dist= dot_v3v3(sub, sub);
394
395                         if (p->back)
396                                 add_radiance(tree, NULL, p->rad, 0.0f, p->area, dist, result);
397                         else
398                                 add_radiance(tree, p->rad, NULL, p->area, 0.0f, dist, result);
399                 }
400         }
401         else {
402                 /* branch */
403                 if (self)
404                         index = SUBNODE_INDEX(co, node->split);
405
406                 for (i=0; i<8; i++) {
407                         if (node->child[i]) {
408                                 ScatterNode *subnode= node->child[i];
409
410                                 if (self && index == i) {
411                                         /* always traverse node containing the point */
412                                         traverse_octree(tree, subnode, co, 1, result);
413                                 }
414                                 else {
415                                         /* decide subnode traversal based on maximum solid angle */
416                                         sub_v3_v3v3(sub, co, subnode->co);
417                                         dist= dot_v3v3(sub, sub);
418
419                                         /* actually area/dist > error, but this avoids division */
420                                         if (subnode->area+subnode->backarea>tree->error*dist) {
421                                                 traverse_octree(tree, subnode, co, 0, result);
422                                         }
423                                         else {
424                                                 add_radiance(tree, subnode->rad, subnode->backrad,
425                                                         subnode->area, subnode->backarea, dist, result);
426                                         }
427                                 }
428                         }
429                 }
430         }
431 }
432
433 static void compute_radiance(ScatterTree *tree, float *co, float *rad)
434 {
435         ScatterResult result;
436         float rdsum[3], backrad[3], backrdsum[3];
437
438         memset(&result, 0, sizeof(result));
439
440         traverse_octree(tree, tree->root, co, 1, &result);
441
442         /* the original paper doesn't do this, but we normalize over the
443          * sampled area and multiply with the reflectance. this is because
444          * our point samples are incomplete, there are no samples on parts
445          * of the mesh not visible from the camera. this can not only make
446          * it darker, but also lead to ugly color shifts */
447
448         mul_v3_fl(result.rad, tree->ss[0]->frontweight);
449         mul_v3_fl(result.backrad, tree->ss[0]->backweight);
450
451         copy_v3_v3(rad, result.rad);
452         add_v3_v3v3(backrad, result.rad, result.backrad);
453
454         copy_v3_v3(rdsum, result.rdsum);
455         add_v3_v3v3(backrdsum, result.rdsum, result.backrdsum);
456
457         if (rdsum[0] > 1e-16f) rad[0]= tree->ss[0]->color*rad[0]/rdsum[0];
458         if (rdsum[1] > 1e-16f) rad[1]= tree->ss[1]->color*rad[1]/rdsum[1];
459         if (rdsum[2] > 1e-16f) rad[2]= tree->ss[2]->color*rad[2]/rdsum[2];
460
461         if (backrdsum[0] > 1e-16f) backrad[0]= tree->ss[0]->color*backrad[0]/backrdsum[0];
462         if (backrdsum[1] > 1e-16f) backrad[1]= tree->ss[1]->color*backrad[1]/backrdsum[1];
463         if (backrdsum[2] > 1e-16f) backrad[2]= tree->ss[2]->color*backrad[2]/backrdsum[2];
464
465         rad[0]= MAX2(rad[0], backrad[0]);
466         rad[1]= MAX2(rad[1], backrad[1]);
467         rad[2]= MAX2(rad[2], backrad[2]);
468 }
469
470 /* building */
471
472 static void sum_leaf_radiance(ScatterTree *UNUSED(tree), ScatterNode *node)
473 {
474         ScatterPoint *p;
475         float rad, totrad= 0.0f, inv;
476         int i;
477
478         node->co[0]= node->co[1]= node->co[2]= 0.0;
479         node->rad[0]= node->rad[1]= node->rad[2]= 0.0;
480         node->backrad[0]= node->backrad[1]= node->backrad[2]= 0.0;
481
482         /* compute total rad, rad weighted average position,
483          * and total area */
484         for (i=0; i<node->totpoint; i++) {
485                 p= &node->points[i];
486
487                 rad= p->area*fabsf(p->rad[0] + p->rad[1] + p->rad[2]);
488                 totrad += rad;
489
490                 node->co[0] += rad*p->co[0];
491                 node->co[1] += rad*p->co[1];
492                 node->co[2] += rad*p->co[2];
493
494                 if (p->back) {
495                         node->backrad[0] += p->rad[0]*p->area;
496                         node->backrad[1] += p->rad[1]*p->area;
497                         node->backrad[2] += p->rad[2]*p->area;
498
499                         node->backarea += p->area;
500                 }
501                 else {
502                         node->rad[0] += p->rad[0]*p->area;
503                         node->rad[1] += p->rad[1]*p->area;
504                         node->rad[2] += p->rad[2]*p->area;
505
506                         node->area += p->area;
507                 }
508         }
509
510         if (node->area > 1e-16f) {
511                 inv= 1.0f/node->area;
512                 node->rad[0] *= inv;
513                 node->rad[1] *= inv;
514                 node->rad[2] *= inv;
515         }
516         if (node->backarea > 1e-16f) {
517                 inv= 1.0f/node->backarea;
518                 node->backrad[0] *= inv;
519                 node->backrad[1] *= inv;
520                 node->backrad[2] *= inv;
521         }
522
523         if (totrad > 1e-16f) {
524                 inv= 1.0f/totrad;
525                 node->co[0] *= inv;
526                 node->co[1] *= inv;
527                 node->co[2] *= inv;
528         }
529         else {
530                 /* make sure that if radiance is 0.0f, we still have these points in
531                  * the tree at a good position, they count for rdsum too */
532                 for (i=0; i<node->totpoint; i++) {
533                         p= &node->points[i];
534
535                         node->co[0] += p->co[0];
536                         node->co[1] += p->co[1];
537                         node->co[2] += p->co[2];
538                 }
539
540                 node->co[0] /= node->totpoint;
541                 node->co[1] /= node->totpoint;
542                 node->co[2] /= node->totpoint;
543         }
544 }
545
546 static void sum_branch_radiance(ScatterTree *UNUSED(tree), ScatterNode *node)
547 {
548         ScatterNode *subnode;
549         float rad, totrad= 0.0f, inv;
550         int i, totnode;
551
552         node->co[0]= node->co[1]= node->co[2]= 0.0;
553         node->rad[0]= node->rad[1]= node->rad[2]= 0.0;
554         node->backrad[0]= node->backrad[1]= node->backrad[2]= 0.0;
555
556         /* compute total rad, rad weighted average position,
557          * and total area */
558         for (i=0; i<8; i++) {
559                 if (node->child[i] == NULL)
560                         continue;
561
562                 subnode= node->child[i];
563
564                 rad= subnode->area*fabsf(subnode->rad[0] + subnode->rad[1] + subnode->rad[2]);
565                 rad += subnode->backarea*fabsf(subnode->backrad[0] + subnode->backrad[1] + subnode->backrad[2]);
566                 totrad += rad;
567
568                 node->co[0] += rad*subnode->co[0];
569                 node->co[1] += rad*subnode->co[1];
570                 node->co[2] += rad*subnode->co[2];
571
572                 node->rad[0] += subnode->rad[0]*subnode->area;
573                 node->rad[1] += subnode->rad[1]*subnode->area;
574                 node->rad[2] += subnode->rad[2]*subnode->area;
575
576                 node->backrad[0] += subnode->backrad[0]*subnode->backarea;
577                 node->backrad[1] += subnode->backrad[1]*subnode->backarea;
578                 node->backrad[2] += subnode->backrad[2]*subnode->backarea;
579
580                 node->area += subnode->area;
581                 node->backarea += subnode->backarea;
582         }
583
584         if (node->area > 1e-16f) {
585                 inv= 1.0f/node->area;
586                 node->rad[0] *= inv;
587                 node->rad[1] *= inv;
588                 node->rad[2] *= inv;
589         }
590         if (node->backarea > 1e-16f) {
591                 inv= 1.0f/node->backarea;
592                 node->backrad[0] *= inv;
593                 node->backrad[1] *= inv;
594                 node->backrad[2] *= inv;
595         }
596
597         if (totrad > 1e-16f) {
598                 inv= 1.0f/totrad;
599                 node->co[0] *= inv;
600                 node->co[1] *= inv;
601                 node->co[2] *= inv;
602         }
603         else {
604                 /* make sure that if radiance is 0.0f, we still have these points in
605                  * the tree at a good position, they count for rdsum too */
606                 totnode= 0;
607
608                 for (i=0; i<8; i++) {
609                         if (node->child[i]) {
610                                 subnode= node->child[i];
611
612                                 node->co[0] += subnode->co[0];
613                                 node->co[1] += subnode->co[1];
614                                 node->co[2] += subnode->co[2];
615
616                                 totnode++;
617                         }
618                 }
619
620                 node->co[0] /= totnode;
621                 node->co[1] /= totnode;
622                 node->co[2] /= totnode;
623         }
624 }
625
626 static void sum_radiance(ScatterTree *tree, ScatterNode *node)
627 {
628         if (node->totpoint > 0) {
629                 sum_leaf_radiance(tree, node);
630         }
631         else {
632                 int i;
633
634                 for (i=0; i<8; i++)
635                         if (node->child[i])
636                                 sum_radiance(tree, node->child[i]);
637
638                 sum_branch_radiance(tree, node);
639         }
640 }
641
642 static void subnode_middle(int i, float *mid, float *subsize, float *submid)
643 {
644         int x= i & 1, y= i & 2, z= i & 4;
645
646         submid[0]= mid[0] + ((x)? subsize[0]: -subsize[0]);
647         submid[1]= mid[1] + ((y)? subsize[1]: -subsize[1]);
648         submid[2]= mid[2] + ((z)? subsize[2]: -subsize[2]);
649 }
650
651 static void create_octree_node(ScatterTree *tree, ScatterNode *node, float *mid, float *size, ScatterPoint **refpoints, int depth)
652 {
653         ScatterNode *subnode;
654         ScatterPoint **subrefpoints, **tmppoints= tree->tmppoints;
655         int index, nsize[8], noffset[8], i, subco, usednodes, usedi;
656         float submid[3], subsize[3];
657
658         /* stopping condition */
659         if (node->totpoint <= MAX_OCTREE_NODE_POINTS || depth == MAX_OCTREE_DEPTH) {
660                 for (i=0; i<node->totpoint; i++)
661                         node->points[i]= *(refpoints[i]);
662
663                 return;
664         }
665
666         subsize[0]= size[0]*0.5f;
667         subsize[1]= size[1]*0.5f;
668         subsize[2]= size[2]*0.5f;
669
670         node->split[0]= mid[0];
671         node->split[1]= mid[1];
672         node->split[2]= mid[2];
673
674         memset(nsize, 0, sizeof(nsize));
675         memset(noffset, 0, sizeof(noffset));
676
677         /* count points in subnodes */
678         for (i=0; i<node->totpoint; i++) {
679                 index= SUBNODE_INDEX(refpoints[i]->co, node->split);
680                 tmppoints[i]= refpoints[i];
681                 nsize[index]++;
682         }
683
684         /* here we check if only one subnode is used. if this is the case, we don't
685          * create a new node, but rather call this function again, with different
686          * size and middle position for the same node. */
687         for (usedi=0, usednodes=0, i=0; i<8; i++) {
688                 if (nsize[i]) {
689                         usednodes++;
690                         usedi = i;
691                 }
692                 if (i != 0)
693                         noffset[i]= noffset[i-1]+nsize[i-1];
694         }
695         
696         if (usednodes<=1) {
697                 subnode_middle(usedi, mid, subsize, submid);
698                 create_octree_node(tree, node, submid, subsize, refpoints, depth+1);
699                 return;
700         }
701
702         /* reorder refpoints by subnode */
703         for (i=0; i<node->totpoint; i++) {
704                 index= SUBNODE_INDEX(tmppoints[i]->co, node->split);
705                 refpoints[noffset[index]]= tmppoints[i];
706                 noffset[index]++;
707         }
708
709         /* create subnodes */
710         for (subco=0, i=0; i<8; subco+=nsize[i], i++) {
711                 if (nsize[i] > 0) {
712                         subnode= BLI_memarena_alloc(tree->arena, sizeof(ScatterNode));
713                         node->child[i]= subnode;
714                         subnode->points= node->points + subco;
715                         subnode->totpoint= nsize[i];
716                         subrefpoints= refpoints + subco;
717
718                         subnode_middle(i, mid, subsize, submid);
719
720                         create_octree_node(tree, subnode, submid, subsize, subrefpoints,
721                                 depth+1);
722                 }
723                 else
724                         node->child[i]= NULL;
725         }
726
727         node->points= NULL;
728         node->totpoint= 0;
729 }
730
731 /* public functions */
732
733 ScatterTree *scatter_tree_new(ScatterSettings *ss[3], float scale, float error,
734         float (*co)[3], float (*color)[3], float *area, int totpoint)
735 {
736         ScatterTree *tree;
737         ScatterPoint *points, **refpoints;
738         int i;
739
740         /* allocate tree */
741         tree= MEM_callocN(sizeof(ScatterTree), "ScatterTree");
742         tree->scale= scale;
743         tree->error= error;
744         tree->totpoint= totpoint;
745
746         tree->ss[0]= ss[0];
747         tree->ss[1]= ss[1];
748         tree->ss[2]= ss[2];
749
750         points= MEM_callocN(sizeof(ScatterPoint)*totpoint, "ScatterPoints");
751         refpoints= MEM_callocN(sizeof(ScatterPoint*)*totpoint, "ScatterRefPoints");
752
753         tree->points= points;
754         tree->refpoints= refpoints;
755
756         /* build points */
757         INIT_MINMAX(tree->min, tree->max);
758
759         for (i=0; i<totpoint; i++) {
760                 copy_v3_v3(points[i].co, co[i]);
761                 copy_v3_v3(points[i].rad, color[i]);
762                 points[i].area= fabsf(area[i])/(tree->scale*tree->scale);
763                 points[i].back= (area[i] < 0.0f);
764
765                 mul_v3_fl(points[i].co, 1.0f/tree->scale);
766                 DO_MINMAX(points[i].co, tree->min, tree->max);
767
768                 refpoints[i]= points + i;
769         }
770
771         return tree;
772 }
773
774 void scatter_tree_build(ScatterTree *tree)
775 {
776         ScatterPoint *newpoints, **tmppoints;
777         float mid[3], size[3];
778         int totpoint= tree->totpoint;
779
780         newpoints= MEM_callocN(sizeof(ScatterPoint)*totpoint, "ScatterPoints");
781         tmppoints= MEM_callocN(sizeof(ScatterPoint*)*totpoint, "ScatterTmpPoints");
782         tree->tmppoints= tmppoints;
783
784         tree->arena= BLI_memarena_new(0x8000 * sizeof(ScatterNode), "sss tree arena");
785         BLI_memarena_use_calloc(tree->arena);
786
787         /* build tree */
788         tree->root= BLI_memarena_alloc(tree->arena, sizeof(ScatterNode));
789         tree->root->points= newpoints;
790         tree->root->totpoint= totpoint;
791
792         mid[0]= (tree->min[0]+tree->max[0])*0.5f;
793         mid[1]= (tree->min[1]+tree->max[1])*0.5f;
794         mid[2]= (tree->min[2]+tree->max[2])*0.5f;
795
796         size[0]= (tree->max[0]-tree->min[0])*0.5f;
797         size[1]= (tree->max[1]-tree->min[1])*0.5f;
798         size[2]= (tree->max[2]-tree->min[2])*0.5f;
799
800         create_octree_node(tree, tree->root, mid, size, tree->refpoints, 0);
801
802         MEM_freeN(tree->points);
803         MEM_freeN(tree->refpoints);
804         MEM_freeN(tree->tmppoints);
805         tree->refpoints= NULL;
806         tree->tmppoints= NULL;
807         tree->points= newpoints;
808         
809         /* sum radiance at nodes */
810         sum_radiance(tree, tree->root);
811 }
812
813 void scatter_tree_sample(ScatterTree *tree, float *co, float *color)
814 {
815         float sco[3];
816
817         copy_v3_v3(sco, co);
818         mul_v3_fl(sco, 1.0f/tree->scale);
819
820         compute_radiance(tree, sco, color);
821 }
822
823 void scatter_tree_free(ScatterTree *tree)
824 {
825         if (tree->arena) BLI_memarena_free(tree->arena);
826         if (tree->points) MEM_freeN(tree->points);
827         if (tree->refpoints) MEM_freeN(tree->refpoints);
828                 
829         MEM_freeN(tree);
830 }
831
832 /* Internal Renderer API */
833
834 /* sss tree building */
835
836 typedef struct SSSData {
837         ScatterTree *tree;
838         ScatterSettings *ss[3];
839 } SSSData;
840
841 typedef struct SSSPoints {
842         struct SSSPoints *next, *prev;
843
844         float (*co)[3];
845         float (*color)[3];
846         float *area;
847         int totpoint;
848 } SSSPoints;
849
850 static void sss_create_tree_mat(Render *re, Material *mat)
851 {
852         SSSPoints *p;
853         RenderResult *rr;
854         ListBase points;
855         float (*co)[3] = NULL, (*color)[3] = NULL, *area = NULL;
856         int totpoint = 0, osa, osaflag, partsdone;
857
858         if (re->test_break(re->tbh))
859                 return;
860         
861         points.first= points.last= NULL;
862
863         /* TODO: this is getting a bit ugly, copying all those variables and
864          * setting them back, maybe we need to create our own Render? */
865
866         /* do SSS preprocessing render */
867         BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
868         rr= re->result;
869         osa= re->osa;
870         osaflag= re->r.mode & R_OSA;
871         partsdone= re->i.partsdone;
872
873         re->osa= 0;
874         re->r.mode &= ~R_OSA;
875         re->sss_points= &points;
876         re->sss_mat= mat;
877         re->i.partsdone= 0;
878
879         if (!(re->r.scemode & R_PREVIEWBUTS))
880                 re->result= NULL;
881         BLI_rw_mutex_unlock(&re->resultmutex);
882
883         RE_TileProcessor(re);
884         
885         BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
886         if (!(re->r.scemode & R_PREVIEWBUTS)) {
887                 RE_FreeRenderResult(re->result);
888                 re->result= rr;
889         }
890         BLI_rw_mutex_unlock(&re->resultmutex);
891
892         re->i.partsdone= partsdone;
893         re->sss_mat= NULL;
894         re->sss_points= NULL;
895         re->osa= osa;
896         if (osaflag) re->r.mode |= R_OSA;
897
898         /* no points? no tree */
899         if (!points.first)
900                 return;
901
902         /* merge points together into a single buffer */
903         if (!re->test_break(re->tbh)) {
904                 for (totpoint=0, p=points.first; p; p=p->next)
905                         totpoint += p->totpoint;
906                 
907                 co= MEM_mallocN(sizeof(*co)*totpoint, "SSSCo");
908                 color= MEM_mallocN(sizeof(*color)*totpoint, "SSSColor");
909                 area= MEM_mallocN(sizeof(*area)*totpoint, "SSSArea");
910
911                 for (totpoint=0, p=points.first; p; p=p->next) {
912                         memcpy(co+totpoint, p->co, sizeof(*co)*p->totpoint);
913                         memcpy(color+totpoint, p->color, sizeof(*color)*p->totpoint);
914                         memcpy(area+totpoint, p->area, sizeof(*area)*p->totpoint);
915                         totpoint += p->totpoint;
916                 }
917         }
918
919         /* free points */
920         for (p=points.first; p; p=p->next) {
921                 MEM_freeN(p->co);
922                 MEM_freeN(p->color);
923                 MEM_freeN(p->area);
924         }
925         BLI_freelistN(&points);
926
927         /* build tree */
928         if (!re->test_break(re->tbh)) {
929                 SSSData *sss= MEM_callocN(sizeof(*sss), "SSSData");
930                 float ior= mat->sss_ior, cfac= mat->sss_colfac;
931                 float *radius= mat->sss_radius;
932                 float fw= mat->sss_front, bw= mat->sss_back;
933                 float error = mat->sss_error;
934
935                 error= get_render_aosss_error(&re->r, error);
936                 if ((re->r.scemode & R_PREVIEWBUTS) && error < 0.5f)
937                         error= 0.5f;
938                 
939                 sss->ss[0]= scatter_settings_new(mat->sss_col[0], radius[0], ior, cfac, fw, bw);
940                 sss->ss[1]= scatter_settings_new(mat->sss_col[1], radius[1], ior, cfac, fw, bw);
941                 sss->ss[2]= scatter_settings_new(mat->sss_col[2], radius[2], ior, cfac, fw, bw);
942                 sss->tree= scatter_tree_new(sss->ss, mat->sss_scale, error,
943                         co, color, area, totpoint);
944
945                 MEM_freeN(co);
946                 MEM_freeN(color);
947                 MEM_freeN(area);
948
949                 scatter_tree_build(sss->tree);
950
951                 BLI_ghash_insert(re->sss_hash, mat, sss);
952         }
953         else {
954                 if (co) MEM_freeN(co);
955                 if (color) MEM_freeN(color);
956                 if (area) MEM_freeN(area);
957         }
958 }
959
960 void sss_add_points(Render *re, float (*co)[3], float (*color)[3], float *area, int totpoint)
961 {
962         SSSPoints *p;
963         
964         if (totpoint > 0) {
965                 p= MEM_callocN(sizeof(SSSPoints), "SSSPoints");
966
967                 p->co= co;
968                 p->color= color;
969                 p->area= area;
970                 p->totpoint= totpoint;
971
972                 BLI_lock_thread(LOCK_CUSTOM1);
973                 BLI_addtail(re->sss_points, p);
974                 BLI_unlock_thread(LOCK_CUSTOM1);
975         }
976 }
977
978 static void sss_free_tree(SSSData *sss)
979 {
980         scatter_tree_free(sss->tree);
981         scatter_settings_free(sss->ss[0]);
982         scatter_settings_free(sss->ss[1]);
983         scatter_settings_free(sss->ss[2]);
984         MEM_freeN(sss);
985 }
986
987 /* public functions */
988
989 void make_sss_tree(Render *re)
990 {
991         Material *mat;
992         
993         re->sss_hash= BLI_ghash_new(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "make_sss_tree gh");
994
995         re->i.infostr= "SSS preprocessing";
996         re->stats_draw(re->sdh, &re->i);
997         
998         for (mat= re->main->mat.first; mat; mat= mat->id.next)
999                 if (mat->id.us && (mat->flag & MA_IS_USED) && (mat->sss_flag & MA_DIFF_SSS))
1000                         sss_create_tree_mat(re, mat);
1001         
1002         /* XXX preview exception */
1003         /* localizing preview render data is not fun for node trees :( */
1004         if (re->main!=G.main) {
1005                 for (mat= G.main->mat.first; mat; mat= mat->id.next)
1006                         if (mat->id.us && (mat->flag & MA_IS_USED) && (mat->sss_flag & MA_DIFF_SSS))
1007                                 sss_create_tree_mat(re, mat);
1008         }
1009         
1010 }
1011
1012 void free_sss(Render *re)
1013 {
1014         if (re->sss_hash) {
1015                 GHashIterator *it= BLI_ghashIterator_new(re->sss_hash);
1016
1017                 while (!BLI_ghashIterator_isDone(it)) {
1018                         sss_free_tree(BLI_ghashIterator_getValue(it));
1019                         BLI_ghashIterator_step(it);
1020                 }
1021
1022                 BLI_ghashIterator_free(it);
1023                 BLI_ghash_free(re->sss_hash, NULL, NULL);
1024                 re->sss_hash= NULL;
1025         }
1026 }
1027
1028 int sample_sss(Render *re, Material *mat, float *co, float *color)
1029 {
1030         if (re->sss_hash) {
1031                 SSSData *sss= BLI_ghash_lookup(re->sss_hash, mat);
1032
1033                 if (sss) {
1034                         scatter_tree_sample(sss->tree, co, color);
1035                         return 1;
1036                 }
1037                 else {
1038                         color[0]= 0.0f;
1039                         color[1]= 0.0f;
1040                         color[2]= 0.0f;
1041                 }
1042         }
1043
1044         return 0;
1045 }
1046
1047 int sss_pass_done(struct Render *re, struct Material *mat)
1048 {
1049         return ((re->flag & R_BAKING) || !(re->r.mode & R_SSS) || (re->sss_hash && BLI_ghash_lookup(re->sss_hash, mat)));
1050 }
1051