Fixes for SSS with render layers. Now in the preprocessing pass
[blender.git] / source / blender / render / intern / source / sss.c
1 /* 
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 2007 Blender Foundation.
21  * All rights reserved.
22  *
23  * The Original Code is: all of this file.
24  *
25  * Contributor(s): none yet.
26  *
27  * ***** END GPL LICENSE BLOCK *****
28  */
29
30 /* Possible Improvements:
31    - add fresnel terms
32    - adapt Rd table to scale, now with small scale there are a lot of misses?
33    - possible interesting method: perform sss on all samples in the tree,
34      and then use those values interpolated somehow later. can also do this
35          filtering on demand for speed. since we are doing things in screen
36          space now there is an exact correspondence
37    - avoid duplicate shading (filtering points in advance, irradiance cache
38      like lookup?)
39    - lower resolution samples
40 */
41
42 #include <math.h>
43 #include <string.h>
44 #include <stdio.h>
45 #include <string.h>
46
47 /* external modules: */
48 #include "MEM_guardedalloc.h"
49
50 #include "BLI_arithb.h"
51 #include "BLI_blenlib.h"
52 #include "BLI_ghash.h"
53 #include "BLI_memarena.h"
54 #include "PIL_time.h"
55
56 #include "DNA_material_types.h"
57
58 #include "BKE_global.h"
59 #include "BKE_main.h"
60 #include "BKE_material.h"
61 #include "BKE_node.h"
62 #include "BKE_utildefines.h"
63
64 /* this module */
65 #include "render_types.h"
66 #include "rendercore.h"
67 #include "renderdatabase.h" 
68 #include "shading.h"
69 #include "sss.h"
70 #include "zbuf.h"
71
72 extern Render R; // meh
73
74 /* Generic Multiple Scattering API */
75
76 /* Relevant papers:
77         [1] A Practical Model for Subsurface Light Transport
78         [2] A Rapid Hierarchical Rendering Technique for Translucent Materials
79         [3] Efficient Rendering of Local Subsurface Scattering
80         [4] Implementing a skin BSSRDF (or several...)
81 */
82
83 /* Defines */
84
85 #define RD_TABLE_RANGE          100.0f
86 #define RD_TABLE_RANGE_2        10000.0f
87 #define RD_TABLE_SIZE           10000
88
89 #define MAX_OCTREE_NODE_POINTS  8
90 #define MAX_OCTREE_DEPTH                15
91
92 /* Struct Definitions */
93
94 struct ScatterSettings {
95         float eta;              /* index of refraction */
96         float sigma_a;  /* absorption coefficient */
97         float sigma_s_; /* reduced scattering coefficient */
98         float sigma_t_; /* reduced extinction coefficient */
99         float sigma;    /* effective extinction coefficient */
100         float Fdr;              /* diffuse fresnel reflectance */
101         float D;                /* diffusion constant */
102         float A;
103         float alpha_;   /* reduced albedo */
104         float zr;               /* distance of virtual lightsource above surface */
105         float zv;               /* distance of virtual lightsource below surface */
106         float ld;               /* mean free path */
107         float ro;               /* diffuse reflectance */
108         float color;
109         float invsigma_t_;
110         float frontweight;
111         float backweight;
112
113         float *tableRd;  /* lookup table to avoid computing Rd */
114         float *tableRd2; /* lookup table to avoid computing Rd for bigger values */
115 };
116
117 typedef struct ScatterPoint {
118         float co[3];
119         float rad[3];
120         float area;
121         int back;
122 } ScatterPoint;
123
124 typedef struct ScatterNode {
125         float co[3];
126         float rad[3];
127         float backrad[3];
128         float area, backarea;
129
130         int totpoint;
131         ScatterPoint *points;
132
133         float split[3];
134         struct ScatterNode *child[8];
135 } ScatterNode;
136
137 struct ScatterTree {
138         MemArena *arena;
139
140         ScatterSettings *ss[3];
141         float error, scale;
142
143         ScatterNode *root;
144         ScatterPoint *points;
145         ScatterPoint **refpoints;
146         ScatterPoint **tmppoints;
147         int totpoint;
148         float min[3], max[3];
149 };
150
151 typedef struct ScatterResult {
152         float rad[3];
153         float backrad[3];
154         float rdsum[3];
155         float backrdsum[3];
156 } ScatterResult;
157
158 /* Functions for BSSRDF reparametrization in to more intuitive parameters,
159    see [2] section 4 for more info. */
160
161 static float f_Rd(float alpha_, float A, float ro)
162 {
163         float sq;
164
165         sq= sqrt(3.0f*(1.0f - alpha_));
166         return (alpha_/2.0f)*(1.0f + exp((-4.0f/3.0f)*A*sq))*exp(-sq) - ro;
167 }
168
169 static float compute_reduced_albedo(ScatterSettings *ss)
170 {
171         const float tolerance= 1e-8;
172         const int max_iteration_count= 20;
173         float d, fsub, xn_1= 0.0f , xn= 1.0f, fxn, fxn_1;
174         int i;
175
176         /* use secant method to compute reduced albedo using Rd function inverse
177            with a given reflectance */
178         fxn= f_Rd(xn, ss->A, ss->ro);
179         fxn_1= f_Rd(xn_1, ss->A, ss->ro);
180
181         for(i= 0; i < max_iteration_count; i++) {
182                 fsub= (fxn - fxn_1);
183                 if(fabs(fsub) < tolerance)
184                         break;
185                 d= ((xn - xn_1)/fsub)*fxn;
186                 if(fabs(d) < tolerance)
187                         break;
188
189                 xn_1= xn;
190                 fxn_1= fxn;
191                 xn= xn - d;
192
193                 if(xn > 1.0f) xn= 1.0f;
194                 if(xn_1 > 1.0f) xn_1= 1.0f;
195                 
196                 fxn= f_Rd(xn, ss->A, ss->ro);
197     }
198
199         /* avoid division by zero later */
200         if(xn <= 0.0f)
201                 xn= 0.00001f;
202
203     return xn;
204 }
205
206 /* Exponential falloff functions */
207
208 static float Rd_rsquare(ScatterSettings *ss, float rr)
209 {
210         float sr, sv, Rdr, Rdv;
211
212         sr= sqrt(rr + ss->zr*ss->zr);
213         sv= sqrt(rr + ss->zv*ss->zv);
214
215         Rdr= ss->zr*(1.0f + ss->sigma*sr)*exp(-ss->sigma*sr)/(sr*sr*sr);
216         Rdv= ss->zv*(1.0f + ss->sigma*sv)*exp(-ss->sigma*sv)/(sv*sv*sv);
217
218         return /*ss->alpha_*/(1.0f/(4.0f*M_PI))*(Rdr + Rdv);
219 }
220
221 static float Rd(ScatterSettings *ss, float r)
222 {
223         return Rd_rsquare(ss, r*r);
224 }
225
226 /* table lookups for Rd. this avoids expensive exp calls. we use two
227    separate tables as well for lower and higher numbers to improve
228    precision, since the number are poorly distributed because we do
229    a lookup with the squared distance for smaller distances, saving
230    another sqrt. */
231
232 static void approximate_Rd_rgb(ScatterSettings **ss, float rr, float *rd)
233 {
234         float indexf, t, idxf;
235         int index;
236
237         if(rr > (RD_TABLE_RANGE_2*RD_TABLE_RANGE_2));
238         else if(rr > RD_TABLE_RANGE) {
239                 rr= sqrt(rr);
240                 indexf= rr*(RD_TABLE_SIZE/RD_TABLE_RANGE_2);
241                 index= (int)indexf;
242                 idxf= (float)index;
243                 t= indexf - idxf;
244
245                 if(index >= 0 && index < RD_TABLE_SIZE) {
246                         rd[0]= (ss[0]->tableRd2[index]*(1-t) + ss[0]->tableRd2[index+1]*t);
247                         rd[1]= (ss[1]->tableRd2[index]*(1-t) + ss[1]->tableRd2[index+1]*t);
248                         rd[2]= (ss[2]->tableRd2[index]*(1-t) + ss[2]->tableRd2[index+1]*t);
249                         return;
250                 }
251         }
252         else {
253                 indexf= rr*(RD_TABLE_SIZE/RD_TABLE_RANGE);
254                 index= (int)indexf;
255                 idxf= (float)index;
256                 t= indexf - idxf;
257
258                 if(index >= 0 && index < RD_TABLE_SIZE) {
259                         rd[0]= (ss[0]->tableRd[index]*(1-t) + ss[0]->tableRd[index+1]*t);
260                         rd[1]= (ss[1]->tableRd[index]*(1-t) + ss[1]->tableRd[index+1]*t);
261                         rd[2]= (ss[2]->tableRd[index]*(1-t) + ss[2]->tableRd[index+1]*t);
262                         return;
263                 }
264         }
265
266         /* fallback to slow Rd computation */
267         rd[0]= Rd_rsquare(ss[0], rr);
268         rd[1]= Rd_rsquare(ss[1], rr);
269         rd[2]= Rd_rsquare(ss[2], rr);
270 }
271
272 static void build_Rd_table(ScatterSettings *ss)
273 {
274         float r;
275         int i, size = RD_TABLE_SIZE+1;
276
277         ss->tableRd= MEM_mallocN(sizeof(float)*size, "scatterTableRd");
278         ss->tableRd2= MEM_mallocN(sizeof(float)*size, "scatterTableRd");
279
280         for(i= 0; i < size; i++) {
281                 r= i*(RD_TABLE_RANGE/RD_TABLE_SIZE);
282                 /*if(r < ss->invsigma_t_*ss->invsigma_t_)
283                         r= ss->invsigma_t_*ss->invsigma_t_;*/
284                 ss->tableRd[i]= Rd(ss, sqrt(r));
285
286                 r= i*(RD_TABLE_RANGE_2/RD_TABLE_SIZE);
287                 /*if(r < ss->invsigma_t_)
288                         r= ss->invsigma_t_;*/
289                 ss->tableRd2[i]= Rd(ss, r);
290         }
291 }
292
293 ScatterSettings *scatter_settings_new(float refl, float radius, float ior, float reflfac, float frontweight, float backweight)
294 {
295         ScatterSettings *ss;
296         
297         ss= MEM_callocN(sizeof(ScatterSettings), "ScatterSettings");
298
299         /* see [1] and [3] for these formulas */
300         ss->eta= ior;
301         ss->Fdr= -1.440f/ior*ior + 0.710f/ior + 0.668f + 0.0636f*ior;
302         ss->A= (1.0f + ss->Fdr)/(1.0f - ss->Fdr);
303         ss->ld= radius;
304         ss->ro= MIN2(refl, 0.999f);
305         ss->color= ss->ro*reflfac + (1.0f-reflfac);
306
307         ss->alpha_= compute_reduced_albedo(ss);
308
309         ss->sigma= 1.0f/ss->ld;
310         ss->sigma_t_= ss->sigma/sqrt(3.0f*(1.0f - ss->alpha_));
311         ss->sigma_s_= ss->alpha_*ss->sigma_t_;
312         ss->sigma_a= ss->sigma_t_ - ss->sigma_s_;
313
314         ss->D= 1.0f/(3.0f*ss->sigma_t_);
315
316         ss->zr= 1.0f/ss->sigma_t_;
317         ss->zv= ss->zr + 4.0f*ss->A*ss->D;
318
319         ss->invsigma_t_= 1.0f/ss->sigma_t_;
320
321         ss->frontweight= frontweight;
322         ss->backweight= backweight;
323
324         /* precompute a table of Rd values for quick lookup */
325         build_Rd_table(ss);
326
327         return ss;
328 }
329
330 void scatter_settings_free(ScatterSettings *ss)
331 {
332         MEM_freeN(ss->tableRd);
333         MEM_freeN(ss->tableRd2);
334         MEM_freeN(ss);
335 }
336
337 /* Hierarchical method as in [2]. */
338
339 /* traversal */
340
341 #define SUBNODE_INDEX(co, split) \
342         ((co[0]>=split[0]) + (co[1]>=split[1])*2 + (co[2]>=split[2])*4)
343         
344 static void add_radiance(ScatterTree *tree, float *frontrad, float *backrad, float area, float backarea, float rr, ScatterResult *result)
345 {
346         float rd[3], frontrd[3], backrd[3];
347
348         approximate_Rd_rgb(tree->ss, rr, rd);
349
350         if(frontrad && area) {
351                 frontrd[0] = rd[0]*area;
352                 frontrd[1] = rd[1]*area;
353                 frontrd[2] = rd[2]*area;
354
355                 result->rad[0] += frontrad[0]*frontrd[0];
356                 result->rad[1] += frontrad[1]*frontrd[1];
357                 result->rad[2] += frontrad[2]*frontrd[2];
358
359                 result->rdsum[0] += frontrd[0];
360                 result->rdsum[1] += frontrd[1];
361                 result->rdsum[2] += frontrd[2];
362         }
363         if(backrad && backarea) {
364                 backrd[0] = rd[0]*backarea;
365                 backrd[1] = rd[1]*backarea;
366                 backrd[2] = rd[2]*backarea;
367
368                 result->backrad[0] += backrad[0]*backrd[0];
369                 result->backrad[1] += backrad[1]*backrd[1];
370                 result->backrad[2] += backrad[2]*backrd[2];
371
372                 result->backrdsum[0] += backrd[0];
373                 result->backrdsum[1] += backrd[1];
374                 result->backrdsum[2] += backrd[2];
375         }
376 }
377
378 static void traverse_octree(ScatterTree *tree, ScatterNode *node, float *co, int self, ScatterResult *result)
379 {
380         float sub[3], dist;
381         int i, index = 0;
382
383         if(node->totpoint > 0) {
384                 /* leaf - add radiance from all samples */
385                 for(i=0; i<node->totpoint; i++) {
386                         ScatterPoint *p= &node->points[i];
387
388                         VECSUB(sub, co, p->co);
389                         dist= INPR(sub, sub);
390
391                         if(p->back)
392                                 add_radiance(tree, NULL, p->rad, 0.0f, p->area, dist, result);
393                         else
394                                 add_radiance(tree, p->rad, NULL, p->area, 0.0f, dist, result);
395                 }
396         }
397         else {
398                 /* branch */
399                 if (self)
400                         index = SUBNODE_INDEX(co, node->split);
401
402                 for(i=0; i<8; i++) {
403                         if(node->child[i]) {
404                                 ScatterNode *subnode= node->child[i];
405
406                                 if(self && index == i) {
407                                         /* always traverse node containing the point */
408                                         traverse_octree(tree, subnode, co, 1, result);
409                                 }
410                                 else {
411                                         /* decide subnode traversal based on maximum solid angle */
412                                         VECSUB(sub, co, subnode->co);
413                                         dist= INPR(sub, sub);
414
415                                         /* actually area/dist > error, but this avoids division */
416                                         if(subnode->area+subnode->backarea>tree->error*dist) {
417                                                 traverse_octree(tree, subnode, co, 0, result);
418                                         }
419                                         else {
420                                                 add_radiance(tree, subnode->rad, subnode->backrad,
421                                                         subnode->area, subnode->backarea, dist, result);
422                                         }
423                                 }
424                         }
425                 }
426         }
427 }
428
429 static void compute_radiance(ScatterTree *tree, float *co, float *rad)
430 {
431         ScatterResult result;
432         float rdsum[3], backrad[3], backrdsum[3];
433
434         memset(&result, 0, sizeof(result));
435
436         traverse_octree(tree, tree->root, co, 1, &result);
437
438         /* the original paper doesn't do this, but we normalize over the
439            sampled area and multiply with the reflectance. this is because
440            our point samples are incomplete, there are no samples on parts
441            of the mesh not visible from the camera. this can not only make
442            it darker, but also lead to ugly color shifts */
443
444         VecMulf(result.rad, tree->ss[0]->frontweight);
445         VecMulf(result.backrad, tree->ss[0]->backweight);
446
447         VECCOPY(rad, result.rad);
448         VECADD(backrad, result.rad, result.backrad);
449
450         VECCOPY(rdsum, result.rdsum);
451         VECADD(backrdsum, result.rdsum, result.backrdsum);
452
453         if(rdsum[0] > 0.0f) rad[0]= tree->ss[0]->color*rad[0]/rdsum[0];
454         if(rdsum[1] > 0.0f) rad[1]= tree->ss[1]->color*rad[1]/rdsum[1];
455         if(rdsum[2] > 0.0f) rad[2]= tree->ss[2]->color*rad[2]/rdsum[2];
456
457         if(backrdsum[0] > 0.0f) backrad[0]= tree->ss[0]->color*backrad[0]/backrdsum[0];
458         if(backrdsum[1] > 0.0f) backrad[1]= tree->ss[1]->color*backrad[1]/backrdsum[1];
459         if(backrdsum[2] > 0.0f) backrad[2]= tree->ss[2]->color*backrad[2]/backrdsum[2];
460
461         rad[0]= MAX2(rad[0], backrad[0]);
462         rad[1]= MAX2(rad[1], backrad[1]);
463         rad[2]= MAX2(rad[2], backrad[2]);
464 }
465
466 /* building */
467
468 static void sum_leaf_radiance(ScatterTree *tree, ScatterNode *node)
469 {
470         ScatterPoint *p;
471         float rad, totrad= 0.0f, inv;
472         int i;
473
474         node->co[0]= node->co[1]= node->co[2]= 0.0;
475         node->rad[0]= node->rad[1]= node->rad[2]= 0.0;
476         node->backrad[0]= node->backrad[1]= node->backrad[2]= 0.0;
477
478         /* compute total rad, rad weighted average position,
479            and total area */
480         for(i=0; i<node->totpoint; i++) {
481                 p= &node->points[i];
482
483                 rad= p->area*fabs(p->rad[0] + p->rad[1] + p->rad[2]);
484                 totrad += rad;
485
486                 node->co[0] += rad*p->co[0];
487                 node->co[1] += rad*p->co[1];
488                 node->co[2] += rad*p->co[2];
489
490                 if(p->back) {
491                         node->backrad[0] += p->rad[0]*p->area;
492                         node->backrad[1] += p->rad[1]*p->area;
493                         node->backrad[2] += p->rad[2]*p->area;
494
495                         node->backarea += p->area;
496                 }
497                 else {
498                         node->rad[0] += p->rad[0]*p->area;
499                         node->rad[1] += p->rad[1]*p->area;
500                         node->rad[2] += p->rad[2]*p->area;
501
502                         node->area += p->area;
503                 }
504         }
505
506         if(node->area > 0) {
507                 inv= 1.0/node->area;
508                 node->rad[0] *= inv;
509                 node->rad[1] *= inv;
510                 node->rad[2] *= inv;
511         }
512         if(node->backarea > 0) {
513                 inv= 1.0/node->backarea;
514                 node->backrad[0] *= inv;
515                 node->backrad[1] *= inv;
516                 node->backrad[2] *= inv;
517         }
518
519         if(totrad > 0.0f) {
520                 inv= 1.0/totrad;
521                 node->co[0] *= inv;
522                 node->co[1] *= inv;
523                 node->co[2] *= inv;
524         }
525         else {
526                 /* make sure that if radiance is 0.0f, we still have these points in
527                    the tree at a good position, they count for rdsum too */
528                 for(i=0; i<node->totpoint; i++) {
529                         p= &node->points[i];
530
531                         node->co[0] += p->co[0];
532                         node->co[1] += p->co[1];
533                         node->co[2] += p->co[2];
534                 }
535
536                 node->co[0] /= node->totpoint;
537                 node->co[1] /= node->totpoint;
538                 node->co[2] /= node->totpoint;
539         }
540 }
541
542 static void sum_branch_radiance(ScatterTree *tree, ScatterNode *node)
543 {
544         ScatterNode *subnode;
545         float rad, totrad= 0.0f, inv;
546         int i, totnode;
547
548         node->co[0]= node->co[1]= node->co[2]= 0.0;
549         node->rad[0]= node->rad[1]= node->rad[2]= 0.0;
550         node->backrad[0]= node->backrad[1]= node->backrad[2]= 0.0;
551
552         /* compute total rad, rad weighted average position,
553            and total area */
554         for(i=0; i<8; i++) {
555                 if(node->child[i] == NULL)
556                         continue;
557
558                 subnode= node->child[i];
559
560                 rad= subnode->area*fabs(subnode->rad[0] + subnode->rad[1] + subnode->rad[2]);
561                 rad += subnode->backarea*fabs(subnode->backrad[0] + subnode->backrad[1] + subnode->backrad[2]);
562                 totrad += rad;
563
564                 node->co[0] += rad*subnode->co[0];
565                 node->co[1] += rad*subnode->co[1];
566                 node->co[2] += rad*subnode->co[2];
567
568                 node->rad[0] += subnode->rad[0]*subnode->area;
569                 node->rad[1] += subnode->rad[1]*subnode->area;
570                 node->rad[2] += subnode->rad[2]*subnode->area;
571
572                 node->backrad[0] += subnode->backrad[0]*subnode->backarea;
573                 node->backrad[1] += subnode->backrad[1]*subnode->backarea;
574                 node->backrad[2] += subnode->backrad[2]*subnode->backarea;
575
576                 node->area += subnode->area;
577                 node->backarea += subnode->backarea;
578         }
579
580         if(node->area > 0) {
581                 inv= 1.0/node->area;
582                 node->rad[0] *= inv;
583                 node->rad[1] *= inv;
584                 node->rad[2] *= inv;
585         }
586         if(node->backarea > 0) {
587                 inv= 1.0/node->backarea;
588                 node->backrad[0] *= inv;
589                 node->backrad[1] *= inv;
590                 node->backrad[2] *= inv;
591         }
592
593         if(totrad > 0.0f) {
594                 inv= 1.0/totrad;
595                 node->co[0] *= inv;
596                 node->co[1] *= inv;
597                 node->co[2] *= inv;
598         }
599         else {
600                 /* make sure that if radiance is 0.0f, we still have these points in
601                    the tree at a good position, they count for rdsum too */
602                 totnode= 0;
603
604                 for(i=0; i<8; i++) {
605                         if(node->child[i]) {
606                                 subnode= node->child[i];
607
608                                 node->co[0] += subnode->co[0];
609                                 node->co[1] += subnode->co[1];
610                                 node->co[2] += subnode->co[2];
611
612                                 totnode++;
613                         }
614                 }
615
616                 node->co[0] /= totnode;
617                 node->co[1] /= totnode;
618                 node->co[2] /= totnode;
619         }
620 }
621
622 static void sum_radiance(ScatterTree *tree, ScatterNode *node)
623 {
624         if(node->totpoint > 0) {
625                 sum_leaf_radiance(tree, node);
626         }
627         else {
628                 int i;
629
630                 for(i=0; i<8; i++)
631                         if(node->child[i])
632                                 sum_radiance(tree, node->child[i]);
633
634                 sum_branch_radiance(tree, node);
635         }
636 }
637
638 static void subnode_middle(int i, float *mid, float *subsize, float *submid)
639 {
640         int x= i & 1, y= i & 2, z= i & 4;
641
642         submid[0]= mid[0] + ((x)? subsize[0]: -subsize[0]);
643         submid[1]= mid[1] + ((y)? subsize[1]: -subsize[1]);
644         submid[2]= mid[2] + ((z)? subsize[2]: -subsize[2]);
645 }
646
647 static void create_octree_node(ScatterTree *tree, ScatterNode *node, float *mid, float *size, ScatterPoint **refpoints, int depth)
648 {
649         ScatterNode *subnode;
650         ScatterPoint **subrefpoints, **tmppoints= tree->tmppoints;
651         int index, nsize[8], noffset[8], i, subco, usednodes, usedi;
652         float submid[3], subsize[3];
653
654         /* stopping condition */
655         if(node->totpoint <= MAX_OCTREE_NODE_POINTS || depth == MAX_OCTREE_DEPTH) {
656                 for(i=0; i<node->totpoint; i++)
657                         node->points[i]= *(refpoints[i]);
658
659                 return;
660         }
661
662         subsize[0]= size[0]*0.5;
663         subsize[1]= size[1]*0.5;
664         subsize[2]= size[2]*0.5;
665
666         node->split[0]= mid[0];
667         node->split[1]= mid[1];
668         node->split[2]= mid[2];
669
670         memset(nsize, 0, sizeof(nsize));
671         memset(noffset, 0, sizeof(noffset));
672
673         /* count points in subnodes */
674         for(i=0; i<node->totpoint; i++) {
675                 index= SUBNODE_INDEX(refpoints[i]->co, node->split);
676                 tmppoints[i]= refpoints[i];
677                 nsize[index]++;
678         }
679
680         /* here we check if only one subnode is used. if this is the case, we don't
681            create a new node, but rather call this function again, with different 
682            size and middle position for the same node. */
683         for(usedi=0, usednodes=0, i=0; i<8; i++) {
684                 if(nsize[i]) {
685                         usednodes++;
686                         usedi = i;
687                 }
688                 if(i != 0)
689                         noffset[i]= noffset[i-1]+nsize[i-1];
690         }
691         
692         if(usednodes<=1) {
693                 subnode_middle(usedi, mid, subsize, submid);
694                 create_octree_node(tree, node, submid, subsize, refpoints, depth+1);
695                 return;
696         }
697
698         /* reorder refpoints by subnode */
699         for(i=0; i<node->totpoint; i++) {
700                 index= SUBNODE_INDEX(tmppoints[i]->co, node->split);
701                 refpoints[noffset[index]]= tmppoints[i];
702                 noffset[index]++;
703         }
704
705         /* create subnodes */
706         for(subco=0, i=0; i<8; subco+=nsize[i], i++) {
707                 if(nsize[i] > 0) {
708                         subnode= BLI_memarena_alloc(tree->arena, sizeof(ScatterNode));
709                         node->child[i]= subnode;
710                         subnode->points= node->points + subco;
711                         subnode->totpoint= nsize[i];
712                         subrefpoints= refpoints + subco;
713
714                         subnode_middle(i, mid, subsize, submid);
715
716                         create_octree_node(tree, subnode, submid, subsize, subrefpoints,
717                                 depth+1);
718                 }
719                 else
720                         node->child[i]= NULL;
721         }
722
723         node->points= NULL;
724         node->totpoint= 0;
725 }
726
727 /* public functions */
728
729 ScatterTree *scatter_tree_new(ScatterSettings *ss[3], float scale, float error,
730         float (*co)[3], float (*color)[3], float *area, int totpoint)
731 {
732         ScatterTree *tree;
733         ScatterPoint *points, **refpoints;
734         int i;
735
736         /* allocate tree */
737         tree= MEM_callocN(sizeof(ScatterTree), "ScatterTree");
738         tree->scale= scale;
739         tree->error= error;
740         tree->totpoint= totpoint;
741
742         tree->ss[0]= ss[0];
743         tree->ss[1]= ss[1];
744         tree->ss[2]= ss[2];
745
746         points= MEM_callocN(sizeof(ScatterPoint)*totpoint, "ScatterPoints");
747         refpoints= MEM_callocN(sizeof(ScatterPoint*)*totpoint, "ScatterRefPoints");
748
749         tree->points= points;
750         tree->refpoints= refpoints;
751
752         /* build points */
753         INIT_MINMAX(tree->min, tree->max);
754
755         for(i=0; i<totpoint; i++) {
756                 VECCOPY(points[i].co, co[i]);
757                 VECCOPY(points[i].rad, color[i]);
758                 points[i].area= fabs(area[i])/(tree->scale*tree->scale);
759                 points[i].back= (area[i] < 0.0f);
760
761                 VecMulf(points[i].co, 1.0f/tree->scale);
762                 DO_MINMAX(points[i].co, tree->min, tree->max);
763
764                 refpoints[i]= points + i;
765         }
766
767         return tree;
768 }
769
770 void scatter_tree_build(ScatterTree *tree)
771 {
772         ScatterPoint *newpoints, **tmppoints;
773         float mid[3], size[3];
774         int totpoint= tree->totpoint;
775
776         newpoints= MEM_callocN(sizeof(ScatterPoint)*totpoint, "ScatterPoints");
777         tmppoints= MEM_callocN(sizeof(ScatterPoint*)*totpoint, "ScatterTmpPoints");
778         tree->tmppoints= tmppoints;
779
780         tree->arena= BLI_memarena_new(0x8000 * sizeof(ScatterNode));
781         BLI_memarena_use_calloc(tree->arena);
782
783         /* build tree */
784         tree->root= BLI_memarena_alloc(tree->arena, sizeof(ScatterNode));
785         tree->root->points= newpoints;
786         tree->root->totpoint= totpoint;
787
788         mid[0]= (tree->min[0]+tree->max[0])*0.5;
789         mid[1]= (tree->min[1]+tree->max[1])*0.5;
790         mid[2]= (tree->min[2]+tree->max[2])*0.5;
791
792         size[0]= (tree->max[0]-tree->min[0])*0.5;
793         size[1]= (tree->max[1]-tree->min[1])*0.5;
794         size[2]= (tree->max[2]-tree->min[2])*0.5;
795
796         create_octree_node(tree, tree->root, mid, size, tree->refpoints, 0);
797
798         MEM_freeN(tree->points);
799         MEM_freeN(tree->refpoints);
800         MEM_freeN(tree->tmppoints);
801         tree->refpoints= NULL;
802         tree->tmppoints= NULL;
803         tree->points= newpoints;
804         
805         /* sum radiance at nodes */
806         sum_radiance(tree, tree->root);
807 }
808
809 void scatter_tree_sample(ScatterTree *tree, float *co, float *color)
810 {
811         float sco[3];
812
813         VECCOPY(sco, co);
814         VecMulf(sco, 1.0f/tree->scale);
815
816         compute_radiance(tree, sco, color);
817 }
818
819 void scatter_tree_free(ScatterTree *tree)
820 {
821         if (tree->arena) BLI_memarena_free(tree->arena);
822         if (tree->points) MEM_freeN(tree->points);
823         if (tree->refpoints) MEM_freeN(tree->refpoints);
824                 
825         MEM_freeN(tree);
826 }
827
828 /* Internal Renderer API */
829
830 /* sss tree building */
831
832 typedef struct SSSData {
833         ScatterTree *tree;
834         ScatterSettings *ss[3];
835 } SSSData;
836
837 typedef struct SSSPoints {
838         struct SSSPoints *next, *prev;
839
840         float (*co)[3];
841         float (*color)[3];
842         float *area;
843         int totpoint;
844 } SSSPoints;
845
846 static void sss_create_tree_mat(Render *re, Material *mat)
847 {
848         SSSPoints *p;
849         RenderResult *rr;
850         ListBase layers, points;
851         float (*co)[3] = NULL, (*color)[3] = NULL, *area = NULL;
852         int totpoint = 0, osa, osaflag, partsdone;
853
854         if(re->test_break())
855                 return;
856         
857         points.first= points.last= NULL;
858
859         /* TODO: this is getting a bit ugly, copying all those variables and
860            setting them back, maybe we need to create our own Render? */
861
862         /* do SSS preprocessing render */
863         rr= re->result;
864         layers= rr->layers;
865         osa= re->osa;
866         osaflag= re->r.mode & R_OSA;
867         partsdone= re->i.partsdone;
868
869         rr->layers.first= rr->layers.last= NULL;
870         re->osa= 0;
871         re->r.mode &= ~R_OSA;
872         re->sss_points= &points;
873         re->sss_mat= mat;
874         re->i.partsdone= 0;
875         re->result= NULL;
876
877         RE_TileProcessor(re, 0, !(re->r.mode & R_PREVIEWBUTS));
878         RE_FreeRenderResult(re->result);
879
880         re->result= rr;
881         re->i.partsdone= partsdone;
882         re->sss_mat= NULL;
883         re->sss_points= NULL;
884         rr->layers= layers;
885         re->osa= osa;
886         if (osaflag) re->r.mode |= R_OSA;
887
888         /* no points? no tree */
889         if(!points.first)
890                 return;
891
892         /* merge points together into a single buffer */
893         if(!re->test_break()) {
894                 for(totpoint=0, p=points.first; p; p=p->next)
895                         totpoint += p->totpoint;
896                 
897                 co= MEM_mallocN(sizeof(*co)*totpoint, "SSSCo");
898                 color= MEM_mallocN(sizeof(*color)*totpoint, "SSSColor");
899                 area= MEM_mallocN(sizeof(*area)*totpoint, "SSSArea");
900
901                 for(totpoint=0, p=points.first; p; p=p->next) {
902                         memcpy(co+totpoint, p->co, sizeof(*co)*p->totpoint);
903                         memcpy(color+totpoint, p->color, sizeof(*color)*p->totpoint);
904                         memcpy(area+totpoint, p->area, sizeof(*area)*p->totpoint);
905                         totpoint += p->totpoint;
906                 }
907         }
908
909         /* free points */
910         for(p=points.first; p; p=p->next) {
911                 MEM_freeN(p->co);
912                 MEM_freeN(p->color);
913                 MEM_freeN(p->area);
914         }
915         BLI_freelistN(&points);
916
917         /* build tree */
918         if(!re->test_break()) {
919                 SSSData *sss= MEM_callocN(sizeof(*sss), "SSSData");
920                 float ior= mat->sss_ior, cfac= mat->sss_colfac;
921                 float *col= mat->sss_col, *radius= mat->sss_radius;
922                 float fw= mat->sss_front, bw= mat->sss_back;
923                 float error = mat->sss_error;
924                 
925                 if((re->r.scemode & R_PREVIEWBUTS) && error < 0.5f)
926                         error= 0.5f;
927
928                 sss->ss[0]= scatter_settings_new(col[0], radius[0], ior, cfac, fw, bw);
929                 sss->ss[1]= scatter_settings_new(col[1], radius[1], ior, cfac, fw, bw);
930                 sss->ss[2]= scatter_settings_new(col[2], radius[2], ior, cfac, fw, bw);
931                 sss->tree= scatter_tree_new(sss->ss, mat->sss_scale, error,
932                         co, color, area, totpoint);
933
934                 MEM_freeN(co);
935                 MEM_freeN(color);
936                 MEM_freeN(area);
937
938                 scatter_tree_build(sss->tree);
939
940                 BLI_ghash_insert(re->sss_hash, mat, sss);
941         }
942         else {
943                 if (co) MEM_freeN(co);
944                 if (color) MEM_freeN(color);
945                 if (area) MEM_freeN(area);
946         }
947 }
948
949 void sss_add_points(Render *re, float (*co)[3], float (*color)[3], float *area, int totpoint)
950 {
951         SSSPoints *p;
952         
953         if(totpoint > 0) {
954                 p= MEM_callocN(sizeof(SSSPoints), "SSSPoints");
955
956                 p->co= co;
957                 p->color= color;
958                 p->area= area;
959                 p->totpoint= totpoint;
960
961                 BLI_lock_thread(LOCK_CUSTOM1);
962                 BLI_addtail(re->sss_points, p);
963                 BLI_unlock_thread(LOCK_CUSTOM1);
964         }
965 }
966
967 static void sss_free_tree(SSSData *sss)
968 {
969         scatter_tree_free(sss->tree);
970         scatter_settings_free(sss->ss[0]);
971         scatter_settings_free(sss->ss[1]);
972         scatter_settings_free(sss->ss[2]);
973         MEM_freeN(sss);
974 }
975
976 /* public functions */
977
978 void make_sss_tree(Render *re)
979 {
980         Material *mat;
981         
982         re->sss_hash= BLI_ghash_new(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp);
983
984         re->i.infostr= "SSS preprocessing";
985         re->stats_draw(&re->i);
986         
987         for(mat= G.main->mat.first; mat; mat= mat->id.next)
988                 if(mat->id.us && (mat->sss_flag & MA_DIFF_SSS))
989                         sss_create_tree_mat(re, mat);
990 }
991
992 void free_sss(Render *re)
993 {
994         if(re->sss_hash) {
995                 GHashIterator *it= BLI_ghashIterator_new(re->sss_hash);
996
997                 while(!BLI_ghashIterator_isDone(it)) {
998                         sss_free_tree(BLI_ghashIterator_getValue(it));
999                         BLI_ghashIterator_step(it);
1000                 }
1001
1002                 BLI_ghashIterator_free(it);
1003                 BLI_ghash_free(re->sss_hash, NULL, NULL);
1004                 re->sss_hash= NULL;
1005         }
1006 }
1007
1008 int sample_sss(Render *re, Material *mat, float *co, float *color)
1009 {
1010         if(re->sss_hash) {
1011                 SSSData *sss= BLI_ghash_lookup(re->sss_hash, mat);
1012
1013                 if(sss) {
1014                         scatter_tree_sample(sss->tree, co, color);
1015                         return 1;
1016                 }
1017                 else {
1018                         color[0]= 0.0f;
1019                         color[1]= 0.0f;
1020                         color[2]= 0.0f;
1021                 }
1022         }
1023
1024         return 0;
1025 }
1026
1027 int has_sss_tree(struct Render *re, struct Material *mat)
1028 {
1029         return (re->sss_hash && BLI_ghash_lookup(re->sss_hash, mat));
1030 }
1031