svn merge -r 21301:21372 https://svn.blender.org/svnroot/bf-blender/branches/blender2...
[blender.git] / source / blender / render / intern / source / rayshade.c
1 /**
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version. 
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
21  * All rights reserved.
22  *
23  * Contributors: 2004/2005 Blender Foundation, full recode
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 #include <math.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <float.h>
32 #include <assert.h>
33
34 #include "MEM_guardedalloc.h"
35
36 #include "DNA_material_types.h"
37 #include "DNA_lamp_types.h"
38
39 #include "BKE_global.h"
40 #include "BKE_node.h"
41 #include "BKE_utildefines.h"
42
43 #include "BLI_arithb.h"
44 #include "BLI_blenlib.h"
45 #include "BLI_jitter.h"
46 #include "BLI_rand.h"
47
48 #include "PIL_time.h"
49
50 #include "render_types.h"
51 #include "renderpipeline.h"
52 #include "rendercore.h"
53 #include "renderdatabase.h"
54 #include "pixelblending.h"
55 #include "pixelshading.h"
56 #include "shading.h"
57 #include "texture.h"
58
59 #include "RE_raytrace.h"
60 #include "rayobject.h"
61
62 #define RAY_TRA         1
63 #define RAY_TRAFLIP     2
64
65 #define DEPTH_SHADOW_TRA  10
66
67 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
68 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
69 /* only to be used here in this file, it's for speed */
70 extern struct Render R;
71 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
72
73 #if 0
74 static int vlr_check_intersect(Isect *is, int ob, RayFace *face)
75 {
76         ObjectInstanceRen *obi= RAY_OBJECT_GET((Render*)is->userdata, ob);
77         VlakRen *vlr = (VlakRen*)face;
78
79         /* for baking selected to active non-traceable materials might still
80          * be in the raytree */
81         if(!(vlr->mat->mode & MA_TRACEBLE))
82                 return 0;
83
84         /* I know... cpu cycle waste, might do smarter once */
85         if(is->mode==RE_RAY_MIRROR)
86                 return !(vlr->mat->mode & MA_ONLYCAST);
87         else
88                 return (is->lay & obi->lay);
89 }
90 #endif
91
92 void freeraytree(Render *re)
93 {
94         ObjectInstanceRen *obi;
95         
96         if(re->raytree)
97         {
98                 RE_rayobject_free(re->raytree);
99                 re->raytree = NULL;
100         }
101         if(re->rayfaces)
102         {
103                 MEM_freeN(re->rayfaces);
104                 re->rayfaces = NULL;
105         }
106
107         for(obi=re->instancetable.first; obi; obi=obi->next)
108         {
109                 ObjectRen *obr = obi->obr;
110                 if(obr->raytree)
111                 {
112                         RE_rayobject_free(obr->raytree);
113                         obr->raytree = NULL;
114                 }
115                 if(obr->rayfaces)
116                 {
117                         MEM_freeN(obr->rayfaces);
118                         obr->rayfaces = NULL;
119                 }
120                 if(obi->raytree)
121                 {
122                         RE_rayobject_free(obi->raytree);
123                         obi->raytree = NULL;
124                 }
125         }
126 }
127
128 static int is_raytraceable_vlr(Render *re, VlakRen *vlr)
129 {
130         if((re->flag & R_BAKE_TRACE) || (vlr->mat->mode & MA_TRACEBLE))
131         if((vlr->mat->mode & MA_WIRE)==0)
132                 return 1;
133         return 0;
134 }
135
136 static int is_raytraceable(Render *re, ObjectInstanceRen *obi)
137 {
138         int v;
139         ObjectRen *obr = obi->obr;
140
141         if(re->excludeob && obr->ob == re->excludeob)
142                 return 0;
143
144         for(v=0;v<obr->totvlak;v++)
145         {
146                 VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
147                 if(is_raytraceable_vlr(re, vlr))
148                         return 1;
149         }
150         return 0;
151 }
152
153 RayObject* makeraytree_object(Render *re, ObjectInstanceRen *obi)
154 {
155         //TODO
156         // out-of-memory safeproof
157         // break render
158         // update render stats
159         ObjectRen *obr = obi->obr;
160         
161         if(obr->raytree == NULL)
162         {
163                 RayObject *raytree;
164                 RayFace *face;
165                 int v;
166                 
167                 //Count faces
168                 int faces = 0;
169                 for(v=0;v<obr->totvlak;v++)
170                 {
171                         VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
172                         if(is_raytraceable_vlr(re, vlr))
173                                 faces++;
174                 }
175                 assert( faces > 0 );
176
177                 //Create Ray cast accelaration structure
178                 
179                 //TODO dynamic ocres
180                 if(re->r.raystructure == R_RAYSTRUCTURE_HIER_BVH_OCTREE)
181                         raytree = obr->raytree = RE_rayobject_octree_create( re->r.ocres, faces );
182                 else //if(re->r.raystructure == R_RAYSTRUCTURE_HIER_BVH_BVH)
183                         raytree = obr->raytree = RE_rayobject_tree_create( faces );
184                         
185                 face = obr->rayfaces = (RayFace*)MEM_callocN(faces*sizeof(RayFace), "ObjectRen faces");
186                 obr->rayobi = obi;
187                 
188                 for(v=0;v<obr->totvlak;v++)
189                 {
190                         VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
191                         if(is_raytraceable_vlr(re, vlr))
192                         {
193                                 face->v1 = vlr->v1->co;
194                                 face->v2 = vlr->v2->co;
195                                 face->v3 = vlr->v3->co;
196                                 face->v4 = vlr->v4 ? vlr->v4->co : NULL;
197                                 
198                                 face->ob   = obi;
199                                 face->face = vlr;
200                                 
201                                 RE_rayobject_add( raytree, RayObject_unalignRayFace(face) );
202                                 
203                                 face++;
204                         }
205                 }
206                 RE_rayobject_done( raytree );
207         }
208
209
210         if(obi->flag & R_TRANSFORMED)
211         {
212                 obi->raytree = RE_rayobject_instance_create( obr->raytree, obi->mat, obi, obi->obr->rayobi );
213         }
214         
215         if(obi->raytree) return obi->raytree;
216         return obi->obr->raytree;
217 }
218
219 /*
220  * create an hierarchic raytrace structure with all objects
221  *
222  * R_TRANSFORMED objects instances reuse the same tree by using the rayobject_instance
223  */
224 static void makeraytree_hier(Render *re)
225 {
226         //TODO
227         // out-of-memory safeproof
228         // break render
229         // update render stats
230
231         ObjectInstanceRen *obi;
232         int num_objects = 0;
233
234         re->i.infostr="Creating raytrace structure";
235         re->stats_draw(re->sdh, &re->i);
236
237         //Count number of objects
238         for(obi=re->instancetable.first; obi; obi=obi->next)
239         if(is_raytraceable(re, obi))
240                 num_objects++;
241
242         //Create raytree
243         re->raytree = RE_rayobject_tree_create( num_objects );
244         
245         for(obi=re->instancetable.first; obi; obi=obi->next)
246         if(is_raytraceable(re, obi))
247         {
248                 RayObject *obj = makeraytree_object(re, obi);
249                 RE_rayobject_add( re->raytree, obj );
250
251                 if(re->test_break(re->tbh))
252                         break;
253         }
254
255         if(!re->test_break(re->tbh))
256         {
257                 RE_rayobject_done( re->raytree );
258         }
259
260         re->i.infostr= NULL;
261         re->stats_draw(re->sdh, &re->i);
262 }
263
264 /*
265  * create a single raytrace structure with all faces
266  */
267 static void makeraytree_single(Render *re)
268 {
269         ObjectInstanceRen *obi;
270         RayObject *raytree;
271         RayFace *face;
272         int faces = 0, obs = 0;
273
274         for(obi=re->instancetable.first; obi; obi=obi->next)
275         if(is_raytraceable(re, obi))
276         {
277                 int v;
278                 ObjectRen *obr = obi->obr;
279                 obs++;
280                 
281                 assert((obi->flag & R_TRANSFORMED) == 0); //Not suported
282         
283                 for(v=0;v<obr->totvlak;v++)
284                 {
285                         VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
286                         if(is_raytraceable_vlr(re, vlr))
287                                 faces++;
288                 }
289         }
290         
291         //Create raytree
292         if(re->r.raystructure == R_RAYSTRUCTURE_SINGLE_OCTREE)
293                 raytree = re->raytree = RE_rayobject_octree_create( re->r.ocres, faces );
294         else //if(re->r.raystructure == R_RAYSTRUCTURE_SINGLE_BVH)
295                 raytree = re->raytree = RE_rayobject_tree_create( faces );
296
297         face    = re->rayfaces  = (RayFace*)MEM_callocN(faces*sizeof(RayFace), "Render ray faces");
298         
299         for(obi=re->instancetable.first; obi; obi=obi->next)
300         if(is_raytraceable(re, obi))
301         {
302                 int v;
303                 ObjectRen *obr = obi->obr;
304
305                 for(v=0;v<obr->totvlak;v++)
306                 {
307                         VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
308                         face->v1 = vlr->v1->co;
309                         face->v2 = vlr->v2->co;
310                         face->v3 = vlr->v3->co;
311                         face->v4 = vlr->v4 ? vlr->v4->co : NULL;
312                         
313                         face->ob   = obi;
314                         face->face = vlr;
315                         
316                         RE_rayobject_add( raytree, RayObject_unalignRayFace(face) );
317                         face++;
318                 }
319         }
320         RE_rayobject_done( raytree );   
321 }
322
323 void makeraytree(Render *re)
324 {
325         if(ELEM(re->r.raystructure, R_RAYSTRUCTURE_SINGLE_BVH, R_RAYSTRUCTURE_SINGLE_OCTREE))
326                 BENCH(makeraytree_single(re), tree_build);
327         else
328                 BENCH(makeraytree_hier(re), tree_build);
329 }
330
331
332
333 static void shade_ray(Isect *is, ShadeInput *shi, ShadeResult *shr)
334 {
335         ObjectInstanceRen *obi= (ObjectInstanceRen*)is->hit.ob;
336         VlakRen *vlr= (VlakRen*)is->hit.face;
337         int osatex= 0;
338         
339         /* set up view vector */
340         VECCOPY(shi->view, is->vec);
341
342         /* render co */
343         shi->co[0]= is->start[0]+is->labda*(shi->view[0]);
344         shi->co[1]= is->start[1]+is->labda*(shi->view[1]);
345         shi->co[2]= is->start[2]+is->labda*(shi->view[2]);
346         
347         Normalize(shi->view);
348
349         shi->obi= obi;
350         shi->obr= obi->obr;
351         shi->vlr= vlr;
352         shi->mat= vlr->mat;
353         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));        // note, keep this synced with render_types.h
354         shi->har= shi->mat->har;
355         
356         // Osa structs we leave unchanged now
357         SWAP(int, osatex, shi->osatex);
358         
359         shi->dxco[0]= shi->dxco[1]= shi->dxco[2]= 0.0f;
360         shi->dyco[0]= shi->dyco[1]= shi->dyco[2]= 0.0f;
361         
362         // but, set Osa stuff to zero where it can confuse texture code
363         if(shi->mat->texco & (TEXCO_NORM|TEXCO_REFL) ) {
364                 shi->dxno[0]= shi->dxno[1]= shi->dxno[2]= 0.0f;
365                 shi->dyno[0]= shi->dyno[1]= shi->dyno[2]= 0.0f;
366         }
367
368         if(vlr->v4) {
369                 if(is->isect==2) 
370                         shade_input_set_triangle_i(shi, obi, vlr, 2, 1, 3);
371                 else
372                         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 3);
373         }
374         else {
375                 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
376         }
377
378         shi->u= is->u;
379         shi->v= is->v;
380         shi->dx_u= shi->dx_v= shi->dy_u= shi->dy_v=  0.0f;
381
382         shade_input_set_normals(shi);
383
384         /* point normals to viewing direction */
385         if(INPR(shi->facenor, shi->view) < 0.0f)
386                 shade_input_flip_normals(shi);
387
388         shade_input_set_shade_texco(shi);
389         
390         if(is->mode==RE_RAY_SHADOW_TRA) {
391                 /* temp hack to prevent recursion */
392                 if(shi->nodes==0 && shi->mat->nodetree && shi->mat->use_nodes) {
393                         ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
394                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
395                 }
396                 else
397                         shade_color(shi, shr);
398         }
399         else {
400                 if(shi->mat->nodetree && shi->mat->use_nodes) {
401                         ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
402                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
403                 }
404                 else
405                         shade_material_loop(shi, shr);
406                 
407                 /* raytrace likes to separate the spec color */
408                 VECSUB(shr->diff, shr->combined, shr->spec);
409         }       
410         
411         SWAP(int, osatex, shi->osatex);  // XXXXX!!!!
412
413 }
414
415 static int refraction(float *refract, float *n, float *view, float index)
416 {
417         float dot, fac;
418
419         VECCOPY(refract, view);
420         
421         dot= view[0]*n[0] + view[1]*n[1] + view[2]*n[2];
422
423         if(dot>0.0f) {
424                 index = 1.0f/index;
425                 fac= 1.0f - (1.0f - dot*dot)*index*index;
426                 if(fac<= 0.0f) return 0;
427                 fac= -dot*index + sqrt(fac);
428         }
429         else {
430                 fac= 1.0f - (1.0f - dot*dot)*index*index;
431                 if(fac<= 0.0f) return 0;
432                 fac= -dot*index - sqrt(fac);
433         }
434
435         refract[0]= index*view[0] + fac*n[0];
436         refract[1]= index*view[1] + fac*n[1];
437         refract[2]= index*view[2] + fac*n[2];
438
439         return 1;
440 }
441
442 /* orn = original face normal */
443 static void reflection(float *ref, float *n, float *view, float *orn)
444 {
445         float f1;
446         
447         f1= -2.0f*(n[0]*view[0]+ n[1]*view[1]+ n[2]*view[2]);
448         
449         ref[0]= (view[0]+f1*n[0]);
450         ref[1]= (view[1]+f1*n[1]);
451         ref[2]= (view[2]+f1*n[2]);
452
453         if(orn) {
454                 /* test phong normals, then we should prevent vector going to the back */
455                 f1= ref[0]*orn[0]+ ref[1]*orn[1]+ ref[2]*orn[2];
456                 if(f1>0.0f) {
457                         f1+= .01f;
458                         ref[0]-= f1*orn[0];
459                         ref[1]-= f1*orn[1];
460                         ref[2]-= f1*orn[2];
461                 }
462         }
463 }
464
465 #if 0
466 static void color_combine(float *result, float fac1, float fac2, float *col1, float *col2)
467 {
468         float col1t[3], col2t[3];
469         
470         col1t[0]= sqrt(col1[0]);
471         col1t[1]= sqrt(col1[1]);
472         col1t[2]= sqrt(col1[2]);
473         col2t[0]= sqrt(col2[0]);
474         col2t[1]= sqrt(col2[1]);
475         col2t[2]= sqrt(col2[2]);
476
477         result[0]= (fac1*col1t[0] + fac2*col2t[0]);
478         result[0]*= result[0];
479         result[1]= (fac1*col1t[1] + fac2*col2t[1]);
480         result[1]*= result[1];
481         result[2]= (fac1*col1t[2] + fac2*col2t[2]);
482         result[2]*= result[2];
483 }
484 #endif
485
486 static float shade_by_transmission(Isect *is, ShadeInput *shi, ShadeResult *shr)
487 {
488         float dx, dy, dz, d, p;
489
490         if (0 == (shi->mat->mode & (MA_RAYTRANSP|MA_ZTRA)))
491                 return -1;
492            
493         if (shi->mat->tx_limit <= 0.0f) {
494                 d= 1.0f;
495         } 
496         else {
497                 /* shi.co[] calculated by shade_ray() */
498                 dx= shi->co[0] - is->start[0];
499                 dy= shi->co[1] - is->start[1];
500                 dz= shi->co[2] - is->start[2];
501                 d= sqrt(dx*dx+dy*dy+dz*dz);
502                 if (d > shi->mat->tx_limit)
503                         d= shi->mat->tx_limit;
504
505                 p = shi->mat->tx_falloff;
506                 if(p < 0.0f) p= 0.0f;
507                 else if (p > 10.0f) p= 10.0f;
508
509                 shr->alpha *= pow(d, p);
510                 if (shr->alpha > 1.0f)
511                         shr->alpha= 1.0f;
512         }
513
514         return d;
515 }
516
517 static void ray_fadeout_endcolor(float *col, ShadeInput *origshi, ShadeInput *shi, ShadeResult *shr, Isect *isec, float *vec)
518 {
519         /* un-intersected rays get either rendered material color or sky color */
520         if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOMAT) {
521                 VECCOPY(col, shr->combined);
522         } else if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOSKY) {
523                 VECCOPY(shi->view, vec);
524                 Normalize(shi->view);
525                 
526                 shadeSkyView(col, isec->start, shi->view, NULL, shi->thread);
527                 shadeSunView(col, shi->view);
528         }
529 }
530
531 static void ray_fadeout(Isect *is, ShadeInput *shi, float *col, float *blendcol, float dist_mir)
532 {
533         /* if fading out, linear blend against fade color */
534         float blendfac;
535
536         blendfac = 1.0 - VecLenf(shi->co, is->start)/dist_mir;
537         
538         col[0] = col[0]*blendfac + (1.0 - blendfac)*blendcol[0];
539         col[1] = col[1]*blendfac + (1.0 - blendfac)*blendcol[1];
540         col[2] = col[2]*blendfac + (1.0 - blendfac)*blendcol[2];
541 }
542
543 /* the main recursive tracer itself */
544 static void traceray(ShadeInput *origshi, ShadeResult *origshr, short depth, float *start, float *vec, float *col, ObjectInstanceRen *obi, VlakRen *vlr, int traflag)
545 {
546         ShadeInput shi;
547         ShadeResult shr;
548         Isect isec;
549         float f, f1, fr, fg, fb;
550         float ref[3];
551         float dist_mir = origshi->mat->dist_mir;
552
553         /* Warning, This is not that nice, and possibly a bit slow for every ray,
554         however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
555         memset(&shi, 0, sizeof(ShadeInput)); 
556         /* end warning! - Campbell */
557         
558         VECCOPY(isec.start, start);
559         VECCOPY(isec.vec, vec );
560         isec.labda = dist_mir > 0 ? dist_mir : RE_RAYTRACE_MAXDIST;
561         isec.mode= RE_RAY_MIRROR;
562         isec.skip = RE_SKIP_VLR_NEIGHBOUR;
563
564         isec.orig.ob   = obi;
565         isec.orig.face = vlr;
566
567         if(RE_rayobject_raycast(R.raytree, &isec)) {
568                 float d= 1.0f;
569                 
570                 shi.mask= origshi->mask;
571                 shi.osatex= origshi->osatex;
572                 shi.depth= 1;                                   /* only used to indicate tracing */
573                 shi.thread= origshi->thread;
574                 //shi.sample= 0; // memset above, so dont need this
575                 shi.xs= origshi->xs;
576                 shi.ys= origshi->ys;
577                 shi.lay= origshi->lay;
578                 shi.passflag= SCE_PASS_COMBINED; /* result of tracing needs no pass info */
579                 shi.combinedflag= 0xFFFFFF;              /* ray trace does all options */
580                 //shi.do_preview= 0; // memset above, so dont need this
581                 shi.light_override= origshi->light_override;
582                 shi.mat_override= origshi->mat_override;
583                 
584                 memset(&shr, 0, sizeof(ShadeResult));
585                 
586                 shade_ray(&isec, &shi, &shr);
587                 if (traflag & RAY_TRA)
588                         d= shade_by_transmission(&isec, &shi, &shr);
589                 
590                 if(depth>0) {
591
592                         if(shi.mat->mode_l & (MA_RAYTRANSP|MA_ZTRA) && shr.alpha < 1.0f) {
593                                 float nf, f, f1, refract[3], tracol[4];
594                                 
595                                 tracol[0]= shi.r;
596                                 tracol[1]= shi.g;
597                                 tracol[2]= shi.b;
598                                 tracol[3]= col[3];      // we pass on and accumulate alpha
599                                 
600                                 if(shi.mat->mode & MA_RAYTRANSP) {
601                                         /* odd depths: use normal facing viewer, otherwise flip */
602                                         if(traflag & RAY_TRAFLIP) {
603                                                 float norm[3];
604                                                 norm[0]= - shi.vn[0];
605                                                 norm[1]= - shi.vn[1];
606                                                 norm[2]= - shi.vn[2];
607                                                 if (!refraction(refract, norm, shi.view, shi.ang))
608                                                         reflection(refract, norm, shi.view, shi.vn);
609                                         }
610                                         else {
611                                                 if (!refraction(refract, shi.vn, shi.view, shi.ang))
612                                                         reflection(refract, shi.vn, shi.view, shi.vn);
613                                         }
614                                         traflag |= RAY_TRA;
615                                         traceray(origshi, origshr, depth-1, shi.co, refract, tracol, shi.obi, shi.vlr, traflag ^ RAY_TRAFLIP);
616                                 }
617                                 else
618                                         traceray(origshi, origshr, depth-1, shi.co, shi.view, tracol, shi.obi, shi.vlr, 0);
619                                 
620                                 f= shr.alpha; f1= 1.0f-f;
621                                 nf= d * shi.mat->filter;
622                                 fr= 1.0f+ nf*(shi.r-1.0f);
623                                 fg= 1.0f+ nf*(shi.g-1.0f);
624                                 fb= 1.0f+ nf*(shi.b-1.0f);
625                                 shr.diff[0]= f*shr.diff[0] + f1*fr*tracol[0];
626                                 shr.diff[1]= f*shr.diff[1] + f1*fg*tracol[1];
627                                 shr.diff[2]= f*shr.diff[2] + f1*fb*tracol[2];
628                                 
629                                 shr.spec[0] *=f;
630                                 shr.spec[1] *=f;
631                                 shr.spec[2] *=f;
632
633                                 col[3]= f1*tracol[3] + f;
634                         }
635                         else 
636                                 col[3]= 1.0f;
637
638                         if(shi.mat->mode_l & MA_RAYMIRROR) {
639                                 f= shi.ray_mirror;
640                                 if(f!=0.0f) f*= fresnel_fac(shi.view, shi.vn, shi.mat->fresnel_mir_i, shi.mat->fresnel_mir);
641                         }
642                         else f= 0.0f;
643                         
644                         if(f!=0.0f) {
645                                 float mircol[4];
646                                 
647                                 reflection(ref, shi.vn, shi.view, NULL);                        
648                                 traceray(origshi, origshr, depth-1, shi.co, ref, mircol, shi.obi, shi.vlr, 0);
649                         
650                                 f1= 1.0f-f;
651
652                                 /* combine */
653                                 //color_combine(col, f*fr*(1.0f-shr.spec[0]), f1, col, shr.diff);
654                                 //col[0]+= shr.spec[0];
655                                 //col[1]+= shr.spec[1];
656                                 //col[2]+= shr.spec[2];
657                                 
658                                 fr= shi.mirr;
659                                 fg= shi.mirg;
660                                 fb= shi.mirb;
661                 
662                                 col[0]= f*fr*(1.0f-shr.spec[0])*mircol[0] + f1*shr.diff[0] + shr.spec[0];
663                                 col[1]= f*fg*(1.0f-shr.spec[1])*mircol[1] + f1*shr.diff[1] + shr.spec[1];
664                                 col[2]= f*fb*(1.0f-shr.spec[2])*mircol[2] + f1*shr.diff[2] + shr.spec[2];
665                         }
666                         else {
667                                 col[0]= shr.diff[0] + shr.spec[0];
668                                 col[1]= shr.diff[1] + shr.spec[1];
669                                 col[2]= shr.diff[2] + shr.spec[2];
670                         }
671                         
672                         if (dist_mir > 0.0) {
673                                 float blendcol[3];
674                                 
675                                 /* max ray distance set, but found an intersection, so fade this color
676                                  * out towards the sky/material color for a smooth transition */
677                                 ray_fadeout_endcolor(blendcol, origshi, &shi, origshr, &isec, vec);
678                                 ray_fadeout(&isec, &shi, col, blendcol, dist_mir);
679                         }
680                 }
681                 else {
682                         col[0]= shr.diff[0] + shr.spec[0];
683                         col[1]= shr.diff[1] + shr.spec[1];
684                         col[2]= shr.diff[2] + shr.spec[2];
685                 }
686                 
687         }
688         else {
689                 ray_fadeout_endcolor(col, origshi, &shi, origshr, &isec, vec);
690         }
691 }
692
693 /* **************** jitter blocks ********** */
694
695 /* calc distributed planar energy */
696
697 static void DP_energy(float *table, float *vec, int tot, float xsize, float ysize)
698 {
699         int x, y, a;
700         float *fp, force[3], result[3];
701         float dx, dy, dist, min;
702         
703         min= MIN2(xsize, ysize);
704         min*= min;
705         result[0]= result[1]= 0.0f;
706         
707         for(y= -1; y<2; y++) {
708                 dy= ysize*y;
709                 for(x= -1; x<2; x++) {
710                         dx= xsize*x;
711                         fp= table;
712                         for(a=0; a<tot; a++, fp+= 2) {
713                                 force[0]= vec[0] - fp[0]-dx;
714                                 force[1]= vec[1] - fp[1]-dy;
715                                 dist= force[0]*force[0] + force[1]*force[1];
716                                 if(dist < min && dist>0.0f) {
717                                         result[0]+= force[0]/dist;
718                                         result[1]+= force[1]/dist;
719                                 }
720                         }
721                 }
722         }
723         vec[0] += 0.1*min*result[0]/(float)tot;
724         vec[1] += 0.1*min*result[1]/(float)tot;
725         // cyclic clamping
726         vec[0]= vec[0] - xsize*floor(vec[0]/xsize + 0.5);
727         vec[1]= vec[1] - ysize*floor(vec[1]/ysize + 0.5);
728 }
729
730 // random offset of 1 in 2
731 static void jitter_plane_offset(float *jitter1, float *jitter2, int tot, float sizex, float sizey, float ofsx, float ofsy)
732 {
733         float dsizex= sizex*ofsx;
734         float dsizey= sizey*ofsy;
735         float hsizex= 0.5*sizex, hsizey= 0.5*sizey;
736         int x;
737         
738         for(x=tot; x>0; x--, jitter1+=2, jitter2+=2) {
739                 jitter2[0]= jitter1[0] + dsizex;
740                 jitter2[1]= jitter1[1] + dsizey;
741                 if(jitter2[0] > hsizex) jitter2[0]-= sizex;
742                 if(jitter2[1] > hsizey) jitter2[1]-= sizey;
743         }
744 }
745
746 /* called from convertBlenderScene.c */
747 /* we do this in advance to get consistant random, not alter the render seed, and be threadsafe */
748 void init_jitter_plane(LampRen *lar)
749 {
750         float *fp;
751         int x, iter=12, tot= lar->ray_totsamp;
752         
753         /* test if already initialized */
754         if(lar->jitter) return;
755         
756         /* at least 4, or max threads+1 tables */
757         if(BLENDER_MAX_THREADS < 4) x= 4;
758         else x= BLENDER_MAX_THREADS+1;
759         fp= lar->jitter= MEM_callocN(x*tot*2*sizeof(float), "lamp jitter tab");
760         
761         /* if 1 sample, we leave table to be zero's */
762         if(tot>1) {
763                 
764                 /* set per-lamp fixed seed */
765                 BLI_srandom(tot);
766                 
767                 /* fill table with random locations, area_size large */
768                 for(x=0; x<tot; x++, fp+=2) {
769                         fp[0]= (BLI_frand()-0.5)*lar->area_size;
770                         fp[1]= (BLI_frand()-0.5)*lar->area_sizey;
771                 }
772                 
773                 while(iter--) {
774                         fp= lar->jitter;
775                         for(x=tot; x>0; x--, fp+=2) {
776                                 DP_energy(lar->jitter, fp, tot, lar->area_size, lar->area_sizey);
777                         }
778                 }
779         }       
780         /* create the dithered tables (could just check lamp type!) */
781         jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.0f);
782         jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.5f);
783         jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0f, 0.5f);
784 }
785
786 /* table around origin, -0.5*size to 0.5*size */
787 static float *give_jitter_plane(LampRen *lar, int thread, int xs, int ys)
788 {
789         int tot;
790         
791         tot= lar->ray_totsamp;
792                         
793         if(lar->ray_samp_type & LA_SAMP_JITTER) {
794                 /* made it threadsafe */
795                 
796                 if(lar->xold[thread]!=xs || lar->yold[thread]!=ys) {
797                         jitter_plane_offset(lar->jitter, lar->jitter+2*(thread+1)*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(thread), BLI_thread_frand(thread));
798                         lar->xold[thread]= xs; 
799                         lar->yold[thread]= ys;
800                 }
801                 return lar->jitter+2*(thread+1)*tot;
802         }
803         if(lar->ray_samp_type & LA_SAMP_DITHER) {
804                 return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1));
805         }
806         
807         return lar->jitter;
808 }
809
810
811 /* **************** QMC sampling *************** */
812
813 static void halton_sample(double *ht_invprimes, double *ht_nums, double *v)
814 {
815         // incremental halton sequence generator, from:
816         // "Instant Radiosity", Keller A.
817         unsigned int i;
818         
819         for (i = 0; i < 2; i++)
820         {
821                 double r = fabs((1.0 - ht_nums[i]) - 1e-10);
822                 
823                 if (ht_invprimes[i] >= r)
824                 {
825                         double lasth;
826                         double h = ht_invprimes[i];
827                         
828                         do {
829                                 lasth = h;
830                                 h *= ht_invprimes[i];
831                         } while (h >= r);
832                         
833                         ht_nums[i] += ((lasth + h) - 1.0);
834                 }
835                 else
836                         ht_nums[i] += ht_invprimes[i];
837                 
838                 v[i] = (float)ht_nums[i];
839         }
840 }
841
842 /* Generate Hammersley points in [0,1)^2
843  * From Lucille renderer */
844 static void hammersley_create(double *out, int n)
845 {
846         double p, t;
847         int k, kk;
848
849         for (k = 0; k < n; k++) {
850                 t = 0;
851                 for (p = 0.5, kk = k; kk; p *= 0.5, kk >>= 1) {
852                         if (kk & 1) {           /* kk mod 2 = 1         */
853                                 t += p;
854                         }
855                 }
856         
857                 out[2 * k + 0] = (double)k / (double)n;
858                 out[2 * k + 1] = t;
859         }
860 }
861
862 static struct QMCSampler *QMC_initSampler(int type, int tot)
863 {       
864         QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler");
865         qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table");
866
867         qsa->tot = tot;
868         qsa->type = type;
869         
870         if (qsa->type==SAMP_TYPE_HAMMERSLEY) 
871                 hammersley_create(qsa->samp2d, qsa->tot);
872                 
873         return qsa;
874 }
875
876 static void QMC_initPixel(QMCSampler *qsa, int thread)
877 {
878         if (qsa->type==SAMP_TYPE_HAMMERSLEY)
879         {
880                 /* hammersley sequence is fixed, already created in QMCSampler init.
881                  * per pixel, gets a random offset. We create separate offsets per thread, for write-safety */
882                 qsa->offs[thread][0] = 0.5 * BLI_thread_frand(thread);
883                 qsa->offs[thread][1] = 0.5 * BLI_thread_frand(thread);
884         }
885         else {  /* SAMP_TYPE_HALTON */
886                 
887                 /* generate a new randomised halton sequence per pixel
888                  * to alleviate qmc artifacts and make it reproducable 
889                  * between threads/frames */
890                 double ht_invprimes[2], ht_nums[2];
891                 double r[2];
892                 int i;
893         
894                 ht_nums[0] = BLI_thread_frand(thread);
895                 ht_nums[1] = BLI_thread_frand(thread);
896                 ht_invprimes[0] = 0.5;
897                 ht_invprimes[1] = 1.0/3.0;
898                 
899                 for (i=0; i< qsa->tot; i++) {
900                         halton_sample(ht_invprimes, ht_nums, r);
901                         qsa->samp2d[2*i+0] = r[0];
902                         qsa->samp2d[2*i+1] = r[1];
903                 }
904         }
905 }
906
907 static void QMC_freeSampler(QMCSampler *qsa)
908 {
909         MEM_freeN(qsa->samp2d);
910         MEM_freeN(qsa);
911 }
912
913 static void QMC_getSample(double *s, QMCSampler *qsa, int thread, int num)
914 {
915         if (qsa->type == SAMP_TYPE_HAMMERSLEY) {
916                 s[0] = fmod(qsa->samp2d[2*num+0] + qsa->offs[thread][0], 1.0f);
917                 s[1] = fmod(qsa->samp2d[2*num+1] + qsa->offs[thread][1], 1.0f);
918         }
919         else { /* SAMP_TYPE_HALTON */
920                 s[0] = qsa->samp2d[2*num+0];
921                 s[1] = qsa->samp2d[2*num+1];
922         }
923 }
924
925 /* phong weighted disc using 'blur' for exponent, centred on 0,0 */
926 static void QMC_samplePhong(float *vec, QMCSampler *qsa, int thread, int num, float blur)
927 {
928         double s[2];
929         float phi, pz, sqr;
930         
931         QMC_getSample(s, qsa, thread, num);
932
933         phi = s[0]*2*M_PI;
934         pz = pow(s[1], blur);
935         sqr = sqrt(1.0f-pz*pz);
936
937         vec[0] = cos(phi)*sqr;
938         vec[1] = sin(phi)*sqr;
939         vec[2] = 0.0f;
940 }
941
942 /* rect of edge lengths sizex, sizey, centred on 0.0,0.0 i.e. ranging from -sizex/2 to +sizey/2 */
943 static void QMC_sampleRect(float *vec, QMCSampler *qsa, int thread, int num, float sizex, float sizey)
944 {
945         double s[2];
946
947         QMC_getSample(s, qsa, thread, num);
948                 
949         vec[0] = (s[0] - 0.5) * sizex;
950         vec[1] = (s[1] - 0.5) * sizey;
951         vec[2] = 0.0f;
952 }
953
954 /* disc of radius 'radius', centred on 0,0 */
955 static void QMC_sampleDisc(float *vec, QMCSampler *qsa, int thread, int num, float radius)
956 {
957         double s[2];
958         float phi, sqr;
959         
960         QMC_getSample(s, qsa, thread, num);
961         
962         phi = s[0]*2*M_PI;
963         sqr = sqrt(s[1]);
964
965         vec[0] = cos(phi)*sqr* radius/2.0;
966         vec[1] = sin(phi)*sqr* radius/2.0;
967         vec[2] = 0.0f;
968 }
969
970 /* uniform hemisphere sampling */
971 static void QMC_sampleHemi(float *vec, QMCSampler *qsa, int thread, int num)
972 {
973         double s[2];
974         float phi, sqr;
975         
976         QMC_getSample(s, qsa, thread, num);
977         
978         phi = s[0]*2.f*M_PI;    
979         sqr = sqrt(s[1]);
980
981         vec[0] = cos(phi)*sqr;
982         vec[1] = sin(phi)*sqr;
983         vec[2] = 1.f - s[1]*s[1];
984 }
985
986 #if 0 /* currently not used */
987 /* cosine weighted hemisphere sampling */
988 static void QMC_sampleHemiCosine(float *vec, QMCSampler *qsa, int thread, int num)
989 {
990         double s[2];
991         float phi, sqr;
992         
993         QMC_getSample(s, qsa, thread, num);
994         
995         phi = s[0]*2.f*M_PI;    
996         sqr = s[1]*sqrt(2-s[1]*s[1]);
997
998         vec[0] = cos(phi)*sqr;
999         vec[1] = sin(phi)*sqr;
1000         vec[2] = 1.f - s[1]*s[1];
1001
1002 }
1003 #endif
1004
1005 /* called from convertBlenderScene.c */
1006 void init_render_qmcsampler(Render *re)
1007 {
1008         re->qmcsamplers= MEM_callocN(sizeof(ListBase)*BLENDER_MAX_THREADS, "QMCListBase");
1009 }
1010
1011 static QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot)
1012 {
1013         QMCSampler *qsa;
1014
1015         /* create qmc samplers as needed, since recursion makes it hard to
1016          * predict how many are needed */
1017
1018         for(qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) {
1019                 if(qsa->type == type && qsa->tot == tot && !qsa->used) {
1020                         qsa->used= 1;
1021                         return qsa;
1022                 }
1023         }
1024
1025         qsa= QMC_initSampler(type, tot);
1026         qsa->used= 1;
1027         BLI_addtail(&re->qmcsamplers[thread], qsa);
1028
1029         return qsa;
1030 }
1031
1032 static void release_thread_qmcsampler(Render *re, int thread, QMCSampler *qsa)
1033 {
1034         qsa->used= 0;
1035 }
1036
1037 void free_render_qmcsampler(Render *re)
1038 {
1039         QMCSampler *qsa, *next;
1040         int a;
1041
1042         if(re->qmcsamplers) {
1043                 for(a=0; a<BLENDER_MAX_THREADS; a++) {
1044                         for(qsa=re->qmcsamplers[a].first; qsa; qsa=next) {
1045                                 next= qsa->next;
1046                                 QMC_freeSampler(qsa);
1047                         }
1048
1049                         re->qmcsamplers[a].first= re->qmcsamplers[a].last= NULL;
1050                 }
1051
1052                 MEM_freeN(re->qmcsamplers);
1053                 re->qmcsamplers= NULL;
1054         }
1055 }
1056
1057 static int adaptive_sample_variance(int samples, float *col, float *colsq, float thresh)
1058 {
1059         float var[3], mean[3];
1060
1061         /* scale threshold just to give a bit more precision in input rather than dealing with 
1062          * tiny tiny numbers in the UI */
1063         thresh /= 2;
1064         
1065         mean[0] = col[0] / (float)samples;
1066         mean[1] = col[1] / (float)samples;
1067         mean[2] = col[2] / (float)samples;
1068
1069         var[0] = (colsq[0] / (float)samples) - (mean[0]*mean[0]);
1070         var[1] = (colsq[1] / (float)samples) - (mean[1]*mean[1]);
1071         var[2] = (colsq[2] / (float)samples) - (mean[2]*mean[2]);
1072         
1073         if ((var[0] * 0.4 < thresh) && (var[1] * 0.3 < thresh) && (var[2] * 0.6 < thresh))
1074                 return 1;
1075         else
1076                 return 0;
1077 }
1078
1079 static int adaptive_sample_contrast_val(int samples, float prev, float val, float thresh)
1080 {
1081         /* if the last sample's contribution to the total value was below a small threshold
1082          * (i.e. the samples taken are very similar), then taking more samples that are probably 
1083          * going to be the same is wasting effort */
1084         if (fabs( prev/(float)(samples-1) - val/(float)samples ) < thresh) {
1085                 return 1;
1086         } else
1087                 return 0;
1088 }
1089
1090 static float get_avg_speed(ShadeInput *shi)
1091 {
1092         float pre_x, pre_y, post_x, post_y, speedavg;
1093         
1094         pre_x = (shi->winspeed[0] == PASS_VECTOR_MAX)?0.0:shi->winspeed[0];
1095         pre_y = (shi->winspeed[1] == PASS_VECTOR_MAX)?0.0:shi->winspeed[1];
1096         post_x = (shi->winspeed[2] == PASS_VECTOR_MAX)?0.0:shi->winspeed[2];
1097         post_y = (shi->winspeed[3] == PASS_VECTOR_MAX)?0.0:shi->winspeed[3];
1098         
1099         speedavg = (sqrt(pre_x*pre_x + pre_y*pre_y) + sqrt(post_x*post_x + post_y*post_y)) / 2.0;
1100         
1101         return speedavg;
1102 }
1103
1104 /* ***************** main calls ************** */
1105
1106
1107 static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr)
1108 {
1109         QMCSampler *qsa=NULL;
1110         int samp_type;
1111         
1112         float samp3d[3], orthx[3], orthy[3];
1113         float v_refract[3], v_refract_new[3];
1114         float sampcol[4], colsq[4];
1115         
1116         float blur = pow(1.0 - shi->mat->gloss_tra, 3);
1117         short max_samples = shi->mat->samp_gloss_tra;
1118         float adapt_thresh = shi->mat->adapt_thresh_tra;
1119         
1120         int samples=0;
1121         
1122         colsq[0] = colsq[1] = colsq[2] = 0.0;
1123         col[0] = col[1] = col[2] = 0.0;
1124         col[3]= shr->alpha;
1125         
1126         if (blur > 0.0) {
1127                 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1128                 else samp_type = SAMP_TYPE_HAMMERSLEY;
1129                         
1130                 /* all samples are generated per pixel */
1131                 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1132                 QMC_initPixel(qsa, shi->thread);
1133         } else 
1134                 max_samples = 1;
1135         
1136
1137         while (samples < max_samples) {         
1138                 refraction(v_refract, shi->vn, shi->view, shi->ang);
1139                 
1140                 if (max_samples > 1) {
1141                         /* get a quasi-random vector from a phong-weighted disc */
1142                         QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1143                                                 
1144                         VecOrthoBasisf(v_refract, orthx, orthy);
1145                         VecMulf(orthx, samp3d[0]);
1146                         VecMulf(orthy, samp3d[1]);
1147                                 
1148                         /* and perturb the refraction vector in it */
1149                         VecAddf(v_refract_new, v_refract, orthx);
1150                         VecAddf(v_refract_new, v_refract_new, orthy);
1151                         
1152                         Normalize(v_refract_new);
1153                 } else {
1154                         /* no blurriness, use the original normal */
1155                         VECCOPY(v_refract_new, v_refract);
1156                 }
1157         
1158                 traceray(shi, shr, shi->mat->ray_depth_tra, shi->co, v_refract_new, sampcol, shi->obi, shi->vlr, RAY_TRA|RAY_TRAFLIP);
1159         
1160                 col[0] += sampcol[0];
1161                 col[1] += sampcol[1];
1162                 col[2] += sampcol[2];
1163                 col[3] += sampcol[3];
1164                 
1165                 /* for variance calc */
1166                 colsq[0] += sampcol[0]*sampcol[0];
1167                 colsq[1] += sampcol[1]*sampcol[1];
1168                 colsq[2] += sampcol[2]*sampcol[2];
1169                 
1170                 samples++;
1171                 
1172                 /* adaptive sampling */
1173                 if (adapt_thresh < 1.0 && samples > max_samples/2) 
1174                 {
1175                         if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1176                                 break;
1177                         
1178                         /* if the pixel so far is very dark, we can get away with less samples */
1179                         if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1180                                 max_samples--;
1181                 }
1182         }
1183         
1184         col[0] /= (float)samples;
1185         col[1] /= (float)samples;
1186         col[2] /= (float)samples;
1187         col[3] /= (float)samples;
1188         
1189         if (qsa)
1190                 release_thread_qmcsampler(&R, shi->thread, qsa);
1191 }
1192
1193 static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float fresnelfac)
1194 {
1195         QMCSampler *qsa=NULL;
1196         int samp_type;
1197         
1198         float samp3d[3], orthx[3], orthy[3];
1199         float v_nor_new[3], v_reflect[3];
1200         float sampcol[4], colsq[4];
1201                 
1202         float blur = pow(1.0 - shi->mat->gloss_mir, 3);
1203         short max_samples = shi->mat->samp_gloss_mir;
1204         float adapt_thresh = shi->mat->adapt_thresh_mir;
1205         float aniso = 1.0 - shi->mat->aniso_gloss_mir;
1206         
1207         int samples=0;
1208         
1209         col[0] = col[1] = col[2] = 0.0;
1210         colsq[0] = colsq[1] = colsq[2] = 0.0;
1211         
1212         if (blur > 0.0) {
1213                 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1214                 else samp_type = SAMP_TYPE_HAMMERSLEY;
1215                         
1216                 /* all samples are generated per pixel */
1217                 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1218                 QMC_initPixel(qsa, shi->thread);
1219         } else 
1220                 max_samples = 1;
1221         
1222         while (samples < max_samples) {
1223                                 
1224                 if (max_samples > 1) {
1225                         /* get a quasi-random vector from a phong-weighted disc */
1226                         QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1227
1228                         /* find the normal's perpendicular plane, blurring along tangents
1229                          * if tangent shading enabled */
1230                         if (shi->mat->mode & (MA_TANGENT_V)) {
1231                                 Crossf(orthx, shi->vn, shi->tang);      // bitangent
1232                                 VECCOPY(orthy, shi->tang);
1233                                 VecMulf(orthx, samp3d[0]);
1234                                 VecMulf(orthy, samp3d[1]*aniso);
1235                         } else {
1236                                 VecOrthoBasisf(shi->vn, orthx, orthy);
1237                                 VecMulf(orthx, samp3d[0]);
1238                                 VecMulf(orthy, samp3d[1]);
1239                         }
1240
1241                         /* and perturb the normal in it */
1242                         VecAddf(v_nor_new, shi->vn, orthx);
1243                         VecAddf(v_nor_new, v_nor_new, orthy);
1244                         Normalize(v_nor_new);
1245                 } else {
1246                         /* no blurriness, use the original normal */
1247                         VECCOPY(v_nor_new, shi->vn);
1248                 }
1249                 
1250                 if((shi->vlr->flag & R_SMOOTH)) 
1251                         reflection(v_reflect, v_nor_new, shi->view, shi->facenor);
1252                 else
1253                         reflection(v_reflect, v_nor_new, shi->view, NULL);
1254                 
1255                 traceray(shi, shr, shi->mat->ray_depth, shi->co, v_reflect, sampcol, shi->obi, shi->vlr, 0);
1256
1257                 
1258                 col[0] += sampcol[0];
1259                 col[1] += sampcol[1];
1260                 col[2] += sampcol[2];
1261         
1262                 /* for variance calc */
1263                 colsq[0] += sampcol[0]*sampcol[0];
1264                 colsq[1] += sampcol[1]*sampcol[1];
1265                 colsq[2] += sampcol[2]*sampcol[2];
1266                 
1267                 samples++;
1268
1269                 /* adaptive sampling */
1270                 if (adapt_thresh > 0.0 && samples > max_samples/3) 
1271                 {
1272                         if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1273                                 break;
1274                         
1275                         /* if the pixel so far is very dark, we can get away with less samples */
1276                         if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1277                                 max_samples--;
1278                 
1279                         /* reduce samples when reflection is dim due to low ray mirror blend value or fresnel factor
1280                          * and when reflection is blurry */
1281                         if (fresnelfac < 0.1 * (blur+1)) {
1282                                 max_samples--;
1283                                 
1284                                 /* even more for very dim */
1285                                 if (fresnelfac < 0.05 * (blur+1)) 
1286                                         max_samples--;
1287                         }
1288                 }
1289         }
1290         
1291         col[0] /= (float)samples;
1292         col[1] /= (float)samples;
1293         col[2] /= (float)samples;
1294         
1295         if (qsa)
1296                 release_thread_qmcsampler(&R, shi->thread, qsa);
1297 }
1298
1299 /* extern call from render loop */
1300 void ray_trace(ShadeInput *shi, ShadeResult *shr)
1301 {
1302         float i, f, f1, fr, fg, fb;
1303         float mircol[4], tracol[4];
1304         float diff[3];
1305         int do_tra, do_mir;
1306         
1307         do_tra= ((shi->mat->mode & (MA_RAYTRANSP)) && shr->alpha!=1.0f);
1308         do_mir= ((shi->mat->mode & MA_RAYMIRROR) && shi->ray_mirror!=0.0f);
1309
1310         
1311         /* raytrace mirror amd refract like to separate the spec color */
1312         if(shi->combinedflag & SCE_PASS_SPEC)
1313                 VECSUB(diff, shr->combined, shr->spec) /* no ; */
1314         else
1315                 VECCOPY(diff, shr->combined);
1316         
1317         if(do_tra) {
1318                 float olddiff[3];
1319                 
1320                 trace_refract(tracol, shi, shr);
1321                 
1322                 f= shr->alpha; f1= 1.0f-f;
1323                 fr= 1.0f+ shi->mat->filter*(shi->r-1.0f);
1324                 fg= 1.0f+ shi->mat->filter*(shi->g-1.0f);
1325                 fb= 1.0f+ shi->mat->filter*(shi->b-1.0f);
1326                 
1327                 /* for refract pass */
1328                 VECCOPY(olddiff, diff);
1329                 
1330                 diff[0]= f*diff[0] + f1*fr*tracol[0];
1331                 diff[1]= f*diff[1] + f1*fg*tracol[1];
1332                 diff[2]= f*diff[2] + f1*fb*tracol[2];
1333                 
1334                 if(shi->passflag & SCE_PASS_REFRACT)
1335                         VECSUB(shr->refr, diff, olddiff);
1336                 
1337                 if(!(shi->combinedflag & SCE_PASS_REFRACT))
1338                         VECSUB(diff, diff, shr->refr);
1339                 
1340                 shr->alpha= tracol[3];
1341         }
1342         
1343         if(do_mir) {
1344         
1345                 i= shi->ray_mirror*fresnel_fac(shi->view, shi->vn, shi->mat->fresnel_mir_i, shi->mat->fresnel_mir);
1346                 if(i!=0.0f) {
1347                 
1348                         trace_reflect(mircol, shi, shr, i);
1349                         
1350                         fr= i*shi->mirr;
1351                         fg= i*shi->mirg;
1352                         fb= i*shi->mirb;
1353
1354                         if(shi->passflag & SCE_PASS_REFLECT) {
1355                                 /* mirror pass is not blocked out with spec */
1356                                 shr->refl[0]= fr*mircol[0] - fr*diff[0];
1357                                 shr->refl[1]= fg*mircol[1] - fg*diff[1];
1358                                 shr->refl[2]= fb*mircol[2] - fb*diff[2];
1359                         }
1360                         
1361                         if(shi->combinedflag & SCE_PASS_REFLECT) {
1362                                 
1363                                 f= fr*(1.0f-shr->spec[0]);      f1= 1.0f-i;
1364                                 diff[0]= f*mircol[0] + f1*diff[0];
1365                                 
1366                                 f= fg*(1.0f-shr->spec[1]);      f1= 1.0f-i;
1367                                 diff[1]= f*mircol[1] + f1*diff[1];
1368                                 
1369                                 f= fb*(1.0f-shr->spec[2]);      f1= 1.0f-i;
1370                                 diff[2]= f*mircol[2] + f1*diff[2];
1371                         }
1372                 }
1373         }
1374         /* put back together */
1375         if(shi->combinedflag & SCE_PASS_SPEC)
1376                 VECADD(shr->combined, diff, shr->spec) /* no ; */
1377         else
1378                 VECCOPY(shr->combined, diff);
1379 }
1380
1381 /* color 'shadfac' passes through 'col' with alpha and filter */
1382 /* filter is only applied on alpha defined transparent part */
1383 static void addAlphaLight(float *shadfac, float *col, float alpha, float filter)
1384 {
1385         float fr, fg, fb;
1386         
1387         fr= 1.0f+ filter*(col[0]-1.0f);
1388         fg= 1.0f+ filter*(col[1]-1.0f);
1389         fb= 1.0f+ filter*(col[2]-1.0f);
1390         
1391         shadfac[0]= alpha*col[0] + fr*(1.0f-alpha)*shadfac[0];
1392         shadfac[1]= alpha*col[1] + fg*(1.0f-alpha)*shadfac[1];
1393         shadfac[2]= alpha*col[2] + fb*(1.0f-alpha)*shadfac[2];
1394         
1395         shadfac[3]= (1.0f-alpha)*shadfac[3];
1396 }
1397
1398 static void ray_trace_shadow_tra(Isect *is, ShadeInput *origshi, int depth, int traflag)
1399 {
1400         /* ray to lamp, find first face that intersects, check alpha properties,
1401            if it has col[3]>0.0f  continue. so exit when alpha is full */
1402         ShadeInput shi;
1403         ShadeResult shr;
1404         
1405         if(RE_rayobject_raycast(R.raytree, is)) {
1406                 float d= 1.0f;
1407                 /* we got a face */
1408                 
1409                 /* Warning, This is not that nice, and possibly a bit slow for every ray,
1410                 however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1411                 memset(&shi, 0, sizeof(ShadeInput)); 
1412                 /* end warning! - Campbell */
1413                 
1414                 shi.depth= 1;                                   /* only used to indicate tracing */
1415                 shi.mask= origshi->mask;
1416                 shi.thread= origshi->thread;
1417                 shi.passflag= SCE_PASS_COMBINED;
1418                 shi.combinedflag= 0xFFFFFF;              /* ray trace does all options */
1419         
1420                 shi.xs= origshi->xs;
1421                 shi.ys= origshi->ys;
1422                 shi.lay= origshi->lay;
1423                 shi.nodes= origshi->nodes;
1424                 
1425                 shade_ray(is, &shi, &shr);
1426                 if (traflag & RAY_TRA)
1427                         d= shade_by_transmission(is, &shi, &shr);
1428                 
1429                 /* mix colors based on shadfac (rgb + amount of light factor) */
1430                 addAlphaLight(is->col, shr.diff, shr.alpha, d*shi.mat->filter);
1431                 
1432                 if(depth>0 && is->col[3]>0.0f) {
1433                         
1434                         /* adapt isect struct */
1435                         VECCOPY(is->start, shi.co);
1436
1437                         is->orig.ob   = shi.obi;
1438                         is->orig.face = shi.vlr;
1439
1440                         ray_trace_shadow_tra(is, origshi, depth-1, traflag | RAY_TRA);
1441                 }
1442         }
1443 }
1444
1445 /* not used, test function for ambient occlusion (yaf: pathlight) */
1446 /* main problem; has to be called within shading loop, giving unwanted recursion */
1447 int ray_trace_shadow_rad(ShadeInput *ship, ShadeResult *shr)
1448 {
1449         static int counter=0, only_one= 0;
1450         extern float hashvectf[];
1451         Isect isec;
1452         ShadeInput shi;
1453         ShadeResult shr_t;
1454         float vec[3], accum[3], div= 0.0f;
1455         int a;
1456         
1457         assert(0);
1458         
1459         if(only_one) {
1460                 return 0;
1461         }
1462         only_one= 1;
1463         
1464         accum[0]= accum[1]= accum[2]= 0.0f;
1465         isec.mode= RE_RAY_MIRROR;
1466         isec.orig.ob   = ship->obi;
1467         isec.orig.face = ship->vlr;
1468         
1469         for(a=0; a<8*8; a++) {
1470                 
1471                 counter+=3;
1472                 counter %= 768;
1473                 VECCOPY(vec, hashvectf+counter);
1474                 if(ship->vn[0]*vec[0]+ship->vn[1]*vec[1]+ship->vn[2]*vec[2]>0.0f) {
1475                         vec[0]-= vec[0];
1476                         vec[1]-= vec[1];
1477                         vec[2]-= vec[2];
1478                 }
1479
1480                 VECCOPY(isec.start, ship->co);
1481                 VECCOPY(isec.vec, vec );
1482                 isec.labda = RE_RAYTRACE_MAXDIST;
1483
1484                 if(RE_rayobject_raycast(R.raytree, &isec)) {
1485                         float fac;
1486                         
1487                         /* Warning, This is not that nice, and possibly a bit slow for every ray,
1488                         however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1489                         memset(&shi, 0, sizeof(ShadeInput)); 
1490                         /* end warning! - Campbell */
1491                         
1492                         shade_ray(&isec, &shi, &shr_t);
1493                         fac= isec.labda*isec.labda;
1494                         fac= 1.0f;
1495                         accum[0]+= fac*(shr_t.diff[0]+shr_t.spec[0]);
1496                         accum[1]+= fac*(shr_t.diff[1]+shr_t.spec[1]);
1497                         accum[2]+= fac*(shr_t.diff[2]+shr_t.spec[2]);
1498                         div+= fac;
1499                 }
1500                 else div+= 1.0f;
1501         }
1502         
1503         if(div!=0.0f) {
1504                 shr->diff[0]+= accum[0]/div;
1505                 shr->diff[1]+= accum[1]/div;
1506                 shr->diff[2]+= accum[2]/div;
1507         }
1508         shr->alpha= 1.0f;
1509         
1510         only_one= 0;
1511         return 1;
1512 }
1513
1514 /* aolight: function to create random unit sphere vectors for total random sampling */
1515 static void RandomSpherical(float *v)
1516 {
1517         float r;
1518         v[2] = 2.f*BLI_frand()-1.f;
1519         if ((r = 1.f - v[2]*v[2])>0.f) {
1520                 float a = 6.283185307f*BLI_frand();
1521                 r = sqrt(r);
1522                 v[0] = r * cos(a);
1523                 v[1] = r * sin(a);
1524         }
1525         else v[2] = 1.f;
1526 }
1527
1528 /* calc distributed spherical energy */
1529 static void DS_energy(float *sphere, int tot, float *vec)
1530 {
1531         float *fp, fac, force[3], res[3];
1532         int a;
1533         
1534         res[0]= res[1]= res[2]= 0.0f;
1535         
1536         for(a=0, fp=sphere; a<tot; a++, fp+=3) {
1537                 VecSubf(force, vec, fp);
1538                 fac= force[0]*force[0] + force[1]*force[1] + force[2]*force[2];
1539                 if(fac!=0.0f) {
1540                         fac= 1.0f/fac;
1541                         res[0]+= fac*force[0];
1542                         res[1]+= fac*force[1];
1543                         res[2]+= fac*force[2];
1544                 }
1545         }
1546
1547         VecMulf(res, 0.5);
1548         VecAddf(vec, vec, res);
1549         Normalize(vec);
1550         
1551 }
1552
1553 /* called from convertBlenderScene.c */
1554 /* creates an equally distributed spherical sample pattern */
1555 /* and allocates threadsafe memory */
1556 void init_ao_sphere(World *wrld)
1557 {
1558         float *fp;
1559         int a, tot, iter= 16;
1560
1561         /* we make twice the amount of samples, because only a hemisphere is used */
1562         tot= 2*wrld->aosamp*wrld->aosamp;
1563         
1564         wrld->aosphere= MEM_mallocN(3*tot*sizeof(float), "AO sphere");
1565         
1566         /* fixed random */
1567         BLI_srandom(tot);
1568         
1569         /* init */
1570         fp= wrld->aosphere;
1571         for(a=0; a<tot; a++, fp+= 3) {
1572                 RandomSpherical(fp);
1573         }
1574         
1575         while(iter--) {
1576                 for(a=0, fp= wrld->aosphere; a<tot; a++, fp+= 3) {
1577                         DS_energy(wrld->aosphere, tot, fp);
1578                 }
1579         }
1580         
1581         /* tables */
1582         wrld->aotables= MEM_mallocN(BLENDER_MAX_THREADS*3*tot*sizeof(float), "AO tables");
1583 }
1584
1585 /* give per thread a table, we have to compare xs ys because of way OSA works... */
1586 static float *threadsafe_table_sphere(int test, int thread, int xs, int ys, int tot)
1587 {
1588         static int xso[BLENDER_MAX_THREADS], yso[BLENDER_MAX_THREADS];
1589         static int firsttime= 1;
1590         
1591         if(firsttime) {
1592                 memset(xso, 255, sizeof(xso));
1593                 memset(yso, 255, sizeof(yso));
1594                 firsttime= 0;
1595         }
1596         
1597         if(xs==xso[thread] && ys==yso[thread]) return R.wrld.aotables+ thread*tot*3;
1598         if(test) return NULL;
1599         xso[thread]= xs; yso[thread]= ys;
1600         return R.wrld.aotables+ thread*tot*3;
1601 }
1602
1603 static float *sphere_sampler(int type, int resol, int thread, int xs, int ys)
1604 {
1605         int tot;
1606         float *vec;
1607         
1608         tot= 2*resol*resol;
1609
1610         if (type & WO_AORNDSMP) {
1611                 float *sphere;
1612                 int a;
1613                 
1614                 // always returns table
1615                 sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1616
1617                 /* total random sampling. NOT THREADSAFE! (should be removed, is not useful) */
1618                 vec= sphere;
1619                 for (a=0; a<tot; a++, vec+=3) {
1620                         RandomSpherical(vec);
1621                 }
1622                 
1623                 return sphere;
1624         } 
1625         else {
1626                 float *sphere;
1627                 float cosfi, sinfi, cost, sint;
1628                 float ang, *vec1;
1629                 int a;
1630                 
1631                 // returns table if xs and ys were equal to last call
1632                 sphere= threadsafe_table_sphere(1, thread, xs, ys, tot);
1633                 if(sphere==NULL) {
1634                         sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1635                         
1636                         // random rotation
1637                         ang= BLI_thread_frand(thread);
1638                         sinfi= sin(ang); cosfi= cos(ang);
1639                         ang= BLI_thread_frand(thread);
1640                         sint= sin(ang); cost= cos(ang);
1641                         
1642                         vec= R.wrld.aosphere;
1643                         vec1= sphere;
1644                         for (a=0; a<tot; a++, vec+=3, vec1+=3) {
1645                                 vec1[0]= cost*cosfi*vec[0] - sinfi*vec[1] + sint*cosfi*vec[2];
1646                                 vec1[1]= cost*sinfi*vec[0] + cosfi*vec[1] + sint*sinfi*vec[2];
1647                                 vec1[2]= -sint*vec[0] + cost*vec[2];                    
1648                         }
1649                 }
1650                 return sphere;
1651         }
1652 }
1653
1654 static void ray_ao_qmc(ShadeInput *shi, float *shadfac)
1655 {
1656         Isect isec;
1657         QMCSampler *qsa=NULL;
1658         float samp3d[3];
1659         float up[3], side[3], dir[3], nrm[3];
1660         
1661         float maxdist = R.wrld.aodist;
1662         float fac=0.0f, prev=0.0f;
1663         float adapt_thresh = R.wrld.ao_adapt_thresh;
1664         float adapt_speed_fac = R.wrld.ao_adapt_speed_fac;
1665         
1666         int samples=0;
1667         int max_samples = R.wrld.aosamp*R.wrld.aosamp;
1668         
1669         float dxyview[3], skyadded=0, div;
1670         int aocolor;
1671         
1672         isec.orig.ob   = shi->obi;
1673         isec.orig.face = shi->vlr;
1674         isec.skip = RE_SKIP_VLR_NEIGHBOUR;
1675
1676         isec.hit.ob   = 0;
1677         isec.hit.face = 0;
1678
1679         isec.last_hit = NULL;
1680         
1681         isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1682         isec.lay= -1;
1683         
1684         shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1685         
1686         /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1687         aocolor= R.wrld.aocolor;
1688         if(shi->mat->mode & MA_ONLYSHADOW)
1689                 aocolor= WO_AOPLAIN;
1690         
1691         if(aocolor == WO_AOSKYTEX) {
1692                 dxyview[0]= 1.0f/(float)R.wrld.aosamp;
1693                 dxyview[1]= 1.0f/(float)R.wrld.aosamp;
1694                 dxyview[2]= 0.0f;
1695         }
1696         
1697         if(shi->vlr->flag & R_SMOOTH) {
1698                 VECCOPY(nrm, shi->vn);
1699         }
1700         else {
1701                 VECCOPY(nrm, shi->facenor);
1702         }
1703         
1704         VecOrthoBasisf(nrm, up, side);
1705         
1706         /* sampling init */
1707         if (R.wrld.ao_samp_method==WO_AOSAMP_HALTON) {
1708                 float speedfac;
1709                 
1710                 speedfac = get_avg_speed(shi) * adapt_speed_fac;
1711                 CLAMP(speedfac, 1.0, 1000.0);
1712                 max_samples /= speedfac;
1713                 if (max_samples < 5) max_samples = 5;
1714                 
1715                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
1716         } else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY)
1717                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
1718
1719         QMC_initPixel(qsa, shi->thread);
1720         
1721         while (samples < max_samples) {
1722
1723                 /* sampling, returns quasi-random vector in unit hemisphere */
1724                 QMC_sampleHemi(samp3d, qsa, shi->thread, samples);
1725
1726                 dir[0] = (samp3d[0]*up[0] + samp3d[1]*side[0] + samp3d[2]*nrm[0]);
1727                 dir[1] = (samp3d[0]*up[1] + samp3d[1]*side[1] + samp3d[2]*nrm[1]);
1728                 dir[2] = (samp3d[0]*up[2] + samp3d[1]*side[2] + samp3d[2]*nrm[2]);
1729                 
1730                 Normalize(dir);
1731                         
1732                 VECCOPY(isec.start, shi->co);
1733                 isec.vec[0] = -dir[0];
1734                 isec.vec[1] = -dir[1];
1735                 isec.vec[2] = -dir[2];
1736                 isec.labda = maxdist;
1737                 
1738                 prev = fac;
1739                 
1740                 if(RE_rayobject_raycast(R.raytree, &isec)) {
1741                         if (R.wrld.aomode & WO_AODIST) fac+= exp(-isec.labda*R.wrld.aodistfac); 
1742                         else fac+= 1.0f;
1743                 }
1744                 else if(aocolor!=WO_AOPLAIN) {
1745                         float skycol[4];
1746                         float skyfac, view[3];
1747                         
1748                         view[0]= -dir[0];
1749                         view[1]= -dir[1];
1750                         view[2]= -dir[2];
1751                         Normalize(view);
1752                         
1753                         if(aocolor==WO_AOSKYCOL) {
1754                                 skyfac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1755                                 shadfac[0]+= (1.0f-skyfac)*R.wrld.horr + skyfac*R.wrld.zenr;
1756                                 shadfac[1]+= (1.0f-skyfac)*R.wrld.horg + skyfac*R.wrld.zeng;
1757                                 shadfac[2]+= (1.0f-skyfac)*R.wrld.horb + skyfac*R.wrld.zenb;
1758                         }
1759                         else {  /* WO_AOSKYTEX */
1760                                 shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
1761                                 shadeSunView(skycol, shi->view);
1762                                 shadfac[0]+= skycol[0];
1763                                 shadfac[1]+= skycol[1];
1764                                 shadfac[2]+= skycol[2];
1765                         }
1766                         skyadded++;
1767                 }
1768                 
1769                 samples++;
1770                 
1771                 if (qsa->type == SAMP_TYPE_HALTON) {
1772                         /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */          
1773                         if (adapt_thresh > 0.0 && (samples > max_samples/2) ) {
1774                                 
1775                                 if (adaptive_sample_contrast_val(samples, prev, fac, adapt_thresh)) {
1776                                         break;
1777                                 }
1778                         }
1779                 }
1780         }
1781         
1782         if(aocolor!=WO_AOPLAIN && skyadded) {
1783                 div= (1.0f - fac/(float)samples)/((float)skyadded);
1784                 
1785                 shadfac[0]*= div;       // average color times distances/hits formula
1786                 shadfac[1]*= div;       // average color times distances/hits formula
1787                 shadfac[2]*= div;       // average color times distances/hits formula
1788         } else {
1789                 shadfac[0]= shadfac[1]= shadfac[2]= 1.0f - fac/(float)samples;
1790         }
1791         
1792         if (qsa)
1793                 release_thread_qmcsampler(&R, shi->thread, qsa);
1794 }
1795
1796 /* extern call from shade_lamp_loop, ambient occlusion calculus */
1797 static void ray_ao_spheresamp(ShadeInput *shi, float *shadfac)
1798 {
1799         Isect isec;
1800         float *vec, *nrm, div, bias, sh=0.0f;
1801         float maxdist = R.wrld.aodist;
1802         float dxyview[3];
1803         int j= -1, tot, actual=0, skyadded=0, aocolor, resol= R.wrld.aosamp;
1804         
1805         isec.orig.ob   = shi->obi;
1806         isec.orig.face = shi->vlr;
1807         isec.skip = RE_SKIP_VLR_NEIGHBOUR;
1808
1809         isec.hit.ob   = 0;
1810         isec.hit.face = 0;
1811         
1812         isec.last_hit = NULL;
1813         
1814         isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1815         isec.lay= -1;
1816
1817
1818         shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1819
1820         /* bias prevents smoothed faces to appear flat */
1821         if(shi->vlr->flag & R_SMOOTH) {
1822                 bias= R.wrld.aobias;
1823                 nrm= shi->vn;
1824         }
1825         else {
1826                 bias= 0.0f;
1827                 nrm= shi->facenor;
1828         }
1829
1830         /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1831         aocolor= R.wrld.aocolor;
1832         if(shi->mat->mode & MA_ONLYSHADOW)
1833                 aocolor= WO_AOPLAIN;
1834         
1835         if(resol>32) resol= 32;
1836         
1837         vec= sphere_sampler(R.wrld.aomode, resol, shi->thread, shi->xs, shi->ys);
1838         
1839         // warning: since we use full sphere now, and dotproduct is below, we do twice as much
1840         tot= 2*resol*resol;
1841
1842         if(aocolor == WO_AOSKYTEX) {
1843                 dxyview[0]= 1.0f/(float)resol;
1844                 dxyview[1]= 1.0f/(float)resol;
1845                 dxyview[2]= 0.0f;
1846         }
1847         
1848         while(tot--) {
1849                 
1850                 if ((vec[0]*nrm[0] + vec[1]*nrm[1] + vec[2]*nrm[2]) > bias) {
1851                         /* only ao samples for mask */
1852                         if(R.r.mode & R_OSA) {
1853                                 j++;
1854                                 if(j==R.osa) j= 0;
1855                                 if(!(shi->mask & (1<<j))) {
1856                                         vec+=3;
1857                                         continue;
1858                                 }
1859                         }
1860                         
1861                         actual++;
1862                         
1863                         /* always set start/vec/labda */
1864                         VECCOPY(isec.start, shi->co);
1865                         isec.vec[0] = -vec[0];
1866                         isec.vec[1] = -vec[1];
1867                         isec.vec[2] = -vec[2];
1868                         isec.labda = maxdist;
1869                         
1870                         /* do the trace */
1871                         if(RE_rayobject_raycast(R.raytree, &isec)) {
1872                                 if (R.wrld.aomode & WO_AODIST) sh+= exp(-isec.labda*R.wrld.aodistfac); 
1873                                 else sh+= 1.0f;
1874                         }
1875                         else if(aocolor!=WO_AOPLAIN) {
1876                                 float skycol[4];
1877                                 float fac, view[3];
1878                                 
1879                                 view[0]= -vec[0];
1880                                 view[1]= -vec[1];
1881                                 view[2]= -vec[2];
1882                                 Normalize(view);
1883                                 
1884                                 if(aocolor==WO_AOSKYCOL) {
1885                                         fac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1886                                         shadfac[0]+= (1.0f-fac)*R.wrld.horr + fac*R.wrld.zenr;
1887                                         shadfac[1]+= (1.0f-fac)*R.wrld.horg + fac*R.wrld.zeng;
1888                                         shadfac[2]+= (1.0f-fac)*R.wrld.horb + fac*R.wrld.zenb;
1889                                 }
1890                                 else {  /* WO_AOSKYTEX */
1891                                         shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
1892                                         shadeSunView(skycol, shi->view);
1893                                         shadfac[0]+= skycol[0];
1894                                         shadfac[1]+= skycol[1];
1895                                         shadfac[2]+= skycol[2];
1896                                 }
1897                                 skyadded++;
1898                         }
1899                 }
1900                 // samples
1901                 vec+= 3;
1902         }
1903         
1904         if(actual==0) sh= 1.0f;
1905         else sh = 1.0f - sh/((float)actual);
1906         
1907         if(aocolor!=WO_AOPLAIN && skyadded) {
1908                 div= sh/((float)skyadded);
1909                 
1910                 shadfac[0]*= div;       // average color times distances/hits formula
1911                 shadfac[1]*= div;       // average color times distances/hits formula
1912                 shadfac[2]*= div;       // average color times distances/hits formula
1913         }
1914         else {
1915                 shadfac[0]= shadfac[1]= shadfac[2]= sh;
1916         }
1917 }
1918
1919 void ray_ao(ShadeInput *shi, float *shadfac)
1920 {
1921         /* Unfortunately, the unusual way that the sphere sampler calculates roughly twice as many
1922          * samples as are actually traced, and skips them based on bias and OSA settings makes it very difficult
1923          * to reuse code between these two functions. This is the easiest way I can think of to do it
1924          * --broken */
1925         if (ELEM(R.wrld.ao_samp_method, WO_AOSAMP_HAMMERSLEY, WO_AOSAMP_HALTON))
1926                 ray_ao_qmc(shi, shadfac);
1927         else if (R.wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
1928                 ray_ao_spheresamp(shi, shadfac);
1929 }
1930
1931 static void ray_shadow_jittered_coords(ShadeInput *shi, int max, float jitco[RE_MAX_OSA][3], int *totjitco)
1932 {
1933         /* magic numbers for reordering sample positions to give better
1934          * results with adaptive sample, when it usually only takes 4 samples */
1935         int order8[8] = {0, 1, 5, 6, 2, 3, 4, 7};
1936         int order11[11] = {1, 3, 8, 10, 0, 2, 4, 5, 6, 7, 9};
1937         int order16[16] = {1, 3, 9, 12, 0, 6, 7, 8, 13, 2, 4, 5, 10, 11, 14, 15};
1938         int count = count_mask(shi->mask);
1939
1940         /* for better antialising shadow samples are distributed over the subpixel
1941          * sample coordinates, this only works for raytracing depth 0 though */
1942         if(!shi->strand && shi->depth == 0 && count > 1 && count <= max) {
1943                 float xs, ys, zs, view[3];
1944                 int samp, ordsamp, tot= 0;
1945
1946                 for(samp=0; samp<R.osa; samp++) {
1947                         if(R.osa == 8) ordsamp = order8[samp];
1948                         else if(R.osa == 11) ordsamp = order11[samp];
1949                         else if(R.osa == 16) ordsamp = order16[samp];
1950                         else ordsamp = samp;
1951
1952                         if(shi->mask & (1<<ordsamp)) {
1953                                 /* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */
1954                                 xs= (float)shi->scanco[0] + R.jit[ordsamp][0] + 0.5f;
1955                                 ys= (float)shi->scanco[1] + R.jit[ordsamp][1] + 0.5f;
1956                                 zs= shi->scanco[2];
1957
1958                                 shade_input_calc_viewco(shi, xs, ys, zs, view, NULL, jitco[tot], NULL, NULL);
1959                                 tot++;
1960                         }
1961                 }
1962
1963                 *totjitco= tot;
1964         }
1965         else {
1966                 VECCOPY(jitco[0], shi->co);
1967                 *totjitco= 1;
1968         }
1969 }
1970
1971 static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, int lampvec, float *shadfac, Isect *isec)
1972 {
1973         QMCSampler *qsa=NULL;
1974         int samples=0;
1975         float samp3d[3];
1976
1977         float fac=0.0f, vec[3], end[3];
1978         float colsq[4];
1979         float adapt_thresh = lar->adapt_thresh;
1980         int min_adapt_samples=4, max_samples = lar->ray_totsamp;
1981         float *co;
1982         int do_soft=1, full_osa=0;
1983
1984         float jitco[RE_MAX_OSA][3];
1985         int totjitco;
1986
1987         colsq[0] = colsq[1] = colsq[2] = 0.0;
1988         if(isec->mode==RE_RAY_SHADOW_TRA) {
1989                 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
1990         } else
1991                 shadfac[3]= 1.0f;
1992         
1993         if (lar->ray_totsamp < 2) do_soft = 0;
1994         if ((R.r.mode & R_OSA) && (R.osa > 0) && (shi->vlr->flag & R_FULL_OSA)) full_osa = 1;
1995         
1996         if (full_osa) {
1997                 if (do_soft) max_samples  = max_samples/R.osa + 1;
1998                 else max_samples = 1;
1999         } else {
2000                 if (do_soft) max_samples = lar->ray_totsamp;
2001                 else max_samples = (R.osa > 4)?R.osa:5;
2002         }
2003         
2004         ray_shadow_jittered_coords(shi, max_samples, jitco, &totjitco);
2005
2006         /* sampling init */
2007         if (lar->ray_samp_method==LA_SAMP_HALTON)
2008                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
2009         else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY)
2010                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
2011         
2012         QMC_initPixel(qsa, shi->thread);
2013         
2014         VECCOPY(vec, lampco);
2015         
2016         isec->skip = RE_SKIP_VLR_NEIGHBOUR;
2017         while (samples < max_samples) {
2018
2019                 isec->orig.ob   = shi->obi;
2020                 isec->orig.face = shi->vlr;
2021
2022                 /* manually jitter the start shading co-ord per sample
2023                  * based on the pre-generated OSA texture sampling offsets, 
2024                  * for anti-aliasing sharp shadow edges. */
2025                 co = jitco[samples % totjitco];
2026
2027                 if (do_soft) {
2028                         /* sphere shadow source */
2029                         if (lar->type == LA_LOCAL) {
2030                                 float ru[3], rv[3], v[3], s[3];
2031                                 
2032                                 /* calc tangent plane vectors */
2033                                 v[0] = co[0] - lampco[0];
2034                                 v[1] = co[1] - lampco[1];
2035                                 v[2] = co[2] - lampco[2];
2036                                 Normalize(v);
2037                                 VecOrthoBasisf(v, ru, rv);
2038                                 
2039                                 /* sampling, returns quasi-random vector in area_size disc */
2040                                 QMC_sampleDisc(samp3d, qsa, shi->thread, samples,lar->area_size);
2041
2042                                 /* distribute disc samples across the tangent plane */
2043                                 s[0] = samp3d[0]*ru[0] + samp3d[1]*rv[0];
2044                                 s[1] = samp3d[0]*ru[1] + samp3d[1]*rv[1];
2045                                 s[2] = samp3d[0]*ru[2] + samp3d[1]*rv[2];
2046                                 
2047                                 VECCOPY(samp3d, s);
2048                         }
2049                         else {
2050                                 /* sampling, returns quasi-random vector in [sizex,sizey]^2 plane */
2051                                 QMC_sampleRect(samp3d, qsa, shi->thread, samples, lar->area_size, lar->area_sizey);
2052                                                                 
2053                                 /* align samples to lamp vector */
2054                                 Mat3MulVecfl(lar->mat, samp3d);
2055                         }
2056                         end[0] = vec[0]+samp3d[0];
2057                         end[1] = vec[1]+samp3d[1];
2058                         end[2] = vec[2]+samp3d[2];
2059                 } else {
2060                         VECCOPY(end, vec);
2061                 }
2062
2063                 if(shi->strand) {
2064                         /* bias away somewhat to avoid self intersection */
2065                         float jitbias= 0.5f*(VecLength(shi->dxco) + VecLength(shi->dyco));
2066                         float v[3];
2067
2068                         VECSUB(v, co, end);
2069                         Normalize(v);
2070
2071                         co[0] -= jitbias*v[0];
2072                         co[1] -= jitbias*v[1];
2073                         co[2] -= jitbias*v[2];
2074                 }
2075
2076                 VECCOPY(isec->start, co);
2077                 if(lampvec)
2078                 {
2079                         isec->vec[0] = end[0];
2080                         isec->vec[1] = end[1];
2081                         isec->vec[2] = end[2];
2082                         isec->labda = RE_RAYTRACE_MAXDIST;
2083                 }
2084                 else
2085                 {
2086                         isec->vec[0] = end[0]-isec->start[0];
2087                         isec->vec[1] = end[1]-isec->start[1];
2088                         isec->vec[2] = end[2]-isec->start[2];
2089                         isec->labda = 1.0f; // * Normalize(isec->vec);
2090                 }
2091                 
2092                 /* trace the ray */
2093                 if(isec->mode==RE_RAY_SHADOW_TRA) {
2094                         isec->col[0]= isec->col[1]= isec->col[2]=  1.0f;
2095                         isec->col[3]= 1.0f;
2096                         
2097                         ray_trace_shadow_tra(isec, shi, DEPTH_SHADOW_TRA, 0);
2098                         shadfac[0] += isec->col[0];
2099                         shadfac[1] += isec->col[1];
2100                         shadfac[2] += isec->col[2];
2101                         shadfac[3] += isec->col[3];
2102                         
2103                         /* for variance calc */
2104                         colsq[0] += isec->col[0]*isec->col[0];
2105                         colsq[1] += isec->col[1]*isec->col[1];
2106                         colsq[2] += isec->col[2]*isec->col[2];
2107                 }
2108                 else {
2109                         if( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f;
2110                 }
2111                 
2112                 samples++;
2113                 
2114                 if ((lar->ray_samp_method == LA_SAMP_HALTON)) {
2115                 
2116                         /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
2117                         if ((max_samples > min_adapt_samples) && (adapt_thresh > 0.0) && (samples > max_samples / 3)) {
2118                                 if (isec->mode==RE_RAY_SHADOW_TRA) {
2119                                         if ((shadfac[3] / samples > (1.0-adapt_thresh)) || (shadfac[3] / samples < adapt_thresh))
2120                                                 break;
2121                                         else if (adaptive_sample_variance(samples, shadfac, colsq, adapt_thresh))
2122                                                 break;
2123                                 } else {
2124                                         if ((fac / samples > (1.0-adapt_thresh)) || (fac / samples < adapt_thresh))
2125                                                 break;
2126                                 }
2127                         }
2128                 }
2129         }
2130         
2131         if(isec->mode==RE_RAY_SHADOW_TRA) {
2132                 shadfac[0] /= samples;
2133                 shadfac[1] /= samples;
2134                 shadfac[2] /= samples;
2135                 shadfac[3] /= samples;
2136         } else
2137                 shadfac[3]= 1.0f-fac/samples;
2138
2139         if (qsa)
2140                 release_thread_qmcsampler(&R, shi->thread, qsa);
2141 }
2142
2143 static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, float *lampco, int lampvec, float *shadfac, Isect *isec)
2144 {
2145         /* area soft shadow */
2146         float *jitlamp;
2147         float fac=0.0f, div=0.0f, vec[3];
2148         int a, j= -1, mask;
2149         
2150         if(isec->mode==RE_RAY_SHADOW_TRA) {
2151                 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
2152         }
2153         else shadfac[3]= 1.0f;
2154         
2155         fac= 0.0f;
2156         jitlamp= give_jitter_plane(lar, shi->thread, shi->xs, shi->ys);
2157
2158         a= lar->ray_totsamp;
2159         
2160         /* this correction to make sure we always take at least 1 sample */
2161         mask= shi->mask;
2162         if(a==4) mask |= (mask>>4)|(mask>>8);
2163         else if(a==9) mask |= (mask>>9);
2164         
2165         while(a--) {
2166                 
2167                 if(R.r.mode & R_OSA) {
2168                         j++;
2169                         if(j>=R.osa) j= 0;
2170                         if(!(mask & (1<<j))) {
2171                                 jitlamp+= 2;
2172                                 continue;
2173                         }
2174                 }
2175                 
2176                 isec->orig.ob   = shi->obi;
2177                 isec->orig.face = shi->vlr;
2178                 
2179                 vec[0]= jitlamp[0];
2180                 vec[1]= jitlamp[1];
2181                 vec[2]= 0.0f;
2182                 Mat3MulVecfl(lar->mat, vec);
2183                 
2184                 /* set start and vec */
2185                 VECCOPY(isec->start, shi->co);
2186                 if(lampvec)
2187                 {
2188                         isec->vec[0] = vec[0]+lampco[0];
2189                         isec->vec[1] = vec[1]+lampco[1];
2190                         isec->vec[2] = vec[2]+lampco[2];
2191                         isec->labda = RE_RAYTRACE_MAXDIST;
2192                 }
2193                 else
2194                 {
2195                         isec->vec[0] = vec[0]+lampco[0]-shi->co[0];
2196                         isec->vec[1] = vec[1]+lampco[1]-shi->co[1];
2197                         isec->vec[2] = vec[2]+lampco[2]-shi->co[2];
2198                         isec->labda = 1.0f;
2199                 }
2200                 isec->skip = RE_SKIP_VLR_NEIGHBOUR;
2201                 
2202                 if(isec->mode==RE_RAY_SHADOW_TRA) {
2203                         /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2204                         isec->col[0]= isec->col[1]= isec->col[2]=  1.0f;
2205                         isec->col[3]= 1.0f;
2206                         
2207                         ray_trace_shadow_tra(isec, shi, DEPTH_SHADOW_TRA, 0);
2208                         shadfac[0] += isec->col[0];
2209                         shadfac[1] += isec->col[1];
2210                         shadfac[2] += isec->col[2];
2211                         shadfac[3] += isec->col[3];
2212                 }
2213                 else if( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f;
2214                 
2215                 div+= 1.0f;
2216                 jitlamp+= 2;
2217         }
2218         
2219         if(isec->mode==RE_RAY_SHADOW_TRA) {
2220                 shadfac[0] /= div;
2221                 shadfac[1] /= div;
2222                 shadfac[2] /= div;
2223                 shadfac[3] /= div;
2224         }
2225         else {
2226                 // sqrt makes nice umbra effect
2227                 if(lar->ray_samp_type & LA_SAMP_UMBRA)
2228                         shadfac[3]= sqrt(1.0f-fac/div);
2229                 else
2230                         shadfac[3]= 1.0f-fac/div;
2231         }
2232 }
2233 /* extern call from shade_lamp_loop */
2234 void ray_shadow(ShadeInput *shi, LampRen *lar, float *shadfac)
2235 {
2236         Isect isec;
2237         float lampco[3];
2238         int lampvec; /* indicates if lampco is a vector lamp */
2239
2240         /* setup isec */
2241         if(shi->mat->mode & MA_SHADOW_TRA) isec.mode= RE_RAY_SHADOW_TRA;
2242         else isec.mode= RE_RAY_SHADOW;
2243         
2244         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW))
2245                 isec.lay= lar->lay;
2246         else
2247                 isec.lay= -1;
2248
2249         /* only when not mir tracing, first hit optimm */
2250         if(shi->depth==0) {
2251                 isec.last_hit = lar->last_hit[shi->thread];
2252         }
2253         else {
2254                 isec.last_hit = NULL;
2255         }
2256         
2257         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2258                 lampco[0]= -lar->vec[0];
2259                 lampco[1]= -lar->vec[1];
2260                 lampco[2]= -lar->vec[2];
2261                 lampvec = 1;
2262         }
2263         else {
2264                 VECCOPY(lampco, lar->co);
2265                 lampvec = 0;
2266         }
2267         
2268         if (ELEM(lar->ray_samp_method, LA_SAMP_HALTON, LA_SAMP_HAMMERSLEY)) {
2269                 
2270                 ray_shadow_qmc(shi, lar, lampco, lampvec, shadfac, &isec);
2271                 
2272         } else {
2273                 if(lar->ray_totsamp<2) {
2274                         
2275                         isec.orig.ob   = shi->obi;
2276                         isec.orig.face = shi->vlr;
2277                         
2278                         shadfac[3]= 1.0f; // 1.0=full light
2279                         
2280                         /* set up isec vec */
2281                         VECCOPY(isec.start, shi->co);
2282                         if(lampvec)
2283                         {
2284                                 VECCOPY(isec.vec, lampco);
2285                                 isec.labda = RE_RAYTRACE_MAXDIST;
2286                         }
2287                         else
2288                         {
2289                                 VECSUB(isec.vec, lampco, isec.start);
2290                                 isec.labda = 1.0f;
2291                         }
2292
2293                         if(isec.mode==RE_RAY_SHADOW_TRA) {
2294                                 /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2295                                 isec.col[0]= isec.col[1]= isec.col[2]=  1.0f;
2296                                 isec.col[3]= 1.0f;
2297
2298                                 ray_trace_shadow_tra(&isec, shi, DEPTH_SHADOW_TRA, 0);
2299                                 QUATCOPY(shadfac, isec.col);
2300                         }
2301                         else if(RE_rayobject_raycast(R.raytree, &isec))
2302                                 shadfac[3]= 0.0f;
2303                 }
2304                 else {
2305                         ray_shadow_jitter(shi, lar, lampco, lampvec, shadfac, &isec);
2306                 }
2307         }
2308                 
2309         /* for first hit optim, set last interesected shadow face */
2310         if(shi->depth==0) {
2311                 lar->last_hit[shi->thread] = isec.last_hit;
2312         }
2313
2314 }
2315
2316 #if 0
2317 /* only when face points away from lamp, in direction of lamp, trace ray and find first exit point */
2318 static void ray_translucent(ShadeInput *shi, LampRen *lar, float *distfac, float *co)
2319 {
2320         Isect isec;
2321         float lampco[3];
2322         
2323         assert(0);
2324         
2325         /* setup isec */
2326         isec.mode= RE_RAY_SHADOW_TRA;
2327         
2328         if(lar->mode & LA_LAYER) isec.lay= lar->lay; else isec.lay= -1;
2329         
2330         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2331                 lampco[0]= shi->co[0] - RE_RAYTRACE_MAXDIST*lar->vec[0];
2332                 lampco[1]= shi->co[1] - RE_RAYTRACE_MAXDIST*lar->vec[1];
2333                 lampco[2]= shi->co[2] - RE_RAYTRACE_MAXDIST*lar->vec[2];
2334         }
2335         else {
2336                 VECCOPY(lampco, lar->co);
2337         }
2338         
2339         isec.orig.ob   = shi->obi;
2340         isec.orig.face = shi->vlr;
2341         
2342         /* set up isec vec */
2343         VECCOPY(isec.start, shi->co);
2344         VECCOPY(isec.end, lampco);
2345         
2346         if(RE_rayobject_raycast(R.raytree, &isec)) {
2347                 /* we got a face */
2348                 
2349                 /* render co */
2350                 co[0]= isec.start[0]+isec.labda*(isec.vec[0]);
2351                 co[1]= isec.start[1]+isec.labda*(isec.vec[1]);
2352                 co[2]= isec.start[2]+isec.labda*(isec.vec[2]);
2353                 
2354                 *distfac= VecLength(isec.vec);
2355         }
2356         else
2357                 *distfac= 0.0f;
2358 }
2359
2360 #endif
2361