*ray<->bb hit optimization... just to make it "slightly" faster :P
[blender.git] / source / blender / render / intern / source / rayshade.c
1 /**
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version. 
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
21  * All rights reserved.
22  *
23  * Contributors: 2004/2005 Blender Foundation, full recode
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 #include <math.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <float.h>
32 #include <assert.h>
33
34 #include "MEM_guardedalloc.h"
35
36 #include "DNA_material_types.h"
37 #include "DNA_lamp_types.h"
38
39 #include "BKE_global.h"
40 #include "BKE_node.h"
41 #include "BKE_utildefines.h"
42
43 #include "BLI_arithb.h"
44 #include "BLI_blenlib.h"
45 #include "BLI_jitter.h"
46 #include "BLI_rand.h"
47
48 #include "PIL_time.h"
49
50 #include "render_types.h"
51 #include "renderpipeline.h"
52 #include "rendercore.h"
53 #include "renderdatabase.h"
54 #include "pixelblending.h"
55 #include "pixelshading.h"
56 #include "shading.h"
57 #include "texture.h"
58
59 #include "RE_raytrace.h"
60 #include "rayobject.h"
61
62 #define RAY_TRA         1
63 #define RAY_TRAFLIP     2
64
65 #define DEPTH_SHADOW_TRA  10
66
67 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
68 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
69 /* only to be used here in this file, it's for speed */
70 extern struct Render R;
71 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
72
73 #if 0
74 static int vlr_check_intersect(Isect *is, int ob, RayFace *face)
75 {
76         ObjectInstanceRen *obi= RAY_OBJECT_GET((Render*)is->userdata, ob);
77         VlakRen *vlr = (VlakRen*)face;
78
79         /* for baking selected to active non-traceable materials might still
80          * be in the raytree */
81         if(!(vlr->mat->mode & MA_TRACEBLE))
82                 return 0;
83
84         /* I know... cpu cycle waste, might do smarter once */
85         if(is->mode==RE_RAY_MIRROR)
86                 return !(vlr->mat->mode & MA_ONLYCAST);
87         else
88                 return (is->lay & obi->lay);
89 }
90 #endif
91
92 void freeraytree(Render *re)
93 {
94         ObjectInstanceRen *obi;
95         
96         if(re->raytree)
97         {
98                 RE_rayobject_free(re->raytree);
99                 re->raytree = NULL;
100         }
101         if(re->rayfaces)
102         {
103                 MEM_freeN(re->rayfaces);
104                 re->rayfaces = NULL;
105         }
106
107         for(obi=re->instancetable.first; obi; obi=obi->next)
108         {
109                 ObjectRen *obr = obi->obr;
110                 if(obr->raytree)
111                 {
112                         RE_rayobject_free(obr->raytree);
113                         obr->raytree = NULL;
114                 }
115                 if(obr->rayfaces)
116                 {
117                         MEM_freeN(obr->rayfaces);
118                         obr->rayfaces = NULL;
119                 }
120                 if(obi->raytree)
121                 {
122                         RE_rayobject_free(obi->raytree);
123                         obi->raytree = NULL;
124                 }
125         }
126 }
127
128 static int is_raytraceable_vlr(Render *re, VlakRen *vlr)
129 {
130         if((re->flag & R_BAKE_TRACE) || (vlr->mat->mode & MA_TRACEBLE))
131         if((vlr->mat->mode & MA_WIRE)==0)
132                 return 1;
133         return 0;
134 }
135
136 static int is_raytraceable(Render *re, ObjectInstanceRen *obi)
137 {
138         int v;
139         ObjectRen *obr = obi->obr;
140
141         if(re->excludeob && obr->ob == re->excludeob)
142                 return 0;
143
144         for(v=0;v<obr->totvlak;v++)
145         {
146                 VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
147                 if(is_raytraceable_vlr(re, vlr))
148                         return 1;
149         }
150         return 0;
151 }
152
153 RayObject* makeraytree_object(Render *re, ObjectInstanceRen *obi)
154 {
155         //TODO
156         // out-of-memory safeproof
157         // break render
158         // update render stats
159         ObjectRen *obr = obi->obr;
160         
161         if(obr->raytree == NULL)
162         {
163                 RayObject *raytree;
164                 RayFace *face;
165                 int v;
166                 
167                 //Count faces
168                 int faces = 0;
169                 for(v=0;v<obr->totvlak;v++)
170                 {
171                         VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
172                         if(is_raytraceable_vlr(re, vlr))
173                                 faces++;
174                 }
175                 assert( faces > 0 );
176
177                 //Create Ray cast accelaration structure
178
179                 //TODO dynamic ocres
180 //              raytree = obr->raytree = RE_rayobject_octree_create( re->r.ocres, faces );
181                 raytree = obr->raytree = RE_rayobject_bvh_create( faces );
182                 face = obr->rayfaces = (RayFace*)MEM_callocN(faces*sizeof(RayFace), "ObjectRen faces");
183                 obr->rayobi = obi;
184                 
185                 for(v=0;v<obr->totvlak;v++)
186                 {
187                         VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
188                         if(is_raytraceable_vlr(re, vlr))
189                         {
190                                 face->v1 = vlr->v1->co;
191                                 face->v2 = vlr->v2->co;
192                                 face->v3 = vlr->v3->co;
193                                 face->v4 = vlr->v4 ? vlr->v4->co : NULL;
194                                 
195                                 face->ob   = obi;
196                                 face->face = vlr;
197                                 
198                                 RE_rayobject_add( raytree, (RayObject*)face++ );
199                         }
200                 }
201                 RE_rayobject_done( raytree );
202         }
203
204
205         if(obi->flag & R_TRANSFORMED)
206         {
207                 obi->raytree = RE_rayobject_instance_create( obr->raytree, obi->mat, obi, obi->obr->rayobi );
208         }
209         
210         if(obi->raytree) return obi->raytree;
211         return obi->obr->raytree;
212 }
213
214 /*
215  * create an hierarchic raytrace structure with all objects
216  *
217  * R_TRANSFORMED objects instances reuse the same tree by using the rayobject_instance
218  */
219 static void makeraytree_hier(Render *re)
220 {
221         //TODO
222         // out-of-memory safeproof
223         // break render
224         // update render stats
225
226         ObjectInstanceRen *obi;
227         int num_objects = 0;
228
229         re->i.infostr="Creating raytrace structure";
230         re->stats_draw(re->sdh, &re->i);
231
232         //Count number of objects
233         for(obi=re->instancetable.first; obi; obi=obi->next)
234         if(is_raytraceable(re, obi))
235                 num_objects++;
236
237         //Create raytree
238         re->raytree = RE_rayobject_bvh_create( num_objects );
239         
240         for(obi=re->instancetable.first; obi; obi=obi->next)
241         if(is_raytraceable(re, obi))
242         {
243                 RayObject *obj = makeraytree_object(re, obi);
244                 RE_rayobject_add( re->raytree, obj );
245
246                 if(re->test_break(re->tbh))
247                         break;
248         }
249
250         if(!re->test_break(re->tbh))
251         {
252                 RE_rayobject_done( re->raytree );
253         }
254
255         re->i.infostr= NULL;
256         re->stats_draw(re->sdh, &re->i);
257 }
258
259 /*
260  * create a single raytrace structure with all faces
261  */
262 static void makeraytree_single(Render *re)
263 {
264         ObjectInstanceRen *obi;
265         RayObject *raytree;
266         RayFace *face;
267         int faces = 0, obs = 0;
268
269         for(obi=re->instancetable.first; obi; obi=obi->next)
270         if(is_raytraceable(re, obi))
271         {
272                 int v;
273                 ObjectRen *obr = obi->obr;
274                 obs++;
275                 
276                 assert((obi->flag & R_TRANSFORMED) == 0); //Not suported
277         
278                 for(v=0;v<obr->totvlak;v++)
279                 {
280                         VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
281                         if(is_raytraceable_vlr(re, vlr))
282                                 faces++;
283                 }
284         }
285         
286         //Create raytree
287 //      raytree = re->raytree   = RE_rayobject_octree_create(re->r.ocres, faces);
288         raytree = re->raytree   = RE_rayobject_bvh_create(faces);
289         face    = re->rayfaces  = (RayFace*)MEM_callocN(faces*sizeof(RayFace), "Render ray faces");
290         
291         for(obi=re->instancetable.first; obi; obi=obi->next)
292         if(is_raytraceable(re, obi))
293         {
294                 int v;
295                 ObjectRen *obr = obi->obr;
296
297                 for(v=0;v<obr->totvlak;v++)
298                 {
299                         VlakRen *vlr = obr->vlaknodes[v>>8].vlak + (v&255);
300                         face->v1 = vlr->v1->co;
301                         face->v2 = vlr->v2->co;
302                         face->v3 = vlr->v3->co;
303                         face->v4 = vlr->v4 ? vlr->v4->co : NULL;
304                         
305                         face->ob   = obi;
306                         face->face = vlr;
307                         
308                         RE_rayobject_add( raytree, (RayObject*)face++ );
309                 }
310         }
311         RE_rayobject_done( raytree );   
312 }
313
314 void makeraytree(Render *re)
315 {
316         if(1)
317                 makeraytree_hier(re);
318         else
319                 makeraytree_single(re);
320 }
321
322
323
324 static void shade_ray(Isect *is, ShadeInput *shi, ShadeResult *shr)
325 {
326         ObjectInstanceRen *obi= (ObjectInstanceRen*)is->hit.ob;
327         VlakRen *vlr= (VlakRen*)is->hit.face;
328         int osatex= 0;
329         
330         /* set up view vector */
331         VECCOPY(shi->view, is->vec);
332
333         /* render co */
334         shi->co[0]= is->start[0]+is->labda*(shi->view[0]);
335         shi->co[1]= is->start[1]+is->labda*(shi->view[1]);
336         shi->co[2]= is->start[2]+is->labda*(shi->view[2]);
337         
338         Normalize(shi->view);
339
340         shi->obi= obi;
341         shi->obr= obi->obr;
342         shi->vlr= vlr;
343         shi->mat= vlr->mat;
344         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));        // note, keep this synced with render_types.h
345         shi->har= shi->mat->har;
346         
347         // Osa structs we leave unchanged now
348         SWAP(int, osatex, shi->osatex);
349         
350         shi->dxco[0]= shi->dxco[1]= shi->dxco[2]= 0.0f;
351         shi->dyco[0]= shi->dyco[1]= shi->dyco[2]= 0.0f;
352         
353         // but, set Osa stuff to zero where it can confuse texture code
354         if(shi->mat->texco & (TEXCO_NORM|TEXCO_REFL) ) {
355                 shi->dxno[0]= shi->dxno[1]= shi->dxno[2]= 0.0f;
356                 shi->dyno[0]= shi->dyno[1]= shi->dyno[2]= 0.0f;
357         }
358
359         if(vlr->v4) {
360                 if(is->isect==2) 
361                         shade_input_set_triangle_i(shi, obi, vlr, 2, 1, 3);
362                 else
363                         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 3);
364         }
365         else {
366                 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
367         }
368
369         shi->u= is->u;
370         shi->v= is->v;
371         shi->dx_u= shi->dx_v= shi->dy_u= shi->dy_v=  0.0f;
372
373         shade_input_set_normals(shi);
374
375         /* point normals to viewing direction */
376         if(INPR(shi->facenor, shi->view) < 0.0f)
377                 shade_input_flip_normals(shi);
378
379         shade_input_set_shade_texco(shi);
380         
381         if(is->mode==RE_RAY_SHADOW_TRA) 
382                 if(shi->mat->nodetree && shi->mat->use_nodes) {
383                         ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
384                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
385                 }
386                 else
387                         shade_color(shi, shr);
388         else {
389                 if(shi->mat->nodetree && shi->mat->use_nodes) {
390                         ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
391                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
392                 }
393                 else
394                         shade_material_loop(shi, shr);
395                 
396                 /* raytrace likes to separate the spec color */
397                 VECSUB(shr->diff, shr->combined, shr->spec);
398         }       
399         
400         SWAP(int, osatex, shi->osatex);  // XXXXX!!!!
401
402 }
403
404 static int refraction(float *refract, float *n, float *view, float index)
405 {
406         float dot, fac;
407
408         VECCOPY(refract, view);
409         
410         dot= view[0]*n[0] + view[1]*n[1] + view[2]*n[2];
411
412         if(dot>0.0f) {
413                 index = 1.0f/index;
414                 fac= 1.0f - (1.0f - dot*dot)*index*index;
415                 if(fac<= 0.0f) return 0;
416                 fac= -dot*index + sqrt(fac);
417         }
418         else {
419                 fac= 1.0f - (1.0f - dot*dot)*index*index;
420                 if(fac<= 0.0f) return 0;
421                 fac= -dot*index - sqrt(fac);
422         }
423
424         refract[0]= index*view[0] + fac*n[0];
425         refract[1]= index*view[1] + fac*n[1];
426         refract[2]= index*view[2] + fac*n[2];
427
428         return 1;
429 }
430
431 /* orn = original face normal */
432 static void reflection(float *ref, float *n, float *view, float *orn)
433 {
434         float f1;
435         
436         f1= -2.0f*(n[0]*view[0]+ n[1]*view[1]+ n[2]*view[2]);
437         
438         ref[0]= (view[0]+f1*n[0]);
439         ref[1]= (view[1]+f1*n[1]);
440         ref[2]= (view[2]+f1*n[2]);
441
442         if(orn) {
443                 /* test phong normals, then we should prevent vector going to the back */
444                 f1= ref[0]*orn[0]+ ref[1]*orn[1]+ ref[2]*orn[2];
445                 if(f1>0.0f) {
446                         f1+= .01f;
447                         ref[0]-= f1*orn[0];
448                         ref[1]-= f1*orn[1];
449                         ref[2]-= f1*orn[2];
450                 }
451         }
452 }
453
454 #if 0
455 static void color_combine(float *result, float fac1, float fac2, float *col1, float *col2)
456 {
457         float col1t[3], col2t[3];
458         
459         col1t[0]= sqrt(col1[0]);
460         col1t[1]= sqrt(col1[1]);
461         col1t[2]= sqrt(col1[2]);
462         col2t[0]= sqrt(col2[0]);
463         col2t[1]= sqrt(col2[1]);
464         col2t[2]= sqrt(col2[2]);
465
466         result[0]= (fac1*col1t[0] + fac2*col2t[0]);
467         result[0]*= result[0];
468         result[1]= (fac1*col1t[1] + fac2*col2t[1]);
469         result[1]*= result[1];
470         result[2]= (fac1*col1t[2] + fac2*col2t[2]);
471         result[2]*= result[2];
472 }
473 #endif
474
475 static float shade_by_transmission(Isect *is, ShadeInput *shi, ShadeResult *shr)
476 {
477         float dx, dy, dz, d, p;
478
479         if (0 == (shi->mat->mode & (MA_RAYTRANSP|MA_ZTRA)))
480                 return -1;
481            
482         if (shi->mat->tx_limit <= 0.0f) {
483                 d= 1.0f;
484         } 
485         else {
486                 /* shi.co[] calculated by shade_ray() */
487                 dx= shi->co[0] - is->start[0];
488                 dy= shi->co[1] - is->start[1];
489                 dz= shi->co[2] - is->start[2];
490                 d= sqrt(dx*dx+dy*dy+dz*dz);
491                 if (d > shi->mat->tx_limit)
492                         d= shi->mat->tx_limit;
493
494                 p = shi->mat->tx_falloff;
495                 if(p < 0.0f) p= 0.0f;
496                 else if (p > 10.0f) p= 10.0f;
497
498                 shr->alpha *= pow(d, p);
499                 if (shr->alpha > 1.0f)
500                         shr->alpha= 1.0f;
501         }
502
503         return d;
504 }
505
506 static void ray_fadeout_endcolor(float *col, ShadeInput *origshi, ShadeInput *shi, ShadeResult *shr, Isect *isec, float *vec)
507 {
508         /* un-intersected rays get either rendered material color or sky color */
509         if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOMAT) {
510                 VECCOPY(col, shr->combined);
511         } else if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOSKY) {
512                 VECCOPY(shi->view, vec);
513                 Normalize(shi->view);
514                 
515                 shadeSkyView(col, isec->start, shi->view, NULL, shi->thread);
516                 shadeSunView(col, shi->view);
517         }
518 }
519
520 static void ray_fadeout(Isect *is, ShadeInput *shi, float *col, float *blendcol, float dist_mir)
521 {
522         /* if fading out, linear blend against fade color */
523         float blendfac;
524
525         blendfac = 1.0 - VecLenf(shi->co, is->start)/dist_mir;
526         
527         col[0] = col[0]*blendfac + (1.0 - blendfac)*blendcol[0];
528         col[1] = col[1]*blendfac + (1.0 - blendfac)*blendcol[1];
529         col[2] = col[2]*blendfac + (1.0 - blendfac)*blendcol[2];
530 }
531
532 /* the main recursive tracer itself */
533 static void traceray(ShadeInput *origshi, ShadeResult *origshr, short depth, float *start, float *vec, float *col, ObjectInstanceRen *obi, VlakRen *vlr, int traflag)
534 {
535         ShadeInput shi;
536         ShadeResult shr;
537         Isect isec;
538         float f, f1, fr, fg, fb;
539         float ref[3];
540         float dist_mir = origshi->mat->dist_mir;
541
542         /* Warning, This is not that nice, and possibly a bit slow for every ray,
543         however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
544         memset(&shi, 0, sizeof(ShadeInput)); 
545         /* end warning! - Campbell */
546         
547         VECCOPY(isec.start, start);
548         VECCOPY(isec.vec, vec );
549         isec.labda = dist_mir > 0 ? dist_mir : RE_RAYTRACE_MAXDIST;
550         isec.mode= RE_RAY_MIRROR;
551         isec.skip = RE_SKIP_VLR_NEIGHBOUR;
552
553         isec.orig.ob   = obi;
554         isec.orig.face = vlr;
555
556         if(RE_rayobject_raycast(R.raytree, &isec)) {
557                 float d= 1.0f;
558                 
559                 shi.mask= origshi->mask;
560                 shi.osatex= origshi->osatex;
561                 shi.depth= 1;                                   /* only used to indicate tracing */
562                 shi.thread= origshi->thread;
563                 //shi.sample= 0; // memset above, so dont need this
564                 shi.xs= origshi->xs;
565                 shi.ys= origshi->ys;
566                 shi.lay= origshi->lay;
567                 shi.passflag= SCE_PASS_COMBINED; /* result of tracing needs no pass info */
568                 shi.combinedflag= 0xFFFFFF;              /* ray trace does all options */
569                 //shi.do_preview= 0; // memset above, so dont need this
570                 shi.light_override= origshi->light_override;
571                 shi.mat_override= origshi->mat_override;
572                 
573                 memset(&shr, 0, sizeof(ShadeResult));
574                 
575                 shade_ray(&isec, &shi, &shr);
576                 if (traflag & RAY_TRA)
577                         d= shade_by_transmission(&isec, &shi, &shr);
578                 
579                 if(depth>0) {
580
581                         if(shi.mat->mode_l & (MA_RAYTRANSP|MA_ZTRA) && shr.alpha < 1.0f) {
582                                 float nf, f, f1, refract[3], tracol[4];
583                                 
584                                 tracol[0]= shi.r;
585                                 tracol[1]= shi.g;
586                                 tracol[2]= shi.b;
587                                 tracol[3]= col[3];      // we pass on and accumulate alpha
588                                 
589                                 if(shi.mat->mode & MA_RAYTRANSP) {
590                                         /* odd depths: use normal facing viewer, otherwise flip */
591                                         if(traflag & RAY_TRAFLIP) {
592                                                 float norm[3];
593                                                 norm[0]= - shi.vn[0];
594                                                 norm[1]= - shi.vn[1];
595                                                 norm[2]= - shi.vn[2];
596                                                 if (!refraction(refract, norm, shi.view, shi.ang))
597                                                         reflection(refract, norm, shi.view, shi.vn);
598                                         }
599                                         else {
600                                                 if (!refraction(refract, shi.vn, shi.view, shi.ang))
601                                                         reflection(refract, shi.vn, shi.view, shi.vn);
602                                         }
603                                         traflag |= RAY_TRA;
604                                         traceray(origshi, origshr, depth-1, shi.co, refract, tracol, shi.obi, shi.vlr, traflag ^ RAY_TRAFLIP);
605                                 }
606                                 else
607                                         traceray(origshi, origshr, depth-1, shi.co, shi.view, tracol, shi.obi, shi.vlr, 0);
608                                 
609                                 f= shr.alpha; f1= 1.0f-f;
610                                 nf= d * shi.mat->filter;
611                                 fr= 1.0f+ nf*(shi.r-1.0f);
612                                 fg= 1.0f+ nf*(shi.g-1.0f);
613                                 fb= 1.0f+ nf*(shi.b-1.0f);
614                                 shr.diff[0]= f*shr.diff[0] + f1*fr*tracol[0];
615                                 shr.diff[1]= f*shr.diff[1] + f1*fg*tracol[1];
616                                 shr.diff[2]= f*shr.diff[2] + f1*fb*tracol[2];
617                                 
618                                 shr.spec[0] *=f;
619                                 shr.spec[1] *=f;
620                                 shr.spec[2] *=f;
621
622                                 col[3]= f1*tracol[3] + f;
623                         }
624                         else 
625                                 col[3]= 1.0f;
626
627                         if(shi.mat->mode_l & MA_RAYMIRROR) {
628                                 f= shi.ray_mirror;
629                                 if(f!=0.0f) f*= fresnel_fac(shi.view, shi.vn, shi.mat->fresnel_mir_i, shi.mat->fresnel_mir);
630                         }
631                         else f= 0.0f;
632                         
633                         if(f!=0.0f) {
634                                 float mircol[4];
635                                 
636                                 reflection(ref, shi.vn, shi.view, NULL);                        
637                                 traceray(origshi, origshr, depth-1, shi.co, ref, mircol, shi.obi, shi.vlr, 0);
638                         
639                                 f1= 1.0f-f;
640
641                                 /* combine */
642                                 //color_combine(col, f*fr*(1.0f-shr.spec[0]), f1, col, shr.diff);
643                                 //col[0]+= shr.spec[0];
644                                 //col[1]+= shr.spec[1];
645                                 //col[2]+= shr.spec[2];
646                                 
647                                 fr= shi.mirr;
648                                 fg= shi.mirg;
649                                 fb= shi.mirb;
650                 
651                                 col[0]= f*fr*(1.0f-shr.spec[0])*mircol[0] + f1*shr.diff[0] + shr.spec[0];
652                                 col[1]= f*fg*(1.0f-shr.spec[1])*mircol[1] + f1*shr.diff[1] + shr.spec[1];
653                                 col[2]= f*fb*(1.0f-shr.spec[2])*mircol[2] + f1*shr.diff[2] + shr.spec[2];
654                         }
655                         else {
656                                 col[0]= shr.diff[0] + shr.spec[0];
657                                 col[1]= shr.diff[1] + shr.spec[1];
658                                 col[2]= shr.diff[2] + shr.spec[2];
659                         }
660                         
661                         if (dist_mir > 0.0) {
662                                 float blendcol[3];
663                                 
664                                 /* max ray distance set, but found an intersection, so fade this color
665                                  * out towards the sky/material color for a smooth transition */
666                                 ray_fadeout_endcolor(blendcol, origshi, &shi, origshr, &isec, vec);
667                                 ray_fadeout(&isec, &shi, col, blendcol, dist_mir);
668                         }
669                 }
670                 else {
671                         col[0]= shr.diff[0] + shr.spec[0];
672                         col[1]= shr.diff[1] + shr.spec[1];
673                         col[2]= shr.diff[2] + shr.spec[2];
674                 }
675                 
676         }
677         else {
678                 ray_fadeout_endcolor(col, origshi, &shi, origshr, &isec, vec);
679         }
680 }
681
682 /* **************** jitter blocks ********** */
683
684 /* calc distributed planar energy */
685
686 static void DP_energy(float *table, float *vec, int tot, float xsize, float ysize)
687 {
688         int x, y, a;
689         float *fp, force[3], result[3];
690         float dx, dy, dist, min;
691         
692         min= MIN2(xsize, ysize);
693         min*= min;
694         result[0]= result[1]= 0.0f;
695         
696         for(y= -1; y<2; y++) {
697                 dy= ysize*y;
698                 for(x= -1; x<2; x++) {
699                         dx= xsize*x;
700                         fp= table;
701                         for(a=0; a<tot; a++, fp+= 2) {
702                                 force[0]= vec[0] - fp[0]-dx;
703                                 force[1]= vec[1] - fp[1]-dy;
704                                 dist= force[0]*force[0] + force[1]*force[1];
705                                 if(dist < min && dist>0.0f) {
706                                         result[0]+= force[0]/dist;
707                                         result[1]+= force[1]/dist;
708                                 }
709                         }
710                 }
711         }
712         vec[0] += 0.1*min*result[0]/(float)tot;
713         vec[1] += 0.1*min*result[1]/(float)tot;
714         // cyclic clamping
715         vec[0]= vec[0] - xsize*floor(vec[0]/xsize + 0.5);
716         vec[1]= vec[1] - ysize*floor(vec[1]/ysize + 0.5);
717 }
718
719 // random offset of 1 in 2
720 static void jitter_plane_offset(float *jitter1, float *jitter2, int tot, float sizex, float sizey, float ofsx, float ofsy)
721 {
722         float dsizex= sizex*ofsx;
723         float dsizey= sizey*ofsy;
724         float hsizex= 0.5*sizex, hsizey= 0.5*sizey;
725         int x;
726         
727         for(x=tot; x>0; x--, jitter1+=2, jitter2+=2) {
728                 jitter2[0]= jitter1[0] + dsizex;
729                 jitter2[1]= jitter1[1] + dsizey;
730                 if(jitter2[0] > hsizex) jitter2[0]-= sizex;
731                 if(jitter2[1] > hsizey) jitter2[1]-= sizey;
732         }
733 }
734
735 /* called from convertBlenderScene.c */
736 /* we do this in advance to get consistant random, not alter the render seed, and be threadsafe */
737 void init_jitter_plane(LampRen *lar)
738 {
739         float *fp;
740         int x, iter=12, tot= lar->ray_totsamp;
741         
742         /* test if already initialized */
743         if(lar->jitter) return;
744         
745         /* at least 4, or max threads+1 tables */
746         if(BLENDER_MAX_THREADS < 4) x= 4;
747         else x= BLENDER_MAX_THREADS+1;
748         fp= lar->jitter= MEM_callocN(x*tot*2*sizeof(float), "lamp jitter tab");
749         
750         /* if 1 sample, we leave table to be zero's */
751         if(tot>1) {
752                 
753                 /* set per-lamp fixed seed */
754                 BLI_srandom(tot);
755                 
756                 /* fill table with random locations, area_size large */
757                 for(x=0; x<tot; x++, fp+=2) {
758                         fp[0]= (BLI_frand()-0.5)*lar->area_size;
759                         fp[1]= (BLI_frand()-0.5)*lar->area_sizey;
760                 }
761                 
762                 while(iter--) {
763                         fp= lar->jitter;
764                         for(x=tot; x>0; x--, fp+=2) {
765                                 DP_energy(lar->jitter, fp, tot, lar->area_size, lar->area_sizey);
766                         }
767                 }
768         }       
769         /* create the dithered tables (could just check lamp type!) */
770         jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.0f);
771         jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.5f);
772         jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0f, 0.5f);
773 }
774
775 /* table around origin, -0.5*size to 0.5*size */
776 static float *give_jitter_plane(LampRen *lar, int thread, int xs, int ys)
777 {
778         int tot;
779         
780         tot= lar->ray_totsamp;
781                         
782         if(lar->ray_samp_type & LA_SAMP_JITTER) {
783                 /* made it threadsafe */
784                 
785                 if(lar->xold[thread]!=xs || lar->yold[thread]!=ys) {
786                         jitter_plane_offset(lar->jitter, lar->jitter+2*(thread+1)*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(thread), BLI_thread_frand(thread));
787                         lar->xold[thread]= xs; 
788                         lar->yold[thread]= ys;
789                 }
790                 return lar->jitter+2*(thread+1)*tot;
791         }
792         if(lar->ray_samp_type & LA_SAMP_DITHER) {
793                 return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1));
794         }
795         
796         return lar->jitter;
797 }
798
799
800 /* **************** QMC sampling *************** */
801
802 static void halton_sample(double *ht_invprimes, double *ht_nums, double *v)
803 {
804         // incremental halton sequence generator, from:
805         // "Instant Radiosity", Keller A.
806         unsigned int i;
807         
808         for (i = 0; i < 2; i++)
809         {
810                 double r = fabs((1.0 - ht_nums[i]) - 1e-10);
811                 
812                 if (ht_invprimes[i] >= r)
813                 {
814                         double lasth;
815                         double h = ht_invprimes[i];
816                         
817                         do {
818                                 lasth = h;
819                                 h *= ht_invprimes[i];
820                         } while (h >= r);
821                         
822                         ht_nums[i] += ((lasth + h) - 1.0);
823                 }
824                 else
825                         ht_nums[i] += ht_invprimes[i];
826                 
827                 v[i] = (float)ht_nums[i];
828         }
829 }
830
831 /* Generate Hammersley points in [0,1)^2
832  * From Lucille renderer */
833 static void hammersley_create(double *out, int n)
834 {
835         double p, t;
836         int k, kk;
837
838         for (k = 0; k < n; k++) {
839                 t = 0;
840                 for (p = 0.5, kk = k; kk; p *= 0.5, kk >>= 1) {
841                         if (kk & 1) {           /* kk mod 2 = 1         */
842                                 t += p;
843                         }
844                 }
845         
846                 out[2 * k + 0] = (double)k / (double)n;
847                 out[2 * k + 1] = t;
848         }
849 }
850
851 static struct QMCSampler *QMC_initSampler(int type, int tot)
852 {       
853         QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler");
854         qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table");
855
856         qsa->tot = tot;
857         qsa->type = type;
858         
859         if (qsa->type==SAMP_TYPE_HAMMERSLEY) 
860                 hammersley_create(qsa->samp2d, qsa->tot);
861                 
862         return qsa;
863 }
864
865 static void QMC_initPixel(QMCSampler *qsa, int thread)
866 {
867         if (qsa->type==SAMP_TYPE_HAMMERSLEY)
868         {
869                 /* hammersley sequence is fixed, already created in QMCSampler init.
870                  * per pixel, gets a random offset. We create separate offsets per thread, for write-safety */
871                 qsa->offs[thread][0] = 0.5 * BLI_thread_frand(thread);
872                 qsa->offs[thread][1] = 0.5 * BLI_thread_frand(thread);
873         }
874         else {  /* SAMP_TYPE_HALTON */
875                 
876                 /* generate a new randomised halton sequence per pixel
877                  * to alleviate qmc artifacts and make it reproducable 
878                  * between threads/frames */
879                 double ht_invprimes[2], ht_nums[2];
880                 double r[2];
881                 int i;
882         
883                 ht_nums[0] = BLI_thread_frand(thread);
884                 ht_nums[1] = BLI_thread_frand(thread);
885                 ht_invprimes[0] = 0.5;
886                 ht_invprimes[1] = 1.0/3.0;
887                 
888                 for (i=0; i< qsa->tot; i++) {
889                         halton_sample(ht_invprimes, ht_nums, r);
890                         qsa->samp2d[2*i+0] = r[0];
891                         qsa->samp2d[2*i+1] = r[1];
892                 }
893         }
894 }
895
896 static void QMC_freeSampler(QMCSampler *qsa)
897 {
898         MEM_freeN(qsa->samp2d);
899         MEM_freeN(qsa);
900 }
901
902 static void QMC_getSample(double *s, QMCSampler *qsa, int thread, int num)
903 {
904         if (qsa->type == SAMP_TYPE_HAMMERSLEY) {
905                 s[0] = fmod(qsa->samp2d[2*num+0] + qsa->offs[thread][0], 1.0f);
906                 s[1] = fmod(qsa->samp2d[2*num+1] + qsa->offs[thread][1], 1.0f);
907         }
908         else { /* SAMP_TYPE_HALTON */
909                 s[0] = qsa->samp2d[2*num+0];
910                 s[1] = qsa->samp2d[2*num+1];
911         }
912 }
913
914 /* phong weighted disc using 'blur' for exponent, centred on 0,0 */
915 static void QMC_samplePhong(float *vec, QMCSampler *qsa, int thread, int num, float blur)
916 {
917         double s[2];
918         float phi, pz, sqr;
919         
920         QMC_getSample(s, qsa, thread, num);
921
922         phi = s[0]*2*M_PI;
923         pz = pow(s[1], blur);
924         sqr = sqrt(1.0f-pz*pz);
925
926         vec[0] = cos(phi)*sqr;
927         vec[1] = sin(phi)*sqr;
928         vec[2] = 0.0f;
929 }
930
931 /* rect of edge lengths sizex, sizey, centred on 0.0,0.0 i.e. ranging from -sizex/2 to +sizey/2 */
932 static void QMC_sampleRect(float *vec, QMCSampler *qsa, int thread, int num, float sizex, float sizey)
933 {
934         double s[2];
935
936         QMC_getSample(s, qsa, thread, num);
937                 
938         vec[0] = (s[0] - 0.5) * sizex;
939         vec[1] = (s[1] - 0.5) * sizey;
940         vec[2] = 0.0f;
941 }
942
943 /* disc of radius 'radius', centred on 0,0 */
944 static void QMC_sampleDisc(float *vec, QMCSampler *qsa, int thread, int num, float radius)
945 {
946         double s[2];
947         float phi, sqr;
948         
949         QMC_getSample(s, qsa, thread, num);
950         
951         phi = s[0]*2*M_PI;
952         sqr = sqrt(s[1]);
953
954         vec[0] = cos(phi)*sqr* radius/2.0;
955         vec[1] = sin(phi)*sqr* radius/2.0;
956         vec[2] = 0.0f;
957 }
958
959 /* uniform hemisphere sampling */
960 static void QMC_sampleHemi(float *vec, QMCSampler *qsa, int thread, int num)
961 {
962         double s[2];
963         float phi, sqr;
964         
965         QMC_getSample(s, qsa, thread, num);
966         
967         phi = s[0]*2.f*M_PI;    
968         sqr = sqrt(s[1]);
969
970         vec[0] = cos(phi)*sqr;
971         vec[1] = sin(phi)*sqr;
972         vec[2] = 1.f - s[1]*s[1];
973 }
974
975 #if 0 /* currently not used */
976 /* cosine weighted hemisphere sampling */
977 static void QMC_sampleHemiCosine(float *vec, QMCSampler *qsa, int thread, int num)
978 {
979         double s[2];
980         float phi, sqr;
981         
982         QMC_getSample(s, qsa, thread, num);
983         
984         phi = s[0]*2.f*M_PI;    
985         sqr = s[1]*sqrt(2-s[1]*s[1]);
986
987         vec[0] = cos(phi)*sqr;
988         vec[1] = sin(phi)*sqr;
989         vec[2] = 1.f - s[1]*s[1];
990
991 }
992 #endif
993
994 /* called from convertBlenderScene.c */
995 void init_render_qmcsampler(Render *re)
996 {
997         re->qmcsamplers= MEM_callocN(sizeof(ListBase)*BLENDER_MAX_THREADS, "QMCListBase");
998 }
999
1000 static QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot)
1001 {
1002         QMCSampler *qsa;
1003
1004         /* create qmc samplers as needed, since recursion makes it hard to
1005          * predict how many are needed */
1006
1007         for(qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) {
1008                 if(qsa->type == type && qsa->tot == tot && !qsa->used) {
1009                         qsa->used= 1;
1010                         return qsa;
1011                 }
1012         }
1013
1014         qsa= QMC_initSampler(type, tot);
1015         qsa->used= 1;
1016         BLI_addtail(&re->qmcsamplers[thread], qsa);
1017
1018         return qsa;
1019 }
1020
1021 static void release_thread_qmcsampler(Render *re, int thread, QMCSampler *qsa)
1022 {
1023         qsa->used= 0;
1024 }
1025
1026 void free_render_qmcsampler(Render *re)
1027 {
1028         QMCSampler *qsa, *next;
1029         int a;
1030
1031         if(re->qmcsamplers) {
1032                 for(a=0; a<BLENDER_MAX_THREADS; a++) {
1033                         for(qsa=re->qmcsamplers[a].first; qsa; qsa=next) {
1034                                 next= qsa->next;
1035                                 QMC_freeSampler(qsa);
1036                         }
1037
1038                         re->qmcsamplers[a].first= re->qmcsamplers[a].last= NULL;
1039                 }
1040
1041                 MEM_freeN(re->qmcsamplers);
1042                 re->qmcsamplers= NULL;
1043         }
1044 }
1045
1046 static int adaptive_sample_variance(int samples, float *col, float *colsq, float thresh)
1047 {
1048         float var[3], mean[3];
1049
1050         /* scale threshold just to give a bit more precision in input rather than dealing with 
1051          * tiny tiny numbers in the UI */
1052         thresh /= 2;
1053         
1054         mean[0] = col[0] / (float)samples;
1055         mean[1] = col[1] / (float)samples;
1056         mean[2] = col[2] / (float)samples;
1057
1058         var[0] = (colsq[0] / (float)samples) - (mean[0]*mean[0]);
1059         var[1] = (colsq[1] / (float)samples) - (mean[1]*mean[1]);
1060         var[2] = (colsq[2] / (float)samples) - (mean[2]*mean[2]);
1061         
1062         if ((var[0] * 0.4 < thresh) && (var[1] * 0.3 < thresh) && (var[2] * 0.6 < thresh))
1063                 return 1;
1064         else
1065                 return 0;
1066 }
1067
1068 static int adaptive_sample_contrast_val(int samples, float prev, float val, float thresh)
1069 {
1070         /* if the last sample's contribution to the total value was below a small threshold
1071          * (i.e. the samples taken are very similar), then taking more samples that are probably 
1072          * going to be the same is wasting effort */
1073         if (fabs( prev/(float)(samples-1) - val/(float)samples ) < thresh) {
1074                 return 1;
1075         } else
1076                 return 0;
1077 }
1078
1079 static float get_avg_speed(ShadeInput *shi)
1080 {
1081         float pre_x, pre_y, post_x, post_y, speedavg;
1082         
1083         pre_x = (shi->winspeed[0] == PASS_VECTOR_MAX)?0.0:shi->winspeed[0];
1084         pre_y = (shi->winspeed[1] == PASS_VECTOR_MAX)?0.0:shi->winspeed[1];
1085         post_x = (shi->winspeed[2] == PASS_VECTOR_MAX)?0.0:shi->winspeed[2];
1086         post_y = (shi->winspeed[3] == PASS_VECTOR_MAX)?0.0:shi->winspeed[3];
1087         
1088         speedavg = (sqrt(pre_x*pre_x + pre_y*pre_y) + sqrt(post_x*post_x + post_y*post_y)) / 2.0;
1089         
1090         return speedavg;
1091 }
1092
1093 /* ***************** main calls ************** */
1094
1095
1096 static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr)
1097 {
1098         QMCSampler *qsa=NULL;
1099         int samp_type;
1100         
1101         float samp3d[3], orthx[3], orthy[3];
1102         float v_refract[3], v_refract_new[3];
1103         float sampcol[4], colsq[4];
1104         
1105         float blur = pow(1.0 - shi->mat->gloss_tra, 3);
1106         short max_samples = shi->mat->samp_gloss_tra;
1107         float adapt_thresh = shi->mat->adapt_thresh_tra;
1108         
1109         int samples=0;
1110         
1111         colsq[0] = colsq[1] = colsq[2] = 0.0;
1112         col[0] = col[1] = col[2] = 0.0;
1113         col[3]= shr->alpha;
1114         
1115         if (blur > 0.0) {
1116                 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1117                 else samp_type = SAMP_TYPE_HAMMERSLEY;
1118                         
1119                 /* all samples are generated per pixel */
1120                 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1121                 QMC_initPixel(qsa, shi->thread);
1122         } else 
1123                 max_samples = 1;
1124         
1125
1126         while (samples < max_samples) {         
1127                 refraction(v_refract, shi->vn, shi->view, shi->ang);
1128                 
1129                 if (max_samples > 1) {
1130                         /* get a quasi-random vector from a phong-weighted disc */
1131                         QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1132                                                 
1133                         VecOrthoBasisf(v_refract, orthx, orthy);
1134                         VecMulf(orthx, samp3d[0]);
1135                         VecMulf(orthy, samp3d[1]);
1136                                 
1137                         /* and perturb the refraction vector in it */
1138                         VecAddf(v_refract_new, v_refract, orthx);
1139                         VecAddf(v_refract_new, v_refract_new, orthy);
1140                         
1141                         Normalize(v_refract_new);
1142                 } else {
1143                         /* no blurriness, use the original normal */
1144                         VECCOPY(v_refract_new, v_refract);
1145                 }
1146         
1147                 traceray(shi, shr, shi->mat->ray_depth_tra, shi->co, v_refract_new, sampcol, shi->obi, shi->vlr, RAY_TRA|RAY_TRAFLIP);
1148         
1149                 col[0] += sampcol[0];
1150                 col[1] += sampcol[1];
1151                 col[2] += sampcol[2];
1152                 col[3] += sampcol[3];
1153                 
1154                 /* for variance calc */
1155                 colsq[0] += sampcol[0]*sampcol[0];
1156                 colsq[1] += sampcol[1]*sampcol[1];
1157                 colsq[2] += sampcol[2]*sampcol[2];
1158                 
1159                 samples++;
1160                 
1161                 /* adaptive sampling */
1162                 if (adapt_thresh < 1.0 && samples > max_samples/2) 
1163                 {
1164                         if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1165                                 break;
1166                         
1167                         /* if the pixel so far is very dark, we can get away with less samples */
1168                         if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1169                                 max_samples--;
1170                 }
1171         }
1172         
1173         col[0] /= (float)samples;
1174         col[1] /= (float)samples;
1175         col[2] /= (float)samples;
1176         col[3] /= (float)samples;
1177         
1178         if (qsa)
1179                 release_thread_qmcsampler(&R, shi->thread, qsa);
1180 }
1181
1182 static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float fresnelfac)
1183 {
1184         QMCSampler *qsa=NULL;
1185         int samp_type;
1186         
1187         float samp3d[3], orthx[3], orthy[3];
1188         float v_nor_new[3], v_reflect[3];
1189         float sampcol[4], colsq[4];
1190                 
1191         float blur = pow(1.0 - shi->mat->gloss_mir, 3);
1192         short max_samples = shi->mat->samp_gloss_mir;
1193         float adapt_thresh = shi->mat->adapt_thresh_mir;
1194         float aniso = 1.0 - shi->mat->aniso_gloss_mir;
1195         
1196         int samples=0;
1197         
1198         col[0] = col[1] = col[2] = 0.0;
1199         colsq[0] = colsq[1] = colsq[2] = 0.0;
1200         
1201         if (blur > 0.0) {
1202                 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1203                 else samp_type = SAMP_TYPE_HAMMERSLEY;
1204                         
1205                 /* all samples are generated per pixel */
1206                 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1207                 QMC_initPixel(qsa, shi->thread);
1208         } else 
1209                 max_samples = 1;
1210         
1211         while (samples < max_samples) {
1212                                 
1213                 if (max_samples > 1) {
1214                         /* get a quasi-random vector from a phong-weighted disc */
1215                         QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1216
1217                         /* find the normal's perpendicular plane, blurring along tangents
1218                          * if tangent shading enabled */
1219                         if (shi->mat->mode & (MA_TANGENT_V)) {
1220                                 Crossf(orthx, shi->vn, shi->tang);      // bitangent
1221                                 VECCOPY(orthy, shi->tang);
1222                                 VecMulf(orthx, samp3d[0]);
1223                                 VecMulf(orthy, samp3d[1]*aniso);
1224                         } else {
1225                                 VecOrthoBasisf(shi->vn, orthx, orthy);
1226                                 VecMulf(orthx, samp3d[0]);
1227                                 VecMulf(orthy, samp3d[1]);
1228                         }
1229
1230                         /* and perturb the normal in it */
1231                         VecAddf(v_nor_new, shi->vn, orthx);
1232                         VecAddf(v_nor_new, v_nor_new, orthy);
1233                         Normalize(v_nor_new);
1234                 } else {
1235                         /* no blurriness, use the original normal */
1236                         VECCOPY(v_nor_new, shi->vn);
1237                 }
1238                 
1239                 if((shi->vlr->flag & R_SMOOTH)) 
1240                         reflection(v_reflect, v_nor_new, shi->view, shi->facenor);
1241                 else
1242                         reflection(v_reflect, v_nor_new, shi->view, NULL);
1243                 
1244                 traceray(shi, shr, shi->mat->ray_depth, shi->co, v_reflect, sampcol, shi->obi, shi->vlr, 0);
1245
1246                 
1247                 col[0] += sampcol[0];
1248                 col[1] += sampcol[1];
1249                 col[2] += sampcol[2];
1250         
1251                 /* for variance calc */
1252                 colsq[0] += sampcol[0]*sampcol[0];
1253                 colsq[1] += sampcol[1]*sampcol[1];
1254                 colsq[2] += sampcol[2]*sampcol[2];
1255                 
1256                 samples++;
1257
1258                 /* adaptive sampling */
1259                 if (adapt_thresh > 0.0 && samples > max_samples/3) 
1260                 {
1261                         if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1262                                 break;
1263                         
1264                         /* if the pixel so far is very dark, we can get away with less samples */
1265                         if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1266                                 max_samples--;
1267                 
1268                         /* reduce samples when reflection is dim due to low ray mirror blend value or fresnel factor
1269                          * and when reflection is blurry */
1270                         if (fresnelfac < 0.1 * (blur+1)) {
1271                                 max_samples--;
1272                                 
1273                                 /* even more for very dim */
1274                                 if (fresnelfac < 0.05 * (blur+1)) 
1275                                         max_samples--;
1276                         }
1277                 }
1278         }
1279         
1280         col[0] /= (float)samples;
1281         col[1] /= (float)samples;
1282         col[2] /= (float)samples;
1283         
1284         if (qsa)
1285                 release_thread_qmcsampler(&R, shi->thread, qsa);
1286 }
1287
1288 /* extern call from render loop */
1289 void ray_trace(ShadeInput *shi, ShadeResult *shr)
1290 {
1291         float i, f, f1, fr, fg, fb;
1292         float mircol[4], tracol[4];
1293         float diff[3];
1294         int do_tra, do_mir;
1295         
1296         do_tra= ((shi->mat->mode & (MA_RAYTRANSP)) && shr->alpha!=1.0f);
1297         do_mir= ((shi->mat->mode & MA_RAYMIRROR) && shi->ray_mirror!=0.0f);
1298
1299         
1300         /* raytrace mirror amd refract like to separate the spec color */
1301         if(shi->combinedflag & SCE_PASS_SPEC)
1302                 VECSUB(diff, shr->combined, shr->spec) /* no ; */
1303         else
1304                 VECCOPY(diff, shr->combined);
1305         
1306         if(do_tra) {
1307                 float olddiff[3];
1308                 
1309                 trace_refract(tracol, shi, shr);
1310                 
1311                 f= shr->alpha; f1= 1.0f-f;
1312                 fr= 1.0f+ shi->mat->filter*(shi->r-1.0f);
1313                 fg= 1.0f+ shi->mat->filter*(shi->g-1.0f);
1314                 fb= 1.0f+ shi->mat->filter*(shi->b-1.0f);
1315                 
1316                 /* for refract pass */
1317                 VECCOPY(olddiff, diff);
1318                 
1319                 diff[0]= f*diff[0] + f1*fr*tracol[0];
1320                 diff[1]= f*diff[1] + f1*fg*tracol[1];
1321                 diff[2]= f*diff[2] + f1*fb*tracol[2];
1322                 
1323                 if(shi->passflag & SCE_PASS_REFRACT)
1324                         VECSUB(shr->refr, diff, olddiff);
1325                 
1326                 if(!(shi->combinedflag & SCE_PASS_REFRACT))
1327                         VECSUB(diff, diff, shr->refr);
1328                 
1329                 shr->alpha= tracol[3];
1330         }
1331         
1332         if(do_mir) {
1333         
1334                 i= shi->ray_mirror*fresnel_fac(shi->view, shi->vn, shi->mat->fresnel_mir_i, shi->mat->fresnel_mir);
1335                 if(i!=0.0f) {
1336                 
1337                         trace_reflect(mircol, shi, shr, i);
1338                         
1339                         fr= i*shi->mirr;
1340                         fg= i*shi->mirg;
1341                         fb= i*shi->mirb;
1342
1343                         if(shi->passflag & SCE_PASS_REFLECT) {
1344                                 /* mirror pass is not blocked out with spec */
1345                                 shr->refl[0]= fr*mircol[0] - fr*diff[0];
1346                                 shr->refl[1]= fg*mircol[1] - fg*diff[1];
1347                                 shr->refl[2]= fb*mircol[2] - fb*diff[2];
1348                         }
1349                         
1350                         if(shi->combinedflag & SCE_PASS_REFLECT) {
1351                                 
1352                                 f= fr*(1.0f-shr->spec[0]);      f1= 1.0f-i;
1353                                 diff[0]= f*mircol[0] + f1*diff[0];
1354                                 
1355                                 f= fg*(1.0f-shr->spec[1]);      f1= 1.0f-i;
1356                                 diff[1]= f*mircol[1] + f1*diff[1];
1357                                 
1358                                 f= fb*(1.0f-shr->spec[2]);      f1= 1.0f-i;
1359                                 diff[2]= f*mircol[2] + f1*diff[2];
1360                         }
1361                 }
1362         }
1363         /* put back together */
1364         if(shi->combinedflag & SCE_PASS_SPEC)
1365                 VECADD(shr->combined, diff, shr->spec) /* no ; */
1366         else
1367                 VECCOPY(shr->combined, diff);
1368 }
1369
1370 /* color 'shadfac' passes through 'col' with alpha and filter */
1371 /* filter is only applied on alpha defined transparent part */
1372 static void addAlphaLight(float *shadfac, float *col, float alpha, float filter)
1373 {
1374         float fr, fg, fb;
1375         
1376         fr= 1.0f+ filter*(col[0]-1.0f);
1377         fg= 1.0f+ filter*(col[1]-1.0f);
1378         fb= 1.0f+ filter*(col[2]-1.0f);
1379         
1380         shadfac[0]= alpha*col[0] + fr*(1.0f-alpha)*shadfac[0];
1381         shadfac[1]= alpha*col[1] + fg*(1.0f-alpha)*shadfac[1];
1382         shadfac[2]= alpha*col[2] + fb*(1.0f-alpha)*shadfac[2];
1383         
1384         shadfac[3]= (1.0f-alpha)*shadfac[3];
1385 }
1386
1387 static void ray_trace_shadow_tra(Isect *is, int depth, int traflag)
1388 {
1389         /* ray to lamp, find first face that intersects, check alpha properties,
1390            if it has col[3]>0.0f  continue. so exit when alpha is full */
1391         ShadeInput shi;
1392         ShadeResult shr;
1393         
1394         assert(0);
1395
1396         if(RE_rayobject_raycast(R.raytree, is)) {
1397                 float d= 1.0f;
1398                 /* we got a face */
1399                 
1400                 /* Warning, This is not that nice, and possibly a bit slow for every ray,
1401                 however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1402                 memset(&shi, 0, sizeof(ShadeInput)); 
1403                 /* end warning! - Campbell */
1404                 
1405                 shi.depth= 1;                                   /* only used to indicate tracing */
1406                 shi.mask= 1;
1407                 
1408                 /*shi.osatex= 0;
1409                 shi.thread= shi.sample= 0;
1410                 shi.lay= 0;
1411                 shi.passflag= 0;
1412                 shi.combinedflag= 0;
1413                 shi.do_preview= 0;
1414                 shi.light_override= NULL;
1415                 shi.mat_override= NULL;*/
1416                 
1417                 shade_ray(is, &shi, &shr);
1418                 if (traflag & RAY_TRA)
1419                         d= shade_by_transmission(is, &shi, &shr);
1420                 
1421                 /* mix colors based on shadfac (rgb + amount of light factor) */
1422                 addAlphaLight(is->col, shr.diff, shr.alpha, d*shi.mat->filter);
1423                 
1424                 if(depth>0 && is->col[3]>0.0f) {
1425                         
1426                         /* adapt isect struct */
1427                         VECCOPY(is->start, shi.co);
1428
1429                         is->orig.ob   = shi.obi;
1430                         is->orig.face = shi.vlr;
1431
1432                         ray_trace_shadow_tra(is, depth-1, traflag | RAY_TRA);
1433                 }
1434         }
1435 }
1436
1437 /* not used, test function for ambient occlusion (yaf: pathlight) */
1438 /* main problem; has to be called within shading loop, giving unwanted recursion */
1439 int ray_trace_shadow_rad(ShadeInput *ship, ShadeResult *shr)
1440 {
1441         static int counter=0, only_one= 0;
1442         extern float hashvectf[];
1443         Isect isec;
1444         ShadeInput shi;
1445         ShadeResult shr_t;
1446         float vec[3], accum[3], div= 0.0f;
1447         int a;
1448         
1449         assert(0);
1450         
1451         if(only_one) {
1452                 return 0;
1453         }
1454         only_one= 1;
1455         
1456         accum[0]= accum[1]= accum[2]= 0.0f;
1457         isec.mode= RE_RAY_MIRROR;
1458         isec.orig.ob   = ship->obi;
1459         isec.orig.face = ship->vlr;
1460         
1461         for(a=0; a<8*8; a++) {
1462                 
1463                 counter+=3;
1464                 counter %= 768;
1465                 VECCOPY(vec, hashvectf+counter);
1466                 if(ship->vn[0]*vec[0]+ship->vn[1]*vec[1]+ship->vn[2]*vec[2]>0.0f) {
1467                         vec[0]-= vec[0];
1468                         vec[1]-= vec[1];
1469                         vec[2]-= vec[2];
1470                 }
1471
1472                 VECCOPY(isec.start, ship->co);
1473                 VECCOPY(isec.vec, vec );
1474                 isec.labda = RE_RAYTRACE_MAXDIST;
1475
1476                 if(RE_rayobject_raycast(R.raytree, &isec)) {
1477                         float fac;
1478                         
1479                         /* Warning, This is not that nice, and possibly a bit slow for every ray,
1480                         however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1481                         memset(&shi, 0, sizeof(ShadeInput)); 
1482                         /* end warning! - Campbell */
1483                         
1484                         shade_ray(&isec, &shi, &shr_t);
1485                         fac= isec.labda*isec.labda;
1486                         fac= 1.0f;
1487                         accum[0]+= fac*(shr_t.diff[0]+shr_t.spec[0]);
1488                         accum[1]+= fac*(shr_t.diff[1]+shr_t.spec[1]);
1489                         accum[2]+= fac*(shr_t.diff[2]+shr_t.spec[2]);
1490                         div+= fac;
1491                 }
1492                 else div+= 1.0f;
1493         }
1494         
1495         if(div!=0.0f) {
1496                 shr->diff[0]+= accum[0]/div;
1497                 shr->diff[1]+= accum[1]/div;
1498                 shr->diff[2]+= accum[2]/div;
1499         }
1500         shr->alpha= 1.0f;
1501         
1502         only_one= 0;
1503         return 1;
1504 }
1505
1506 /* aolight: function to create random unit sphere vectors for total random sampling */
1507 static void RandomSpherical(float *v)
1508 {
1509         float r;
1510         v[2] = 2.f*BLI_frand()-1.f;
1511         if ((r = 1.f - v[2]*v[2])>0.f) {
1512                 float a = 6.283185307f*BLI_frand();
1513                 r = sqrt(r);
1514                 v[0] = r * cos(a);
1515                 v[1] = r * sin(a);
1516         }
1517         else v[2] = 1.f;
1518 }
1519
1520 /* calc distributed spherical energy */
1521 static void DS_energy(float *sphere, int tot, float *vec)
1522 {
1523         float *fp, fac, force[3], res[3];
1524         int a;
1525         
1526         res[0]= res[1]= res[2]= 0.0f;
1527         
1528         for(a=0, fp=sphere; a<tot; a++, fp+=3) {
1529                 VecSubf(force, vec, fp);
1530                 fac= force[0]*force[0] + force[1]*force[1] + force[2]*force[2];
1531                 if(fac!=0.0f) {
1532                         fac= 1.0f/fac;
1533                         res[0]+= fac*force[0];
1534                         res[1]+= fac*force[1];
1535                         res[2]+= fac*force[2];
1536                 }
1537         }
1538
1539         VecMulf(res, 0.5);
1540         VecAddf(vec, vec, res);
1541         Normalize(vec);
1542         
1543 }
1544
1545 /* called from convertBlenderScene.c */
1546 /* creates an equally distributed spherical sample pattern */
1547 /* and allocates threadsafe memory */
1548 void init_ao_sphere(World *wrld)
1549 {
1550         float *fp;
1551         int a, tot, iter= 16;
1552
1553         /* we make twice the amount of samples, because only a hemisphere is used */
1554         tot= 2*wrld->aosamp*wrld->aosamp;
1555         
1556         wrld->aosphere= MEM_mallocN(3*tot*sizeof(float), "AO sphere");
1557         
1558         /* fixed random */
1559         BLI_srandom(tot);
1560         
1561         /* init */
1562         fp= wrld->aosphere;
1563         for(a=0; a<tot; a++, fp+= 3) {
1564                 RandomSpherical(fp);
1565         }
1566         
1567         while(iter--) {
1568                 for(a=0, fp= wrld->aosphere; a<tot; a++, fp+= 3) {
1569                         DS_energy(wrld->aosphere, tot, fp);
1570                 }
1571         }
1572         
1573         /* tables */
1574         wrld->aotables= MEM_mallocN(BLENDER_MAX_THREADS*3*tot*sizeof(float), "AO tables");
1575 }
1576
1577 /* give per thread a table, we have to compare xs ys because of way OSA works... */
1578 static float *threadsafe_table_sphere(int test, int thread, int xs, int ys, int tot)
1579 {
1580         static int xso[BLENDER_MAX_THREADS], yso[BLENDER_MAX_THREADS];
1581         static int firsttime= 1;
1582         
1583         if(firsttime) {
1584                 memset(xso, 255, sizeof(xso));
1585                 memset(yso, 255, sizeof(yso));
1586                 firsttime= 0;
1587         }
1588         
1589         if(xs==xso[thread] && ys==yso[thread]) return R.wrld.aotables+ thread*tot*3;
1590         if(test) return NULL;
1591         xso[thread]= xs; yso[thread]= ys;
1592         return R.wrld.aotables+ thread*tot*3;
1593 }
1594
1595 static float *sphere_sampler(int type, int resol, int thread, int xs, int ys)
1596 {
1597         int tot;
1598         float *vec;
1599         
1600         tot= 2*resol*resol;
1601
1602         if (type & WO_AORNDSMP) {
1603                 float *sphere;
1604                 int a;
1605                 
1606                 // always returns table
1607                 sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1608
1609                 /* total random sampling. NOT THREADSAFE! (should be removed, is not useful) */
1610                 vec= sphere;
1611                 for (a=0; a<tot; a++, vec+=3) {
1612                         RandomSpherical(vec);
1613                 }
1614                 
1615                 return sphere;
1616         } 
1617         else {
1618                 float *sphere;
1619                 float cosfi, sinfi, cost, sint;
1620                 float ang, *vec1;
1621                 int a;
1622                 
1623                 // returns table if xs and ys were equal to last call
1624                 sphere= threadsafe_table_sphere(1, thread, xs, ys, tot);
1625                 if(sphere==NULL) {
1626                         sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1627                         
1628                         // random rotation
1629                         ang= BLI_thread_frand(thread);
1630                         sinfi= sin(ang); cosfi= cos(ang);
1631                         ang= BLI_thread_frand(thread);
1632                         sint= sin(ang); cost= cos(ang);
1633                         
1634                         vec= R.wrld.aosphere;
1635                         vec1= sphere;
1636                         for (a=0; a<tot; a++, vec+=3, vec1+=3) {
1637                                 vec1[0]= cost*cosfi*vec[0] - sinfi*vec[1] + sint*cosfi*vec[2];
1638                                 vec1[1]= cost*sinfi*vec[0] + cosfi*vec[1] + sint*sinfi*vec[2];
1639                                 vec1[2]= -sint*vec[0] + cost*vec[2];                    
1640                         }
1641                 }
1642                 return sphere;
1643         }
1644 }
1645
1646 static void ray_ao_qmc(ShadeInput *shi, float *shadfac)
1647 {
1648         Isect isec;
1649         QMCSampler *qsa=NULL;
1650         float samp3d[3];
1651         float up[3], side[3], dir[3], nrm[3];
1652         
1653         float maxdist = R.wrld.aodist;
1654         float fac=0.0f, prev=0.0f;
1655         float adapt_thresh = R.wrld.ao_adapt_thresh;
1656         float adapt_speed_fac = R.wrld.ao_adapt_speed_fac;
1657         
1658         int samples=0;
1659         int max_samples = R.wrld.aosamp*R.wrld.aosamp;
1660         
1661         float dxyview[3], skyadded=0, div;
1662         int aocolor;
1663         
1664         assert(0);
1665         
1666         isec.orig.ob   = shi->obi;
1667         isec.orig.face = shi->vlr;
1668
1669         isec.hit.ob   = 0;
1670         isec.hit.face = 0;
1671
1672         isec.last_hit = NULL;
1673         
1674         isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1675         isec.lay= -1;
1676         
1677         shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1678         
1679         /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1680         aocolor= R.wrld.aocolor;
1681         if(shi->mat->mode & MA_ONLYSHADOW)
1682                 aocolor= WO_AOPLAIN;
1683         
1684         if(aocolor == WO_AOSKYTEX) {
1685                 dxyview[0]= 1.0f/(float)R.wrld.aosamp;
1686                 dxyview[1]= 1.0f/(float)R.wrld.aosamp;
1687                 dxyview[2]= 0.0f;
1688         }
1689         
1690         if(shi->vlr->flag & R_SMOOTH) {
1691                 VECCOPY(nrm, shi->vn);
1692         }
1693         else {
1694                 VECCOPY(nrm, shi->facenor);
1695         }
1696         
1697         VecOrthoBasisf(nrm, up, side);
1698         
1699         /* sampling init */
1700         if (R.wrld.ao_samp_method==WO_AOSAMP_HALTON) {
1701                 float speedfac;
1702                 
1703                 speedfac = get_avg_speed(shi) * adapt_speed_fac;
1704                 CLAMP(speedfac, 1.0, 1000.0);
1705                 max_samples /= speedfac;
1706                 if (max_samples < 5) max_samples = 5;
1707                 
1708                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
1709         } else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY)
1710                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
1711
1712         QMC_initPixel(qsa, shi->thread);
1713         
1714         while (samples < max_samples) {
1715
1716                 /* sampling, returns quasi-random vector in unit hemisphere */
1717                 QMC_sampleHemi(samp3d, qsa, shi->thread, samples);
1718
1719                 dir[0] = (samp3d[0]*up[0] + samp3d[1]*side[0] + samp3d[2]*nrm[0]);
1720                 dir[1] = (samp3d[0]*up[1] + samp3d[1]*side[1] + samp3d[2]*nrm[1]);
1721                 dir[2] = (samp3d[0]*up[2] + samp3d[1]*side[2] + samp3d[2]*nrm[2]);
1722                 
1723                 Normalize(dir);
1724                         
1725                 VECCOPY(isec.start, shi->co);
1726                 isec.vec[0] = -dir[0];
1727                 isec.vec[1] = -dir[1];
1728                 isec.vec[2] = -dir[2];
1729                 isec.labda = maxdist;
1730                 
1731                 prev = fac;
1732                 
1733                 if(RE_rayobject_raycast(R.raytree, &isec)) {
1734                         if (R.wrld.aomode & WO_AODIST) fac+= exp(-isec.labda*R.wrld.aodistfac); 
1735                         else fac+= 1.0f;
1736                 }
1737                 else if(aocolor!=WO_AOPLAIN) {
1738                         float skycol[4];
1739                         float skyfac, view[3];
1740                         
1741                         view[0]= -dir[0];
1742                         view[1]= -dir[1];
1743                         view[2]= -dir[2];
1744                         Normalize(view);
1745                         
1746                         if(aocolor==WO_AOSKYCOL) {
1747                                 skyfac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1748                                 shadfac[0]+= (1.0f-skyfac)*R.wrld.horr + skyfac*R.wrld.zenr;
1749                                 shadfac[1]+= (1.0f-skyfac)*R.wrld.horg + skyfac*R.wrld.zeng;
1750                                 shadfac[2]+= (1.0f-skyfac)*R.wrld.horb + skyfac*R.wrld.zenb;
1751                         }
1752                         else {  /* WO_AOSKYTEX */
1753                                 shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
1754                                 shadeSunView(skycol, shi->view);
1755                                 shadfac[0]+= skycol[0];
1756                                 shadfac[1]+= skycol[1];
1757                                 shadfac[2]+= skycol[2];
1758                         }
1759                         skyadded++;
1760                 }
1761                 
1762                 samples++;
1763                 
1764                 if (qsa->type == SAMP_TYPE_HALTON) {
1765                         /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */          
1766                         if (adapt_thresh > 0.0 && (samples > max_samples/2) ) {
1767                                 
1768                                 if (adaptive_sample_contrast_val(samples, prev, fac, adapt_thresh)) {
1769                                         break;
1770                                 }
1771                         }
1772                 }
1773         }
1774         
1775         if(aocolor!=WO_AOPLAIN && skyadded) {
1776                 div= (1.0f - fac/(float)samples)/((float)skyadded);
1777                 
1778                 shadfac[0]*= div;       // average color times distances/hits formula
1779                 shadfac[1]*= div;       // average color times distances/hits formula
1780                 shadfac[2]*= div;       // average color times distances/hits formula
1781         } else {
1782                 shadfac[0]= shadfac[1]= shadfac[2]= 1.0f - fac/(float)samples;
1783         }
1784         
1785         if (qsa)
1786                 release_thread_qmcsampler(&R, shi->thread, qsa);
1787 }
1788
1789 /* extern call from shade_lamp_loop, ambient occlusion calculus */
1790 static void ray_ao_spheresamp(ShadeInput *shi, float *shadfac)
1791 {
1792         Isect isec;
1793         float *vec, *nrm, div, bias, sh=0.0f;
1794         float maxdist = R.wrld.aodist;
1795         float dxyview[3];
1796         int j= -1, tot, actual=0, skyadded=0, aocolor, resol= R.wrld.aosamp;
1797         
1798         assert(0);
1799         
1800         isec.orig.ob   = shi->obi;
1801         isec.orig.face = shi->vlr;
1802
1803         isec.hit.ob   = 0;
1804         isec.hit.face = 0;
1805         
1806         isec.last_hit = NULL;
1807         
1808         isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1809         isec.lay= -1;
1810
1811
1812         shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1813
1814         /* bias prevents smoothed faces to appear flat */
1815         if(shi->vlr->flag & R_SMOOTH) {
1816                 bias= R.wrld.aobias;
1817                 nrm= shi->vn;
1818         }
1819         else {
1820                 bias= 0.0f;
1821                 nrm= shi->facenor;
1822         }
1823
1824         /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1825         aocolor= R.wrld.aocolor;
1826         if(shi->mat->mode & MA_ONLYSHADOW)
1827                 aocolor= WO_AOPLAIN;
1828         
1829         if(resol>32) resol= 32;
1830         
1831         vec= sphere_sampler(R.wrld.aomode, resol, shi->thread, shi->xs, shi->ys);
1832         
1833         // warning: since we use full sphere now, and dotproduct is below, we do twice as much
1834         tot= 2*resol*resol;
1835
1836         if(aocolor == WO_AOSKYTEX) {
1837                 dxyview[0]= 1.0f/(float)resol;
1838                 dxyview[1]= 1.0f/(float)resol;
1839                 dxyview[2]= 0.0f;
1840         }
1841         
1842         while(tot--) {
1843                 
1844                 if ((vec[0]*nrm[0] + vec[1]*nrm[1] + vec[2]*nrm[2]) > bias) {
1845                         /* only ao samples for mask */
1846                         if(R.r.mode & R_OSA) {
1847                                 j++;
1848                                 if(j==R.osa) j= 0;
1849                                 if(!(shi->mask & (1<<j))) {
1850                                         vec+=3;
1851                                         continue;
1852                                 }
1853                         }
1854                         
1855                         actual++;
1856                         
1857                         /* always set start/vec/labda */
1858                         VECCOPY(isec.start, shi->co);
1859                         isec.vec[0] = -vec[0];
1860                         isec.vec[1] = -vec[1];
1861                         isec.vec[2] = -vec[2];
1862                         isec.labda = maxdist;
1863                         
1864                         /* do the trace */
1865                         if(RE_rayobject_raycast(R.raytree, &isec)) {
1866                                 if (R.wrld.aomode & WO_AODIST) sh+= exp(-isec.labda*R.wrld.aodistfac); 
1867                                 else sh+= 1.0f;
1868                         }
1869                         else if(aocolor!=WO_AOPLAIN) {
1870                                 float skycol[4];
1871                                 float fac, view[3];
1872                                 
1873                                 view[0]= -vec[0];
1874                                 view[1]= -vec[1];
1875                                 view[2]= -vec[2];
1876                                 Normalize(view);
1877                                 
1878                                 if(aocolor==WO_AOSKYCOL) {
1879                                         fac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1880                                         shadfac[0]+= (1.0f-fac)*R.wrld.horr + fac*R.wrld.zenr;
1881                                         shadfac[1]+= (1.0f-fac)*R.wrld.horg + fac*R.wrld.zeng;
1882                                         shadfac[2]+= (1.0f-fac)*R.wrld.horb + fac*R.wrld.zenb;
1883                                 }
1884                                 else {  /* WO_AOSKYTEX */
1885                                         shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
1886                                         shadeSunView(skycol, shi->view);
1887                                         shadfac[0]+= skycol[0];
1888                                         shadfac[1]+= skycol[1];
1889                                         shadfac[2]+= skycol[2];
1890                                 }
1891                                 skyadded++;
1892                         }
1893                 }
1894                 // samples
1895                 vec+= 3;
1896         }
1897         
1898         if(actual==0) sh= 1.0f;
1899         else sh = 1.0f - sh/((float)actual);
1900         
1901         if(aocolor!=WO_AOPLAIN && skyadded) {
1902                 div= sh/((float)skyadded);
1903                 
1904                 shadfac[0]*= div;       // average color times distances/hits formula
1905                 shadfac[1]*= div;       // average color times distances/hits formula
1906                 shadfac[2]*= div;       // average color times distances/hits formula
1907         }
1908         else {
1909                 shadfac[0]= shadfac[1]= shadfac[2]= sh;
1910         }
1911 }
1912
1913 void ray_ao(ShadeInput *shi, float *shadfac)
1914 {
1915         /* Unfortunately, the unusual way that the sphere sampler calculates roughly twice as many
1916          * samples as are actually traced, and skips them based on bias and OSA settings makes it very difficult
1917          * to reuse code between these two functions. This is the easiest way I can think of to do it
1918          * --broken */
1919         if (ELEM(R.wrld.ao_samp_method, WO_AOSAMP_HAMMERSLEY, WO_AOSAMP_HALTON))
1920                 ray_ao_qmc(shi, shadfac);
1921         else if (R.wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
1922                 ray_ao_spheresamp(shi, shadfac);
1923 }
1924
1925 static void ray_shadow_jittered_coords(ShadeInput *shi, int max, float jitco[RE_MAX_OSA][3], int *totjitco)
1926 {
1927         /* magic numbers for reordering sample positions to give better
1928          * results with adaptive sample, when it usually only takes 4 samples */
1929         int order8[8] = {0, 1, 5, 6, 2, 3, 4, 7};
1930         int order11[11] = {1, 3, 8, 10, 0, 2, 4, 5, 6, 7, 9};
1931         int order16[16] = {1, 3, 9, 12, 0, 6, 7, 8, 13, 2, 4, 5, 10, 11, 14, 15};
1932         int count = count_mask(shi->mask);
1933
1934         /* for better antialising shadow samples are distributed over the subpixel
1935          * sample coordinates, this only works for raytracing depth 0 though */
1936         if(!shi->strand && shi->depth == 0 && count > 1 && count <= max) {
1937                 float xs, ys, zs, view[3];
1938                 int samp, ordsamp, tot= 0;
1939
1940                 for(samp=0; samp<R.osa; samp++) {
1941                         if(R.osa == 8) ordsamp = order8[samp];
1942                         else if(R.osa == 11) ordsamp = order11[samp];
1943                         else if(R.osa == 16) ordsamp = order16[samp];
1944                         else ordsamp = samp;
1945
1946                         if(shi->mask & (1<<ordsamp)) {
1947                                 /* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */
1948                                 xs= (float)shi->scanco[0] + R.jit[ordsamp][0] + 0.5f;
1949                                 ys= (float)shi->scanco[1] + R.jit[ordsamp][1] + 0.5f;
1950                                 zs= shi->scanco[2];
1951
1952                                 shade_input_calc_viewco(shi, xs, ys, zs, view, NULL, jitco[tot], NULL, NULL);
1953                                 tot++;
1954                         }
1955                 }
1956
1957                 *totjitco= tot;
1958         }
1959         else {
1960                 VECCOPY(jitco[0], shi->co);
1961                 *totjitco= 1;
1962         }
1963 }
1964
1965 static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)
1966 {
1967         QMCSampler *qsa=NULL;
1968         int samples=0;
1969         float samp3d[3];
1970
1971         float fac=0.0f, vec[3], end[3];
1972         float colsq[4];
1973         float adapt_thresh = lar->adapt_thresh;
1974         int min_adapt_samples=4, max_samples = lar->ray_totsamp;
1975         float *co;
1976         int do_soft=1, full_osa=0;
1977
1978         float jitco[RE_MAX_OSA][3];
1979         int totjitco;
1980
1981 //      assert(0);
1982
1983         colsq[0] = colsq[1] = colsq[2] = 0.0;
1984         if(isec->mode==RE_RAY_SHADOW_TRA) {
1985                 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
1986         } else
1987                 shadfac[3]= 1.0f;
1988         
1989         if (lar->ray_totsamp < 2) do_soft = 0;
1990         if ((R.r.mode & R_OSA) && (R.osa > 0) && (shi->vlr->flag & R_FULL_OSA)) full_osa = 1;
1991         
1992         if (full_osa) {
1993                 if (do_soft) max_samples  = max_samples/R.osa + 1;
1994                 else max_samples = 1;
1995         } else {
1996                 if (do_soft) max_samples = lar->ray_totsamp;
1997                 else max_samples = (R.osa > 4)?R.osa:5;
1998         }
1999         
2000         ray_shadow_jittered_coords(shi, max_samples, jitco, &totjitco);
2001
2002         /* sampling init */
2003         if (lar->ray_samp_method==LA_SAMP_HALTON)
2004                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
2005         else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY)
2006                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
2007         
2008         QMC_initPixel(qsa, shi->thread);
2009         
2010         VECCOPY(vec, lampco);
2011         
2012         isec->skip = RE_SKIP_VLR_NEIGHBOUR;
2013         while (samples < max_samples) {
2014
2015                 isec->orig.ob   = shi->obi;
2016                 isec->orig.face = shi->vlr;
2017
2018                 /* manually jitter the start shading co-ord per sample
2019                  * based on the pre-generated OSA texture sampling offsets, 
2020                  * for anti-aliasing sharp shadow edges. */
2021                 co = jitco[samples % totjitco];
2022
2023                 if (do_soft) {
2024                         /* sphere shadow source */
2025                         if (lar->type == LA_LOCAL) {
2026                                 float ru[3], rv[3], v[3], s[3];
2027                                 
2028                                 /* calc tangent plane vectors */
2029                                 v[0] = co[0] - lampco[0];
2030                                 v[1] = co[1] - lampco[1];
2031                                 v[2] = co[2] - lampco[2];
2032                                 Normalize(v);
2033                                 VecOrthoBasisf(v, ru, rv);
2034                                 
2035                                 /* sampling, returns quasi-random vector in area_size disc */
2036                                 QMC_sampleDisc(samp3d, qsa, shi->thread, samples,lar->area_size);
2037
2038                                 /* distribute disc samples across the tangent plane */
2039                                 s[0] = samp3d[0]*ru[0] + samp3d[1]*rv[0];
2040                                 s[1] = samp3d[0]*ru[1] + samp3d[1]*rv[1];
2041                                 s[2] = samp3d[0]*ru[2] + samp3d[1]*rv[2];
2042                                 
2043                                 VECCOPY(samp3d, s);
2044                         }
2045                         else {
2046                                 /* sampling, returns quasi-random vector in [sizex,sizey]^2 plane */
2047                                 QMC_sampleRect(samp3d, qsa, shi->thread, samples, lar->area_size, lar->area_sizey);
2048                                                                 
2049                                 /* align samples to lamp vector */
2050                                 Mat3MulVecfl(lar->mat, samp3d);
2051                         }
2052                         end[0] = vec[0]+samp3d[0];
2053                         end[1] = vec[1]+samp3d[1];
2054                         end[2] = vec[2]+samp3d[2];
2055                 } else {
2056                         VECCOPY(end, vec);
2057                 }
2058
2059                 if(shi->strand) {
2060                         /* bias away somewhat to avoid self intersection */
2061                         float jitbias= 0.5f*(VecLength(shi->dxco) + VecLength(shi->dyco));
2062                         float v[3];
2063
2064                         VECSUB(v, co, end);
2065                         Normalize(v);
2066
2067                         co[0] -= jitbias*v[0];
2068                         co[1] -= jitbias*v[1];
2069                         co[2] -= jitbias*v[2];
2070                 }
2071
2072                 VECCOPY(isec->start, co);
2073 //              VECSUB(isec->vec, end, isec->start);
2074                 isec->vec[0] = end[0]-isec->start[0];
2075                 isec->vec[1] = end[1]-isec->start[1];
2076                 isec->vec[2] = end[2]-isec->start[2];
2077                 isec->labda = 1.0f; // * Normalize(isec->vec);
2078                 
2079                 /* trace the ray */
2080                 if(isec->mode==RE_RAY_SHADOW_TRA) {
2081                         isec->col[0]= isec->col[1]= isec->col[2]=  1.0f;
2082                         isec->col[3]= 1.0f;
2083                         
2084                         ray_trace_shadow_tra(isec, DEPTH_SHADOW_TRA, 0);
2085                         shadfac[0] += isec->col[0];
2086                         shadfac[1] += isec->col[1];
2087                         shadfac[2] += isec->col[2];
2088                         shadfac[3] += isec->col[3];
2089                         
2090                         /* for variance calc */
2091                         colsq[0] += isec->col[0]*isec->col[0];
2092                         colsq[1] += isec->col[1]*isec->col[1];
2093                         colsq[2] += isec->col[2]*isec->col[2];
2094                 }
2095                 else {
2096                         if( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f;
2097                 }
2098                 
2099                 samples++;
2100                 
2101                 if ((lar->ray_samp_method == LA_SAMP_HALTON)) {
2102                 
2103                         /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
2104                         if ((max_samples > min_adapt_samples) && (adapt_thresh > 0.0) && (samples > max_samples / 3)) {
2105                                 if (isec->mode==RE_RAY_SHADOW_TRA) {
2106                                         if ((shadfac[3] / samples > (1.0-adapt_thresh)) || (shadfac[3] / samples < adapt_thresh))
2107                                                 break;
2108                                         else if (adaptive_sample_variance(samples, shadfac, colsq, adapt_thresh))
2109                                                 break;
2110                                 } else {
2111                                         if ((fac / samples > (1.0-adapt_thresh)) || (fac / samples < adapt_thresh))
2112                                                 break;
2113                                 }
2114                         }
2115                 }
2116         }
2117         
2118         if(isec->mode==RE_RAY_SHADOW_TRA) {
2119                 shadfac[0] /= samples;
2120                 shadfac[1] /= samples;
2121                 shadfac[2] /= samples;
2122                 shadfac[3] /= samples;
2123         } else
2124                 shadfac[3]= 1.0f-fac/samples;
2125
2126         if (qsa)
2127                 release_thread_qmcsampler(&R, shi->thread, qsa);
2128 }
2129
2130 static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)
2131 {
2132         /* area soft shadow */
2133         float *jitlamp;
2134         float fac=0.0f, div=0.0f, vec[3];
2135         int a, j= -1, mask;
2136         
2137         assert(0);
2138         
2139         if(isec->mode==RE_RAY_SHADOW_TRA) {
2140                 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
2141         }
2142         else shadfac[3]= 1.0f;
2143         
2144         fac= 0.0f;
2145         jitlamp= give_jitter_plane(lar, shi->thread, shi->xs, shi->ys);
2146
2147         a= lar->ray_totsamp;
2148         
2149         /* this correction to make sure we always take at least 1 sample */
2150         mask= shi->mask;
2151         if(a==4) mask |= (mask>>4)|(mask>>8);
2152         else if(a==9) mask |= (mask>>9);
2153         
2154         while(a--) {
2155                 
2156                 if(R.r.mode & R_OSA) {
2157                         j++;
2158                         if(j>=R.osa) j= 0;
2159                         if(!(mask & (1<<j))) {
2160                                 jitlamp+= 2;
2161                                 continue;
2162                         }
2163                 }
2164                 
2165                 isec->orig.ob   = shi->obi;
2166                 isec->orig.face = shi->vlr;
2167                 
2168                 vec[0]= jitlamp[0];
2169                 vec[1]= jitlamp[1];
2170                 vec[2]= 0.0f;
2171                 Mat3MulVecfl(lar->mat, vec);
2172                 
2173                 /* set start and vec */
2174                 VECCOPY(isec->start, shi->co);
2175                 VECCOPY(isec->vec, vec);
2176                 isec->labda = 1.0f;
2177                 isec->skip = RE_SKIP_VLR_NEIGHBOUR;
2178                 
2179                 if(isec->mode==RE_RAY_SHADOW_TRA) {
2180                         /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2181                         isec->col[0]= isec->col[1]= isec->col[2]=  1.0f;
2182                         isec->col[3]= 1.0f;
2183                         
2184                         ray_trace_shadow_tra(isec, DEPTH_SHADOW_TRA, 0);
2185                         shadfac[0] += isec->col[0];
2186                         shadfac[1] += isec->col[1];
2187                         shadfac[2] += isec->col[2];
2188                         shadfac[3] += isec->col[3];
2189                 }
2190                 else if( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f;
2191                 
2192                 div+= 1.0f;
2193                 jitlamp+= 2;
2194         }
2195         
2196         if(isec->mode==RE_RAY_SHADOW_TRA) {
2197                 shadfac[0] /= div;
2198                 shadfac[1] /= div;
2199                 shadfac[2] /= div;
2200                 shadfac[3] /= div;
2201         }
2202         else {
2203                 // sqrt makes nice umbra effect
2204                 if(lar->ray_samp_type & LA_SAMP_UMBRA)
2205                         shadfac[3]= sqrt(1.0f-fac/div);
2206                 else
2207                         shadfac[3]= 1.0f-fac/div;
2208         }
2209 }
2210 /* extern call from shade_lamp_loop */
2211 void ray_shadow(ShadeInput *shi, LampRen *lar, float *shadfac)
2212 {
2213         Isect isec;
2214         float lampco[3];
2215
2216         /* setup isec */
2217         if(shi->mat->mode & MA_SHADOW_TRA) isec.mode= RE_RAY_SHADOW_TRA;
2218         else isec.mode= RE_RAY_SHADOW;
2219         
2220         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW))
2221                 isec.lay= lar->lay;
2222         else
2223                 isec.lay= -1;
2224
2225         /* only when not mir tracing, first hit optimm */
2226         if(shi->depth==0) {
2227                 isec.last_hit = lar->last_hit[shi->thread];
2228         }
2229         else {
2230                 isec.last_hit = NULL;
2231         }
2232         
2233         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2234                 lampco[0]= shi->co[0] - RE_RAYTRACE_MAXDIST*lar->vec[0];
2235                 lampco[1]= shi->co[1] - RE_RAYTRACE_MAXDIST*lar->vec[1];
2236                 lampco[2]= shi->co[2] - RE_RAYTRACE_MAXDIST*lar->vec[2];
2237         }
2238         else {
2239                 VECCOPY(lampco, lar->co);
2240         }
2241         
2242         if (ELEM(lar->ray_samp_method, LA_SAMP_HALTON, LA_SAMP_HAMMERSLEY)) {
2243                 
2244                 ray_shadow_qmc(shi, lar, lampco, shadfac, &isec);
2245                 
2246         } else {
2247                 if(lar->ray_totsamp<2) {
2248                         
2249                         isec.orig.ob   = shi->obi;
2250                         isec.orig.face = shi->vlr;
2251                         
2252                         shadfac[3]= 1.0f; // 1.0=full light
2253                         
2254                         /* set up isec vec */
2255                         VECCOPY(isec.start, shi->co);
2256                         VECSUB(isec.vec, lampco, isec.start);
2257                         isec.labda = 1.0f;
2258
2259                         if(isec.mode==RE_RAY_SHADOW_TRA) {
2260                                 /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2261                                 isec.col[0]= isec.col[1]= isec.col[2]=  1.0f;
2262                                 isec.col[3]= 1.0f;
2263
2264                                 ray_trace_shadow_tra(&isec, DEPTH_SHADOW_TRA, 0);
2265                                 QUATCOPY(shadfac, isec.col);
2266                         }
2267                         else
2268                         {
2269                                 assert(0);
2270                                 if(RE_rayobject_raycast(R.raytree, &isec)) shadfac[3]= 0.0f;
2271                         }
2272                 }
2273                 else {
2274                         ray_shadow_jitter(shi, lar, lampco, shadfac, &isec);
2275                 }
2276         }
2277                 
2278         /* for first hit optim, set last interesected shadow face */
2279         if(shi->depth==0) {
2280                 lar->last_hit[shi->thread] = isec.last_hit;
2281         }
2282
2283 }
2284
2285 #if 0
2286 /* only when face points away from lamp, in direction of lamp, trace ray and find first exit point */
2287 static void ray_translucent(ShadeInput *shi, LampRen *lar, float *distfac, float *co)
2288 {
2289         Isect isec;
2290         float lampco[3];
2291         
2292         assert(0);
2293         
2294         /* setup isec */
2295         isec.mode= RE_RAY_SHADOW_TRA;
2296         
2297         if(lar->mode & LA_LAYER) isec.lay= lar->lay; else isec.lay= -1;
2298         
2299         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2300                 lampco[0]= shi->co[0] - RE_RAYTRACE_MAXDIST*lar->vec[0];
2301                 lampco[1]= shi->co[1] - RE_RAYTRACE_MAXDIST*lar->vec[1];
2302                 lampco[2]= shi->co[2] - RE_RAYTRACE_MAXDIST*lar->vec[2];
2303         }
2304         else {
2305                 VECCOPY(lampco, lar->co);
2306         }
2307         
2308         isec.orig.ob   = shi->obi;
2309         isec.orig.face = shi->vlr;
2310         
2311         /* set up isec vec */
2312         VECCOPY(isec.start, shi->co);
2313         VECCOPY(isec.end, lampco);
2314         
2315         if(RE_rayobject_raycast(R.raytree, &isec)) {
2316                 /* we got a face */
2317                 
2318                 /* render co */
2319                 co[0]= isec.start[0]+isec.labda*(isec.vec[0]);
2320                 co[1]= isec.start[1]+isec.labda*(isec.vec[1]);
2321                 co[2]= isec.start[2]+isec.labda*(isec.vec[2]);
2322                 
2323                 *distfac= VecLength(isec.vec);
2324         }
2325         else
2326                 *distfac= 0.0f;
2327 }
2328
2329 #endif
2330