Renamed exported functions from render to have the RE_ prefix
[blender-staging.git] / source / blender / render / intern / source / rayshade.c
1 /**
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version. 
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
21  * All rights reserved.
22  *
23  * Contributors: 2004/2005 Blender Foundation, full recode
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 #include <math.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <float.h>
32 #include <assert.h>
33
34 #include "MEM_guardedalloc.h"
35
36 #include "DNA_material_types.h"
37 #include "DNA_lamp_types.h"
38
39 #include "BKE_global.h"
40 #include "BKE_node.h"
41 #include "BKE_utildefines.h"
42
43 #include "BLI_arithb.h"
44 #include "BLI_blenlib.h"
45 #include "BLI_jitter.h"
46 #include "BLI_rand.h"
47
48 #include "PIL_time.h"
49
50 #include "render_types.h"
51 #include "renderpipeline.h"
52 #include "rendercore.h"
53 #include "renderdatabase.h"
54 #include "pixelblending.h"
55 #include "pixelshading.h"
56 #include "shading.h"
57 #include "texture.h"
58
59 #include "RE_raytrace.h"
60 #include "rayobject.h"
61
62 #define RAY_TRA         1
63 #define RAY_TRAFLIP     2
64
65 #define DEPTH_SHADOW_TRA  10
66
67 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
68 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
69 /* only to be used here in this file, it's for speed */
70 extern struct Render R;
71 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
72
73 #if 0
74 static void vlr_face_coords(RayFace *face, float **v1, float **v2, float **v3, float **v4)
75 {
76         VlakRen *vlr= (VlakRen*)face;
77
78         *v1 = (vlr->v1)? vlr->v1->co: NULL;
79         *v2 = (vlr->v2)? vlr->v2->co: NULL;
80         *v3 = (vlr->v3)? vlr->v3->co: NULL;
81         *v4 = (vlr->v4)? vlr->v4->co: NULL;
82 }
83
84 static int vlr_check_intersect(Isect *is, int ob, RayFace *face)
85 {
86         ObjectInstanceRen *obi= RAY_OBJECT_GET((Render*)is->userdata, ob);
87         VlakRen *vlr = (VlakRen*)face;
88
89         /* for baking selected to active non-traceable materials might still
90          * be in the raytree */
91         if(!(vlr->mat->mode & MA_TRACEBLE))
92                 return 0;
93
94         /* I know... cpu cycle waste, might do smarter once */
95         if(is->mode==RE_RAY_MIRROR)
96                 return !(vlr->mat->mode & MA_ONLYCAST);
97         else
98                 return (is->lay & obi->lay);
99 }
100
101 static float *vlr_get_transform(void *userdata, int i)
102 {
103         ObjectInstanceRen *obi= RAY_OBJECT_GET((Render*)userdata, i);
104
105         return (obi->flag & R_TRANSFORMED)? (float*)obi->mat: NULL;
106 }
107 #endif
108
109 void freeraytree(Render *re)
110 {
111         if(re->raytree) {
112                 RE_rayobject_free(re->raytree);
113                 re->raytree= NULL;
114                 MEM_freeN( re->rayfaces );
115         }
116 }
117 void makeraytree(Render *re)
118 {
119         ObjectInstanceRen *obi;
120         ObjectRen *obr;
121         VlakRen *vlr= NULL;
122         double lasttime= PIL_check_seconds_timer();
123         int v, totv = 0, totface = 0;
124         RayFace *faces, *cur_face;
125
126         //TODO (for now octree only supports RayFaces so we need to create them)
127         //
128         //count faces
129         for(obi=re->instancetable.first; obi; obi=obi->next) {
130                 obr= obi->obr;
131
132                 if(re->excludeob && obr->ob == re->excludeob)
133                         continue;
134
135                 for(v=0;v<obr->totvlak;v++) {
136                         if((v & 255)==0) vlr= obr->vlaknodes[v>>8].vlak;
137                         else vlr++;
138                         /* baking selected to active needs non-traceable too */
139                         if((re->flag & R_BAKE_TRACE) || (vlr->mat->mode & MA_TRACEBLE)) {       
140                                 if((vlr->mat->mode & MA_WIRE)==0) {     
141                                         totface++;
142                                 }
143                         }
144                 }
145         }
146         
147         re->raytree = RE_rayobject_octree_create( re->r.ocres, totface );
148         
149         //Fill rayfaces
150         re->rayfaces = (RayObject*)MEM_callocN(totface*sizeof(RayFace), "render faces");
151         cur_face = faces = (RayFace*)re->rayfaces;
152         
153         for(obi=re->instancetable.first; obi; obi=obi->next) {
154                 obr= obi->obr;
155
156                 if(re->excludeob && obr->ob == re->excludeob)
157                         continue;
158
159                 for(v=0;v<obr->totvlak;v++) {
160                         if((v & 255)==0)
161                         {
162                                 double time= PIL_check_seconds_timer();
163
164                                 vlr= obr->vlaknodes[v>>8].vlak;
165
166                                 vlr= obr->vlaknodes[v>>8].vlak;
167                                 if(re->test_break(re->tbh))
168                                         break;
169                                 if(time-lasttime>1.0f) {
170                                         char str[32];
171                                         sprintf(str, "Filling Octree: %d", totv);
172                                         re->i.infostr= str;
173                                         re->stats_draw(re->sdh, &re->i);
174                                         re->i.infostr= NULL;
175                                         lasttime= time;
176                                 }
177                         }
178                         else vlr++;
179                         /* baking selected to active needs non-traceable too */
180                         if((re->flag & R_BAKE_TRACE) || (vlr->mat->mode & MA_TRACEBLE)) {       
181                                 if((vlr->mat->mode & MA_WIRE)==0) {     
182                                         cur_face->v1 = vlr->v1->co;
183                                         cur_face->v2 = vlr->v2->co;
184                                         cur_face->v3 = vlr->v3->co;
185                                         cur_face->v4 = vlr->v4 ? vlr->v4->co : NULL;
186                                         
187                                         cur_face->ob   = (void*)obi;
188                                         cur_face->face = vlr;
189                                         
190                                         RE_rayobject_add( re->raytree, (RayObject*) cur_face );
191                                         cur_face++;
192                                 }
193                         }
194                 }
195         }
196
197         RE_rayobject_done( re->raytree );
198 //TODO  vlr_face_coords, vlr_check_intersect, vlr_get_transform, re);
199
200         re->i.infostr= NULL;
201         re->stats_draw(re->sdh, &re->i);
202 }
203
204 static void shade_ray(Isect *is, ShadeInput *shi, ShadeResult *shr)
205 {
206         ObjectInstanceRen *obi= (ObjectInstanceRen*)is->hit.ob;
207         VlakRen *vlr= (VlakRen*)is->hit.face;
208         int osatex= 0;
209         
210         /* set up view vector */
211         VECCOPY(shi->view, is->vec);
212
213         /* render co */
214         shi->co[0]= is->start[0]+is->labda*(shi->view[0]);
215         shi->co[1]= is->start[1]+is->labda*(shi->view[1]);
216         shi->co[2]= is->start[2]+is->labda*(shi->view[2]);
217         
218         Normalize(shi->view);
219
220         shi->obi= obi;
221         shi->obr= obi->obr;
222         shi->vlr= vlr;
223         shi->mat= vlr->mat;
224         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));        // note, keep this synced with render_types.h
225         shi->har= shi->mat->har;
226         
227         // Osa structs we leave unchanged now
228         SWAP(int, osatex, shi->osatex);
229         
230         shi->dxco[0]= shi->dxco[1]= shi->dxco[2]= 0.0f;
231         shi->dyco[0]= shi->dyco[1]= shi->dyco[2]= 0.0f;
232         
233         // but, set Osa stuff to zero where it can confuse texture code
234         if(shi->mat->texco & (TEXCO_NORM|TEXCO_REFL) ) {
235                 shi->dxno[0]= shi->dxno[1]= shi->dxno[2]= 0.0f;
236                 shi->dyno[0]= shi->dyno[1]= shi->dyno[2]= 0.0f;
237         }
238
239         if(vlr->v4) {
240                 if(is->isect==2) 
241                         shade_input_set_triangle_i(shi, obi, vlr, 2, 1, 3);
242                 else
243                         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 3);
244         }
245         else {
246                 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
247         }
248
249         shi->u= is->u;
250         shi->v= is->v;
251         shi->dx_u= shi->dx_v= shi->dy_u= shi->dy_v=  0.0f;
252
253         shade_input_set_normals(shi);
254
255         /* point normals to viewing direction */
256         if(INPR(shi->facenor, shi->view) < 0.0f)
257                 shade_input_flip_normals(shi);
258
259         shade_input_set_shade_texco(shi);
260         
261         if(is->mode==RE_RAY_SHADOW_TRA) 
262                 if(shi->mat->nodetree && shi->mat->use_nodes) {
263                         ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
264                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
265                 }
266                 else
267                         shade_color(shi, shr);
268         else {
269                 if(shi->mat->nodetree && shi->mat->use_nodes) {
270                         ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
271                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
272                 }
273                 else
274                         shade_material_loop(shi, shr);
275                 
276                 /* raytrace likes to separate the spec color */
277                 VECSUB(shr->diff, shr->combined, shr->spec);
278         }       
279         
280         SWAP(int, osatex, shi->osatex);  // XXXXX!!!!
281
282 }
283
284 static int refraction(float *refract, float *n, float *view, float index)
285 {
286         float dot, fac;
287
288         VECCOPY(refract, view);
289         
290         dot= view[0]*n[0] + view[1]*n[1] + view[2]*n[2];
291
292         if(dot>0.0f) {
293                 index = 1.0f/index;
294                 fac= 1.0f - (1.0f - dot*dot)*index*index;
295                 if(fac<= 0.0f) return 0;
296                 fac= -dot*index + sqrt(fac);
297         }
298         else {
299                 fac= 1.0f - (1.0f - dot*dot)*index*index;
300                 if(fac<= 0.0f) return 0;
301                 fac= -dot*index - sqrt(fac);
302         }
303
304         refract[0]= index*view[0] + fac*n[0];
305         refract[1]= index*view[1] + fac*n[1];
306         refract[2]= index*view[2] + fac*n[2];
307
308         return 1;
309 }
310
311 /* orn = original face normal */
312 static void reflection(float *ref, float *n, float *view, float *orn)
313 {
314         float f1;
315         
316         f1= -2.0f*(n[0]*view[0]+ n[1]*view[1]+ n[2]*view[2]);
317         
318         ref[0]= (view[0]+f1*n[0]);
319         ref[1]= (view[1]+f1*n[1]);
320         ref[2]= (view[2]+f1*n[2]);
321
322         if(orn) {
323                 /* test phong normals, then we should prevent vector going to the back */
324                 f1= ref[0]*orn[0]+ ref[1]*orn[1]+ ref[2]*orn[2];
325                 if(f1>0.0f) {
326                         f1+= .01f;
327                         ref[0]-= f1*orn[0];
328                         ref[1]-= f1*orn[1];
329                         ref[2]-= f1*orn[2];
330                 }
331         }
332 }
333
334 #if 0
335 static void color_combine(float *result, float fac1, float fac2, float *col1, float *col2)
336 {
337         float col1t[3], col2t[3];
338         
339         col1t[0]= sqrt(col1[0]);
340         col1t[1]= sqrt(col1[1]);
341         col1t[2]= sqrt(col1[2]);
342         col2t[0]= sqrt(col2[0]);
343         col2t[1]= sqrt(col2[1]);
344         col2t[2]= sqrt(col2[2]);
345
346         result[0]= (fac1*col1t[0] + fac2*col2t[0]);
347         result[0]*= result[0];
348         result[1]= (fac1*col1t[1] + fac2*col2t[1]);
349         result[1]*= result[1];
350         result[2]= (fac1*col1t[2] + fac2*col2t[2]);
351         result[2]*= result[2];
352 }
353 #endif
354
355 static float shade_by_transmission(Isect *is, ShadeInput *shi, ShadeResult *shr)
356 {
357         float dx, dy, dz, d, p;
358
359         if (0 == (shi->mat->mode & (MA_RAYTRANSP|MA_ZTRA)))
360                 return -1;
361            
362         if (shi->mat->tx_limit <= 0.0f) {
363                 d= 1.0f;
364         } 
365         else {
366                 /* shi.co[] calculated by shade_ray() */
367                 dx= shi->co[0] - is->start[0];
368                 dy= shi->co[1] - is->start[1];
369                 dz= shi->co[2] - is->start[2];
370                 d= sqrt(dx*dx+dy*dy+dz*dz);
371                 if (d > shi->mat->tx_limit)
372                         d= shi->mat->tx_limit;
373
374                 p = shi->mat->tx_falloff;
375                 if(p < 0.0f) p= 0.0f;
376                 else if (p > 10.0f) p= 10.0f;
377
378                 shr->alpha *= pow(d, p);
379                 if (shr->alpha > 1.0f)
380                         shr->alpha= 1.0f;
381         }
382
383         return d;
384 }
385
386 static void ray_fadeout_endcolor(float *col, ShadeInput *origshi, ShadeInput *shi, ShadeResult *shr, Isect *isec, float *vec)
387 {
388         /* un-intersected rays get either rendered material color or sky color */
389         if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOMAT) {
390                 VECCOPY(col, shr->combined);
391         } else if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOSKY) {
392                 VECCOPY(shi->view, vec);
393                 Normalize(shi->view);
394                 
395                 shadeSkyView(col, isec->start, shi->view, NULL, shi->thread);
396                 shadeSunView(col, shi->view);
397         }
398 }
399
400 static void ray_fadeout(Isect *is, ShadeInput *shi, float *col, float *blendcol, float dist_mir)
401 {
402         /* if fading out, linear blend against fade color */
403         float blendfac;
404
405         blendfac = 1.0 - VecLenf(shi->co, is->start)/dist_mir;
406         
407         col[0] = col[0]*blendfac + (1.0 - blendfac)*blendcol[0];
408         col[1] = col[1]*blendfac + (1.0 - blendfac)*blendcol[1];
409         col[2] = col[2]*blendfac + (1.0 - blendfac)*blendcol[2];
410 }
411
412 /* the main recursive tracer itself */
413 static void traceray(ShadeInput *origshi, ShadeResult *origshr, short depth, float *start, float *vec, float *col, ObjectInstanceRen *obi, VlakRen *vlr, int traflag)
414 {
415         ShadeInput shi;
416         ShadeResult shr;
417         Isect isec;
418         float f, f1, fr, fg, fb;
419         float ref[3];
420         float dist_mir = origshi->mat->dist_mir;
421
422         assert(0);
423
424         /* Warning, This is not that nice, and possibly a bit slow for every ray,
425         however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
426         memset(&shi, 0, sizeof(ShadeInput)); 
427         /* end warning! - Campbell */
428         
429         VECCOPY(isec.start, start);
430         VECCOPY(isec.vec, vec );
431         isec.labda = dist_mir > 0 ? dist_mir : RE_RAYTRACE_MAXDIST;
432         isec.mode= RE_RAY_MIRROR;
433
434         isec.orig.ob   = obi;
435         isec.orig.face = vlr;
436
437         if(RE_rayobject_raycast(R.raytree, &isec)) {
438                 float d= 1.0f;
439                 
440                 shi.mask= origshi->mask;
441                 shi.osatex= origshi->osatex;
442                 shi.depth= 1;                                   /* only used to indicate tracing */
443                 shi.thread= origshi->thread;
444                 //shi.sample= 0; // memset above, so dont need this
445                 shi.xs= origshi->xs;
446                 shi.ys= origshi->ys;
447                 shi.lay= origshi->lay;
448                 shi.passflag= SCE_PASS_COMBINED; /* result of tracing needs no pass info */
449                 shi.combinedflag= 0xFFFFFF;              /* ray trace does all options */
450                 //shi.do_preview= 0; // memset above, so dont need this
451                 shi.light_override= origshi->light_override;
452                 shi.mat_override= origshi->mat_override;
453                 
454                 memset(&shr, 0, sizeof(ShadeResult));
455                 
456                 shade_ray(&isec, &shi, &shr);
457                 if (traflag & RAY_TRA)
458                         d= shade_by_transmission(&isec, &shi, &shr);
459                 
460                 if(depth>0) {
461
462                         if(shi.mat->mode_l & (MA_RAYTRANSP|MA_ZTRA) && shr.alpha < 1.0f) {
463                                 float nf, f, f1, refract[3], tracol[4];
464                                 
465                                 tracol[0]= shi.r;
466                                 tracol[1]= shi.g;
467                                 tracol[2]= shi.b;
468                                 tracol[3]= col[3];      // we pass on and accumulate alpha
469                                 
470                                 if(shi.mat->mode & MA_RAYTRANSP) {
471                                         /* odd depths: use normal facing viewer, otherwise flip */
472                                         if(traflag & RAY_TRAFLIP) {
473                                                 float norm[3];
474                                                 norm[0]= - shi.vn[0];
475                                                 norm[1]= - shi.vn[1];
476                                                 norm[2]= - shi.vn[2];
477                                                 if (!refraction(refract, norm, shi.view, shi.ang))
478                                                         reflection(refract, norm, shi.view, shi.vn);
479                                         }
480                                         else {
481                                                 if (!refraction(refract, shi.vn, shi.view, shi.ang))
482                                                         reflection(refract, shi.vn, shi.view, shi.vn);
483                                         }
484                                         traflag |= RAY_TRA;
485                                         traceray(origshi, origshr, depth-1, shi.co, refract, tracol, shi.obi, shi.vlr, traflag ^ RAY_TRAFLIP);
486                                 }
487                                 else
488                                         traceray(origshi, origshr, depth-1, shi.co, shi.view, tracol, shi.obi, shi.vlr, 0);
489                                 
490                                 f= shr.alpha; f1= 1.0f-f;
491                                 nf= d * shi.mat->filter;
492                                 fr= 1.0f+ nf*(shi.r-1.0f);
493                                 fg= 1.0f+ nf*(shi.g-1.0f);
494                                 fb= 1.0f+ nf*(shi.b-1.0f);
495                                 shr.diff[0]= f*shr.diff[0] + f1*fr*tracol[0];
496                                 shr.diff[1]= f*shr.diff[1] + f1*fg*tracol[1];
497                                 shr.diff[2]= f*shr.diff[2] + f1*fb*tracol[2];
498                                 
499                                 shr.spec[0] *=f;
500                                 shr.spec[1] *=f;
501                                 shr.spec[2] *=f;
502
503                                 col[3]= f1*tracol[3] + f;
504                         }
505                         else 
506                                 col[3]= 1.0f;
507
508                         if(shi.mat->mode_l & MA_RAYMIRROR) {
509                                 f= shi.ray_mirror;
510                                 if(f!=0.0f) f*= fresnel_fac(shi.view, shi.vn, shi.mat->fresnel_mir_i, shi.mat->fresnel_mir);
511                         }
512                         else f= 0.0f;
513                         
514                         if(f!=0.0f) {
515                                 float mircol[4];
516                                 
517                                 reflection(ref, shi.vn, shi.view, NULL);                        
518                                 traceray(origshi, origshr, depth-1, shi.co, ref, mircol, shi.obi, shi.vlr, 0);
519                         
520                                 f1= 1.0f-f;
521
522                                 /* combine */
523                                 //color_combine(col, f*fr*(1.0f-shr.spec[0]), f1, col, shr.diff);
524                                 //col[0]+= shr.spec[0];
525                                 //col[1]+= shr.spec[1];
526                                 //col[2]+= shr.spec[2];
527                                 
528                                 fr= shi.mirr;
529                                 fg= shi.mirg;
530                                 fb= shi.mirb;
531                 
532                                 col[0]= f*fr*(1.0f-shr.spec[0])*mircol[0] + f1*shr.diff[0] + shr.spec[0];
533                                 col[1]= f*fg*(1.0f-shr.spec[1])*mircol[1] + f1*shr.diff[1] + shr.spec[1];
534                                 col[2]= f*fb*(1.0f-shr.spec[2])*mircol[2] + f1*shr.diff[2] + shr.spec[2];
535                         }
536                         else {
537                                 col[0]= shr.diff[0] + shr.spec[0];
538                                 col[1]= shr.diff[1] + shr.spec[1];
539                                 col[2]= shr.diff[2] + shr.spec[2];
540                         }
541                         
542                         if (dist_mir > 0.0) {
543                                 float blendcol[3];
544                                 
545                                 /* max ray distance set, but found an intersection, so fade this color
546                                  * out towards the sky/material color for a smooth transition */
547                                 ray_fadeout_endcolor(blendcol, origshi, &shi, origshr, &isec, vec);
548                                 ray_fadeout(&isec, &shi, col, blendcol, dist_mir);
549                         }
550                 }
551                 else {
552                         col[0]= shr.diff[0] + shr.spec[0];
553                         col[1]= shr.diff[1] + shr.spec[1];
554                         col[2]= shr.diff[2] + shr.spec[2];
555                 }
556                 
557         }
558         else {
559                 ray_fadeout_endcolor(col, origshi, &shi, origshr, &isec, vec);
560         }
561 }
562
563 /* **************** jitter blocks ********** */
564
565 /* calc distributed planar energy */
566
567 static void DP_energy(float *table, float *vec, int tot, float xsize, float ysize)
568 {
569         int x, y, a;
570         float *fp, force[3], result[3];
571         float dx, dy, dist, min;
572         
573         min= MIN2(xsize, ysize);
574         min*= min;
575         result[0]= result[1]= 0.0f;
576         
577         for(y= -1; y<2; y++) {
578                 dy= ysize*y;
579                 for(x= -1; x<2; x++) {
580                         dx= xsize*x;
581                         fp= table;
582                         for(a=0; a<tot; a++, fp+= 2) {
583                                 force[0]= vec[0] - fp[0]-dx;
584                                 force[1]= vec[1] - fp[1]-dy;
585                                 dist= force[0]*force[0] + force[1]*force[1];
586                                 if(dist < min && dist>0.0f) {
587                                         result[0]+= force[0]/dist;
588                                         result[1]+= force[1]/dist;
589                                 }
590                         }
591                 }
592         }
593         vec[0] += 0.1*min*result[0]/(float)tot;
594         vec[1] += 0.1*min*result[1]/(float)tot;
595         // cyclic clamping
596         vec[0]= vec[0] - xsize*floor(vec[0]/xsize + 0.5);
597         vec[1]= vec[1] - ysize*floor(vec[1]/ysize + 0.5);
598 }
599
600 // random offset of 1 in 2
601 static void jitter_plane_offset(float *jitter1, float *jitter2, int tot, float sizex, float sizey, float ofsx, float ofsy)
602 {
603         float dsizex= sizex*ofsx;
604         float dsizey= sizey*ofsy;
605         float hsizex= 0.5*sizex, hsizey= 0.5*sizey;
606         int x;
607         
608         for(x=tot; x>0; x--, jitter1+=2, jitter2+=2) {
609                 jitter2[0]= jitter1[0] + dsizex;
610                 jitter2[1]= jitter1[1] + dsizey;
611                 if(jitter2[0] > hsizex) jitter2[0]-= sizex;
612                 if(jitter2[1] > hsizey) jitter2[1]-= sizey;
613         }
614 }
615
616 /* called from convertBlenderScene.c */
617 /* we do this in advance to get consistant random, not alter the render seed, and be threadsafe */
618 void init_jitter_plane(LampRen *lar)
619 {
620         float *fp;
621         int x, iter=12, tot= lar->ray_totsamp;
622         
623         /* test if already initialized */
624         if(lar->jitter) return;
625         
626         /* at least 4, or max threads+1 tables */
627         if(BLENDER_MAX_THREADS < 4) x= 4;
628         else x= BLENDER_MAX_THREADS+1;
629         fp= lar->jitter= MEM_callocN(x*tot*2*sizeof(float), "lamp jitter tab");
630         
631         /* if 1 sample, we leave table to be zero's */
632         if(tot>1) {
633                 
634                 /* set per-lamp fixed seed */
635                 BLI_srandom(tot);
636                 
637                 /* fill table with random locations, area_size large */
638                 for(x=0; x<tot; x++, fp+=2) {
639                         fp[0]= (BLI_frand()-0.5)*lar->area_size;
640                         fp[1]= (BLI_frand()-0.5)*lar->area_sizey;
641                 }
642                 
643                 while(iter--) {
644                         fp= lar->jitter;
645                         for(x=tot; x>0; x--, fp+=2) {
646                                 DP_energy(lar->jitter, fp, tot, lar->area_size, lar->area_sizey);
647                         }
648                 }
649         }       
650         /* create the dithered tables (could just check lamp type!) */
651         jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.0f);
652         jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.5f);
653         jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0f, 0.5f);
654 }
655
656 /* table around origin, -0.5*size to 0.5*size */
657 static float *give_jitter_plane(LampRen *lar, int thread, int xs, int ys)
658 {
659         int tot;
660         
661         tot= lar->ray_totsamp;
662                         
663         if(lar->ray_samp_type & LA_SAMP_JITTER) {
664                 /* made it threadsafe */
665                 
666                 if(lar->xold[thread]!=xs || lar->yold[thread]!=ys) {
667                         jitter_plane_offset(lar->jitter, lar->jitter+2*(thread+1)*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(thread), BLI_thread_frand(thread));
668                         lar->xold[thread]= xs; 
669                         lar->yold[thread]= ys;
670                 }
671                 return lar->jitter+2*(thread+1)*tot;
672         }
673         if(lar->ray_samp_type & LA_SAMP_DITHER) {
674                 return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1));
675         }
676         
677         return lar->jitter;
678 }
679
680
681 /* **************** QMC sampling *************** */
682
683 static void halton_sample(double *ht_invprimes, double *ht_nums, double *v)
684 {
685         // incremental halton sequence generator, from:
686         // "Instant Radiosity", Keller A.
687         unsigned int i;
688         
689         for (i = 0; i < 2; i++)
690         {
691                 double r = fabs((1.0 - ht_nums[i]) - 1e-10);
692                 
693                 if (ht_invprimes[i] >= r)
694                 {
695                         double lasth;
696                         double h = ht_invprimes[i];
697                         
698                         do {
699                                 lasth = h;
700                                 h *= ht_invprimes[i];
701                         } while (h >= r);
702                         
703                         ht_nums[i] += ((lasth + h) - 1.0);
704                 }
705                 else
706                         ht_nums[i] += ht_invprimes[i];
707                 
708                 v[i] = (float)ht_nums[i];
709         }
710 }
711
712 /* Generate Hammersley points in [0,1)^2
713  * From Lucille renderer */
714 static void hammersley_create(double *out, int n)
715 {
716         double p, t;
717         int k, kk;
718
719         for (k = 0; k < n; k++) {
720                 t = 0;
721                 for (p = 0.5, kk = k; kk; p *= 0.5, kk >>= 1) {
722                         if (kk & 1) {           /* kk mod 2 = 1         */
723                                 t += p;
724                         }
725                 }
726         
727                 out[2 * k + 0] = (double)k / (double)n;
728                 out[2 * k + 1] = t;
729         }
730 }
731
732 static struct QMCSampler *QMC_initSampler(int type, int tot)
733 {       
734         QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler");
735         qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table");
736
737         qsa->tot = tot;
738         qsa->type = type;
739         
740         if (qsa->type==SAMP_TYPE_HAMMERSLEY) 
741                 hammersley_create(qsa->samp2d, qsa->tot);
742                 
743         return qsa;
744 }
745
746 static void QMC_initPixel(QMCSampler *qsa, int thread)
747 {
748         if (qsa->type==SAMP_TYPE_HAMMERSLEY)
749         {
750                 /* hammersley sequence is fixed, already created in QMCSampler init.
751                  * per pixel, gets a random offset. We create separate offsets per thread, for write-safety */
752                 qsa->offs[thread][0] = 0.5 * BLI_thread_frand(thread);
753                 qsa->offs[thread][1] = 0.5 * BLI_thread_frand(thread);
754         }
755         else {  /* SAMP_TYPE_HALTON */
756                 
757                 /* generate a new randomised halton sequence per pixel
758                  * to alleviate qmc artifacts and make it reproducable 
759                  * between threads/frames */
760                 double ht_invprimes[2], ht_nums[2];
761                 double r[2];
762                 int i;
763         
764                 ht_nums[0] = BLI_thread_frand(thread);
765                 ht_nums[1] = BLI_thread_frand(thread);
766                 ht_invprimes[0] = 0.5;
767                 ht_invprimes[1] = 1.0/3.0;
768                 
769                 for (i=0; i< qsa->tot; i++) {
770                         halton_sample(ht_invprimes, ht_nums, r);
771                         qsa->samp2d[2*i+0] = r[0];
772                         qsa->samp2d[2*i+1] = r[1];
773                 }
774         }
775 }
776
777 static void QMC_freeSampler(QMCSampler *qsa)
778 {
779         MEM_freeN(qsa->samp2d);
780         MEM_freeN(qsa);
781 }
782
783 static void QMC_getSample(double *s, QMCSampler *qsa, int thread, int num)
784 {
785         if (qsa->type == SAMP_TYPE_HAMMERSLEY) {
786                 s[0] = fmod(qsa->samp2d[2*num+0] + qsa->offs[thread][0], 1.0f);
787                 s[1] = fmod(qsa->samp2d[2*num+1] + qsa->offs[thread][1], 1.0f);
788         }
789         else { /* SAMP_TYPE_HALTON */
790                 s[0] = qsa->samp2d[2*num+0];
791                 s[1] = qsa->samp2d[2*num+1];
792         }
793 }
794
795 /* phong weighted disc using 'blur' for exponent, centred on 0,0 */
796 static void QMC_samplePhong(float *vec, QMCSampler *qsa, int thread, int num, float blur)
797 {
798         double s[2];
799         float phi, pz, sqr;
800         
801         QMC_getSample(s, qsa, thread, num);
802
803         phi = s[0]*2*M_PI;
804         pz = pow(s[1], blur);
805         sqr = sqrt(1.0f-pz*pz);
806
807         vec[0] = cos(phi)*sqr;
808         vec[1] = sin(phi)*sqr;
809         vec[2] = 0.0f;
810 }
811
812 /* rect of edge lengths sizex, sizey, centred on 0.0,0.0 i.e. ranging from -sizex/2 to +sizey/2 */
813 static void QMC_sampleRect(float *vec, QMCSampler *qsa, int thread, int num, float sizex, float sizey)
814 {
815         double s[2];
816
817         QMC_getSample(s, qsa, thread, num);
818                 
819         vec[0] = (s[0] - 0.5) * sizex;
820         vec[1] = (s[1] - 0.5) * sizey;
821         vec[2] = 0.0f;
822 }
823
824 /* disc of radius 'radius', centred on 0,0 */
825 static void QMC_sampleDisc(float *vec, QMCSampler *qsa, int thread, int num, float radius)
826 {
827         double s[2];
828         float phi, sqr;
829         
830         QMC_getSample(s, qsa, thread, num);
831         
832         phi = s[0]*2*M_PI;
833         sqr = sqrt(s[1]);
834
835         vec[0] = cos(phi)*sqr* radius/2.0;
836         vec[1] = sin(phi)*sqr* radius/2.0;
837         vec[2] = 0.0f;
838 }
839
840 /* uniform hemisphere sampling */
841 static void QMC_sampleHemi(float *vec, QMCSampler *qsa, int thread, int num)
842 {
843         double s[2];
844         float phi, sqr;
845         
846         QMC_getSample(s, qsa, thread, num);
847         
848         phi = s[0]*2.f*M_PI;    
849         sqr = sqrt(s[1]);
850
851         vec[0] = cos(phi)*sqr;
852         vec[1] = sin(phi)*sqr;
853         vec[2] = 1.f - s[1]*s[1];
854 }
855
856 #if 0 /* currently not used */
857 /* cosine weighted hemisphere sampling */
858 static void QMC_sampleHemiCosine(float *vec, QMCSampler *qsa, int thread, int num)
859 {
860         double s[2];
861         float phi, sqr;
862         
863         QMC_getSample(s, qsa, thread, num);
864         
865         phi = s[0]*2.f*M_PI;    
866         sqr = s[1]*sqrt(2-s[1]*s[1]);
867
868         vec[0] = cos(phi)*sqr;
869         vec[1] = sin(phi)*sqr;
870         vec[2] = 1.f - s[1]*s[1];
871
872 }
873 #endif
874
875 /* called from convertBlenderScene.c */
876 void init_render_qmcsampler(Render *re)
877 {
878         re->qmcsamplers= MEM_callocN(sizeof(ListBase)*BLENDER_MAX_THREADS, "QMCListBase");
879 }
880
881 static QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot)
882 {
883         QMCSampler *qsa;
884
885         /* create qmc samplers as needed, since recursion makes it hard to
886          * predict how many are needed */
887
888         for(qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) {
889                 if(qsa->type == type && qsa->tot == tot && !qsa->used) {
890                         qsa->used= 1;
891                         return qsa;
892                 }
893         }
894
895         qsa= QMC_initSampler(type, tot);
896         qsa->used= 1;
897         BLI_addtail(&re->qmcsamplers[thread], qsa);
898
899         return qsa;
900 }
901
902 static void release_thread_qmcsampler(Render *re, int thread, QMCSampler *qsa)
903 {
904         qsa->used= 0;
905 }
906
907 void free_render_qmcsampler(Render *re)
908 {
909         QMCSampler *qsa, *next;
910         int a;
911
912         if(re->qmcsamplers) {
913                 for(a=0; a<BLENDER_MAX_THREADS; a++) {
914                         for(qsa=re->qmcsamplers[a].first; qsa; qsa=next) {
915                                 next= qsa->next;
916                                 QMC_freeSampler(qsa);
917                         }
918
919                         re->qmcsamplers[a].first= re->qmcsamplers[a].last= NULL;
920                 }
921
922                 MEM_freeN(re->qmcsamplers);
923                 re->qmcsamplers= NULL;
924         }
925 }
926
927 static int adaptive_sample_variance(int samples, float *col, float *colsq, float thresh)
928 {
929         float var[3], mean[3];
930
931         /* scale threshold just to give a bit more precision in input rather than dealing with 
932          * tiny tiny numbers in the UI */
933         thresh /= 2;
934         
935         mean[0] = col[0] / (float)samples;
936         mean[1] = col[1] / (float)samples;
937         mean[2] = col[2] / (float)samples;
938
939         var[0] = (colsq[0] / (float)samples) - (mean[0]*mean[0]);
940         var[1] = (colsq[1] / (float)samples) - (mean[1]*mean[1]);
941         var[2] = (colsq[2] / (float)samples) - (mean[2]*mean[2]);
942         
943         if ((var[0] * 0.4 < thresh) && (var[1] * 0.3 < thresh) && (var[2] * 0.6 < thresh))
944                 return 1;
945         else
946                 return 0;
947 }
948
949 static int adaptive_sample_contrast_val(int samples, float prev, float val, float thresh)
950 {
951         /* if the last sample's contribution to the total value was below a small threshold
952          * (i.e. the samples taken are very similar), then taking more samples that are probably 
953          * going to be the same is wasting effort */
954         if (fabs( prev/(float)(samples-1) - val/(float)samples ) < thresh) {
955                 return 1;
956         } else
957                 return 0;
958 }
959
960 static float get_avg_speed(ShadeInput *shi)
961 {
962         float pre_x, pre_y, post_x, post_y, speedavg;
963         
964         pre_x = (shi->winspeed[0] == PASS_VECTOR_MAX)?0.0:shi->winspeed[0];
965         pre_y = (shi->winspeed[1] == PASS_VECTOR_MAX)?0.0:shi->winspeed[1];
966         post_x = (shi->winspeed[2] == PASS_VECTOR_MAX)?0.0:shi->winspeed[2];
967         post_y = (shi->winspeed[3] == PASS_VECTOR_MAX)?0.0:shi->winspeed[3];
968         
969         speedavg = (sqrt(pre_x*pre_x + pre_y*pre_y) + sqrt(post_x*post_x + post_y*post_y)) / 2.0;
970         
971         return speedavg;
972 }
973
974 /* ***************** main calls ************** */
975
976
977 static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr)
978 {
979         QMCSampler *qsa=NULL;
980         int samp_type;
981         
982         float samp3d[3], orthx[3], orthy[3];
983         float v_refract[3], v_refract_new[3];
984         float sampcol[4], colsq[4];
985         
986         float blur = pow(1.0 - shi->mat->gloss_tra, 3);
987         short max_samples = shi->mat->samp_gloss_tra;
988         float adapt_thresh = shi->mat->adapt_thresh_tra;
989         
990         int samples=0;
991         
992         colsq[0] = colsq[1] = colsq[2] = 0.0;
993         col[0] = col[1] = col[2] = 0.0;
994         col[3]= shr->alpha;
995         
996         if (blur > 0.0) {
997                 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
998                 else samp_type = SAMP_TYPE_HAMMERSLEY;
999                         
1000                 /* all samples are generated per pixel */
1001                 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1002                 QMC_initPixel(qsa, shi->thread);
1003         } else 
1004                 max_samples = 1;
1005         
1006
1007         while (samples < max_samples) {         
1008                 refraction(v_refract, shi->vn, shi->view, shi->ang);
1009                 
1010                 if (max_samples > 1) {
1011                         /* get a quasi-random vector from a phong-weighted disc */
1012                         QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1013                                                 
1014                         VecOrthoBasisf(v_refract, orthx, orthy);
1015                         VecMulf(orthx, samp3d[0]);
1016                         VecMulf(orthy, samp3d[1]);
1017                                 
1018                         /* and perturb the refraction vector in it */
1019                         VecAddf(v_refract_new, v_refract, orthx);
1020                         VecAddf(v_refract_new, v_refract_new, orthy);
1021                         
1022                         Normalize(v_refract_new);
1023                 } else {
1024                         /* no blurriness, use the original normal */
1025                         VECCOPY(v_refract_new, v_refract);
1026                 }
1027         
1028                 traceray(shi, shr, shi->mat->ray_depth_tra, shi->co, v_refract_new, sampcol, shi->obi, shi->vlr, RAY_TRA|RAY_TRAFLIP);
1029         
1030                 col[0] += sampcol[0];
1031                 col[1] += sampcol[1];
1032                 col[2] += sampcol[2];
1033                 col[3] += sampcol[3];
1034                 
1035                 /* for variance calc */
1036                 colsq[0] += sampcol[0]*sampcol[0];
1037                 colsq[1] += sampcol[1]*sampcol[1];
1038                 colsq[2] += sampcol[2]*sampcol[2];
1039                 
1040                 samples++;
1041                 
1042                 /* adaptive sampling */
1043                 if (adapt_thresh < 1.0 && samples > max_samples/2) 
1044                 {
1045                         if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1046                                 break;
1047                         
1048                         /* if the pixel so far is very dark, we can get away with less samples */
1049                         if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1050                                 max_samples--;
1051                 }
1052         }
1053         
1054         col[0] /= (float)samples;
1055         col[1] /= (float)samples;
1056         col[2] /= (float)samples;
1057         col[3] /= (float)samples;
1058         
1059         if (qsa)
1060                 release_thread_qmcsampler(&R, shi->thread, qsa);
1061 }
1062
1063 static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float fresnelfac)
1064 {
1065         QMCSampler *qsa=NULL;
1066         int samp_type;
1067         
1068         float samp3d[3], orthx[3], orthy[3];
1069         float v_nor_new[3], v_reflect[3];
1070         float sampcol[4], colsq[4];
1071                 
1072         float blur = pow(1.0 - shi->mat->gloss_mir, 3);
1073         short max_samples = shi->mat->samp_gloss_mir;
1074         float adapt_thresh = shi->mat->adapt_thresh_mir;
1075         float aniso = 1.0 - shi->mat->aniso_gloss_mir;
1076         
1077         int samples=0;
1078         
1079         col[0] = col[1] = col[2] = 0.0;
1080         colsq[0] = colsq[1] = colsq[2] = 0.0;
1081         
1082         if (blur > 0.0) {
1083                 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1084                 else samp_type = SAMP_TYPE_HAMMERSLEY;
1085                         
1086                 /* all samples are generated per pixel */
1087                 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1088                 QMC_initPixel(qsa, shi->thread);
1089         } else 
1090                 max_samples = 1;
1091         
1092         while (samples < max_samples) {
1093                                 
1094                 if (max_samples > 1) {
1095                         /* get a quasi-random vector from a phong-weighted disc */
1096                         QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1097
1098                         /* find the normal's perpendicular plane, blurring along tangents
1099                          * if tangent shading enabled */
1100                         if (shi->mat->mode & (MA_TANGENT_V)) {
1101                                 Crossf(orthx, shi->vn, shi->tang);      // bitangent
1102                                 VECCOPY(orthy, shi->tang);
1103                                 VecMulf(orthx, samp3d[0]);
1104                                 VecMulf(orthy, samp3d[1]*aniso);
1105                         } else {
1106                                 VecOrthoBasisf(shi->vn, orthx, orthy);
1107                                 VecMulf(orthx, samp3d[0]);
1108                                 VecMulf(orthy, samp3d[1]);
1109                         }
1110
1111                         /* and perturb the normal in it */
1112                         VecAddf(v_nor_new, shi->vn, orthx);
1113                         VecAddf(v_nor_new, v_nor_new, orthy);
1114                         Normalize(v_nor_new);
1115                 } else {
1116                         /* no blurriness, use the original normal */
1117                         VECCOPY(v_nor_new, shi->vn);
1118                 }
1119                 
1120                 if((shi->vlr->flag & R_SMOOTH)) 
1121                         reflection(v_reflect, v_nor_new, shi->view, shi->facenor);
1122                 else
1123                         reflection(v_reflect, v_nor_new, shi->view, NULL);
1124                 
1125                 traceray(shi, shr, shi->mat->ray_depth, shi->co, v_reflect, sampcol, shi->obi, shi->vlr, 0);
1126
1127                 
1128                 col[0] += sampcol[0];
1129                 col[1] += sampcol[1];
1130                 col[2] += sampcol[2];
1131         
1132                 /* for variance calc */
1133                 colsq[0] += sampcol[0]*sampcol[0];
1134                 colsq[1] += sampcol[1]*sampcol[1];
1135                 colsq[2] += sampcol[2]*sampcol[2];
1136                 
1137                 samples++;
1138
1139                 /* adaptive sampling */
1140                 if (adapt_thresh > 0.0 && samples > max_samples/3) 
1141                 {
1142                         if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1143                                 break;
1144                         
1145                         /* if the pixel so far is very dark, we can get away with less samples */
1146                         if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1147                                 max_samples--;
1148                 
1149                         /* reduce samples when reflection is dim due to low ray mirror blend value or fresnel factor
1150                          * and when reflection is blurry */
1151                         if (fresnelfac < 0.1 * (blur+1)) {
1152                                 max_samples--;
1153                                 
1154                                 /* even more for very dim */
1155                                 if (fresnelfac < 0.05 * (blur+1)) 
1156                                         max_samples--;
1157                         }
1158                 }
1159         }
1160         
1161         col[0] /= (float)samples;
1162         col[1] /= (float)samples;
1163         col[2] /= (float)samples;
1164         
1165         if (qsa)
1166                 release_thread_qmcsampler(&R, shi->thread, qsa);
1167 }
1168
1169 /* extern call from render loop */
1170 void ray_trace(ShadeInput *shi, ShadeResult *shr)
1171 {
1172         float i, f, f1, fr, fg, fb;
1173         float mircol[4], tracol[4];
1174         float diff[3];
1175         int do_tra, do_mir;
1176         
1177         do_tra= ((shi->mat->mode & (MA_RAYTRANSP)) && shr->alpha!=1.0f);
1178         do_mir= ((shi->mat->mode & MA_RAYMIRROR) && shi->ray_mirror!=0.0f);
1179
1180         
1181         /* raytrace mirror amd refract like to separate the spec color */
1182         if(shi->combinedflag & SCE_PASS_SPEC)
1183                 VECSUB(diff, shr->combined, shr->spec) /* no ; */
1184         else
1185                 VECCOPY(diff, shr->combined);
1186         
1187         if(do_tra) {
1188                 float olddiff[3];
1189                 
1190                 trace_refract(tracol, shi, shr);
1191                 
1192                 f= shr->alpha; f1= 1.0f-f;
1193                 fr= 1.0f+ shi->mat->filter*(shi->r-1.0f);
1194                 fg= 1.0f+ shi->mat->filter*(shi->g-1.0f);
1195                 fb= 1.0f+ shi->mat->filter*(shi->b-1.0f);
1196                 
1197                 /* for refract pass */
1198                 VECCOPY(olddiff, diff);
1199                 
1200                 diff[0]= f*diff[0] + f1*fr*tracol[0];
1201                 diff[1]= f*diff[1] + f1*fg*tracol[1];
1202                 diff[2]= f*diff[2] + f1*fb*tracol[2];
1203                 
1204                 if(shi->passflag & SCE_PASS_REFRACT)
1205                         VECSUB(shr->refr, diff, olddiff);
1206                 
1207                 if(!(shi->combinedflag & SCE_PASS_REFRACT))
1208                         VECSUB(diff, diff, shr->refr);
1209                 
1210                 shr->alpha= tracol[3];
1211         }
1212         
1213         if(do_mir) {
1214         
1215                 i= shi->ray_mirror*fresnel_fac(shi->view, shi->vn, shi->mat->fresnel_mir_i, shi->mat->fresnel_mir);
1216                 if(i!=0.0f) {
1217                 
1218                         trace_reflect(mircol, shi, shr, i);
1219                         
1220                         fr= i*shi->mirr;
1221                         fg= i*shi->mirg;
1222                         fb= i*shi->mirb;
1223
1224                         if(shi->passflag & SCE_PASS_REFLECT) {
1225                                 /* mirror pass is not blocked out with spec */
1226                                 shr->refl[0]= fr*mircol[0] - fr*diff[0];
1227                                 shr->refl[1]= fg*mircol[1] - fg*diff[1];
1228                                 shr->refl[2]= fb*mircol[2] - fb*diff[2];
1229                         }
1230                         
1231                         if(shi->combinedflag & SCE_PASS_REFLECT) {
1232                                 
1233                                 f= fr*(1.0f-shr->spec[0]);      f1= 1.0f-i;
1234                                 diff[0]= f*mircol[0] + f1*diff[0];
1235                                 
1236                                 f= fg*(1.0f-shr->spec[1]);      f1= 1.0f-i;
1237                                 diff[1]= f*mircol[1] + f1*diff[1];
1238                                 
1239                                 f= fb*(1.0f-shr->spec[2]);      f1= 1.0f-i;
1240                                 diff[2]= f*mircol[2] + f1*diff[2];
1241                         }
1242                 }
1243         }
1244         /* put back together */
1245         if(shi->combinedflag & SCE_PASS_SPEC)
1246                 VECADD(shr->combined, diff, shr->spec) /* no ; */
1247         else
1248                 VECCOPY(shr->combined, diff);
1249 }
1250
1251 /* color 'shadfac' passes through 'col' with alpha and filter */
1252 /* filter is only applied on alpha defined transparent part */
1253 static void addAlphaLight(float *shadfac, float *col, float alpha, float filter)
1254 {
1255         float fr, fg, fb;
1256         
1257         fr= 1.0f+ filter*(col[0]-1.0f);
1258         fg= 1.0f+ filter*(col[1]-1.0f);
1259         fb= 1.0f+ filter*(col[2]-1.0f);
1260         
1261         shadfac[0]= alpha*col[0] + fr*(1.0f-alpha)*shadfac[0];
1262         shadfac[1]= alpha*col[1] + fg*(1.0f-alpha)*shadfac[1];
1263         shadfac[2]= alpha*col[2] + fb*(1.0f-alpha)*shadfac[2];
1264         
1265         shadfac[3]= (1.0f-alpha)*shadfac[3];
1266 }
1267
1268 static void ray_trace_shadow_tra(Isect *is, int depth, int traflag)
1269 {
1270         /* ray to lamp, find first face that intersects, check alpha properties,
1271            if it has col[3]>0.0f  continue. so exit when alpha is full */
1272         ShadeInput shi;
1273         ShadeResult shr;
1274         
1275         assert(0);
1276
1277         if(RE_rayobject_raycast(R.raytree, is)) {
1278                 float d= 1.0f;
1279                 /* we got a face */
1280                 
1281                 /* Warning, This is not that nice, and possibly a bit slow for every ray,
1282                 however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1283                 memset(&shi, 0, sizeof(ShadeInput)); 
1284                 /* end warning! - Campbell */
1285                 
1286                 shi.depth= 1;                                   /* only used to indicate tracing */
1287                 shi.mask= 1;
1288                 
1289                 /*shi.osatex= 0;
1290                 shi.thread= shi.sample= 0;
1291                 shi.lay= 0;
1292                 shi.passflag= 0;
1293                 shi.combinedflag= 0;
1294                 shi.do_preview= 0;
1295                 shi.light_override= NULL;
1296                 shi.mat_override= NULL;*/
1297                 
1298                 shade_ray(is, &shi, &shr);
1299                 if (traflag & RAY_TRA)
1300                         d= shade_by_transmission(is, &shi, &shr);
1301                 
1302                 /* mix colors based on shadfac (rgb + amount of light factor) */
1303                 addAlphaLight(is->col, shr.diff, shr.alpha, d*shi.mat->filter);
1304                 
1305                 if(depth>0 && is->col[3]>0.0f) {
1306                         
1307                         /* adapt isect struct */
1308                         VECCOPY(is->start, shi.co);
1309
1310                         is->orig.ob   = shi.obi;
1311                         is->orig.face = shi.vlr;
1312
1313                         ray_trace_shadow_tra(is, depth-1, traflag | RAY_TRA);
1314                 }
1315         }
1316 }
1317
1318 /* not used, test function for ambient occlusion (yaf: pathlight) */
1319 /* main problem; has to be called within shading loop, giving unwanted recursion */
1320 int ray_trace_shadow_rad(ShadeInput *ship, ShadeResult *shr)
1321 {
1322         static int counter=0, only_one= 0;
1323         extern float hashvectf[];
1324         Isect isec;
1325         ShadeInput shi;
1326         ShadeResult shr_t;
1327         float vec[3], accum[3], div= 0.0f;
1328         int a;
1329         
1330         assert(0);
1331         
1332         if(only_one) {
1333                 return 0;
1334         }
1335         only_one= 1;
1336         
1337         accum[0]= accum[1]= accum[2]= 0.0f;
1338         isec.mode= RE_RAY_MIRROR;
1339         isec.orig.ob   = ship->obi;
1340         isec.orig.face = ship->vlr;
1341         
1342         for(a=0; a<8*8; a++) {
1343                 
1344                 counter+=3;
1345                 counter %= 768;
1346                 VECCOPY(vec, hashvectf+counter);
1347                 if(ship->vn[0]*vec[0]+ship->vn[1]*vec[1]+ship->vn[2]*vec[2]>0.0f) {
1348                         vec[0]-= vec[0];
1349                         vec[1]-= vec[1];
1350                         vec[2]-= vec[2];
1351                 }
1352
1353                 VECCOPY(isec.start, ship->co);
1354                 VECCOPY(isec.vec, vec );
1355                 isec.labda = RE_RAYTRACE_MAXDIST;
1356
1357                 if(RE_rayobject_raycast(R.raytree, &isec)) {
1358                         float fac;
1359                         
1360                         /* Warning, This is not that nice, and possibly a bit slow for every ray,
1361                         however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1362                         memset(&shi, 0, sizeof(ShadeInput)); 
1363                         /* end warning! - Campbell */
1364                         
1365                         shade_ray(&isec, &shi, &shr_t);
1366                         fac= isec.labda*isec.labda;
1367                         fac= 1.0f;
1368                         accum[0]+= fac*(shr_t.diff[0]+shr_t.spec[0]);
1369                         accum[1]+= fac*(shr_t.diff[1]+shr_t.spec[1]);
1370                         accum[2]+= fac*(shr_t.diff[2]+shr_t.spec[2]);
1371                         div+= fac;
1372                 }
1373                 else div+= 1.0f;
1374         }
1375         
1376         if(div!=0.0f) {
1377                 shr->diff[0]+= accum[0]/div;
1378                 shr->diff[1]+= accum[1]/div;
1379                 shr->diff[2]+= accum[2]/div;
1380         }
1381         shr->alpha= 1.0f;
1382         
1383         only_one= 0;
1384         return 1;
1385 }
1386
1387 /* aolight: function to create random unit sphere vectors for total random sampling */
1388 static void RandomSpherical(float *v)
1389 {
1390         float r;
1391         v[2] = 2.f*BLI_frand()-1.f;
1392         if ((r = 1.f - v[2]*v[2])>0.f) {
1393                 float a = 6.283185307f*BLI_frand();
1394                 r = sqrt(r);
1395                 v[0] = r * cos(a);
1396                 v[1] = r * sin(a);
1397         }
1398         else v[2] = 1.f;
1399 }
1400
1401 /* calc distributed spherical energy */
1402 static void DS_energy(float *sphere, int tot, float *vec)
1403 {
1404         float *fp, fac, force[3], res[3];
1405         int a;
1406         
1407         res[0]= res[1]= res[2]= 0.0f;
1408         
1409         for(a=0, fp=sphere; a<tot; a++, fp+=3) {
1410                 VecSubf(force, vec, fp);
1411                 fac= force[0]*force[0] + force[1]*force[1] + force[2]*force[2];
1412                 if(fac!=0.0f) {
1413                         fac= 1.0f/fac;
1414                         res[0]+= fac*force[0];
1415                         res[1]+= fac*force[1];
1416                         res[2]+= fac*force[2];
1417                 }
1418         }
1419
1420         VecMulf(res, 0.5);
1421         VecAddf(vec, vec, res);
1422         Normalize(vec);
1423         
1424 }
1425
1426 /* called from convertBlenderScene.c */
1427 /* creates an equally distributed spherical sample pattern */
1428 /* and allocates threadsafe memory */
1429 void init_ao_sphere(World *wrld)
1430 {
1431         float *fp;
1432         int a, tot, iter= 16;
1433
1434         /* we make twice the amount of samples, because only a hemisphere is used */
1435         tot= 2*wrld->aosamp*wrld->aosamp;
1436         
1437         wrld->aosphere= MEM_mallocN(3*tot*sizeof(float), "AO sphere");
1438         
1439         /* fixed random */
1440         BLI_srandom(tot);
1441         
1442         /* init */
1443         fp= wrld->aosphere;
1444         for(a=0; a<tot; a++, fp+= 3) {
1445                 RandomSpherical(fp);
1446         }
1447         
1448         while(iter--) {
1449                 for(a=0, fp= wrld->aosphere; a<tot; a++, fp+= 3) {
1450                         DS_energy(wrld->aosphere, tot, fp);
1451                 }
1452         }
1453         
1454         /* tables */
1455         wrld->aotables= MEM_mallocN(BLENDER_MAX_THREADS*3*tot*sizeof(float), "AO tables");
1456 }
1457
1458 /* give per thread a table, we have to compare xs ys because of way OSA works... */
1459 static float *threadsafe_table_sphere(int test, int thread, int xs, int ys, int tot)
1460 {
1461         static int xso[BLENDER_MAX_THREADS], yso[BLENDER_MAX_THREADS];
1462         static int firsttime= 1;
1463         
1464         if(firsttime) {
1465                 memset(xso, 255, sizeof(xso));
1466                 memset(yso, 255, sizeof(yso));
1467                 firsttime= 0;
1468         }
1469         
1470         if(xs==xso[thread] && ys==yso[thread]) return R.wrld.aotables+ thread*tot*3;
1471         if(test) return NULL;
1472         xso[thread]= xs; yso[thread]= ys;
1473         return R.wrld.aotables+ thread*tot*3;
1474 }
1475
1476 static float *sphere_sampler(int type, int resol, int thread, int xs, int ys)
1477 {
1478         int tot;
1479         float *vec;
1480         
1481         tot= 2*resol*resol;
1482
1483         if (type & WO_AORNDSMP) {
1484                 float *sphere;
1485                 int a;
1486                 
1487                 // always returns table
1488                 sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1489
1490                 /* total random sampling. NOT THREADSAFE! (should be removed, is not useful) */
1491                 vec= sphere;
1492                 for (a=0; a<tot; a++, vec+=3) {
1493                         RandomSpherical(vec);
1494                 }
1495                 
1496                 return sphere;
1497         } 
1498         else {
1499                 float *sphere;
1500                 float cosfi, sinfi, cost, sint;
1501                 float ang, *vec1;
1502                 int a;
1503                 
1504                 // returns table if xs and ys were equal to last call
1505                 sphere= threadsafe_table_sphere(1, thread, xs, ys, tot);
1506                 if(sphere==NULL) {
1507                         sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1508                         
1509                         // random rotation
1510                         ang= BLI_thread_frand(thread);
1511                         sinfi= sin(ang); cosfi= cos(ang);
1512                         ang= BLI_thread_frand(thread);
1513                         sint= sin(ang); cost= cos(ang);
1514                         
1515                         vec= R.wrld.aosphere;
1516                         vec1= sphere;
1517                         for (a=0; a<tot; a++, vec+=3, vec1+=3) {
1518                                 vec1[0]= cost*cosfi*vec[0] - sinfi*vec[1] + sint*cosfi*vec[2];
1519                                 vec1[1]= cost*sinfi*vec[0] + cosfi*vec[1] + sint*sinfi*vec[2];
1520                                 vec1[2]= -sint*vec[0] + cost*vec[2];                    
1521                         }
1522                 }
1523                 return sphere;
1524         }
1525 }
1526
1527 static void ray_ao_qmc(ShadeInput *shi, float *shadfac)
1528 {
1529         Isect isec;
1530         QMCSampler *qsa=NULL;
1531         float samp3d[3];
1532         float up[3], side[3], dir[3], nrm[3];
1533         
1534         float maxdist = R.wrld.aodist;
1535         float fac=0.0f, prev=0.0f;
1536         float adapt_thresh = R.wrld.ao_adapt_thresh;
1537         float adapt_speed_fac = R.wrld.ao_adapt_speed_fac;
1538         
1539         int samples=0;
1540         int max_samples = R.wrld.aosamp*R.wrld.aosamp;
1541         
1542         float dxyview[3], skyadded=0, div;
1543         int aocolor;
1544         
1545         assert(0);
1546         
1547         isec.orig.ob   = shi->obi;
1548         isec.orig.face = shi->vlr;
1549
1550         isec.hit.ob   = 0;
1551         isec.hit.face = 0;
1552
1553         isec.last_hit = NULL;
1554         
1555         isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1556         isec.lay= -1;
1557         
1558         shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1559         
1560         /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1561         aocolor= R.wrld.aocolor;
1562         if(shi->mat->mode & MA_ONLYSHADOW)
1563                 aocolor= WO_AOPLAIN;
1564         
1565         if(aocolor == WO_AOSKYTEX) {
1566                 dxyview[0]= 1.0f/(float)R.wrld.aosamp;
1567                 dxyview[1]= 1.0f/(float)R.wrld.aosamp;
1568                 dxyview[2]= 0.0f;
1569         }
1570         
1571         if(shi->vlr->flag & R_SMOOTH) {
1572                 VECCOPY(nrm, shi->vn);
1573         }
1574         else {
1575                 VECCOPY(nrm, shi->facenor);
1576         }
1577         
1578         VecOrthoBasisf(nrm, up, side);
1579         
1580         /* sampling init */
1581         if (R.wrld.ao_samp_method==WO_AOSAMP_HALTON) {
1582                 float speedfac;
1583                 
1584                 speedfac = get_avg_speed(shi) * adapt_speed_fac;
1585                 CLAMP(speedfac, 1.0, 1000.0);
1586                 max_samples /= speedfac;
1587                 if (max_samples < 5) max_samples = 5;
1588                 
1589                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
1590         } else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY)
1591                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
1592
1593         QMC_initPixel(qsa, shi->thread);
1594         
1595         while (samples < max_samples) {
1596
1597                 /* sampling, returns quasi-random vector in unit hemisphere */
1598                 QMC_sampleHemi(samp3d, qsa, shi->thread, samples);
1599
1600                 dir[0] = (samp3d[0]*up[0] + samp3d[1]*side[0] + samp3d[2]*nrm[0]);
1601                 dir[1] = (samp3d[0]*up[1] + samp3d[1]*side[1] + samp3d[2]*nrm[1]);
1602                 dir[2] = (samp3d[0]*up[2] + samp3d[1]*side[2] + samp3d[2]*nrm[2]);
1603                 
1604                 Normalize(dir);
1605                         
1606                 VECCOPY(isec.start, shi->co);
1607                 isec.vec[0] = -dir[0];
1608                 isec.vec[1] = -dir[1];
1609                 isec.vec[2] = -dir[2];
1610                 isec.labda = maxdist;
1611                 
1612                 prev = fac;
1613                 
1614                 if(RE_rayobject_raycast(R.raytree, &isec)) {
1615                         if (R.wrld.aomode & WO_AODIST) fac+= exp(-isec.labda*R.wrld.aodistfac); 
1616                         else fac+= 1.0f;
1617                 }
1618                 else if(aocolor!=WO_AOPLAIN) {
1619                         float skycol[4];
1620                         float skyfac, view[3];
1621                         
1622                         view[0]= -dir[0];
1623                         view[1]= -dir[1];
1624                         view[2]= -dir[2];
1625                         Normalize(view);
1626                         
1627                         if(aocolor==WO_AOSKYCOL) {
1628                                 skyfac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1629                                 shadfac[0]+= (1.0f-skyfac)*R.wrld.horr + skyfac*R.wrld.zenr;
1630                                 shadfac[1]+= (1.0f-skyfac)*R.wrld.horg + skyfac*R.wrld.zeng;
1631                                 shadfac[2]+= (1.0f-skyfac)*R.wrld.horb + skyfac*R.wrld.zenb;
1632                         }
1633                         else {  /* WO_AOSKYTEX */
1634                                 shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
1635                                 shadeSunView(skycol, shi->view);
1636                                 shadfac[0]+= skycol[0];
1637                                 shadfac[1]+= skycol[1];
1638                                 shadfac[2]+= skycol[2];
1639                         }
1640                         skyadded++;
1641                 }
1642                 
1643                 samples++;
1644                 
1645                 if (qsa->type == SAMP_TYPE_HALTON) {
1646                         /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */          
1647                         if (adapt_thresh > 0.0 && (samples > max_samples/2) ) {
1648                                 
1649                                 if (adaptive_sample_contrast_val(samples, prev, fac, adapt_thresh)) {
1650                                         break;
1651                                 }
1652                         }
1653                 }
1654         }
1655         
1656         if(aocolor!=WO_AOPLAIN && skyadded) {
1657                 div= (1.0f - fac/(float)samples)/((float)skyadded);
1658                 
1659                 shadfac[0]*= div;       // average color times distances/hits formula
1660                 shadfac[1]*= div;       // average color times distances/hits formula
1661                 shadfac[2]*= div;       // average color times distances/hits formula
1662         } else {
1663                 shadfac[0]= shadfac[1]= shadfac[2]= 1.0f - fac/(float)samples;
1664         }
1665         
1666         if (qsa)
1667                 release_thread_qmcsampler(&R, shi->thread, qsa);
1668 }
1669
1670 /* extern call from shade_lamp_loop, ambient occlusion calculus */
1671 static void ray_ao_spheresamp(ShadeInput *shi, float *shadfac)
1672 {
1673         Isect isec;
1674         float *vec, *nrm, div, bias, sh=0.0f;
1675         float maxdist = R.wrld.aodist;
1676         float dxyview[3];
1677         int j= -1, tot, actual=0, skyadded=0, aocolor, resol= R.wrld.aosamp;
1678         
1679         assert(0);
1680         
1681         isec.orig.ob   = shi->obi;
1682         isec.orig.face = shi->vlr;
1683
1684         isec.hit.ob   = 0;
1685         isec.hit.face = 0;
1686         
1687         isec.last_hit = NULL;
1688         
1689         isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1690         isec.lay= -1;
1691
1692
1693         shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1694
1695         /* bias prevents smoothed faces to appear flat */
1696         if(shi->vlr->flag & R_SMOOTH) {
1697                 bias= R.wrld.aobias;
1698                 nrm= shi->vn;
1699         }
1700         else {
1701                 bias= 0.0f;
1702                 nrm= shi->facenor;
1703         }
1704
1705         /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1706         aocolor= R.wrld.aocolor;
1707         if(shi->mat->mode & MA_ONLYSHADOW)
1708                 aocolor= WO_AOPLAIN;
1709         
1710         if(resol>32) resol= 32;
1711         
1712         vec= sphere_sampler(R.wrld.aomode, resol, shi->thread, shi->xs, shi->ys);
1713         
1714         // warning: since we use full sphere now, and dotproduct is below, we do twice as much
1715         tot= 2*resol*resol;
1716
1717         if(aocolor == WO_AOSKYTEX) {
1718                 dxyview[0]= 1.0f/(float)resol;
1719                 dxyview[1]= 1.0f/(float)resol;
1720                 dxyview[2]= 0.0f;
1721         }
1722         
1723         while(tot--) {
1724                 
1725                 if ((vec[0]*nrm[0] + vec[1]*nrm[1] + vec[2]*nrm[2]) > bias) {
1726                         /* only ao samples for mask */
1727                         if(R.r.mode & R_OSA) {
1728                                 j++;
1729                                 if(j==R.osa) j= 0;
1730                                 if(!(shi->mask & (1<<j))) {
1731                                         vec+=3;
1732                                         continue;
1733                                 }
1734                         }
1735                         
1736                         actual++;
1737                         
1738                         /* always set start/vec/labda */
1739                         VECCOPY(isec.start, shi->co);
1740                         isec.vec[0] = -vec[0];
1741                         isec.vec[1] = -vec[1];
1742                         isec.vec[2] = -vec[2];
1743                         isec.labda = maxdist;
1744                         
1745                         /* do the trace */
1746                         if(RE_rayobject_raycast(R.raytree, &isec)) {
1747                                 if (R.wrld.aomode & WO_AODIST) sh+= exp(-isec.labda*R.wrld.aodistfac); 
1748                                 else sh+= 1.0f;
1749                         }
1750                         else if(aocolor!=WO_AOPLAIN) {
1751                                 float skycol[4];
1752                                 float fac, view[3];
1753                                 
1754                                 view[0]= -vec[0];
1755                                 view[1]= -vec[1];
1756                                 view[2]= -vec[2];
1757                                 Normalize(view);
1758                                 
1759                                 if(aocolor==WO_AOSKYCOL) {
1760                                         fac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1761                                         shadfac[0]+= (1.0f-fac)*R.wrld.horr + fac*R.wrld.zenr;
1762                                         shadfac[1]+= (1.0f-fac)*R.wrld.horg + fac*R.wrld.zeng;
1763                                         shadfac[2]+= (1.0f-fac)*R.wrld.horb + fac*R.wrld.zenb;
1764                                 }
1765                                 else {  /* WO_AOSKYTEX */
1766                                         shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
1767                                         shadeSunView(skycol, shi->view);
1768                                         shadfac[0]+= skycol[0];
1769                                         shadfac[1]+= skycol[1];
1770                                         shadfac[2]+= skycol[2];
1771                                 }
1772                                 skyadded++;
1773                         }
1774                 }
1775                 // samples
1776                 vec+= 3;
1777         }
1778         
1779         if(actual==0) sh= 1.0f;
1780         else sh = 1.0f - sh/((float)actual);
1781         
1782         if(aocolor!=WO_AOPLAIN && skyadded) {
1783                 div= sh/((float)skyadded);
1784                 
1785                 shadfac[0]*= div;       // average color times distances/hits formula
1786                 shadfac[1]*= div;       // average color times distances/hits formula
1787                 shadfac[2]*= div;       // average color times distances/hits formula
1788         }
1789         else {
1790                 shadfac[0]= shadfac[1]= shadfac[2]= sh;
1791         }
1792 }
1793
1794 void ray_ao(ShadeInput *shi, float *shadfac)
1795 {
1796         /* Unfortunately, the unusual way that the sphere sampler calculates roughly twice as many
1797          * samples as are actually traced, and skips them based on bias and OSA settings makes it very difficult
1798          * to reuse code between these two functions. This is the easiest way I can think of to do it
1799          * --broken */
1800         if (ELEM(R.wrld.ao_samp_method, WO_AOSAMP_HAMMERSLEY, WO_AOSAMP_HALTON))
1801                 ray_ao_qmc(shi, shadfac);
1802         else if (R.wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
1803                 ray_ao_spheresamp(shi, shadfac);
1804 }
1805
1806 static void ray_shadow_jittered_coords(ShadeInput *shi, int max, float jitco[RE_MAX_OSA][3], int *totjitco)
1807 {
1808         /* magic numbers for reordering sample positions to give better
1809          * results with adaptive sample, when it usually only takes 4 samples */
1810         int order8[8] = {0, 1, 5, 6, 2, 3, 4, 7};
1811         int order11[11] = {1, 3, 8, 10, 0, 2, 4, 5, 6, 7, 9};
1812         int order16[16] = {1, 3, 9, 12, 0, 6, 7, 8, 13, 2, 4, 5, 10, 11, 14, 15};
1813         int count = count_mask(shi->mask);
1814
1815         /* for better antialising shadow samples are distributed over the subpixel
1816          * sample coordinates, this only works for raytracing depth 0 though */
1817         if(!shi->strand && shi->depth == 0 && count > 1 && count <= max) {
1818                 float xs, ys, zs, view[3];
1819                 int samp, ordsamp, tot= 0;
1820
1821                 for(samp=0; samp<R.osa; samp++) {
1822                         if(R.osa == 8) ordsamp = order8[samp];
1823                         else if(R.osa == 11) ordsamp = order11[samp];
1824                         else if(R.osa == 16) ordsamp = order16[samp];
1825                         else ordsamp = samp;
1826
1827                         if(shi->mask & (1<<ordsamp)) {
1828                                 /* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */
1829                                 xs= (float)shi->scanco[0] + R.jit[ordsamp][0] + 0.5f;
1830                                 ys= (float)shi->scanco[1] + R.jit[ordsamp][1] + 0.5f;
1831                                 zs= shi->scanco[2];
1832
1833                                 shade_input_calc_viewco(shi, xs, ys, zs, view, NULL, jitco[tot], NULL, NULL);
1834                                 tot++;
1835                         }
1836                 }
1837
1838                 *totjitco= tot;
1839         }
1840         else {
1841                 VECCOPY(jitco[0], shi->co);
1842                 *totjitco= 1;
1843         }
1844 }
1845
1846 static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)
1847 {
1848         QMCSampler *qsa=NULL;
1849         int samples=0;
1850         float samp3d[3];
1851
1852         float fac=0.0f, vec[3], end[3];
1853         float colsq[4];
1854         float adapt_thresh = lar->adapt_thresh;
1855         int min_adapt_samples=4, max_samples = lar->ray_totsamp;
1856         float *co;
1857         int do_soft=1, full_osa=0;
1858
1859         float jitco[RE_MAX_OSA][3];
1860         int totjitco;
1861
1862 //      assert(0);
1863
1864         colsq[0] = colsq[1] = colsq[2] = 0.0;
1865         if(isec->mode==RE_RAY_SHADOW_TRA) {
1866                 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
1867         } else
1868                 shadfac[3]= 1.0f;
1869         
1870         if (lar->ray_totsamp < 2) do_soft = 0;
1871         if ((R.r.mode & R_OSA) && (R.osa > 0) && (shi->vlr->flag & R_FULL_OSA)) full_osa = 1;
1872         
1873         if (full_osa) {
1874                 if (do_soft) max_samples  = max_samples/R.osa + 1;
1875                 else max_samples = 1;
1876         } else {
1877                 if (do_soft) max_samples = lar->ray_totsamp;
1878                 else max_samples = (R.osa > 4)?R.osa:5;
1879         }
1880         
1881         ray_shadow_jittered_coords(shi, max_samples, jitco, &totjitco);
1882
1883         /* sampling init */
1884         if (lar->ray_samp_method==LA_SAMP_HALTON)
1885                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
1886         else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY)
1887                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
1888         
1889         QMC_initPixel(qsa, shi->thread);
1890         
1891         VECCOPY(vec, lampco);
1892         
1893         
1894         while (samples < max_samples) {
1895
1896                 isec->orig.ob   = shi->obi;
1897                 isec->orig.face = shi->vlr;
1898
1899                 /* manually jitter the start shading co-ord per sample
1900                  * based on the pre-generated OSA texture sampling offsets, 
1901                  * for anti-aliasing sharp shadow edges. */
1902                 co = jitco[samples % totjitco];
1903
1904                 if (do_soft) {
1905                         /* sphere shadow source */
1906                         if (lar->type == LA_LOCAL) {
1907                                 float ru[3], rv[3], v[3], s[3];
1908                                 
1909                                 /* calc tangent plane vectors */
1910                                 v[0] = co[0] - lampco[0];
1911                                 v[1] = co[1] - lampco[1];
1912                                 v[2] = co[2] - lampco[2];
1913                                 Normalize(v);
1914                                 VecOrthoBasisf(v, ru, rv);
1915                                 
1916                                 /* sampling, returns quasi-random vector in area_size disc */
1917                                 QMC_sampleDisc(samp3d, qsa, shi->thread, samples,lar->area_size);
1918
1919                                 /* distribute disc samples across the tangent plane */
1920                                 s[0] = samp3d[0]*ru[0] + samp3d[1]*rv[0];
1921                                 s[1] = samp3d[0]*ru[1] + samp3d[1]*rv[1];
1922                                 s[2] = samp3d[0]*ru[2] + samp3d[1]*rv[2];
1923                                 
1924                                 VECCOPY(samp3d, s);
1925                         }
1926                         else {
1927                                 /* sampling, returns quasi-random vector in [sizex,sizey]^2 plane */
1928                                 QMC_sampleRect(samp3d, qsa, shi->thread, samples, lar->area_size, lar->area_sizey);
1929                                                                 
1930                                 /* align samples to lamp vector */
1931                                 Mat3MulVecfl(lar->mat, samp3d);
1932                         }
1933                         end[0] = vec[0]+samp3d[0];
1934                         end[1] = vec[1]+samp3d[1];
1935                         end[2] = vec[2]+samp3d[2];
1936                 } else {
1937                         VECCOPY(end, vec);
1938                 }
1939
1940                 if(shi->strand) {
1941                         /* bias away somewhat to avoid self intersection */
1942                         float jitbias= 0.5f*(VecLength(shi->dxco) + VecLength(shi->dyco));
1943                         float v[3];
1944
1945                         VECSUB(v, co, end);
1946                         Normalize(v);
1947
1948                         co[0] -= jitbias*v[0];
1949                         co[1] -= jitbias*v[1];
1950                         co[2] -= jitbias*v[2];
1951                 }
1952
1953                 VECCOPY(isec->start, co);
1954 //              VECSUB(isec->vec, end, isec->start);
1955                 isec->vec[0] = end[0]-isec->start[0];
1956                 isec->vec[1] = end[1]-isec->start[1];
1957                 isec->vec[2] = end[2]-isec->start[2];
1958                 isec->labda = 1.0f; // * Normalize(isec->vec);
1959                 
1960                 /* trace the ray */
1961                 if(isec->mode==RE_RAY_SHADOW_TRA) {
1962                         isec->col[0]= isec->col[1]= isec->col[2]=  1.0f;
1963                         isec->col[3]= 1.0f;
1964                         
1965                         ray_trace_shadow_tra(isec, DEPTH_SHADOW_TRA, 0);
1966                         shadfac[0] += isec->col[0];
1967                         shadfac[1] += isec->col[1];
1968                         shadfac[2] += isec->col[2];
1969                         shadfac[3] += isec->col[3];
1970                         
1971                         /* for variance calc */
1972                         colsq[0] += isec->col[0]*isec->col[0];
1973                         colsq[1] += isec->col[1]*isec->col[1];
1974                         colsq[2] += isec->col[2]*isec->col[2];
1975                 }
1976                 else {
1977                         if( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f;
1978                 }
1979                 
1980                 samples++;
1981                 
1982                 if ((lar->ray_samp_method == LA_SAMP_HALTON)) {
1983                 
1984                         /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
1985                         if ((max_samples > min_adapt_samples) && (adapt_thresh > 0.0) && (samples > max_samples / 3)) {
1986                                 if (isec->mode==RE_RAY_SHADOW_TRA) {
1987                                         if ((shadfac[3] / samples > (1.0-adapt_thresh)) || (shadfac[3] / samples < adapt_thresh))
1988                                                 break;
1989                                         else if (adaptive_sample_variance(samples, shadfac, colsq, adapt_thresh))
1990                                                 break;
1991                                 } else {
1992                                         if ((fac / samples > (1.0-adapt_thresh)) || (fac / samples < adapt_thresh))
1993                                                 break;
1994                                 }
1995                         }
1996                 }
1997         }
1998         
1999         if(isec->mode==RE_RAY_SHADOW_TRA) {
2000                 shadfac[0] /= samples;
2001                 shadfac[1] /= samples;
2002                 shadfac[2] /= samples;
2003                 shadfac[3] /= samples;
2004         } else
2005                 shadfac[3]= 1.0f-fac/samples;
2006
2007         if (qsa)
2008                 release_thread_qmcsampler(&R, shi->thread, qsa);
2009 }
2010
2011 static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)
2012 {
2013         /* area soft shadow */
2014         float *jitlamp;
2015         float fac=0.0f, div=0.0f, vec[3];
2016         int a, j= -1, mask;
2017         
2018         assert(0);
2019         
2020         if(isec->mode==RE_RAY_SHADOW_TRA) {
2021                 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
2022         }
2023         else shadfac[3]= 1.0f;
2024         
2025         fac= 0.0f;
2026         jitlamp= give_jitter_plane(lar, shi->thread, shi->xs, shi->ys);
2027
2028         a= lar->ray_totsamp;
2029         
2030         /* this correction to make sure we always take at least 1 sample */
2031         mask= shi->mask;
2032         if(a==4) mask |= (mask>>4)|(mask>>8);
2033         else if(a==9) mask |= (mask>>9);
2034         
2035         while(a--) {
2036                 
2037                 if(R.r.mode & R_OSA) {
2038                         j++;
2039                         if(j>=R.osa) j= 0;
2040                         if(!(mask & (1<<j))) {
2041                                 jitlamp+= 2;
2042                                 continue;
2043                         }
2044                 }
2045                 
2046                 isec->orig.ob   = shi->obi;
2047                 isec->orig.face = shi->vlr;
2048                 
2049                 vec[0]= jitlamp[0];
2050                 vec[1]= jitlamp[1];
2051                 vec[2]= 0.0f;
2052                 Mat3MulVecfl(lar->mat, vec);
2053                 
2054                 /* set start and vec */
2055                 VECCOPY(isec->start, shi->co);
2056                 VECCOPY(isec->vec, vec);
2057                 isec->labda = 1.0f;
2058                 isec->skip = RE_SKIP_VLR_NEIGHBOUR;
2059                 
2060                 if(isec->mode==RE_RAY_SHADOW_TRA) {
2061                         /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2062                         isec->col[0]= isec->col[1]= isec->col[2]=  1.0f;
2063                         isec->col[3]= 1.0f;
2064                         
2065                         ray_trace_shadow_tra(isec, DEPTH_SHADOW_TRA, 0);
2066                         shadfac[0] += isec->col[0];
2067                         shadfac[1] += isec->col[1];
2068                         shadfac[2] += isec->col[2];
2069                         shadfac[3] += isec->col[3];
2070                 }
2071                 else if( RE_rayobject_raycast(R.raytree, isec) ) fac+= 1.0f;
2072                 
2073                 div+= 1.0f;
2074                 jitlamp+= 2;
2075         }
2076         
2077         if(isec->mode==RE_RAY_SHADOW_TRA) {
2078                 shadfac[0] /= div;
2079                 shadfac[1] /= div;
2080                 shadfac[2] /= div;
2081                 shadfac[3] /= div;
2082         }
2083         else {
2084                 // sqrt makes nice umbra effect
2085                 if(lar->ray_samp_type & LA_SAMP_UMBRA)
2086                         shadfac[3]= sqrt(1.0f-fac/div);
2087                 else
2088                         shadfac[3]= 1.0f-fac/div;
2089         }
2090 }
2091 /* extern call from shade_lamp_loop */
2092 void ray_shadow(ShadeInput *shi, LampRen *lar, float *shadfac)
2093 {
2094         Isect isec;
2095         float lampco[3];
2096
2097         /* setup isec */
2098         if(shi->mat->mode & MA_SHADOW_TRA) isec.mode= RE_RAY_SHADOW_TRA;
2099         else isec.mode= RE_RAY_SHADOW;
2100         
2101         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW))
2102                 isec.lay= lar->lay;
2103         else
2104                 isec.lay= -1;
2105
2106         /* only when not mir tracing, first hit optimm */
2107         if(shi->depth==0) {
2108                 isec.last_hit = lar->last_hit[shi->thread];
2109         }
2110         else {
2111                 isec.last_hit = NULL;
2112         }
2113         
2114         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2115                 lampco[0]= shi->co[0] - RE_RAYTRACE_MAXDIST*lar->vec[0];
2116                 lampco[1]= shi->co[1] - RE_RAYTRACE_MAXDIST*lar->vec[1];
2117                 lampco[2]= shi->co[2] - RE_RAYTRACE_MAXDIST*lar->vec[2];
2118         }
2119         else {
2120                 VECCOPY(lampco, lar->co);
2121         }
2122         
2123         if (ELEM(lar->ray_samp_method, LA_SAMP_HALTON, LA_SAMP_HAMMERSLEY)) {
2124                 
2125                 ray_shadow_qmc(shi, lar, lampco, shadfac, &isec);
2126                 
2127         } else {
2128                 if(lar->ray_totsamp<2) {
2129                         
2130                         isec.orig.ob   = shi->obi;
2131                         isec.orig.face = shi->vlr;
2132                         
2133                         shadfac[3]= 1.0f; // 1.0=full light
2134                         
2135                         /* set up isec vec */
2136                         VECCOPY(isec.start, shi->co);
2137                         VECSUB(isec.vec, lampco, isec.start);
2138                         isec.labda = 1.0f;
2139
2140                         if(isec.mode==RE_RAY_SHADOW_TRA) {
2141                                 /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2142                                 isec.col[0]= isec.col[1]= isec.col[2]=  1.0f;
2143                                 isec.col[3]= 1.0f;
2144
2145                                 ray_trace_shadow_tra(&isec, DEPTH_SHADOW_TRA, 0);
2146                                 QUATCOPY(shadfac, isec.col);
2147                         }
2148                         else
2149                         {
2150                                 assert(0);
2151                                 if(RE_rayobject_raycast(R.raytree, &isec)) shadfac[3]= 0.0f;
2152                         }
2153                 }
2154                 else {
2155                         ray_shadow_jitter(shi, lar, lampco, shadfac, &isec);
2156                 }
2157         }
2158                 
2159         /* for first hit optim, set last interesected shadow face */
2160         if(shi->depth==0) {
2161                 lar->last_hit[shi->thread] = isec.last_hit;
2162         }
2163
2164 }
2165
2166 #if 0
2167 /* only when face points away from lamp, in direction of lamp, trace ray and find first exit point */
2168 static void ray_translucent(ShadeInput *shi, LampRen *lar, float *distfac, float *co)
2169 {
2170         Isect isec;
2171         float lampco[3];
2172         
2173         assert(0);
2174         
2175         /* setup isec */
2176         isec.mode= RE_RAY_SHADOW_TRA;
2177         
2178         if(lar->mode & LA_LAYER) isec.lay= lar->lay; else isec.lay= -1;
2179         
2180         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2181                 lampco[0]= shi->co[0] - RE_RAYTRACE_MAXDIST*lar->vec[0];
2182                 lampco[1]= shi->co[1] - RE_RAYTRACE_MAXDIST*lar->vec[1];
2183                 lampco[2]= shi->co[2] - RE_RAYTRACE_MAXDIST*lar->vec[2];
2184         }
2185         else {
2186                 VECCOPY(lampco, lar->co);
2187         }
2188         
2189         isec.orig.ob   = shi->obi;
2190         isec.orig.face = shi->vlr;
2191         
2192         /* set up isec vec */
2193         VECCOPY(isec.start, shi->co);
2194         VECCOPY(isec.end, lampco);
2195         
2196         if(RE_rayobject_raycast(R.raytree, &isec)) {
2197                 /* we got a face */
2198                 
2199                 /* render co */
2200                 co[0]= isec.start[0]+isec.labda*(isec.vec[0]);
2201                 co[1]= isec.start[1]+isec.labda*(isec.vec[1]);
2202                 co[2]= isec.start[2]+isec.labda*(isec.vec[2]);
2203                 
2204                 *distfac= VecLength(isec.vec);
2205         }
2206         else
2207                 *distfac= 0.0f;
2208 }
2209
2210 #endif
2211