svn merge https://svn.blender.org/svnroot/bf-blender/trunk/blender -r22717:22875
[blender.git] / source / blender / render / intern / source / rayshade.c
1 /**
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version. 
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
21  * All rights reserved.
22  *
23  * Contributors: 2004/2005 Blender Foundation, full recode
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 #include <math.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <float.h>
32
33 #include "MEM_guardedalloc.h"
34
35 #include "DNA_material_types.h"
36 #include "DNA_lamp_types.h"
37
38 #include "BKE_global.h"
39 #include "BKE_node.h"
40 #include "BKE_utildefines.h"
41
42 #include "BLI_arithb.h"
43 #include "BLI_blenlib.h"
44 #include "BLI_jitter.h"
45 #include "BLI_rand.h"
46
47 #include "PIL_time.h"
48
49 #include "render_types.h"
50 #include "renderpipeline.h"
51 #include "rendercore.h"
52 #include "renderdatabase.h"
53 #include "pixelblending.h"
54 #include "pixelshading.h"
55 #include "shading.h"
56 #include "texture.h"
57 #include "volumetric.h"
58
59 #include "RE_raytrace.h"
60
61 #define RAY_TRA         1
62 #define RAY_TRAFLIP     2
63
64 #define DEPTH_SHADOW_TRA  10
65
66 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
67 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
68 /* only to be used here in this file, it's for speed */
69 extern struct Render R;
70 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
71
72 static void vlr_face_coords(RayFace *face, float **v1, float **v2, float **v3, float **v4)
73 {
74         VlakRen *vlr= (VlakRen*)face;
75
76         *v1 = (vlr->v1)? vlr->v1->co: NULL;
77         *v2 = (vlr->v2)? vlr->v2->co: NULL;
78         *v3 = (vlr->v3)? vlr->v3->co: NULL;
79         *v4 = (vlr->v4)? vlr->v4->co: NULL;
80 }
81
82 static int vlr_check_intersect(Isect *is, int ob, RayFace *face)
83 {
84         ObjectInstanceRen *obi= RAY_OBJECT_GET((Render*)is->userdata, ob);
85         VlakRen *vlr = (VlakRen*)face;
86
87         /* for baking selected to active non-traceable materials might still
88          * be in the raytree */
89         if(!(vlr->mat->mode & MA_TRACEBLE))
90                 return 0;
91
92         /* I know... cpu cycle waste, might do smarter once */
93         if(is->mode==RE_RAY_MIRROR)
94                 return !(vlr->mat->mode & MA_ONLYCAST);
95         else
96                 return (is->lay & obi->lay);
97 }
98
99 static int vlr_check_intersect_solid(Isect *is, int ob, RayFace *face)
100 {
101         VlakRen *vlr = (VlakRen*)face;
102
103         /* solid material types only */
104         if (vlr->mat->material_type == MA_TYPE_SURFACE)
105                 return 1;
106         else
107                 return 0;
108 }
109
110 static float *vlr_get_transform(void *userdata, int i)
111 {
112         ObjectInstanceRen *obi= RAY_OBJECT_GET((Render*)userdata, i);
113
114         return (obi->flag & R_TRANSFORMED)? (float*)obi->mat: NULL;
115 }
116
117 void freeraytree(Render *re)
118 {
119         if(re->raytree) {
120                 RE_ray_tree_free(re->raytree);
121                 re->raytree= NULL;
122         }
123 }
124
125 void makeraytree(Render *re)
126 {
127         ObjectInstanceRen *obi;
128         ObjectRen *obr;
129         VlakRen *vlr= NULL;
130         float min[3], max[3], co1[3], co2[3], co3[3], co4[3];
131         double lasttime= PIL_check_seconds_timer();
132         int v, totv = 0, totface = 0;
133
134         INIT_MINMAX(min, max);
135
136         /* first min max raytree space */
137         for(obi=re->instancetable.first; obi; obi=obi->next) {
138                 obr= obi->obr;
139
140                 if(re->excludeob && obr->ob == re->excludeob)
141                         continue;
142
143                 for(v=0;v<obr->totvlak;v++) {
144                         if((v & 255)==0) vlr= obr->vlaknodes[v>>8].vlak;
145                         else vlr++;
146                         /* baking selected to active needs non-traceable too */
147                         if((re->flag & R_BAKE_TRACE) || (vlr->mat->mode & MA_TRACEBLE)) {       
148                                 if(vlr->mat->material_type != MA_TYPE_WIRE) {
149                                         VECCOPY(co1, vlr->v1->co);
150                                         VECCOPY(co2, vlr->v2->co);
151                                         VECCOPY(co3, vlr->v3->co);
152
153                                         if(obi->flag & R_TRANSFORMED) {
154                                                 Mat4MulVecfl(obi->mat, co1);
155                                                 Mat4MulVecfl(obi->mat, co2);
156                                                 Mat4MulVecfl(obi->mat, co3);
157                                         }
158
159                                         DO_MINMAX(co1, min, max);
160                                         DO_MINMAX(co2, min, max);
161                                         DO_MINMAX(co3, min, max);
162
163                                         if(vlr->v4) {
164                                                 VECCOPY(co4, vlr->v4->co);
165                                                 if(obi->flag & R_TRANSFORMED)
166                                                         Mat4MulVecfl(obi->mat, co4);
167                                                 DO_MINMAX(co4, min, max);
168                                         }
169
170                                         totface++;
171                                 }
172                         }
173                 }
174         }
175
176         re->raytree= RE_ray_tree_create(re->r.ocres, totface, min, max,
177                 vlr_face_coords, vlr_check_intersect, vlr_get_transform, re);
178
179         if(min[0] > max[0]) { /* empty raytree */
180                 RE_ray_tree_done(re->raytree);
181                 return; 
182         }
183
184         for(obi=re->instancetable.first; obi; obi=obi->next) {
185                 obr= obi->obr;
186
187                 if(re->excludeob && obr->ob == re->excludeob)
188                         continue;
189
190                 for(v=0; v<obr->totvlak; v++, totv++) {
191                         if((v & 255)==0) {
192                                 double time= PIL_check_seconds_timer();
193
194                                 vlr= obr->vlaknodes[v>>8].vlak;
195                                 if(re->test_break(re->tbh))
196                                         break;
197                                 if(time-lasttime>1.0f) {
198                                         char str[32];
199                                         sprintf(str, "Filling Octree: %d", totv);
200                                         re->i.infostr= str;
201                                         re->stats_draw(re->sdh, &re->i);
202                                         re->i.infostr= NULL;
203                                         lasttime= time;
204                                 }
205                         }
206                         else vlr++;
207                         
208                         if((re->flag & R_BAKE_TRACE) || (vlr->mat->mode & MA_TRACEBLE))
209                                 if(vlr->mat->material_type != MA_TYPE_WIRE)
210                                         RE_ray_tree_add_face(re->raytree, RAY_OBJECT_SET(re, obi), vlr);
211                 }
212         }
213
214         RE_ray_tree_done(re->raytree);
215         
216         re->i.infostr= NULL;
217         re->stats_draw(re->sdh, &re->i);
218 }
219
220 void shade_ray(Isect *is, ShadeInput *shi, ShadeResult *shr)
221 {
222         VlakRen *vlr= (VlakRen*)is->face;
223         ObjectInstanceRen *obi= RAY_OBJECT_GET(&R, is->ob);
224         int osatex= 0;
225         
226         /* set up view vector */
227         VECCOPY(shi->view, is->vec);
228
229         /* render co */
230         shi->co[0]= is->start[0]+is->labda*(shi->view[0]);
231         shi->co[1]= is->start[1]+is->labda*(shi->view[1]);
232         shi->co[2]= is->start[2]+is->labda*(shi->view[2]);
233         
234         Normalize(shi->view);
235
236         shi->obi= obi;
237         shi->obr= obi->obr;
238         shi->vlr= vlr;
239         shi->mat= vlr->mat;
240         shade_input_init_material(shi);
241         
242         // Osa structs we leave unchanged now
243         SWAP(int, osatex, shi->osatex);
244         
245         shi->dxco[0]= shi->dxco[1]= shi->dxco[2]= 0.0f;
246         shi->dyco[0]= shi->dyco[1]= shi->dyco[2]= 0.0f;
247         
248         // but, set Osa stuff to zero where it can confuse texture code
249         if(shi->mat->texco & (TEXCO_NORM|TEXCO_REFL) ) {
250                 shi->dxno[0]= shi->dxno[1]= shi->dxno[2]= 0.0f;
251                 shi->dyno[0]= shi->dyno[1]= shi->dyno[2]= 0.0f;
252         }
253
254         if(vlr->v4) {
255                 if(is->isect==2) 
256                         shade_input_set_triangle_i(shi, obi, vlr, 2, 1, 3);
257                 else
258                         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 3);
259         }
260         else {
261                 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
262         }
263
264         shi->u= is->u;
265         shi->v= is->v;
266         shi->dx_u= shi->dx_v= shi->dy_u= shi->dy_v=  0.0f;
267
268         shade_input_set_normals(shi);
269
270         /* point normals to viewing direction */
271         if(INPR(shi->facenor, shi->view) < 0.0f)
272                 shade_input_flip_normals(shi);
273
274         shade_input_set_shade_texco(shi);
275         if (shi->mat->material_type == MA_TYPE_VOLUME) {
276                 if(ELEM(is->mode, RE_RAY_SHADOW, RE_RAY_SHADOW_TRA)) {
277                         shade_volume_shadow(shi, shr, is);
278                 } else {
279                         shade_volume_outside(shi, shr);
280                 }
281         }
282         else if(is->mode==RE_RAY_SHADOW_TRA) {
283                 /* temp hack to prevent recursion */
284                 if(shi->nodes==0 && shi->mat->nodetree && shi->mat->use_nodes) {
285                         ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
286                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
287                 }
288                 else
289                         shade_color(shi, shr);
290         }
291         else {
292                 if(shi->mat->nodetree && shi->mat->use_nodes) {
293                         ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
294                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
295                 }
296                 else {
297                         int tempdepth;
298                         /* XXX dodgy business here, set ray depth to -1
299                          * to ignore raytrace in shade_material_loop()
300                          * this could really use a refactor --Matt */
301                         if (shi->volume_depth == 0) {
302                                 tempdepth = shi->depth;
303                                 shi->depth = -1;
304                                 shade_material_loop(shi, shr);
305                                 shi->depth = tempdepth;
306                         } else {
307                                 shade_material_loop(shi, shr);
308                         }
309                 }
310                 /* raytrace likes to separate the spec color */
311                 VECSUB(shr->diff, shr->combined, shr->spec);
312         }       
313         
314         SWAP(int, osatex, shi->osatex);  // XXXXX!!!!
315
316 }
317
318 static int refraction(float *refract, float *n, float *view, float index)
319 {
320         float dot, fac;
321
322         VECCOPY(refract, view);
323         
324         dot= view[0]*n[0] + view[1]*n[1] + view[2]*n[2];
325
326         if(dot>0.0f) {
327                 index = 1.0f/index;
328                 fac= 1.0f - (1.0f - dot*dot)*index*index;
329                 if(fac<= 0.0f) return 0;
330                 fac= -dot*index + sqrt(fac);
331         }
332         else {
333                 fac= 1.0f - (1.0f - dot*dot)*index*index;
334                 if(fac<= 0.0f) return 0;
335                 fac= -dot*index - sqrt(fac);
336         }
337
338         refract[0]= index*view[0] + fac*n[0];
339         refract[1]= index*view[1] + fac*n[1];
340         refract[2]= index*view[2] + fac*n[2];
341
342         return 1;
343 }
344
345 /* orn = original face normal */
346 static void reflection(float *ref, float *n, float *view, float *orn)
347 {
348         float f1;
349         
350         f1= -2.0f*(n[0]*view[0]+ n[1]*view[1]+ n[2]*view[2]);
351         
352         ref[0]= (view[0]+f1*n[0]);
353         ref[1]= (view[1]+f1*n[1]);
354         ref[2]= (view[2]+f1*n[2]);
355
356         if(orn) {
357                 /* test phong normals, then we should prevent vector going to the back */
358                 f1= ref[0]*orn[0]+ ref[1]*orn[1]+ ref[2]*orn[2];
359                 if(f1>0.0f) {
360                         f1+= .01f;
361                         ref[0]-= f1*orn[0];
362                         ref[1]-= f1*orn[1];
363                         ref[2]-= f1*orn[2];
364                 }
365         }
366 }
367
368 #if 0
369 static void color_combine(float *result, float fac1, float fac2, float *col1, float *col2)
370 {
371         float col1t[3], col2t[3];
372         
373         col1t[0]= sqrt(col1[0]);
374         col1t[1]= sqrt(col1[1]);
375         col1t[2]= sqrt(col1[2]);
376         col2t[0]= sqrt(col2[0]);
377         col2t[1]= sqrt(col2[1]);
378         col2t[2]= sqrt(col2[2]);
379
380         result[0]= (fac1*col1t[0] + fac2*col2t[0]);
381         result[0]*= result[0];
382         result[1]= (fac1*col1t[1] + fac2*col2t[1]);
383         result[1]*= result[1];
384         result[2]= (fac1*col1t[2] + fac2*col2t[2]);
385         result[2]*= result[2];
386 }
387 #endif
388
389 static float shade_by_transmission(Isect *is, ShadeInput *shi, ShadeResult *shr)
390 {
391         float dx, dy, dz, d, p;
392
393         if (0 == (shi->mat->mode & MA_TRANSP))
394                 return -1;
395            
396         if (shi->mat->tx_limit <= 0.0f) {
397                 d= 1.0f;
398         } 
399         else {
400                 /* shi.co[] calculated by shade_ray() */
401                 dx= shi->co[0] - is->start[0];
402                 dy= shi->co[1] - is->start[1];
403                 dz= shi->co[2] - is->start[2];
404                 d= sqrt(dx*dx+dy*dy+dz*dz);
405                 if (d > shi->mat->tx_limit)
406                         d= shi->mat->tx_limit;
407
408                 p = shi->mat->tx_falloff;
409                 if(p < 0.0f) p= 0.0f;
410                 else if (p > 10.0f) p= 10.0f;
411
412                 shr->alpha *= pow(d, p);
413                 if (shr->alpha > 1.0f)
414                         shr->alpha= 1.0f;
415         }
416
417         return d;
418 }
419
420 static void ray_fadeout_endcolor(float *col, ShadeInput *origshi, ShadeInput *shi, ShadeResult *shr, Isect *isec, float *vec)
421 {
422         /* un-intersected rays get either rendered material color or sky color */
423         if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOMAT) {
424                 VECCOPY(col, shr->combined);
425         } else if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOSKY) {
426                 VECCOPY(shi->view, vec);
427                 Normalize(shi->view);
428                 
429                 shadeSkyView(col, isec->start, shi->view, NULL, shi->thread);
430                 shadeSunView(col, shi->view);
431         }
432 }
433
434 static void ray_fadeout(Isect *is, ShadeInput *shi, float *col, float *blendcol, float dist_mir)
435 {
436         /* if fading out, linear blend against fade color */
437         float blendfac;
438
439         blendfac = 1.0 - VecLenf(shi->co, is->start)/dist_mir;
440         
441         col[0] = col[0]*blendfac + (1.0 - blendfac)*blendcol[0];
442         col[1] = col[1]*blendfac + (1.0 - blendfac)*blendcol[1];
443         col[2] = col[2]*blendfac + (1.0 - blendfac)*blendcol[2];
444 }
445
446 /* the main recursive tracer itself */
447 static void traceray(ShadeInput *origshi, ShadeResult *origshr, short depth, float *start, float *vec, float *col, ObjectInstanceRen *obi, VlakRen *vlr, int traflag)
448 {
449         ShadeInput shi;
450         ShadeResult shr;
451         Isect isec;
452         float f, f1, fr, fg, fb;
453         float ref[3], maxsize=RE_ray_tree_max_size(R.raytree);
454         float dist_mir = origshi->mat->dist_mir;
455
456         /* Warning, This is not that nice, and possibly a bit slow for every ray,
457         however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
458         memset(&shi, 0, sizeof(ShadeInput)); 
459         /* end warning! - Campbell */
460         
461         VECCOPY(isec.start, start);
462         if (dist_mir > 0.0) {
463                 isec.end[0]= start[0]+dist_mir*vec[0];
464                 isec.end[1]= start[1]+dist_mir*vec[1];
465                 isec.end[2]= start[2]+dist_mir*vec[2];
466         } else {
467                 isec.end[0]= start[0]+maxsize*vec[0];
468                 isec.end[1]= start[1]+maxsize*vec[1];
469                 isec.end[2]= start[2]+maxsize*vec[2];
470         }
471         isec.mode= RE_RAY_MIRROR;
472         isec.faceorig= (RayFace*)vlr;
473         isec.oborig= RAY_OBJECT_SET(&R, obi);
474
475         if(RE_ray_tree_intersect(R.raytree, &isec)) {
476                 float d= 1.0f;
477                 
478                 shi.mask= origshi->mask;
479                 shi.osatex= origshi->osatex;
480                 shi.depth= 1;                                   /* only used to indicate tracing */
481                 shi.thread= origshi->thread;
482                 //shi.sample= 0; // memset above, so dont need this
483                 shi.xs= origshi->xs;
484                 shi.ys= origshi->ys;
485                 shi.lay= origshi->lay;
486                 shi.passflag= SCE_PASS_COMBINED; /* result of tracing needs no pass info */
487                 shi.combinedflag= 0xFFFFFF;              /* ray trace does all options */
488                 //shi.do_preview= 0; // memset above, so dont need this
489                 shi.light_override= origshi->light_override;
490                 shi.mat_override= origshi->mat_override;
491                 
492                 memset(&shr, 0, sizeof(ShadeResult));
493                 
494                 shade_ray(&isec, &shi, &shr);
495                 if (traflag & RAY_TRA)
496                         d= shade_by_transmission(&isec, &shi, &shr);
497                 
498                 if(depth>0) {
499
500                         if((shi.mat->mode_l & MA_TRANSP) && shr.alpha < 1.0f) {
501                                 float nf, f, f1, refract[3], tracol[4];
502                                 
503                                 tracol[0]= shi.r;
504                                 tracol[1]= shi.g;
505                                 tracol[2]= shi.b;
506                                 tracol[3]= col[3];      // we pass on and accumulate alpha
507                                 
508                                 if((shi.mat->mode & MA_TRANSP) && (shi.mat->mode & MA_RAYTRANSP)) {
509                                         /* odd depths: use normal facing viewer, otherwise flip */
510                                         if(traflag & RAY_TRAFLIP) {
511                                                 float norm[3];
512                                                 norm[0]= - shi.vn[0];
513                                                 norm[1]= - shi.vn[1];
514                                                 norm[2]= - shi.vn[2];
515                                                 if (!refraction(refract, norm, shi.view, shi.ang))
516                                                         reflection(refract, norm, shi.view, shi.vn);
517                                         }
518                                         else {
519                                                 if (!refraction(refract, shi.vn, shi.view, shi.ang))
520                                                         reflection(refract, shi.vn, shi.view, shi.vn);
521                                         }
522                                         traflag |= RAY_TRA;
523                                         traceray(origshi, origshr, depth-1, shi.co, refract, tracol, shi.obi, shi.vlr, traflag ^ RAY_TRAFLIP);
524                                 }
525                                 else
526                                         traceray(origshi, origshr, depth-1, shi.co, shi.view, tracol, shi.obi, shi.vlr, 0);
527                                 
528                                 f= shr.alpha; f1= 1.0f-f;
529                                 nf= d * shi.mat->filter;
530                                 fr= 1.0f+ nf*(shi.r-1.0f);
531                                 fg= 1.0f+ nf*(shi.g-1.0f);
532                                 fb= 1.0f+ nf*(shi.b-1.0f);
533                                 shr.diff[0]= f*shr.diff[0] + f1*fr*tracol[0];
534                                 shr.diff[1]= f*shr.diff[1] + f1*fg*tracol[1];
535                                 shr.diff[2]= f*shr.diff[2] + f1*fb*tracol[2];
536                                 
537                                 shr.spec[0] *=f;
538                                 shr.spec[1] *=f;
539                                 shr.spec[2] *=f;
540
541                                 col[3]= f1*tracol[3] + f;
542                         }
543                         else 
544                                 col[3]= 1.0f;
545
546                         if(shi.mat->mode_l & MA_RAYMIRROR) {
547                                 f= shi.ray_mirror;
548                                 if(f!=0.0f) f*= fresnel_fac(shi.view, shi.vn, shi.mat->fresnel_mir_i, shi.mat->fresnel_mir);
549                         }
550                         else f= 0.0f;
551                         
552                         if(f!=0.0f) {
553                                 float mircol[4];
554                                 
555                                 reflection(ref, shi.vn, shi.view, NULL);                        
556                                 traceray(origshi, origshr, depth-1, shi.co, ref, mircol, shi.obi, shi.vlr, 0);
557                         
558                                 f1= 1.0f-f;
559
560                                 /* combine */
561                                 //color_combine(col, f*fr*(1.0f-shr.spec[0]), f1, col, shr.diff);
562                                 //col[0]+= shr.spec[0];
563                                 //col[1]+= shr.spec[1];
564                                 //col[2]+= shr.spec[2];
565                                 
566                                 fr= shi.mirr;
567                                 fg= shi.mirg;
568                                 fb= shi.mirb;
569                 
570                                 col[0]= f*fr*(1.0f-shr.spec[0])*mircol[0] + f1*shr.diff[0] + shr.spec[0];
571                                 col[1]= f*fg*(1.0f-shr.spec[1])*mircol[1] + f1*shr.diff[1] + shr.spec[1];
572                                 col[2]= f*fb*(1.0f-shr.spec[2])*mircol[2] + f1*shr.diff[2] + shr.spec[2];
573                         }
574                         else {
575                                 col[0]= shr.diff[0] + shr.spec[0];
576                                 col[1]= shr.diff[1] + shr.spec[1];
577                                 col[2]= shr.diff[2] + shr.spec[2];
578                         }
579                         
580                         if (dist_mir > 0.0) {
581                                 float blendcol[3];
582                                 
583                                 /* max ray distance set, but found an intersection, so fade this color
584                                  * out towards the sky/material color for a smooth transition */
585                                 ray_fadeout_endcolor(blendcol, origshi, &shi, origshr, &isec, vec);
586                                 ray_fadeout(&isec, &shi, col, blendcol, dist_mir);
587                         }
588                 }
589                 else {
590                         col[0]= shr.diff[0] + shr.spec[0];
591                         col[1]= shr.diff[1] + shr.spec[1];
592                         col[2]= shr.diff[2] + shr.spec[2];
593                 }
594                 
595         }
596         else {
597                 ray_fadeout_endcolor(col, origshi, &shi, origshr, &isec, vec);
598         }
599 }
600
601 /* **************** jitter blocks ********** */
602
603 /* calc distributed planar energy */
604
605 static void DP_energy(float *table, float *vec, int tot, float xsize, float ysize)
606 {
607         int x, y, a;
608         float *fp, force[3], result[3];
609         float dx, dy, dist, min;
610         
611         min= MIN2(xsize, ysize);
612         min*= min;
613         result[0]= result[1]= 0.0f;
614         
615         for(y= -1; y<2; y++) {
616                 dy= ysize*y;
617                 for(x= -1; x<2; x++) {
618                         dx= xsize*x;
619                         fp= table;
620                         for(a=0; a<tot; a++, fp+= 2) {
621                                 force[0]= vec[0] - fp[0]-dx;
622                                 force[1]= vec[1] - fp[1]-dy;
623                                 dist= force[0]*force[0] + force[1]*force[1];
624                                 if(dist < min && dist>0.0f) {
625                                         result[0]+= force[0]/dist;
626                                         result[1]+= force[1]/dist;
627                                 }
628                         }
629                 }
630         }
631         vec[0] += 0.1*min*result[0]/(float)tot;
632         vec[1] += 0.1*min*result[1]/(float)tot;
633         // cyclic clamping
634         vec[0]= vec[0] - xsize*floor(vec[0]/xsize + 0.5);
635         vec[1]= vec[1] - ysize*floor(vec[1]/ysize + 0.5);
636 }
637
638 // random offset of 1 in 2
639 static void jitter_plane_offset(float *jitter1, float *jitter2, int tot, float sizex, float sizey, float ofsx, float ofsy)
640 {
641         float dsizex= sizex*ofsx;
642         float dsizey= sizey*ofsy;
643         float hsizex= 0.5*sizex, hsizey= 0.5*sizey;
644         int x;
645         
646         for(x=tot; x>0; x--, jitter1+=2, jitter2+=2) {
647                 jitter2[0]= jitter1[0] + dsizex;
648                 jitter2[1]= jitter1[1] + dsizey;
649                 if(jitter2[0] > hsizex) jitter2[0]-= sizex;
650                 if(jitter2[1] > hsizey) jitter2[1]-= sizey;
651         }
652 }
653
654 /* called from convertBlenderScene.c */
655 /* we do this in advance to get consistant random, not alter the render seed, and be threadsafe */
656 void init_jitter_plane(LampRen *lar)
657 {
658         float *fp;
659         int x, iter=12, tot= lar->ray_totsamp;
660         
661         /* test if already initialized */
662         if(lar->jitter) return;
663         
664         /* at least 4, or max threads+1 tables */
665         if(BLENDER_MAX_THREADS < 4) x= 4;
666         else x= BLENDER_MAX_THREADS+1;
667         fp= lar->jitter= MEM_callocN(x*tot*2*sizeof(float), "lamp jitter tab");
668         
669         /* if 1 sample, we leave table to be zero's */
670         if(tot>1) {
671                 
672                 /* set per-lamp fixed seed */
673                 BLI_srandom(tot);
674                 
675                 /* fill table with random locations, area_size large */
676                 for(x=0; x<tot; x++, fp+=2) {
677                         fp[0]= (BLI_frand()-0.5)*lar->area_size;
678                         fp[1]= (BLI_frand()-0.5)*lar->area_sizey;
679                 }
680                 
681                 while(iter--) {
682                         fp= lar->jitter;
683                         for(x=tot; x>0; x--, fp+=2) {
684                                 DP_energy(lar->jitter, fp, tot, lar->area_size, lar->area_sizey);
685                         }
686                 }
687         }       
688         /* create the dithered tables (could just check lamp type!) */
689         jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.0f);
690         jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.5f);
691         jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0f, 0.5f);
692 }
693
694 /* table around origin, -0.5*size to 0.5*size */
695 static float *give_jitter_plane(LampRen *lar, int thread, int xs, int ys)
696 {
697         int tot;
698         
699         tot= lar->ray_totsamp;
700                         
701         if(lar->ray_samp_type & LA_SAMP_JITTER) {
702                 /* made it threadsafe */
703                 
704                 if(lar->xold[thread]!=xs || lar->yold[thread]!=ys) {
705                         jitter_plane_offset(lar->jitter, lar->jitter+2*(thread+1)*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(thread), BLI_thread_frand(thread));
706                         lar->xold[thread]= xs; 
707                         lar->yold[thread]= ys;
708                 }
709                 return lar->jitter+2*(thread+1)*tot;
710         }
711         if(lar->ray_samp_type & LA_SAMP_DITHER) {
712                 return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1));
713         }
714         
715         return lar->jitter;
716 }
717
718
719 /* **************** QMC sampling *************** */
720
721 static void halton_sample(double *ht_invprimes, double *ht_nums, double *v)
722 {
723         // incremental halton sequence generator, from:
724         // "Instant Radiosity", Keller A.
725         unsigned int i;
726         
727         for (i = 0; i < 2; i++)
728         {
729                 double r = fabs((1.0 - ht_nums[i]) - 1e-10);
730                 
731                 if (ht_invprimes[i] >= r)
732                 {
733                         double lasth;
734                         double h = ht_invprimes[i];
735                         
736                         do {
737                                 lasth = h;
738                                 h *= ht_invprimes[i];
739                         } while (h >= r);
740                         
741                         ht_nums[i] += ((lasth + h) - 1.0);
742                 }
743                 else
744                         ht_nums[i] += ht_invprimes[i];
745                 
746                 v[i] = (float)ht_nums[i];
747         }
748 }
749
750 /* Generate Hammersley points in [0,1)^2
751  * From Lucille renderer */
752 static void hammersley_create(double *out, int n)
753 {
754         double p, t;
755         int k, kk;
756
757         for (k = 0; k < n; k++) {
758                 t = 0;
759                 for (p = 0.5, kk = k; kk; p *= 0.5, kk >>= 1) {
760                         if (kk & 1) {           /* kk mod 2 = 1         */
761                                 t += p;
762                         }
763                 }
764         
765                 out[2 * k + 0] = (double)k / (double)n;
766                 out[2 * k + 1] = t;
767         }
768 }
769
770 static struct QMCSampler *QMC_initSampler(int type, int tot)
771 {       
772         QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler");
773         qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table");
774
775         qsa->tot = tot;
776         qsa->type = type;
777         
778         if (qsa->type==SAMP_TYPE_HAMMERSLEY) 
779                 hammersley_create(qsa->samp2d, qsa->tot);
780                 
781         return qsa;
782 }
783
784 static void QMC_initPixel(QMCSampler *qsa, int thread)
785 {
786         if (qsa->type==SAMP_TYPE_HAMMERSLEY)
787         {
788                 /* hammersley sequence is fixed, already created in QMCSampler init.
789                  * per pixel, gets a random offset. We create separate offsets per thread, for write-safety */
790                 qsa->offs[thread][0] = 0.5 * BLI_thread_frand(thread);
791                 qsa->offs[thread][1] = 0.5 * BLI_thread_frand(thread);
792         }
793         else {  /* SAMP_TYPE_HALTON */
794                 
795                 /* generate a new randomised halton sequence per pixel
796                  * to alleviate qmc artifacts and make it reproducable 
797                  * between threads/frames */
798                 double ht_invprimes[2], ht_nums[2];
799                 double r[2];
800                 int i;
801         
802                 ht_nums[0] = BLI_thread_frand(thread);
803                 ht_nums[1] = BLI_thread_frand(thread);
804                 ht_invprimes[0] = 0.5;
805                 ht_invprimes[1] = 1.0/3.0;
806                 
807                 for (i=0; i< qsa->tot; i++) {
808                         halton_sample(ht_invprimes, ht_nums, r);
809                         qsa->samp2d[2*i+0] = r[0];
810                         qsa->samp2d[2*i+1] = r[1];
811                 }
812         }
813 }
814
815 static void QMC_freeSampler(QMCSampler *qsa)
816 {
817         MEM_freeN(qsa->samp2d);
818         MEM_freeN(qsa);
819 }
820
821 static void QMC_getSample(double *s, QMCSampler *qsa, int thread, int num)
822 {
823         if (qsa->type == SAMP_TYPE_HAMMERSLEY) {
824                 s[0] = fmod(qsa->samp2d[2*num+0] + qsa->offs[thread][0], 1.0f);
825                 s[1] = fmod(qsa->samp2d[2*num+1] + qsa->offs[thread][1], 1.0f);
826         }
827         else { /* SAMP_TYPE_HALTON */
828                 s[0] = qsa->samp2d[2*num+0];
829                 s[1] = qsa->samp2d[2*num+1];
830         }
831 }
832
833 /* phong weighted disc using 'blur' for exponent, centred on 0,0 */
834 static void QMC_samplePhong(float *vec, QMCSampler *qsa, int thread, int num, float blur)
835 {
836         double s[2];
837         float phi, pz, sqr;
838         
839         QMC_getSample(s, qsa, thread, num);
840
841         phi = s[0]*2*M_PI;
842         pz = pow(s[1], blur);
843         sqr = sqrt(1.0f-pz*pz);
844
845         vec[0] = cos(phi)*sqr;
846         vec[1] = sin(phi)*sqr;
847         vec[2] = 0.0f;
848 }
849
850 /* rect of edge lengths sizex, sizey, centred on 0.0,0.0 i.e. ranging from -sizex/2 to +sizey/2 */
851 static void QMC_sampleRect(float *vec, QMCSampler *qsa, int thread, int num, float sizex, float sizey)
852 {
853         double s[2];
854
855         QMC_getSample(s, qsa, thread, num);
856                 
857         vec[0] = (s[0] - 0.5) * sizex;
858         vec[1] = (s[1] - 0.5) * sizey;
859         vec[2] = 0.0f;
860 }
861
862 /* disc of radius 'radius', centred on 0,0 */
863 static void QMC_sampleDisc(float *vec, QMCSampler *qsa, int thread, int num, float radius)
864 {
865         double s[2];
866         float phi, sqr;
867         
868         QMC_getSample(s, qsa, thread, num);
869         
870         phi = s[0]*2*M_PI;
871         sqr = sqrt(s[1]);
872
873         vec[0] = cos(phi)*sqr* radius/2.0;
874         vec[1] = sin(phi)*sqr* radius/2.0;
875         vec[2] = 0.0f;
876 }
877
878 /* uniform hemisphere sampling */
879 static void QMC_sampleHemi(float *vec, QMCSampler *qsa, int thread, int num)
880 {
881         double s[2];
882         float phi, sqr;
883         
884         QMC_getSample(s, qsa, thread, num);
885         
886         phi = s[0]*2.f*M_PI;    
887         sqr = sqrt(s[1]);
888
889         vec[0] = cos(phi)*sqr;
890         vec[1] = sin(phi)*sqr;
891         vec[2] = 1.f - s[1]*s[1];
892 }
893
894 #if 0 /* currently not used */
895 /* cosine weighted hemisphere sampling */
896 static void QMC_sampleHemiCosine(float *vec, QMCSampler *qsa, int thread, int num)
897 {
898         double s[2];
899         float phi, sqr;
900         
901         QMC_getSample(s, qsa, thread, num);
902         
903         phi = s[0]*2.f*M_PI;    
904         sqr = s[1]*sqrt(2-s[1]*s[1]);
905
906         vec[0] = cos(phi)*sqr;
907         vec[1] = sin(phi)*sqr;
908         vec[2] = 1.f - s[1]*s[1];
909
910 }
911 #endif
912
913 /* called from convertBlenderScene.c */
914 void init_render_qmcsampler(Render *re)
915 {
916         re->qmcsamplers= MEM_callocN(sizeof(ListBase)*BLENDER_MAX_THREADS, "QMCListBase");
917 }
918
919 static QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot)
920 {
921         QMCSampler *qsa;
922
923         /* create qmc samplers as needed, since recursion makes it hard to
924          * predict how many are needed */
925
926         for(qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) {
927                 if(qsa->type == type && qsa->tot == tot && !qsa->used) {
928                         qsa->used= 1;
929                         return qsa;
930                 }
931         }
932
933         qsa= QMC_initSampler(type, tot);
934         qsa->used= 1;
935         BLI_addtail(&re->qmcsamplers[thread], qsa);
936
937         return qsa;
938 }
939
940 static void release_thread_qmcsampler(Render *re, int thread, QMCSampler *qsa)
941 {
942         qsa->used= 0;
943 }
944
945 void free_render_qmcsampler(Render *re)
946 {
947         QMCSampler *qsa, *next;
948         int a;
949
950         if(re->qmcsamplers) {
951                 for(a=0; a<BLENDER_MAX_THREADS; a++) {
952                         for(qsa=re->qmcsamplers[a].first; qsa; qsa=next) {
953                                 next= qsa->next;
954                                 QMC_freeSampler(qsa);
955                         }
956
957                         re->qmcsamplers[a].first= re->qmcsamplers[a].last= NULL;
958                 }
959
960                 MEM_freeN(re->qmcsamplers);
961                 re->qmcsamplers= NULL;
962         }
963 }
964
965 static int adaptive_sample_variance(int samples, float *col, float *colsq, float thresh)
966 {
967         float var[3], mean[3];
968
969         /* scale threshold just to give a bit more precision in input rather than dealing with 
970          * tiny tiny numbers in the UI */
971         thresh /= 2;
972         
973         mean[0] = col[0] / (float)samples;
974         mean[1] = col[1] / (float)samples;
975         mean[2] = col[2] / (float)samples;
976
977         var[0] = (colsq[0] / (float)samples) - (mean[0]*mean[0]);
978         var[1] = (colsq[1] / (float)samples) - (mean[1]*mean[1]);
979         var[2] = (colsq[2] / (float)samples) - (mean[2]*mean[2]);
980         
981         if ((var[0] * 0.4 < thresh) && (var[1] * 0.3 < thresh) && (var[2] * 0.6 < thresh))
982                 return 1;
983         else
984                 return 0;
985 }
986
987 static int adaptive_sample_contrast_val(int samples, float prev, float val, float thresh)
988 {
989         /* if the last sample's contribution to the total value was below a small threshold
990          * (i.e. the samples taken are very similar), then taking more samples that are probably 
991          * going to be the same is wasting effort */
992         if (fabs( prev/(float)(samples-1) - val/(float)samples ) < thresh) {
993                 return 1;
994         } else
995                 return 0;
996 }
997
998 static float get_avg_speed(ShadeInput *shi)
999 {
1000         float pre_x, pre_y, post_x, post_y, speedavg;
1001         
1002         pre_x = (shi->winspeed[0] == PASS_VECTOR_MAX)?0.0:shi->winspeed[0];
1003         pre_y = (shi->winspeed[1] == PASS_VECTOR_MAX)?0.0:shi->winspeed[1];
1004         post_x = (shi->winspeed[2] == PASS_VECTOR_MAX)?0.0:shi->winspeed[2];
1005         post_y = (shi->winspeed[3] == PASS_VECTOR_MAX)?0.0:shi->winspeed[3];
1006         
1007         speedavg = (sqrt(pre_x*pre_x + pre_y*pre_y) + sqrt(post_x*post_x + post_y*post_y)) / 2.0;
1008         
1009         return speedavg;
1010 }
1011
1012 /* ***************** main calls ************** */
1013
1014
1015 static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr)
1016 {
1017         QMCSampler *qsa=NULL;
1018         int samp_type;
1019         
1020         float samp3d[3], orthx[3], orthy[3];
1021         float v_refract[3], v_refract_new[3];
1022         float sampcol[4], colsq[4];
1023         
1024         float blur = pow(1.0 - shi->mat->gloss_tra, 3);
1025         short max_samples = shi->mat->samp_gloss_tra;
1026         float adapt_thresh = shi->mat->adapt_thresh_tra;
1027         
1028         int samples=0;
1029         
1030         colsq[0] = colsq[1] = colsq[2] = 0.0;
1031         col[0] = col[1] = col[2] = 0.0;
1032         col[3]= shr->alpha;
1033         
1034         if (blur > 0.0) {
1035                 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1036                 else samp_type = SAMP_TYPE_HAMMERSLEY;
1037                         
1038                 /* all samples are generated per pixel */
1039                 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1040                 QMC_initPixel(qsa, shi->thread);
1041         } else 
1042                 max_samples = 1;
1043         
1044
1045         while (samples < max_samples) {         
1046                 refraction(v_refract, shi->vn, shi->view, shi->ang);
1047                 
1048                 if (max_samples > 1) {
1049                         /* get a quasi-random vector from a phong-weighted disc */
1050                         QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1051                                                 
1052                         VecOrthoBasisf(v_refract, orthx, orthy);
1053                         VecMulf(orthx, samp3d[0]);
1054                         VecMulf(orthy, samp3d[1]);
1055                                 
1056                         /* and perturb the refraction vector in it */
1057                         VecAddf(v_refract_new, v_refract, orthx);
1058                         VecAddf(v_refract_new, v_refract_new, orthy);
1059                         
1060                         Normalize(v_refract_new);
1061                 } else {
1062                         /* no blurriness, use the original normal */
1063                         VECCOPY(v_refract_new, v_refract);
1064                 }
1065         
1066                 traceray(shi, shr, shi->mat->ray_depth_tra, shi->co, v_refract_new, sampcol, shi->obi, shi->vlr, RAY_TRA|RAY_TRAFLIP);
1067         
1068                 col[0] += sampcol[0];
1069                 col[1] += sampcol[1];
1070                 col[2] += sampcol[2];
1071                 col[3] += sampcol[3];
1072                 
1073                 /* for variance calc */
1074                 colsq[0] += sampcol[0]*sampcol[0];
1075                 colsq[1] += sampcol[1]*sampcol[1];
1076                 colsq[2] += sampcol[2]*sampcol[2];
1077                 
1078                 samples++;
1079                 
1080                 /* adaptive sampling */
1081                 if (adapt_thresh < 1.0 && samples > max_samples/2) 
1082                 {
1083                         if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1084                                 break;
1085                         
1086                         /* if the pixel so far is very dark, we can get away with less samples */
1087                         if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1088                                 max_samples--;
1089                 }
1090         }
1091         
1092         col[0] /= (float)samples;
1093         col[1] /= (float)samples;
1094         col[2] /= (float)samples;
1095         col[3] /= (float)samples;
1096         
1097         if (qsa)
1098                 release_thread_qmcsampler(&R, shi->thread, qsa);
1099 }
1100
1101 static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float fresnelfac)
1102 {
1103         QMCSampler *qsa=NULL;
1104         int samp_type;
1105         
1106         float samp3d[3], orthx[3], orthy[3];
1107         float v_nor_new[3], v_reflect[3];
1108         float sampcol[4], colsq[4];
1109                 
1110         float blur = pow(1.0 - shi->mat->gloss_mir, 3);
1111         short max_samples = shi->mat->samp_gloss_mir;
1112         float adapt_thresh = shi->mat->adapt_thresh_mir;
1113         float aniso = 1.0 - shi->mat->aniso_gloss_mir;
1114         
1115         int samples=0;
1116         
1117         col[0] = col[1] = col[2] = 0.0;
1118         colsq[0] = colsq[1] = colsq[2] = 0.0;
1119         
1120         if (blur > 0.0) {
1121                 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1122                 else samp_type = SAMP_TYPE_HAMMERSLEY;
1123                         
1124                 /* all samples are generated per pixel */
1125                 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1126                 QMC_initPixel(qsa, shi->thread);
1127         } else 
1128                 max_samples = 1;
1129         
1130         while (samples < max_samples) {
1131                                 
1132                 if (max_samples > 1) {
1133                         /* get a quasi-random vector from a phong-weighted disc */
1134                         QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1135
1136                         /* find the normal's perpendicular plane, blurring along tangents
1137                          * if tangent shading enabled */
1138                         if (shi->mat->mode & (MA_TANGENT_V)) {
1139                                 Crossf(orthx, shi->vn, shi->tang);      // bitangent
1140                                 VECCOPY(orthy, shi->tang);
1141                                 VecMulf(orthx, samp3d[0]);
1142                                 VecMulf(orthy, samp3d[1]*aniso);
1143                         } else {
1144                                 VecOrthoBasisf(shi->vn, orthx, orthy);
1145                                 VecMulf(orthx, samp3d[0]);
1146                                 VecMulf(orthy, samp3d[1]);
1147                         }
1148
1149                         /* and perturb the normal in it */
1150                         VecAddf(v_nor_new, shi->vn, orthx);
1151                         VecAddf(v_nor_new, v_nor_new, orthy);
1152                         Normalize(v_nor_new);
1153                 } else {
1154                         /* no blurriness, use the original normal */
1155                         VECCOPY(v_nor_new, shi->vn);
1156                 }
1157                 
1158                 if((shi->vlr->flag & R_SMOOTH)) 
1159                         reflection(v_reflect, v_nor_new, shi->view, shi->facenor);
1160                 else
1161                         reflection(v_reflect, v_nor_new, shi->view, NULL);
1162                 
1163                 traceray(shi, shr, shi->mat->ray_depth, shi->co, v_reflect, sampcol, shi->obi, shi->vlr, 0);
1164
1165                 
1166                 col[0] += sampcol[0];
1167                 col[1] += sampcol[1];
1168                 col[2] += sampcol[2];
1169         
1170                 /* for variance calc */
1171                 colsq[0] += sampcol[0]*sampcol[0];
1172                 colsq[1] += sampcol[1]*sampcol[1];
1173                 colsq[2] += sampcol[2]*sampcol[2];
1174                 
1175                 samples++;
1176
1177                 /* adaptive sampling */
1178                 if (adapt_thresh > 0.0 && samples > max_samples/3) 
1179                 {
1180                         if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1181                                 break;
1182                         
1183                         /* if the pixel so far is very dark, we can get away with less samples */
1184                         if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1185                                 max_samples--;
1186                 
1187                         /* reduce samples when reflection is dim due to low ray mirror blend value or fresnel factor
1188                          * and when reflection is blurry */
1189                         if (fresnelfac < 0.1 * (blur+1)) {
1190                                 max_samples--;
1191                                 
1192                                 /* even more for very dim */
1193                                 if (fresnelfac < 0.05 * (blur+1)) 
1194                                         max_samples--;
1195                         }
1196                 }
1197         }
1198         
1199         col[0] /= (float)samples;
1200         col[1] /= (float)samples;
1201         col[2] /= (float)samples;
1202         
1203         if (qsa)
1204                 release_thread_qmcsampler(&R, shi->thread, qsa);
1205 }
1206
1207 /* extern call from render loop */
1208 void ray_trace(ShadeInput *shi, ShadeResult *shr)
1209 {
1210         float i, f, f1, fr, fg, fb;
1211         float mircol[4], tracol[4];
1212         float diff[3];
1213         int do_tra, do_mir;
1214         
1215         do_tra= ((shi->mat->mode & MA_TRANSP) && (shi->mat->mode & MA_RAYTRANSP) && shr->alpha!=1.0f);
1216         do_mir= ((shi->mat->mode & MA_RAYMIRROR) && shi->ray_mirror!=0.0f);
1217
1218         
1219         /* raytrace mirror amd refract like to separate the spec color */
1220         if(shi->combinedflag & SCE_PASS_SPEC)
1221                 VECSUB(diff, shr->combined, shr->spec) /* no ; */
1222         else
1223                 VECCOPY(diff, shr->combined);
1224         
1225         if(do_tra) {
1226                 float olddiff[3];
1227                 
1228                 trace_refract(tracol, shi, shr);
1229                 
1230                 f= shr->alpha; f1= 1.0f-f;
1231                 fr= 1.0f+ shi->mat->filter*(shi->r-1.0f);
1232                 fg= 1.0f+ shi->mat->filter*(shi->g-1.0f);
1233                 fb= 1.0f+ shi->mat->filter*(shi->b-1.0f);
1234                 
1235                 /* for refract pass */
1236                 VECCOPY(olddiff, diff);
1237                 
1238                 diff[0]= f*diff[0] + f1*fr*tracol[0];
1239                 diff[1]= f*diff[1] + f1*fg*tracol[1];
1240                 diff[2]= f*diff[2] + f1*fb*tracol[2];
1241                 
1242                 if(shi->passflag & SCE_PASS_REFRACT)
1243                         VECSUB(shr->refr, diff, olddiff);
1244                 
1245                 if(!(shi->combinedflag & SCE_PASS_REFRACT))
1246                         VECSUB(diff, diff, shr->refr);
1247                 
1248                 shr->alpha= tracol[3];
1249         }
1250         
1251         if(do_mir) {
1252         
1253                 i= shi->ray_mirror*fresnel_fac(shi->view, shi->vn, shi->mat->fresnel_mir_i, shi->mat->fresnel_mir);
1254                 if(i!=0.0f) {
1255                 
1256                         trace_reflect(mircol, shi, shr, i);
1257                         
1258                         fr= i*shi->mirr;
1259                         fg= i*shi->mirg;
1260                         fb= i*shi->mirb;
1261
1262                         if(shi->passflag & SCE_PASS_REFLECT) {
1263                                 /* mirror pass is not blocked out with spec */
1264                                 shr->refl[0]= fr*mircol[0] - fr*diff[0];
1265                                 shr->refl[1]= fg*mircol[1] - fg*diff[1];
1266                                 shr->refl[2]= fb*mircol[2] - fb*diff[2];
1267                         }
1268                         
1269                         if(shi->combinedflag & SCE_PASS_REFLECT) {
1270                                 /* values in shr->spec can be greater then 1.0.
1271                                  * In this case the mircol uses a zero blending factor, so ignoring it is ok.
1272                                  * Fixes bug #18837 - when the spec is higher then 1.0,
1273                                  * diff can become a negative color - Campbell  */
1274                                 
1275                                 f1= 1.0f-i;
1276                                 
1277                                 diff[0] *= f1;
1278                                 diff[1] *= f1;
1279                                 diff[2] *= f1;
1280                                 
1281                                 if(shr->spec[0]<1.0f)   diff[0] += mircol[0] * (fr*(1.0f-shr->spec[0]));
1282                                 if(shr->spec[1]<1.0f)   diff[1] += mircol[1] * (fg*(1.0f-shr->spec[1]));
1283                                 if(shr->spec[2]<1.0f)   diff[2] += mircol[2] * (fb*(1.0f-shr->spec[2]));
1284                         }
1285                 }
1286         }
1287         /* put back together */
1288         if(shi->combinedflag & SCE_PASS_SPEC)
1289                 VECADD(shr->combined, diff, shr->spec) /* no ; */
1290         else
1291                 VECCOPY(shr->combined, diff);
1292 }
1293
1294 /* color 'shadfac' passes through 'col' with alpha and filter */
1295 /* filter is only applied on alpha defined transparent part */
1296 static void addAlphaLight(float *shadfac, float *col, float alpha, float filter)
1297 {
1298         float fr, fg, fb;
1299         
1300         fr= 1.0f+ filter*(col[0]-1.0f);
1301         fg= 1.0f+ filter*(col[1]-1.0f);
1302         fb= 1.0f+ filter*(col[2]-1.0f);
1303         
1304         shadfac[0]= alpha*col[0] + fr*(1.0f-alpha)*shadfac[0];
1305         shadfac[1]= alpha*col[1] + fg*(1.0f-alpha)*shadfac[1];
1306         shadfac[2]= alpha*col[2] + fb*(1.0f-alpha)*shadfac[2];
1307         
1308         shadfac[3]= (1.0f-alpha)*shadfac[3];
1309 }
1310
1311 static void ray_trace_shadow_tra(Isect *is, ShadeInput *origshi, int depth, int traflag)
1312 {
1313         /* ray to lamp, find first face that intersects, check alpha properties,
1314            if it has col[3]>0.0f  continue. so exit when alpha is full */
1315         ShadeInput shi;
1316         ShadeResult shr;
1317
1318         if(RE_ray_tree_intersect(R.raytree, is)) {
1319                 float d= 1.0f;
1320                 /* we got a face */
1321                 
1322                 /* Warning, This is not that nice, and possibly a bit slow for every ray,
1323                 however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1324                 memset(&shi, 0, sizeof(ShadeInput)); 
1325                 /* end warning! - Campbell */
1326                 
1327                 shi.depth= 1;                                   /* only used to indicate tracing */
1328                 shi.mask= origshi->mask;
1329                 shi.thread= origshi->thread;
1330                 shi.passflag= SCE_PASS_COMBINED;
1331                 shi.combinedflag= 0xFFFFFF;              /* ray trace does all options */
1332         
1333                 shi.xs= origshi->xs;
1334                 shi.ys= origshi->ys;
1335                 shi.lay= origshi->lay;
1336                 shi.nodes= origshi->nodes;
1337                 
1338                 shade_ray(is, &shi, &shr);
1339                 if (shi.mat->material_type == MA_TYPE_SURFACE) {
1340                         if (traflag & RAY_TRA)
1341                                 d= shade_by_transmission(is, &shi, &shr);
1342                         
1343                         /* mix colors based on shadfac (rgb + amount of light factor) */
1344                         addAlphaLight(is->col, shr.diff, shr.alpha, d*shi.mat->filter);
1345                 } else if (shi.mat->material_type == MA_TYPE_VOLUME) {
1346                         addAlphaLight(is->col, shr.combined, shr.alpha, 1.0f);
1347                 }
1348                 
1349                 if(depth>0 && is->col[3]>0.0f) {
1350                         
1351                         /* adapt isect struct */
1352                         VECCOPY(is->start, shi.co);
1353                         is->oborig= RAY_OBJECT_SET(&R, shi.obi);
1354                         is->faceorig= (RayFace*)shi.vlr;
1355
1356                         ray_trace_shadow_tra(is, origshi, depth-1, traflag | RAY_TRA);
1357                 }
1358         }
1359 }
1360
1361 /* not used, test function for ambient occlusion (yaf: pathlight) */
1362 /* main problem; has to be called within shading loop, giving unwanted recursion */
1363 int ray_trace_shadow_rad(ShadeInput *ship, ShadeResult *shr)
1364 {
1365         static int counter=0, only_one= 0;
1366         extern float hashvectf[];
1367         Isect isec;
1368         ShadeInput shi;
1369         ShadeResult shr_t;
1370         float vec[3], accum[3], div= 0.0f, maxsize= RE_ray_tree_max_size(R.raytree);
1371         int a;
1372         
1373         if(only_one) {
1374                 return 0;
1375         }
1376         only_one= 1;
1377         
1378         accum[0]= accum[1]= accum[2]= 0.0f;
1379         isec.mode= RE_RAY_MIRROR;
1380         isec.faceorig= (RayFace*)ship->vlr;
1381         isec.oborig= RAY_OBJECT_SET(&R, ship->obi);
1382         
1383         for(a=0; a<8*8; a++) {
1384                 
1385                 counter+=3;
1386                 counter %= 768;
1387                 VECCOPY(vec, hashvectf+counter);
1388                 if(ship->vn[0]*vec[0]+ship->vn[1]*vec[1]+ship->vn[2]*vec[2]>0.0f) {
1389                         vec[0]-= vec[0];
1390                         vec[1]-= vec[1];
1391                         vec[2]-= vec[2];
1392                 }
1393                 VECCOPY(isec.start, ship->co);
1394                 isec.end[0]= isec.start[0] + maxsize*vec[0];
1395                 isec.end[1]= isec.start[1] + maxsize*vec[1];
1396                 isec.end[2]= isec.start[2] + maxsize*vec[2];
1397                 
1398                 if(RE_ray_tree_intersect(R.raytree, &isec)) {
1399                         float fac;
1400                         
1401                         /* Warning, This is not that nice, and possibly a bit slow for every ray,
1402                         however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1403                         memset(&shi, 0, sizeof(ShadeInput)); 
1404                         /* end warning! - Campbell */
1405                         
1406                         shade_ray(&isec, &shi, &shr_t);
1407                         fac= isec.labda*isec.labda;
1408                         fac= 1.0f;
1409                         accum[0]+= fac*(shr_t.diff[0]+shr_t.spec[0]);
1410                         accum[1]+= fac*(shr_t.diff[1]+shr_t.spec[1]);
1411                         accum[2]+= fac*(shr_t.diff[2]+shr_t.spec[2]);
1412                         div+= fac;
1413                 }
1414                 else div+= 1.0f;
1415         }
1416         
1417         if(div!=0.0f) {
1418                 shr->diff[0]+= accum[0]/div;
1419                 shr->diff[1]+= accum[1]/div;
1420                 shr->diff[2]+= accum[2]/div;
1421         }
1422         shr->alpha= 1.0f;
1423         
1424         only_one= 0;
1425         return 1;
1426 }
1427
1428 /* aolight: function to create random unit sphere vectors for total random sampling */
1429 static void RandomSpherical(float *v)
1430 {
1431         float r;
1432         v[2] = 2.f*BLI_frand()-1.f;
1433         if ((r = 1.f - v[2]*v[2])>0.f) {
1434                 float a = 6.283185307f*BLI_frand();
1435                 r = sqrt(r);
1436                 v[0] = r * cos(a);
1437                 v[1] = r * sin(a);
1438         }
1439         else v[2] = 1.f;
1440 }
1441
1442 /* calc distributed spherical energy */
1443 static void DS_energy(float *sphere, int tot, float *vec)
1444 {
1445         float *fp, fac, force[3], res[3];
1446         int a;
1447         
1448         res[0]= res[1]= res[2]= 0.0f;
1449         
1450         for(a=0, fp=sphere; a<tot; a++, fp+=3) {
1451                 VecSubf(force, vec, fp);
1452                 fac= force[0]*force[0] + force[1]*force[1] + force[2]*force[2];
1453                 if(fac!=0.0f) {
1454                         fac= 1.0f/fac;
1455                         res[0]+= fac*force[0];
1456                         res[1]+= fac*force[1];
1457                         res[2]+= fac*force[2];
1458                 }
1459         }
1460
1461         VecMulf(res, 0.5);
1462         VecAddf(vec, vec, res);
1463         Normalize(vec);
1464         
1465 }
1466
1467 /* called from convertBlenderScene.c */
1468 /* creates an equally distributed spherical sample pattern */
1469 /* and allocates threadsafe memory */
1470 void init_ao_sphere(World *wrld)
1471 {
1472         float *fp;
1473         int a, tot, iter= 16;
1474
1475         /* we make twice the amount of samples, because only a hemisphere is used */
1476         tot= 2*wrld->aosamp*wrld->aosamp;
1477         
1478         wrld->aosphere= MEM_mallocN(3*tot*sizeof(float), "AO sphere");
1479         
1480         /* fixed random */
1481         BLI_srandom(tot);
1482         
1483         /* init */
1484         fp= wrld->aosphere;
1485         for(a=0; a<tot; a++, fp+= 3) {
1486                 RandomSpherical(fp);
1487         }
1488         
1489         while(iter--) {
1490                 for(a=0, fp= wrld->aosphere; a<tot; a++, fp+= 3) {
1491                         DS_energy(wrld->aosphere, tot, fp);
1492                 }
1493         }
1494         
1495         /* tables */
1496         wrld->aotables= MEM_mallocN(BLENDER_MAX_THREADS*3*tot*sizeof(float), "AO tables");
1497 }
1498
1499 /* give per thread a table, we have to compare xs ys because of way OSA works... */
1500 static float *threadsafe_table_sphere(int test, int thread, int xs, int ys, int tot)
1501 {
1502         static int xso[BLENDER_MAX_THREADS], yso[BLENDER_MAX_THREADS];
1503         static int firsttime= 1;
1504         
1505         if(firsttime) {
1506                 memset(xso, 255, sizeof(xso));
1507                 memset(yso, 255, sizeof(yso));
1508                 firsttime= 0;
1509         }
1510         
1511         if(xs==xso[thread] && ys==yso[thread]) return R.wrld.aotables+ thread*tot*3;
1512         if(test) return NULL;
1513         xso[thread]= xs; yso[thread]= ys;
1514         return R.wrld.aotables+ thread*tot*3;
1515 }
1516
1517 static float *sphere_sampler(int type, int resol, int thread, int xs, int ys)
1518 {
1519         int tot;
1520         float *vec;
1521         
1522         tot= 2*resol*resol;
1523
1524         if (type & WO_AORNDSMP) {
1525                 float *sphere;
1526                 int a;
1527                 
1528                 // always returns table
1529                 sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1530
1531                 /* total random sampling. NOT THREADSAFE! (should be removed, is not useful) */
1532                 vec= sphere;
1533                 for (a=0; a<tot; a++, vec+=3) {
1534                         RandomSpherical(vec);
1535                 }
1536                 
1537                 return sphere;
1538         } 
1539         else {
1540                 float *sphere;
1541                 float cosfi, sinfi, cost, sint;
1542                 float ang, *vec1;
1543                 int a;
1544                 
1545                 // returns table if xs and ys were equal to last call
1546                 sphere= threadsafe_table_sphere(1, thread, xs, ys, tot);
1547                 if(sphere==NULL) {
1548                         sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1549                         
1550                         // random rotation
1551                         ang= BLI_thread_frand(thread);
1552                         sinfi= sin(ang); cosfi= cos(ang);
1553                         ang= BLI_thread_frand(thread);
1554                         sint= sin(ang); cost= cos(ang);
1555                         
1556                         vec= R.wrld.aosphere;
1557                         vec1= sphere;
1558                         for (a=0; a<tot; a++, vec+=3, vec1+=3) {
1559                                 vec1[0]= cost*cosfi*vec[0] - sinfi*vec[1] + sint*cosfi*vec[2];
1560                                 vec1[1]= cost*sinfi*vec[0] + cosfi*vec[1] + sint*sinfi*vec[2];
1561                                 vec1[2]= -sint*vec[0] + cost*vec[2];                    
1562                         }
1563                 }
1564                 return sphere;
1565         }
1566 }
1567
1568 static void ray_ao_qmc(ShadeInput *shi, float *shadfac)
1569 {
1570         Isect isec;
1571         QMCSampler *qsa=NULL;
1572         float samp3d[3];
1573         float up[3], side[3], dir[3], nrm[3];
1574         
1575         float maxdist = R.wrld.aodist;
1576         float fac=0.0f, prev=0.0f;
1577         float adapt_thresh = R.wrld.ao_adapt_thresh;
1578         float adapt_speed_fac = R.wrld.ao_adapt_speed_fac;
1579         
1580         int samples=0;
1581         int max_samples = R.wrld.aosamp*R.wrld.aosamp;
1582         
1583         float dxyview[3], skyadded=0, div;
1584         int aocolor;
1585         
1586         isec.faceorig= (RayFace*)shi->vlr;
1587         isec.oborig= RAY_OBJECT_SET(&R, shi->obi);
1588         isec.face_last= NULL;
1589         isec.ob_last= 0;
1590         isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1591         isec.lay= -1;
1592         
1593         shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1594         
1595         /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1596         aocolor= R.wrld.aocolor;
1597         if(shi->mat->mode & MA_ONLYSHADOW)
1598                 aocolor= WO_AOPLAIN;
1599         
1600         if(aocolor == WO_AOSKYTEX) {
1601                 dxyview[0]= 1.0f/(float)R.wrld.aosamp;
1602                 dxyview[1]= 1.0f/(float)R.wrld.aosamp;
1603                 dxyview[2]= 0.0f;
1604         }
1605         
1606         if(shi->vlr->flag & R_SMOOTH) {
1607                 VECCOPY(nrm, shi->vn);
1608         }
1609         else {
1610                 VECCOPY(nrm, shi->facenor);
1611         }
1612         
1613         VecOrthoBasisf(nrm, up, side);
1614         
1615         /* sampling init */
1616         if (R.wrld.ao_samp_method==WO_AOSAMP_HALTON) {
1617                 float speedfac;
1618                 
1619                 speedfac = get_avg_speed(shi) * adapt_speed_fac;
1620                 CLAMP(speedfac, 1.0, 1000.0);
1621                 max_samples /= speedfac;
1622                 if (max_samples < 5) max_samples = 5;
1623                 
1624                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
1625         } else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY)
1626                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
1627
1628         QMC_initPixel(qsa, shi->thread);
1629         
1630         while (samples < max_samples) {
1631
1632                 /* sampling, returns quasi-random vector in unit hemisphere */
1633                 QMC_sampleHemi(samp3d, qsa, shi->thread, samples);
1634
1635                 dir[0] = (samp3d[0]*up[0] + samp3d[1]*side[0] + samp3d[2]*nrm[0]);
1636                 dir[1] = (samp3d[0]*up[1] + samp3d[1]*side[1] + samp3d[2]*nrm[1]);
1637                 dir[2] = (samp3d[0]*up[2] + samp3d[1]*side[2] + samp3d[2]*nrm[2]);
1638                 
1639                 Normalize(dir);
1640                         
1641                 VECCOPY(isec.start, shi->co);
1642                 isec.end[0] = shi->co[0] - maxdist*dir[0];
1643                 isec.end[1] = shi->co[1] - maxdist*dir[1];
1644                 isec.end[2] = shi->co[2] - maxdist*dir[2];
1645                 
1646                 prev = fac;
1647                 
1648                 if(RE_ray_tree_intersect_check(R.raytree, &isec, vlr_check_intersect_solid)) {
1649                         if (R.wrld.aomode & WO_AODIST) fac+= exp(-isec.labda*R.wrld.aodistfac); 
1650                         else fac+= 1.0f;
1651                 }
1652                 else if(aocolor!=WO_AOPLAIN) {
1653                         float skycol[4];
1654                         float skyfac, view[3];
1655                         
1656                         view[0]= -dir[0];
1657                         view[1]= -dir[1];
1658                         view[2]= -dir[2];
1659                         Normalize(view);
1660                         
1661                         if(aocolor==WO_AOSKYCOL) {
1662                                 skyfac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1663                                 shadfac[0]+= (1.0f-skyfac)*R.wrld.horr + skyfac*R.wrld.zenr;
1664                                 shadfac[1]+= (1.0f-skyfac)*R.wrld.horg + skyfac*R.wrld.zeng;
1665                                 shadfac[2]+= (1.0f-skyfac)*R.wrld.horb + skyfac*R.wrld.zenb;
1666                         }
1667                         else {  /* WO_AOSKYTEX */
1668                                 shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
1669                                 shadeSunView(skycol, shi->view);
1670                                 shadfac[0]+= skycol[0];
1671                                 shadfac[1]+= skycol[1];
1672                                 shadfac[2]+= skycol[2];
1673                         }
1674                         skyadded++;
1675                 }
1676                 
1677                 samples++;
1678                 
1679                 if (qsa->type == SAMP_TYPE_HALTON) {
1680                         /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */          
1681                         if (adapt_thresh > 0.0 && (samples > max_samples/2) ) {
1682                                 
1683                                 if (adaptive_sample_contrast_val(samples, prev, fac, adapt_thresh)) {
1684                                         break;
1685                                 }
1686                         }
1687                 }
1688         }
1689         
1690         if(aocolor!=WO_AOPLAIN && skyadded) {
1691                 div= (1.0f - fac/(float)samples)/((float)skyadded);
1692                 
1693                 shadfac[0]*= div;       // average color times distances/hits formula
1694                 shadfac[1]*= div;       // average color times distances/hits formula
1695                 shadfac[2]*= div;       // average color times distances/hits formula
1696         } else {
1697                 shadfac[0]= shadfac[1]= shadfac[2]= 1.0f - fac/(float)samples;
1698         }
1699         
1700         if (qsa)
1701                 release_thread_qmcsampler(&R, shi->thread, qsa);
1702 }
1703
1704 /* extern call from shade_lamp_loop, ambient occlusion calculus */
1705 static void ray_ao_spheresamp(ShadeInput *shi, float *shadfac)
1706 {
1707         Isect isec;
1708         float *vec, *nrm, div, bias, sh=0.0f;
1709         float maxdist = R.wrld.aodist;
1710         float dxyview[3];
1711         int j= -1, tot, actual=0, skyadded=0, aocolor, resol= R.wrld.aosamp;
1712         
1713         isec.faceorig= (RayFace*)shi->vlr;
1714         isec.oborig= RAY_OBJECT_SET(&R, shi->obi);
1715         isec.face_last= NULL;
1716         isec.ob_last= 0;
1717         isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1718         isec.lay= -1;
1719
1720
1721         shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1722
1723         /* bias prevents smoothed faces to appear flat */
1724         if(shi->vlr->flag & R_SMOOTH) {
1725                 bias= R.wrld.aobias;
1726                 nrm= shi->vn;
1727         }
1728         else {
1729                 bias= 0.0f;
1730                 nrm= shi->facenor;
1731         }
1732
1733         /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1734         aocolor= R.wrld.aocolor;
1735         if(shi->mat->mode & MA_ONLYSHADOW)
1736                 aocolor= WO_AOPLAIN;
1737         
1738         if(resol>32) resol= 32;
1739         
1740         vec= sphere_sampler(R.wrld.aomode, resol, shi->thread, shi->xs, shi->ys);
1741         
1742         // warning: since we use full sphere now, and dotproduct is below, we do twice as much
1743         tot= 2*resol*resol;
1744
1745         if(aocolor == WO_AOSKYTEX) {
1746                 dxyview[0]= 1.0f/(float)resol;
1747                 dxyview[1]= 1.0f/(float)resol;
1748                 dxyview[2]= 0.0f;
1749         }
1750         
1751         while(tot--) {
1752                 
1753                 if ((vec[0]*nrm[0] + vec[1]*nrm[1] + vec[2]*nrm[2]) > bias) {
1754                         /* only ao samples for mask */
1755                         if(R.r.mode & R_OSA) {
1756                                 j++;
1757                                 if(j==R.osa) j= 0;
1758                                 if(!(shi->mask & (1<<j))) {
1759                                         vec+=3;
1760                                         continue;
1761                                 }
1762                         }
1763                         
1764                         actual++;
1765                         
1766                         /* always set start/end, RE_ray_tree_intersect clips it */
1767                         VECCOPY(isec.start, shi->co);
1768                         isec.end[0] = shi->co[0] - maxdist*vec[0];
1769                         isec.end[1] = shi->co[1] - maxdist*vec[1];
1770                         isec.end[2] = shi->co[2] - maxdist*vec[2];
1771                         
1772                         /* do the trace */
1773                         if(RE_ray_tree_intersect_check(R.raytree, &isec, vlr_check_intersect_solid)) {
1774                                 if (R.wrld.aomode & WO_AODIST) sh+= exp(-isec.labda*R.wrld.aodistfac); 
1775                                 else sh+= 1.0f;
1776                         }
1777                         else if(aocolor!=WO_AOPLAIN) {
1778                                 float skycol[4];
1779                                 float fac, view[3];
1780                                 
1781                                 view[0]= -vec[0];
1782                                 view[1]= -vec[1];
1783                                 view[2]= -vec[2];
1784                                 Normalize(view);
1785                                 
1786                                 if(aocolor==WO_AOSKYCOL) {
1787                                         fac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1788                                         shadfac[0]+= (1.0f-fac)*R.wrld.horr + fac*R.wrld.zenr;
1789                                         shadfac[1]+= (1.0f-fac)*R.wrld.horg + fac*R.wrld.zeng;
1790                                         shadfac[2]+= (1.0f-fac)*R.wrld.horb + fac*R.wrld.zenb;
1791                                 }
1792                                 else {  /* WO_AOSKYTEX */
1793                                         shadeSkyView(skycol, isec.start, view, dxyview, shi->thread);
1794                                         shadeSunView(skycol, shi->view);
1795                                         shadfac[0]+= skycol[0];
1796                                         shadfac[1]+= skycol[1];
1797                                         shadfac[2]+= skycol[2];
1798                                 }
1799                                 skyadded++;
1800                         }
1801                 }
1802                 // samples
1803                 vec+= 3;
1804         }
1805         
1806         if(actual==0) sh= 1.0f;
1807         else sh = 1.0f - sh/((float)actual);
1808         
1809         if(aocolor!=WO_AOPLAIN && skyadded) {
1810                 div= sh/((float)skyadded);
1811                 
1812                 shadfac[0]*= div;       // average color times distances/hits formula
1813                 shadfac[1]*= div;       // average color times distances/hits formula
1814                 shadfac[2]*= div;       // average color times distances/hits formula
1815         }
1816         else {
1817                 shadfac[0]= shadfac[1]= shadfac[2]= sh;
1818         }
1819 }
1820
1821 void ray_ao(ShadeInput *shi, float *shadfac)
1822 {
1823         /* Unfortunately, the unusual way that the sphere sampler calculates roughly twice as many
1824          * samples as are actually traced, and skips them based on bias and OSA settings makes it very difficult
1825          * to reuse code between these two functions. This is the easiest way I can think of to do it
1826          * --broken */
1827         if (ELEM(R.wrld.ao_samp_method, WO_AOSAMP_HAMMERSLEY, WO_AOSAMP_HALTON))
1828                 ray_ao_qmc(shi, shadfac);
1829         else if (R.wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
1830                 ray_ao_spheresamp(shi, shadfac);
1831 }
1832
1833 static void ray_shadow_jittered_coords(ShadeInput *shi, int max, float jitco[RE_MAX_OSA][3], int *totjitco)
1834 {
1835         /* magic numbers for reordering sample positions to give better
1836          * results with adaptive sample, when it usually only takes 4 samples */
1837         int order8[8] = {0, 1, 5, 6, 2, 3, 4, 7};
1838         int order11[11] = {1, 3, 8, 10, 0, 2, 4, 5, 6, 7, 9};
1839         int order16[16] = {1, 3, 9, 12, 0, 6, 7, 8, 13, 2, 4, 5, 10, 11, 14, 15};
1840         int count = count_mask(shi->mask);
1841
1842         /* for better antialising shadow samples are distributed over the subpixel
1843          * sample coordinates, this only works for raytracing depth 0 though */
1844         if(!shi->strand && shi->depth == 0 && count > 1 && count <= max) {
1845                 float xs, ys, zs, view[3];
1846                 int samp, ordsamp, tot= 0;
1847
1848                 for(samp=0; samp<R.osa; samp++) {
1849                         if(R.osa == 8) ordsamp = order8[samp];
1850                         else if(R.osa == 11) ordsamp = order11[samp];
1851                         else if(R.osa == 16) ordsamp = order16[samp];
1852                         else ordsamp = samp;
1853
1854                         if(shi->mask & (1<<ordsamp)) {
1855                                 /* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */
1856                                 xs= (float)shi->scanco[0] + R.jit[ordsamp][0] + 0.5f;
1857                                 ys= (float)shi->scanco[1] + R.jit[ordsamp][1] + 0.5f;
1858                                 zs= shi->scanco[2];
1859
1860                                 shade_input_calc_viewco(shi, xs, ys, zs, view, NULL, jitco[tot], NULL, NULL);
1861                                 tot++;
1862                         }
1863                 }
1864
1865                 *totjitco= tot;
1866         }
1867         else {
1868                 VECCOPY(jitco[0], shi->co);
1869                 *totjitco= 1;
1870         }
1871 }
1872
1873 static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)
1874 {
1875         QMCSampler *qsa=NULL;
1876         int samples=0;
1877         float samp3d[3];
1878
1879         float fac=0.0f, vec[3];
1880         float colsq[4];
1881         float adapt_thresh = lar->adapt_thresh;
1882         int min_adapt_samples=4, max_samples = lar->ray_totsamp;
1883         float *co;
1884         int do_soft=1, full_osa=0;
1885
1886         float jitco[RE_MAX_OSA][3];
1887         int totjitco;
1888
1889         colsq[0] = colsq[1] = colsq[2] = 0.0;
1890         if(isec->mode==RE_RAY_SHADOW_TRA) {
1891                 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
1892         } else
1893                 shadfac[3]= 1.0f;
1894         
1895         if (lar->ray_totsamp < 2) do_soft = 0;
1896         if ((R.r.mode & R_OSA) && (R.osa > 0) && (shi->vlr->flag & R_FULL_OSA)) full_osa = 1;
1897         
1898         if (full_osa) {
1899                 if (do_soft) max_samples  = max_samples/R.osa + 1;
1900                 else max_samples = 1;
1901         } else {
1902                 if (do_soft) max_samples = lar->ray_totsamp;
1903                 else max_samples = (R.osa > 4)?R.osa:5;
1904         }
1905         
1906         ray_shadow_jittered_coords(shi, max_samples, jitco, &totjitco);
1907
1908         /* sampling init */
1909         if (lar->ray_samp_method==LA_SAMP_HALTON)
1910                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
1911         else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY)
1912                 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
1913         
1914         QMC_initPixel(qsa, shi->thread);
1915         
1916         VECCOPY(vec, lampco);
1917         
1918         
1919         while (samples < max_samples) {
1920                 isec->faceorig= (RayFace*)shi->vlr;
1921                 isec->oborig= RAY_OBJECT_SET(&R, shi->obi);
1922
1923                 /* manually jitter the start shading co-ord per sample
1924                  * based on the pre-generated OSA texture sampling offsets, 
1925                  * for anti-aliasing sharp shadow edges. */
1926                 co = jitco[samples % totjitco];
1927
1928                 if (do_soft) {
1929                         /* sphere shadow source */
1930                         if (lar->type == LA_LOCAL) {
1931                                 float ru[3], rv[3], v[3], s[3];
1932                                 
1933                                 /* calc tangent plane vectors */
1934                                 v[0] = co[0] - lampco[0];
1935                                 v[1] = co[1] - lampco[1];
1936                                 v[2] = co[2] - lampco[2];
1937                                 Normalize(v);
1938                                 VecOrthoBasisf(v, ru, rv);
1939                                 
1940                                 /* sampling, returns quasi-random vector in area_size disc */
1941                                 QMC_sampleDisc(samp3d, qsa, shi->thread, samples,lar->area_size);
1942
1943                                 /* distribute disc samples across the tangent plane */
1944                                 s[0] = samp3d[0]*ru[0] + samp3d[1]*rv[0];
1945                                 s[1] = samp3d[0]*ru[1] + samp3d[1]*rv[1];
1946                                 s[2] = samp3d[0]*ru[2] + samp3d[1]*rv[2];
1947                                 
1948                                 VECCOPY(samp3d, s);
1949                         }
1950                         else {
1951                                 /* sampling, returns quasi-random vector in [sizex,sizey]^2 plane */
1952                                 QMC_sampleRect(samp3d, qsa, shi->thread, samples, lar->area_size, lar->area_sizey);
1953                                                                 
1954                                 /* align samples to lamp vector */
1955                                 Mat3MulVecfl(lar->mat, samp3d);
1956                         }
1957                         isec->end[0]= vec[0]+samp3d[0];
1958                         isec->end[1]= vec[1]+samp3d[1];
1959                         isec->end[2]= vec[2]+samp3d[2];
1960                 } else {
1961                         VECCOPY(isec->end, vec);
1962                 }
1963
1964                 if(shi->strand) {
1965                         /* bias away somewhat to avoid self intersection */
1966                         float jitbias= 0.5f*(VecLength(shi->dxco) + VecLength(shi->dyco));
1967                         float v[3];
1968
1969                         VECSUB(v, co, isec->end);
1970                         Normalize(v);
1971
1972                         co[0] -= jitbias*v[0];
1973                         co[1] -= jitbias*v[1];
1974                         co[2] -= jitbias*v[2];
1975                 }
1976
1977                 VECCOPY(isec->start, co);
1978                 
1979                 /* trace the ray */
1980                 if(isec->mode==RE_RAY_SHADOW_TRA) {
1981                         isec->col[0]= isec->col[1]= isec->col[2]=  1.0f;
1982                         isec->col[3]= 1.0f;
1983                         
1984                         ray_trace_shadow_tra(isec, shi, DEPTH_SHADOW_TRA, 0);
1985                         shadfac[0] += isec->col[0];
1986                         shadfac[1] += isec->col[1];
1987                         shadfac[2] += isec->col[2];
1988                         shadfac[3] += isec->col[3];
1989                         
1990                         /* for variance calc */
1991                         colsq[0] += isec->col[0]*isec->col[0];
1992                         colsq[1] += isec->col[1]*isec->col[1];
1993                         colsq[2] += isec->col[2]*isec->col[2];
1994                 }
1995                 else {
1996                         if( RE_ray_tree_intersect(R.raytree, isec) ) fac+= 1.0f;
1997                 }
1998                 
1999                 samples++;
2000                 
2001                 if ((lar->ray_samp_method == LA_SAMP_HALTON)) {
2002                 
2003                         /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
2004                         if ((max_samples > min_adapt_samples) && (adapt_thresh > 0.0) && (samples > max_samples / 3)) {
2005                                 if (isec->mode==RE_RAY_SHADOW_TRA) {
2006                                         if ((shadfac[3] / samples > (1.0-adapt_thresh)) || (shadfac[3] / samples < adapt_thresh))
2007                                                 break;
2008                                         else if (adaptive_sample_variance(samples, shadfac, colsq, adapt_thresh))
2009                                                 break;
2010                                 } else {
2011                                         if ((fac / samples > (1.0-adapt_thresh)) || (fac / samples < adapt_thresh))
2012                                                 break;
2013                                 }
2014                         }
2015                 }
2016         }
2017         
2018         if(isec->mode==RE_RAY_SHADOW_TRA) {
2019                 shadfac[0] /= samples;
2020                 shadfac[1] /= samples;
2021                 shadfac[2] /= samples;
2022                 shadfac[3] /= samples;
2023         } else
2024                 shadfac[3]= 1.0f-fac/samples;
2025
2026         if (qsa)
2027                 release_thread_qmcsampler(&R, shi->thread, qsa);
2028 }
2029
2030 static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)
2031 {
2032         /* area soft shadow */
2033         float *jitlamp;
2034         float fac=0.0f, div=0.0f, vec[3];
2035         int a, j= -1, mask;
2036         
2037         if(isec->mode==RE_RAY_SHADOW_TRA) {
2038                 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
2039         }
2040         else shadfac[3]= 1.0f;
2041         
2042         fac= 0.0f;
2043         jitlamp= give_jitter_plane(lar, shi->thread, shi->xs, shi->ys);
2044
2045         a= lar->ray_totsamp;
2046         
2047         /* this correction to make sure we always take at least 1 sample */
2048         mask= shi->mask;
2049         if(a==4) mask |= (mask>>4)|(mask>>8);
2050         else if(a==9) mask |= (mask>>9);
2051         
2052         while(a--) {
2053                 
2054                 if(R.r.mode & R_OSA) {
2055                         j++;
2056                         if(j>=R.osa) j= 0;
2057                         if(!(mask & (1<<j))) {
2058                                 jitlamp+= 2;
2059                                 continue;
2060                         }
2061                 }
2062                 
2063                 isec->faceorig= (RayFace*)shi->vlr;
2064                 isec->oborig= RAY_OBJECT_SET(&R, shi->obi);
2065                 
2066                 vec[0]= jitlamp[0];
2067                 vec[1]= jitlamp[1];
2068                 vec[2]= 0.0f;
2069                 Mat3MulVecfl(lar->mat, vec);
2070                 
2071                 /* set start and end, RE_ray_tree_intersect clips it */
2072                 VECCOPY(isec->start, shi->co);
2073                 isec->end[0]= lampco[0]+vec[0];
2074                 isec->end[1]= lampco[1]+vec[1];
2075                 isec->end[2]= lampco[2]+vec[2];
2076                 
2077                 if(isec->mode==RE_RAY_SHADOW_TRA) {
2078                         /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2079                         isec->col[0]= isec->col[1]= isec->col[2]=  1.0f;
2080                         isec->col[3]= 1.0f;
2081                         
2082                         ray_trace_shadow_tra(isec, shi, DEPTH_SHADOW_TRA, 0);
2083                         shadfac[0] += isec->col[0];
2084                         shadfac[1] += isec->col[1];
2085                         shadfac[2] += isec->col[2];
2086                         shadfac[3] += isec->col[3];
2087                 }
2088                 else if( RE_ray_tree_intersect(R.raytree, isec) ) fac+= 1.0f;
2089                 
2090                 div+= 1.0f;
2091                 jitlamp+= 2;
2092         }
2093         
2094         if(isec->mode==RE_RAY_SHADOW_TRA) {
2095                 shadfac[0] /= div;
2096                 shadfac[1] /= div;
2097                 shadfac[2] /= div;
2098                 shadfac[3] /= div;
2099         }
2100         else {
2101                 // sqrt makes nice umbra effect
2102                 if(lar->ray_samp_type & LA_SAMP_UMBRA)
2103                         shadfac[3]= sqrt(1.0f-fac/div);
2104                 else
2105                         shadfac[3]= 1.0f-fac/div;
2106         }
2107 }
2108 /* extern call from shade_lamp_loop */
2109 void ray_shadow(ShadeInput *shi, LampRen *lar, float *shadfac)
2110 {
2111         Isect isec;
2112         float lampco[3], maxsize;
2113
2114         /* setup isec */
2115         if(shi->mat->mode & MA_SHADOW_TRA) isec.mode= RE_RAY_SHADOW_TRA;
2116         else isec.mode= RE_RAY_SHADOW;
2117         
2118         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW))
2119                 isec.lay= lar->lay;
2120         else
2121                 isec.lay= -1;
2122
2123         /* only when not mir tracing, first hit optimm */
2124         if(shi->depth==0) {
2125                 isec.face_last= (RayFace*)lar->vlr_last[shi->thread];
2126                 isec.ob_last= RAY_OBJECT_SET(&R, lar->obi_last[shi->thread]);
2127         }
2128         else {
2129                 isec.face_last= NULL;
2130                 isec.ob_last= 0;
2131         }
2132         
2133         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2134                 maxsize= RE_ray_tree_max_size(R.raytree);
2135                 lampco[0]= shi->co[0] - maxsize*lar->vec[0];
2136                 lampco[1]= shi->co[1] - maxsize*lar->vec[1];
2137                 lampco[2]= shi->co[2] - maxsize*lar->vec[2];
2138         }
2139         else {
2140                 VECCOPY(lampco, lar->co);
2141         }
2142         
2143         if (ELEM(lar->ray_samp_method, LA_SAMP_HALTON, LA_SAMP_HAMMERSLEY)) {
2144                 
2145                 ray_shadow_qmc(shi, lar, lampco, shadfac, &isec);
2146                 
2147         } else {
2148                 if(lar->ray_totsamp<2) {
2149                         
2150                         isec.faceorig= (RayFace*)shi->vlr;
2151                         isec.oborig= RAY_OBJECT_SET(&R, shi->obi);
2152                         shadfac[3]= 1.0f; // 1.0=full light
2153                         
2154                         /* set up isec vec */
2155                         VECCOPY(isec.start, shi->co);
2156                         VECCOPY(isec.end, lampco);
2157
2158                         if(isec.mode==RE_RAY_SHADOW_TRA) {
2159                                 /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2160                                 isec.col[0]= isec.col[1]= isec.col[2]=  1.0f;
2161                                 isec.col[3]= 1.0f;
2162
2163                                 ray_trace_shadow_tra(&isec, shi, DEPTH_SHADOW_TRA, 0);
2164                                 QUATCOPY(shadfac, isec.col);
2165                         }
2166                         else if(RE_ray_tree_intersect(R.raytree, &isec)) shadfac[3]= 0.0f;
2167                 }
2168                 else {
2169                         ray_shadow_jitter(shi, lar, lampco, shadfac, &isec);
2170                 }
2171         }
2172                 
2173         /* for first hit optim, set last interesected shadow face */
2174         if(shi->depth==0) {
2175                 lar->vlr_last[shi->thread]= (VlakRen*)isec.face_last;
2176                 lar->obi_last[shi->thread]= RAY_OBJECT_GET(&R, isec.ob_last);
2177         }
2178
2179 }
2180
2181 #if 0
2182 /* only when face points away from lamp, in direction of lamp, trace ray and find first exit point */
2183 static void ray_translucent(ShadeInput *shi, LampRen *lar, float *distfac, float *co)
2184 {
2185         Isect isec;
2186         float lampco[3], maxsize;
2187         
2188         /* setup isec */
2189         isec.mode= RE_RAY_SHADOW_TRA;
2190         
2191         if(lar->mode & LA_LAYER) isec.lay= lar->lay; else isec.lay= -1;
2192         
2193         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2194                 maxsize= RE_ray_tree_max_size(R.raytree);
2195                 lampco[0]= shi->co[0] - maxsize*lar->vec[0];
2196                 lampco[1]= shi->co[1] - maxsize*lar->vec[1];
2197                 lampco[2]= shi->co[2] - maxsize*lar->vec[2];
2198         }
2199         else {
2200                 VECCOPY(lampco, lar->co);
2201         }
2202         
2203         isec.faceorig= (RayFace*)shi->vlr;
2204         isec.oborig= RAY_OBJECT_SET(&R, shi->obi);
2205         
2206         /* set up isec vec */
2207         VECCOPY(isec.start, shi->co);
2208         VECCOPY(isec.end, lampco);
2209         
2210         if(RE_ray_tree_intersect(R.raytree, &isec)) {
2211                 /* we got a face */
2212                 
2213                 /* render co */
2214                 co[0]= isec.start[0]+isec.labda*(isec.vec[0]);
2215                 co[1]= isec.start[1]+isec.labda*(isec.vec[1]);
2216                 co[2]= isec.start[2]+isec.labda*(isec.vec[2]);
2217                 
2218                 *distfac= VecLength(isec.vec);
2219         }
2220         else
2221                 *distfac= 0.0f;
2222 }
2223
2224 #endif
2225