Bug fix 2116
[blender-staging.git] / source / blender / render / intern / source / ray.c
1 /**
2  * $Id$
3  *
4  * ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version. The Blender
10  * Foundation also sells licenses for use in proprietary software under
11  * the Blender License.  See http://www.blender.org/BL/ for information
12  * about this.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software Foundation,
21  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22  *
23  * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
24  * All rights reserved.
25  *
26  * The Original Code is: all of this file.
27  *
28  * ***** END GPL/BL DUAL LICENSE BLOCK *****
29  */
30
31
32 #include <math.h>
33 #include <string.h>
34 #include <stdlib.h>
35
36 #include "MEM_guardedalloc.h"
37
38 #include "DNA_material_types.h"
39 #include "DNA_lamp_types.h"
40
41 #include "BKE_utildefines.h"
42 #include "BKE_global.h"
43
44 #include "BLI_arithb.h"
45 #include <BLI_rand.h>
46
47 #include "render.h"
48 #include "rendercore.h"
49 #include "pixelblending.h"
50 #include "pixelshading.h"
51 #include "jitter.h"
52 #include "texture.h"
53
54 #define DDA_SHADOW 0
55 #define DDA_MIRROR 1
56 #define DDA_SHADOW_TRA 2
57
58 #define RAY_TRA         1
59 #define RAY_TRAFLIP     2
60
61 #define DEPTH_SHADOW_TRA  10
62 /* from float.h */
63 #define FLT_EPSILON 1.19209290e-07F
64
65
66 /* ********** structs *************** */
67
68 #define BRANCH_ARRAY 1024
69
70 typedef struct Octree {
71         struct Branch *adrbranch[BRANCH_ARRAY];
72         struct Node *adrnode[4096];
73         float ocsize;   /* ocsize: mult factor,  max size octree */
74         float ocfacx,ocfacy,ocfacz;
75         float min[3], max[3];
76         int ocres;
77
78 } Octree;
79
80 typedef struct Isect {
81         float start[3], vec[3], end[3];         /* start+vec = end, in d3dda */
82         float labda, u, v;
83         struct VlakRen *vlr, *vlrcontr, *vlrorig;
84         short isect, mode;      /* isect: which half of quad, mode: DDA_SHADOW, DDA_MIRROR, DDA_SHADOW_TRA */
85         float ddalabda;
86         float col[4];           /* RGBA for shadow_tra */
87         int lay;                        /* -1 default, set for layer lamps */
88         short vlrisect;         /* flag whether vlrcontr was done or not */
89         /* for optimize, last intersected face */
90         VlakRen *vlr_last;
91 } Isect;
92
93 typedef struct Branch
94 {
95         struct Branch *b[8];
96 } Branch;
97
98 typedef struct OcVal 
99 {
100         short ocx, ocy, ocz;
101 } OcVal;
102
103 typedef struct Node
104 {
105         struct VlakRen *v[8];
106         struct OcVal ov[8];
107         struct Node *next;
108 } Node;
109
110
111 /* ******** globals ***************** */
112
113 static Octree g_oc;     /* can be scene pointer or so later... */
114
115 /* just for statistics */
116 static int raycount, branchcount, nodecount;
117 static int accepted, rejected, coherent_ray;
118
119 /* **************** ocval method ******************* */
120 /* within one octree node, a set of 3x15 bits defines a 'boundbox' to OR with */
121
122 #define OCVALRES        15
123 #define BROW16(min, max)      (((max)>=OCVALRES? 0xFFFF: (1<<(max+1))-1) - ((min>0)? ((1<<(min))-1):0) )
124
125 static void calc_ocval_face(float *v1, float *v2, float *v3, float *v4, short x, short y, short z, OcVal *ov)
126 {
127         float min[3], max[3];
128         int ocmin, ocmax;
129         
130         VECCOPY(min, v1);
131         VECCOPY(max, v1);
132         DO_MINMAX(v2, min, max);
133         DO_MINMAX(v3, min, max);
134         if(v4) {
135                 DO_MINMAX(v4, min, max);
136         }
137         
138         ocmin= OCVALRES*(min[0]-x); 
139         ocmax= OCVALRES*(max[0]-x);
140         ov->ocx= BROW16(ocmin, ocmax);
141         
142         ocmin= OCVALRES*(min[1]-y); 
143         ocmax= OCVALRES*(max[1]-y);
144         ov->ocy= BROW16(ocmin, ocmax);
145         
146         ocmin= OCVALRES*(min[2]-z); 
147         ocmax= OCVALRES*(max[2]-z);
148         ov->ocz= BROW16(ocmin, ocmax);
149
150 }
151
152 static void calc_ocval_ray(OcVal *ov, float xo, float yo, float zo, float *vec1, float *vec2)
153 {
154         int ocmin, ocmax;
155         
156         if(vec1[0]<vec2[0]) {
157                 ocmin= OCVALRES*(vec1[0] - xo);
158                 ocmax= OCVALRES*(vec2[0] - xo);
159         } else {
160                 ocmin= OCVALRES*(vec2[0] - xo);
161                 ocmax= OCVALRES*(vec1[0] - xo);
162         }
163         ov->ocx= BROW16(ocmin, ocmax);
164
165         if(vec1[1]<vec2[1]) {
166                 ocmin= OCVALRES*(vec1[1] - yo);
167                 ocmax= OCVALRES*(vec2[1] - yo);
168         } else {
169                 ocmin= OCVALRES*(vec2[1] - yo);
170                 ocmax= OCVALRES*(vec1[1] - yo);
171         }
172         ov->ocy= BROW16(ocmin, ocmax);
173
174         if(vec1[2]<vec2[2]) {
175                 ocmin= OCVALRES*(vec1[2] - zo);
176                 ocmax= OCVALRES*(vec2[2] - zo);
177         } else {
178                 ocmin= OCVALRES*(vec2[2] - zo);
179                 ocmax= OCVALRES*(vec1[2] - zo);
180         }
181         ov->ocz= BROW16(ocmin, ocmax);
182 }
183
184 /* ************* octree ************** */
185
186 static Branch *addbranch(Branch *br, short oc)
187 {
188         
189         if(br->b[oc]) return br->b[oc];
190         
191         branchcount++;
192         if(g_oc.adrbranch[branchcount>>12]==NULL)
193                 g_oc.adrbranch[branchcount>>12]= MEM_callocN(4096*sizeof(Branch),"addbranch");
194
195         if(branchcount>= BRANCH_ARRAY*4096) {
196                 printf("error; octree branches full\n");
197                 branchcount=0;
198         }
199         
200         return br->b[oc]=g_oc.adrbranch[branchcount>>12]+(branchcount & 4095);
201 }
202
203 static Node *addnode(void)
204 {
205         
206         nodecount++;
207         if(g_oc.adrnode[nodecount>>12]==NULL)
208                 g_oc.adrnode[nodecount>>12]= MEM_callocN(4096*sizeof(Node),"addnode");
209
210         if(nodecount> 4096*4096) {
211                 printf("error; octree nodes full\n");
212                 nodecount=0;
213         }
214         
215         return g_oc.adrnode[nodecount>>12]+(nodecount & 4095);
216 }
217
218 static int face_in_node(VlakRen *vlr, short x, short y, short z, float rtf[][3])
219 {
220         static float nor[3], d;
221         float fx, fy, fz;
222         
223         // init static vars 
224         if(vlr) {
225                 CalcNormFloat(rtf[0], rtf[1], rtf[2], nor);
226                 d= -nor[0]*rtf[0][0] - nor[1]*rtf[0][1] - nor[2]*rtf[0][2];
227                 return 0;
228         }
229         
230         fx= x;
231         fy= y;
232         fz= z;
233         
234         if((x+0)*nor[0] + (y+0)*nor[1] + (z+0)*nor[2] + d > 0.0) {
235                 if((x+1)*nor[0] + (y+0)*nor[1] + (z+0)*nor[2] + d < 0.0) return 1;
236                 if((x+0)*nor[0] + (y+1)*nor[1] + (z+0)*nor[2] + d < 0.0) return 1;
237                 if((x+1)*nor[0] + (y+1)*nor[1] + (z+0)*nor[2] + d < 0.0) return 1;
238         
239                 if((x+0)*nor[0] + (y+0)*nor[1] + (z+1)*nor[2] + d < 0.0) return 1;
240                 if((x+1)*nor[0] + (y+0)*nor[1] + (z+1)*nor[2] + d < 0.0) return 1;
241                 if((x+0)*nor[0] + (y+1)*nor[1] + (z+1)*nor[2] + d < 0.0) return 1;
242                 if((x+1)*nor[0] + (y+1)*nor[1] + (z+1)*nor[2] + d < 0.0) return 1;
243         }
244         else {
245                 if((x+1)*nor[0] + (y+0)*nor[1] + (z+0)*nor[2] + d > 0.0) return 1;
246                 if((x+0)*nor[0] + (y+1)*nor[1] + (z+0)*nor[2] + d > 0.0) return 1;
247                 if((x+1)*nor[0] + (y+1)*nor[1] + (z+0)*nor[2] + d > 0.0) return 1;
248         
249                 if((x+0)*nor[0] + (y+0)*nor[1] + (z+1)*nor[2] + d > 0.0) return 1;
250                 if((x+1)*nor[0] + (y+0)*nor[1] + (z+1)*nor[2] + d > 0.0) return 1;
251                 if((x+0)*nor[0] + (y+1)*nor[1] + (z+1)*nor[2] + d > 0.0) return 1;
252                 if((x+1)*nor[0] + (y+1)*nor[1] + (z+1)*nor[2] + d > 0.0) return 1;
253         }
254
255         return 0;
256 }
257
258 static void ocwrite(VlakRen *vlr, short x, short y, short z, float rtf[][3])
259 {
260         Branch *br;
261         Node *no;
262         short a, oc0, oc1, oc2, oc3, oc4, oc5;
263
264         if(face_in_node(NULL, x,y,z, rtf)==0) return;
265
266         x<<=2;
267         y<<=1;
268
269         br= g_oc.adrbranch[0];
270
271         if(g_oc.ocres==512) {
272                 oc0= ((x & 1024)+(y & 512)+(z & 256))>>8;
273                 br= addbranch(br, oc0);
274         }
275         if(g_oc.ocres>=256) {
276                 oc0= ((x & 512)+(y & 256)+(z & 128))>>7;
277                 br= addbranch(br, oc0);
278         }
279         if(g_oc.ocres>=128) {
280                 oc0= ((x & 256)+(y & 128)+(z & 64))>>6;
281                 br= addbranch(br, oc0);
282         }
283
284         oc0= ((x & 128)+(y & 64)+(z & 32))>>5;
285         oc1= ((x & 64)+(y & 32)+(z & 16))>>4;
286         oc2= ((x & 32)+(y & 16)+(z & 8))>>3;
287         oc3= ((x & 16)+(y & 8)+(z & 4))>>2;
288         oc4= ((x & 8)+(y & 4)+(z & 2))>>1;
289         oc5= ((x & 4)+(y & 2)+(z & 1));
290
291         br= addbranch(br,oc0);
292         br= addbranch(br,oc1);
293         br= addbranch(br,oc2);
294         br= addbranch(br,oc3);
295         br= addbranch(br,oc4);
296         no= (Node *)br->b[oc5];
297         if(no==NULL) br->b[oc5]= (Branch *)(no= addnode());
298
299         while(no->next) no= no->next;
300
301         a= 0;
302         if(no->v[7]) {          /* node full */
303                 no->next= addnode();
304                 no= no->next;
305         }
306         else {
307                 while(no->v[a]!=NULL) a++;
308         }
309         
310         no->v[a]= vlr;
311         
312         calc_ocval_face(rtf[0], rtf[1], rtf[2], rtf[3], x>>2, y>>1, z, &no->ov[a]);
313
314 }
315
316 static void d2dda(short b1, short b2, short c1, short c2, char *ocvlak, short rts[][3], float rtf[][3])
317 {
318         int ocx1,ocx2,ocy1,ocy2;
319         int x,y,dx=0,dy=0;
320         float ox1,ox2,oy1,oy2;
321         float labda,labdao,labdax,labday,ldx,ldy;
322
323         ocx1= rts[b1][c1];
324         ocy1= rts[b1][c2];
325         ocx2= rts[b2][c1];
326         ocy2= rts[b2][c2];
327
328         if(ocx1==ocx2 && ocy1==ocy2) {
329                 ocvlak[g_oc.ocres*ocx1+ocy1]= 1;
330                 return;
331         }
332
333         ox1= rtf[b1][c1];
334         oy1= rtf[b1][c2];
335         ox2= rtf[b2][c1];
336         oy2= rtf[b2][c2];
337
338         if(ox1!=ox2) {
339                 if(ox2-ox1>0.0) {
340                         labdax= (ox1-ocx1-1.0)/(ox1-ox2);
341                         ldx= -1.0/(ox1-ox2);
342                         dx= 1;
343                 } else {
344                         labdax= (ox1-ocx1)/(ox1-ox2);
345                         ldx= 1.0/(ox1-ox2);
346                         dx= -1;
347                 }
348         } else {
349                 labdax=1.0;
350                 ldx=0;
351         }
352
353         if(oy1!=oy2) {
354                 if(oy2-oy1>0.0) {
355                         labday= (oy1-ocy1-1.0)/(oy1-oy2);
356                         ldy= -1.0/(oy1-oy2);
357                         dy= 1;
358                 } else {
359                         labday= (oy1-ocy1)/(oy1-oy2);
360                         ldy= 1.0/(oy1-oy2);
361                         dy= -1;
362                 }
363         } else {
364                 labday=1.0;
365                 ldy=0;
366         }
367         
368         x=ocx1; y=ocy1;
369         labda= MIN2(labdax, labday);
370         
371         while(TRUE) {
372                 
373                 if(x<0 || y<0 || x>=g_oc.ocres || y>=g_oc.ocres);
374                 else ocvlak[g_oc.ocres*x+y]= 1;
375                 
376                 labdao=labda;
377                 if(labdax==labday) {
378                         labdax+=ldx;
379                         x+=dx;
380                         labday+=ldy;
381                         y+=dy;
382                 } else {
383                         if(labdax<labday) {
384                                 labdax+=ldx;
385                                 x+=dx;
386                         } else {
387                                 labday+=ldy;
388                                 y+=dy;
389                         }
390                 }
391                 labda=MIN2(labdax,labday);
392                 if(labda==labdao) break;
393                 if(labda>=1.0) break;
394         }
395         ocvlak[g_oc.ocres*ocx2+ocy2]=1;
396 }
397
398 static void filltriangle(short c1, short c2, char *ocvlak, short *ocmin)
399 {
400         short *ocmax;
401         int a, x, y, y1, y2;
402
403         ocmax=ocmin+3;
404
405         for(x=ocmin[c1];x<=ocmax[c1];x++) {
406                 a= g_oc.ocres*x;
407                 for(y=ocmin[c2];y<=ocmax[c2];y++) {
408                         if(ocvlak[a+y]) {
409                                 y++;
410                                 while(ocvlak[a+y] && y!=ocmax[c2]) y++;
411                                 for(y1=ocmax[c2];y1>y;y1--) {
412                                         if(ocvlak[a+y1]) {
413                                                 for(y2=y;y2<=y1;y2++) ocvlak[a+y2]=1;
414                                                 y1=0;
415                                         }
416                                 }
417                                 y=ocmax[c2];
418                         }
419                 }
420         }
421 }
422
423 void freeoctree(void)
424 {
425         int a= 0;
426         
427         while(g_oc.adrbranch[a]) {
428                 MEM_freeN(g_oc.adrbranch[a]);
429                 g_oc.adrbranch[a]= NULL;
430                 a++;
431         }
432         
433         a= 0;
434         while(g_oc.adrnode[a]) {
435                 MEM_freeN(g_oc.adrnode[a]);
436                 g_oc.adrnode[a]= NULL;
437                 a++;
438         }
439         
440         if(G.f & G_DEBUG) {
441                 printf("branches %d nodes %d\n", branchcount, nodecount);
442                 printf("raycount %d \n", raycount);     
443                 printf("ray coherent %d \n", coherent_ray);
444                 printf("accepted %d rejected %d\n", accepted, rejected);
445         }
446         branchcount= 0;
447         nodecount= 0;
448 }
449
450 void makeoctree()
451 {
452         VlakRen *vlr=NULL;
453         VertRen *v1, *v2, *v3, *v4;
454         float ocfac[3], t00, t01, t02;
455         float rtf[4][3];
456         int v;
457         int a, b, c, oc1, oc2, oc3, oc4, x, y, z, ocres2;
458         short rts[4][3], ocmin[6], *ocmax;
459         char *ocvlak;   // front, top, size view of face, to fill in
460
461         ocmax= ocmin+3;
462
463         memset(g_oc.adrnode, 0, sizeof(g_oc.adrnode));
464         memset(g_oc.adrbranch, 0, sizeof(g_oc.adrbranch));
465         
466         branchcount=0;
467         nodecount=0;
468         raycount=0;
469         accepted= 0;
470         rejected= 0;
471         coherent_ray= 0;
472         
473         /* fill main octree struct */
474         g_oc.ocres= R.r.ocres;
475         ocres2= g_oc.ocres*g_oc.ocres;
476         INIT_MINMAX(g_oc.min, g_oc.max);
477         
478         /* first min max octree space */
479         for(v=0;v<R.totvlak;v++) {
480                 if((v & 255)==0) vlr= R.blovl[v>>8];    
481                 else vlr++;
482                 if(vlr->mat->mode & MA_TRACEBLE) {      
483                         
484                         DO_MINMAX(vlr->v1->co, g_oc.min, g_oc.max);
485                         DO_MINMAX(vlr->v2->co, g_oc.min, g_oc.max);
486                         DO_MINMAX(vlr->v3->co, g_oc.min, g_oc.max);
487                         if(vlr->v4) {
488                                 DO_MINMAX(vlr->v4->co, g_oc.min, g_oc.max);
489                         }
490                 }
491         }
492
493         if(g_oc.min[0] > g_oc.max[0]) return;   /* empty octree */
494
495         g_oc.adrbranch[0]=(Branch *)MEM_callocN(4096*sizeof(Branch), "makeoctree");
496         
497         /* the lookup table, per face, for which nodes to fill in */
498         ocvlak= MEM_callocN( 3*ocres2 + 8, "ocvlak");
499         memset(ocvlak, 0, 3*ocres2);
500
501         for(c=0;c<3;c++) {      /* octree enlarge, still needed? */
502                 g_oc.min[c]-= 0.01;
503                 g_oc.max[c]+= 0.01;
504         }
505
506         t00= g_oc.max[0]-g_oc.min[0];
507         t01= g_oc.max[1]-g_oc.min[1];
508         t02= g_oc.max[2]-g_oc.min[2];
509         
510         /* this minus 0.1 is old safety... seems to be needed? */
511         g_oc.ocfacx=ocfac[0]= (g_oc.ocres-0.1)/t00;
512         g_oc.ocfacy=ocfac[1]= (g_oc.ocres-0.1)/t01;
513         g_oc.ocfacz=ocfac[2]= (g_oc.ocres-0.1)/t02;
514         
515         g_oc.ocsize= sqrt(t00*t00+t01*t01+t02*t02);     /* global, max size octree */
516
517         for(v=0; v<R.totvlak; v++) {
518                 if((v & 255)==0) vlr= R.blovl[v>>8];    
519                 else vlr++;
520                 
521                 if(vlr->mat->mode & MA_TRACEBLE) {
522                         
523                         v1= vlr->v1;
524                         v2= vlr->v2;
525                         v3= vlr->v3;
526                         v4= vlr->v4;
527                         
528                         for(c=0;c<3;c++) {
529                                 rtf[0][c]= (v1->co[c]-g_oc.min[c])*ocfac[c] ;
530                                 rts[0][c]= (short)rtf[0][c];
531                                 rtf[1][c]= (v2->co[c]-g_oc.min[c])*ocfac[c] ;
532                                 rts[1][c]= (short)rtf[1][c];
533                                 rtf[2][c]= (v3->co[c]-g_oc.min[c])*ocfac[c] ;
534                                 rts[2][c]= (short)rtf[2][c];
535                                 if(v4) {
536                                         rtf[3][c]= (v4->co[c]-g_oc.min[c])*ocfac[c] ;
537                                         rts[3][c]= (short)rtf[3][c];
538                                 }
539                         }
540                         
541                         
542                         
543                         for(c=0;c<3;c++) {
544                                 oc1= rts[0][c];
545                                 oc2= rts[1][c];
546                                 oc3= rts[2][c];
547                                 if(v4==NULL) {
548                                         ocmin[c]= MIN3(oc1,oc2,oc3);
549                                         ocmax[c]= MAX3(oc1,oc2,oc3);
550                                 }
551                                 else {
552                                         oc4= rts[3][c];
553                                         ocmin[c]= MIN4(oc1,oc2,oc3,oc4);
554                                         ocmax[c]= MAX4(oc1,oc2,oc3,oc4);
555                                 }
556                                 if(ocmax[c]>g_oc.ocres-1) ocmax[c]=g_oc.ocres-1;
557                                 if(ocmin[c]<0) ocmin[c]=0;
558                         }
559
560                         d2dda(0,1,0,1,ocvlak+ocres2,rts,rtf);
561                         d2dda(0,1,0,2,ocvlak,rts,rtf);
562                         d2dda(0,1,1,2,ocvlak+2*ocres2,rts,rtf);
563                         d2dda(1,2,0,1,ocvlak+ocres2,rts,rtf);
564                         d2dda(1,2,0,2,ocvlak,rts,rtf);
565                         d2dda(1,2,1,2,ocvlak+2*ocres2,rts,rtf);
566                         if(v4==NULL) {
567                                 d2dda(2,0,0,1,ocvlak+ocres2,rts,rtf);
568                                 d2dda(2,0,0,2,ocvlak,rts,rtf);
569                                 d2dda(2,0,1,2,ocvlak+2*ocres2,rts,rtf);
570                         }
571                         else {
572                                 d2dda(2,3,0,1,ocvlak+ocres2,rts,rtf);
573                                 d2dda(2,3,0,2,ocvlak,rts,rtf);
574                                 d2dda(2,3,1,2,ocvlak+2*ocres2,rts,rtf);
575                                 d2dda(3,0,0,1,ocvlak+ocres2,rts,rtf);
576                                 d2dda(3,0,0,2,ocvlak,rts,rtf);
577                                 d2dda(3,0,1,2,ocvlak+2*ocres2,rts,rtf);
578                         }
579                         /* nothing todo with triangle..., just fills :) */
580                         filltriangle(0,1,ocvlak+ocres2,ocmin);
581                         filltriangle(0,2,ocvlak,ocmin);
582                         filltriangle(1,2,ocvlak+2*ocres2,ocmin);
583                         
584                         /* init static vars here */
585                         face_in_node(vlr, 0,0,0, rtf);
586                         
587                         for(x=ocmin[0];x<=ocmax[0];x++) {
588                                 a= g_oc.ocres*x;
589                                 for(y=ocmin[1];y<=ocmax[1];y++) {
590                                         if(ocvlak[a+y+ocres2]) {
591                                                 b= g_oc.ocres*y+2*ocres2;
592                                                 for(z=ocmin[2];z<=ocmax[2];z++) {
593                                                         if(ocvlak[b+z] && ocvlak[a+z]) ocwrite(vlr, x,y,z, rtf);
594                                                 }
595                                         }
596                                 }
597                         }
598                         
599                         /* same loops to clear octree, doubt it can be done smarter */
600                         for(x=ocmin[0];x<=ocmax[0];x++) {
601                                 a= g_oc.ocres*x;
602                                 for(y=ocmin[1];y<=ocmax[1];y++) {
603                                         /* x-y */
604                                         ocvlak[a+y+ocres2]= 0;
605
606                                         b= g_oc.ocres*y + 2*ocres2;
607                                         for(z=ocmin[2];z<=ocmax[2];z++) {
608                                                 /* y-z */
609                                                 ocvlak[b+z]= 0;
610                                                 /* x-z */
611                                                 ocvlak[a+z]= 0;
612                                         }
613                                 }
614                         }
615                 }
616         }
617         
618         MEM_freeN(ocvlak);
619 }
620
621 /* ************ raytracer **************** */
622
623 /* only for self-intersecting test with current render face (where ray left) */
624 static short intersection2(VlakRen *vlr, float r0, float r1, float r2, float rx1, float ry1, float rz1)
625 {
626         VertRen *v1,*v2,*v3,*v4=NULL;
627         float x0,x1,x2,t00,t01,t02,t10,t11,t12,t20,t21,t22;
628         float m0, m1, m2, divdet, det, det1;
629         float u1, v, u2;
630
631         v1= vlr->v1; 
632         v2= vlr->v2; 
633         if(vlr->v4) {
634                 v3= vlr->v4;
635                 v4= vlr->v3;
636         }
637         else v3= vlr->v3;       
638
639         t00= v3->co[0]-v1->co[0];
640         t01= v3->co[1]-v1->co[1];
641         t02= v3->co[2]-v1->co[2];
642         t10= v3->co[0]-v2->co[0];
643         t11= v3->co[1]-v2->co[1];
644         t12= v3->co[2]-v2->co[2];
645         
646         x0= t11*r2-t12*r1;
647         x1= t12*r0-t10*r2;
648         x2= t10*r1-t11*r0;
649
650         divdet= t00*x0+t01*x1+t02*x2;
651
652         m0= rx1-v3->co[0];
653         m1= ry1-v3->co[1];
654         m2= rz1-v3->co[2];
655         det1= m0*x0+m1*x1+m2*x2;
656         
657         if(divdet!=0.0) {
658                 u1= det1/divdet;
659
660                 if(u1<=0.0) {
661                         det= t00*(m1*r2-m2*r1);
662                         det+= t01*(m2*r0-m0*r2);
663                         det+= t02*(m0*r1-m1*r0);
664                         v= det/divdet;
665
666                         if(v<=0.0 && (u1 + v) >= -1.0) {
667                                 return 1;
668                         }
669                 }
670         }
671
672         if(v4) {
673
674                 t20= v3->co[0]-v4->co[0];
675                 t21= v3->co[1]-v4->co[1];
676                 t22= v3->co[2]-v4->co[2];
677
678                 divdet= t20*x0+t21*x1+t22*x2;
679                 if(divdet!=0.0) {
680                         u2= det1/divdet;
681                 
682                         if(u2<=0.0) {
683                                 det= t20*(m1*r2-m2*r1);
684                                 det+= t21*(m2*r0-m0*r2);
685                                 det+= t22*(m0*r1-m1*r0);
686                                 v= det/divdet;
687         
688                                 if(v<=0.0 && (u2 + v) >= -1.0) {
689                                         return 2;
690                                 }
691                         }
692                 }
693         }
694         return 0;
695 }
696
697 static short intersection(Isect *is)
698 {
699         VertRen *v1,*v2,*v3,*v4=NULL;
700         float x0,x1,x2,t00,t01,t02,t10,t11,t12,t20,t21,t22,r0,r1,r2;
701         float m0, m1, m2, divdet, det1;
702         short ok=0;
703         
704         v1= is->vlr->v1; 
705         v2= is->vlr->v2; 
706         if(is->vlr->v4) {
707                 v3= is->vlr->v4;
708                 v4= is->vlr->v3;
709         }
710         else v3= is->vlr->v3;   
711
712         t00= v3->co[0]-v1->co[0];
713         t01= v3->co[1]-v1->co[1];
714         t02= v3->co[2]-v1->co[2];
715         t10= v3->co[0]-v2->co[0];
716         t11= v3->co[1]-v2->co[1];
717         t12= v3->co[2]-v2->co[2];
718         
719         r0= is->vec[0];
720         r1= is->vec[1];
721         r2= is->vec[2];
722         
723         x0= t12*r1-t11*r2;
724         x1= t10*r2-t12*r0;
725         x2= t11*r0-t10*r1;
726
727         divdet= t00*x0+t01*x1+t02*x2;
728
729         m0= is->start[0]-v3->co[0];
730         m1= is->start[1]-v3->co[1];
731         m2= is->start[2]-v3->co[2];
732         det1= m0*x0+m1*x1+m2*x2;
733         
734         if(divdet!=0.0) {
735                 float u;
736
737                 divdet= 1.0/divdet;
738                 u= det1*divdet;
739                 if(u<0.0 && u>-1.0) {
740                         float v, cros0, cros1, cros2;
741                         
742                         cros0= m1*t02-m2*t01;
743                         cros1= m2*t00-m0*t02;
744                         cros2= m0*t01-m1*t00;
745                         v= divdet*(cros0*r0 + cros1*r1 + cros2*r2);
746
747                         if(v<0.0 && (u + v) > -1.0) {
748                                 float labda;
749                                 labda= divdet*(cros0*t10 + cros1*t11 + cros2*t12);
750
751                                 if(labda>0.0 && labda<1.0) {
752                                         is->labda= labda;
753                                         is->u= u; is->v= v;
754                                         ok= 1;
755                                 }
756                         }
757                 }
758         }
759
760         if(ok==0 && v4) {
761
762                 t20= v3->co[0]-v4->co[0];
763                 t21= v3->co[1]-v4->co[1];
764                 t22= v3->co[2]-v4->co[2];
765
766                 divdet= t20*x0+t21*x1+t22*x2;
767                 if(divdet!=0.0) {
768                         float u;
769                         divdet= 1.0/divdet;
770                         u = det1*divdet;
771                         
772                         if(u<0.0 && u>-1.0) {
773                                 float v, cros0, cros1, cros2;
774                                 cros0= m1*t22-m2*t21;
775                                 cros1= m2*t20-m0*t22;
776                                 cros2= m0*t21-m1*t20;
777                                 v= divdet*(cros0*r0 + cros1*r1 + cros2*r2);
778         
779                                 if(v<0.0 && (u + v) > -1.0) {
780                                         float labda;
781                                         labda= divdet*(cros0*t10 + cros1*t11 + cros2*t12);
782                                         
783                                         if(labda>0.0 && labda<1.0) {
784                                                 ok= 2;
785                                                 is->labda= labda;
786                                                 is->u= u; is->v= v;
787                                         }
788                                 }
789                         }
790                 }
791         }
792
793         if(ok) {
794                 is->isect= ok;  // wich half of the quad
795                 
796                 if(is->mode==DDA_MIRROR) {
797                         /* for mirror: large faces can be filled in too often, this prevents
798                            a face being detected too soon... */
799                         if(is->labda > is->ddalabda) {
800                                 return 0;
801                         }
802                 }
803                 
804                 /* when a shadow ray leaves a face, it can be little outside the edges of it, causing
805                 intersection to be detected in its neighbour face */
806                 
807                 if(is->vlrcontr && is->vlrisect);       // optimizing, the tests below are not needed
808                 else if(is->labda< .1) {
809                         VlakRen *vlr= is->vlrorig;
810                         short de= 0;
811                         
812                         if(v1==vlr->v1 || v2==vlr->v1 || v3==vlr->v1 || v4==vlr->v1) de++;
813                         if(v1==vlr->v2 || v2==vlr->v2 || v3==vlr->v2 || v4==vlr->v2) de++;
814                         if(v1==vlr->v3 || v2==vlr->v3 || v3==vlr->v3 || v4==vlr->v3) de++;
815                         if(vlr->v4) {
816                                 if(v1==vlr->v4 || v2==vlr->v4 || v3==vlr->v4 || v4==vlr->v4) de++;
817                         }
818                         if(de) {
819                                 
820                                 /* so there's a shared edge or vertex, let's intersect ray with vlr
821                                 itself, if that's true we can safely return 1, otherwise we assume
822                                 the intersection is invalid, 0 */
823                                 
824                                 if(is->vlrcontr==NULL) {
825                                         is->vlrcontr= vlr;
826                                         is->vlrisect= intersection2(vlr, -r0, -r1, -r2, is->start[0], is->start[1], is->start[2]);
827                                 }
828
829                                 if(is->vlrisect) return 1;
830                                 return 0;
831                         }
832                 }
833                 
834                 return 1;
835         }
836
837         return 0;
838 }
839
840 /* check all faces in this node */
841 static int testnode(Isect *is, Node *no, OcVal ocval)
842 {
843         VlakRen *vlr;
844         short nr=0;
845         OcVal *ov;
846         
847         if(is->mode==DDA_SHADOW) {
848                 
849                 vlr= no->v[0];
850                 while(vlr) {
851                 
852                         if(is->vlrorig != vlr) {
853
854                                 if(is->lay & vlr->lay) {
855                                         
856                                         ov= no->ov+nr;
857                                         if( (ov->ocx & ocval.ocx) && (ov->ocy & ocval.ocy) && (ov->ocz & ocval.ocz) ) { 
858                                                 //accepted++;
859                                                 is->vlr= vlr;
860         
861                                                 if(intersection(is)) {
862                                                         is->vlr_last= vlr;
863                                                         return 1;
864                                                 }
865                                         }
866                                         //else rejected++;
867                                 }
868                         }
869                         
870                         nr++;
871                         if(nr==8) {
872                                 no= no->next;
873                                 if(no==0) return 0;
874                                 nr=0;
875                         }
876                         vlr= no->v[nr];
877                 }
878         }
879         else {                  /* else mirror and glass  */
880                 Isect isect;
881                 int found= 0;
882                 
883                 is->labda= 1.0; /* needed? */
884                 isect= *is;             /* copy for sorting */
885                 
886                 vlr= no->v[0];
887                 while(vlr) {
888                         
889                         if(is->vlrorig != vlr) {
890                                 
891                                 ov= no->ov+nr;
892                                 if( (ov->ocx & ocval.ocx) && (ov->ocy & ocval.ocy) && (ov->ocz & ocval.ocz) ) { 
893                                         //accepted++;
894
895                                         isect.vlr= vlr;
896                                         if(intersection(&isect)) {
897                                                 if(isect.labda<is->labda) *is= isect;
898                                                 found= 1;
899                                         }
900                                 }
901                                 //else rejected++;
902                         }
903                         
904                         nr++;
905                         if(nr==8) {
906                                 no= no->next;
907                                 if(no==NULL) break;
908                                 nr=0;
909                         }
910                         vlr= no->v[nr];
911                 }
912                 
913                 return found;
914         }
915
916         return 0;
917 }
918
919 /* find the Node for the octree coord x y z */
920 static Node *ocread(int x, int y, int z)
921 {
922         Branch *br;
923         int oc1;
924         
925         x<<=2;
926         y<<=1;
927         
928         br= g_oc.adrbranch[0];
929         
930         if(g_oc.ocres==512) {
931                 oc1= ((x & 1024)+(y & 512)+(z & 256))>>8;
932                 br= br->b[oc1];
933                 if(br==NULL) {
934                         return NULL;
935                 }
936         }
937         if(g_oc.ocres>=256) {
938                 oc1= ((x & 512)+(y & 256)+(z & 128))>>7;
939                 br= br->b[oc1];
940                 if(br==NULL) {
941                         return NULL;
942                 }
943         }
944         if(g_oc.ocres>=128) {
945                 oc1= ((x & 256)+(y & 128)+(z & 64))>>6;
946                 br= br->b[oc1];
947                 if(br==NULL) {
948                         return NULL;
949                 }
950         }
951         
952         oc1= ((x & 128)+(y & 64)+(z & 32))>>5;
953         br= br->b[oc1];
954         if(br) {
955                 oc1= ((x & 64)+(y & 32)+(z & 16))>>4;
956                 br= br->b[oc1];
957                 if(br) {
958                         oc1= ((x & 32)+(y & 16)+(z & 8))>>3;
959                         br= br->b[oc1];
960                         if(br) {
961                                 oc1= ((x & 16)+(y & 8)+(z & 4))>>2;
962                                 br= br->b[oc1];
963                                 if(br) {
964                                         oc1= ((x & 8)+(y & 4)+(z & 2))>>1;
965                                         br= br->b[oc1];
966                                         if(br) {
967                                                 oc1= ((x & 4)+(y & 2)+(z & 1));
968                                                 return (Node *)br->b[oc1];
969                                         }
970                                 }
971                         }
972                 }
973         }
974         
975         return NULL;
976 }
977
978 static short cliptest(float p, float q, float *u1, float *u2)
979 {
980         float r;
981
982         if(p<0.0) {
983                 if(q<p) return 0;
984                 else if(q<0.0) {
985                         r= q/p;
986                         if(r>*u2) return 0;
987                         else if(r>*u1) *u1=r;
988                 }
989         }
990         else {
991                 if(p>0.0) {
992                         if(q<0.0) return 0;
993                         else if(q<p) {
994                                 r= q/p;
995                                 if(r<*u1) return 0;
996                                 else if(r<*u2) *u2=r;
997                         }
998                 }
999                 else if(q<0.0) return 0;
1000         }
1001         return 1;
1002 }
1003
1004 /* extensive coherence checks/storage cancels out the benefit of it, and gives errors... we
1005    need better methods, sample code commented out below (ton) */
1006  
1007 /*
1008
1009 in top: static short coh_nodes[16*16*16][6];
1010 in makeoctree: memset(coh_nodes, 0, sizeof(coh_nodes));
1011  
1012 static void add_coherence_test(int ocx1, int ocx2, int ocy1, int ocy2, int ocz1, int ocz2)
1013 {
1014         short *sp;
1015         
1016         sp= coh_nodes[ (ocx2 & 15) + 16*(ocy2 & 15) + 256*(ocz2 & 15) ];
1017         sp[0]= ocx1; sp[1]= ocy1; sp[2]= ocz1;
1018         sp[3]= ocx2; sp[4]= ocy2; sp[5]= ocz2;
1019         
1020 }
1021
1022 static int do_coherence_test(int ocx1, int ocx2, int ocy1, int ocy2, int ocz1, int ocz2)
1023 {
1024         short *sp;
1025         
1026         sp= coh_nodes[ (ocx2 & 15) + 16*(ocy2 & 15) + 256*(ocz2 & 15) ];
1027         if(sp[0]==ocx1 && sp[1]==ocy1 && sp[2]==ocz1 &&
1028            sp[3]==ocx2 && sp[4]==ocy2 && sp[5]==ocz2) return 1;
1029         return 0;
1030 }
1031
1032 */
1033
1034 /* return 1: found valid intersection */
1035 /* starts with is->vlrorig */
1036 static int d3dda(Isect *is)     
1037 {
1038         Node *no;
1039         OcVal ocval;
1040         float vec1[3], vec2[3];
1041         float u1,u2,ox1,ox2,oy1,oy2,oz1,oz2;
1042         float labdao,labdax,ldx,labday,ldy,labdaz,ldz, ddalabda;
1043         int dx,dy,dz;   
1044         int xo,yo,zo,c1=0;
1045         int ocx1,ocx2,ocy1, ocy2,ocz1,ocz2;
1046         
1047         /* clip with octree */
1048         if(branchcount==0) return 0;
1049         
1050         /* do this before intersect calls */
1051         is->vlrcontr= NULL;     /*  to check shared edge */
1052
1053         /* only for shadow! */
1054         if(is->mode==DDA_SHADOW) {
1055         
1056                 /* check with last intersected shadow face */
1057                 if(is->vlr_last!=NULL && is->vlr_last!=is->vlrorig) {
1058                         if(is->lay & is->vlr_last->lay) {
1059                                 is->vlr= is->vlr_last;
1060                                 VECSUB(is->vec, is->end, is->start);
1061                                 if(intersection(is)) return 1;
1062                         }
1063                 }
1064         }
1065         
1066         ldx= is->end[0] - is->start[0];
1067         u1= 0.0;
1068         u2= 1.0;
1069
1070         /* clip with octree cube */
1071         if(cliptest(-ldx, is->start[0]-g_oc.min[0], &u1,&u2)) {
1072                 if(cliptest(ldx, g_oc.max[0]-is->start[0], &u1,&u2)) {
1073                         ldy= is->end[1] - is->start[1];
1074                         if(cliptest(-ldy, is->start[1]-g_oc.min[1], &u1,&u2)) {
1075                                 if(cliptest(ldy, g_oc.max[1]-is->start[1], &u1,&u2)) {
1076                                         ldz= is->end[2] - is->start[2];
1077                                         if(cliptest(-ldz, is->start[2]-g_oc.min[2], &u1,&u2)) {
1078                                                 if(cliptest(ldz, g_oc.max[2]-is->start[2], &u1,&u2)) {
1079                                                         c1=1;
1080                                                         if(u2<1.0) {
1081                                                                 is->end[0]= is->start[0]+u2*ldx;
1082                                                                 is->end[1]= is->start[1]+u2*ldy;
1083                                                                 is->end[2]= is->start[2]+u2*ldz;
1084                                                         }
1085                                                         if(u1>0.0) {
1086                                                                 is->start[0]+=u1*ldx;
1087                                                                 is->start[1]+=u1*ldy;
1088                                                                 is->start[2]+=u1*ldz;
1089                                                         }
1090                                                 }
1091                                         }
1092                                 }
1093                         }
1094                 }
1095         }
1096
1097         if(c1==0) return 0;
1098
1099         /* reset static variables in ocread */
1100         //ocread(g_oc.ocres, 0, 0);
1101
1102         /* setup 3dda to traverse octree */
1103         ox1= (is->start[0]-g_oc.min[0])*g_oc.ocfacx;
1104         oy1= (is->start[1]-g_oc.min[1])*g_oc.ocfacy;
1105         oz1= (is->start[2]-g_oc.min[2])*g_oc.ocfacz;
1106         ox2= (is->end[0]-g_oc.min[0])*g_oc.ocfacx;
1107         oy2= (is->end[1]-g_oc.min[1])*g_oc.ocfacy;
1108         oz2= (is->end[2]-g_oc.min[2])*g_oc.ocfacz;
1109
1110         ocx1= (int)ox1;
1111         ocy1= (int)oy1;
1112         ocz1= (int)oz1;
1113         ocx2= (int)ox2;
1114         ocy2= (int)oy2;
1115         ocz2= (int)oz2;
1116
1117         /* for intersection */
1118         VECSUB(is->vec, is->end, is->start);
1119
1120         if(ocx1==ocx2 && ocy1==ocy2 && ocz1==ocz2) {
1121                 no= ocread(ocx1, ocy1, ocz1);
1122                 if(no) {
1123                         /* exact intersection with node */
1124                         vec1[0]= ox1; vec1[1]= oy1; vec1[2]= oz1;
1125                         vec2[0]= ox2; vec2[1]= oy2; vec2[2]= oz2;
1126                         calc_ocval_ray(&ocval, (float)ocx1, (float)ocy1, (float)ocz1, vec1, vec2);
1127                         is->ddalabda= 1.0;
1128                         if( testnode(is, no, ocval) ) return 1;
1129                 }
1130         }
1131         else {
1132                 //static int coh_ocx1,coh_ocx2,coh_ocy1, coh_ocy2,coh_ocz1,coh_ocz2;
1133                 float dox, doy, doz;
1134                 int eqval;
1135                 
1136                 /* calc labda en ld */
1137                 dox= ox1-ox2;
1138                 doy= oy1-oy2;
1139                 doz= oz1-oz2;
1140
1141                 if(dox<-FLT_EPSILON) {
1142                         ldx= -1.0/dox;
1143                         labdax= (ocx1-ox1+1.0)*ldx;
1144                         dx= 1;
1145                 } else if(dox>FLT_EPSILON) {
1146                         ldx= 1.0/dox;
1147                         labdax= (ox1-ocx1)*ldx;
1148                         dx= -1;
1149                 } else {
1150                         labdax=1.0;
1151                         ldx=0;
1152                         dx= 0;
1153                 }
1154
1155                 if(doy<-FLT_EPSILON) {
1156                         ldy= -1.0/doy;
1157                         labday= (ocy1-oy1+1.0)*ldy;
1158                         dy= 1;
1159                 } else if(doy>FLT_EPSILON) {
1160                         ldy= 1.0/doy;
1161                         labday= (oy1-ocy1)*ldy;
1162                         dy= -1;
1163                 } else {
1164                         labday=1.0;
1165                         ldy=0;
1166                         dy= 0;
1167                 }
1168
1169                 if(doz<-FLT_EPSILON) {
1170                         ldz= -1.0/doz;
1171                         labdaz= (ocz1-oz1+1.0)*ldz;
1172                         dz= 1;
1173                 } else if(doz>FLT_EPSILON) {
1174                         ldz= 1.0/doz;
1175                         labdaz= (oz1-ocz1)*ldz;
1176                         dz= -1;
1177                 } else {
1178                         labdaz=1.0;
1179                         ldz=0;
1180                         dz= 0;
1181                 }
1182                 
1183                 xo=ocx1; yo=ocy1; zo=ocz1;
1184                 labdao= ddalabda= MIN3(labdax,labday,labdaz);
1185                 
1186                 vec2[0]= ox1;
1187                 vec2[1]= oy1;
1188                 vec2[2]= oz1;
1189                 
1190                 /* this loop has been constructed to make sure the first and last node of ray
1191                    are always included, even when ddalabda==1.0 or larger */
1192
1193                 while(TRUE) {
1194
1195                         no= ocread(xo, yo, zo);
1196                         if(no) {
1197                                 
1198                                 /* calculate ray intersection with octree node */
1199                                 VECCOPY(vec1, vec2);
1200                                 // dox,y,z is negative
1201                                 vec2[0]= ox1-ddalabda*dox;
1202                                 vec2[1]= oy1-ddalabda*doy;
1203                                 vec2[2]= oz1-ddalabda*doz;
1204                                 calc_ocval_ray(&ocval, (float)xo, (float)yo, (float)zo, vec1, vec2);
1205                                                            
1206                                 is->ddalabda= ddalabda;
1207                                 if( testnode(is, no, ocval) ) return 1;
1208                         }
1209
1210                         labdao= ddalabda;
1211                         
1212                         /* traversing ocree nodes need careful detection of smallest values, with proper
1213                            exceptions for equal labdas */
1214                         eqval= (labdax==labday);
1215                         if(labday==labdaz) eqval += 2;
1216                         if(labdax==labdaz) eqval += 4;
1217                         
1218                         if(eqval) {     // only 4 cases exist!
1219                                 if(eqval==7) {  // x=y=z
1220                                         xo+=dx; labdax+=ldx;
1221                                         yo+=dy; labday+=ldy;
1222                                         zo+=dz; labdaz+=ldz;
1223                                 }
1224                                 else if(eqval==1) { // x=y 
1225                                         if(labday < labdaz) {
1226                                                 xo+=dx; labdax+=ldx;
1227                                                 yo+=dy; labday+=ldy;
1228                                         }
1229                                         else {
1230                                                 zo+=dz; labdaz+=ldz;
1231                                         }
1232                                 }
1233                                 else if(eqval==2) { // y=z
1234                                         if(labdax < labday) {
1235                                                 xo+=dx; labdax+=ldx;
1236                                         }
1237                                         else {
1238                                                 yo+=dy; labday+=ldy;
1239                                                 zo+=dz; labdaz+=ldz;
1240                                         }
1241                                 }
1242                                 else { // x=z
1243                                         if(labday < labdax) {
1244                                                 yo+=dy; labday+=ldy;
1245                                         }
1246                                         else {
1247                                                 xo+=dx; labdax+=ldx;
1248                                                 zo+=dz; labdaz+=ldz;
1249                                         }
1250                                 }
1251                         }
1252                         else {  // all three different, just three cases exist
1253                                 eqval= (labdax<labday);
1254                                 if(labday<labdaz) eqval += 2;
1255                                 if(labdax<labdaz) eqval += 4;
1256                                 
1257                                 if(eqval==7 || eqval==5) { // x smallest
1258                                         xo+=dx; labdax+=ldx;
1259                                 }
1260                                 else if(eqval==2 || eqval==6) { // y smallest
1261                                         yo+=dy; labday+=ldy;
1262                                 }
1263                                 else { // z smallest
1264                                         zo+=dz; labdaz+=ldz;
1265                                 }
1266                                 
1267                         }
1268
1269                         ddalabda=MIN3(labdax,labday,labdaz);
1270                         if(ddalabda==labdao) break;
1271                         /* to make sure the last node is always checked */
1272                         if(labdao>=1.0) break;
1273                 }
1274         }
1275         
1276         /* reached end, no intersections found */
1277         is->vlr_last= NULL;
1278         return 0;
1279 }               
1280
1281
1282 static void shade_ray(Isect *is, ShadeInput *shi, ShadeResult *shr)
1283 {
1284         VlakRen *vlr= is->vlr;
1285         float l;
1286         int osatex= 0;
1287         
1288         /* set up view vector */
1289         VECCOPY(shi->view, is->vec);
1290
1291         /* render co */
1292         shi->co[0]= is->start[0]+is->labda*(shi->view[0]);
1293         shi->co[1]= is->start[1]+is->labda*(shi->view[1]);
1294         shi->co[2]= is->start[2]+is->labda*(shi->view[2]);
1295         
1296         Normalise(shi->view);
1297
1298         shi->vlr= vlr;
1299         shi->mat= vlr->mat;
1300         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));        // note, keep this synced with render_types.h
1301         shi->har= shi->mat->har;
1302         
1303         /* face normal, check for flip */
1304         l= vlr->n[0]*shi->view[0]+vlr->n[1]*shi->view[1]+vlr->n[2]*shi->view[2];
1305         if(l<0.0) {     
1306                 shi->facenor[0]= -vlr->n[0];
1307                 shi->facenor[1]= -vlr->n[1];
1308                 shi->facenor[2]= -vlr->n[2];
1309                 // only flip lower 4 bits
1310                 shi->puno= vlr->puno ^ 15;
1311         }
1312         else {
1313                 VECCOPY(shi->facenor, vlr->n);
1314                 shi->puno= vlr->puno;
1315         }
1316         
1317         // Osa structs we leave unchanged now
1318         SWAP(int, osatex, shi->osatex);
1319         
1320         shi->dxco[0]= shi->dxco[1]= shi->dxco[2]= 0.0;
1321         shi->dyco[0]= shi->dyco[1]= shi->dyco[2]= 0.0;
1322         
1323         // but, set Osa stuff to zero where it can confuse texture code
1324         if(shi->mat->texco & (TEXCO_NORM|TEXCO_REFL) ) {
1325                 shi->dxno[0]= shi->dxno[1]= shi->dxno[2]= 0.0;
1326                 shi->dyno[0]= shi->dyno[1]= shi->dyno[2]= 0.0;
1327         }
1328
1329         if(vlr->v4) {
1330                 if(is->isect==2) 
1331                         shade_input_set_coords(shi, is->u, is->v, 2, 1, 3);
1332                 else
1333                         shade_input_set_coords(shi, is->u, is->v, 0, 1, 3);
1334         }
1335         else {
1336                 shade_input_set_coords(shi, is->u, is->v, 0, 1, 2);
1337         }
1338         
1339         // SWAP(int, osatex, shi->osatex);  XXXXX!!!!
1340
1341         if(is->mode==DDA_SHADOW_TRA) shade_color(shi, shr);
1342         else {
1343
1344                 shade_lamp_loop(shi, shr);      
1345
1346                 if(shi->translucency!=0.0) {
1347                         ShadeResult shr_t;
1348                         VecMulf(shi->vn, -1.0);
1349                         VecMulf(shi->facenor, -1.0);
1350                         shade_lamp_loop(shi, &shr_t);
1351                         shr->diff[0]+= shi->translucency*shr_t.diff[0];
1352                         shr->diff[1]+= shi->translucency*shr_t.diff[1];
1353                         shr->diff[2]+= shi->translucency*shr_t.diff[2];
1354                         VecMulf(shi->vn, -1.0);
1355                         VecMulf(shi->facenor, -1.0);
1356                 }
1357         }
1358         
1359         SWAP(int, osatex, shi->osatex);  // XXXXX!!!!
1360
1361 }
1362
1363 static void refraction(float *refract, float *n, float *view, float index)
1364 {
1365         float dot, fac;
1366
1367         VECCOPY(refract, view);
1368         index= 1.0/index;
1369         
1370         dot= view[0]*n[0] + view[1]*n[1] + view[2]*n[2];
1371
1372         if(dot>0.0) {
1373                 fac= 1.0 - (1.0 - dot*dot)*index*index;
1374                 if(fac<= 0.0) return;
1375                 fac= -dot*index + sqrt(fac);
1376         }
1377         else {
1378                 index = 1.0/index;
1379                 fac= 1.0 - (1.0 - dot*dot)*index*index;
1380                 if(fac<= 0.0) return;
1381                 fac= -dot*index - sqrt(fac);
1382         }
1383
1384         refract[0]= index*view[0] + fac*n[0];
1385         refract[1]= index*view[1] + fac*n[1];
1386         refract[2]= index*view[2] + fac*n[2];
1387 }
1388
1389 /* orn = original face normal */
1390 static void reflection(float *ref, float *n, float *view, float *orn)
1391 {
1392         float f1;
1393         
1394         f1= -2.0*(n[0]*view[0]+ n[1]*view[1]+ n[2]*view[2]);
1395         
1396         ref[0]= (view[0]+f1*n[0]);
1397         ref[1]= (view[1]+f1*n[1]);
1398         ref[2]= (view[2]+f1*n[2]);
1399
1400         if(orn) {
1401                 /* test phong normals, then we should prevent vector going to the back */
1402                 f1= ref[0]*orn[0]+ ref[1]*orn[1]+ ref[2]*orn[2];
1403                 if(f1>0.0) {
1404                         f1+= .01;
1405                         ref[0]-= f1*orn[0];
1406                         ref[1]-= f1*orn[1];
1407                         ref[2]-= f1*orn[2];
1408                 }
1409         }
1410 }
1411
1412 #if 0
1413 static void color_combine(float *result, float fac1, float fac2, float *col1, float *col2)
1414 {
1415         float col1t[3], col2t[3];
1416         
1417         col1t[0]= sqrt(col1[0]);
1418         col1t[1]= sqrt(col1[1]);
1419         col1t[2]= sqrt(col1[2]);
1420         col2t[0]= sqrt(col2[0]);
1421         col2t[1]= sqrt(col2[1]);
1422         col2t[2]= sqrt(col2[2]);
1423
1424         result[0]= (fac1*col1t[0] + fac2*col2t[0]);
1425         result[0]*= result[0];
1426         result[1]= (fac1*col1t[1] + fac2*col2t[1]);
1427         result[1]*= result[1];
1428         result[2]= (fac1*col1t[2] + fac2*col2t[2]);
1429         result[2]*= result[2];
1430 }
1431 #endif
1432
1433 /* the main recursive tracer itself */
1434 static void traceray(short depth, float *start, float *vec, float *col, VlakRen *vlr, int mask, int osatex, int traflag)
1435 {
1436         ShadeInput shi;
1437         ShadeResult shr;
1438         Isect isec;
1439         float f, f1, fr, fg, fb;
1440         float ref[3];
1441         
1442         VECCOPY(isec.start, start);
1443         isec.end[0]= start[0]+g_oc.ocsize*vec[0];
1444         isec.end[1]= start[1]+g_oc.ocsize*vec[1];
1445         isec.end[2]= start[2]+g_oc.ocsize*vec[2];
1446         isec.mode= DDA_MIRROR;
1447         isec.vlrorig= vlr;
1448
1449         if( d3dda(&isec) ) {
1450                 
1451                 shi.mask= mask;
1452                 shi.osatex= osatex;
1453                 shi.depth= 1;   // only now to indicate tracing
1454                 
1455                 shade_ray(&isec, &shi, &shr);
1456                 
1457                 if(depth>0) {
1458
1459                         if(shi.mat->mode & (MA_RAYTRANSP|MA_ZTRA) && shr.alpha!=1.0) {
1460                                 float f, f1, refract[3], tracol[3];
1461                                 
1462                                 if(shi.mat->mode & MA_RAYTRANSP) {
1463                                         /* odd depths: use normal facing viewer, otherwise flip */
1464                                         if(traflag & RAY_TRAFLIP) {
1465                                                 float norm[3];
1466                                                 norm[0]= - shi.vn[0];
1467                                                 norm[1]= - shi.vn[1];
1468                                                 norm[2]= - shi.vn[2];
1469                                                 refraction(refract, norm, shi.view, shi.ang);
1470                                         }
1471                                         else {
1472                                                 refraction(refract, shi.vn, shi.view, shi.ang);
1473                                         }
1474                                         traflag |= RAY_TRA;
1475                                         traceray(depth-1, shi.co, refract, tracol, shi.vlr, shi.mask, osatex, traflag ^ RAY_TRAFLIP);
1476                                 }
1477                                 else
1478                                         traceray(depth-1, shi.co, shi.view, tracol, shi.vlr, shi.mask, osatex, 0);
1479                                 
1480                                 f= shr.alpha; f1= 1.0-f;
1481                                 shr.diff[0]= f*shr.diff[0] + f1*tracol[0];
1482                                 shr.diff[1]= f*shr.diff[1] + f1*tracol[1];
1483                                 shr.diff[2]= f*shr.diff[2] + f1*tracol[2];
1484                                 
1485                                 shr.spec[0] *=f;
1486                                 shr.spec[1] *=f;
1487                                 shr.spec[2] *=f;
1488                                 
1489                                 shr.alpha= 1.0;
1490                         }
1491
1492                         if(shi.mat->mode & MA_RAYMIRROR) {
1493                                 f= shi.ray_mirror;
1494                                 if(f!=0.0) f*= fresnel_fac(shi.view, shi.vn, shi.mat->fresnel_mir_i, shi.mat->fresnel_mir);
1495                         }
1496                         else f= 0.0;
1497                         
1498                         if(f!=0.0) {
1499                         
1500                                 reflection(ref, shi.vn, shi.view, NULL);                        
1501                                 traceray(depth-1, shi.co, ref, col, shi.vlr, shi.mask, osatex, 0);
1502                         
1503                                 f1= 1.0-f;
1504
1505                                 /* combine */
1506                                 //color_combine(col, f*fr*(1.0-shr.spec[0]), f1, col, shr.diff);
1507                                 //col[0]+= shr.spec[0];
1508                                 //col[1]+= shr.spec[1];
1509                                 //col[2]+= shr.spec[2];
1510                                 
1511                                 fr= shi.mirr;
1512                                 fg= shi.mirg;
1513                                 fb= shi.mirb;
1514                 
1515                                 col[0]= f*fr*(1.0-shr.spec[0])*col[0] + f1*shr.diff[0] + shr.spec[0];
1516                                 col[1]= f*fg*(1.0-shr.spec[1])*col[1] + f1*shr.diff[1] + shr.spec[1];
1517                                 col[2]= f*fb*(1.0-shr.spec[2])*col[2] + f1*shr.diff[2] + shr.spec[2];
1518                         }
1519                         else {
1520                                 col[0]= shr.diff[0] + shr.spec[0];
1521                                 col[1]= shr.diff[1] + shr.spec[1];
1522                                 col[2]= shr.diff[2] + shr.spec[2];
1523                         }
1524                 }
1525                 else {
1526                         col[0]= shr.diff[0] + shr.spec[0];
1527                         col[1]= shr.diff[1] + shr.spec[1];
1528                         col[2]= shr.diff[2] + shr.spec[2];
1529                 }
1530                 
1531         }
1532         else {  /* sky */
1533                 
1534                 VECCOPY(shi.view, vec);
1535                 Normalise(shi.view);
1536                 
1537                 shadeSkyPixelFloat(col, shi.view, NULL);
1538         }
1539 }
1540
1541 /* **************** jitter blocks ********** */
1542
1543 /* calc distributed planar energy */
1544
1545 static void DP_energy(float *table, float *vec, int tot, float xsize, float ysize)
1546 {
1547         int x, y, a;
1548         float *fp, force[3], result[3];
1549         float dx, dy, dist, min;
1550         
1551         min= MIN2(xsize, ysize);
1552         min*= min;
1553         result[0]= result[1]= 0.0;
1554         
1555         for(y= -1; y<2; y++) {
1556                 dy= ysize*y;
1557                 for(x= -1; x<2; x++) {
1558                         dx= xsize*x;
1559                         fp= table;
1560                         for(a=0; a<tot; a++, fp+= 2) {
1561                                 force[0]= vec[0] - fp[0]-dx;
1562                                 force[1]= vec[1] - fp[1]-dy;
1563                                 dist= force[0]*force[0] + force[1]*force[1];
1564                                 if(dist < min && dist>0.0) {
1565                                         result[0]+= force[0]/dist;
1566                                         result[1]+= force[1]/dist;
1567                                 }
1568                         }
1569                 }
1570         }
1571         vec[0] += 0.1*min*result[0]/(float)tot;
1572         vec[1] += 0.1*min*result[1]/(float)tot;
1573         // cyclic clamping
1574         vec[0]= vec[0] - xsize*floor(vec[0]/xsize + 0.5);
1575         vec[1]= vec[1] - ysize*floor(vec[1]/ysize + 0.5);
1576 }
1577
1578
1579 float *test_jitter(int resol, int iter, float xsize, float ysize)
1580 {
1581         static float jitter[2*256];
1582         float *fp;
1583         int x;
1584         
1585         /* fill table with random locations, area_size large */
1586         fp= jitter;
1587         for(x=0; x<resol*resol; x++, fp+=2) {
1588                 fp[0]= (BLI_frand()-0.5)*xsize;
1589                 fp[1]= (BLI_frand()-0.5)*ysize;
1590         }
1591         
1592         while(iter--) {
1593                 fp= jitter;
1594                 for(x=0; x<resol*resol; x++, fp+=2) {
1595                         DP_energy(jitter, fp, resol*resol, xsize, ysize);
1596                 }
1597         }
1598         return jitter;
1599 }
1600
1601 // random offset of 1 in 2
1602 static void jitter_plane_offset(float *jitter1, float *jitter2, int tot, float sizex, float sizey, float ofsx, float ofsy)
1603 {
1604         float dsizex= sizex*ofsx;
1605         float dsizey= sizey*ofsy;
1606         float hsizex= 0.5*sizex, hsizey= 0.5*sizey;
1607         int x;
1608         
1609         for(x=tot; x>0; x--, jitter1+=2, jitter2+=2) {
1610                 jitter2[0]= jitter1[0] + dsizex;
1611                 jitter2[1]= jitter1[1] + dsizey;
1612                 if(jitter2[0] > hsizex) jitter2[0]-= sizex;
1613                 if(jitter2[1] > hsizey) jitter2[1]-= sizey;
1614         }
1615 }
1616
1617 /* table around origin, -0.5*size to 0.5*size */
1618 static float *jitter_plane(LampRen *lar, int xs, int ys)
1619 {
1620         float *fp;
1621         int tot, x, iter=12;
1622         
1623         tot= lar->ray_totsamp;
1624         
1625         if(lar->jitter==NULL) {
1626                 lar->jitter= (float *)4;        // mislead thread quickly (tsk!)
1627                 fp=lar->jitter= MEM_mallocN(4*tot*2*sizeof(float), "lamp jitter tab");
1628                 
1629                 /* fill table with random locations, area_size large */
1630                 for(x=0; x<tot; x++, fp+=2) {
1631                         fp[0]= (BLI_frand()-0.5)*lar->area_size;
1632                         fp[1]= (BLI_frand()-0.5)*lar->area_sizey;
1633                 }
1634                 
1635                 while(iter--) {
1636                         fp= lar->jitter;
1637                         for(x=tot; x>0; x--, fp+=2) {
1638                                 DP_energy(lar->jitter, fp, tot, lar->area_size, lar->area_sizey);
1639                         }
1640                 }
1641                 
1642                 jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5, 0.0);
1643                 jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5, 0.5);
1644                 jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0, 0.5);
1645         }
1646                 
1647         if(lar->ray_samp_type & LA_SAMP_JITTER) {
1648                 /* made it threadsafe */
1649                 if(ys & 1) {
1650                         if(lar->xold!=xs || lar->yold!=ys) {
1651                                 jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, BLI_frand(), BLI_frand());
1652                                 lar->xold= xs; lar->yold= ys;
1653                         }
1654                         return lar->jitter+2*tot;
1655                 }
1656                 else {
1657                         if(lar->xold!=xs || lar->yold!=ys) {
1658                                 jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, BLI_frand(), BLI_frand());
1659                                 lar->xold= xs; lar->yold= ys;
1660                         }
1661                         return lar->jitter+4*tot;
1662                 }
1663         }
1664         if(lar->ray_samp_type & LA_SAMP_DITHER) {
1665                 return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1));
1666         }
1667         
1668         return lar->jitter;
1669 }
1670
1671
1672 /* ***************** main calls ************** */
1673
1674
1675 /* extern call from render loop */
1676 void ray_trace(ShadeInput *shi, ShadeResult *shr)
1677 {
1678         VlakRen *vlr;
1679         float i, f, f1, fr, fg, fb, vec[3], mircol[3], tracol[3];
1680         int do_tra, do_mir;
1681         
1682         do_tra= ((shi->mat->mode & (MA_RAYTRANSP|MA_ZTRA)) && shr->alpha!=1.0);
1683         do_mir= ((shi->mat->mode & MA_RAYMIRROR) && shi->ray_mirror!=0.0);
1684         vlr= shi->vlr;
1685         
1686         if(do_tra) {
1687                 float refract[3];
1688                 
1689                 if(shi->mat->mode & MA_RAYTRANSP) {
1690                         refraction(refract, shi->vn, shi->view, shi->ang);
1691                         traceray(shi->mat->ray_depth_tra, shi->co, refract, tracol, shi->vlr, shi->mask, 0, RAY_TRA|RAY_TRAFLIP);
1692                 }
1693                 else
1694                         traceray(shi->mat->ray_depth_tra, shi->co, shi->view, tracol, shi->vlr, shi->mask, 0, 0);
1695                 
1696                 f= shr->alpha; f1= 1.0-f;
1697                 shr->diff[0]= f*shr->diff[0] + f1*tracol[0];
1698                 shr->diff[1]= f*shr->diff[1] + f1*tracol[1];
1699                 shr->diff[2]= f*shr->diff[2] + f1*tracol[2];
1700                 shr->alpha= 1.0;
1701         }
1702         
1703         if(do_mir) {
1704         
1705                 i= shi->ray_mirror*fresnel_fac(shi->view, shi->vn, shi->mat->fresnel_mir_i, shi->mat->fresnel_mir);
1706                 if(i!=0.0) {
1707                         fr= shi->mirr;
1708                         fg= shi->mirg;
1709                         fb= shi->mirb;
1710
1711                         if(vlr->flag & R_SMOOTH) 
1712                                 reflection(vec, shi->vn, shi->view, shi->facenor);
1713                         else
1714                                 reflection(vec, shi->vn, shi->view, NULL);
1715         
1716                         traceray(shi->mat->ray_depth, shi->co, vec, mircol, shi->vlr, shi->mask, shi->osatex, 0);
1717                         
1718                         f= i*fr*(1.0-shr->spec[0]);     f1= 1.0-i;
1719                         shr->diff[0]= f*mircol[0] + f1*shr->diff[0];
1720                         
1721                         f= i*fg*(1.0-shr->spec[1]);     f1= 1.0-i;
1722                         shr->diff[1]= f*mircol[1] + f1*shr->diff[1];
1723                         
1724                         f= i*fb*(1.0-shr->spec[2]);     f1= 1.0-i;
1725                         shr->diff[2]= f*mircol[2] + f1*shr->diff[2];
1726                 }
1727         }
1728 }
1729
1730 /* no premul here! */
1731 static void addAlphaLight(float *old, float *over)
1732 {
1733         float div= old[3]+over[3];
1734
1735         if(div > 0.0001) {
1736                 old[0]= (over[3]*over[0] + old[3]*old[0])/div;
1737                 old[1]= (over[3]*over[1] + old[3]*old[1])/div;
1738                 old[2]= (over[3]*over[2] + old[3]*old[2])/div;
1739         }
1740         old[3]= over[3] + (1-over[3])*old[3];
1741
1742 }
1743
1744 static void ray_trace_shadow_tra(Isect *is, int depth)
1745 {
1746         /* ray to lamp, find first face that intersects, check alpha properties,
1747            if it has alpha<1  continue. exit when alpha is full */
1748         ShadeInput shi;
1749         ShadeResult shr;
1750
1751         if( d3dda(is)) {
1752                 float col[4];
1753                 /* we got a face */
1754                 
1755                 shi.mask= 1;
1756                 shi.osatex= 0;
1757                 shi.depth= 1;   // only now to indicate tracing
1758                 
1759                 shade_ray(is, &shi, &shr);
1760                 
1761                 /* add color */
1762                 VECCOPY(col, shr.diff);
1763                 col[3]= shr.alpha;
1764                 addAlphaLight(is->col, col);
1765
1766                 if(depth>0 && is->col[3]<1.0) {
1767                         
1768                         /* adapt isect struct */
1769                         VECCOPY(is->start, shi.co);
1770                         is->vlrorig= shi.vlr;
1771
1772                         ray_trace_shadow_tra(is, depth-1);
1773                 }
1774                 else if(is->col[3]>1.0) is->col[3]= 1.0;
1775
1776         }
1777 }
1778
1779 /* not used, test function for ambient occlusion (yaf: pathlight) */
1780 /* main problem; has to be called within shading loop, giving unwanted recursion */
1781 int ray_trace_shadow_rad(ShadeInput *ship, ShadeResult *shr)
1782 {
1783         static int counter=0, only_one= 0;
1784         extern float hashvectf[];
1785         Isect isec;
1786         ShadeInput shi;
1787         ShadeResult shr_t;
1788         float vec[3], accum[3], div= 0.0;
1789         int a;
1790         
1791         if(only_one) {
1792                 return 0;
1793         }
1794         only_one= 1;
1795         
1796         accum[0]= accum[1]= accum[2]= 0.0;
1797         isec.mode= DDA_MIRROR;
1798         isec.vlrorig= ship->vlr;
1799         
1800         for(a=0; a<8*8; a++) {
1801                 
1802                 counter+=3;
1803                 counter %= 768;
1804                 VECCOPY(vec, hashvectf+counter);
1805                 if(ship->vn[0]*vec[0]+ship->vn[1]*vec[1]+ship->vn[2]*vec[2]>0.0) {
1806                         vec[0]-= vec[0];
1807                         vec[1]-= vec[1];
1808                         vec[2]-= vec[2];
1809                 }
1810                 VECCOPY(isec.start, ship->co);
1811                 isec.end[0]= isec.start[0] + g_oc.ocsize*vec[0];
1812                 isec.end[1]= isec.start[1] + g_oc.ocsize*vec[1];
1813                 isec.end[2]= isec.start[2] + g_oc.ocsize*vec[2];
1814                 
1815                 if( d3dda(&isec)) {
1816                         float fac;
1817                         shade_ray(&isec, &shi, &shr_t);
1818                         fac= isec.labda*isec.labda;
1819                         fac= 1.0;
1820                         accum[0]+= fac*(shr_t.diff[0]+shr_t.spec[0]);
1821                         accum[1]+= fac*(shr_t.diff[1]+shr_t.spec[1]);
1822                         accum[2]+= fac*(shr_t.diff[2]+shr_t.spec[2]);
1823                         div+= fac;
1824                 }
1825                 else div+= 1.0;
1826         }
1827         
1828         if(div!=0.0) {
1829                 shr->diff[0]+= accum[0]/div;
1830                 shr->diff[1]+= accum[1]/div;
1831                 shr->diff[2]+= accum[2]/div;
1832         }
1833         shr->alpha= 1.0;
1834         
1835         only_one= 0;
1836         return 1;
1837 }
1838
1839 /* aolight: function to create random unit sphere vectors for total random sampling */
1840 void RandomSpherical(float *v)
1841 {
1842         float r;
1843         v[2] = 2.f*BLI_frand()-1.f;
1844         if ((r = 1.f - v[2]*v[2])>0.f) {
1845                 float a = 6.283185307f*BLI_frand();
1846                 r = sqrt(r);
1847                 v[0] = r * cos(a);
1848                 v[1] = r * sin(a);
1849         }
1850         else v[2] = 1.f;
1851 }
1852
1853 /* calc distributed spherical energy */
1854 static void DS_energy(float *sphere, int tot, float *vec)
1855 {
1856         float *fp, fac, force[3], res[3];
1857         int a;
1858         
1859         res[0]= res[1]= res[2]= 0.0;
1860         
1861         for(a=0, fp=sphere; a<tot; a++, fp+=3) {
1862                 VecSubf(force, vec, fp);
1863                 fac= force[0]*force[0] + force[1]*force[1] + force[2]*force[2];
1864                 if(fac!=0.0) {
1865                         fac= 1.0/fac;
1866                         res[0]+= fac*force[0];
1867                         res[1]+= fac*force[1];
1868                         res[2]+= fac*force[2];
1869                 }
1870         }
1871
1872         VecMulf(res, 0.5);
1873         VecAddf(vec, vec, res);
1874         Normalise(vec);
1875         
1876 }
1877
1878 static void DistributedSpherical(float *sphere, int tot, int iter)
1879 {
1880         float *fp;
1881         int a;
1882
1883         /* init */
1884         fp= sphere;
1885         for(a=0; a<tot; a++, fp+= 3) {
1886                 RandomSpherical(fp);
1887         }
1888         
1889         while(iter--) {
1890                 for(a=0, fp= sphere; a<tot; a++, fp+= 3) {
1891                         DS_energy(sphere, tot, fp);
1892                 }
1893         }
1894 }
1895
1896
1897 static float *threadsafe_table_sphere(int test, int xs, int ys)
1898 {
1899         static float sphere1[2*3*256];
1900         static float sphere2[2*3*256];
1901         static int xs1=-1, xs2=-1, ys1=-1, ys2=-1;
1902         
1903         if(ys & 1) {
1904                 if(xs==xs1 && ys==ys1) return sphere1;
1905                 if(test) return NULL;
1906                 xs1= xs; ys1= ys;
1907                 return sphere1;
1908         }
1909         else  {
1910                 if(xs==xs2 && ys==ys2) return sphere2;
1911                 if(test) return NULL;
1912                 xs2= xs; ys2= ys;
1913                 return sphere2;
1914         }
1915 }
1916
1917 static float *sphere_sampler(int type, int resol, int xs, int ys)
1918 {
1919         static float sphere[2*3*256];
1920         float *sphere1;
1921         int tot;
1922         float *vec;
1923         
1924         if(resol>16) return sphere;
1925         
1926         tot= 2*resol*resol;
1927
1928         if (type & WO_AORNDSMP) {
1929                 int a;
1930                 
1931                 /* total random sampling */
1932                 vec= sphere;
1933                 for (a=0; a<tot; a++, vec+=3) {
1934                         RandomSpherical(vec);
1935                 }
1936         } 
1937         else {
1938                 static int last_distr= 0;
1939                 float cosf, sinf, cost, sint;
1940                 float ang, *vec1;
1941                 int a;
1942                 
1943                 if(last_distr!=resol) {
1944                         last_distr= resol;
1945                         DistributedSpherical(sphere, tot, 16);
1946                 }
1947                 
1948                 sphere1= threadsafe_table_sphere(1, xs, ys);
1949                 if(sphere1==NULL) {
1950                         sphere1= threadsafe_table_sphere(0, xs, ys);
1951                         
1952                         // random rotation
1953                         ang= BLI_frand();
1954                         sinf= sin(ang); cosf= cos(ang);
1955                         ang= BLI_frand();
1956                         sint= sin(ang); cost= cos(ang);
1957                         
1958                         vec= sphere;
1959                         vec1= sphere1;
1960                         for (a=0; a<tot; a++, vec+=3, vec1+=3) {
1961                                 vec1[0]= cost*cosf*vec[0] - sinf*vec[1] + sint*cosf*vec[2];
1962                                 vec1[1]= cost*sinf*vec[0] + cosf*vec[1] + sint*sinf*vec[2];
1963                                 vec1[2]= -sint*vec[0] + cost*vec[2];                    
1964                         }
1965                 }
1966                 return sphere1;
1967         }
1968         return sphere;
1969 }
1970
1971
1972 /* extern call from shade_lamp_loop, ambient occlusion calculus */
1973 void ray_ao(ShadeInput *shi, World *wrld, float *shadfac)
1974 {
1975         Isect isec;
1976         float *vec, *nrm, div, bias, sh=0;
1977         float maxdist = wrld->aodist;
1978         int j=0, tot, actual=0, skyadded=0;
1979
1980         isec.vlrorig= shi->vlr;
1981         isec.vlr_last= NULL;
1982         isec.mode= DDA_SHADOW;
1983         isec.lay= -1;
1984
1985         shadfac[0]= shadfac[1]= shadfac[2]= 0.0;
1986
1987         // bias prevents smoothed faces to appear flat
1988         if(shi->vlr->flag & R_SMOOTH) {
1989                 bias= G.scene->world->aobias;
1990                 nrm= shi->vn;
1991         }
1992         else {
1993                 bias= 0.0;
1994                 nrm= shi->facenor;
1995         }
1996         
1997         vec= sphere_sampler(wrld->aomode, wrld->aosamp, floor(shi->xs+0.5), floor(shi->ys+0.5) );
1998         
1999         // warning: since we use full sphere now, and dotproduct is below, we do twice as much
2000         tot= 2*wrld->aosamp*wrld->aosamp;
2001
2002         while(tot--) {
2003                 
2004                 if ((vec[0]*nrm[0] + vec[1]*nrm[1] + vec[2]*nrm[2]) > bias) {
2005                         // only ao samples for mask
2006                         if(R.r.mode & R_OSA) {
2007                                 j++;
2008                                 if(j==R.osa) j= 0;
2009                                 if(!(shi->mask & (1<<j))) {
2010                                         vec+=3;
2011                                         continue;
2012                                 }
2013                         }
2014                         
2015                         actual++;
2016                         
2017                         /* always set start/end, 3dda clips it */
2018                         VECCOPY(isec.start, shi->co);
2019                         isec.end[0] = shi->co[0] - maxdist*vec[0];
2020                         isec.end[1] = shi->co[1] - maxdist*vec[1];
2021                         isec.end[2] = shi->co[2] - maxdist*vec[2];
2022                         
2023                         /* do the trace */
2024                         if (d3dda(&isec)) {
2025                                 if (wrld->aomode & WO_AODIST) sh+= exp(-isec.labda*wrld->aodistfac); 
2026                                 else sh+= 1.0;
2027                         }
2028                         else if(wrld->aocolor!=WO_AOPLAIN) {
2029                                 float skycol[4];
2030                                 float fac, view[3];
2031                                 
2032                                 view[0]= -vec[0];
2033                                 view[1]= -vec[1];
2034                                 view[2]= -vec[2];
2035                                 Normalise(view);
2036                                 
2037                                 if(wrld->aocolor==WO_AOSKYCOL) {
2038                                         fac= 0.5*(1.0+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
2039                                         shadfac[0]+= (1.0-fac)*R.wrld.horr + fac*R.wrld.zenr;
2040                                         shadfac[1]+= (1.0-fac)*R.wrld.horg + fac*R.wrld.zeng;
2041                                         shadfac[2]+= (1.0-fac)*R.wrld.horb + fac*R.wrld.zenb;
2042                                 }
2043                                 else {
2044                                         shadeSkyPixelFloat(skycol, view, NULL);
2045                                         shadfac[0]+= skycol[0];
2046                                         shadfac[1]+= skycol[1];
2047                                         shadfac[2]+= skycol[2];
2048                                 }
2049                                 skyadded++;
2050                         }
2051                 }
2052                 // samples
2053                 vec+= 3;
2054         }
2055         
2056         shadfac[3] = 1.0 - sh/((float)actual);
2057         
2058         if(wrld->aocolor!=WO_AOPLAIN && skyadded) {
2059                 div= shadfac[3]/((float)skyadded);
2060                 
2061                 shadfac[0]*= div;       // average color times distances/hits formula
2062                 shadfac[1]*= div;       // average color times distances/hits formula
2063                 shadfac[2]*= div;       // average color times distances/hits formula
2064         }
2065 }
2066
2067
2068
2069 /* extern call from shade_lamp_loop */
2070 void ray_shadow(ShadeInput *shi, LampRen *lar, float *shadfac)
2071 {
2072         Isect isec;
2073         float lampco[3];
2074
2075         /* setup isec */
2076         if(shi->mat->mode & MA_SHADOW_TRA) isec.mode= DDA_SHADOW_TRA;
2077         else isec.mode= DDA_SHADOW;
2078         
2079         if(lar->mode & LA_LAYER) isec.lay= lar->lay; else isec.lay= -1;
2080
2081         /* only when not mir tracing, first hit optimm */
2082         if(shi->depth==0) isec.vlr_last= lar->vlr_last;
2083         else isec.vlr_last= NULL;
2084         isec.vlrorig= shi->vlr;
2085         
2086         shadfac[3]= 1.0;        // 1=full light
2087         
2088         if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2089                 lampco[0]= shi->co[0] - g_oc.ocsize*lar->vec[0];
2090                 lampco[1]= shi->co[1] - g_oc.ocsize*lar->vec[1];
2091                 lampco[2]= shi->co[2] - g_oc.ocsize*lar->vec[2];
2092         }
2093         else {
2094                 VECCOPY(lampco, lar->co);
2095         }
2096         
2097         /* transp-shadow and soft not implemented yet */
2098         if(lar->ray_totsamp<2 ||  isec.mode == DDA_SHADOW_TRA) {
2099                 
2100                 /* set up isec vec */
2101                 VECCOPY(isec.start, shi->co);
2102                 VECCOPY(isec.end, lampco);
2103
2104                 if(isec.mode==DDA_SHADOW_TRA) {
2105                         isec.col[0]= isec.col[1]= isec.col[2]=  1.0;
2106                         isec.col[3]= 0.0;       //alpha
2107
2108                         ray_trace_shadow_tra(&isec, DEPTH_SHADOW_TRA);
2109
2110                         VECCOPY(shadfac, isec.col);
2111                                 // alpha to 'light'
2112                         shadfac[3]= 1.0-isec.col[3];
2113                         shadfac[0]= shadfac[3]+shadfac[0]*isec.col[3];
2114                         shadfac[1]= shadfac[3]+shadfac[1]*isec.col[3];
2115                         shadfac[2]= shadfac[3]+shadfac[2]*isec.col[3];
2116                 }
2117                 else if( d3dda(&isec)) shadfac[3]= 0.0;
2118         }
2119         else {
2120                 /* area soft shadow */
2121                 float *jitlamp;
2122                 float fac=0.0, div=0.0, vec[3];
2123                 int a, j= -1, mask;
2124                 
2125                 fac= 0.0;
2126                 jitlamp= jitter_plane(lar, floor(shi->xs+0.5), floor(shi->ys+0.5));
2127
2128                 a= lar->ray_totsamp;
2129                 
2130                 /* this correction to make sure we always take at least 1 sample */
2131                 mask= shi->mask;
2132                 if(a==4) mask |= (mask>>4)|(mask>>8);
2133                 else if(a==9) mask |= (mask>>9);
2134                 
2135                 while(a--) {
2136                         
2137                         if(R.r.mode & R_OSA) {
2138                                 j++;
2139                                 if(j>=R.osa) j= 0;
2140                                 if(!(mask & (1<<j))) {
2141                                         jitlamp+= 2;
2142                                         continue;
2143                                 }
2144                         }
2145
2146                         vec[0]= jitlamp[0];
2147                         vec[1]= jitlamp[1];
2148                         vec[2]= 0.0;
2149                         Mat3MulVecfl(lar->mat, vec);
2150                         
2151                         /* set start and end, d3dda clips it */
2152                         VECCOPY(isec.start, shi->co);
2153                         isec.end[0]= lampco[0]+vec[0];
2154                         isec.end[1]= lampco[1]+vec[1];
2155                         isec.end[2]= lampco[2]+vec[2];
2156                         
2157                         if( d3dda(&isec) ) fac+= 1.0;
2158                         div+= 1.0;
2159                         jitlamp+= 2;
2160                 }
2161                 
2162                 // sqrt makes nice umbra effect
2163                 if(lar->ray_samp_type & LA_SAMP_UMBRA)
2164                         shadfac[3]= sqrt(1.0-fac/div);
2165                 else
2166                         shadfac[3]= 1.0-fac/div;
2167         }
2168
2169         /* for first hit optim, set last interesected shadow face */
2170         if(shi->depth==0) lar->vlr_last= isec.vlr_last;
2171
2172 }
2173