Final merge of HEAD (bf-blender) into the orange branch.
[blender.git] / source / blender / render / intern / source / shadbuf.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
17  *
18  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * Contributor(s): 2004-2006, Blender Foundation
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 #include <math.h>
27 #include <string.h>
28
29 #include "MTC_matrixops.h"
30 #include "MEM_guardedalloc.h"
31
32 #include "DNA_lamp_types.h"
33 #include "BKE_utildefines.h"
34
35 #include "BLI_arithb.h"
36 #include "BLI_jitter.h"
37
38 #include "renderpipeline.h"
39 #include "render_types.h"
40 #include "renderdatabase.h"
41
42 #include "shadbuf.h"
43 #include "zbuf.h"
44
45 /* XXX, could be better implemented... this is for endian issues
46 */
47 #if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__BIG_ENDIAN__)
48 #define RCOMP   3
49 #define GCOMP   2
50 #define BCOMP   1
51 #define ACOMP   0
52 #else
53 #define RCOMP   0
54 #define GCOMP   1
55 #define BCOMP   2
56 #define ACOMP   3
57 #endif
58
59 /* ------------------------------------------------------------------------- */
60
61 /* initshadowbuf() in convertBlenderScene.c */
62
63 /* ------------------------------------------------------------------------- */
64
65 static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
66 {
67         int len4, *rz;  
68         int x2, y2;
69         
70         x2= x1+tile;
71         y2= y1+tile;
72         if(x2>=size) x2= size-1;
73         if(y2>=size) y2= size-1;
74
75         if(x1>=x2 || y1>=y2) return;
76
77         len4= 4*(x2- x1);
78         rz= rectz + size*y1 + x1;
79         for(; y1<y2; y1++) {
80                 memcpy(r1, rz, len4);
81                 rz+= size;
82                 r1+= len4;
83         }
84 }
85
86 #if 0
87 static int sizeoflampbuf(struct ShadBuf *shb)
88 {
89         int num,count=0;
90         char *cp;
91         
92         cp= shb->cbuf;
93         num= (shb->size*shb->size)/256;
94
95         while(num--) count+= *(cp++);
96         
97         return 256*count;
98 }
99 #endif
100
101 static float *give_jitter_tab(int samp)
102 {
103         /* these are all possible jitter tables, takes up some
104          * 12k, not really bad!
105          * For soft shadows, it saves memory and render time
106          */
107         static int tab[17]={1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256};
108         static float jit[1496][2];
109         static char ctab[17]= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
110         int a, offset=0;
111         
112         if(samp<2) samp= 2;
113         else if(samp>16) samp= 16;
114
115         for(a=0; a<samp-1; a++) offset+= tab[a];
116
117         if(ctab[samp]==0) {
118                 BLI_initjit(jit[offset], samp*samp);
119                 ctab[samp]= 1;
120         }
121                 
122         return jit[offset];
123         
124 }
125
126 void makeshadowbuf(Render *re, LampRen *lar)
127 {
128         struct ShadBuf *shb= lar->shb;
129         float wsize, dist;
130         int *rectz, *rz, *rz1, verg, verg1;
131         unsigned long *ztile;
132         int a, x, y, minx, miny, byt1, byt2, square;
133         char *rc, *rcline, *ctile, *zt;
134
135         shb->jit= give_jitter_tab(shb->samp);
136
137         /* matrices and window: in winmat the transformation is being put,
138                 transforming from observer view to lamp view, including lamp window matrix */
139         wsize= shb->pixsize*(shb->size/2.0);
140
141         i_window(-wsize, wsize, -wsize, wsize, shb->d, shb->clipend, shb->winmat);
142
143         MTC_Mat4MulMat4(shb->persmat, shb->viewmat, shb->winmat);
144         
145         /* temp, will be restored */
146         MTC_Mat4SwapMat4(shb->persmat, re->winmat);
147
148         /* zbuffering */
149         rectz= MEM_mallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
150         rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
151
152         project_renderdata(re, projectvert, 0, 0);
153         
154         zbuffer_shadow(re, lar, rectz, shb->size);
155         
156         square= lar->mode & LA_SQUARE;
157
158         /* create Z tiles (for compression): this system is 24 bits!!! */
159         
160         ztile= shb->zbuf;
161         ctile= shb->cbuf;
162         for(y=0; y<shb->size; y+=16) {
163                 if(y< shb->size/2) miny= y+15-shb->size/2;
164                 else miny= y-shb->size/2;       
165                                 
166                 for(x=0; x<shb->size; x+=16) {
167
168                         /* is tile within spotbundle? */
169                         a= shb->size/2;
170                         if(x< a) minx= x+15-a;
171                         else minx= x-a; 
172                         
173                         dist= sqrt( (float)(minx*minx+miny*miny) );
174
175                         if(square==0 && dist>(float)(a+12)) {   /* 12, tested with a onlyshadow lamp */
176                                 a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
177                                 rz1= (&verg)+1;
178                         } 
179                         else {
180                                 copy_to_ztile(rectz, shb->size, x, y, 16, rcline);
181                                 rz1= (int *)rcline;
182                                 
183                                 verg= (*rz1 & 0xFFFFFF00);
184
185                                 for(a=0;a<256;a++,rz1++) {
186                                         if( (*rz1 & 0xFFFFFF00) !=verg) break;
187                                 }
188                         }
189                         if(a==256) { /* complete empty tile */
190                                 *ctile= 0;
191                                 *ztile= *(rz1-1);
192                         }
193                         else {
194
195                                 /* ACOMP etc. are defined to work L/B endian */
196                                 
197                                 rc= rcline;
198                                 rz1= (int *)rcline;
199                                 verg=  rc[ACOMP];
200                                 verg1= rc[BCOMP];
201                                 rc+= 4;
202                                 byt1= 1; byt2= 1;
203                                 for(a=1;a<256;a++,rc+=4) {
204                                         byt1 &= (verg==rc[ACOMP]);
205                                         byt2 &= (verg1==rc[BCOMP]);
206
207                                         if(byt1==0) break;
208                                 }
209                                 if(byt1 && byt2) {      /* only store byte */
210                                         *ctile= 1;
211                                         *ztile= (unsigned long)MEM_mallocN(256+4, "tile1");
212                                         rz= (int *)*ztile;
213                                         *rz= *rz1;
214
215                                         zt= (char *)(rz+1);
216                                         rc= rcline;
217                                         for(a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];       
218                                 }
219                                 else if(byt1) {         /* only store short */
220                                         *ctile= 2;
221                                         *ztile= (unsigned long)MEM_mallocN(2*256+4,"Tile2");
222                                         rz= (int *)*ztile;
223                                         *rz= *rz1;
224
225                                         zt= (char *)(rz+1);
226                                         rc= rcline;
227                                         for(a=0; a<256; a++, zt+=2, rc+=4) {
228                                                 zt[0]= rc[BCOMP];
229                                                 zt[1]= rc[GCOMP];
230                                         }
231                                 }
232                                 else {                  /* store triple */
233                                         *ctile= 3;
234                                         *ztile= (unsigned long)MEM_mallocN(3*256,"Tile3");
235
236                                         zt= (char *)*ztile;
237                                         rc= rcline;
238                                         for(a=0; a<256; a++, zt+=3, rc+=4) {
239                                                 zt[0]= rc[ACOMP];
240                                                 zt[1]= rc[BCOMP];
241                                                 zt[2]= rc[GCOMP];
242                                         }
243                                 }
244                         }
245                         ztile++;
246                         ctile++;
247                 }
248         }
249
250         MEM_freeN(rcline);
251         MEM_freeN(rectz);
252         
253         /* old matrix back */
254         MTC_Mat4SwapMat4(shb->persmat, re->winmat);
255
256         /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
257 }
258
259 static int firstreadshadbuf(struct ShadBuf *shb, int xs, int ys, int nr)
260 {
261         /* return a 1 if fully compressed shadbuf-tile && z==const */
262         static int *rz;
263         int ofs;
264         char *ct;
265
266         /* always test borders of shadowbuffer */
267         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
268         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
269    
270         /* calc z */
271         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
272         ct= shb->cbuf+ofs;
273         if(*ct==0) {
274             if(nr==0) {
275                         rz= *( (int **)(shb->zbuf+ofs) );
276                         return 1;
277             }
278                 else if(rz!= *( (int **)(shb->zbuf+ofs) )) return 0;
279                 
280             return 1;
281         }
282         
283         return 0;
284 }
285
286 static float readshadowbuf(struct ShadBuf *shb, int bias, int xs, int ys, int zs)       /* return 1.0 : fully in light */
287 {
288         float temp;
289         int *rz, ofs;
290         int zsamp=0;
291         char *ct, *cz;
292
293         /* simpleclip */
294         /* if(xs<0 || ys<0) return 1.0; */
295         /* if(xs>=shb->size || ys>=shb->size) return 1.0; */
296         
297         /* always test borders of shadowbuffer */
298         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
299         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
300
301         /* calc z */
302         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
303         ct= shb->cbuf+ofs;
304         rz= *( (int **)(shb->zbuf+ofs) );
305
306         if(*ct==3) {
307                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
308                 cz= (char *)&zsamp;
309                 cz[ACOMP]= ct[0];
310                 cz[BCOMP]= ct[1];
311                 cz[GCOMP]= ct[2];
312         }
313         else if(*ct==2) {
314                 ct= ((char *)rz);
315                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
316                 zsamp= *rz;
317         
318                 cz= (char *)&zsamp;
319                 cz[BCOMP]= ct[0];
320                 cz[GCOMP]= ct[1];
321         }
322         else if(*ct==1) {
323                 ct= ((char *)rz);
324                 ct+= 4+16*(ys & 15)+(xs & 15);
325                 zsamp= *rz;
326
327                 cz= (char *)&zsamp;
328                 cz[GCOMP]= ct[0];
329
330         }
331         else {
332                 /* got warning on this from DEC alpha (64 bits).... */
333                 /* but it's working code! (ton) */
334                 zsamp= (int) rz;
335         }
336
337         /* if(zsamp >= 0x7FFFFE00) return 1.0; */       /* no shaduw when sampling at eternal distance */
338
339         if(zsamp > zs) return 1.0;              /* absolute no shadow */
340         else if( zsamp < zs-bias) return 0.0 ;  /* absolute in shadow */
341         else {                                  /* soft area */
342                 
343                 temp=  ( (float)(zs- zsamp) )/(float)bias;
344                 return 1.0 - temp*temp;
345                         
346         }
347 }
348
349 /* the externally called shadow testing (reading) function */
350 /* return 1.0: no shadow at all */
351 float testshadowbuf(struct ShadBuf *shb, float *rco, float *dxco, float *dyco, float inp)
352 {
353         float fac, co[4], dx[3], dy[3], aantal=0;
354         float xs1,ys1, siz, *j, xres, yres;
355         int xs,ys, zs, bias;
356         short a,num;
357         
358         if(inp <= 0.0) return 0.0;
359
360         /* rotate renderco en osaco */
361         siz= 0.5*(float)shb->size;
362         VECCOPY(co, rco);
363         co[3]= 1.0;
364
365         MTC_Mat4MulVec4fl(shb->persmat, co);    /* rational hom co */
366
367         xs1= siz*(1.0+co[0]/co[3]);
368         ys1= siz*(1.0+co[1]/co[3]);
369
370         /* Clip for z: clipsta and clipend clip values of the shadow buffer. We
371                 * can test for -1.0/1.0 because of the properties of the
372                 * coordinate transformations. */
373         fac= (co[2]/co[3]);
374
375         if(fac>=1.0) {
376                 return 0.0;
377         } else if(fac<= -1.0) {
378                 return 1.0;
379         }
380
381         zs= ((float)0x7FFFFFFF)*fac;
382
383         /* take num*num samples, increase area with fac */
384         num= shb->samp*shb->samp;
385         fac= shb->soft;
386         
387         /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors 
388            on cube edges, with one side being almost frontal lighted (ton)  */
389         bias= (1.5-inp*inp)*shb->bias;
390
391         if(num==1) {
392                 return readshadowbuf(shb, bias, (int)xs1, (int)ys1, zs);
393         }
394
395         co[0]= rco[0]+dxco[0];
396         co[1]= rco[1]+dxco[1];
397         co[2]= rco[2]+dxco[2];
398         co[3]= 1.0;
399         MTC_Mat4MulVec4fl(shb->persmat,co);     /* rational hom co */
400         dx[0]= xs1- siz*(1.0+co[0]/co[3]);
401         dx[1]= ys1- siz*(1.0+co[1]/co[3]);
402
403         co[0]= rco[0]+dyco[0];
404         co[1]= rco[1]+dyco[1];
405         co[2]= rco[2]+dyco[2];
406         co[3]= 1.0;
407         MTC_Mat4MulVec4fl(shb->persmat,co);     /* rational hom co */
408         dy[0]= xs1- siz*(1.0+co[0]/co[3]);
409         dy[1]= ys1- siz*(1.0+co[1]/co[3]);
410
411         xres= fac*( fabs(dx[0])+fabs(dy[0]) );
412         yres= fac*( fabs(dx[1])+fabs(dy[1]) );
413
414         if(xres<fac) xres= fac;
415         if(yres<fac) yres= fac;
416         
417         xs1-= (xres)/2;
418         ys1-= (yres)/2;
419
420         j= shb->jit;
421
422         if(xres<16.0 && yres<16.0) {
423             if(firstreadshadbuf(shb, (int)xs1, (int)ys1, 0)) {
424                         if(firstreadshadbuf(shb, (int)(xs1+xres), (int)ys1, 1)) {
425                                 if(firstreadshadbuf(shb, (int)xs1, (int)(ys1+yres), 1)) {
426                                         if(firstreadshadbuf(shb, (int)(xs1+xres), (int)(ys1+yres), 1)) {
427                                                 return readshadowbuf(shb, bias,(int)xs1, (int)ys1, zs);
428                                         }
429                                 }
430                         }
431             }
432         }
433
434         for(a=num;a>0;a--) {
435                 /* instead of jit i tried random: ugly! */
436                 /* note: the plus 0.5 gives best sampling results, jit used to go from 0-1 */
437                 /* xs1 and ys1 are already corrected to be corner of sample area */
438                 xs= xs1 + xres*(j[0] + 0.5);
439                 ys= ys1 + yres*(j[1] + 0.5);
440                 j+=2;
441                 
442                 aantal+= readshadowbuf(shb, bias, xs, ys, zs);
443         }
444
445         /* Renormalizes for the sample number: */
446         return aantal/( (float)(num) );
447 }
448
449 /* different function... sampling behind clipend can be LIGHT, bias is negative! */
450 /* return: light */
451 static float readshadowbuf_halo(struct ShadBuf *shb, int xs, int ys, int zs)
452 {
453         float temp;
454         int *rz, ofs;
455         int bias, zbias, zsamp;
456         char *ct, *cz;
457
458         /* negative! The other side is more important */
459         bias= -shb->bias;
460         
461         /* simpleclip */
462         if(xs<0 || ys<0) return 0.0;
463         if(xs>=shb->size || ys>=shb->size) return 0.0;
464
465         /* calc z */
466         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
467         ct= shb->cbuf+ofs;
468         rz= *( (int **)(shb->zbuf+ofs) );
469
470         if(*ct==3) {
471                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
472                 cz= (char *)&zsamp;
473                 zsamp= 0;
474                 cz[ACOMP]= ct[0];
475                 cz[BCOMP]= ct[1];
476                 cz[GCOMP]= ct[2];
477         }
478         else if(*ct==2) {
479                 ct= ((char *)rz);
480                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
481                 zsamp= *rz;
482         
483                 cz= (char *)&zsamp;
484                 cz[BCOMP]= ct[0];
485                 cz[GCOMP]= ct[1];
486         }
487         else if(*ct==1) {
488                 ct= ((char *)rz);
489                 ct+= 4+16*(ys & 15)+(xs & 15);
490                 zsamp= *rz;
491
492                 cz= (char *)&zsamp;
493                 cz[GCOMP]= ct[0];
494
495         }
496         else {
497                 /* same as before */
498                 /* still working code! (ton) */
499                 zsamp= (int) rz;
500         }
501
502         /* NO schadow when sampled at 'eternal' distance */
503
504         if(zsamp >= 0x7FFFFE00) return 1.0; 
505
506         if(zsamp > zs) return 1.0;              /* absolute no shadww */
507         else {
508                 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
509                 zbias= 0x7fffffff - zs;
510                 if(zbias > -bias) {
511                         if( zsamp < zs-bias) return 0.0 ;       /* absolute in shadow */
512                 }
513                 else return 0.0 ;       /* absolute shadow */
514         }
515
516         /* soft area */
517         
518         temp=  ( (float)(zs- zsamp) )/(float)bias;
519         return 1.0 - temp*temp;
520 }
521
522
523 float shadow_halo(LampRen *lar, float *p1, float *p2)
524 {
525         /* p1 p2 already are rotated in spot-space */
526         ShadBuf *shb= lar->shb;
527         float co[4], siz;
528         float labda, labdao, labdax, labday, ldx, ldy;
529         float zf, xf1, yf1, zf1, xf2, yf2, zf2;
530         float count, lightcount;
531         int x, y, z, xs1, ys1;
532         int dx = 0, dy = 0;
533         
534         siz= 0.5*(float)shb->size;
535         
536         co[0]= p1[0];
537         co[1]= p1[1];
538         co[2]= p1[2]/lar->sh_zfac;
539         co[3]= 1.0;
540         MTC_Mat4MulVec4fl(shb->winmat, co);     /* rational hom co */
541         xf1= siz*(1.0+co[0]/co[3]);
542         yf1= siz*(1.0+co[1]/co[3]);
543         zf1= (co[2]/co[3]);
544
545
546         co[0]= p2[0];
547         co[1]= p2[1];
548         co[2]= p2[2]/lar->sh_zfac;
549         co[3]= 1.0;
550         MTC_Mat4MulVec4fl(shb->winmat, co);     /* rational hom co */
551         xf2= siz*(1.0+co[0]/co[3]);
552         yf2= siz*(1.0+co[1]/co[3]);
553         zf2= (co[2]/co[3]);
554
555         /* the 2dda (a pixel line formula) */
556
557         xs1= (int)xf1;
558         ys1= (int)yf1;
559
560         if(xf1 != xf2) {
561                 if(xf2-xf1 > 0.0) {
562                         labdax= (xf1-xs1-1.0)/(xf1-xf2);
563                         ldx= -shb->shadhalostep/(xf1-xf2);
564                         dx= shb->shadhalostep;
565                 }
566                 else {
567                         labdax= (xf1-xs1)/(xf1-xf2);
568                         ldx= shb->shadhalostep/(xf1-xf2);
569                         dx= -shb->shadhalostep;
570                 }
571         }
572         else {
573                 labdax= 1.0;
574                 ldx= 0.0;
575         }
576
577         if(yf1 != yf2) {
578                 if(yf2-yf1 > 0.0) {
579                         labday= (yf1-ys1-1.0)/(yf1-yf2);
580                         ldy= -shb->shadhalostep/(yf1-yf2);
581                         dy= shb->shadhalostep;
582                 }
583                 else {
584                         labday= (yf1-ys1)/(yf1-yf2);
585                         ldy= shb->shadhalostep/(yf1-yf2);
586                         dy= -shb->shadhalostep;
587                 }
588         }
589         else {
590                 labday= 1.0;
591                 ldy= 0.0;
592         }
593         
594         x= xs1;
595         y= ys1;
596         labda= count= lightcount= 0.0;
597
598 /* printf("start %x %x  \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
599
600         while(1) {
601                 labdao= labda;
602                 
603                 if(labdax==labday) {
604                         labdax+= ldx;
605                         x+= dx;
606                         labday+= ldy;
607                         y+= dy;
608                 }
609                 else {
610                         if(labdax<labday) {
611                                 labdax+= ldx;
612                                 x+= dx;
613                         } else {
614                                 labday+= ldy;
615                                 y+= dy;
616                         }
617                 }
618                 
619                 labda= MIN2(labdax, labday);
620                 if(labda==labdao || labda>=1.0) break;
621                 
622                 zf= zf1 + labda*(zf2-zf1);
623                 count+= 1.0;
624
625                 if(zf<= -1.0) lightcount += 1.0;        /* close to the spot */
626                 else {
627                 
628                         /* make sure, behind the clipend we extend halolines. */
629                         if(zf>=1.0) z= 0x7FFFF000;
630                         else z= (int)(0x7FFFF000*zf);
631                         
632                         lightcount+= readshadowbuf_halo(shb, x, y, z);
633                         
634                 }
635         }
636         
637         if(count!=0.0) return (lightcount/count);
638         return 0.0;
639         
640 }
641
642
643
644
645
646
647
648
649