2 * ***** BEGIN GPL LICENSE BLOCK *****
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software Foundation,
16 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19 * All rights reserved.
21 * Contributor(s): 2004-2006, Blender Foundation
23 * ***** END GPL LICENSE BLOCK *****
30 #include "MEM_guardedalloc.h"
32 #include "DNA_group_types.h"
33 #include "DNA_lamp_types.h"
34 #include "DNA_material_types.h"
36 #include "BKE_global.h"
37 #include "BKE_scene.h"
38 #include "BKE_utildefines.h"
40 #include "BLI_arithb.h"
41 #include "BLI_blenlib.h"
42 #include "BLI_jitter.h"
43 #include "BLI_memarena.h"
48 #include "renderpipeline.h"
49 #include "render_types.h"
50 #include "renderdatabase.h"
51 #include "rendercore.h"
56 /* XXX, could be better implemented... this is for endian issues
58 #if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__hppa__) || defined (__BIG_ENDIAN__)
70 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
71 /* defined in pipeline.c, is a hard copy of the active, dynamically allocated Render */
72 /* only to be used here in this file, it's for speed */
73 extern struct Render R;
74 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
76 /* ------------------------------------------------------------------------- */
78 /* initshadowbuf() in convertBlenderScene.c */
80 /* ------------------------------------------------------------------------- */
82 static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
89 if(x2>=size) x2= size-1;
90 if(y2>=size) y2= size-1;
92 if(x1>=x2 || y1>=y2) return;
95 rz= rectz + size*y1 + x1;
104 static int sizeoflampbuf(ShadBuf *shb)
110 num= (shb->size*shb->size)/256;
112 while(num--) count+= *(cp++);
118 /* not threadsafe... */
119 static float *give_jitter_tab(int samp)
121 /* these are all possible jitter tables, takes up some
122 * 12k, not really bad!
123 * For soft shadows, it saves memory and render time
125 static int tab[17]={1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256};
126 static float jit[1496][2];
127 static char ctab[17]= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
131 else if(samp>16) samp= 16;
133 for(a=0; a<samp-1; a++) offset+= tab[a];
137 BLI_initjit(jit[offset], samp*samp);
/* Allocates and fills shb->weight with one filter weight per jitter sample.
 *
 * re:         only used to resolve the effective sample count through
 *             get_render_shadow_samples()
 * shb:        shb->jit is read as tot consecutive (x, y) float pairs;
 *             shb->weight is assigned a freshly allocated array of tot floats
 * filtertype: LA_SHADBUF_TENT and LA_SHADBUF_GAUSS get distance based weights
 *
 * NOTE(review): some original lines of this function are not visible in this
 * view (braces, the line in front of the 1.0f assignment, and whatever sits
 * between the two loops), so the remarks below on those parts are assumptions.
 */
144 static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype)
146 float *jit, totw= 0.0f;
147 int samp= get_render_shadow_samples(&re->r, shb->samp);
/* the table is samp x samp samples */
148 int a, tot=samp*samp;
150 shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
/* jit advances two floats per sample: jit[0] and jit[1] are the sample offset */
152 for(jit= shb->jit, a=0; a<tot; a++, jit+=2) {
153 if(filtertype==LA_SHADBUF_TENT)
/* weight drops linearly with the length of the offset; 0.71 is roughly the
 * length of (0.5, 0.5) - presumably the largest possible offset, confirm */
154 shb->weight[a]= 0.71f - sqrt(jit[0]*jit[0] + jit[1]*jit[1]);
155 else if(filtertype==LA_SHADBUF_GAUSS)
/* gauss filter value looked up at 1.8 times the offset length */
156 shb->weight[a]= RE_filter_value(R_FILTER_GAUSS, 1.8f*sqrt(jit[0]*jit[0] + jit[1]*jit[1]));
/* presumably the fallback for all other filter types: equal weights */
158 shb->weight[a]= 1.0f;
/* accumulate the sum of all weights */
160 totw+= shb->weight[a];
/* scale every weight by totw; presumably totw was turned into its reciprocal
 * just before this loop so the weights sum up to 1 - TODO confirm */
164 for(a=0; a<tot; a++) {
165 shb->weight[a]*= totw;
169 /* create Z tiles (for compression): this system is 24 bits!!! */
170 static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
172 ShadSampleBuf *shsample;
175 int *rz, *rz1, verg, verg1, size= shb->size;
176 int a, x, y, minx, miny, byt1, byt2;
177 char *rc, *rcline, *ctile, *zt;
179 shsample= MEM_mallocN( sizeof(ShadSampleBuf), "shad sample buf");
180 BLI_addtail(&shb->buffers, shsample);
182 shsample->zbuf= MEM_mallocN( sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
183 shsample->cbuf= MEM_callocN( (size*size)/256, "initshadbuf3");
185 ztile= (uintptr_t *)shsample->zbuf;
186 ctile= shsample->cbuf;
189 rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
191 for(y=0; y<size; y+=16) {
192 if(y< size/2) miny= y+15-size/2;
195 for(x=0; x<size; x+=16) {
197 /* is tile within spotbundle? */
199 if(x< a) minx= x+15-a;
202 dist= sqrt( (float)(minx*minx+miny*miny) );
204 if(square==0 && dist>(float)(a+12)) { /* 12, tested with a onlyshadow lamp */
205 a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
209 copy_to_ztile(rectz, size, x, y, 16, rcline);
212 verg= (*rz1 & 0xFFFFFF00);
214 for(a=0;a<256;a++,rz1++) {
215 if( (*rz1 & 0xFFFFFF00) !=verg) break;
218 if(a==256) { /* complete empty tile */
224 /* ACOMP etc. are defined to work L/B endian */
232 for(a=1;a<256;a++,rc+=4) {
233 byt1 &= (verg==rc[ACOMP]);
234 byt2 &= (verg1==rc[BCOMP]);
238 if(byt1 && byt2) { /* only store byte */
240 *ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
246 for(a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];
248 else if(byt1) { /* only store short */
250 *ztile= (uintptr_t)MEM_mallocN(2*256+4,"Tile2");
256 for(a=0; a<256; a++, zt+=2, rc+=4) {
261 else { /* store triple */
263 *ztile= (uintptr_t)MEM_mallocN(3*256,"Tile3");
267 for(a=0; a<256; a++, zt+=3, rc+=4) {
283 /* sets start/end clipping. lar->shb should be initialized */
284 static void shadowbuf_autoclip(Render *re, LampRen *lar)
286 ObjectInstanceRen *obi;
291 float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
292 unsigned int lay = -1;
293 int i, a, maxtotvert, ok= 1;
296 minz= 1.0e30f; maxz= -1.0e30f;
297 Mat4CpyMat4(viewmat, lar->shb->viewmat);
299 if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
302 for(obr=re->objecttable.first; obr; obr=obr->next)
303 maxtotvert= MAX2(obr->totvert, maxtotvert);
305 clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag");
307 /* set clip in vertices when face visible */
308 for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
311 if(obi->flag & R_TRANSFORMED)
312 Mat4MulMat4(obviewmat, obi->mat, viewmat);
314 Mat4CpyMat4(obviewmat, viewmat);
316 memset(clipflag, 0, sizeof(char)*obr->totvert);
318 /* clear clip, is being set if face is visible (clip is calculated for real later) */
319 for(a=0; a<obr->totvlak; a++) {
320 if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
323 /* note; these conditions are copied from zbuffer_shadow() */
327 if((ma->mode & MA_SHADBUF)==0) ok= 0;
330 if(ok && (obi->lay & lay)) {
331 clipflag[vlr->v1->index]= 1;
332 clipflag[vlr->v2->index]= 1;
333 clipflag[vlr->v3->index]= 1;
334 if(vlr->v4) clipflag[vlr->v4->index]= 1;
338 /* calculate min and max */
339 for(a=0; a< obr->totvert;a++) {
340 if((a & 255)==0) ver= RE_findOrAddVert(obr, a);
344 VECCOPY(vec, ver->co);
345 Mat4MulVecfl(obviewmat, vec);
346 /* Z on visible side of lamp space */
348 float inpr, z= -vec[2];
350 /* since vec is rotated in lampspace, this is how to get the cosine of angle */
351 /* precision is set 20% larger */
356 if(inpr>=lar->spotsi) {
367 /* set clipping min and max */
369 float delta= (maxz - minz); /* threshold to prevent precision issues */
371 //printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
372 if(lar->bufflag & LA_SHADBUF_AUTO_START)
373 lar->shb->d= minz - delta*0.02f; /* 0.02 is arbitrary... needs more thinking! */
374 if(lar->bufflag & LA_SHADBUF_AUTO_END)
375 lar->shb->clipend= maxz + delta*0.1f;
377 /* bias was calculated as percentage, we scale it to prevent animation issues */
378 delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
379 //printf("bias delta %f\n", delta);
380 lar->shb->bias= (int) (delta*(float)lar->shb->bias);
384 void makeshadowbuf(Render *re, LampRen *lar)
386 ShadBuf *shb= lar->shb;
387 float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
390 if(lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
391 shadowbuf_autoclip(re, lar);
393 /* just to enforce identical behaviour of all irregular buffers */
394 if(lar->buftype==LA_SHADBUF_IRREGULAR)
397 /* matrices and window: in winmat the transformation is being put,
398 transforming from observer view to lamp view, including lamp window matrix */
400 angle= saacos(lar->spotsi);
401 temp= 0.5f*shb->size*cos(angle)/sin(angle);
402 shb->pixsize= (shb->d)/temp;
403 wsize= shb->pixsize*(shb->size/2.0);
405 i_window(-wsize, wsize, -wsize, wsize, shb->d, shb->clipend, shb->winmat);
406 Mat4MulMat4(shb->persmat, shb->viewmat, shb->winmat);
408 if(ELEM(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY)) {
409 /* jitter, weights - not threadsafe! */
410 BLI_lock_thread(LOCK_CUSTOM1);
411 shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
412 make_jitter_weight_tab(re, shb, lar->filtertype);
413 BLI_unlock_thread(LOCK_CUSTOM1);
415 shb->totbuf= lar->buffers;
416 if(shb->totbuf==4) jitbuf= give_jitter_tab(2);
417 else if(shb->totbuf==9) jitbuf= give_jitter_tab(3);
418 else jitbuf= twozero;
421 rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
423 for(samples=0; samples<shb->totbuf; samples++) {
424 zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
425 /* create Z tiles (for compression): this system is 24 bits!!! */
426 compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
428 if(re->test_break(re->tbh))
434 /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
/* Thread entry point for building shadow buffers.
 *
 * re_v is the Render, passed as void pointer. Each round claims one lamp that
 * has a shadow buffer and is not yet assigned, builds its buffer with
 * makeshadowbuf() and then marks it ready. The loop ends when no lamp was
 * left to claim or when re->test_break() reports a break.
 *
 * NOTE(review): the start of the do-loop, the declaration of lar and the
 * return statement are not visible in this view.
 */
438 static void *do_shadow_thread(void *re_v)
440 Render *re= (Render*)re_v;
/* the assigned flags are only read and written while holding LOCK_CUSTOM1 */
444 BLI_lock_thread(LOCK_CUSTOM1);
445 for(lar=re->lampren.first; lar; lar=lar->next) {
446 if(lar->shb && !lar->thread_assigned) {
/* claim this lamp */
447 lar->thread_assigned= 1;
451 BLI_unlock_thread(LOCK_CUSTOM1);
453 /* if type is irregular, this only sets the perspective matrix and autoclips */
/* the buffer itself is built without holding the lock */
455 makeshadowbuf(re, lar);
/* publish completion under the same lock that threaded_makeshadowbufs() takes
 * when it checks thread_ready */
456 BLI_lock_thread(LOCK_CUSTOM1);
457 lar->thread_ready= 1;
458 BLI_unlock_thread(LOCK_CUSTOM1);
460 } while(lar && !re->test_break(re->tbh));
465 static volatile int g_break= 0;
466 static int thread_break(void *unused)
471 void threaded_makeshadowbufs(Render *re)
476 int (*test_break)(void *);
478 /* count number of threads to use */
480 for(lar=re->lampren.first; lar; lar= lar->next)
484 totthread= MIN2(totthread, re->r.threads);
487 totthread= 1; /* preview render */
490 for(lar=re->lampren.first; lar; lar= lar->next) {
491 if(re->test_break(re->tbh)) break;
493 /* if type is irregular, this only sets the perspective matrix and autoclips */
494 makeshadowbuf(re, lar);
499 /* swap test break function */
500 test_break= re->test_break;
501 re->test_break= thread_break;
503 for(lar=re->lampren.first; lar; lar= lar->next) {
504 lar->thread_assigned= 0;
505 lar->thread_ready= 0;
508 BLI_init_threads(&threads, do_shadow_thread, totthread);
510 for(a=0; a<totthread; a++)
511 BLI_insert_thread(&threads, re);
513 /* keep rendering as long as there are shadow buffers not ready */
515 if((g_break=test_break(re->tbh)))
520 BLI_lock_thread(LOCK_CUSTOM1);
521 for(lar=re->lampren.first; lar; lar= lar->next)
522 if(lar->shb && !lar->thread_ready)
524 BLI_unlock_thread(LOCK_CUSTOM1);
527 BLI_end_threads(&threads);
529 /* unset threadsafety */
530 re->test_break= test_break;
/* Frees the memory held by the shadow buffer of a lamp (lar->shb): the
 * separately allocated tiles, the per sample-buffer zbuf and cbuf arrays, the
 * list of sample buffers and the jitter weight table.
 *
 * NOTE(review): the declarations of v and b, a possible check on shb, and what
 * happens to shb itself afterwards are not visible in this view.
 */
535 void freeshadowbuf(LampRen *lar)
538 ShadBuf *shb= lar->shb;
539 ShadSampleBuf *shsample;
/* number of tiles of 256 pixels (16 x 16) in a size x size buffer */
542 v= (shb->size*shb->size)/256;
544 for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
/* zbuf holds one pointer sized entry per tile, cbuf one byte per tile */
545 intptr_t *ztile= shsample->zbuf;
546 char *ctile= shsample->cbuf;
/* only entries with a non-zero cbuf byte are freed; for a zero byte the zbuf
 * entry is not treated as allocated memory here */
548 for(b=0; b<v; b++, ztile++, ctile++)
549 if(*ctile) MEM_freeN((void *) *ztile);
551 MEM_freeN(shsample->zbuf);
552 MEM_freeN(shsample->cbuf);
/* frees the ShadSampleBuf list items themselves */
554 BLI_freelistN(&shb->buffers);
556 if(shb->weight) MEM_freeN(shb->weight);
/* Helper for testshadowbuf(), which calls it for the corners of the filter
 * area (first with nr 0, then with nr 1) to find out whether all of them
 * refer to the same tile entry.
 *
 * shb, shsample: shadow buffer and the sample buffer to look in
 * rz:            in/out; receives the zbuf entry of the tile at (xs, ys), or
 *                is compared against it
 * xs, ys:        pixel position, clamped to the buffer
 * nr:            NOTE(review): presumably 0 selects "store the entry in *rz"
 *                and non-zero selects "compare with *rz"; the test on nr is
 *                not visible in this view
 */
564 static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
566 /* return a 1 if fully compressed shadbuf-tile && z==const */
570 /* always test borders of shadowbuffer */
571 if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
572 if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
/* index of the 16 x 16 tile that contains (xs, ys); tiles are stored row by row */
575 ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
/* type byte of that tile */
576 ct= shsample->cbuf+ofs;
/* remember the zbuf entry of this tile for the following calls */
579 *rz= *( (int **)(shsample->zbuf+ofs) );
/* a different entry than the remembered one: not the same constant tile */
582 else if(*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
590 /* Reads one pixel from a compressed sample buffer and compares it with zs.
 *
 * shb, shsample: shadow buffer and the sample buffer to read from
 * bias:          depth tolerance, in the same integer units as zs
 * xs, ys:        pixel position, clamped to the buffer
 * zs:            depth of the point to test
 *
 * return 1.0 : fully in light, 0.0 : fully in shadow, in between a quadratic
 * falloff over the bias range
 *
 * NOTE(review): the local declarations and the tests on the tile type that
 * select one of the three decode paths are not visible in this view.
 */
591 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
599 /* if(xs<0 || ys<0) return 1.0; */
600 /* if(xs>=shb->size || ys>=shb->size) return 1.0; */
602 /* always test borders of shadowbuffer */
603 if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
604 if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
/* index of the 16 x 16 tile that contains (xs, ys); tiles are stored row by row */
607 ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
608 ct= shsample->cbuf+ofs;
609 rz= *( (int **)(shsample->zbuf+ofs) );
/* tile storing 3 bytes per pixel: 16 pixels per row, no header */
612 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
/* tile storing 2 bytes per pixel, pixel data starts 4 bytes into the tile */
620 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
/* tile storing 1 byte per pixel, pixel data starts 4 bytes into the tile */
629 ct+= 4+16*(ys & 15)+(xs & 15);
637 /* got warning on this for 64 bits.... */
638 /* but it's working code! in this case rz is not a pointer but zvalue (ton) */
639 zsamp= GET_INT_FROM_POINTER(rz);
642 /* tricky stuff here; we use ints which can overflow easily with bias values */
644 if(zsamp > zs) return 1.0; /* absolute no shadow */
645 else if(zs < - 0x7FFFFE00 + bias) return 1.0; /* extreme close to clipstart */
646 else if(zsamp < zs-bias) return 0.0 ; /* absolute in shadow */
647 else { /* soft area */
/* temp is how far zs lies behind the stored depth, as a fraction of bias */
649 temp= ( (float)(zs- zsamp) )/(float)bias;
650 return 1.0 - temp*temp;
655 /* the externally called shadow testing (reading) function */
656 /* return 1.0: no shadow at all */
657 float testshadowbuf(Render *re, ShadBuf *shb, float *rco, float *dxco, float *dyco, float inp, float mat_bias)
659 ShadSampleBuf *shsample;
660 float fac, co[4], dx[3], dy[3], shadfac=0.0f;
661 float xs1,ys1, siz, *jit, *weight, xres, yres, biasf;
662 int xs, ys, zs, bias, *rz;
665 /* crash preventer */
666 if(shb->buffers.first==NULL)
669 if(inp <= 0.0f) return 0.0f;
671 /* rotate renderco and osaco */
672 siz= 0.5f*(float)shb->size;
676 Mat4MulVec4fl(shb->persmat, co); /* rational hom co */
678 xs1= siz*(1.0f+co[0]/co[3]);
679 ys1= siz*(1.0f+co[1]/co[3]);
681 /* Clip for z: clipsta and clipend clip values of the shadow buffer. We
682 * can test for -1.0/1.0 because of the properties of the
683 * coordinate transformations. */
688 } else if(fac<= -1.0f) {
692 zs= ((float)0x7FFFFFFF)*fac;
694 /* take num*num samples, increase area with fac */
695 num= get_render_shadow_samples(&re->r, shb->samp);
699 if(mat_bias!=0.0f) biasf= shb->bias*mat_bias;
700 else biasf= shb->bias;
701 /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors
702 on cube edges, with one side being almost frontal lighted (ton) */
703 bias= (1.5f-inp*inp)*biasf;
706 for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
707 shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
709 return shadfac/(float)shb->totbuf;
712 /* calculate filter size */
713 co[0]= rco[0]+dxco[0];
714 co[1]= rco[1]+dxco[1];
715 co[2]= rco[2]+dxco[2];
717 Mat4MulVec4fl(shb->persmat,co); /* rational hom co */
718 dx[0]= xs1- siz*(1.0+co[0]/co[3]);
719 dx[1]= ys1- siz*(1.0+co[1]/co[3]);
721 co[0]= rco[0]+dyco[0];
722 co[1]= rco[1]+dyco[1];
723 co[2]= rco[2]+dyco[2];
725 Mat4MulVec4fl(shb->persmat,co); /* rational hom co */
726 dy[0]= xs1- siz*(1.0+co[0]/co[3]);
727 dy[1]= ys1- siz*(1.0+co[1]/co[3]);
729 xres= fac*( fabs(dx[0])+fabs(dy[0]) );
730 yres= fac*( fabs(dx[1])+fabs(dy[1]) );
731 if(xres<fac) xres= fac;
732 if(yres<fac) yres= fac;
737 if(xres<16.0f && yres<16.0f) {
738 shsample= shb->buffers.first;
739 if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
740 if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
741 if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
742 if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
743 return readshadowbuf(shb, shsample, bias,(int)xs1, (int)ys1, zs);
750 for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
754 for(a=num; a>0; a--, jit+=2, weight++) {
755 /* instead of jit i tried random: ugly! */
756 /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
757 /* xs1 and ys1 are already corrected to be corner of sample area */
758 xs= xs1 + xres*(jit[0] + 0.5f);
759 ys= ys1 + yres*(jit[1] + 0.5f);
761 shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
765 /* Renormalizes for the sample number: */
766 return shadfac/(float)shb->totbuf;
769 /* Variant of readshadowbuf() used for halo sampling by shadow_halo().
 * Different function... sampling behind clipend can be LIGHT, bias is negative!
 *
 * shb, shsample: shadow buffer and the sample buffer to read from
 * xs, ys:        pixel position; positions outside the buffer return 0.0
 *                (they are not clamped like in readshadowbuf())
 * zs:            depth of the point to test
 *
 * return 1.0 : in light, 0.0 : in shadow, in between a quadratic falloff
 *
 * NOTE(review): the assignment of bias, the tests on the tile type and the
 * conditions around the zbias branch are not visible in this view.
 */
771 static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
775 int bias, zbias, zsamp;
778 /* negative! The other side is more important */
782 if(xs<0 || ys<0) return 0.0;
783 if(xs>=shb->size || ys>=shb->size) return 0.0;
/* index of the 16 x 16 tile that contains (xs, ys); tiles are stored row by row */
786 ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
787 ct= shsample->cbuf+ofs;
788 rz= *( (int **)(shsample->zbuf+ofs) );
/* tile storing 3 bytes per pixel: 16 pixels per row, no header */
791 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
/* tile storing 2 bytes per pixel, pixel data starts 4 bytes into the tile */
800 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
/* tile storing 1 byte per pixel, pixel data starts 4 bytes into the tile */
809 ct+= 4+16*(ys & 15)+(xs & 15);
818 /* still working code! (ton) */
819 zsamp= GET_INT_FROM_POINTER(rz);
822 /* NO shadow when sampled at 'eternal' distance */
824 if(zsamp >= 0x7FFFFE00) return 1.0;
826 if(zsamp > zs) return 1.0; /* absolute no shadow */
828 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
/* zbias is the room that is left between zs and the largest int value */
829 zbias= 0x7fffffff - zs;
831 if( zsamp < zs-bias) return 0.0 ; /* absolute in shadow */
833 else return 0.0 ; /* absolute shadow */
/* soft area: same quadratic falloff as in readshadowbuf() */
838 temp= ( (float)(zs- zsamp) )/(float)bias;
839 return 1.0 - temp*temp;
843 float shadow_halo(LampRen *lar, float *p1, float *p2)
845 /* p1 p2 already are rotated in spot-space */
846 ShadBuf *shb= lar->shb;
847 ShadSampleBuf *shsample;
849 float labda, labdao, labdax, labday, ldx, ldy;
850 float zf, xf1, yf1, zf1, xf2, yf2, zf2;
851 float count, lightcount;
852 int x, y, z, xs1, ys1;
855 siz= 0.5*(float)shb->size;
859 co[2]= p1[2]/lar->sh_zfac;
861 Mat4MulVec4fl(shb->winmat, co); /* rational hom co */
862 xf1= siz*(1.0+co[0]/co[3]);
863 yf1= siz*(1.0+co[1]/co[3]);
869 co[2]= p2[2]/lar->sh_zfac;
871 Mat4MulVec4fl(shb->winmat, co); /* rational hom co */
872 xf2= siz*(1.0+co[0]/co[3]);
873 yf2= siz*(1.0+co[1]/co[3]);
876 /* the 2dda (a pixel line formula) */
883 labdax= (xf1-xs1-1.0)/(xf1-xf2);
884 ldx= -shb->shadhalostep/(xf1-xf2);
885 dx= shb->shadhalostep;
888 labdax= (xf1-xs1)/(xf1-xf2);
889 ldx= shb->shadhalostep/(xf1-xf2);
890 dx= -shb->shadhalostep;
900 labday= (yf1-ys1-1.0)/(yf1-yf2);
901 ldy= -shb->shadhalostep/(yf1-yf2);
902 dy= shb->shadhalostep;
905 labday= (yf1-ys1)/(yf1-yf2);
906 ldy= shb->shadhalostep/(yf1-yf2);
907 dy= -shb->shadhalostep;
917 labda= count= lightcount= 0.0;
919 /* printf("start %x %x \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
940 labda= MIN2(labdax, labday);
941 if(labda==labdao || labda>=1.0) break;
943 zf= zf1 + labda*(zf2-zf1);
944 count+= (float)shb->totbuf;
946 if(zf<= -1.0) lightcount += 1.0; /* close to the spot */
949 /* make sure, behind the clipend we extend halolines. */
950 if(zf>=1.0) z= 0x7FFFF000;
951 else z= (int)(0x7FFFF000*zf);
953 for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
954 lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
959 if(count!=0.0) return (lightcount/count);
965 /* ********************* Irregular Shadow Buffer (ISB) ************* */
966 /* ********** storage of all view samples in a raster of lists ***** */
968 /* based on several articles describing this method, like:
969 The Irregular Z-Buffer and its Application to Shadow Mapping
970 Gregory S. Johnson - William R. Mark - Christopher A. Burns
972 Alias-Free Shadow Maps
973 Timo Aila and Samuli Laine
976 /* bsp structure (actually kd tree) */
978 #define BSPMAX_SAMPLE 128
979 #define BSPMAX_DEPTH 32
981 /* aligned with struct rctf */
982 typedef struct Boxf {
988 typedef struct ISBBranch {
989 struct ISBBranch *left, *right;
992 short totsamp, index, full, unused;
996 typedef struct BSPFace {
998 float *v1, *v2, *v3, *v4;
999 int obi; /* object for face lookup */
1000 int facenr; /* index to retrieve VlakRen */
1001 int type; /* only for strand now */
1002 short shad_alpha, is_full;
1004 /* strand caching data, optimize for point_behind_strand() */
1005 float radline, radline_end, len;
1006 float vec1[3], vec2[3], rc[3];
1009 /* Resets a box to an inverted (empty) state, so that following calls of
 * bound_boxf() can grow it around points. Boxes are in lamp projection.
 *
 * NOTE(review): the lines setting xmax and ymax are not visible in this view;
 * presumably they are set to a large negative value, mirroring xmin and ymin.
 */
1010 static void init_box(Boxf *box)
1012 box->xmin= 1000000.0f;
1014 box->ymin= 1000000.0f;
/* z starts at the extremes of the integer depth range */
1016 box->zmin= 0x7FFFFFFF;
1017 box->zmax= - 0x7FFFFFFF;
1020 /* Grows box where needed so that it contains the point v1.
 *
 * box: in/out, 3d bounding box; start with init_box() for an empty box
 * v1:  point, read as v1[0] (x), v1[1] (y) and v1[2] (z)
 */
1021 static void bound_boxf(Boxf *box, float *v1)
1023 if(v1[0] < box->xmin) box->xmin= v1[0];
1024 if(v1[0] > box->xmax) box->xmax= v1[0];
1025 if(v1[1] < box->ymin) box->ymin= v1[1];
1026 if(v1[1] > box->ymax) box->ymax= v1[1];
1027 if(v1[2] < box->zmin) box->zmin= v1[2];
1028 if(v1[2] > box->zmax) box->zmax= v1[2];
1031 /* Grows the 2d rectangle box where needed so that it contains the point v1.
 * Same as bound_boxf() without the z component.
 *
 * box: in/out, 2d bounding rectangle
 * v1:  point, only v1[0] (x) and v1[1] (y) are read
 */
1032 static void bound_rectf(rctf *box, float *v1)
1034 if(v1[0] < box->xmin) box->xmin= v1[0];
1035 if(v1[0] > box->xmax) box->xmax= v1[0];
1036 if(v1[1] < box->ymin) box->ymin= v1[1];
1037 if(v1[1] > box->ymax) box->ymax= v1[1];
1041 /* halfway splitting, for initializing a more regular tree */
1042 static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
1045 /* if level > 0 we create new branches and go deeper*/
1047 ISBBranch *left, *right;
1051 root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
1052 root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
1054 /* find best splitpoint */
1055 if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1060 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1061 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1064 left->box= root->box;
1065 right->box= root->box;
1067 left->box.xmax= root->divider[0];
1068 right->box.xmin= root->divider[0];
1071 left->box.ymax= root->divider[1];
1072 right->box.ymin= root->divider[1];
1074 isb_bsp_split_init(left, mem, level-1);
1075 isb_bsp_split_init(right, mem, level-1);
1078 /* we add sample array */
1079 root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1083 /* note; if all samples on same location we just spread them over 2 new branches */
1084 static void isb_bsp_split(ISBBranch *root, MemArena *mem)
1086 ISBBranch *left, *right;
1087 ISBSample *samples[BSPMAX_SAMPLE];
1091 root->divider[0]= root->divider[1]= 0.0f;
1092 for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1093 root->divider[0]+= root->samples[a]->zco[0];
1094 root->divider[1]+= root->samples[a]->zco[1];
1096 root->divider[0]/= BSPMAX_SAMPLE;
1097 root->divider[1]/= BSPMAX_SAMPLE;
1099 /* find best splitpoint */
1100 if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1106 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1107 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1109 /* new sample array */
1110 left->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1111 right->samples= samples; // tmp
1114 for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1116 /* this prevents adding samples all to 1 branch when divider is equal to samples */
1117 if(root->samples[a]->zco[i] == root->divider[i])
1119 else if(root->samples[a]->zco[i] < root->divider[i])
1123 left->samples[left->totsamp]= root->samples[a];
1127 right->samples[right->totsamp]= root->samples[a];
1132 /* copy samples from tmp */
1133 memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
1134 right->samples= root->samples;
1135 root->samples= NULL;
1138 left->box= root->box;
1139 right->box= root->box;
1141 left->box.xmax= root->divider[0];
1142 right->box.xmin= root->divider[0];
1145 left->box.ymax= root->divider[1];
1146 right->box.ymin= root->divider[1];
1150 /* inserts sample in main tree, also splits on threshold */
1151 /* returns 1 if error */
1152 static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
1154 ISBBranch *bspn= root;
1155 float *zco= sample->zco;
1158 /* debug counter, also used to check if something was filled in ever */
1161 /* going over branches until last one found */
1163 if(zco[bspn->index] <= bspn->divider[bspn->index])
1169 /* bspn now is the last branch */
1171 if(bspn->totsamp==BSPMAX_SAMPLE) {
1172 printf("error in bsp branch\n"); /* only for debug, cannot happen */
1177 bspn->samples[bspn->totsamp]= sample;
1180 /* split if allowed and needed */
1181 if(bspn->totsamp==BSPMAX_SAMPLE) {
1182 if(i==BSPMAX_DEPTH) {
1183 bspn->totsamp--; /* stop filling in... will give errors */
1186 isb_bsp_split(bspn, memarena);
/* Returns the distance between v1 and v2 in the xy plane; only the first two
 * components of both vectors are read. */
1191 static float VecLen2f( float *v1, float *v2)
1193 float x= v1[0]-v2[0];
1194 float y= v1[1]-v2[1];
1195 return (float)sqrt(x*x+y*y);
1198 /* Precalculates the strand values in face that point_behind_strand() reads
 * for every sample: radline, radline_end, len, vec1, vec2 and rc.
 * Initialize vars in face, for optimal point-in-face test.
 *
 * NOTE(review): the condition that chooses between the two ways of setting
 * vec2 is not visible in this view; presumably it tests face->v4.
 */
1199 static void bspface_init_strand(BSPFace *face)
/* half the 2d length of edge v1-v2, compared against distances as a radius
 * in point_behind_strand() */
1202 face->radline= 0.5f*VecLen2f(face->v1, face->v2);
/* start point of the center line; VecMidf presumably gives the midpoint of
 * its last two arguments - confirm */
1204 VecMidf(face->vec1, face->v1, face->v2);
/* end point of the center line: from v3 and v4 ... */
1206 VecMidf(face->vec2, face->v3, face->v4);
/* ... or just v3 */
1208 VECCOPY(face->vec2, face->v3);
/* rc is the vector from vec1 to vec2 */
1210 face->rc[0]= face->vec2[0]-face->vec1[0];
1211 face->rc[1]= face->vec2[1]-face->vec1[1];
1212 face->rc[2]= face->vec2[2]-face->vec1[2];
/* squared 2d length of rc */
1214 face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
/* skip the divisions for a zero length line; point_behind_strand() has a
 * separate branch for len==0.0f */
1216 if(face->len!=0.0f) {
/* radius expressed as fraction of the line length */
1217 face->radline_end= face->radline/sqrt(face->len);
/* from here on len holds 1 / (squared length), used as multiplier */
1218 face->len= 1.0f/face->len;
1222 /* brought back to a simple 2d case */
1223 static int point_behind_strand(float *p, BSPFace *face)
1225 /* v1 - v2 is radius, v1 - v3 length */
1226 float dist, rc[2], pt[2];
1228 /* using code from PdistVL2Dfl(), distance vec to line-piece */
1230 if(face->len==0.0f) {
1231 rc[0]= p[0]-face->vec1[0];
1232 rc[1]= p[1]-face->vec1[1];
1233 dist= (float)(sqrt(rc[0]*rc[0]+ rc[1]*rc[1]));
1235 if(dist < face->radline)
1239 float labda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
1241 if(labda > -face->radline_end && labda < 1.0f+face->radline_end) {
1242 /* hesse for dist: */
1243 //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len);
1245 pt[0]= labda*face->rc[0]+face->vec1[0];
1246 pt[1]= labda*face->rc[1]+face->vec1[1];
1250 dist= (float)sqrt(rc[0]*rc[0]+ rc[1]*rc[1]);
1252 if(dist < face->radline) {
1253 float zval= face->vec1[2] + labda*face->rc[2];
1263 /* Tests point p against triangle v1-v2-v3 in the xy plane and, when p is
 * inside, calculates the z of the triangle at that position.
 * Return 1 if inside. code derived from src/parametrizer.c
 *
 * NOTE(review): the declarations of u and v, the handling of div, the range
 * tests on u and v, and all return statements are not visible in this view.
 * Given the comment further down, the z comparison presumably decides the
 * result - confirm.
 */
1264 static int point_behind_tria2d(float *p, float *v1, float *v2, float *v3)
1266 float a[2], c[2], h[2], div;
/* a and c are the 2d edge vectors from v1 to v2 and from v1 to v3 */
1269 a[0] = v2[0] - v1[0];
1270 a[1] = v2[1] - v1[1];
1271 c[0] = v3[0] - v1[0];
1272 c[1] = v3[1] - v1[1];
/* 2d cross product of both edges */
1274 div = a[0]*c[1] - a[1]*c[0];
/* h is p relative to v1 */
1278 h[0] = p[0] - v1[0];
1279 h[1] = p[1] - v1[1];
/* u and v are multiplied by div here; presumably div was replaced by its
 * reciprocal before this point, making u and v the weights of v2 and v3 */
1283 u = (h[0]*c[1] - h[1]*c[0])*div;
1285 v = (a[0]*h[1] - a[1]*h[0])*div;
1287 if( u + v <= 1.0f) {
1288 /* inside, now check if point p is behind */
/* z of the triangle at p, weighted with (1-u-v), u and v */
1289 float z= (1.0f-u-v)*v1[2] + u*v2[2] + v*v3[2];
1300 /* tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */
1302 /* check if line v1-v2 has all rect points on other side of point v3 */
1303 static int rect_outside_line(rctf *rect, float *v1, float *v2, float *v3)
1308 /* line formula for v1-v2 */
1311 c= -a*v1[0] - b*v1[1];
1312 side= a*v3[0] + b*v3[1] + c < 0.0f;
1314 /* the four quad points */
1315 if( side==(rect->xmin*a + rect->ymin*b + c >= 0.0f) )
1316 if( side==(rect->xmax*a + rect->ymin*b + c >= 0.0f) )
1317 if( side==(rect->xmax*a + rect->ymax*b + c >= 0.0f) )
1318 if( side==(rect->xmin*a + rect->ymax*b + c >= 0.0f) )
1323 /* check if one of the triangle edges separates all rect points on 1 side */
1324 static int rect_isect_tria(rctf *rect, float *v1, float *v2, float *v3)
1326 if(rect_outside_line(rect, v1, v2, v3))
1328 if(rect_outside_line(rect, v2, v3, v1))
1330 if(rect_outside_line(rect, v3, v1, v2))
1336 /* if face overlaps a branch, it executes func. recursive */
1337 static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
1340 /* are we descending? */
1342 /* hrmf, the box struct cannot be addressed with index */
1343 if(bspn->index==0) {
1344 if(face->box.xmin <= bspn->divider[0])
1345 isb_bsp_face_inside(bspn->left, face);
1346 if(face->box.xmax > bspn->divider[0])
1347 isb_bsp_face_inside(bspn->right, face);
1350 if(face->box.ymin <= bspn->divider[1])
1351 isb_bsp_face_inside(bspn->left, face);
1352 if(face->box.ymax > bspn->divider[1])
1353 isb_bsp_face_inside(bspn->right, face);
1357 /* else: end branch reached */
1360 if(bspn->totsamp==0) return;
1362 /* check for nodes entirely in shadow, can be skipped */
1363 if(bspn->totsamp==bspn->full)
1366 /* if bsp node is entirely in front of face, give up */
1367 if(bspn->box.zmax < face->box.zmin)
1370 /* if face boundbox is outside of branch rect, give up */
1371 if(0==BLI_isect_rctf((rctf *)&face->box, (rctf *)&bspn->box, NULL))
1374 /* test all points inside branch */
1375 for(a=bspn->totsamp-1; a>=0; a--) {
1376 ISBSample *samp= bspn->samples[a];
1378 if((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
1379 if(face->box.zmin < samp->zco[2]) {
1380 if(BLI_in_rctf((rctf *)&face->box, samp->zco[0], samp->zco[1])) {
1384 if(point_behind_strand(samp->zco, face))
1387 else if( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
1389 else if(face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
1393 *(samp->shadfac) += face->shad_alpha;
1394 /* optimize; is_full means shad_alpha==4096 */
1395 if(*(samp->shadfac) >= 4096 || face->is_full) {
1397 samp->shadfac= NULL;
1407 /* Based on available samples, recalculate the bounding box for bsp nodes,
 * recursive. Branches with children pass the call on to both children; an end
 * branch that holds samples gets a box that tightly fits the zco of its samples.
 * An end branch without samples keeps its box unchanged.
 *
 * NOTE(review): the condition in front of the two recursive calls is not
 * visible in this view; presumably it tests whether root has children.
 */
1408 static void isb_bsp_recalc_box(ISBBranch *root)
1411 isb_bsp_recalc_box(root->left);
1412 isb_bsp_recalc_box(root->right);
1414 else if(root->totsamp) {
/* start from an empty box and grow it around every sample */
1417 init_box(&root->box);
1418 for(a=root->totsamp-1; a>=0; a--)
1419 bound_boxf(&root->box, root->samples[a]->zco);
1423 /* callback function for zbuf clip */
1424 static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4)
1433 face.facenr= zvlnr & ~RE_QUAD_OFFS;
1434 face.type= R_STRAND;
1436 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1438 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1440 face.is_full= (zspan->shad_alpha==1.0f);
1442 /* setup boundbox */
1443 init_box(&face.box);
1444 bound_boxf(&face.box, v1);
1445 bound_boxf(&face.box, v2);
1446 bound_boxf(&face.box, v3);
1448 bound_boxf(&face.box, v4);
1450 /* optimize values */
1451 bspface_init_strand(&face);
1453 isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1457 /* callback function for zbuf clip */
1458 static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4)
1467 face.facenr= zvlnr & ~RE_QUAD_OFFS;
1470 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1472 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1474 face.is_full= (zspan->shad_alpha==1.0f);
1476 /* setup boundbox */
1477 init_box(&face.box);
1478 bound_boxf(&face.box, v1);
1479 bound_boxf(&face.box, v2);
1480 bound_boxf(&face.box, v3);
1482 bound_boxf(&face.box, v4);
1484 isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
/* Builds clip flags for a projected vertex against a rectangle, with the
 * rectangle borders scaled by wco.
 *
 * ho:     projected vertex, ho[0] is x and ho[1] is y
 * minmax: rectangle as {xmin, xmax, ymin, ymax}
 *
 * flag bits: 1 = x above xmax, 2 = x below xmin, 4 = y above ymax,
 * 8 = y below ymin. isb_bsp_fillfaces() ANDs the flags of all vertices of a
 * face; a bit that survives means all vertices are outside on the same side.
 *
 * NOTE(review): the declaration of flag and wco and the return statement are
 * not visible in this view; presumably wco is ho[3] and flag is returned.
 */
1487 static int testclip_minmax(float *ho, float *minmax)
1492 if( ho[0] > minmax[1]*wco) flag = 1;
1493 else if( ho[0]< minmax[0]*wco) flag = 2;
1495 if( ho[1] > minmax[3]*wco) flag |= 4;
1496 else if( ho[1]< minmax[2]*wco) flag |= 8;
1501 /* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
/* re:   render whose instancetable is walked
 * lar:  lamp; its shb supplies buffer size and persmat, its mode/lay the layer filter
 * root: bsp root, handed to the clip callbacks through zspan.rectz */
1502 static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
1504 ObjectInstanceRen *obi;
1506 ShadBuf *shb= lar->shb;
1507 ZSpan zspan, zspanstrand;
1510 float minmaxf[4], winmat[4][4];
1511 int size= shb->size;
/* lay= -1 has all bits set, so the layer test below passes for any non-zero obi->lay */
1512 int i, a, ok=1, lay= -1;
1514 /* further optimize, also sets minz maxz */
1515 isb_bsp_recalc_box(root);
1517 /* extra clipping for minmax */
/* root->box is rescaled by 2/size and shifted by -1, with an extra margin of
 * 2/size on each side; order is (xmin, xmax, ymin, ymax) */
1518 minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
1519 minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
1520 minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
1521 minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
/* only layer-restricted lamps narrow the layer mask */
1523 if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
1525 /* (ab)use zspan, since we use zbuffer clipping code */
1526 zbuf_alloc_span(&zspan, size, size, re->clipcrop);
1528 zspan.zmulx= ((float)size)/2.0f;
1529 zspan.zmuly= ((float)size)/2.0f;
1533 /* pass on bsp root to zspan */
1534 zspan.rectz= (int *)root;
1536 /* filling methods */
1538 // zspan.zbuflinefunc= zbufline_onlyZ;
1539 zspan.zbuffunc= isb_bsp_test_face;
1540 zspanstrand.zbuffunc= isb_bsp_test_strand;
/* i counts instances and is what the clip calls below receive as object index */
1542 for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
/* transformed instances get their own matrix combined with the lamp's persmat */
1545 if(obi->flag & R_TRANSFORMED)
1546 Mat4MulMat4(winmat, obi->mat, shb->persmat);
1548 Mat4CpyMat4(winmat, shb->persmat);
1550 for(a=0; a<obr->totvlak; a++) {
/* at every 256th index vlr is re-fetched from node a>>8 */
1552 if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
1555 /* note, these conditions are copied in shadowbuf_autoclip() */
/* materials without MA_SHADBUF, and wire materials, are excluded */
1559 if((ma->mode & MA_SHADBUF)==0) ok= 0;
1560 if(ma->material_type == MA_TYPE_WIRE) ok= 0;
/* the callbacks read the material's shadow alpha back from their zspan */
1561 zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
1564 if(ok && (obi->lay & lay)) {
/* c*: window clip flags, d*: minmax clip flags; the 4th stays 0 unless set below */
1566 int c1, c2, c3, c4=0;
1567 int d1, d2, d3, d4=0;
1570 /* create hocos per face, it is while render */
1571 projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
1572 projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
1573 projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
1575 projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
1578 /* minmax clipping */
/* a bit survives the AND only when every vertex is outside on that same side */
1579 if(vlr->v4) partclip= d1 & d2 & d3 & d4;
1580 else partclip= d1 & d2 & d3;
1584 /* window clipping */
1585 c1= testclip(hoco[0]);
1586 c2= testclip(hoco[1]);
1587 c3= testclip(hoco[2]);
1589 c4= testclip(hoco[3]);
1591 /* ***** NO WIRE YET */
/* NOTE(review): wire materials get ok=0 above, so this branch looks unreachable -- confirm */
1592 if(ma->material_type == MA_TYPE_WIRE) {
/* faces are passed 1-based (a+1) */
1594 zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
/* variant with 0 for the fourth vertex and its clip flag */
1596 zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], 0, c1, c2, c3, 0);
/* strands go through zspanstrand so the strand callback is used */
1599 if(vlr->flag & R_STRAND)
1600 zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
1602 zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
/* three-vertex clip call */
1605 zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
1612 zbuf_free_span(&zspan);
1615 /* returns 1 when the viewpixel is visible in lampbuffer */
/* shb:      shadow buffer; persmat, size and bias are read
 * obi, vlr: instance and face whose plane the pixel is placed on
 * x, y:     pixel coordinate in the view
 * co:       result, 3 floats: lamp buffer x, y and z */
1616 static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float *co)
1618 float hoco[4], v1[3], nor[3];
1619 float dface, fac, siz;
/* face normal, and the first vertex as a point on the face plane */
1621 RE_vlakren_get_normal(&R, obi, vlr, nor);
1622 VECCOPY(v1, vlr->v1->co);
1623 if(obi->flag & R_TRANSFORMED)
1624 Mat4MulVecfl(obi->mat, v1);
1626 /* from shadepixel() */
/* dface is the d of the plane equation a*x + b*y + c*z = d */
1627 dface= v1[0]*nor[0] + v1[1]*nor[1] + v1[2]*nor[2];
1630 /* ortho viewplane cannot intersect using view vector originating in (0,0,0) */
1631 if(R.r.mode & R_ORTHO) {
1632 /* x and y 3d coordinate can be derived from pixel coord and winmat */
1633 float fx= 2.0/(R.winx*R.winmat[0][0]);
1634 float fy= 2.0/(R.winy*R.winmat[1][1]);
1636 hoco[0]= (x - 0.5*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
1637 hoco[1]= (y - 0.5*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
1639 /* using a*x + b*y + c*z = d equation, (a b c) is normal */
/* solved for z with the x and y found above */
1641 hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
/* otherwise: scale the view vector through the pixel so it ends on the plane */
1648 calc_view_vector(view, x, y);
/* dot product of normal and view vector */
1650 div= nor[0]*view[0] + nor[1]*view[1] + nor[2]*view[2];
/* NOTE(review): fac is presumably derived from dface and div before this point -- confirm */
1656 hoco[0]= fac*view[0];
1657 hoco[1]= fac*view[1];
1658 hoco[2]= fac*view[2];
1661 /* move 3d vector to lampbuf */
1662 Mat4MulVec4fl(shb->persmat, hoco); /* rational hom co */
1664 /* clip We can test for -1.0/1.0 because of the properties of the
1665 * coordinate transformations. */
/* NOTE(review): fac is reused as clip bound here, presumably set to hoco[3] first;
 * each failing test presumably returns 0 -- confirm */
1667 if(hoco[0]<-fac || hoco[0]>fac)
1669 if(hoco[1]<-fac || hoco[1]>fac)
1671 if(hoco[2]<-fac || hoco[2]>fac)
/* after the divide by hoco[3], x and y go from -1..1 to 0..size, shifted by -0.5;
 * z is scaled by 0x7FFFFFFF */
1674 siz= 0.5f*(float)shb->size;
1675 co[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
1676 co[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
1677 co[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
1679 /* XXXX bias, much less than normal shadbuf, or do we need a constant? */
1680 co[2] -= 0.05f*shb->bias;
1685 /* storage of shadow results, solid osa and transp case */
/* isbsapp: address of the list head for one pixel
 * mem:     arena the new entry is allocated from
 * shadfac: fixed point shadow factor (4096 == 1.0), stored as float
 * samples: number of samples that contributed, used in the osa correction */
1686 static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
1691 /* in osa case, the samples were filled in with factor 1.0/R.osa. if fewer samples we have to correct */
/* fixed point to float, rescaled from R.osa samples to the actual sample count */
1693 shadfacf= ((float)shadfac*R.osa)/(4096.0*samples);
/* plain fixed point to float
 * NOTE(review): presumably the non-osa alternative of the line above -- confirm */
1695 shadfacf= ((float)shadfac)/(4096.0);
1697 new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
/* face number is stored without the RE_QUAD_OFFS bits */
1699 new->facenr= facenr & ~RE_QUAD_OFFS;
1700 new->shadfac= shadfacf;
/* new entry points at the current list head */
1702 new->next= (*isbsapp);
1709 /* adding samples, solid case */
/* inserts the samples of every sample buffer into the bsp tree at root, visiting
 * pixels in shuffled order; insert results are or-ed together in bsp_err */
1710 static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
1712 int xi, yi, *xcos, *ycos;
1713 int sample, bsp_err= 0;
1715 /* bsp split doesn't like to handle regular sequences */
/* one int per column and one per row, shuffled with fixed seeds so the order
 * is the same on every call */
1716 xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
1717 ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
1718 for(xi=0; xi<pa->rectx; xi++)
1720 for(yi=0; yi<pa->recty; yi++)
1722 BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
1723 BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
/* without osa a single buffer is processed */
1725 for(sample=0; sample<(R.osa?R.osa:1); sample++) {
1726 ISBSample *samp= samplebuf[sample], *samp1;
1728 for(yi=0; yi<pa->recty; yi++) {
1730 for(xi=0; xi<pa->rectx; xi++) {
/* NOTE(review): x and y are presumably looked up in xcos/ycos -- confirm */
1732 samp1= samp + y*pa->rectx + x;
1734 bsp_err |= isb_bsp_insert(root, memarena, samp1);
1747 /* lar->shb, pa->rectz and pa->rectp should exist */
/* solid case: converts the visible pixels (or osa samples) of part pa to lamp
 * buffer space, builds a bsp tree over them, lets isb_bsp_fillfaces() fill in
 * shadow values and leaves the result in shb->isb_result[pa->thread] */
1748 static void isb_make_buffer(RenderPart *pa, LampRen *lar)
1750 ShadBuf *shb= lar->shb;
1752 ISBSample *samp, *samplebuf[16]; /* should be RE_MAX_OSA */
1756 int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
1758 /* storage for shadow, per thread */
1759 isbdata= shb->isb_result[pa->thread];
1761 /* to map the shi->xs and ys coordinate */
1762 isbdata->minx= pa->disprect.xmin;
1763 isbdata->miny= pa->disprect.ymin;
1764 isbdata->rectx= pa->rectx;
1765 isbdata->recty= pa->recty;
1767 /* branches are added using memarena (32k branches) */
1768 memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch));
1769 BLI_memarena_use_calloc(memarena);
1771 /* samplebuf is in camera view space (pixels) */
/* one zeroed buffer of rectx*recty samples per osa sample, a single one without osa */
1772 for(sample=0; sample<(R.osa?R.osa:1); sample++)
1773 samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
1775 /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
1777 isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
1779 /* setup bsp root */
/* min starts at shb->size while max stays 0 from the memset; an untouched
 * xmin is used further down to detect that no sample was added */
1780 memset(&root, 0, sizeof(ISBBranch));
1781 root.box.xmin= (float)shb->size;
1782 root.box.ymin= (float)shb->size;
1784 /* create the sample buffers */
1785 for(sindex=0, y=0; y<pa->recty; y++) {
1786 for(x=0; x<pa->rectx; x++, sindex++) {
1788 /* this makes it a long function, but splitting it out would mean 10+ arguments */
1789 /* first check OSA case */
1791 rd= pa->rectdaps + sindex;
/* pixel position in full image coordinates */
1793 float xs= (float)(x + pa->disprect.xmin);
1794 float ys= (float)(y + pa->disprect.ymin);
1796 for(sample=0; sample<R.osa; sample++) {
/* the rectdaps entry holds a PixStr pointer */
1797 PixStr *ps= (PixStr *)(*rd);
1798 int mask= (1<<sample);
1805 if(ps && ps->facenr>0) {
1806 ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
1807 ObjectRen *obr= obi->obr;
/* stored face numbers are 1-based */
1808 VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
1810 samp= samplebuf[sample] + sindex;
1811 /* convert image plane pixel location to lamp buffer space */
/* the sample position is the pixel position plus the jitter offset of this sample */
1812 if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
1814 samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
/* result is accumulated directly in the PixStr */
1816 samp->shadfac= &ps->shadfac;
1817 bound_rectf((rctf *)&root.box, samp->zco);
/* non osa: object and face come from the recto and rectp buffers */
1824 rectp= pa->rectp + sindex;
1825 recto= pa->recto + sindex;
1827 ObjectInstanceRen *obi= &R.objectinstance[*recto];
1828 ObjectRen *obr= obi->obr;
1829 VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
1830 float xs= (float)(x + pa->disprect.xmin);
1831 float ys= (float)(y + pa->disprect.ymin);
1833 samp= samplebuf[0] + sindex;
1834 /* convert image plane pixel location to lamp buffer space */
1835 if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
1837 samp->facenr= *rectp & ~RE_QUAD_OFFS;
/* result is accumulated in the per pixel shadfacs array */
1838 samp->shadfac= isbdata->shadfacs + sindex;
1839 bound_rectf((rctf *)&root.box, samp->zco);
1846 /* simple method to see if we have samples */
1847 if(root.box.xmin != (float)shb->size) {
1848 /* now create a regular split, root.box has the initial bounding box of all pixels */
1849 /* split bsp 8 levels deep, in regular grid (16 x 16) */
1850 isb_bsp_split_init(&root, memarena, 8);
1852 /* insert all samples in BSP now */
1853 bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
1856 /* go over all faces and fill in shadow values */
1858 isb_bsp_fillfaces(&R, lar, &root); /* shb->persmat should have been calculated */
1860 /* copy shadow samples to persistent buffer, reduce memory overhead */
/* one list head pointer per pixel */
1862 ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
1864 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA));
1865 BLI_memarena_use_calloc(isbdata->memarena);
/* rectdaps and the list heads are walked in step, pixel by pixel */
1867 for(rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
1870 PixStr *ps= (PixStr *)(*rd);
/* the number of bits set in the mask is passed as sample count */
1873 isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
/* NOTE(review): presumably the path where no usable result exists; shadfacs
 * is released and cleared so ISB_getshadow() skips it -- confirm */
1882 if(isbdata->shadfacs) {
1883 MEM_freeN(isbdata->shadfacs);
1884 isbdata->shadfacs= NULL;
/* temporary data: bsp arena and sample buffers */
1889 BLI_memarena_free(memarena);
1892 for(x=0; x<(R.osa?R.osa:1); x++)
1893 MEM_freeN(samplebuf[x]);
1895 if(bsp_err) printf("error in filling bsp\n");
1898 /* add sample to buffer, isbsa is the root sample in a buffer */
/* the sample is allocated from mem and linked in front of the current root */
1899 static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
1903 new= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
/* new sample points at the current root */
1905 new->next= (*isbsa);
1913 /* adding samples in BSP, transparent case */
/* samplebuf holds, per sample buffer, one sample list pointer per pixel; pixels
 * are visited in shuffled order and insert results are or-ed together in bsp_err */
1914 static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
1916 int xi, yi, *xcos, *ycos;
1917 int sample, bsp_err= 0;
1919 /* bsp split doesn't like to handle regular sequences */
/* one int per column and one per row, shuffled with fixed seeds so the order
 * is the same on every call */
1920 xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
1921 ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
1922 for(xi=0; xi<pa->rectx; xi++)
1924 for(yi=0; yi<pa->recty; yi++)
1926 BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
1927 BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
/* without osa a single buffer is processed */
1929 for(sample=0; sample<(R.osa?R.osa:1); sample++) {
1930 ISBSampleA **samp= samplebuf[sample], *samp1;
1932 for(yi=0; yi<pa->recty; yi++) {
1934 for(xi=0; xi<pa->rectx; xi++) {
/* first sample of the pixel's list
 * NOTE(review): x and y are presumably looked up in xcos/ycos -- confirm */
1937 samp1= *(samp + y*pa->rectx + x);
/* NOTE(review): the cast assumes ISBSampleA can be read as ISBSample by the bsp code -- confirm */
1939 bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
1954 /* Ztransp version */
1955 /* lar->shb, pa->rectz and pa->rectp should exist */
/* converts every face stored in the APixstr lists of apixbuf to samples in lamp
 * buffer space, builds a bsp tree over them, lets isb_bsp_fillfaces() fill in
 * shadow values and leaves the result in shb->isb_result[pa->thread] */
1956 static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
1958 ShadBuf *shb= lar->shb;
1960 ISBSampleA *samp, **samplebuf[16]; /* MAX_OSA */
1964 int x, y, sindex, sample, bsp_err=0;
1966 /* storage for shadow, per thread */
1967 isbdata= shb->isb_result[pa->thread];
1969 /* to map the shi->xs and ys coordinate */
1970 isbdata->minx= pa->disprect.xmin;
1971 isbdata->miny= pa->disprect.ymin;
1972 isbdata->rectx= pa->rectx;
1973 isbdata->recty= pa->recty;
1975 /* branches are added using memarena (32k branches) */
1976 memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch));
1977 BLI_memarena_use_calloc(memarena);
1979 /* samplebuf is in camera view space (pixels) */
/* here each buffer holds one (initially NULL) list pointer per pixel */
1980 for(sample=0; sample<(R.osa?R.osa:1); sample++)
1981 samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
1983 /* setup bsp root */
/* min starts at shb->size while max stays 0 from the memset; an untouched
 * xmin is used further down to detect that no sample was added */
1984 memset(&root, 0, sizeof(ISBBranch));
1985 root.box.xmin= (float)shb->size;
1986 root.box.ymin= (float)shb->size;
1988 /* create the sample buffers */
1989 for(ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
1990 for(x=0; x<pa->rectx; x++, sindex++, ap++) {
/* pixel position in full image coordinates */
1994 float xs= (float)(x + pa->disprect.xmin);
1995 float ys= (float)(y + pa->disprect.ymin);
/* every APixstr in the pixel's list has 4 face slots */
1997 for(apn=ap; apn; apn= apn->next) {
1999 for(a=0; a<4; a++) {
2001 ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
2002 ObjectRen *obr= obi->obr;
/* stored face numbers are 1-based */
2003 VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
2006 /* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
2010 for(sample=0; sample<R.osa; sample++) {
2011 int mask= (1<<sample);
/* only osa samples that this face covers get a shadow sample */
2013 if(apn->mask[a] & mask) {
2015 /* convert image plane pixel location to lamp buffer space */
2016 if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
2017 samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
2018 samp->obi= apn->obi[a];
2019 samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
/* result is accumulated directly in the APixstr */
2020 samp->shadfac= &apn->shadfac[a];
2022 VECCOPY(samp->zco, zco);
2023 bound_rectf((rctf *)&root.box, samp->zco);
/* non osa: one sample per face, at the unjittered pixel position */
2030 /* convert image plane pixel location to lamp buffer space */
2031 if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
2033 samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
2034 samp->obi= apn->obi[a];
2035 samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2036 samp->shadfac= &apn->shadfac[a];
2038 VECCOPY(samp->zco, zco);
2039 bound_rectf((rctf *)&root.box, samp->zco);
2049 /* simple method to see if we have samples */
2050 if(root.box.xmin != (float)shb->size) {
2051 /* now create a regular split, root.box has the initial bounding box of all pixels */
2052 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2053 isb_bsp_split_init(&root, memarena, 8);
2055 /* insert all samples in BSP now */
2056 bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
2059 ISBShadfacA **isbsa;
2061 /* go over all faces and fill in shadow values */
2062 isb_bsp_fillfaces(&R, lar, &root); /* shb->persmat should have been calculated */
2064 /* copy shadow samples to persistent buffer, reduce memory overhead */
/* one list head pointer per pixel */
2065 isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2067 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA));
/* apixbuf and the list heads are walked in step, pixel by pixel */
2069 for(ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
2073 for(apn=ap; apn; apn= apn->next) {
2075 for(a=0; a<4; a++) {
/* only slots with a face and a non-zero shadow value are stored */
2076 if(apn->p[a] && apn->shadfac[a]) {
/* the number of bits set in the mask is passed as sample count */
2078 isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
/* variant with sample count 0
 * NOTE(review): presumably the non-osa alternative of the call above -- confirm */
2080 isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);
/* temporary data: bsp arena and sample buffers */
2090 BLI_memarena_free(memarena);
2093 for(x=0; x<(R.osa?R.osa:1); x++)
2094 MEM_freeN(samplebuf[x]);
2096 if(bsp_err) printf("error in filling bsp\n");
2103 /* returns amount of light (1.0 = no shadow) */
2104 /* note, shadepixel() rounds the coordinate, not the real sample info */
/* shi: shade input; thread, xs, ys, obi and facenr are read
 * shb: shadow buffer holding the per thread result in isb_result */
2105 float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
2107 /* if raytracing, we can't accept irregular shadow */
2109 ISBData *isbdata= shb->isb_result[shi->thread];
/* a result exists either as flat array (shadfacs) or as per pixel lists (shadfaca) */
2112 if(isbdata->shadfacs || isbdata->shadfaca) {
/* from image coordinates to coordinates inside the stored rect */
2113 int x= shi->xs - isbdata->minx;
2115 if(x >= 0 && x < isbdata->rectx) {
2116 int y= shi->ys - isbdata->miny;
2118 if(y >= 0 && y < isbdata->recty) {
2119 if(isbdata->shadfacs) {
2120 short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
/* fixed point shadow (4096 == 1.0) to light, 4096 and above gives no light */
2121 return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
2124 int sindex= y*isbdata->rectx + x;
/* index of the instance in R.objectinstance, to compare with the stored obi */
2125 int obi= shi->obi - R.objectinstance;
2126 ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex);
/* stored face numbers are 1-based, shi->facenr is not */
2129 if(isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
/* float shadow to light, 1.0 and above gives no light */
2130 return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
2142 /* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
2143 void ISB_create(RenderPart *pa, APixstr *apixbuf)
2147 /* go over all lamps, and make the irregular buffers */
2148 for(go=R.lights.first; go; go= go->next) {
2149 LampRen *lar= go->lampren;
/* only spot lamps that have a shadow buffer of the irregular type */
2151 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2153 /* create storage for shadow, per thread */
2154 lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
/* NOTE(review): presumably apixbuf selects between the two builders -- confirm */
2157 isb_make_buffer_transp(pa, apixbuf, lar);
2159 isb_make_buffer(pa, lar);
2165 /* end of part rendering, free stored shadow data for this thread from all lamps */
2166 void ISB_free(RenderPart *pa)
2170 /* go over all lamps, and free the irregular buffers */
2171 for(go=R.lights.first; go; go= go->next) {
2172 LampRen *lar= go->lampren;
/* same lamp selection as in ISB_create() */
2174 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2175 ISBData *isbdata= lar->shb->isb_result[pa->thread];
/* each of the three result members is optional */
2178 if(isbdata->shadfacs)
2179 MEM_freeN(isbdata->shadfacs);
2180 if(isbdata->shadfaca)
2181 MEM_freeN(isbdata->shadfaca);
2183 if(isbdata->memarena)
2184 BLI_memarena_free(isbdata->memarena);
/* clear the slot of this thread */
2187 lar->shb->isb_result[pa->thread]= NULL;