Render control feature: shader-level shadowbuffer bias
[blender.git] / source / blender / render / intern / source / shadbuf.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
17  *
18  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * Contributor(s): 2004-2006, Blender Foundation
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 #include <math.h>
27 #include <string.h>
28
29 #include "MTC_matrixops.h"
30 #include "MEM_guardedalloc.h"
31
32 #include "DNA_group_types.h"
33 #include "DNA_lamp_types.h"
34 #include "DNA_material_types.h"
35
36 #include "BKE_global.h"
37 #include "BKE_utildefines.h"
38
39 #include "BLI_arithb.h"
40 #include "BLI_blenlib.h"
41 #include "BLI_jitter.h"
42 #include "BLI_memarena.h"
43 #include "BLI_rand.h"
44
45 #include "PIL_time.h"
46
47 #include "renderpipeline.h"
48 #include "render_types.h"
49 #include "renderdatabase.h"
50 #include "rendercore.h"
51
52 #include "shadbuf.h"
53 #include "zbuf.h"
54
55 /* XXX, could be better implemented... this is for endian issues
56 */
57 #if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__BIG_ENDIAN__)
58 #define RCOMP   3
59 #define GCOMP   2
60 #define BCOMP   1
61 #define ACOMP   0
62 #else
63 #define RCOMP   0
64 #define GCOMP   1
65 #define BCOMP   2
66 #define ACOMP   3
67 #endif
68
69 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
70 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
71 /* only to be used here in this file, it's for speed */
72 extern struct Render R;
73 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
74
75 /* ------------------------------------------------------------------------- */
76
77 /* initshadowbuf() in convertBlenderScene.c */
78
79 /* ------------------------------------------------------------------------- */
80
81 static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
82 {
83         int len4, *rz;  
84         int x2, y2;
85         
86         x2= x1+tile;
87         y2= y1+tile;
88         if(x2>=size) x2= size-1;
89         if(y2>=size) y2= size-1;
90
91         if(x1>=x2 || y1>=y2) return;
92
93         len4= 4*(x2- x1);
94         rz= rectz + size*y1 + x1;
95         for(; y1<y2; y1++) {
96                 memcpy(r1, rz, len4);
97                 rz+= size;
98                 r1+= len4;
99         }
100 }
101
102 #if 0
103 static int sizeoflampbuf(ShadBuf *shb)
104 {
105         int num,count=0;
106         char *cp;
107         
108         cp= shb->cbuf;
109         num= (shb->size*shb->size)/256;
110
111         while(num--) count+= *(cp++);
112         
113         return 256*count;
114 }
115 #endif
116
117 /* not threadsafe... */
118 static float *give_jitter_tab(int samp)
119 {
120         /* these are all possible jitter tables, takes up some
121          * 12k, not really bad!
122          * For soft shadows, it saves memory and render time
123          */
124         static int tab[17]={1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256};
125         static float jit[1496][2];
126         static char ctab[17]= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
127         int a, offset=0;
128         
129         if(samp<2) samp= 2;
130         else if(samp>16) samp= 16;
131
132         for(a=0; a<samp-1; a++) offset+= tab[a];
133
134         if(ctab[samp]==0) {
135                 ctab[samp]= 1;
136                 BLI_initjit(jit[offset], samp*samp);
137         }
138                 
139         return jit[offset];
140         
141 }
142
143 static void make_jitter_weight_tab(ShadBuf *shb, short filtertype) 
144 {
145         float *jit, totw= 0.0f;
146         int a, tot=shb->samp*shb->samp;
147         
148         shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
149         
150         for(jit= shb->jit, a=0; a<tot; a++, jit+=2) {
151                 if(filtertype==LA_SHADBUF_TENT) 
152                         shb->weight[a]= 0.71f - sqrt(jit[0]*jit[0] + jit[1]*jit[1]);
153                 else if(filtertype==LA_SHADBUF_GAUSS) 
154                         shb->weight[a]= RE_filter_value(R_FILTER_GAUSS, 1.8f*sqrt(jit[0]*jit[0] + jit[1]*jit[1]));
155                 else
156                         shb->weight[a]= 1.0f;
157                 
158                 totw+= shb->weight[a];
159         }
160         
161         totw= 1.0f/totw;
162         for(a=0; a<tot; a++) {
163                 shb->weight[a]*= totw;
164         }
165 }
166
167 /* create Z tiles (for compression): this system is 24 bits!!! */
168 static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
169 {
170         ShadSampleBuf *shsample;
171         float dist;
172         unsigned long *ztile;
173         int *rz, *rz1, verg, verg1, size= shb->size;
174         int a, x, y, minx, miny, byt1, byt2;
175         char *rc, *rcline, *ctile, *zt;
176         
177         shsample= MEM_mallocN( sizeof(ShadSampleBuf), "shad sample buf");
178         BLI_addtail(&shb->buffers, shsample);
179         
180         shsample->zbuf= MEM_mallocN( sizeof(unsigned long)*(size*size)/256, "initshadbuf2");
181         shsample->cbuf= MEM_callocN( (size*size)/256, "initshadbuf3");
182         
183         ztile= (unsigned long *)shsample->zbuf;
184         ctile= shsample->cbuf;
185         
186         /* help buffer */
187         rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
188         
189         for(y=0; y<size; y+=16) {
190                 if(y< size/2) miny= y+15-size/2;
191                 else miny= y-size/2;    
192                 
193                 for(x=0; x<size; x+=16) {
194                         
195                         /* is tile within spotbundle? */
196                         a= size/2;
197                         if(x< a) minx= x+15-a;
198                         else minx= x-a; 
199                         
200                         dist= sqrt( (float)(minx*minx+miny*miny) );
201                         
202                         if(square==0 && dist>(float)(a+12)) {   /* 12, tested with a onlyshadow lamp */
203                                 a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
204                                 rz1= (&verg)+1;
205                         } 
206                         else {
207                                 copy_to_ztile(rectz, size, x, y, 16, rcline);
208                                 rz1= (int *)rcline;
209                                 
210                                 verg= (*rz1 & 0xFFFFFF00);
211                                 
212                                 for(a=0;a<256;a++,rz1++) {
213                                         if( (*rz1 & 0xFFFFFF00) !=verg) break;
214                                 }
215                         }
216                         if(a==256) { /* complete empty tile */
217                                 *ctile= 0;
218                                 *ztile= *(rz1-1);
219                         }
220                         else {
221                                 
222                                 /* ACOMP etc. are defined to work L/B endian */
223                                 
224                                 rc= rcline;
225                                 rz1= (int *)rcline;
226                                 verg=  rc[ACOMP];
227                                 verg1= rc[BCOMP];
228                                 rc+= 4;
229                                 byt1= 1; byt2= 1;
230                                 for(a=1;a<256;a++,rc+=4) {
231                                         byt1 &= (verg==rc[ACOMP]);
232                                         byt2 &= (verg1==rc[BCOMP]);
233                                         
234                                         if(byt1==0) break;
235                                 }
236                                 if(byt1 && byt2) {      /* only store byte */
237                                         *ctile= 1;
238                                         *ztile= (unsigned long)MEM_mallocN(256+4, "tile1");
239                                         rz= (int *)*ztile;
240                                         *rz= *rz1;
241                                         
242                                         zt= (char *)(rz+1);
243                                         rc= rcline;
244                                         for(a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];       
245                                 }
246                                 else if(byt1) {         /* only store short */
247                                         *ctile= 2;
248                                         *ztile= (unsigned long)MEM_mallocN(2*256+4,"Tile2");
249                                         rz= (int *)*ztile;
250                                         *rz= *rz1;
251                                         
252                                         zt= (char *)(rz+1);
253                                         rc= rcline;
254                                         for(a=0; a<256; a++, zt+=2, rc+=4) {
255                                                 zt[0]= rc[BCOMP];
256                                                 zt[1]= rc[GCOMP];
257                                         }
258                                 }
259                                 else {                  /* store triple */
260                                         *ctile= 3;
261                                         *ztile= (unsigned long)MEM_mallocN(3*256,"Tile3");
262
263                                         zt= (char *)*ztile;
264                                         rc= rcline;
265                                         for(a=0; a<256; a++, zt+=3, rc+=4) {
266                                                 zt[0]= rc[ACOMP];
267                                                 zt[1]= rc[BCOMP];
268                                                 zt[2]= rc[GCOMP];
269                                         }
270                                 }
271                         }
272                         ztile++;
273                         ctile++;
274                 }
275         }
276
277         MEM_freeN(rcline);
278
279 }
280
281 /* sets start/end clipping. lar->shb should be initialized */
282 static void shadowbuf_autoclip(Render *re, LampRen *lar)
283 {
284         ObjectInstanceRen *obi;
285         ObjectRen *obr;
286         VlakRen *vlr= NULL;
287         VertRen *ver= NULL;
288         Material *ma= NULL;
289         float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
290         unsigned int lay = -1;
291         int i, a, ok= 1;
292         char *clipflag;
293         
294         minz= 1.0e30f; maxz= -1.0e30f;
295         Mat4CpyMat4(viewmat, lar->shb->viewmat);
296         
297         if(lar->mode & LA_LAYER) lay= lar->lay;
298
299         clipflag= MEM_callocN(sizeof(char)*re->totvert, "autoclipflag");
300
301         /* set clip in vertices when face visible */
302         for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
303                 obr= obi->obr;
304
305                 if(obi->flag & R_TRANSFORMED)
306                         Mat4MulMat4(obviewmat, obi->mat, viewmat);
307                 else
308                         Mat4CpyMat4(obviewmat, viewmat);
309
310                 memset(clipflag, 0, sizeof(char)*obr->totvert);
311
312                 /* clear clip, is being set if face is visible (clip is calculated for real later) */
313                 for(a=0; a<obr->totvlak; a++) {
314                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
315                         else vlr++;
316                         
317                         /* note; these conditions are copied from zbuffer_shadow() */
318                         if(vlr->mat!= ma) {
319                                 ma= vlr->mat;
320                                 ok= 1;
321                                 if((ma->mode & MA_SHADBUF)==0) ok= 0;
322                         }
323                         
324                         if(ok && (vlr->lay & lay)) {
325                                 clipflag[vlr->v1->index]= 1;
326                                 clipflag[vlr->v2->index]= 1;
327                                 clipflag[vlr->v3->index]= 1;
328                                 if(vlr->v4) clipflag[vlr->v4->index]= 1;
329                         }                               
330                 }               
331                 
332                 /* calculate min and max */
333                 for(a=0; a< obr->totvert;a++) {
334                         if((a & 255)==0) ver= RE_findOrAddVert(obr, a);
335                         else ver++;
336                         
337                         if(clipflag[a]) {
338                                 VECCOPY(vec, ver->co);
339                                 Mat4MulVecfl(obviewmat, vec);
340                                 /* Z on visible side of lamp space */
341                                 if(vec[2] < 0.0f) {
342                                         float inpr, z= -vec[2];
343                                         
344                                         /* since vec is rotated in lampspace, this is how to get the cosine of angle */
345                                         /* precision is set 20% larger */
346                                         vec[2]*= 1.2f;
347                                         Normalize(vec);
348                                         inpr= - vec[2];
349
350                                         if(inpr>=lar->spotsi) {
351                                                 if(z<minz) minz= z;
352                                                 if(z>maxz) maxz= z;
353                                         }
354                                 }
355                         }
356                 }
357         }
358
359         MEM_freeN(clipflag);
360         
361         /* set clipping min and max */
362         if(minz < maxz) {
363                 float delta= (maxz - minz);     /* threshold to prevent precision issues */
364                 
365                 //printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
366                 if(lar->bufflag & LA_SHADBUF_AUTO_START)
367                         lar->shb->d= minz - delta*0.02f;        /* 0.02 is arbitrary... needs more thinking! */
368                 if(lar->bufflag & LA_SHADBUF_AUTO_END)
369                         lar->shb->clipend= maxz + delta*0.1f;
370                 
371                 /* bias was calculated as percentage, we scale it to prevent animation issues */
372                 delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
373                 //printf("bias delta %f\n", delta);
374                 lar->shb->bias= (int) (delta*(float)lar->shb->bias);
375         }
376 }
377
378 void makeshadowbuf(Render *re, LampRen *lar)
379 {
380         ShadBuf *shb= lar->shb;
381         float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
382         int *rectz, samples;
383         
384         if(lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
385                 shadowbuf_autoclip(re, lar);
386         
387         /* just to enforce identical behaviour of all irregular buffers */
388         if(lar->buftype==LA_SHADBUF_IRREGULAR)
389                 shb->size= 1024;
390         
391         /* matrices and window: in winmat the transformation is being put,
392                 transforming from observer view to lamp view, including lamp window matrix */
393         
394         angle= saacos(lar->spotsi);
395         temp= 0.5f*shb->size*cos(angle)/sin(angle);
396         shb->pixsize= (shb->d)/temp;
397         wsize= shb->pixsize*(shb->size/2.0);
398         
399         i_window(-wsize, wsize, -wsize, wsize, shb->d, shb->clipend, shb->winmat);
400         MTC_Mat4MulMat4(shb->persmat, shb->viewmat, shb->winmat);
401
402         if(ELEM(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY)) {
403                 /* jitter, weights - not threadsafe! */
404                 BLI_lock_thread(LOCK_CUSTOM1);
405                 shb->jit= give_jitter_tab(shb->samp);
406                 make_jitter_weight_tab(shb, lar->filtertype);
407                 BLI_unlock_thread(LOCK_CUSTOM1);
408                 
409                 shb->totbuf= lar->buffers;
410                 if(shb->totbuf==4) jitbuf= give_jitter_tab(2);
411                 else if(shb->totbuf==9) jitbuf= give_jitter_tab(3);
412                 else jitbuf= twozero;
413                 
414                 /* zbuffering */
415                 rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
416                 
417                 for(samples=0; samples<shb->totbuf; samples++) {
418                         zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
419                         /* create Z tiles (for compression): this system is 24 bits!!! */
420                         compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
421
422                         if(re->test_break())
423                                 break;
424                 }
425                 
426                 MEM_freeN(rectz);
427
428                 /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
429         }
430 }
431
432 static void *do_shadow_thread(void *re_v)
433 {
434         Render *re= (Render*)re_v;
435         LampRen *lar;
436
437         do {
438                 BLI_lock_thread(LOCK_CUSTOM1);
439                 for(lar=re->lampren.first; lar; lar=lar->next) {
440                         if(lar->shb && !lar->thread_assigned) {
441                                 lar->thread_assigned= 1;
442                                 break;
443                         }
444                 }
445                 BLI_unlock_thread(LOCK_CUSTOM1);
446
447                 /* if type is irregular, this only sets the perspective matrix and autoclips */
448                 if(lar) {
449                         makeshadowbuf(re, lar);
450                         BLI_lock_thread(LOCK_CUSTOM1);
451                         lar->thread_ready= 1;
452                         BLI_unlock_thread(LOCK_CUSTOM1);
453                 }
454         } while(lar && !re->test_break());
455
456         return NULL;
457 }
458
459 static volatile int g_break= 0;
460 static int thread_break(void)
461 {
462         return g_break;
463 }
464
465 void threaded_makeshadowbufs(Render *re)
466 {
467         ListBase threads;
468         LampRen *lar;
469         int a, totthread= 0;
470         int (*test_break)(void);
471
472         /* count number of threads to use */
473         if(G.rendering) {
474                 for(lar=re->lampren.first; lar; lar= lar->next)
475                         if(lar->shb)
476                                 totthread++;
477                 
478                 totthread= MIN2(totthread, re->r.threads);
479         }
480         else
481                 totthread= 1; /* preview render */
482
483         if(totthread <= 1) {
484                 for(lar=re->lampren.first; lar; lar= lar->next) {
485                         if(re->test_break()) break;
486                         if(lar->shb) {
487                                 /* if type is irregular, this only sets the perspective matrix and autoclips */
488                                 makeshadowbuf(re, lar);
489                         }
490                 }
491         }
492         else {
493                 /* swap test break function */
494                 test_break= re->test_break;
495                 re->test_break= thread_break;
496
497                 for(lar=re->lampren.first; lar; lar= lar->next) {
498                         lar->thread_assigned= 0;
499                         lar->thread_ready= 0;
500                 }
501
502                 BLI_init_threads(&threads, do_shadow_thread, totthread);
503                 
504                 for(a=0; a<totthread; a++)
505                         BLI_insert_thread(&threads, re);
506
507                 /* keep rendering as long as there are shadow buffers not ready */
508                 do {
509                         if((g_break=test_break()))
510                                 break;
511
512                         PIL_sleep_ms(50);
513
514                         BLI_lock_thread(LOCK_CUSTOM1);
515                         for(lar=re->lampren.first; lar; lar= lar->next)
516                                 if(lar->shb && !lar->thread_ready)
517                                         break;
518                         BLI_unlock_thread(LOCK_CUSTOM1);
519                 } while(lar);
520         
521                 BLI_end_threads(&threads);
522
523                 /* unset threadsafety */
524                 re->test_break= test_break;
525                 g_break= 0;
526         }
527 }
528
529 void freeshadowbuf(LampRen *lar)
530 {
531         if(lar->shb) {
532                 ShadBuf *shb= lar->shb;
533                 ShadSampleBuf *shsample;
534                 int b, v;
535                 
536                 v= (shb->size*shb->size)/256;
537                 
538                 for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
539                         long *ztile= shsample->zbuf;
540                         char *ctile= shsample->cbuf;
541                         
542                         for(b=0; b<v; b++, ztile++, ctile++)
543                                 if(*ctile) MEM_freeN((void *) *ztile);
544                         
545                         MEM_freeN(shsample->zbuf);
546                         MEM_freeN(shsample->cbuf);
547                 }
548                 BLI_freelistN(&shb->buffers);
549                 
550                 if(shb->weight) MEM_freeN(shb->weight);
551                 MEM_freeN(lar->shb);
552                 
553                 lar->shb= NULL;
554         }
555 }
556
557
558 static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
559 {
560         /* return a 1 if fully compressed shadbuf-tile && z==const */
561         int ofs;
562         char *ct;
563
564         /* always test borders of shadowbuffer */
565         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
566         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
567    
568         /* calc z */
569         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
570         ct= shsample->cbuf+ofs;
571         if(*ct==0) {
572             if(nr==0) {
573                         *rz= *( (int **)(shsample->zbuf+ofs) );
574                         return 1;
575             }
576                 else if(*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
577                 
578             return 1;
579         }
580         
581         return 0;
582 }
583
584 /* return 1.0 : fully in light */
585 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)     
586 {
587         float temp;
588         int *rz, ofs;
589         int zsamp=0;
590         char *ct, *cz;
591
592         /* simpleclip */
593         /* if(xs<0 || ys<0) return 1.0; */
594         /* if(xs>=shb->size || ys>=shb->size) return 1.0; */
595         
596         /* always test borders of shadowbuffer */
597         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
598         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
599
600         /* calc z */
601         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
602         ct= shsample->cbuf+ofs;
603         rz= *( (int **)(shsample->zbuf+ofs) );
604
605         if(*ct==3) {
606                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
607                 cz= (char *)&zsamp;
608                 cz[ACOMP]= ct[0];
609                 cz[BCOMP]= ct[1];
610                 cz[GCOMP]= ct[2];
611         }
612         else if(*ct==2) {
613                 ct= ((char *)rz);
614                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
615                 zsamp= *rz;
616         
617                 cz= (char *)&zsamp;
618                 cz[BCOMP]= ct[0];
619                 cz[GCOMP]= ct[1];
620         }
621         else if(*ct==1) {
622                 ct= ((char *)rz);
623                 ct+= 4+16*(ys & 15)+(xs & 15);
624                 zsamp= *rz;
625
626                 cz= (char *)&zsamp;
627                 cz[GCOMP]= ct[0];
628
629         }
630         else {
631                 /* got warning on this for 64 bits.... */
632                 /* but it's working code! in this case rz is not a pointer but zvalue (ton) */
633                 zsamp= (int) rz;
634         }
635
636         /* tricky stuff here; we use ints which can overflow easily with bias values */
637         
638         if(zsamp > zs) return 1.0;              /* absolute no shadow */
639         else if(zs < - 0x7FFFFE00 + bias) return 1.0;   /* extreme close to clipstart */
640         else if(zsamp < zs-bias) return 0.0 ;   /* absolute in shadow */
641         else {                                  /* soft area */
642                 
643                 temp=  ( (float)(zs- zsamp) )/(float)bias;
644                 return 1.0 - temp*temp;
645                         
646         }
647 }
648
649 /* the externally called shadow testing (reading) function */
650 /* return 1.0: no shadow at all */
651 float testshadowbuf(ShadBuf *shb, float *rco, float *dxco, float *dyco, float inp, float mat_bias)
652 {
653         ShadSampleBuf *shsample;
654         float fac, co[4], dx[3], dy[3], shadfac=0.0f;
655         float xs1,ys1, siz, *jit, *weight, xres, yres, biasf;
656         int xs, ys, zs, bias, *rz;
657         short a, num;
658         
659         /* crash preventer */
660         if(shb->buffers.first==NULL)
661                 return 1.0f;
662         
663         if(inp <= 0.0f) return 0.0f;
664
665         /* rotate renderco en osaco */
666         siz= 0.5f*(float)shb->size;
667         VECCOPY(co, rco);
668         co[3]= 1.0f;
669
670         MTC_Mat4MulVec4fl(shb->persmat, co);    /* rational hom co */
671
672         xs1= siz*(1.0f+co[0]/co[3]);
673         ys1= siz*(1.0f+co[1]/co[3]);
674
675         /* Clip for z: clipsta and clipend clip values of the shadow buffer. We
676                 * can test for -1.0/1.0 because of the properties of the
677                 * coordinate transformations. */
678         fac= (co[2]/co[3]);
679
680         if(fac>=1.0f) {
681                 return 0.0f;
682         } else if(fac<= -1.0f) {
683                 return 1.0f;
684         }
685
686         zs= ((float)0x7FFFFFFF)*fac;
687
688         /* take num*num samples, increase area with fac */
689         num= shb->samp*shb->samp;
690         fac= shb->soft;
691         
692         if(mat_bias!=0.0f) biasf= shb->bias*mat_bias;
693         else biasf= shb->bias;
694         /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors 
695            on cube edges, with one side being almost frontal lighted (ton)  */
696         bias= (1.5f-inp*inp)*biasf;
697         
698         if(num==1) {
699                 for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
700                         shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
701                 
702                 return shadfac/(float)shb->totbuf;
703         }
704
705         /* calculate filter size */
706         co[0]= rco[0]+dxco[0];
707         co[1]= rco[1]+dxco[1];
708         co[2]= rco[2]+dxco[2];
709         co[3]= 1.0;
710         MTC_Mat4MulVec4fl(shb->persmat,co);     /* rational hom co */
711         dx[0]= xs1- siz*(1.0+co[0]/co[3]);
712         dx[1]= ys1- siz*(1.0+co[1]/co[3]);
713         
714         co[0]= rco[0]+dyco[0];
715         co[1]= rco[1]+dyco[1];
716         co[2]= rco[2]+dyco[2];
717         co[3]= 1.0;
718         MTC_Mat4MulVec4fl(shb->persmat,co);     /* rational hom co */
719         dy[0]= xs1- siz*(1.0+co[0]/co[3]);
720         dy[1]= ys1- siz*(1.0+co[1]/co[3]);
721         
722         xres= fac*( fabs(dx[0])+fabs(dy[0]) );
723         yres= fac*( fabs(dx[1])+fabs(dy[1]) );
724         if(xres<fac) xres= fac;
725         if(yres<fac) yres= fac;
726         
727         xs1-= (xres)/2;
728         ys1-= (yres)/2;
729
730         if(xres<16.0f && yres<16.0f) {
731                 shsample= shb->buffers.first;
732             if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
733                         if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
734                                 if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
735                                         if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
736                                                 return readshadowbuf(shb, shsample, bias,(int)xs1, (int)ys1, zs);
737                                         }
738                                 }
739                         }
740             }
741         }
742         
743         for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
744                 jit= shb->jit;
745                 weight= shb->weight;
746                 
747                 for(a=num; a>0; a--, jit+=2, weight++) {
748                         /* instead of jit i tried random: ugly! */
749                         /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
750                         /* xs1 and ys1 are already corrected to be corner of sample area */
751                         xs= xs1 + xres*(jit[0] + 0.5f);
752                         ys= ys1 + yres*(jit[1] + 0.5f);
753                         
754                         shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
755                 }
756         }
757
758         /* Renormalizes for the sample number: */
759         return shadfac/(float)shb->totbuf;
760 }
761
762 /* different function... sampling behind clipend can be LIGHT, bias is negative! */
763 /* return: light */
764 static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
765 {
766         float temp;
767         int *rz, ofs;
768         int bias, zbias, zsamp;
769         char *ct, *cz;
770
771         /* negative! The other side is more important */
772         bias= -shb->bias;
773         
774         /* simpleclip */
775         if(xs<0 || ys<0) return 0.0;
776         if(xs>=shb->size || ys>=shb->size) return 0.0;
777
778         /* calc z */
779         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
780         ct= shsample->cbuf+ofs;
781         rz= *( (int **)(shsample->zbuf+ofs) );
782
783         if(*ct==3) {
784                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
785                 cz= (char *)&zsamp;
786                 zsamp= 0;
787                 cz[ACOMP]= ct[0];
788                 cz[BCOMP]= ct[1];
789                 cz[GCOMP]= ct[2];
790         }
791         else if(*ct==2) {
792                 ct= ((char *)rz);
793                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
794                 zsamp= *rz;
795         
796                 cz= (char *)&zsamp;
797                 cz[BCOMP]= ct[0];
798                 cz[GCOMP]= ct[1];
799         }
800         else if(*ct==1) {
801                 ct= ((char *)rz);
802                 ct+= 4+16*(ys & 15)+(xs & 15);
803                 zsamp= *rz;
804
805                 cz= (char *)&zsamp;
806                 cz[GCOMP]= ct[0];
807
808         }
809         else {
810                 /* same as before */
811                 /* still working code! (ton) */
812                 zsamp= (int) rz;
813         }
814
815         /* NO schadow when sampled at 'eternal' distance */
816
817         if(zsamp >= 0x7FFFFE00) return 1.0; 
818
819         if(zsamp > zs) return 1.0;              /* absolute no shadww */
820         else {
821                 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
822                 zbias= 0x7fffffff - zs;
823                 if(zbias > -bias) {
824                         if( zsamp < zs-bias) return 0.0 ;       /* absolute in shadow */
825                 }
826                 else return 0.0 ;       /* absolute shadow */
827         }
828
829         /* soft area */
830         
831         temp=  ( (float)(zs- zsamp) )/(float)bias;
832         return 1.0 - temp*temp;
833 }
834
835
836 float shadow_halo(LampRen *lar, float *p1, float *p2)
837 {
838         /* p1 p2 already are rotated in spot-space */
839         ShadBuf *shb= lar->shb;
840         ShadSampleBuf *shsample;
841         float co[4], siz;
842         float labda, labdao, labdax, labday, ldx, ldy;
843         float zf, xf1, yf1, zf1, xf2, yf2, zf2;
844         float count, lightcount;
845         int x, y, z, xs1, ys1;
846         int dx = 0, dy = 0;
847         
848         siz= 0.5*(float)shb->size;
849         
850         co[0]= p1[0];
851         co[1]= p1[1];
852         co[2]= p1[2]/lar->sh_zfac;
853         co[3]= 1.0;
854         MTC_Mat4MulVec4fl(shb->winmat, co);     /* rational hom co */
855         xf1= siz*(1.0+co[0]/co[3]);
856         yf1= siz*(1.0+co[1]/co[3]);
857         zf1= (co[2]/co[3]);
858
859
860         co[0]= p2[0];
861         co[1]= p2[1];
862         co[2]= p2[2]/lar->sh_zfac;
863         co[3]= 1.0;
864         MTC_Mat4MulVec4fl(shb->winmat, co);     /* rational hom co */
865         xf2= siz*(1.0+co[0]/co[3]);
866         yf2= siz*(1.0+co[1]/co[3]);
867         zf2= (co[2]/co[3]);
868
869         /* the 2dda (a pixel line formula) */
870
871         xs1= (int)xf1;
872         ys1= (int)yf1;
873
874         if(xf1 != xf2) {
875                 if(xf2-xf1 > 0.0) {
876                         labdax= (xf1-xs1-1.0)/(xf1-xf2);
877                         ldx= -shb->shadhalostep/(xf1-xf2);
878                         dx= shb->shadhalostep;
879                 }
880                 else {
881                         labdax= (xf1-xs1)/(xf1-xf2);
882                         ldx= shb->shadhalostep/(xf1-xf2);
883                         dx= -shb->shadhalostep;
884                 }
885         }
886         else {
887                 labdax= 1.0;
888                 ldx= 0.0;
889         }
890
891         if(yf1 != yf2) {
892                 if(yf2-yf1 > 0.0) {
893                         labday= (yf1-ys1-1.0)/(yf1-yf2);
894                         ldy= -shb->shadhalostep/(yf1-yf2);
895                         dy= shb->shadhalostep;
896                 }
897                 else {
898                         labday= (yf1-ys1)/(yf1-yf2);
899                         ldy= shb->shadhalostep/(yf1-yf2);
900                         dy= -shb->shadhalostep;
901                 }
902         }
903         else {
904                 labday= 1.0;
905                 ldy= 0.0;
906         }
907         
908         x= xs1;
909         y= ys1;
910         labda= count= lightcount= 0.0;
911
912 /* printf("start %x %x  \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
913
914         while(1) {
915                 labdao= labda;
916                 
917                 if(labdax==labday) {
918                         labdax+= ldx;
919                         x+= dx;
920                         labday+= ldy;
921                         y+= dy;
922                 }
923                 else {
924                         if(labdax<labday) {
925                                 labdax+= ldx;
926                                 x+= dx;
927                         } else {
928                                 labday+= ldy;
929                                 y+= dy;
930                         }
931                 }
932                 
933                 labda= MIN2(labdax, labday);
934                 if(labda==labdao || labda>=1.0) break;
935                 
936                 zf= zf1 + labda*(zf2-zf1);
937                 count+= (float)shb->totbuf;
938
939                 if(zf<= -1.0) lightcount += 1.0;        /* close to the spot */
940                 else {
941                 
942                         /* make sure, behind the clipend we extend halolines. */
943                         if(zf>=1.0) z= 0x7FFFF000;
944                         else z= (int)(0x7FFFF000*zf);
945                         
946                         for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
947                                 lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
948                         
949                 }
950         }
951         
952         if(count!=0.0) return (lightcount/count);
953         return 0.0;
954         
955 }
956
957
958 /* ********************* Irregular Shadow Buffer (ISB) ************* */
959 /* ********** storage of all view samples in a raster of lists ***** */
960
961 /* based on several articles describing this method, like:
962 The Irregular Z-Buffer and its Application to Shadow Mapping
963 Gregory S. Johnson - William R. Mark - Christopher A. Burns 
964 and
965 Alias-Free Shadow Maps
966 Timo Aila and Samuli Laine
967 */
968
969 /* bsp structure (actually kd tree) */
970
971 #define BSPMAX_SAMPLE   128
972 #define BSPMAX_DEPTH    32
973
974 /* aligned with struct rctf */
975 typedef struct Boxf {
976         float xmin, xmax;
977         float ymin, ymax;
978         float zmin, zmax;
979 } Boxf;
980
981 typedef struct ISBBranch {
982         struct ISBBranch *left, *right;
983         float divider[2];
984         Boxf box;
985         short totsamp, index, full, unused;
986         ISBSample **samples;
987 } ISBBranch;
988
989 typedef struct BSPFace {
990         Boxf box;
991         float *v1, *v2, *v3, *v4;
992         int obi;                /* object for face lookup */
993         int facenr;             /* index to retrieve VlakRen */
994         int type;               /* only for strand now */
995         short shad_alpha, is_full;
996         
997         /* strand caching data, optimize for point_behind_strand() */
998         float radline, radline_end, len;
999         float vec1[3], vec2[3], rc[3];
1000 } BSPFace;
1001
1002 /* boxes are in lamp projection */
1003 static void init_box(Boxf *box)
1004 {
1005         box->xmin= 1000000.0f;
1006         box->xmax= 0;
1007         box->ymin= 1000000.0f;
1008         box->ymax= 0;
1009         box->zmin= 0x7FFFFFFF;
1010         box->zmax= - 0x7FFFFFFF;
1011 }
1012
1013 /* use v1 to calculate boundbox */
1014 static void bound_boxf(Boxf *box, float *v1)
1015 {
1016         if(v1[0] < box->xmin) box->xmin= v1[0];
1017         if(v1[0] > box->xmax) box->xmax= v1[0];
1018         if(v1[1] < box->ymin) box->ymin= v1[1];
1019         if(v1[1] > box->ymax) box->ymax= v1[1];
1020         if(v1[2] < box->zmin) box->zmin= v1[2];
1021         if(v1[2] > box->zmax) box->zmax= v1[2];
1022 }
1023
1024 /* use v1 to calculate boundbox */
1025 static void bound_rectf(rctf *box, float *v1)
1026 {
1027         if(v1[0] < box->xmin) box->xmin= v1[0];
1028         if(v1[0] > box->xmax) box->xmax= v1[0];
1029         if(v1[1] < box->ymin) box->ymin= v1[1];
1030         if(v1[1] > box->ymax) box->ymax= v1[1];
1031 }
1032
1033
1034 /* halfway splitting, for initializing a more regular tree */
1035 static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
1036 {
1037         
1038         /* if level > 0 we create new branches and go deeper*/
1039         if(level > 0) {
1040                 ISBBranch *left, *right;
1041                 int i;
1042                 
1043                 /* splitpoint */
1044                 root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
1045                 root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
1046                 
1047                 /* find best splitpoint */
1048                 if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1049                         i= root->index= 0;
1050                 else
1051                         i= root->index= 1;
1052                 
1053                 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1054                 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1055                 
1056                 /* box info */
1057                 left->box= root->box;
1058                 right->box= root->box;
1059                 if(i==0) {
1060                         left->box.xmax= root->divider[0];
1061                         right->box.xmin= root->divider[0];
1062                 }
1063                 else {
1064                         left->box.ymax= root->divider[1];
1065                         right->box.ymin= root->divider[1];
1066                 }
1067                 isb_bsp_split_init(left, mem, level-1);
1068                 isb_bsp_split_init(right, mem, level-1);
1069         }
1070         else {
1071                 /* we add sample array */
1072                 root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1073         }
1074 }
1075
1076 /* note; if all samples on same location we just spread them over 2 new branches */
1077 static void isb_bsp_split(ISBBranch *root, MemArena *mem)
1078 {
1079         ISBBranch *left, *right;
1080         ISBSample *samples[BSPMAX_SAMPLE];
1081         int a, i;
1082
1083         /* splitpoint */
1084         root->divider[0]= root->divider[1]= 0.0f;
1085         for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1086                 root->divider[0]+= root->samples[a]->zco[0];
1087                 root->divider[1]+= root->samples[a]->zco[1];
1088         }
1089         root->divider[0]/= BSPMAX_SAMPLE;
1090         root->divider[1]/= BSPMAX_SAMPLE;
1091         
1092         /* find best splitpoint */
1093         if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1094                 i= root->index= 0;
1095         else
1096                 i= root->index= 1;
1097         
1098         /* new branches */
1099         left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1100         right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1101
1102         /* new sample array */
1103         left->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1104         right->samples= samples; // tmp
1105         
1106         /* split samples */
1107         for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1108                 int comp= 0;
1109                 /* this prevents adding samples all to 1 branch when divider is equal to samples */
1110                 if(root->samples[a]->zco[i] == root->divider[i])
1111                         comp= a & 1;
1112                 else if(root->samples[a]->zco[i] < root->divider[i])
1113                         comp= 1;
1114                 
1115                 if(comp==1) {
1116                         left->samples[left->totsamp]= root->samples[a];
1117                         left->totsamp++;
1118                 }
1119                 else {
1120                         right->samples[right->totsamp]= root->samples[a];
1121                         right->totsamp++;
1122                 }
1123         }
1124         
1125         /* copy samples from tmp */
1126         memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
1127         right->samples= root->samples;
1128         root->samples= NULL;
1129         
1130         /* box info */
1131         left->box= root->box;
1132         right->box= root->box;
1133         if(i==0) {
1134                 left->box.xmax= root->divider[0];
1135                 right->box.xmin= root->divider[0];
1136         }
1137         else {
1138                 left->box.ymax= root->divider[1];
1139                 right->box.ymin= root->divider[1];
1140         }
1141 }
1142
1143 /* inserts sample in main tree, also splits on threshold */
1144 /* returns 1 if error */
1145 static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
1146 {
1147         ISBBranch *bspn= root;
1148         float *zco= sample->zco;
1149         int i= 0;
1150         
1151         /* debug counter, also used to check if something was filled in ever */
1152         root->totsamp++;
1153         
1154         /* going over branches until last one found */
1155         while(bspn->left) {
1156                 if(zco[bspn->index] <= bspn->divider[bspn->index])
1157                         bspn= bspn->left;
1158                 else
1159                         bspn= bspn->right;
1160                 i++;
1161         }
1162         /* bspn now is the last branch */
1163         
1164         if(bspn->totsamp==BSPMAX_SAMPLE) {
1165                 printf("error in bsp branch\n");        /* only for debug, cannot happen */
1166                 return 1;
1167         }
1168         
1169         /* insert */
1170         bspn->samples[bspn->totsamp]= sample;
1171         bspn->totsamp++;
1172
1173         /* split if allowed and needed */
1174         if(bspn->totsamp==BSPMAX_SAMPLE) {
1175                 if(i==BSPMAX_DEPTH) {
1176                         bspn->totsamp--;        /* stop filling in... will give errors */
1177                         return 1;
1178                 }
1179                 isb_bsp_split(bspn, memarena);
1180         }
1181         return 0;
1182 }
1183
1184 static float VecLen2f( float *v1, float *v2)
1185 {
1186         float x= v1[0]-v2[0];
1187         float y= v1[1]-v2[1];
1188         return (float)sqrt(x*x+y*y);
1189 }
1190
1191 /* initialize vars in face, for optimal point-in-face test */
1192 static void bspface_init_strand(BSPFace *face) 
1193 {
1194         
1195         face->radline= 0.5f*VecLen2f(face->v1, face->v2);
1196         
1197         VecMidf(face->vec1, face->v1, face->v2);
1198         if(face->v4)
1199                 VecMidf(face->vec2, face->v3, face->v4);
1200         else
1201                 VECCOPY(face->vec2, face->v3);
1202         
1203         face->rc[0]= face->vec2[0]-face->vec1[0];
1204         face->rc[1]= face->vec2[1]-face->vec1[1];
1205         face->rc[2]= face->vec2[2]-face->vec1[2];
1206         
1207         face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
1208         
1209         if(face->len!=0.0f) {
1210                 face->radline_end= face->radline/sqrt(face->len);
1211                 face->len= 1.0f/face->len;
1212         }
1213 }
1214
1215 /* brought back to a simple 2d case */
1216 static int point_behind_strand(float *p, BSPFace *face)
1217 {
1218         /* v1 - v2 is radius, v1 - v3 length */
1219         float dist, rc[2], pt[2];
1220         
1221         /* using code from PdistVL2Dfl(), distance vec to line-piece */
1222
1223         if(face->len==0.0f) {
1224                 rc[0]= p[0]-face->vec1[0];
1225                 rc[1]= p[1]-face->vec1[1];
1226                 dist= (float)(sqrt(rc[0]*rc[0]+ rc[1]*rc[1]));
1227                 
1228                 if(dist < face->radline)
1229                         return 1;
1230         }
1231         else {
1232                 float labda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
1233                 
1234                 if(labda > -face->radline_end && labda < 1.0f+face->radline_end) { 
1235                         /* hesse for dist: */
1236                         //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len);
1237                         
1238                         pt[0]= labda*face->rc[0]+face->vec1[0];
1239                         pt[1]= labda*face->rc[1]+face->vec1[1];
1240                         
1241                         rc[0]= pt[0]-p[0];
1242                         rc[1]= pt[1]-p[1];
1243                         dist= (float)sqrt(rc[0]*rc[0]+ rc[1]*rc[1]);
1244                         
1245                         if(dist < face->radline) {
1246                                 float zval= face->vec1[2] + labda*face->rc[2];
1247                                 if(p[2] > zval)
1248                                         return 1;
1249                         }
1250                 }
1251         }
1252         return 0;
1253 }
1254
1255
1256 /* return 1 if inside. code derived from src/parametrizer.c */
1257 static int point_behind_tria2d(float *p, float *v1, float *v2, float *v3)
1258 {
1259         float a[2], c[2], h[2], div;
1260         float u, v;
1261         
1262         a[0] = v2[0] - v1[0];
1263         a[1] = v2[1] - v1[1];
1264         c[0] = v3[0] - v1[0];
1265         c[1] = v3[1] - v1[1];
1266         
1267         div = a[0]*c[1] - a[1]*c[0];
1268         if(div==0.0f)
1269                 return 0;
1270         
1271         h[0] = p[0] - v1[0];
1272         h[1] = p[1] - v1[1];
1273         
1274         div = 1.0f/div;
1275         
1276         u = (h[0]*c[1] - h[1]*c[0])*div;
1277         if(u >= 0.0f) {
1278                 v = (a[0]*h[1] - a[1]*h[0])*div;
1279                 if(v >= 0.0f) {
1280                         if( u + v <= 1.0f) {
1281                                 /* inside, now check if point p is behind */
1282                                 float z=  (1.0f-u-v)*v1[2] + u*v2[2] + v*v3[2];
1283                                 if(z <= p[2])
1284                                         return 1;
1285                         }
1286                 }
1287         }
1288         
1289         return 0;
1290 }
1291
1292 #if 0
1293 /* tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */
1294
1295 /* check if line v1-v2 has all rect points on other side of point v3 */
1296 static int rect_outside_line(rctf *rect, float *v1, float *v2, float *v3)
1297 {
1298         float a, b, c;
1299         int side;
1300         
1301         /* line formula for v1-v2 */
1302         a= v2[1]-v1[1];
1303         b= v1[0]-v2[0];
1304         c= -a*v1[0] - b*v1[1];
1305         side= a*v3[0] + b*v3[1] + c < 0.0f;
1306         
1307         /* the four quad points */
1308         if( side==(rect->xmin*a + rect->ymin*b + c >= 0.0f) )
1309                 if( side==(rect->xmax*a + rect->ymin*b + c >= 0.0f) )
1310                         if( side==(rect->xmax*a + rect->ymax*b + c >= 0.0f) )
1311                                 if( side==(rect->xmin*a + rect->ymax*b + c >= 0.0f) )
1312                                         return 1;
1313         return 0;
1314 }
1315
1316 /* check if one of the triangle edges separates all rect points on 1 side */
1317 static int rect_isect_tria(rctf *rect, float *v1, float *v2, float *v3)
1318 {
1319         if(rect_outside_line(rect, v1, v2, v3))
1320                 return 0;
1321         if(rect_outside_line(rect, v2, v3, v1))
1322                 return 0;
1323         if(rect_outside_line(rect, v3, v1, v2))
1324                 return 0;
1325         return 1;
1326 }
1327 #endif
1328
1329 /* if face overlaps a branch, it executes func. recursive */
1330 static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
1331 {
1332         
1333         /* are we descending? */
1334         if(bspn->left) {
1335                 /* hrmf, the box struct cannot be addressed with index */
1336                 if(bspn->index==0) {
1337                         if(face->box.xmin <= bspn->divider[0])
1338                                 isb_bsp_face_inside(bspn->left, face);
1339                         if(face->box.xmax > bspn->divider[0])
1340                                 isb_bsp_face_inside(bspn->right, face);
1341                 }
1342                 else {
1343                         if(face->box.ymin <= bspn->divider[1])
1344                                 isb_bsp_face_inside(bspn->left, face);
1345                         if(face->box.ymax > bspn->divider[1])
1346                                 isb_bsp_face_inside(bspn->right, face);
1347                 }
1348         }
1349         else {
1350                 /* else: end branch reached */
1351                 int a;
1352                 
1353                 if(bspn->totsamp==0) return;
1354                 
1355                 /* check for nodes entirely in shadow, can be skipped */
1356                 if(bspn->totsamp==bspn->full)
1357                         return;
1358                 
1359                 /* if bsp node is entirely in front of face, give up */
1360                 if(bspn->box.zmax < face->box.zmin)
1361                         return;
1362                 
1363                 /* if face boundbox is outside of branch rect, give up */
1364                 if(0==BLI_isect_rctf((rctf *)&face->box, (rctf *)&bspn->box, NULL))
1365                         return;
1366                 
1367                 /* test all points inside branch */
1368                 for(a=bspn->totsamp-1; a>=0; a--) {
1369                         ISBSample *samp= bspn->samples[a];
1370                         
1371                         if((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
1372                                 if(face->box.zmin < samp->zco[2]) {
1373                                         if(BLI_in_rctf((rctf *)&face->box, samp->zco[0], samp->zco[1])) {
1374                                                 int inshadow= 0;
1375                                                 
1376                                                 if(face->type) {
1377                                                         if(point_behind_strand(samp->zco, face)) 
1378                                                                 inshadow= 1;
1379                                                 }
1380                                                 else if( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
1381                                                         inshadow= 1;
1382                                                 else if(face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
1383                                                         inshadow= 1;
1384
1385                                                 if(inshadow) {
1386                                                         *(samp->shadfac) += face->shad_alpha;
1387                                                         /* optimize; is_full means shad_alpha==4096 */
1388                                                         if(*(samp->shadfac) >= 4096 || face->is_full) {
1389                                                                 bspn->full++;
1390                                                                 samp->shadfac= NULL;
1391                                                         }
1392                                                 }
1393                                         }
1394                                 }
1395                         }
1396                 }
1397         }
1398 }
1399
1400 /* based on available samples, recalculate the bounding box for bsp nodes, recursive */
1401 static void isb_bsp_recalc_box(ISBBranch *root)
1402 {
1403         if(root->left) {
1404                 isb_bsp_recalc_box(root->left);
1405                 isb_bsp_recalc_box(root->right);
1406         }
1407         else if(root->totsamp) {
1408                 int a;
1409                 
1410                 init_box(&root->box);
1411                 for(a=root->totsamp-1; a>=0; a--)
1412                         bound_boxf(&root->box, root->samples[a]->zco);
1413         }       
1414 }
1415
1416 /* callback function for zbuf clip */
1417 static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4)
1418 {
1419         BSPFace face;
1420         
1421         face.v1= v1;
1422         face.v2= v2;
1423         face.v3= v3;
1424         face.v4= v4;
1425         face.obi= obi;
1426         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1427         face.type= R_STRAND;
1428         if(R.osa)
1429                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1430         else
1431                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1432         
1433         face.is_full= (zspan->shad_alpha==1.0f);
1434         
1435         /* setup boundbox */
1436         init_box(&face.box);
1437         bound_boxf(&face.box, v1);
1438         bound_boxf(&face.box, v2);
1439         bound_boxf(&face.box, v3);
1440         if(v4)
1441                 bound_boxf(&face.box, v4);
1442         
1443         /* optimize values */
1444         bspface_init_strand(&face);
1445         
1446         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1447         
1448 }
1449
1450 /* callback function for zbuf clip */
1451 static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4) 
1452 {
1453         BSPFace face;
1454         
1455         face.v1= v1;
1456         face.v2= v2;
1457         face.v3= v3;
1458         face.v4= v4;
1459         face.obi= obi;
1460         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1461         face.type= 0;
1462         if(R.osa)
1463                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1464         else
1465                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1466         
1467         face.is_full= (zspan->shad_alpha==1.0f);
1468         
1469         /* setup boundbox */
1470         init_box(&face.box);
1471         bound_boxf(&face.box, v1);
1472         bound_boxf(&face.box, v2);
1473         bound_boxf(&face.box, v3);
1474         if(v4)
1475                 bound_boxf(&face.box, v4);
1476
1477         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1478 }
1479
1480 static int testclip_minmax(float *ho, float *minmax)
1481 {
1482         float wco= ho[3];
1483         int flag= 0;
1484         
1485         if( ho[0] > minmax[1]*wco) flag = 1;
1486         else if( ho[0]< minmax[0]*wco) flag = 2;
1487         
1488         if( ho[1] > minmax[3]*wco) flag |= 4;
1489         else if( ho[1]< minmax[2]*wco) flag |= 8;
1490         
1491         return flag;
1492 }
1493
1494 /* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
1495 static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
1496 {
1497         ObjectInstanceRen *obi;
1498         ObjectRen *obr;
1499         ShadBuf *shb= lar->shb;
1500         ZSpan zspan, zspanstrand;
1501         VlakRen *vlr= NULL;
1502         Material *ma= NULL;
1503         float minmaxf[4], winmat[4][4];
1504         int size= shb->size;
1505         int i, a, ok=1, lay= -1;
1506         
1507         /* further optimize, also sets minz maxz */
1508         isb_bsp_recalc_box(root);
1509         
1510         /* extra clipping for minmax */
1511         minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
1512         minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
1513         minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
1514         minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
1515         
1516         if(lar->mode & LA_LAYER) lay= lar->lay;
1517         
1518         /* (ab)use zspan, since we use zbuffer clipping code */
1519         zbuf_alloc_span(&zspan, size, size, re->clipcrop);
1520         
1521         zspan.zmulx=  ((float)size)/2.0f;
1522         zspan.zmuly=  ((float)size)/2.0f;
1523         zspan.zofsx= -0.5f;
1524         zspan.zofsy= -0.5f;
1525         
1526         /* pass on bsp root to zspan */
1527         zspan.rectz= (int *)root;
1528         
1529         /* filling methods */
1530         zspanstrand= zspan;
1531         //      zspan.zbuflinefunc= zbufline_onlyZ;
1532         zspan.zbuffunc= isb_bsp_test_face;
1533         zspanstrand.zbuffunc= isb_bsp_test_strand;
1534         
1535         for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
1536                 obr= obi->obr;
1537
1538                 if(obi->flag & R_TRANSFORMED)
1539                         Mat4MulMat4(winmat, obi->mat, shb->persmat);
1540                 else
1541                         Mat4CpyMat4(winmat, shb->persmat);
1542
1543                 for(a=0; a<obr->totvlak; a++) {
1544                         
1545                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
1546                         else vlr++;
1547                         
1548                         /* note, these conditions are copied in shadowbuf_autoclip() */
1549                         if(vlr->mat!= ma) {
1550                                 ma= vlr->mat;
1551                                 ok= 1;
1552                                 if((ma->mode & MA_SHADBUF)==0) ok= 0;
1553                                 if(ma->mode & MA_WIRE) ok= 0;
1554                                 zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
1555                         }
1556                         
1557                         if(ok && (vlr->lay & lay)) {
1558                                 float hoco[4][4];
1559                                 int c1, c2, c3, c4=0;
1560                                 int d1, d2, d3, d4=0;
1561                                 int partclip;
1562                                 
1563                                 /* create hocos per face, it is while render */
1564                                 projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
1565                                 projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
1566                                 projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
1567                                 if(vlr->v4) {
1568                                         projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
1569                                 }
1570
1571                                 /* minmax clipping */
1572                                 if(vlr->v4) partclip= d1 & d2 & d3 & d4;
1573                                 else partclip= d1 & d2 & d3;
1574                                 
1575                                 if(partclip==0) {
1576                                         
1577                                         /* window clipping */
1578                                         c1= testclip(hoco[0]); 
1579                                         c2= testclip(hoco[1]); 
1580                                         c3= testclip(hoco[2]); 
1581                                         if(vlr->v4)
1582                                                 c4= testclip(hoco[3]); 
1583                                         
1584                                         /* ***** NO WIRE YET */                 
1585                                         if(ma->mode & MA_WIRE)  {
1586                                                 if(vlr->v4)
1587                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
1588                                                 else
1589                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], 0, c1, c2, c3, 0);
1590                                         }
1591                                         else if(vlr->v4) {
1592                                                 if(vlr->flag & R_STRAND)
1593                                                         zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
1594                                                 else
1595                                                         zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
1596                                         }
1597                                         else
1598                                                 zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
1599                                         
1600                                 }
1601                         }
1602                 }
1603         }
1604         
1605         zbuf_free_span(&zspan);
1606 }
1607
1608 /* returns 1 when the viewpixel is visible in lampbuffer */
1609 static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float *co)
1610 {
1611         float hoco[4], v1[3], nor[3];
1612         float dface, fac, siz;
1613         
1614         RE_vlakren_get_normal(&R, obi, vlr, nor);
1615         VECCOPY(v1, vlr->v1->co);
1616         if(obi->flag & R_TRANSFORMED)
1617                 Mat4MulVecfl(obi->mat, v1);
1618
1619         /* from shadepixel() */
1620         dface= v1[0]*nor[0] + v1[1]*nor[1] + v1[2]*nor[2];
1621         hoco[3]= 1.0f;
1622         
1623         /* ortho viewplane cannot intersect using view vector originating in (0,0,0) */
1624         if(R.r.mode & R_ORTHO) {
1625                 /* x and y 3d coordinate can be derived from pixel coord and winmat */
1626                 float fx= 2.0/(R.winx*R.winmat[0][0]);
1627                 float fy= 2.0/(R.winy*R.winmat[1][1]);
1628                 
1629                 hoco[0]= (x - 0.5*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
1630                 hoco[1]= (y - 0.5*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
1631                 
1632                 /* using a*x + b*y + c*z = d equation, (a b c) is normal */
1633                 if(nor[2]!=0.0f)
1634                         hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
1635                 else
1636                         hoco[2]= 0.0f;
1637         }
1638         else {
1639                 float div, view[3];
1640                 
1641                 calc_view_vector(view, x, y);
1642                 
1643                 div= nor[0]*view[0] + nor[1]*view[1] + nor[2]*view[2];
1644                 if (div==0.0f) 
1645                         return 0;
1646                 
1647                 fac= dface/div;
1648                 
1649                 hoco[0]= fac*view[0];
1650                 hoco[1]= fac*view[1];
1651                 hoco[2]= fac*view[2];
1652         }
1653         
1654         /* move 3d vector to lampbuf */
1655         MTC_Mat4MulVec4fl(shb->persmat, hoco);  /* rational hom co */
1656         
1657         /* clip We can test for -1.0/1.0 because of the properties of the
1658          * coordinate transformations. */
1659         fac= fabs(hoco[3]);
1660         if(hoco[0]<-fac || hoco[0]>fac)
1661                 return 0;
1662         if(hoco[1]<-fac || hoco[1]>fac)
1663                 return 0;
1664         if(hoco[2]<-fac || hoco[2]>fac)
1665                 return 0;
1666         
1667         siz= 0.5f*(float)shb->size;
1668         co[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
1669         co[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
1670         co[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
1671         
1672         /* XXXX bias, much less than normal shadbuf, or do we need a constant? */
1673         co[2] -= 0.05f*shb->bias;
1674         
1675         return 1;
1676 }
1677
1678 /* storage of shadow results, solid osa and transp case */
1679 static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
1680 {
1681         ISBShadfacA *new;
1682         float shadfacf;
1683         
1684         /* in osa case, the samples were filled in with factor 1.0/R.osa. if fewer samples we have to correct */
1685         if(R.osa)
1686                 shadfacf= ((float)shadfac*R.osa)/(4096.0*samples);
1687         else
1688                 shadfacf= ((float)shadfac)/(4096.0);
1689         
1690         new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
1691         new->obi= obi;
1692         new->facenr= facenr & ~RE_QUAD_OFFS;
1693         new->shadfac= shadfacf;
1694         if(*isbsapp)
1695                 new->next= (*isbsapp);
1696         else
1697                 new->next= NULL;
1698         
1699         *isbsapp= new;
1700 }
1701
1702 /* adding samples, solid case */
1703 static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
1704 {
1705         int xi, yi, *xcos, *ycos;
1706         int sample, bsp_err= 0;
1707         
1708         /* bsp split doesn't like to handle regular sequenes */
1709         xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
1710         ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
1711         for(xi=0; xi<pa->rectx; xi++)
1712                 xcos[xi]= xi;
1713         for(yi=0; yi<pa->recty; yi++)
1714                 ycos[yi]= yi;
1715         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
1716         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
1717         
1718         for(sample=0; sample<(R.osa?R.osa:1); sample++) {
1719                 ISBSample *samp= samplebuf[sample], *samp1;
1720                 
1721                 for(yi=0; yi<pa->recty; yi++) {
1722                         int y= ycos[yi];
1723                         for(xi=0; xi<pa->rectx; xi++) {
1724                                 int x= xcos[xi];
1725                                 samp1= samp + y*pa->rectx + x;
1726                                 if(samp1->facenr)
1727                                         bsp_err |= isb_bsp_insert(root, memarena, samp1);
1728                         }
1729                         if(bsp_err) break;
1730                 }
1731         }       
1732         
1733         MEM_freeN(xcos);
1734         MEM_freeN(ycos);
1735
1736         return bsp_err;
1737 }
1738
1739 /* solid version */
1740 /* lar->shb, pa->rectz and pa->rectp should exist */
1741 static void isb_make_buffer(RenderPart *pa, LampRen *lar)
1742 {
1743         ShadBuf *shb= lar->shb;
1744         ISBData *isbdata;
1745         ISBSample *samp, *samplebuf[16];        /* should be RE_MAX_OSA */
1746         ISBBranch root;
1747         MemArena *memarena;
1748         long *rd;
1749         int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
1750         
1751         /* storage for shadow, per thread */
1752         isbdata= shb->isb_result[pa->thread];
1753         
1754         /* to map the shi->xs and ys coordinate */
1755         isbdata->minx= pa->disprect.xmin;
1756         isbdata->miny= pa->disprect.ymin;
1757         isbdata->rectx= pa->rectx;
1758         isbdata->recty= pa->recty;
1759         
1760         /* branches are added using memarena (32k branches) */
1761         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch));
1762         BLI_memarena_use_calloc(memarena);
1763         
1764         /* samplebuf is in camera view space (pixels) */
1765         for(sample=0; sample<(R.osa?R.osa:1); sample++)
1766                 samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
1767         
1768         /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
1769         if(R.osa==0)
1770                 isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
1771         
1772         /* setup bsp root */
1773         memset(&root, 0, sizeof(ISBBranch));
1774         root.box.xmin= (float)shb->size;
1775         root.box.ymin= (float)shb->size;
1776         
1777         /* create the sample buffers */
1778         for(sindex=0, y=0; y<pa->recty; y++) {
1779                 for(x=0; x<pa->rectx; x++, sindex++) {
1780                         
1781                         /* this makes it a long function, but splitting it out would mean 10+ arguments */
1782                         /* first check OSA case */
1783                         if(R.osa) {
1784                                 rd= pa->rectdaps + sindex;
1785                                 if(*rd) {
1786                                         float xs= (float)(x + pa->disprect.xmin);
1787                                         float ys= (float)(y + pa->disprect.ymin);
1788                                         
1789                                         for(sample=0; sample<R.osa; sample++) {
1790                                                 PixStr *ps= (PixStr *)(*rd);
1791                                                 int mask= (1<<sample);
1792                                                 
1793                                                 while(ps) {
1794                                                         if(ps->mask & mask)
1795                                                                 break;
1796                                                         ps= ps->next;
1797                                                 }
1798                                                 if(ps && ps->facenr>0) {
1799                                                         ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
1800                                                         ObjectRen *obr= obi->obr;
1801                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
1802                                                         
1803                                                         samp= samplebuf[sample] + sindex;
1804                                                         /* convert image plane pixel location to lamp buffer space */
1805                                                         if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
1806                                                                 samp->obi= ps->obi;
1807                                                                 samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
1808                                                                 ps->shadfac= 0;
1809                                                                 samp->shadfac= &ps->shadfac;
1810                                                                 bound_rectf((rctf *)&root.box, samp->zco);
1811                                                         }
1812                                                 }
1813                                         }
1814                                 }
1815                         }
1816                         else {
1817                                 rectp= pa->rectp + sindex;
1818                                 recto= pa->recto + sindex;
1819                                 if(*rectp>0) {
1820                                         ObjectInstanceRen *obi= &R.objectinstance[*recto];
1821                                         ObjectRen *obr= obi->obr;
1822                                         VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
1823                                         float xs= (float)(x + pa->disprect.xmin);
1824                                         float ys= (float)(y + pa->disprect.ymin);
1825                                         
1826                                         samp= samplebuf[0] + sindex;
1827                                         /* convert image plane pixel location to lamp buffer space */
1828                                         if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
1829                                                 samp->obi= *recto;
1830                                                 samp->facenr= *rectp & ~RE_QUAD_OFFS;
1831                                                 samp->shadfac= isbdata->shadfacs + sindex;
1832                                                 bound_rectf((rctf *)&root.box, samp->zco);
1833                                         }
1834                                 }
1835                         }
1836                 }
1837         }
1838         
1839         /* simple method to see if we have samples */
1840         if(root.box.xmin != (float)shb->size) {
1841                 /* now create a regular split, root.box has the initial bounding box of all pixels */
1842                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
1843                 isb_bsp_split_init(&root, memarena, 8);
1844                 
1845                 /* insert all samples in BSP now */
1846                 bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
1847                         
1848                 if(bsp_err==0) {
1849                         /* go over all faces and fill in shadow values */
1850                         
1851                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
1852                         
1853                         /* copy shadow samples to persistant buffer, reduce memory overhead */
1854                         if(R.osa) {
1855                                 ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
1856                                 
1857                                 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA));
1858                                 BLI_memarena_use_calloc(isbdata->memarena);
1859
1860                                 for(rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
1861                                         
1862                                         if(*rd) {
1863                                                 PixStr *ps= (PixStr *)(*rd);
1864                                                 while(ps) {
1865                                                         if(ps->shadfac)
1866                                                                 isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
1867                                                         ps= ps->next;
1868                                                 }
1869                                         }
1870                                 }
1871                         }
1872                 }
1873         }
1874         else {
1875                 if(isbdata->shadfacs) {
1876                         MEM_freeN(isbdata->shadfacs);
1877                         isbdata->shadfacs= NULL;
1878                 }
1879         }
1880
1881         /* free BSP */
1882         BLI_memarena_free(memarena);
1883         
1884         /* free samples */
1885         for(x=0; x<(R.osa?R.osa:1); x++)
1886                 MEM_freeN(samplebuf[x]);
1887         
1888         if(bsp_err) printf("error in filling bsp\n");
1889 }
1890
1891 /* add sample to buffer, isbsa is the root sample in a buffer */
1892 static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
1893 {
1894         ISBSampleA *new;
1895         
1896         new= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
1897         if(*isbsa)
1898                 new->next= (*isbsa);
1899         else
1900                 new->next= NULL;
1901         
1902         *isbsa= new;
1903         return new;
1904 }
1905
1906 /* adding samples in BSP, transparent case */
1907 static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
1908 {
1909         int xi, yi, *xcos, *ycos;
1910         int sample, bsp_err= 0;
1911         
1912         /* bsp split doesn't like to handle regular sequenes */
1913         xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
1914         ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
1915         for(xi=0; xi<pa->rectx; xi++)
1916                 xcos[xi]= xi;
1917         for(yi=0; yi<pa->recty; yi++)
1918                 ycos[yi]= yi;
1919         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
1920         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
1921         
1922         for(sample=0; sample<(R.osa?R.osa:1); sample++) {
1923                 ISBSampleA **samp= samplebuf[sample], *samp1;
1924                 
1925                 for(yi=0; yi<pa->recty; yi++) {
1926                         int y= ycos[yi];
1927                         for(xi=0; xi<pa->rectx; xi++) {
1928                                 int x= xcos[xi];
1929                                 
1930                                 samp1= *(samp + y*pa->rectx + x);
1931                                 while(samp1) {
1932                                         bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
1933                                         samp1= samp1->next;
1934                                 }
1935                         }
1936                         if(bsp_err) break;
1937                 }
1938         }       
1939         
1940         MEM_freeN(xcos);
1941         MEM_freeN(ycos);
1942         
1943         return bsp_err;
1944 }
1945
1946
1947 /* Ztransp version */
1948 /* lar->shb, pa->rectz and pa->rectp should exist */
1949 static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
1950 {
1951         ShadBuf *shb= lar->shb;
1952         ISBData *isbdata;
1953         ISBSampleA *samp, **samplebuf[16];      /* MAX_OSA */
1954         ISBBranch root;
1955         MemArena *memarena;
1956         APixstr *ap;
1957         int x, y, sindex, sample, bsp_err=0;
1958         
1959         /* storage for shadow, per thread */
1960         isbdata= shb->isb_result[pa->thread];
1961         
1962         /* to map the shi->xs and ys coordinate */
1963         isbdata->minx= pa->disprect.xmin;
1964         isbdata->miny= pa->disprect.ymin;
1965         isbdata->rectx= pa->rectx;
1966         isbdata->recty= pa->recty;
1967         
1968         /* branches are added using memarena (32k branches) */
1969         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch));
1970         BLI_memarena_use_calloc(memarena);
1971         
1972         /* samplebuf is in camera view space (pixels) */
1973         for(sample=0; sample<(R.osa?R.osa:1); sample++)
1974                 samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
1975         
1976         /* setup bsp root */
1977         memset(&root, 0, sizeof(ISBBranch));
1978         root.box.xmin= (float)shb->size;
1979         root.box.ymin= (float)shb->size;
1980
1981         /* create the sample buffers */
1982         for(ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
1983                 for(x=0; x<pa->rectx; x++, sindex++, ap++) {
1984                         
1985                         if(ap->p[0]) {
1986                                 APixstr *apn;
1987                                 float xs= (float)(x + pa->disprect.xmin);
1988                                 float ys= (float)(y + pa->disprect.ymin);
1989                                 
1990                                 for(apn=ap; apn; apn= apn->next) {
1991                                         int a;
1992                                         for(a=0; a<4; a++) {
1993                                                 if(apn->p[a]) {
1994                                                         ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
1995                                                         ObjectRen *obr= obi->obr;
1996                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
1997                                                         float zco[3];
1998                                                         
1999                                                         /* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
2000                                                         apn->shadfac[a]= 0;
2001                                                         
2002                                                         if(R.osa) {
2003                                                                 for(sample=0; sample<R.osa; sample++) {
2004                                                                         int mask= (1<<sample);
2005                                                                         
2006                                                                         if(apn->mask[a] & mask) {
2007                                                                                 
2008                                                                                 /* convert image plane pixel location to lamp buffer space */
2009                                                                                 if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
2010                                                                                         samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
2011                                                                                         samp->obi= apn->obi[a];
2012                                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2013                                                                                         samp->shadfac= &apn->shadfac[a];
2014                                                                                         
2015                                                                                         VECCOPY(samp->zco, zco);
2016                                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2017                                                                                 }
2018                                                                         }
2019                                                                 }
2020                                                         }
2021                                                         else {
2022                                                                 
2023                                                                 /* convert image plane pixel location to lamp buffer space */
2024                                                                 if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
2025                                                                         
2026                                                                         samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
2027                                                                         samp->obi= apn->obi[a];
2028                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2029                                                                         samp->shadfac= &apn->shadfac[a];
2030                                                                         
2031                                                                         VECCOPY(samp->zco, zco);
2032                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2033                                                                 }
2034                                                         }
2035                                                 }
2036                                         }
2037                                 }
2038                         }
2039                 }
2040         }
2041         
2042         /* simple method to see if we have samples */
2043         if(root.box.xmin != (float)shb->size) {
2044                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2045                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2046                 isb_bsp_split_init(&root, memarena, 8);
2047                 
2048                 /* insert all samples in BSP now */
2049                 bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
2050                 
2051                 if(bsp_err==0) {
2052                         ISBShadfacA **isbsa;
2053                         
2054                         /* go over all faces and fill in shadow values */
2055                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2056                         
2057                         /* copy shadow samples to persistant buffer, reduce memory overhead */
2058                         isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2059                         
2060                         isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA));
2061                         
2062                         for(ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
2063                                         
2064                                 if(ap->p[0]) {
2065                                         APixstr *apn;
2066                                         for(apn=ap; apn; apn= apn->next) {
2067                                                 int a;
2068                                                 for(a=0; a<4; a++) {
2069                                                         if(apn->p[a] && apn->shadfac[a]) {
2070                                                                 if(R.osa)
2071                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
2072                                                                 else
2073                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);
2074                                                         }
2075                                                 }
2076                                         }
2077                                 }
2078                         }
2079                 }
2080         }
2081
2082         /* free BSP */
2083         BLI_memarena_free(memarena);
2084
2085         /* free samples */
2086         for(x=0; x<(R.osa?R.osa:1); x++)
2087                 MEM_freeN(samplebuf[x]);
2088
2089         if(bsp_err) printf("error in filling bsp\n");
2090 }
2091
2092
2093
2094 /* exported */
2095
2096 /* returns amount of light (1.0 = no shadow) */
2097 /* note, shadepixel() rounds the coordinate, not the real sample info */
2098 float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
2099 {
2100         /* if raytracing, we can't accept irregular shadow */
2101         if(shi->depth==0) {
2102                 ISBData *isbdata= shb->isb_result[shi->thread];
2103                 
2104                 if(isbdata) {
2105                         if(isbdata->shadfacs || isbdata->shadfaca) {
2106                                 int x= shi->xs - isbdata->minx;
2107                                 
2108                                 if(x >= 0 && x < isbdata->rectx) {
2109                                         int y= shi->ys - isbdata->miny;
2110                         
2111                                         if(y >= 0 && y < isbdata->recty) {
2112                                                 if(isbdata->shadfacs) {
2113                                                         short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
2114                                                         return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
2115                                                 }
2116                                                 else {
2117                                                         int sindex= y*isbdata->rectx + x;
2118                                                         int obi= shi->obi - R.objectinstance;
2119                                                         ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex);
2120                                                         
2121                                                         while(isbsa) {
2122                                                                 if(isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
2123                                                                         return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
2124                                                                 isbsa= isbsa->next;
2125                                                         }
2126                                                 }
2127                                         }
2128                                 }
2129                         }
2130                 }
2131         }
2132         return 1.0f;
2133 }
2134
2135 /* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
2136 void ISB_create(RenderPart *pa, APixstr *apixbuf)
2137 {
2138         GroupObject *go;
2139         
2140         /* go over all lamps, and make the irregular buffers */
2141         for(go=R.lights.first; go; go= go->next) {
2142                 LampRen *lar= go->lampren;
2143                 
2144                 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2145                         
2146                         /* create storage for shadow, per thread */
2147                         lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
2148                         
2149                         if(apixbuf)
2150                                 isb_make_buffer_transp(pa, apixbuf, lar);
2151                         else
2152                                 isb_make_buffer(pa, lar);
2153                 }
2154         }
2155 }
2156
2157
2158 /* end of part rendering, free stored shadow data for this thread from all lamps */
2159 void ISB_free(RenderPart *pa)
2160 {
2161         GroupObject *go;
2162         
2163         /* go over all lamps, and free the irregular buffers */
2164         for(go=R.lights.first; go; go= go->next) {
2165                 LampRen *lar= go->lampren;
2166                 
2167                 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2168                         ISBData *isbdata= lar->shb->isb_result[pa->thread];
2169
2170                         if(isbdata) {
2171                                 if(isbdata->shadfacs)
2172                                         MEM_freeN(isbdata->shadfacs);
2173                                 if(isbdata->shadfaca)
2174                                         MEM_freeN(isbdata->shadfaca);
2175                                 
2176                                 if(isbdata->memarena)
2177                                         BLI_memarena_free(isbdata->memarena);
2178                                 
2179                                 MEM_freeN(isbdata);
2180                                 lar->shb->isb_result[pa->thread]= NULL;
2181                         }
2182                 }
2183         }
2184 }
2185