Undo revision 23130 which was a merge with 2.5, a messy one because I did something...
[blender.git] / source / blender / render / intern / source / shadbuf.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
17  *
18  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * Contributor(s): 2004-2006, Blender Foundation
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 #include <math.h>
27 #include <string.h>
28
29 #include "MTC_matrixops.h"
30 #include "MEM_guardedalloc.h"
31
32 #include "DNA_group_types.h"
33 #include "DNA_lamp_types.h"
34 #include "DNA_material_types.h"
35
36 #include "BKE_global.h"
37 #include "BKE_scene.h"
38 #include "BKE_utildefines.h"
39
40 #include "BLI_arithb.h"
41 #include "BLI_blenlib.h"
42 #include "BLI_jitter.h"
43 #include "BLI_memarena.h"
44 #include "BLI_rand.h"
45
46 #include "PIL_time.h"
47
48 #include "renderpipeline.h"
49 #include "render_types.h"
50 #include "renderdatabase.h"
51 #include "rendercore.h"
52
53 #include "shadbuf.h"
54 #include "zbuf.h"
55
/* Byte offsets of the four 8-bit components inside a 4-byte int, selected per
 * endianness: on the big-endian platforms listed below ACOMP is byte 0, on all
 * others it is byte 3, so ACOMP always addresses the most significant byte and
 * RCOMP the least significant one. The shadow buffer code in this file uses
 * them to pick single bytes out of integer Z values through a char pointer.
 *
 * XXX, could be better implemented... this is for endian issues
 */
#if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__hppa__) || defined (__BIG_ENDIAN__)
#define RCOMP   3
#define GCOMP   2
#define BCOMP   1
#define ACOMP   0
#else
#define RCOMP   0
#define GCOMP   1
#define BCOMP   2
#define ACOMP   3
#endif
69
70 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
71 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
72 /* only to be used here in this file, it's for speed */
73 extern struct Render R;
74 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
75
76 /* ------------------------------------------------------------------------- */
77
78 /* initshadowbuf() in convertBlenderScene.c */
79
80 /* ------------------------------------------------------------------------- */
81
/* Copy one square tile out of a size*size integer Z buffer.
 *
 * rectz : source buffer, size*size ints, row-major
 * size  : width and height of rectz in pixels
 * x1,y1 : top-left pixel of the tile inside rectz
 * tile  : tile edge length in pixels
 * r1    : destination, must hold tile*tile ints
 *
 * The destination always uses a row stride of 'tile' pixels, which is the
 * layout the tile readers in this file index with. A tile that sticks out of
 * the buffer is clipped to it; destination pixels that fall outside the
 * buffer are left untouched.
 *
 * Previously the far edge was clamped to size-1, which dropped the last row
 * and column of every border tile, and clipped rows were packed tightly so
 * the remaining rows no longer lined up with the fixed stride.
 */
static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
{
        const int *src;
        size_t rowbytes, stride;
        int x2, y2;

        /* exclusive end coordinates, clipped to the buffer */
        x2= x1+tile;
        y2= y1+tile;
        if(x2>size) x2= size;
        if(y2>size) y2= size;

        if(x1>=x2 || y1>=y2) return;

        rowbytes= sizeof(int)*(size_t)(x2-x1);
        stride= sizeof(int)*(size_t)tile;
        src= rectz + (size_t)size*(size_t)y1 + x1;

        for(; y1<y2; y1++) {
                memcpy(r1, src, rowbytes);
                src+= size;
                r1+= stride;
        }
}
102
#if 0
/* Disabled debug helper: sums the per-tile codes of a shadow buffer and
 * returns 256 times that sum (one tile covers 256 pixels).
 * NOTE(review): reads shb->cbuf, while the live code in this file keeps cbuf
 * on the sample buffer - confirm before re-enabling. */
static int sizeoflampbuf(ShadBuf *shb)
{
        char *tilecode= shb->cbuf;
        int ntiles= (shb->size*shb->size)/256;
        int total= 0;

        for(; ntiles; ntiles--, tilecode++)
                total+= *tilecode;

        return 256*total;
}
#endif
117
/* Returns the jitter table holding samp*samp coordinate pairs; samp is
 * clamped to the range 2..16.
 *
 * not threadsafe: the tables live in function-static storage and are filled
 * on first request, so callers have to serialize access themselves. */
static float *give_jitter_tab(int samp)
{
        /* All possible tables are stored back to back:
         * 1 + 4 + 9 + ... + 256 = 1496 pairs, roughly 12k of memory.
         * For soft shadows, sharing them saves memory and render time. */
        static float jit[1496][2];
        static char filled[17]= {0};
        int k, start= 0;

        if(samp<2) samp= 2;
        else if(samp>16) samp= 16;

        /* the table for 'samp' starts behind those of all smaller counts */
        for(k=1; k<samp; k++)
                start+= k*k;

        if(!filled[samp]) {
                filled[samp]= 1;
                BLI_initjit(jit[start], samp*samp);
        }

        return jit[start];
}
143
144 static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype) 
145 {
146         float *jit, totw= 0.0f;
147         int samp= get_render_shadow_samples(&re->r, shb->samp);
148         int a, tot=samp*samp;
149         
150         shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
151         
152         for(jit= shb->jit, a=0; a<tot; a++, jit+=2) {
153                 if(filtertype==LA_SHADBUF_TENT) 
154                         shb->weight[a]= 0.71f - sqrt(jit[0]*jit[0] + jit[1]*jit[1]);
155                 else if(filtertype==LA_SHADBUF_GAUSS) 
156                         shb->weight[a]= RE_filter_value(R_FILTER_GAUSS, 1.8f*sqrt(jit[0]*jit[0] + jit[1]*jit[1]));
157                 else
158                         shb->weight[a]= 1.0f;
159                 
160                 totw+= shb->weight[a];
161         }
162         
163         totw= 1.0f/totw;
164         for(a=0; a<tot; a++) {
165                 shb->weight[a]*= totw;
166         }
167 }
168
/* create Z tiles (for compression): this system is 24 bits!!!
 *
 * Packs the size*size integer Z buffer 'rectz' into a new ShadSampleBuf that
 * is appended to shb->buffers. The buffer is cut into 16x16 pixel tiles; per
 * tile one byte in cbuf holds the storage code and one pointer-sized slot in
 * zbuf holds either the value itself or a pointer to the packed pixels:
 *
 *   code 0: all pixels equal (ignoring the lowest byte), or the tile was
 *           skipped; the slot holds the Z value itself, no allocation
 *   code 1: ACOMP and BCOMP bytes are shared; allocation is the first pixel
 *           as int followed by one GCOMP byte per pixel
 *   code 2: only the ACOMP byte is shared; allocation is the first pixel as
 *           int followed by BCOMP, GCOMP bytes per pixel
 *   code 3: nothing shared; allocation holds ACOMP, BCOMP, GCOMP per pixel
 *
 * square: when zero, tiles farther than size/2 + 12 pixels from the buffer
 *         centre are not read at all and stored as code 0 with value 0.
 *
 * The loops step in units of 16, so this presumably expects size to be a
 * multiple of 16 - TODO confirm where shb->size is set.
 */
static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
{
        ShadSampleBuf *shsample;
        float dist;
        uintptr_t *ztile;
        int *rz, *rz1, verg, verg1, size= shb->size;
        int a, x, y, minx, miny, byt1, byt2;
        char *rc, *rcline, *ctile, *zt;
        
        shsample= MEM_mallocN( sizeof(ShadSampleBuf), "shad sample buf");
        BLI_addtail(&shb->buffers, shsample);
        
        /* one slot and one code byte per 16x16 tile; codes start out as 0 */
        shsample->zbuf= MEM_mallocN( sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
        shsample->cbuf= MEM_callocN( (size*size)/256, "initshadbuf3");
        
        ztile= (uintptr_t *)shsample->zbuf;
        ctile= shsample->cbuf;
        
        /* help buffer: scratch space for the 256 pixels of the current tile */
        rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
        
        for(y=0; y<size; y+=16) {
                /* vertical offset from the buffer centre of the tile edge nearest to it */
                if(y< size/2) miny= y+15-size/2;
                else miny= y-size/2;    
                
                for(x=0; x<size; x+=16) {
                        
                        /* is tile within spotbundle? */
                        a= size/2;
                        if(x< a) minx= x+15-a;
                        else minx= x-a; 
                        
                        dist= sqrt( (float)(minx*minx+miny*miny) );
                        
                        if(square==0 && dist>(float)(a+12)) {   /* 12, tested with a onlyshadow lamp */
                                /* outside: pretend a uniform tile of value 0; rz1 is set
                                   one past verg so that *(rz1-1) below reads verg */
                                a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
                                rz1= (&verg)+1;
                        } 
                        else {
                                copy_to_ztile(rectz, size, x, y, 16, rcline);
                                rz1= (int *)rcline;
                                
                                /* compare the upper three bytes only */
                                verg= (*rz1 & 0xFFFFFF00);
                                
                                /* a ends at 256 when every pixel matches the first one */
                                for(a=0;a<256;a++,rz1++) {
                                        if( (*rz1 & 0xFFFFFF00) !=verg) break;
                                }
                        }
                        if(a==256) { /* complete empty tile */
                                *ctile= 0;
                                /* store the value itself in the slot, not a pointer */
                                *ztile= *(rz1-1);
                        }
                        else {
                                
                                /* ACOMP etc. are defined to work L/B endian */
                                
                                /* find out which leading bytes all pixels share:
                                   byt1 for ACOMP, byt2 for BCOMP; scanning stops as
                                   soon as ACOMP differs */
                                rc= rcline;
                                rz1= (int *)rcline;
                                verg=  rc[ACOMP];
                                verg1= rc[BCOMP];
                                rc+= 4;
                                byt1= 1; byt2= 1;
                                for(a=1;a<256;a++,rc+=4) {
                                        byt1 &= (verg==rc[ACOMP]);
                                        byt2 &= (verg1==rc[BCOMP]);
                                        
                                        if(byt1==0) break;
                                }
                                if(byt1 && byt2) {      /* only store byte */
                                        *ctile= 1;
                                        *ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
                                        /* first pixel is kept as int, it supplies the shared bytes */
                                        rz= (int *)*ztile;
                                        *rz= *rz1;
                                        
                                        zt= (char *)(rz+1);
                                        rc= rcline;
                                        for(a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];       
                                }
                                else if(byt1) {         /* only store short */
                                        *ctile= 2;
                                        *ztile= (uintptr_t)MEM_mallocN(2*256+4,"Tile2");
                                        /* first pixel is kept as int, it supplies the shared byte */
                                        rz= (int *)*ztile;
                                        *rz= *rz1;
                                        
                                        zt= (char *)(rz+1);
                                        rc= rcline;
                                        for(a=0; a<256; a++, zt+=2, rc+=4) {
                                                zt[0]= rc[BCOMP];
                                                zt[1]= rc[GCOMP];
                                        }
                                }
                                else {                  /* store triple */
                                        *ctile= 3;
                                        *ztile= (uintptr_t)MEM_mallocN(3*256,"Tile3");

                                        zt= (char *)*ztile;
                                        rc= rcline;
                                        for(a=0; a<256; a++, zt+=3, rc+=4) {
                                                zt[0]= rc[ACOMP];
                                                zt[1]= rc[BCOMP];
                                                zt[2]= rc[GCOMP];
                                        }
                                }
                        }
                        ztile++;
                        ctile++;
                }
        }

        MEM_freeN(rcline);

}
282
283 /* sets start/end clipping. lar->shb should be initialized */
284 static void shadowbuf_autoclip(Render *re, LampRen *lar)
285 {
286         ObjectInstanceRen *obi;
287         ObjectRen *obr;
288         VlakRen *vlr= NULL;
289         VertRen *ver= NULL;
290         Material *ma= NULL;
291         float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
292         unsigned int lay = -1;
293         int i, a, maxtotvert, ok= 1;
294         char *clipflag;
295         
296         minz= 1.0e30f; maxz= -1.0e30f;
297         Mat4CpyMat4(viewmat, lar->shb->viewmat);
298         
299         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
300
301         maxtotvert= 0;
302         for(obr=re->objecttable.first; obr; obr=obr->next)
303                 maxtotvert= MAX2(obr->totvert, maxtotvert);
304
305         clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag");
306
307         /* set clip in vertices when face visible */
308         for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
309                 obr= obi->obr;
310
311                 if(obi->flag & R_TRANSFORMED)
312                         Mat4MulMat4(obviewmat, obi->mat, viewmat);
313                 else
314                         Mat4CpyMat4(obviewmat, viewmat);
315
316                 memset(clipflag, 0, sizeof(char)*obr->totvert);
317
318                 /* clear clip, is being set if face is visible (clip is calculated for real later) */
319                 for(a=0; a<obr->totvlak; a++) {
320                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
321                         else vlr++;
322                         
323                         /* note; these conditions are copied from zbuffer_shadow() */
324                         if(vlr->mat!= ma) {
325                                 ma= vlr->mat;
326                                 ok= 1;
327                                 if((ma->mode & MA_SHADBUF)==0) ok= 0;
328                         }
329                         
330                         if(ok && (obi->lay & lay)) {
331                                 clipflag[vlr->v1->index]= 1;
332                                 clipflag[vlr->v2->index]= 1;
333                                 clipflag[vlr->v3->index]= 1;
334                                 if(vlr->v4) clipflag[vlr->v4->index]= 1;
335                         }                               
336                 }               
337                 
338                 /* calculate min and max */
339                 for(a=0; a< obr->totvert;a++) {
340                         if((a & 255)==0) ver= RE_findOrAddVert(obr, a);
341                         else ver++;
342                         
343                         if(clipflag[a]) {
344                                 VECCOPY(vec, ver->co);
345                                 Mat4MulVecfl(obviewmat, vec);
346                                 /* Z on visible side of lamp space */
347                                 if(vec[2] < 0.0f) {
348                                         float inpr, z= -vec[2];
349                                         
350                                         /* since vec is rotated in lampspace, this is how to get the cosine of angle */
351                                         /* precision is set 20% larger */
352                                         vec[2]*= 1.2f;
353                                         Normalize(vec);
354                                         inpr= - vec[2];
355
356                                         if(inpr>=lar->spotsi) {
357                                                 if(z<minz) minz= z;
358                                                 if(z>maxz) maxz= z;
359                                         }
360                                 }
361                         }
362                 }
363         }
364
365         MEM_freeN(clipflag);
366         
367         /* set clipping min and max */
368         if(minz < maxz) {
369                 float delta= (maxz - minz);     /* threshold to prevent precision issues */
370                 
371                 //printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
372                 if(lar->bufflag & LA_SHADBUF_AUTO_START)
373                         lar->shb->d= minz - delta*0.02f;        /* 0.02 is arbitrary... needs more thinking! */
374                 if(lar->bufflag & LA_SHADBUF_AUTO_END)
375                         lar->shb->clipend= maxz + delta*0.1f;
376                 
377                 /* bias was calculated as percentage, we scale it to prevent animation issues */
378                 delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
379                 //printf("bias delta %f\n", delta);
380                 lar->shb->bias= (int) (delta*(float)lar->shb->bias);
381         }
382 }
383
384 void makeshadowbuf(Render *re, LampRen *lar)
385 {
386         ShadBuf *shb= lar->shb;
387         float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
388         int *rectz, samples;
389         
390         if(lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
391                 shadowbuf_autoclip(re, lar);
392         
393         /* just to enforce identical behaviour of all irregular buffers */
394         if(lar->buftype==LA_SHADBUF_IRREGULAR)
395                 shb->size= 1024;
396         
397         /* matrices and window: in winmat the transformation is being put,
398                 transforming from observer view to lamp view, including lamp window matrix */
399         
400         angle= saacos(lar->spotsi);
401         temp= 0.5f*shb->size*cos(angle)/sin(angle);
402         shb->pixsize= (shb->d)/temp;
403         wsize= shb->pixsize*(shb->size/2.0);
404         
405         i_window(-wsize, wsize, -wsize, wsize, shb->d, shb->clipend, shb->winmat);
406         MTC_Mat4MulMat4(shb->persmat, shb->viewmat, shb->winmat);
407
408         if(ELEM(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY)) {
409                 /* jitter, weights - not threadsafe! */
410                 BLI_lock_thread(LOCK_CUSTOM1);
411                 shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
412                 make_jitter_weight_tab(re, shb, lar->filtertype);
413                 BLI_unlock_thread(LOCK_CUSTOM1);
414                 
415                 shb->totbuf= lar->buffers;
416                 if(shb->totbuf==4) jitbuf= give_jitter_tab(2);
417                 else if(shb->totbuf==9) jitbuf= give_jitter_tab(3);
418                 else jitbuf= twozero;
419                 
420                 /* zbuffering */
421                 rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
422                 
423                 for(samples=0; samples<shb->totbuf; samples++) {
424                         zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
425                         /* create Z tiles (for compression): this system is 24 bits!!! */
426                         compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
427
428                         if(re->test_break(re->tbh))
429                                 break;
430                 }
431                 
432                 MEM_freeN(rectz);
433
434                 /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
435         }
436 }
437
438 static void *do_shadow_thread(void *re_v)
439 {
440         Render *re= (Render*)re_v;
441         LampRen *lar;
442
443         do {
444                 BLI_lock_thread(LOCK_CUSTOM1);
445                 for(lar=re->lampren.first; lar; lar=lar->next) {
446                         if(lar->shb && !lar->thread_assigned) {
447                                 lar->thread_assigned= 1;
448                                 break;
449                         }
450                 }
451                 BLI_unlock_thread(LOCK_CUSTOM1);
452
453                 /* if type is irregular, this only sets the perspective matrix and autoclips */
454                 if(lar) {
455                         makeshadowbuf(re, lar);
456                         BLI_lock_thread(LOCK_CUSTOM1);
457                         lar->thread_ready= 1;
458                         BLI_unlock_thread(LOCK_CUSTOM1);
459                 }
460         } while(lar && !re->test_break(re->tbh));
461
462         return NULL;
463 }
464
/* break flag written by threaded_makeshadowbufs() and polled by the shadow
 * threads through thread_break() */
static volatile int g_break= 0;

/* test_break replacement used while shadow threads run: reports the flag
 * above, the argument is ignored */
static int thread_break(void *unused)
{
        (void)unused;

        return g_break;
}
470
471 void threaded_makeshadowbufs(Render *re)
472 {
473         ListBase threads;
474         LampRen *lar;
475         int a, totthread= 0;
476         int (*test_break)(void *);
477
478         /* count number of threads to use */
479         if(G.rendering) {
480                 for(lar=re->lampren.first; lar; lar= lar->next)
481                         if(lar->shb)
482                                 totthread++;
483                 
484                 totthread= MIN2(totthread, re->r.threads);
485         }
486         else
487                 totthread= 1; /* preview render */
488
489         if(totthread <= 1) {
490                 for(lar=re->lampren.first; lar; lar= lar->next) {
491                         if(re->test_break(re->tbh)) break;
492                         if(lar->shb) {
493                                 /* if type is irregular, this only sets the perspective matrix and autoclips */
494                                 makeshadowbuf(re, lar);
495                         }
496                 }
497         }
498         else {
499                 /* swap test break function */
500                 test_break= re->test_break;
501                 re->test_break= thread_break;
502
503                 for(lar=re->lampren.first; lar; lar= lar->next) {
504                         lar->thread_assigned= 0;
505                         lar->thread_ready= 0;
506                 }
507
508                 BLI_init_threads(&threads, do_shadow_thread, totthread);
509                 
510                 for(a=0; a<totthread; a++)
511                         BLI_insert_thread(&threads, re);
512
513                 /* keep rendering as long as there are shadow buffers not ready */
514                 do {
515                         if((g_break=test_break(re->tbh)))
516                                 break;
517
518                         PIL_sleep_ms(50);
519
520                         BLI_lock_thread(LOCK_CUSTOM1);
521                         for(lar=re->lampren.first; lar; lar= lar->next)
522                                 if(lar->shb && !lar->thread_ready)
523                                         break;
524                         BLI_unlock_thread(LOCK_CUSTOM1);
525                 } while(lar);
526         
527                 BLI_end_threads(&threads);
528
529                 /* unset threadsafety */
530                 re->test_break= test_break;
531                 g_break= 0;
532         }
533 }
534
535 void freeshadowbuf(LampRen *lar)
536 {
537         if(lar->shb) {
538                 ShadBuf *shb= lar->shb;
539                 ShadSampleBuf *shsample;
540                 int b, v;
541                 
542                 v= (shb->size*shb->size)/256;
543                 
544                 for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
545                         intptr_t *ztile= shsample->zbuf;
546                         char *ctile= shsample->cbuf;
547                         
548                         for(b=0; b<v; b++, ztile++, ctile++)
549                                 if(*ctile) MEM_freeN((void *) *ztile);
550                         
551                         MEM_freeN(shsample->zbuf);
552                         MEM_freeN(shsample->cbuf);
553                 }
554                 BLI_freelistN(&shb->buffers);
555                 
556                 if(shb->weight) MEM_freeN(shb->weight);
557                 MEM_freeN(lar->shb);
558                 
559                 lar->shb= NULL;
560         }
561 }
562
563
564 static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
565 {
566         /* return a 1 if fully compressed shadbuf-tile && z==const */
567         int ofs;
568         char *ct;
569
570         /* always test borders of shadowbuffer */
571         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
572         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
573    
574         /* calc z */
575         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
576         ct= shsample->cbuf+ofs;
577         if(*ct==0) {
578             if(nr==0) {
579                         *rz= *( (int **)(shsample->zbuf+ofs) );
580                         return 1;
581             }
582                 else if(*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
583                 
584             return 1;
585         }
586         
587         return 0;
588 }
589
590 /* return 1.0 : fully in light */
591 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)     
592 {
593         float temp;
594         int *rz, ofs;
595         int zsamp=0;
596         char *ct, *cz;
597
598         /* simpleclip */
599         /* if(xs<0 || ys<0) return 1.0; */
600         /* if(xs>=shb->size || ys>=shb->size) return 1.0; */
601         
602         /* always test borders of shadowbuffer */
603         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
604         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
605
606         /* calc z */
607         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
608         ct= shsample->cbuf+ofs;
609         rz= *( (int **)(shsample->zbuf+ofs) );
610
611         if(*ct==3) {
612                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
613                 cz= (char *)&zsamp;
614                 cz[ACOMP]= ct[0];
615                 cz[BCOMP]= ct[1];
616                 cz[GCOMP]= ct[2];
617         }
618         else if(*ct==2) {
619                 ct= ((char *)rz);
620                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
621                 zsamp= *rz;
622         
623                 cz= (char *)&zsamp;
624                 cz[BCOMP]= ct[0];
625                 cz[GCOMP]= ct[1];
626         }
627         else if(*ct==1) {
628                 ct= ((char *)rz);
629                 ct+= 4+16*(ys & 15)+(xs & 15);
630                 zsamp= *rz;
631
632                 cz= (char *)&zsamp;
633                 cz[GCOMP]= ct[0];
634
635         }
636         else {
637                 /* got warning on this for 64 bits.... */
638                 /* but it's working code! in this case rz is not a pointer but zvalue (ton) */
639                 zsamp= GET_INT_FROM_POINTER(rz);
640         }
641
642         /* tricky stuff here; we use ints which can overflow easily with bias values */
643         
644         if(zsamp > zs) return 1.0;              /* absolute no shadow */
645         else if(zs < - 0x7FFFFE00 + bias) return 1.0;   /* extreme close to clipstart */
646         else if(zsamp < zs-bias) return 0.0 ;   /* absolute in shadow */
647         else {                                  /* soft area */
648                 
649                 temp=  ( (float)(zs- zsamp) )/(float)bias;
650                 return 1.0 - temp*temp;
651                         
652         }
653 }
654
/* the externally called shadow testing (reading) function */
/* return 1.0: no shadow at all */
/*
 * re       : render, used for the shadow sample count setting
 * shb      : shadow buffer to test against
 * rco      : coordinate to test; it is transformed with shb->persmat
 * dxco/dyco: offsets added to rco to estimate the filter size in the buffer;
 *            only read when more than one sample is taken
 * inp      : values <= 0.0 return full shadow right away; otherwise it
 *            scales the bias by (1.5 - inp*inp)
 *            (presumably the cosine between normal and lamp - TODO confirm)
 * mat_bias : when non-zero, multiplied into the buffer's bias
 *
 * Also returns 1.0 when the lamp has no sample buffers at all, or when the
 * point lies in front of the clipping start; returns 0.0 behind clipping end.
 */
float testshadowbuf(Render *re, ShadBuf *shb, float *rco, float *dxco, float *dyco, float inp, float mat_bias)
{
        ShadSampleBuf *shsample;
        float fac, co[4], dx[3], dy[3], shadfac=0.0f;
        float xs1,ys1, siz, *jit, *weight, xres, yres, biasf;
        int xs, ys, zs, bias, *rz;
        short a, num;
        
        /* crash preventer */
        if(shb->buffers.first==NULL)
                return 1.0f;
        
        if(inp <= 0.0f) return 0.0f;

        /* rotate renderco en osaco */
        siz= 0.5f*(float)shb->size;
        VECCOPY(co, rco);
        co[3]= 1.0f;

        MTC_Mat4MulVec4fl(shb->persmat, co);    /* rational hom co */

        /* buffer pixel coordinates of the test point */
        xs1= siz*(1.0f+co[0]/co[3]);
        ys1= siz*(1.0f+co[1]/co[3]);

        /* Clip for z: clipsta and clipend clip values of the shadow buffer. We
                * can test for -1.0/1.0 because of the properties of the
                * coordinate transformations. */
        fac= (co[2]/co[3]);

        if(fac>=1.0f) {
                return 0.0f;
        } else if(fac<= -1.0f) {
                return 1.0f;
        }

        /* depth scaled to the integer range of the stored Z values */
        zs= ((float)0x7FFFFFFF)*fac;

        /* take num*num samples, increase area with fac */
        num= get_render_shadow_samples(&re->r, shb->samp);
        num= num*num;
        fac= shb->soft;
        
        if(mat_bias!=0.0f) biasf= shb->bias*mat_bias;
        else biasf= shb->bias;
        /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors 
           on cube edges, with one side being almost frontal lighted (ton)  */
        bias= (1.5f-inp*inp)*biasf;
        
        /* single sample: one unfiltered read per buffer, averaged */
        if(num==1) {
                for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
                        shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
                
                return shadfac/(float)shb->totbuf;
        }

        /* calculate filter size */
        co[0]= rco[0]+dxco[0];
        co[1]= rco[1]+dxco[1];
        co[2]= rco[2]+dxco[2];
        co[3]= 1.0;
        MTC_Mat4MulVec4fl(shb->persmat,co);     /* rational hom co */
        dx[0]= xs1- siz*(1.0+co[0]/co[3]);
        dx[1]= ys1- siz*(1.0+co[1]/co[3]);
        
        co[0]= rco[0]+dyco[0];
        co[1]= rco[1]+dyco[1];
        co[2]= rco[2]+dyco[2];
        co[3]= 1.0;
        MTC_Mat4MulVec4fl(shb->persmat,co);     /* rational hom co */
        dy[0]= xs1- siz*(1.0+co[0]/co[3]);
        dy[1]= ys1- siz*(1.0+co[1]/co[3]);
        
        /* filter extent in buffer pixels, never smaller than the soft value */
        xres= fac*( fabs(dx[0])+fabs(dy[0]) );
        yres= fac*( fabs(dx[1])+fabs(dy[1]) );
        if(xres<fac) xres= fac;
        if(yres<fac) yres= fac;
        
        /* move to the corner of the sample area */
        xs1-= (xres)/2;
        ys1-= (yres)/2;

        /* shortcut: when all four corners of a small sample area lie in
           single-value tiles holding the same value, one read of the first
           buffer is returned instead of filtering */
        if(xres<16.0f && yres<16.0f) {
                shsample= shb->buffers.first;
            if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
                        if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
                                if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
                                        if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
                                                return readshadowbuf(shb, shsample, bias,(int)xs1, (int)ys1, zs);
                                        }
                                }
                        }
            }
        }
        
        /* weighted sum of jittered reads, for every buffer */
        for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
                jit= shb->jit;
                weight= shb->weight;
                
                for(a=num; a>0; a--, jit+=2, weight++) {
                        /* instead of jit i tried random: ugly! */
                        /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
                        /* xs1 and ys1 are already corrected to be corner of sample area */
                        xs= xs1 + xres*(jit[0] + 0.5f);
                        ys= ys1 + yres*(jit[1] + 0.5f);
                        
                        shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
                }
        }

        /* Renormalizes for the sample number: */
        return shadfac/(float)shb->totbuf;
}
768
769 /* different function... sampling behind clipend can be LIGHT, bias is negative! */
770 /* return: light */
/* Reads the depth stored for pixel (xs, ys) in one sample buffer and compares
 * it with zs. Returns an amount of light: 1.0 is lit, 0.0 is shadowed.
 * Pixels outside the buffer return 0.0. */
771 static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
772 {
773         float temp;
774         int *rz, ofs;
775         int bias, zbias, zsamp;
776         char *ct, *cz;
777
778         /* negative! The other side is more important */
779         bias= -shb->bias;
780         
781         /* simpleclip */
782         if(xs<0 || ys<0) return 0.0;
783         if(xs>=shb->size || ys>=shb->size) return 0.0;
784
785         /* calc z */
            /* the buffer is addressed in tiles of 16x16 pixels: ofs is the tile index,
             * cbuf[ofs] selects how the tile is stored, zbuf[ofs] is read as a pointer */
786         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
787         ct= shsample->cbuf+ofs;
788         rz= *( (int **)(shsample->zbuf+ofs) );
789
            /* type 3: three bytes per pixel, copied to the ACOMP, BCOMP and GCOMP
             * bytes of zsamp, the remaining byte stays zero */
790         if(*ct==3) {
791                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
792                 cz= (char *)&zsamp;
793                 zsamp= 0;
794                 cz[ACOMP]= ct[0];
795                 cz[BCOMP]= ct[1];
796                 cz[GCOMP]= ct[2];
797         }
            /* type 2: the first int of the tile is a base value, followed by two
             * bytes per pixel that replace its BCOMP and GCOMP bytes */
798         else if(*ct==2) {
799                 ct= ((char *)rz);
800                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
801                 zsamp= *rz;
802         
803                 cz= (char *)&zsamp;
804                 cz[BCOMP]= ct[0];
805                 cz[GCOMP]= ct[1];
806         }
            /* type 1: base value as in type 2, followed by one byte per pixel that
             * replaces its GCOMP byte */
807         else if(*ct==1) {
808                 ct= ((char *)rz);
809                 ct+= 4+16*(ys & 15)+(xs & 15);
810                 zsamp= *rz;
811
812                 cz= (char *)&zsamp;
813                 cz[GCOMP]= ct[0];
814
815         }
816         else {
817                 /* same as before */
818                 /* still working code! (ton) */
                    /* other types: the pointer slot itself holds the depth value */
819                 zsamp= GET_INT_FROM_POINTER(rz);
820         }
821
822         /* NO shadow when sampled at 'eternal' distance */
823
824         if(zsamp >= 0x7FFFFE00) return 1.0; 
825
826         if(zsamp > zs) return 1.0;              /* absolute no shadow */
827         else {
828                 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
                    /* zbias is the room left above zs, the subtraction is only done when it fits */
                    /* NOTE(review): here zsamp <= zs, so with a positive shb->bias the test
                     * zsamp < zs-bias looks always true and the soft area below would
                     * only be reached for shb->bias <= 0 - confirm */
829                 zbias= 0x7fffffff - zs;
830                 if(zbias > -bias) {
831                         if( zsamp < zs-bias) return 0.0 ;       /* absolute in shadow */
832                 }
833                 else return 0.0 ;       /* absolute shadow */
834         }
835
836         /* soft area */
            /* quadratic falloff on the depth difference relative to bias */
837         
838         temp=  ( (float)(zs- zsamp) )/(float)bias;
839         return 1.0 - temp*temp;
840 }
841
842
/* Returns the lit fraction of the line p1-p2 in the shadow buffer of lar.
 * Both points are projected with shb->winmat to buffer pixels, the line is
 * then walked as a 2d DDA with steps of shb->shadhalostep pixels, and every
 * sample buffer is read at each step. The result is lightcount/count,
 * or 0.0 when no step was taken. */
843 float shadow_halo(LampRen *lar, float *p1, float *p2)
844 {
845         /* p1 p2 already are rotated in spot-space */
846         ShadBuf *shb= lar->shb;
847         ShadSampleBuf *shsample;
848         float co[4], siz;
849         float labda, labdao, labdax, labday, ldx, ldy;
850         float zf, xf1, yf1, zf1, xf2, yf2, zf2;
851         float count, lightcount;
852         int x, y, z, xs1, ys1;
853         int dx = 0, dy = 0;
854         
            /* half the buffer size, used to map projected x and y to pixels */
855         siz= 0.5*(float)shb->size;
856         
            /* start point: z is divided by lar->sh_zfac before projecting */
857         co[0]= p1[0];
858         co[1]= p1[1];
859         co[2]= p1[2]/lar->sh_zfac;
860         co[3]= 1.0;
861         MTC_Mat4MulVec4fl(shb->winmat, co);     /* rational hom co */
862         xf1= siz*(1.0+co[0]/co[3]);
863         yf1= siz*(1.0+co[1]/co[3]);
864         zf1= (co[2]/co[3]);
865
866
            /* end point, same projection */
867         co[0]= p2[0];
868         co[1]= p2[1];
869         co[2]= p2[2]/lar->sh_zfac;
870         co[3]= 1.0;
871         MTC_Mat4MulVec4fl(shb->winmat, co);     /* rational hom co */
872         xf2= siz*(1.0+co[0]/co[3]);
873         yf2= siz*(1.0+co[1]/co[3]);
874         zf2= (co[2]/co[3]);
875
876         /* the 2dda (a pixel line formula) */
877
878         xs1= (int)xf1;
879         ys1= (int)yf1;
880
            /* labdax: line parameter (0.0 at p1, 1.0 at p2) of the next step in x,
             * ldx: its increment per step, dx: the pixel step itself.
             * without extent in x, labdax is 1.0 and dx stays 0 */
881         if(xf1 != xf2) {
882                 if(xf2-xf1 > 0.0) {
883                         labdax= (xf1-xs1-1.0)/(xf1-xf2);
884                         ldx= -shb->shadhalostep/(xf1-xf2);
885                         dx= shb->shadhalostep;
886                 }
887                 else {
888                         labdax= (xf1-xs1)/(xf1-xf2);
889                         ldx= shb->shadhalostep/(xf1-xf2);
890                         dx= -shb->shadhalostep;
891                 }
892         }
893         else {
894                 labdax= 1.0;
895                 ldx= 0.0;
896         }
897
            /* same for y */
898         if(yf1 != yf2) {
899                 if(yf2-yf1 > 0.0) {
900                         labday= (yf1-ys1-1.0)/(yf1-yf2);
901                         ldy= -shb->shadhalostep/(yf1-yf2);
902                         dy= shb->shadhalostep;
903                 }
904                 else {
905                         labday= (yf1-ys1)/(yf1-yf2);
906                         ldy= shb->shadhalostep/(yf1-yf2);
907                         dy= -shb->shadhalostep;
908                 }
909         }
910         else {
911                 labday= 1.0;
912                 ldy= 0.0;
913         }
914         
915         x= xs1;
916         y= ys1;
917         labda= count= lightcount= 0.0;
918
919 /* printf("start %x %x  \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
920
921         while(1) {
922                 labdao= labda;
923                 
                    /* step in the direction that is crossed first, in both on a tie */
924                 if(labdax==labday) {
925                         labdax+= ldx;
926                         x+= dx;
927                         labday+= ldy;
928                         y+= dy;
929                 }
930                 else {
931                         if(labdax<labday) {
932                                 labdax+= ldx;
933                                 x+= dx;
934                         } else {
935                                 labday+= ldy;
936                                 y+= dy;
937                         }
938                 }
939                 
                    /* stop when the parameter does not advance anymore or the end is reached */
940                 labda= MIN2(labdax, labday);
941                 if(labda==labdao || labda>=1.0) break;
942                 
                    /* depth on the line at this step; count grows with one per sample buffer */
943                 zf= zf1 + labda*(zf2-zf1);
944                 count+= (float)shb->totbuf;
945
946                 if(zf<= -1.0) lightcount += 1.0;        /* close to the spot */
947                 else {
948                 
949                         /* make sure, behind the clipend we extend halolines. */
950                         if(zf>=1.0) z= 0x7FFFF000;
951                         else z= (int)(0x7FFFF000*zf);
952                         
953                         for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
954                                 lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
955                         
956                 }
957         }
958         
959         if(count!=0.0) return (lightcount/count);
960         return 0.0;
961         
962 }
963
964
965 /* ********************* Irregular Shadow Buffer (ISB) ************* */
966 /* ********** storage of all view samples in a raster of lists ***** */
967
968 /* based on several articles describing this method, like:
969 The Irregular Z-Buffer and its Application to Shadow Mapping
970 Gregory S. Johnson - William R. Mark - Christopher A. Burns 
971 and
972 Alias-Free Shadow Maps
973 Timo Aila and Samuli Laine
974 */
975
976 /* bsp structure (actually kd tree) */
977
/* amount of samples a leaf can store, isb_bsp_insert() splits a leaf that reaches it */
978 #define BSPMAX_SAMPLE   128
/* depth at which isb_bsp_insert() stops splitting and reports an error instead */
979 #define BSPMAX_DEPTH    32
980
981 /* aligned with struct rctf */
/* 3d bounding box. isb_bsp_face_inside() casts it to rctf for the 2d tests,
 * so the x and y members have to stay first and in this order */
982 typedef struct Boxf {
983         float xmin, xmax;
984         float ymin, ymax;
985         float zmin, zmax;
986 } Boxf;
987
/* node of the sample tree. A node without 'left' is a leaf and stores samples */
988 typedef struct ISBBranch {
989         struct ISBBranch *left, *right;         /* children, left takes values <= divider on insert */
990         float divider[2];                       /* split position in x and y, only divider[index] is used to descend */
991         Boxf box;                               /* bounds of this node */
            /* totsamp: amount of stored samples, index: split axis (0 is x, 1 is y),
             * full: samples that isb_bsp_face_inside() found completely shadowed */
992         short totsamp, index, full, unused;
993         ISBSample **samples;                    /* array of BSPMAX_SAMPLE pointers */
994 } ISBBranch;
995
/* one projected face (or strand) that gets tested against the samples in the tree */
996 typedef struct BSPFace {
997         Boxf box;               /* bounds of the vertices */
998         float *v1, *v2, *v3, *v4;       /* vertices, v4 can be NULL */
999         int obi;                /* object for face lookup */
1000         int facenr;             /* index to retrieve VlakRen */
1001         int type;               /* only for strand now */
             /* shad_alpha is added to the shadfac of a shadowed sample,
              * is_full marks the sample as done right away */
1002         short shad_alpha, is_full;
1003         
1004         /* strand caching data, optimize for point_behind_strand() */
             /* filled in by bspface_init_strand() */
1005         float radline, radline_end, len;
1006         float vec1[3], vec2[3], rc[3];
1007 } BSPFace;
1008
1009 /* boxes are in lamp projection */
1010 static void init_box(Boxf *box)
1011 {
1012         box->xmin= 1000000.0f;
1013         box->xmax= 0;
1014         box->ymin= 1000000.0f;
1015         box->ymax= 0;
1016         box->zmin= 0x7FFFFFFF;
1017         box->zmax= - 0x7FFFFFFF;
1018 }
1019
1020 /* use v1 to calculate boundbox */
1021 static void bound_boxf(Boxf *box, float *v1)
1022 {
1023         if(v1[0] < box->xmin) box->xmin= v1[0];
1024         if(v1[0] > box->xmax) box->xmax= v1[0];
1025         if(v1[1] < box->ymin) box->ymin= v1[1];
1026         if(v1[1] > box->ymax) box->ymax= v1[1];
1027         if(v1[2] < box->zmin) box->zmin= v1[2];
1028         if(v1[2] > box->zmax) box->zmax= v1[2];
1029 }
1030
1031 /* use v1 to calculate boundbox */
1032 static void bound_rectf(rctf *box, float *v1)
1033 {
1034         if(v1[0] < box->xmin) box->xmin= v1[0];
1035         if(v1[0] > box->xmax) box->xmax= v1[0];
1036         if(v1[1] < box->ymin) box->ymin= v1[1];
1037         if(v1[1] > box->ymax) box->ymax= v1[1];
1038 }
1039
1040
1041 /* halfway splitting, for initializing a more regular tree */
1042 static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
1043 {
1044         
1045         /* if level > 0 we create new branches and go deeper*/
1046         if(level > 0) {
1047                 ISBBranch *left, *right;
1048                 int i;
1049                 
1050                 /* splitpoint */
1051                 root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
1052                 root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
1053                 
1054                 /* find best splitpoint */
1055                 if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1056                         i= root->index= 0;
1057                 else
1058                         i= root->index= 1;
1059                 
1060                 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1061                 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1062                 
1063                 /* box info */
1064                 left->box= root->box;
1065                 right->box= root->box;
1066                 if(i==0) {
1067                         left->box.xmax= root->divider[0];
1068                         right->box.xmin= root->divider[0];
1069                 }
1070                 else {
1071                         left->box.ymax= root->divider[1];
1072                         right->box.ymin= root->divider[1];
1073                 }
1074                 isb_bsp_split_init(left, mem, level-1);
1075                 isb_bsp_split_init(right, mem, level-1);
1076         }
1077         else {
1078                 /* we add sample array */
1079                 root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1080         }
1081 }
1082
1083 /* note; if all samples on same location we just spread them over 2 new branches */
/* Splits a leaf holding BSPMAX_SAMPLE samples into two new leaves. The divider
 * is the average sample position, the split axis the longer side of the box.
 * The left leaf gets a new sample array, the right leaf takes over the array
 * of root, and root itself ends up without samples.
 * NOTE(review): totsamp of the new leaves is not set here, this presumably
 * relies on the memarena handing out zeroed memory - confirm */
1084 static void isb_bsp_split(ISBBranch *root, MemArena *mem)
1085 {
1086         ISBBranch *left, *right;
1087         ISBSample *samples[BSPMAX_SAMPLE];
1088         int a, i;
1089
1090         /* splitpoint */
1091         root->divider[0]= root->divider[1]= 0.0f;
1092         for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1093                 root->divider[0]+= root->samples[a]->zco[0];
1094                 root->divider[1]+= root->samples[a]->zco[1];
1095         }
1096         root->divider[0]/= BSPMAX_SAMPLE;
1097         root->divider[1]/= BSPMAX_SAMPLE;
1098         
1099         /* find best splitpoint */
1100         if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1101                 i= root->index= 0;
1102         else
1103                 i= root->index= 1;
1104         
1105         /* new branches */
1106         left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1107         right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1108
1109         /* new sample array */
             /* the right leaf collects in the local array first, because root->samples
              * is still being read in the loop below */
1110         left->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1111         right->samples= samples; // tmp
1112         
1113         /* split samples */
1114         for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1115                 int comp= 0;
1116                 /* this prevents adding samples all to 1 branch when divider is equal to samples */
1117                 if(root->samples[a]->zco[i] == root->divider[i])
1118                         comp= a & 1;
1119                 else if(root->samples[a]->zco[i] < root->divider[i])
1120                         comp= 1;
1121                 
                     /* comp 1 goes left, comp 0 goes right */
1122                 if(comp==1) {
1123                         left->samples[left->totsamp]= root->samples[a];
1124                         left->totsamp++;
1125                 }
1126                 else {
1127                         right->samples[right->totsamp]= root->samples[a];
1128                         right->totsamp++;
1129                 }
1130         }
1131         
1132         /* copy samples from tmp */
             /* the array of root is handed over to the right leaf */
1133         memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
1134         right->samples= root->samples;
1135         root->samples= NULL;
1136         
1137         /* box info */
1138         left->box= root->box;
1139         right->box= root->box;
1140         if(i==0) {
1141                 left->box.xmax= root->divider[0];
1142                 right->box.xmin= root->divider[0];
1143         }
1144         else {
1145                 left->box.ymax= root->divider[1];
1146                 right->box.ymin= root->divider[1];
1147         }
1148 }
1149
1150 /* inserts sample in main tree, also splits on threshold */
1151 /* returns 1 if error */
1152 static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
1153 {
1154         ISBBranch *bspn= root;
1155         float *zco= sample->zco;
1156         int i= 0;
1157         
1158         /* debug counter, also used to check if something was filled in ever */
1159         root->totsamp++;
1160         
1161         /* going over branches until last one found */
1162         while(bspn->left) {
1163                 if(zco[bspn->index] <= bspn->divider[bspn->index])
1164                         bspn= bspn->left;
1165                 else
1166                         bspn= bspn->right;
1167                 i++;
1168         }
1169         /* bspn now is the last branch */
1170         
1171         if(bspn->totsamp==BSPMAX_SAMPLE) {
1172                 printf("error in bsp branch\n");        /* only for debug, cannot happen */
1173                 return 1;
1174         }
1175         
1176         /* insert */
1177         bspn->samples[bspn->totsamp]= sample;
1178         bspn->totsamp++;
1179
1180         /* split if allowed and needed */
1181         if(bspn->totsamp==BSPMAX_SAMPLE) {
1182                 if(i==BSPMAX_DEPTH) {
1183                         bspn->totsamp--;        /* stop filling in... will give errors */
1184                         return 1;
1185                 }
1186                 isb_bsp_split(bspn, memarena);
1187         }
1188         return 0;
1189 }
1190
1191 static float VecLen2f( float *v1, float *v2)
1192 {
1193         float x= v1[0]-v2[0];
1194         float y= v1[1]-v2[1];
1195         return (float)sqrt(x*x+y*y);
1196 }
1197
1198 /* initialize vars in face, for optimal point-in-face test */
1199 static void bspface_init_strand(BSPFace *face) 
1200 {
1201         
1202         face->radline= 0.5f*VecLen2f(face->v1, face->v2);
1203         
1204         VecMidf(face->vec1, face->v1, face->v2);
1205         if(face->v4)
1206                 VecMidf(face->vec2, face->v3, face->v4);
1207         else
1208                 VECCOPY(face->vec2, face->v3);
1209         
1210         face->rc[0]= face->vec2[0]-face->vec1[0];
1211         face->rc[1]= face->vec2[1]-face->vec1[1];
1212         face->rc[2]= face->vec2[2]-face->vec1[2];
1213         
1214         face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
1215         
1216         if(face->len!=0.0f) {
1217                 face->radline_end= face->radline/sqrt(face->len);
1218                 face->len= 1.0f/face->len;
1219         }
1220 }
1221
1222 /* brought back to a simple 2d case */
1223 static int point_behind_strand(float *p, BSPFace *face)
1224 {
1225         /* v1 - v2 is radius, v1 - v3 length */
1226         float dist, rc[2], pt[2];
1227         
1228         /* using code from PdistVL2Dfl(), distance vec to line-piece */
1229
1230         if(face->len==0.0f) {
1231                 rc[0]= p[0]-face->vec1[0];
1232                 rc[1]= p[1]-face->vec1[1];
1233                 dist= (float)(sqrt(rc[0]*rc[0]+ rc[1]*rc[1]));
1234                 
1235                 if(dist < face->radline)
1236                         return 1;
1237         }
1238         else {
1239                 float labda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
1240                 
1241                 if(labda > -face->radline_end && labda < 1.0f+face->radline_end) { 
1242                         /* hesse for dist: */
1243                         //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len);
1244                         
1245                         pt[0]= labda*face->rc[0]+face->vec1[0];
1246                         pt[1]= labda*face->rc[1]+face->vec1[1];
1247                         
1248                         rc[0]= pt[0]-p[0];
1249                         rc[1]= pt[1]-p[1];
1250                         dist= (float)sqrt(rc[0]*rc[0]+ rc[1]*rc[1]);
1251                         
1252                         if(dist < face->radline) {
1253                                 float zval= face->vec1[2] + labda*face->rc[2];
1254                                 if(p[2] > zval)
1255                                         return 1;
1256                         }
1257                 }
1258         }
1259         return 0;
1260 }
1261
1262
/* return 1 if inside. code derived from src/parametrizer.c */
/* 'inside' is tested in 2d (x and y) with the borders included, and on top of
 * that the triangle has to be at the z of p or in front of it */
static int point_behind_tria2d(float *p, float *v1, float *v2, float *v3)
{
	float e1[2], e2[2], rel[2];
	float det, inv, u, v, z;

	/* the two edges leaving v1 */
	e1[0] = v2[0] - v1[0];
	e1[1] = v2[1] - v1[1];
	e2[0] = v3[0] - v1[0];
	e2[1] = v3[1] - v1[1];

	/* triangle without area in 2d cannot contain anything */
	det = e1[0]*e2[1] - e1[1]*e2[0];
	if(det==0.0f)
		return 0;

	/* p relative to v1 */
	rel[0] = p[0] - v1[0];
	rel[1] = p[1] - v1[1];

	inv = 1.0f/det;

	/* weights of p for v2 (u) and v3 (v), stop as soon as one rules out p */
	u = (rel[0]*e2[1] - rel[1]*e2[0])*inv;
	if(!(u >= 0.0f))
		return 0;

	v = (e1[0]*rel[1] - e1[1]*rel[0])*inv;
	if(!(v >= 0.0f))
		return 0;

	if(!(u + v <= 1.0f))
		return 0;

	/* inside, now check if point p is behind */
	z=  (1.0f-u-v)*v1[2] + u*v2[2] + v*v3[2];
	return (z <= p[2])? 1: 0;
}
1298
/* disabled code, kept for reference only */
1299 #if 0
1300 /* tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */
1301
1302 /* check if line v1-v2 has all rect points on other side of point v3 */
1303 static int rect_outside_line(rctf *rect, float *v1, float *v2, float *v3)
1304 {
1305         float a, b, c;
1306         int side;
1307         
1308         /* line formula for v1-v2 */
1309         a= v2[1]-v1[1];
1310         b= v1[0]-v2[0];
1311         c= -a*v1[0] - b*v1[1];
             /* side is 1 when v3 gives a negative value in the line formula */
1312         side= a*v3[0] + b*v3[1] + c < 0.0f;
1313         
1314         /* the four quad points */
1315         if( side==(rect->xmin*a + rect->ymin*b + c >= 0.0f) )
1316                 if( side==(rect->xmax*a + rect->ymin*b + c >= 0.0f) )
1317                         if( side==(rect->xmax*a + rect->ymax*b + c >= 0.0f) )
1318                                 if( side==(rect->xmin*a + rect->ymax*b + c >= 0.0f) )
1319                                         return 1;
1320         return 0;
1321 }
1322
1323 /* check if one of the triangle edges separates all rect points on 1 side */
     /* returns 0 when such an edge is found, 1 otherwise */
1324 static int rect_isect_tria(rctf *rect, float *v1, float *v2, float *v3)
1325 {
1326         if(rect_outside_line(rect, v1, v2, v3))
1327                 return 0;
1328         if(rect_outside_line(rect, v2, v3, v1))
1329                 return 0;
1330         if(rect_outside_line(rect, v3, v1, v2))
1331                 return 0;
1332         return 1;
1333 }
1334 #endif
1335
1336 /* if face overlaps a branch, it tests the samples in it. recursive */
/* Descends into every child the face boundbox reaches. In a leaf, each sample
 * that is found behind the face gets face->shad_alpha added to its shadfac.
 * A sample that reaches 4096, or is hit by a face with is_full set, gets its
 * shadfac pointer cleared and is counted in bspn->full. */
1337 static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
1338 {
1339         
1340         /* are we descending? */
1341         if(bspn->left) {
1342                 /* hrmf, the box struct cannot be addressed with index */
                     /* a face that straddles the divider visits both children */
1343                 if(bspn->index==0) {
1344                         if(face->box.xmin <= bspn->divider[0])
1345                                 isb_bsp_face_inside(bspn->left, face);
1346                         if(face->box.xmax > bspn->divider[0])
1347                                 isb_bsp_face_inside(bspn->right, face);
1348                 }
1349                 else {
1350                         if(face->box.ymin <= bspn->divider[1])
1351                                 isb_bsp_face_inside(bspn->left, face);
1352                         if(face->box.ymax > bspn->divider[1])
1353                                 isb_bsp_face_inside(bspn->right, face);
1354                 }
1355         }
1356         else {
1357                 /* else: end branch reached */
1358                 int a;
1359                 
1360                 if(bspn->totsamp==0) return;
1361                 
1362                 /* check for nodes entirely in shadow, can be skipped */
1363                 if(bspn->totsamp==bspn->full)
1364                         return;
1365                 
1366                 /* if bsp node is entirely in front of face, give up */
1367                 if(bspn->box.zmax < face->box.zmin)
1368                         return;
1369                 
1370                 /* if face boundbox is outside of branch rect, give up */
                     /* only the x and y part of both boxes is used here, via the cast to rctf */
1371                 if(0==BLI_isect_rctf((rctf *)&face->box, (rctf *)&bspn->box, NULL))
1372                         return;
1373                 
1374                 /* test all points inside branch */
1375                 for(a=bspn->totsamp-1; a>=0; a--) {
1376                         ISBSample *samp= bspn->samples[a];
1377                         
                             /* skip samples on this same face, and samples without shadfac pointer
                              * (it is cleared below once a sample is completely shadowed) */
1378                         if((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
1379                                 if(face->box.zmin < samp->zco[2]) {
1380                                         if(BLI_in_rctf((rctf *)&face->box, samp->zco[0], samp->zco[1])) {
1381                                                 int inshadow= 0;
1382                                                 
                                                     /* strands have their own test, other faces are tested
                                                      * as one triangle or as two for a quad */
1383                                                 if(face->type) {
1384                                                         if(point_behind_strand(samp->zco, face)) 
1385                                                                 inshadow= 1;
1386                                                 }
1387                                                 else if( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
1388                                                         inshadow= 1;
1389                                                 else if(face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
1390                                                         inshadow= 1;
1391
1392                                                 if(inshadow) {
1393                                                         *(samp->shadfac) += face->shad_alpha;
1394                                                         /* optimize; is_full means shad_alpha==4096 */
1395                                                         if(*(samp->shadfac) >= 4096 || face->is_full) {
1396                                                                 bspn->full++;
1397                                                                 samp->shadfac= NULL;
1398                                                         }
1399                                                 }
1400                                         }
1401                                 }
1402                         }
1403                 }
1404         }
1405 }
1406
1407 /* based on available samples, recalculate the bounding box for bsp nodes, recursive */
1408 static void isb_bsp_recalc_box(ISBBranch *root)
1409 {
1410         if(root->left) {
1411                 isb_bsp_recalc_box(root->left);
1412                 isb_bsp_recalc_box(root->right);
1413         }
1414         else if(root->totsamp) {
1415                 int a;
1416                 
1417                 init_box(&root->box);
1418                 for(a=root->totsamp-1; a>=0; a--)
1419                         bound_boxf(&root->box, root->samples[a]->zco);
1420         }       
1421 }
1422
1423 /* callback function for zbuf clip */
1424 static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4)
1425 {
1426         BSPFace face;
1427         
1428         face.v1= v1;
1429         face.v2= v2;
1430         face.v3= v3;
1431         face.v4= v4;
1432         face.obi= obi;
1433         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1434         face.type= R_STRAND;
1435         if(R.osa)
1436                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1437         else
1438                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1439         
1440         face.is_full= (zspan->shad_alpha==1.0f);
1441         
1442         /* setup boundbox */
1443         init_box(&face.box);
1444         bound_boxf(&face.box, v1);
1445         bound_boxf(&face.box, v2);
1446         bound_boxf(&face.box, v3);
1447         if(v4)
1448                 bound_boxf(&face.box, v4);
1449         
1450         /* optimize values */
1451         bspface_init_strand(&face);
1452         
1453         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1454         
1455 }
1456
1457 /* callback function for zbuf clip */
1458 static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4) 
1459 {
1460         BSPFace face;
1461         
1462         face.v1= v1;
1463         face.v2= v2;
1464         face.v3= v3;
1465         face.v4= v4;
1466         face.obi= obi;
1467         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1468         face.type= 0;
1469         if(R.osa)
1470                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1471         else
1472                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1473         
1474         face.is_full= (zspan->shad_alpha==1.0f);
1475         
1476         /* setup boundbox */
1477         init_box(&face.box);
1478         bound_boxf(&face.box, v1);
1479         bound_boxf(&face.box, v2);
1480         bound_boxf(&face.box, v3);
1481         if(v4)
1482                 bound_boxf(&face.box, v4);
1483
1484         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1485 }
1486
/* clip flags for homogeneous coordinate ho against the x and y limits in minmax
 * (xmin, xmax, ymin, ymax), which are multiplied with ho[3] before comparing.
 * returns 1 or 2 for x above or below the limits, or-ed with 4 or 8 for y */
static int testclip_minmax(float *ho, float *minmax)
{
	const float w= ho[3];
	int xflag= 0, yflag= 0;

	if(ho[0] > minmax[1]*w)
		xflag= 1;
	else if(ho[0] < minmax[0]*w)
		xflag= 2;

	if(ho[1] > minmax[3]*w)
		yflag= 4;
	else if(ho[1] < minmax[2]*w)
		yflag= 8;

	return xflag | yflag;
}
1500
1501 /* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
/* Every face of every object instance that is allowed to cast shadow is
 * projected with the lamp matrix, clipped, and then handed via the zbuffer
 * clipping code to isb_bsp_test_face() or isb_bsp_test_strand(), which test
 * it against the samples in the tree of root. */
1502 static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
1503 {
1504         ObjectInstanceRen *obi;
1505         ObjectRen *obr;
1506         ShadBuf *shb= lar->shb;
1507         ZSpan zspan, zspanstrand;
1508         VlakRen *vlr= NULL;
1509         Material *ma= NULL;
1510         float minmaxf[4], winmat[4][4];
1511         int size= shb->size;
1512         int i, a, ok=1, lay= -1;
1513         
1514         /* further optimize, also sets minz maxz */
1515         isb_bsp_recalc_box(root);
1516         
1517         /* extra clipping for minmax */
             /* box of root converted from buffer pixels to the -1..1 range, with 2 pixels margin */
1518         minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
1519         minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
1520         minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
1521         minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
1522         
             /* lay stays -1 (all bits set) unless one of the lamp layer options is on */
1523         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
1524         
1525         /* (ab)use zspan, since we use zbuffer clipping code */
1526         zbuf_alloc_span(&zspan, size, size, re->clipcrop);
1527         
1528         zspan.zmulx=  ((float)size)/2.0f;
1529         zspan.zmuly=  ((float)size)/2.0f;
1530         zspan.zofsx= -0.5f;
1531         zspan.zofsy= -0.5f;
1532         
1533         /* pass on bsp root to zspan */
1534         zspan.rectz= (int *)root;
1535         
1536         /* filling methods */
             /* zspanstrand is a plain copy of zspan that only differs in callback,
              * at the end only zspan gets freed */
1537         zspanstrand= zspan;
1538         //      zspan.zbuflinefunc= zbufline_onlyZ;
1539         zspan.zbuffunc= isb_bsp_test_face;
1540         zspanstrand.zbuffunc= isb_bsp_test_strand;
1541         
1542         for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
1543                 obr= obi->obr;
1544
1545                 if(obi->flag & R_TRANSFORMED)
1546                         Mat4MulMat4(winmat, obi->mat, shb->persmat);
1547                 else
1548                         Mat4CpyMat4(winmat, shb->persmat);
1549
1550                 for(a=0; a<obr->totvlak; a++) {
1551                         
                             /* faces are stored in nodes of 256, get the next node or the next face */
1552                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
1553                         else vlr++;
1554                         
1555                         /* note, these conditions are copied in shadowbuf_autoclip() */
                             /* only evaluated again when the material changes, ok and shad_alpha
                              * are kept for the following faces with the same material */
1556                         if(vlr->mat!= ma) {
1557                                 ma= vlr->mat;
1558                                 ok= 1;
1559                                 if((ma->mode & MA_SHADBUF)==0) ok= 0;
1560                                 if(ma->material_type == MA_TYPE_WIRE) ok= 0;
1561                                 zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
1562                         }
1563                         
1564                         if(ok && (obi->lay & lay)) {
1565                                 float hoco[4][4];
1566                                 int c1, c2, c3, c4=0;
1567                                 int d1, d2, d3, d4=0;
1568                                 int partclip;
1569                                 
1570                                 /* create hocos per face, it is while render */
1571                                 projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
1572                                 projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
1573                                 projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
1574                                 if(vlr->v4) {
1575                                         projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
1576                                 }
1577
1578                                 /* minmax clipping */
                                     /* a bit that is set for all vertices means the whole face is outside on that side */
1579                                 if(vlr->v4) partclip= d1 & d2 & d3 & d4;
1580                                 else partclip= d1 & d2 & d3;
1581                                 
1582                                 if(partclip==0) {
1583                                         
1584                                         /* window clipping */
1585                                         c1= testclip(hoco[0]); 
1586                                         c2= testclip(hoco[1]); 
1587                                         c3= testclip(hoco[2]); 
1588                                         if(vlr->v4)
1589                                                 c4= testclip(hoco[3]); 
1590                                         
1591                                         /* ***** NO WIRE YET */                 
                                             /* ok is 0 for MA_TYPE_WIRE materials (see above), so this
                                              * wire branch is not reached */
1592                                         if(ma->material_type == MA_TYPE_WIRE) {
1593                                                 if(vlr->v4)
1594                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
1595                                                 else
1596                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], 0, c1, c2, c3, 0);
1597                                         }
                                             /* only quads with the R_STRAND flag use the strand callback,
                                              * triangles always use the face callback */
1598                                         else if(vlr->v4) {
1599                                                 if(vlr->flag & R_STRAND)
1600                                                         zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
1601                                                 else
1602                                                         zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
1603                                         }
1604                                         else
1605                                                 zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
1606                                         
1607                                 }
1608                         }
1609                 }
1610         }
1611         
1612         zbuf_free_span(&zspan);
1613 }
1614
1615 /* returns 1 when the viewpixel is visible in lampbuffer */
1616 static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float *co)
1617 {
1618         float hoco[4], v1[3], nor[3];
1619         float dface, fac, siz;
1620         
1621         RE_vlakren_get_normal(&R, obi, vlr, nor);
1622         VECCOPY(v1, vlr->v1->co);
1623         if(obi->flag & R_TRANSFORMED)
1624                 Mat4MulVecfl(obi->mat, v1);
1625
1626         /* from shadepixel() */
1627         dface= v1[0]*nor[0] + v1[1]*nor[1] + v1[2]*nor[2];
1628         hoco[3]= 1.0f;
1629         
1630         /* ortho viewplane cannot intersect using view vector originating in (0,0,0) */
1631         if(R.r.mode & R_ORTHO) {
1632                 /* x and y 3d coordinate can be derived from pixel coord and winmat */
1633                 float fx= 2.0/(R.winx*R.winmat[0][0]);
1634                 float fy= 2.0/(R.winy*R.winmat[1][1]);
1635                 
1636                 hoco[0]= (x - 0.5*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
1637                 hoco[1]= (y - 0.5*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
1638                 
1639                 /* using a*x + b*y + c*z = d equation, (a b c) is normal */
1640                 if(nor[2]!=0.0f)
1641                         hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
1642                 else
1643                         hoco[2]= 0.0f;
1644         }
1645         else {
1646                 float div, view[3];
1647                 
1648                 calc_view_vector(view, x, y);
1649                 
1650                 div= nor[0]*view[0] + nor[1]*view[1] + nor[2]*view[2];
1651                 if (div==0.0f) 
1652                         return 0;
1653                 
1654                 fac= dface/div;
1655                 
1656                 hoco[0]= fac*view[0];
1657                 hoco[1]= fac*view[1];
1658                 hoco[2]= fac*view[2];
1659         }
1660         
1661         /* move 3d vector to lampbuf */
1662         MTC_Mat4MulVec4fl(shb->persmat, hoco);  /* rational hom co */
1663         
1664         /* clip We can test for -1.0/1.0 because of the properties of the
1665          * coordinate transformations. */
1666         fac= fabs(hoco[3]);
1667         if(hoco[0]<-fac || hoco[0]>fac)
1668                 return 0;
1669         if(hoco[1]<-fac || hoco[1]>fac)
1670                 return 0;
1671         if(hoco[2]<-fac || hoco[2]>fac)
1672                 return 0;
1673         
1674         siz= 0.5f*(float)shb->size;
1675         co[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
1676         co[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
1677         co[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
1678         
1679         /* XXXX bias, much less than normal shadbuf, or do we need a constant? */
1680         co[2] -= 0.05f*shb->bias;
1681         
1682         return 1;
1683 }
1684
1685 /* storage of shadow results, solid osa and transp case */
1686 static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
1687 {
1688         ISBShadfacA *new;
1689         float shadfacf;
1690         
1691         /* in osa case, the samples were filled in with factor 1.0/R.osa. if fewer samples we have to correct */
1692         if(R.osa)
1693                 shadfacf= ((float)shadfac*R.osa)/(4096.0*samples);
1694         else
1695                 shadfacf= ((float)shadfac)/(4096.0);
1696         
1697         new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
1698         new->obi= obi;
1699         new->facenr= facenr & ~RE_QUAD_OFFS;
1700         new->shadfac= shadfacf;
1701         if(*isbsapp)
1702                 new->next= (*isbsapp);
1703         else
1704                 new->next= NULL;
1705         
1706         *isbsapp= new;
1707 }
1708
1709 /* adding samples, solid case */
1710 static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
1711 {
1712         int xi, yi, *xcos, *ycos;
1713         int sample, bsp_err= 0;
1714         
1715         /* bsp split doesn't like to handle regular sequenes */
1716         xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
1717         ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
1718         for(xi=0; xi<pa->rectx; xi++)
1719                 xcos[xi]= xi;
1720         for(yi=0; yi<pa->recty; yi++)
1721                 ycos[yi]= yi;
1722         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
1723         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
1724         
1725         for(sample=0; sample<(R.osa?R.osa:1); sample++) {
1726                 ISBSample *samp= samplebuf[sample], *samp1;
1727                 
1728                 for(yi=0; yi<pa->recty; yi++) {
1729                         int y= ycos[yi];
1730                         for(xi=0; xi<pa->rectx; xi++) {
1731                                 int x= xcos[xi];
1732                                 samp1= samp + y*pa->rectx + x;
1733                                 if(samp1->facenr)
1734                                         bsp_err |= isb_bsp_insert(root, memarena, samp1);
1735                         }
1736                         if(bsp_err) break;
1737                 }
1738         }       
1739         
1740         MEM_freeN(xcos);
1741         MEM_freeN(ycos);
1742
1743         return bsp_err;
1744 }
1745
1746 /* solid version */
1747 /* lar->shb, pa->rectz and pa->rectp should exist */
1748 static void isb_make_buffer(RenderPart *pa, LampRen *lar)
1749 {
1750         ShadBuf *shb= lar->shb;
1751         ISBData *isbdata;
1752         ISBSample *samp, *samplebuf[16];        /* should be RE_MAX_OSA */
1753         ISBBranch root;
1754         MemArena *memarena;
1755         intptr_t *rd;
1756         int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
1757         
1758         /* storage for shadow, per thread */
1759         isbdata= shb->isb_result[pa->thread];
1760         
1761         /* to map the shi->xs and ys coordinate */
1762         isbdata->minx= pa->disprect.xmin;
1763         isbdata->miny= pa->disprect.ymin;
1764         isbdata->rectx= pa->rectx;
1765         isbdata->recty= pa->recty;
1766         
1767         /* branches are added using memarena (32k branches) */
1768         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch));
1769         BLI_memarena_use_calloc(memarena);
1770         
1771         /* samplebuf is in camera view space (pixels) */
1772         for(sample=0; sample<(R.osa?R.osa:1); sample++)
1773                 samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
1774         
1775         /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
1776         if(R.osa==0)
1777                 isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
1778         
1779         /* setup bsp root */
1780         memset(&root, 0, sizeof(ISBBranch));
1781         root.box.xmin= (float)shb->size;
1782         root.box.ymin= (float)shb->size;
1783         
1784         /* create the sample buffers */
1785         for(sindex=0, y=0; y<pa->recty; y++) {
1786                 for(x=0; x<pa->rectx; x++, sindex++) {
1787                         
1788                         /* this makes it a long function, but splitting it out would mean 10+ arguments */
1789                         /* first check OSA case */
1790                         if(R.osa) {
1791                                 rd= pa->rectdaps + sindex;
1792                                 if(*rd) {
1793                                         float xs= (float)(x + pa->disprect.xmin);
1794                                         float ys= (float)(y + pa->disprect.ymin);
1795                                         
1796                                         for(sample=0; sample<R.osa; sample++) {
1797                                                 PixStr *ps= (PixStr *)(*rd);
1798                                                 int mask= (1<<sample);
1799                                                 
1800                                                 while(ps) {
1801                                                         if(ps->mask & mask)
1802                                                                 break;
1803                                                         ps= ps->next;
1804                                                 }
1805                                                 if(ps && ps->facenr>0) {
1806                                                         ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
1807                                                         ObjectRen *obr= obi->obr;
1808                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
1809                                                         
1810                                                         samp= samplebuf[sample] + sindex;
1811                                                         /* convert image plane pixel location to lamp buffer space */
1812                                                         if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
1813                                                                 samp->obi= ps->obi;
1814                                                                 samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
1815                                                                 ps->shadfac= 0;
1816                                                                 samp->shadfac= &ps->shadfac;
1817                                                                 bound_rectf((rctf *)&root.box, samp->zco);
1818                                                         }
1819                                                 }
1820                                         }
1821                                 }
1822                         }
1823                         else {
1824                                 rectp= pa->rectp + sindex;
1825                                 recto= pa->recto + sindex;
1826                                 if(*rectp>0) {
1827                                         ObjectInstanceRen *obi= &R.objectinstance[*recto];
1828                                         ObjectRen *obr= obi->obr;
1829                                         VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
1830                                         float xs= (float)(x + pa->disprect.xmin);
1831                                         float ys= (float)(y + pa->disprect.ymin);
1832                                         
1833                                         samp= samplebuf[0] + sindex;
1834                                         /* convert image plane pixel location to lamp buffer space */
1835                                         if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
1836                                                 samp->obi= *recto;
1837                                                 samp->facenr= *rectp & ~RE_QUAD_OFFS;
1838                                                 samp->shadfac= isbdata->shadfacs + sindex;
1839                                                 bound_rectf((rctf *)&root.box, samp->zco);
1840                                         }
1841                                 }
1842                         }
1843                 }
1844         }
1845         
1846         /* simple method to see if we have samples */
1847         if(root.box.xmin != (float)shb->size) {
1848                 /* now create a regular split, root.box has the initial bounding box of all pixels */
1849                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
1850                 isb_bsp_split_init(&root, memarena, 8);
1851                 
1852                 /* insert all samples in BSP now */
1853                 bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
1854                         
1855                 if(bsp_err==0) {
1856                         /* go over all faces and fill in shadow values */
1857                         
1858                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
1859                         
1860                         /* copy shadow samples to persistant buffer, reduce memory overhead */
1861                         if(R.osa) {
1862                                 ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
1863                                 
1864                                 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA));
1865                                 BLI_memarena_use_calloc(isbdata->memarena);
1866
1867                                 for(rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
1868                                         
1869                                         if(*rd) {
1870                                                 PixStr *ps= (PixStr *)(*rd);
1871                                                 while(ps) {
1872                                                         if(ps->shadfac)
1873                                                                 isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
1874                                                         ps= ps->next;
1875                                                 }
1876                                         }
1877                                 }
1878                         }
1879                 }
1880         }
1881         else {
1882                 if(isbdata->shadfacs) {
1883                         MEM_freeN(isbdata->shadfacs);
1884                         isbdata->shadfacs= NULL;
1885                 }
1886         }
1887
1888         /* free BSP */
1889         BLI_memarena_free(memarena);
1890         
1891         /* free samples */
1892         for(x=0; x<(R.osa?R.osa:1); x++)
1893                 MEM_freeN(samplebuf[x]);
1894         
1895         if(bsp_err) printf("error in filling bsp\n");
1896 }
1897
1898 /* add sample to buffer, isbsa is the root sample in a buffer */
1899 static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
1900 {
1901         ISBSampleA *new;
1902         
1903         new= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
1904         if(*isbsa)
1905                 new->next= (*isbsa);
1906         else
1907                 new->next= NULL;
1908         
1909         *isbsa= new;
1910         return new;
1911 }
1912
1913 /* adding samples in BSP, transparent case */
1914 static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
1915 {
1916         int xi, yi, *xcos, *ycos;
1917         int sample, bsp_err= 0;
1918         
1919         /* bsp split doesn't like to handle regular sequenes */
1920         xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
1921         ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
1922         for(xi=0; xi<pa->rectx; xi++)
1923                 xcos[xi]= xi;
1924         for(yi=0; yi<pa->recty; yi++)
1925                 ycos[yi]= yi;
1926         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
1927         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
1928         
1929         for(sample=0; sample<(R.osa?R.osa:1); sample++) {
1930                 ISBSampleA **samp= samplebuf[sample], *samp1;
1931                 
1932                 for(yi=0; yi<pa->recty; yi++) {
1933                         int y= ycos[yi];
1934                         for(xi=0; xi<pa->rectx; xi++) {
1935                                 int x= xcos[xi];
1936                                 
1937                                 samp1= *(samp + y*pa->rectx + x);
1938                                 while(samp1) {
1939                                         bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
1940                                         samp1= samp1->next;
1941                                 }
1942                         }
1943                         if(bsp_err) break;
1944                 }
1945         }       
1946         
1947         MEM_freeN(xcos);
1948         MEM_freeN(ycos);
1949         
1950         return bsp_err;
1951 }
1952
1953
1954 /* Ztransp version */
1955 /* lar->shb, pa->rectz and pa->rectp should exist */
1956 static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
1957 {
1958         ShadBuf *shb= lar->shb;
1959         ISBData *isbdata;
1960         ISBSampleA *samp, **samplebuf[16];      /* MAX_OSA */
1961         ISBBranch root;
1962         MemArena *memarena;
1963         APixstr *ap;
1964         int x, y, sindex, sample, bsp_err=0;
1965         
1966         /* storage for shadow, per thread */
1967         isbdata= shb->isb_result[pa->thread];
1968         
1969         /* to map the shi->xs and ys coordinate */
1970         isbdata->minx= pa->disprect.xmin;
1971         isbdata->miny= pa->disprect.ymin;
1972         isbdata->rectx= pa->rectx;
1973         isbdata->recty= pa->recty;
1974         
1975         /* branches are added using memarena (32k branches) */
1976         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch));
1977         BLI_memarena_use_calloc(memarena);
1978         
1979         /* samplebuf is in camera view space (pixels) */
1980         for(sample=0; sample<(R.osa?R.osa:1); sample++)
1981                 samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
1982         
1983         /* setup bsp root */
1984         memset(&root, 0, sizeof(ISBBranch));
1985         root.box.xmin= (float)shb->size;
1986         root.box.ymin= (float)shb->size;
1987
1988         /* create the sample buffers */
1989         for(ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
1990                 for(x=0; x<pa->rectx; x++, sindex++, ap++) {
1991                         
1992                         if(ap->p[0]) {
1993                                 APixstr *apn;
1994                                 float xs= (float)(x + pa->disprect.xmin);
1995                                 float ys= (float)(y + pa->disprect.ymin);
1996                                 
1997                                 for(apn=ap; apn; apn= apn->next) {
1998                                         int a;
1999                                         for(a=0; a<4; a++) {
2000                                                 if(apn->p[a]) {
2001                                                         ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
2002                                                         ObjectRen *obr= obi->obr;
2003                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
2004                                                         float zco[3];
2005                                                         
2006                                                         /* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
2007                                                         apn->shadfac[a]= 0;
2008                                                         
2009                                                         if(R.osa) {
2010                                                                 for(sample=0; sample<R.osa; sample++) {
2011                                                                         int mask= (1<<sample);
2012                                                                         
2013                                                                         if(apn->mask[a] & mask) {
2014                                                                                 
2015                                                                                 /* convert image plane pixel location to lamp buffer space */
2016                                                                                 if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
2017                                                                                         samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
2018                                                                                         samp->obi= apn->obi[a];
2019                                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2020                                                                                         samp->shadfac= &apn->shadfac[a];
2021                                                                                         
2022                                                                                         VECCOPY(samp->zco, zco);
2023                                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2024                                                                                 }
2025                                                                         }
2026                                                                 }
2027                                                         }
2028                                                         else {
2029                                                                 
2030                                                                 /* convert image plane pixel location to lamp buffer space */
2031                                                                 if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
2032                                                                         
2033                                                                         samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
2034                                                                         samp->obi= apn->obi[a];
2035                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2036                                                                         samp->shadfac= &apn->shadfac[a];
2037                                                                         
2038                                                                         VECCOPY(samp->zco, zco);
2039                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2040                                                                 }
2041                                                         }
2042                                                 }
2043                                         }
2044                                 }
2045                         }
2046                 }
2047         }
2048         
2049         /* simple method to see if we have samples */
2050         if(root.box.xmin != (float)shb->size) {
2051                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2052                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2053                 isb_bsp_split_init(&root, memarena, 8);
2054                 
2055                 /* insert all samples in BSP now */
2056                 bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
2057                 
2058                 if(bsp_err==0) {
2059                         ISBShadfacA **isbsa;
2060                         
2061                         /* go over all faces and fill in shadow values */
2062                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2063                         
2064                         /* copy shadow samples to persistant buffer, reduce memory overhead */
2065                         isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2066                         
2067                         isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA));
2068                         
2069                         for(ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
2070                                         
2071                                 if(ap->p[0]) {
2072                                         APixstr *apn;
2073                                         for(apn=ap; apn; apn= apn->next) {
2074                                                 int a;
2075                                                 for(a=0; a<4; a++) {
2076                                                         if(apn->p[a] && apn->shadfac[a]) {
2077                                                                 if(R.osa)
2078                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
2079                                                                 else
2080                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);
2081                                                         }
2082                                                 }
2083                                         }
2084                                 }
2085                         }
2086                 }
2087         }
2088
2089         /* free BSP */
2090         BLI_memarena_free(memarena);
2091
2092         /* free samples */
2093         for(x=0; x<(R.osa?R.osa:1); x++)
2094                 MEM_freeN(samplebuf[x]);
2095
2096         if(bsp_err) printf("error in filling bsp\n");
2097 }
2098
2099
2100
2101 /* exported */
2102
2103 /* returns amount of light (1.0 = no shadow) */
2104 /* note, shadepixel() rounds the coordinate, not the real sample info */
2105 float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
2106 {
2107         /* if raytracing, we can't accept irregular shadow */
2108         if(shi->depth==0) {
2109                 ISBData *isbdata= shb->isb_result[shi->thread];
2110                 
2111                 if(isbdata) {
2112                         if(isbdata->shadfacs || isbdata->shadfaca) {
2113                                 int x= shi->xs - isbdata->minx;
2114                                 
2115                                 if(x >= 0 && x < isbdata->rectx) {
2116                                         int y= shi->ys - isbdata->miny;
2117                         
2118                                         if(y >= 0 && y < isbdata->recty) {
2119                                                 if(isbdata->shadfacs) {
2120                                                         short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
2121                                                         return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
2122                                                 }
2123                                                 else {
2124                                                         int sindex= y*isbdata->rectx + x;
2125                                                         int obi= shi->obi - R.objectinstance;
2126                                                         ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex);
2127                                                         
2128                                                         while(isbsa) {
2129                                                                 if(isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
2130                                                                         return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
2131                                                                 isbsa= isbsa->next;
2132                                                         }
2133                                                 }
2134                                         }
2135                                 }
2136                         }
2137                 }
2138         }
2139         return 1.0f;
2140 }
2141
2142 /* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
2143 void ISB_create(RenderPart *pa, APixstr *apixbuf)
2144 {
2145         GroupObject *go;
2146         
2147         /* go over all lamps, and make the irregular buffers */
2148         for(go=R.lights.first; go; go= go->next) {
2149                 LampRen *lar= go->lampren;
2150                 
2151                 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2152                         
2153                         /* create storage for shadow, per thread */
2154                         lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
2155                         
2156                         if(apixbuf)
2157                                 isb_make_buffer_transp(pa, apixbuf, lar);
2158                         else
2159                                 isb_make_buffer(pa, lar);
2160                 }
2161         }
2162 }
2163
2164
2165 /* end of part rendering, free stored shadow data for this thread from all lamps */
2166 void ISB_free(RenderPart *pa)
2167 {
2168         GroupObject *go;
2169         
2170         /* go over all lamps, and free the irregular buffers */
2171         for(go=R.lights.first; go; go= go->next) {
2172                 LampRen *lar= go->lampren;
2173                 
2174                 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2175                         ISBData *isbdata= lar->shb->isb_result[pa->thread];
2176
2177                         if(isbdata) {
2178                                 if(isbdata->shadfacs)
2179                                         MEM_freeN(isbdata->shadfacs);
2180                                 if(isbdata->shadfaca)
2181                                         MEM_freeN(isbdata->shadfaca);
2182                                 
2183                                 if(isbdata->memarena)
2184                                         BLI_memarena_free(isbdata->memarena);
2185                                 
2186                                 MEM_freeN(isbdata);
2187                                 lar->shb->isb_result[pa->thread]= NULL;
2188                         }
2189                 }
2190         }
2191 }
2192