Merge various small changes from render branch:
[blender-staging.git] / source / blender / render / intern / source / shadbuf.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * Contributor(s): 2004-2006, Blender Foundation
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 #include <math.h>
27 #include <string.h>
28
29
30 #include "MEM_guardedalloc.h"
31
32 #include "DNA_group_types.h"
33 #include "DNA_lamp_types.h"
34 #include "DNA_material_types.h"
35
36 #include "BKE_global.h"
37 #include "BKE_scene.h"
38 #include "BKE_utildefines.h"
39
40 #include "BLI_math.h"
41 #include "BLI_blenlib.h"
42 #include "BLI_jitter.h"
43 #include "BLI_memarena.h"
44 #include "BLI_rand.h"
45
46 #include "PIL_time.h"
47
48 #include "renderpipeline.h"
49 #include "render_types.h"
50 #include "renderdatabase.h"
51 #include "rendercore.h"
52 #include "shadbuf.h"
53 #include "shading.h"
54 #include "zbuf.h"
55
56 /* XXX, could be better implemented... this is for endian issues.
 * RCOMP, GCOMP, BCOMP and ACOMP are byte indices (0..3) into a 4-byte value;
 * the order is mirrored when one of the big-endian platform macros tested
 * below is defined.
57 */
58 #if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__hppa__) || defined (__BIG_ENDIAN__)
59 #define RCOMP   3
60 #define GCOMP   2
61 #define BCOMP   1
62 #define ACOMP   0
63 #else
64 #define RCOMP   0
65 #define GCOMP   1
66 #define BCOMP   2
67 #define ACOMP   3
68 #endif
69
70 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
71 /* Defined in pipeline.c: a hard copy of the active, dynamically allocated Render. */
72 /* Only to be used here in this file; it exists for speed. */
73 extern struct Render R;
74 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
75
76 /* ------------------------------------------------------------------------- */
77
78 /* initshadowbuf() in convertBlenderScene.c */
79
80 /* ------------------------------------------------------------------------- */
81
/* Copy one square tile out of a size x size z-buffer into a flat help buffer.
 *
 * rectz:  source z-buffer, size*size ints stored row after row.
 * size:   width and height of rectz in pixels.
 * x1, y1: first column and row of the tile inside rectz.
 * tile:   edge length of the tile in pixels.
 * r1:     destination; the copied rows are written back to back, each
 *         (x2-x1) ints wide.
 *
 * The tile is clipped against the buffer border. x2 and y2 are exclusive end
 * coordinates (they are used as 'x2-x1' and 'y1<y2'), so they are clamped to
 * 'size'. Clamping them to size-1 dropped the last column and row of every
 * tile that touches the border and packed such a tile with a 15-pixel row
 * stride, while the caller interprets the help buffer as a full 16x16 tile.
 * Nothing is copied when the clipped tile is empty.
 */
static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
{
	size_t rowbytes;
	int *rz;
	int x2, y2;

	x2= x1+tile;
	y2= y1+tile;
	if(x2>size) x2= size;
	if(y2>size) y2= size;

	if(x1>=x2 || y1>=y2) return;

	rowbytes= sizeof(int)*(size_t)(x2-x1);
	rz= rectz + size*y1 + x1;
	for(; y1<y2; y1++) {
		memcpy(r1, rz, rowbytes);
		rz+= size;
		r1+= rowbytes;
	}
}
102
/* Disabled helper (compiled out by the #if 0 below): adds up the bytes in
 * shb->cbuf, one per 256-pixel tile ((size*size)/256 of them), and returns
 * 256 times that sum. */
103 #if 0
104 static int sizeoflampbuf(ShadBuf *shb)
105 {
106         int num,count=0;
107         char *cp;
108         
109         cp= shb->cbuf;
110         num= (shb->size*shb->size)/256;
111
112         while(num--) count+= *(cp++);
113         
114         return 256*count;
115 }
116 #endif
117
/* Return the jitter table for samp x samp samples (samp is clamped to 2..16).
 *
 * All tables live back to back in one static array: the table for 'samp'
 * starts after the tables of 1x1 .. (samp-1)x(samp-1) samples, which adds up
 * to 1496 entries (roughly 12k) for the largest case. A table is generated
 * with BLI_initjit() the first time it is asked for and reused afterwards.
 *
 * Not threadsafe: the tables and their 'generated' flags are static and
 * unprotected here.
 */
static float *give_jitter_tab(int samp)
{
	static float jittab[1496][2];
	static char generated[17];	/* static storage, so starts out all zero */
	int start= 0, n;

	if(samp>16) samp= 16;
	else if(samp<2) samp= 2;

	/* skip the tables of all smaller sample counts */
	for(n=1; n<samp; n++)
		start+= n*n;

	if(!generated[samp]) {
		generated[samp]= 1;
		BLI_initjit(jittab[start], samp*samp);
	}

	return jittab[start];
}
143
144 static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype) 
145 {
146         float *jit, totw= 0.0f;
147         int samp= get_render_shadow_samples(&re->r, shb->samp);
148         int a, tot=samp*samp;
149         
150         shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
151         
152         for(jit= shb->jit, a=0; a<tot; a++, jit+=2) {
153                 if(filtertype==LA_SHADBUF_TENT) 
154                         shb->weight[a]= 0.71f - sqrt(jit[0]*jit[0] + jit[1]*jit[1]);
155                 else if(filtertype==LA_SHADBUF_GAUSS) 
156                         shb->weight[a]= RE_filter_value(R_FILTER_GAUSS, 1.8f*sqrt(jit[0]*jit[0] + jit[1]*jit[1]));
157                 else
158                         shb->weight[a]= 1.0f;
159                 
160                 totw+= shb->weight[a];
161         }
162         
163         totw= 1.0f/totw;
164         for(a=0; a<tot; a++) {
165                 shb->weight[a]*= totw;
166         }
167 }
168
169 static int verg_deepsample(const void *poin1, const void *poin2)
170 {
171         const DeepSample *ds1= (const DeepSample*)poin1;
172         const DeepSample *ds2= (const DeepSample*)poin2;
173
174         if(ds1->z < ds2->z) return -1;
175         else if(ds1->z == ds2->z) return 0;
176         else return 1;
177 }
178
/* Compress an array of (z, visibility) samples in place and return the number
 * of samples that remain.
 *
 * dsample: array of 'tot' samples; it is both read and overwritten.
 * tot:     number of input samples.
 * epsilon: visibility tolerance used when deciding whether a sample can be
 *          dropped.
 *
 * The first sample is always kept. Starting from the last kept sample
 * (newds), following samples are skipped for as long as one straight line
 * through newds can pass within +/- epsilon of each of them; the range of
 * admissible slopes is tracked in slopemin/slopemax. When that range becomes
 * empty, a new sample is written at the z of the last sample that still
 * fitted.
 */
179 static int compress_deepsamples(DeepSample *dsample, int tot, float epsilon)
180 {
181         /* uses doubles to avoid overflows and other numerical issues,
182            could be improved */
183         DeepSample *ds, *newds;
184         float v;
185         double slope, slopemin, slopemax, min, max, div, newmin, newmax;
186         int a, first, z, newtot= 0;
187
188         /*if(print) {
189                 for(a=0, ds=dsample; a<tot; a++, ds++)
190                         printf("%lf,%f ", ds->z/(double)0x7FFFFFFF, ds->v);
191                 printf("\n");
192         }*/
193
194         /* read from and write into same array */
195         ds= dsample;
196         newds= dsample;
197         a= 0;
198
199         /* as long as we are not at the end of the array */
200         for(a++, ds++; a<tot; a++, ds++) {
                /* start a new segment at newds; 'first' stays set until a slope
                   range has been computed for it */
201                 slopemin= 0.0f;
202                 slopemax= 0.0f;
203                 first= 1;
204
                /* extend the segment sample by sample; a and ds are shared with
                   the outer loop */
205                 for(; a<tot; a++, ds++) {
206                         //dz= ds->z - newds->z;
207                         if(ds->z == newds->z) {
208                                 /* still in same z position, simply check
209                                    visibility difference against epsilon */
210                                 if(!(fabs(newds->v - ds->v) <= epsilon)) {
211                                         break;
212                                 }
213                         }
214                         else {
215                                 /* compute slopes */
                                /* z differences are scaled by 0x7FFFFFFF; min and max
                                   are the slopes from newds to ds->v -/+ epsilon */
216                                 div= (double)0x7FFFFFFF/((double)ds->z - (double)newds->z);
217                                 min= ((ds->v - epsilon) - newds->v)*div;
218                                 max= ((ds->v + epsilon) - newds->v)*div;
219
220                                 /* adapt existing slopes */
221                                 if(first) {
222                                         newmin= min;
223                                         newmax= max;
224                                         first= 0;
225                                 }
226                                 else {
                                        /* intersect with the range allowed so far */
227                                         newmin= MAX2(slopemin, min);
228                                         newmax= MIN2(slopemax, max);
229
230                                         /* verify if there is still space between the slopes */
231                                         if(newmin > newmax) {
                                                /* this sample does not fit: step back to
                                                   the previous one and end the segment */
232                                                 ds--;
233                                                 a--;
234                                                 break;
235                                         }
236                                 }
237
238                                 slopemin= newmin;
239                                 slopemax= newmax;
240                         }
241                 }
242
                /* ran off the end of the array: step back to the last sample */
243                 if(a == tot) {
244                         ds--;
245                         a--;
246                 }
247
248                 /* always previous z */
249                 z= ds->z;
250
251                 if(first || a==tot-1) {
252                         /* if slopes were not initialized, use last visibility */
253                         v= ds->v;
254                 }
255                 else {
256                         /* compute visibility at center between slopes at z */
257                         slope= (slopemin+slopemax)*0.5;
258                         v= newds->v + slope*((z - newds->z)/(double)0x7FFFFFFF);
259                 }
260
                /* write the segment end as the next output sample */
261                 newds++;
262                 newtot++;
263
264                 newds->z= z;
265                 newds->v= v;
266         }
267
        /* newds sits at index newtot; count it too, unless it only repeats the
           visibility of the sample written before it */
268         if(newtot == 0 || (newds->v != (newds-1)->v))
269                 newtot++;
270
271         /*if(print) {
272                 for(a=0, ds=dsample; a<newtot; a++, ds++)
273                         printf("%lf,%f ", ds->z/(double)0x7FFFFFFF, ds->v);
274                 printf("\n");
275         }*/
276
277         return newtot;
278 }
279
280 static float deep_alpha(Render *re, int obinr, int facenr, int strand)
281 {
282         ObjectInstanceRen *obi= &re->objectinstance[obinr];
283         Material *ma;
284
285         if(strand) {
286                 StrandRen *strand= RE_findOrAddStrand(obi->obr, facenr-1);
287                 ma= strand->buffer->ma;
288         }
289         else {
290                 VlakRen *vlr= RE_findOrAddVlak(obi->obr, (facenr-1) & RE_QUAD_MASK);
291                 ma= vlr->mat;
292         }
293
294         return ma->shad_alpha;
295 }
296
/* Build the deep shadow sample buffer of 'shb' from per-pixel sample lists.
 *
 * re:            render, used to look up the material alpha of each sample.
 * shb:           shadow buffer; a new ShadSampleBuf is appended to
 *                shb->buffers and filled here.
 * apixbuf:       size*size face sample lists.
 * apixbufstrand: size*size strand sample lists, or NULL when there are none.
 *
 * Per pixel: the samples of each of the shb->totbuf buffers are collected,
 * sorted by z and turned into a step-shaped visibility function; these are
 * merged into one averaged function, which is then compressed with
 * compress_deepsamples() and stored in deepbuf[pixel] / totbuf[pixel].
 */
297 static void compress_deepshadowbuf(Render *re, ShadBuf *shb, APixstr *apixbuf, APixstrand *apixbufstrand)
298 {
299         ShadSampleBuf *shsample;
300         DeepSample *ds[RE_MAX_OSA], *sampleds[RE_MAX_OSA], *dsb, *newbuf;
301         APixstr *ap, *apn;
302         APixstrand *aps, *apns;
        /* note: totbuf is held in a float here and also used as loop bound */
303         float visibility, totbuf= shb->totbuf;
304         int a, b, c, tot, minz, found, size= shb->size, prevtot, newtot;
        /* totsample and totsamplec are only read by the commented-out printf
           at the end of this function */
305         int sampletot[RE_MAX_OSA], totsample = 0, totsamplec = 0;
306         
307         shsample= MEM_callocN( sizeof(ShadSampleBuf), "shad sample buf");
308         BLI_addtail(&shb->buffers, shsample);
309
        /* per pixel: number of stored samples and pointer to them */
310         shsample->totbuf= MEM_callocN(sizeof(int)*size*size, "deeptotbuf");
311         shsample->deepbuf= MEM_callocN(sizeof(DeepSample*)*size*size, "deepbuf");
312
313         ap= apixbuf;
314         aps= apixbufstrand;
        /* aps is advanced along with ap, but only dereferenced when
           apixbufstrand is non-NULL */
315         for(a=0; a<size*size; a++, ap++, aps++) {
316                 /* count number of samples */
317                 for(c=0; c<totbuf; c++)
318                         sampletot[c]= 0;
319
320                 tot= 0;
                /* each list node holds up to 4 entries; bit c of mask[b] marks
                   entry b as belonging to buffer c */
321                 for(apn=ap; apn; apn=apn->next)
322                         for(b=0; b<4; b++)
323                                 if(apn->p[b])
324                                         for(c=0; c<totbuf; c++)
325                                                 if(apn->mask[b] & (1<<c))
326                                                         sampletot[c]++;
327
328                 if(apixbufstrand) {
329                         for(apns=aps; apns; apns=apns->next)
330                                 for(b=0; b<4; b++)
331                                         if(apns->p[b])
332                                                 for(c=0; c<totbuf; c++)
333                                                         if(apns->mask[b] & (1<<c))
334                                                                 sampletot[c]++;
335                 }
336
337                 for(c=0; c<totbuf; c++)
338                         tot += sampletot[c];
339
                /* nothing covers this pixel */
340                 if(tot == 0) {
341                         shsample->deepbuf[a]= NULL;
342                         shsample->totbuf[a]= 0;
343                         continue;
344                 }
345
346                 /* fill samples */
                /* one allocation holds all buffers back to back, two DeepSample
                   entries per counted sample; sampleds[c] is the start of
                   buffer c, ds[c] the write cursor into it */
347                 ds[0]= sampleds[0]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
348                 for(c=1; c<totbuf; c++)
349                         ds[c]= sampleds[c]= sampleds[c-1] + sampletot[c-1]*2;
350
351                 for(apn=ap; apn; apn=apn->next) {
352                         for(b=0; b<4; b++) {
353                                 if(apn->p[b]) {
354                                         for(c=0; c<totbuf; c++) {
355                                                 if(apn->mask[b] & (1<<c)) {
356                                                         /* two entries to create step profile */
357                                                         ds[c]->z= apn->z[b];
358                                                         ds[c]->v= 1.0f; /* not used */
359                                                         ds[c]++;
                                                        /* second entry temporarily carries the
                                                           material alpha, replaced by the
                                                           visibility further down */
360                                                         ds[c]->z= apn->z[b];
361                                                         ds[c]->v= deep_alpha(re, apn->obi[b], apn->p[b], 0);
362                                                         ds[c]++;
363                                                 }
364                                         }
365                                 }
366                         }
367                 }
368
369                 if(apixbufstrand) {
370                         for(apns=aps; apns; apns=apns->next) {
371                                 for(b=0; b<4; b++) {
372                                         if(apns->p[b]) {
373                                                 for(c=0; c<totbuf; c++) {
374                                                         if(apns->mask[b] & (1<<c)) {
375                                                                 /* two entries to create step profile */
376                                                                 ds[c]->z= apns->z[b];
377                                                                 ds[c]->v= 1.0f; /* not used */
378                                                                 ds[c]++;
379                                                                 ds[c]->z= apns->z[b];
380                                                                 ds[c]->v= deep_alpha(re, apns->obi[b], apns->p[b], 1);
381                                                                 ds[c]++;
382                                                         }
383                                                 }
384                                         }
385                                 }
386                         }
387                 }
388
389                 for(c=0; c<totbuf; c++) {
390                         /* sort by increasing z */
                        /* the element size is two DeepSamples, so pairs move as one
                           unit and are compared on the z of their first entry */
391                         qsort(sampleds[c], sampletot[c], sizeof(DeepSample)*2, verg_deepsample);
392
393                         /* sum visibility, replacing alpha values */
394                         visibility= 1.0f;
395                         ds[c]= sampleds[c];
396
397                         for(b=0; b<sampletot[c]; b++) {
398                                 /* two entries creating step profile */
                                /* first entry: visibility in front of the sample */
399                                 ds[c]->v= visibility;
400                                 ds[c]++;
401
                                /* second entry: visibility behind it, attenuated by the
                                   alpha that was stored there */
402                                 visibility *= 1.0f-ds[c]->v;
403                                 ds[c]->v= visibility;
404                                 ds[c]++;
405                         }
406
407                         /* halfway trick, probably won't work well for volumes? */
                        /* each z becomes the average of itself and the z of the
                           next sample pair; the last pair averages with 0x7FFFFFFF */
408                         ds[c]= sampleds[c];
409                         for(b=0; b<sampletot[c]; b++) {
410                                 if(b+1 < sampletot[c]) {
411                                         ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
412                                         ds[c]++;
413                                         ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
414                                         ds[c]++;
415                                 }
416                                 else {
417                                         ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
418                                         ds[c]++;
419                                         ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
420                                         ds[c]++;
421                                 }
422                         }
423
424                         /* init for merge loop */
                        /* from here on sampletot[c] counts the remaining DeepSample
                           entries of buffer c rather than sample pairs */
425                         ds[c]= sampleds[c];
426                         sampletot[c] *= 2;
427                 }
428
429                 shsample->deepbuf[a]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
430                 shsample->totbuf[a]= 0;
431
432                 /* merge buffers */
433                 dsb= shsample->deepbuf[a];
434                 while(1) {
435                         minz= 0;
436                         found= 0;
437
                        /* smallest z among the current heads of all buffers */
438                         for(c=0; c<totbuf; c++) {
439                                 if(sampletot[c] && (!found || ds[c]->z < minz)) {
440                                         minz= ds[c]->z;
441                                         found= 1;
442                                 }
443                         }
444
445                         if(!found)
446                                 break;
447
448                         dsb->z= minz;
449                         dsb->v= 0.0f;
450
451                         visibility= 0.0f;
452                         for(c=0; c<totbuf; c++) {
                                /* consume one entry of every buffer whose head is at minz */
453                                 if(sampletot[c] && ds[c]->z == minz) {
454                                         ds[c]++;
455                                         sampletot[c]--;
456                                 }
457
                                /* average over the buffers: full visibility when nothing
                                   of this buffer was consumed yet, otherwise the
                                   visibility of its last consumed entry */
458                                 if(sampleds[c] == ds[c])
459                                         visibility += 1.0f/totbuf;
460                                 else
461                                         visibility += (ds[c]-1)->v/totbuf;
462                         }
463
464                         dsb->v= visibility;
465                         dsb++;
466                         shsample->totbuf[a]++;
467                 }
468
469                 prevtot= shsample->totbuf[a];
470                 totsample += prevtot;
471
472                 newtot= compress_deepsamples(shsample->deepbuf[a], prevtot, shb->compressthresh);
473                 shsample->totbuf[a]= newtot;
474                 totsamplec += newtot;
475
                /* shrink the allocation to what is left after compression */
476                 if(newtot < prevtot) {
477                         newbuf= MEM_mallocN(sizeof(DeepSample)*newtot, "cdeepsample");
478                         memcpy(newbuf, shsample->deepbuf[a], sizeof(DeepSample)*newtot);
479                         MEM_freeN(shsample->deepbuf[a]);
480                         shsample->deepbuf[a]= newbuf;
481                 }
482
                /* sampleds[0] is the start of the single per-pixel allocation */
483                 MEM_freeN(sampleds[0]);
484         }
485
486         //printf("%d -> %d, ratio %f\n", totsample, totsamplec, (float)totsamplec/(float)totsample);
487 }
488
489 /* create Z tiles (for compression): this system is 24 bits!!! */
/* Compress the size x size z-buffer 'rectz' into 16x16 tiles and append the
 * result to shb->buffers as a new ShadSampleBuf.
 *
 * shb:    shadow buffer receiving the new sample buffer.
 * rectz:  z-buffer to compress.
 * square: when zero, tiles whose distance from the buffer center exceeds
 *         size/2 + 12 are stored as empty without looking at rectz.
 *
 * Per tile, cbuf holds a storage code and zbuf a value or pointer:
 *   0: all pixels agree in their upper 24 bits (or the tile was skipped);
 *      zbuf holds a single z value directly.
 *   1: zbuf points to 256+4 bytes: the first pixel as int, followed by the
 *      GCOMP byte of every pixel.
 *   2: zbuf points to 2*256+4 bytes: the first pixel as int, followed by the
 *      BCOMP and GCOMP bytes of every pixel.
 *   3: zbuf points to 3*256 bytes: the ACOMP, BCOMP and GCOMP bytes of every
 *      pixel.
 */
490 static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
491 {
492         ShadSampleBuf *shsample;
493         float dist;
494         uintptr_t *ztile;
495         int *rz, *rz1, verg, verg1, size= shb->size;
496         int a, x, y, minx, miny, byt1, byt2;
497         char *rc, *rcline, *ctile, *zt;
498         
499         shsample= MEM_callocN( sizeof(ShadSampleBuf), "shad sample buf");
500         BLI_addtail(&shb->buffers, shsample);
501         
        /* one entry per 16x16 (= 256 pixel) tile */
502         shsample->zbuf= MEM_mallocN( sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
503         shsample->cbuf= MEM_callocN( (size*size)/256, "initshadbuf3");
504         
505         ztile= (uintptr_t *)shsample->zbuf;
506         ctile= shsample->cbuf;
507         
508         /* help buffer */
        /* room for the 256 ints of one tile, plus one extra int */
509         rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
510         
511         for(y=0; y<size; y+=16) {
                /* miny/minx: offset from the buffer center of the tile edge that
                   lies nearest to it */
512                 if(y< size/2) miny= y+15-size/2;
513                 else miny= y-size/2;    
514                 
515                 for(x=0; x<size; x+=16) {
516                         
517                         /* is tile within spotbundle? */
518                         a= size/2;
519                         if(x< a) minx= x+15-a;
520                         else minx= x-a; 
521                         
522                         dist= sqrt( (float)(minx*minx+miny*miny) );
523                         
524                         if(square==0 && dist>(float)(a+12)) {   /* 12, tested with a onlyshadow lamp */
                                /* treat as uniform tile with value 0: a==256 selects the
                                   branch below, which reads *(rz1-1), i.e. verg */
525                                 a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
526                                 rz1= (&verg)+1;
527                         } 
528                         else {
529                                 copy_to_ztile(rectz, size, x, y, 16, rcline);
530                                 rz1= (int *)rcline;
531                                 
                                /* compare the upper 24 bits of all pixels against those
                                   of the first one; a ends at 256 when they all match */
532                                 verg= (*rz1 & 0xFFFFFF00);
533                                 
534                                 for(a=0;a<256;a++,rz1++) {
535                                         if( (*rz1 & 0xFFFFFF00) !=verg) break;
536                                 }
537                         }
538                         if(a==256) { /* complete empty tile */
539                                 *ctile= 0;
                                /* rz1 is one past the last value looked at */
540                                 *ztile= *(rz1-1);
541                         }
542                         else {
543                                 
544                                 /* ACOMP etc. are defined to work L/B endian */
545                                 
546                                 rc= rcline;
547                                 rz1= (int *)rcline;
548                                 verg=  rc[ACOMP];
549                                 verg1= rc[BCOMP];
550                                 rc+= 4;
                                /* byt1: ACOMP byte identical in all pixels; byt2: same for
                                   BCOMP. The scan stops as soon as byt1 fails, byt2 is
                                   only used together with byt1 below */
551                                 byt1= 1; byt2= 1;
552                                 for(a=1;a<256;a++,rc+=4) {
553                                         byt1 &= (verg==rc[ACOMP]);
554                                         byt2 &= (verg1==rc[BCOMP]);
555                                         
556                                         if(byt1==0) break;
557                                 }
558                                 if(byt1 && byt2) {      /* only store byte */
559                                         *ctile= 1;
560                                         *ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
                                        /* header: the complete first pixel */
561                                         rz= (int *)*ztile;
562                                         *rz= *rz1;
563                                         
564                                         zt= (char *)(rz+1);
565                                         rc= rcline;
566                                         for(a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];       
567                                 }
568                                 else if(byt1) {         /* only store short */
569                                         *ctile= 2;
570                                         *ztile= (uintptr_t)MEM_mallocN(2*256+4,"Tile2");
                                        /* header: the complete first pixel */
571                                         rz= (int *)*ztile;
572                                         *rz= *rz1;
573                                         
574                                         zt= (char *)(rz+1);
575                                         rc= rcline;
576                                         for(a=0; a<256; a++, zt+=2, rc+=4) {
577                                                 zt[0]= rc[BCOMP];
578                                                 zt[1]= rc[GCOMP];
579                                         }
580                                 }
581                                 else {                  /* store triple */
582                                         *ctile= 3;
583                                         *ztile= (uintptr_t)MEM_mallocN(3*256,"Tile3");
584
                                        /* no header in this case */
585                                         zt= (char *)*ztile;
586                                         rc= rcline;
587                                         for(a=0; a<256; a++, zt+=3, rc+=4) {
588                                                 zt[0]= rc[ACOMP];
589                                                 zt[1]= rc[BCOMP];
590                                                 zt[2]= rc[GCOMP];
591                                         }
592                                 }
593                         }
594                         ztile++;
595                         ctile++;
596                 }
597         }
598
599         MEM_freeN(rcline);
600 }
601
602 /* sets start/end clipping. lar->shb should be initialized */
603 static void shadowbuf_autoclip(Render *re, LampRen *lar)
604 {
605         ObjectInstanceRen *obi;
606         ObjectRen *obr;
607         VlakRen *vlr= NULL;
608         VertRen *ver= NULL;
609         Material *ma= NULL;
610         float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
611         unsigned int lay = -1;
612         int i, a, maxtotvert, ok= 1;
613         char *clipflag;
614         
615         minz= 1.0e30f; maxz= -1.0e30f;
616         copy_m4_m4(viewmat, lar->shb->viewmat);
617         
618         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
619
620         maxtotvert= 0;
621         for(obr=re->objecttable.first; obr; obr=obr->next)
622                 maxtotvert= MAX2(obr->totvert, maxtotvert);
623
624         clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag");
625
626         /* set clip in vertices when face visible */
627         for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
628                 obr= obi->obr;
629
630                 if(obi->flag & R_TRANSFORMED)
631                         mul_m4_m4m4(obviewmat, obi->mat, viewmat);
632                 else
633                         copy_m4_m4(obviewmat, viewmat);
634
635                 memset(clipflag, 0, sizeof(char)*obr->totvert);
636
637                 /* clear clip, is being set if face is visible (clip is calculated for real later) */
638                 for(a=0; a<obr->totvlak; a++) {
639                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
640                         else vlr++;
641                         
642                         /* note; these conditions are copied from zbuffer_shadow() */
643                         if(vlr->mat!= ma) {
644                                 ma= vlr->mat;
645                                 ok= 1;
646                                 if((ma->mode & MA_SHADBUF)==0) ok= 0;
647                         }
648                         
649                         if(ok && (obi->lay & lay)) {
650                                 clipflag[vlr->v1->index]= 1;
651                                 clipflag[vlr->v2->index]= 1;
652                                 clipflag[vlr->v3->index]= 1;
653                                 if(vlr->v4) clipflag[vlr->v4->index]= 1;
654                         }                               
655                 }               
656                 
657                 /* calculate min and max */
658                 for(a=0; a< obr->totvert;a++) {
659                         if((a & 255)==0) ver= RE_findOrAddVert(obr, a);
660                         else ver++;
661                         
662                         if(clipflag[a]) {
663                                 VECCOPY(vec, ver->co);
664                                 mul_m4_v3(obviewmat, vec);
665                                 /* Z on visible side of lamp space */
666                                 if(vec[2] < 0.0f) {
667                                         float inpr, z= -vec[2];
668                                         
669                                         /* since vec is rotated in lampspace, this is how to get the cosine of angle */
670                                         /* precision is set 20% larger */
671                                         vec[2]*= 1.2f;
672                                         normalize_v3(vec);
673                                         inpr= - vec[2];
674
675                                         if(inpr>=lar->spotsi) {
676                                                 if(z<minz) minz= z;
677                                                 if(z>maxz) maxz= z;
678                                         }
679                                 }
680                         }
681                 }
682         }
683
684         MEM_freeN(clipflag);
685         
686         /* set clipping min and max */
687         if(minz < maxz) {
688                 float delta= (maxz - minz);     /* threshold to prevent precision issues */
689                 
690                 //printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
691                 if(lar->bufflag & LA_SHADBUF_AUTO_START)
692                         lar->shb->d= minz - delta*0.02f;        /* 0.02 is arbitrary... needs more thinking! */
693                 if(lar->bufflag & LA_SHADBUF_AUTO_END)
694                         lar->shb->clipend= maxz + delta*0.1f;
695                 
696                 /* bias was calculated as percentage, we scale it to prevent animation issues */
697                 delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
698                 //printf("bias delta %f\n", delta);
699                 lar->shb->bias= (int) (delta*(float)lar->shb->bias);
700         }
701 }
702
703 static void makeflatshadowbuf(Render *re, LampRen *lar, float *jitbuf)
704 {
705         ShadBuf *shb= lar->shb;
706         int *rectz, samples;
707
708         /* zbuffering */
709         rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
710         
711         for(samples=0; samples<shb->totbuf; samples++) {
712                 zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
713                 /* create Z tiles (for compression): this system is 24 bits!!! */
714                 compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
715
716                 if(re->test_break(re->tbh))
717                         break;
718         }
719         
720         MEM_freeN(rectz);
721 }
722
723 static void makedeepshadowbuf(Render *re, LampRen *lar, float *jitbuf)
724 {
725         ShadBuf *shb= lar->shb;
726         APixstr *apixbuf;
727         APixstrand *apixbufstrand= NULL;
728         ListBase apsmbase= {NULL, NULL};
729
730         /* zbuffering */
731         apixbuf= MEM_callocN(sizeof(APixstr)*shb->size*shb->size, "APixbuf");
732         if(re->totstrand)
733                 apixbufstrand= MEM_callocN(sizeof(APixstrand)*shb->size*shb->size, "APixbufstrand");
734
735         zbuffer_abuf_shadow(re, lar, shb->persmat, apixbuf, apixbufstrand, &apsmbase, shb->size,
736                 shb->totbuf, (float(*)[2])jitbuf);
737
738         /* create Z tiles (for compression): this system is 24 bits!!! */
739         compress_deepshadowbuf(re, shb, apixbuf, apixbufstrand);
740         
741         MEM_freeN(apixbuf);
742         if(apixbufstrand)
743                 MEM_freeN(apixbufstrand);
744         freepsA(&apsmbase);
745 }
746
747 void makeshadowbuf(Render *re, LampRen *lar)
748 {
749         ShadBuf *shb= lar->shb;
750         float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
751         
752         if(lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
753                 shadowbuf_autoclip(re, lar);
754         
755         /* just to enforce identical behaviour of all irregular buffers */
756         if(lar->buftype==LA_SHADBUF_IRREGULAR)
757                 shb->size= 1024;
758         
759         /* matrices and window: in winmat the transformation is being put,
760                 transforming from observer view to lamp view, including lamp window matrix */
761         
762         angle= saacos(lar->spotsi);
763         temp= 0.5f*shb->size*cos(angle)/sin(angle);
764         shb->pixsize= (shb->d)/temp;
765         wsize= shb->pixsize*(shb->size/2.0);
766         
767         perspective_m4( shb->winmat,-wsize, wsize, -wsize, wsize, shb->d, shb->clipend);
768         mul_m4_m4m4(shb->persmat, shb->viewmat, shb->winmat);
769
770         if(ELEM3(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) {
771                 shb->totbuf= lar->buffers;
772
773                 /* jitter, weights - not threadsafe! */
774                 BLI_lock_thread(LOCK_CUSTOM1);
775                 shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
776                 make_jitter_weight_tab(re, shb, lar->filtertype);
777                 BLI_unlock_thread(LOCK_CUSTOM1);
778                 
779                 if(shb->totbuf==4) jitbuf= give_jitter_tab(2);
780                 else if(shb->totbuf==9) jitbuf= give_jitter_tab(3);
781                 else jitbuf= twozero;
782                 
783                 /* zbuffering */
784                 if(lar->buftype == LA_SHADBUF_DEEP) {
785                         makedeepshadowbuf(re, lar, jitbuf);
786                         shb->totbuf= 1;
787                 }
788                 else
789                         makeflatshadowbuf(re, lar, jitbuf);
790
791                 /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
792         }
793 }
794
795 static void *do_shadow_thread(void *re_v)
796 {
797         Render *re= (Render*)re_v;
798         LampRen *lar;
799
800         do {
801                 BLI_lock_thread(LOCK_CUSTOM1);
802                 for(lar=re->lampren.first; lar; lar=lar->next) {
803                         if(lar->shb && !lar->thread_assigned) {
804                                 lar->thread_assigned= 1;
805                                 break;
806                         }
807                 }
808                 BLI_unlock_thread(LOCK_CUSTOM1);
809
810                 /* if type is irregular, this only sets the perspective matrix and autoclips */
811                 if(lar) {
812                         makeshadowbuf(re, lar);
813                         BLI_lock_thread(LOCK_CUSTOM1);
814                         lar->thread_ready= 1;
815                         BLI_unlock_thread(LOCK_CUSTOM1);
816                 }
817         } while(lar && !re->test_break(re->tbh));
818
819         return NULL;
820 }
821
/* Break flag polled by the shadow buffer worker threads. It is written by
 * threaded_makeshadowbufs() and only read through thread_break(). */
static volatile int g_break= 0;

/* Break test callback used while worker threads run: reports the flag above,
 * the callback handle argument is ignored. */
static int thread_break(void *unused)
{
	(void)unused;

	return g_break;
}
827
828 void threaded_makeshadowbufs(Render *re)
829 {
830         ListBase threads;
831         LampRen *lar;
832         int a, totthread= 0;
833         int (*test_break)(void *);
834
835         /* count number of threads to use */
836         if(G.rendering) {
837                 for(lar=re->lampren.first; lar; lar= lar->next)
838                         if(lar->shb)
839                                 totthread++;
840                 
841                 totthread= MIN2(totthread, re->r.threads);
842         }
843         else
844                 totthread= 1; /* preview render */
845
846         if(totthread <= 1) {
847                 for(lar=re->lampren.first; lar; lar= lar->next) {
848                         if(re->test_break(re->tbh)) break;
849                         if(lar->shb) {
850                                 /* if type is irregular, this only sets the perspective matrix and autoclips */
851                                 makeshadowbuf(re, lar);
852                         }
853                 }
854         }
855         else {
856                 /* swap test break function */
857                 test_break= re->test_break;
858                 re->test_break= thread_break;
859
860                 for(lar=re->lampren.first; lar; lar= lar->next) {
861                         lar->thread_assigned= 0;
862                         lar->thread_ready= 0;
863                 }
864
865                 BLI_init_threads(&threads, do_shadow_thread, totthread);
866                 
867                 for(a=0; a<totthread; a++)
868                         BLI_insert_thread(&threads, re);
869
870                 /* keep rendering as long as there are shadow buffers not ready */
871                 do {
872                         if((g_break=test_break(re->tbh)))
873                                 break;
874
875                         PIL_sleep_ms(50);
876
877                         BLI_lock_thread(LOCK_CUSTOM1);
878                         for(lar=re->lampren.first; lar; lar= lar->next)
879                                 if(lar->shb && !lar->thread_ready)
880                                         break;
881                         BLI_unlock_thread(LOCK_CUSTOM1);
882                 } while(lar);
883         
884                 BLI_end_threads(&threads);
885
886                 /* unset threadsafety */
887                 re->test_break= test_break;
888                 g_break= 0;
889         }
890 }
891
892 void freeshadowbuf(LampRen *lar)
893 {
894         if(lar->shb) {
895                 ShadBuf *shb= lar->shb;
896                 ShadSampleBuf *shsample;
897                 int b, v;
898                 
899                 for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
900                         if(shsample->deepbuf) {
901                                 v= shb->size*shb->size;
902                                 for(b=0; b<v; b++)
903                                         if(shsample->deepbuf[b])
904                                                 MEM_freeN(shsample->deepbuf[b]);
905                                         
906                                 MEM_freeN(shsample->deepbuf);
907                                 MEM_freeN(shsample->totbuf);
908                         }
909                         else {
910                                 intptr_t *ztile= shsample->zbuf;
911                                 char *ctile= shsample->cbuf;
912                                 
913                                 v= (shb->size*shb->size)/256;
914                                 for(b=0; b<v; b++, ztile++, ctile++)
915                                         if(*ctile) MEM_freeN((void *) *ztile);
916                                 
917                                 MEM_freeN(shsample->zbuf);
918                                 MEM_freeN(shsample->cbuf);
919                         }
920                 }
921                 BLI_freelistN(&shb->buffers);
922                 
923                 if(shb->weight) MEM_freeN(shb->weight);
924                 MEM_freeN(lar->shb);
925                 
926                 lar->shb= NULL;
927         }
928 }
929
930
931 static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
932 {
933         /* return a 1 if fully compressed shadbuf-tile && z==const */
934         int ofs;
935         char *ct;
936
937         if(shsample->deepbuf)
938                 return 0;
939
940         /* always test borders of shadowbuffer */
941         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
942         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
943    
944         /* calc z */
945         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
946         ct= shsample->cbuf+ofs;
947         if(*ct==0) {
948                 if(nr==0) {
949                         *rz= *( (int **)(shsample->zbuf+ofs) );
950                         return 1;
951                 }
952                 else if(*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
953                 
954                 return 1;
955         }
956         
957         return 0;
958 }
959
960 static float readdeepvisibility(DeepSample *dsample, int tot, int z, int bias, float *biast)
961 {
962         DeepSample *ds, *prevds;
963         float t;
964         int a;
965
966         /* tricky stuff here; we use ints which can overflow easily with bias values */
967
968         ds= dsample;
969         for(a=0; a<tot && (z-bias > ds->z); a++, ds++)
970                 ;
971
972         if(a == tot) {
973                 if(biast)
974                         *biast= 0.0f;
975                 return (ds-1)->v; /* completely behind all samples */
976         }
977         
978         /* check if this read needs bias blending */
979         if(biast) {
980                 if(z > ds->z)
981                         *biast= (float)(z - ds->z)/(float)bias;
982                 else
983                         *biast= 0.0f;
984         }
985
986         if(a == 0)
987                 return 1.0f; /* completely in front of all samples */
988
989         /* converting to float early here because ds->z - prevds->z can overflow */
990         prevds= ds-1;
991         t= ((float)(z-bias) - (float)prevds->z)/((float)ds->z - (float)prevds->z);
992         return t*ds->v + (1.0f-t)*prevds->v;
993 }
994
995 static float readdeepshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
996 {
997         float v, biasv, biast;
998         int ofs, tot;
999
1000         if(zs < - 0x7FFFFE00 + bias)
1001                 return 1.0;     /* extreme close to clipstart */
1002
1003         /* calc z */
1004         ofs= ys*shb->size + xs;
1005         tot= shsample->totbuf[ofs];
1006         if(tot == 0)
1007                 return 1.0f;
1008
1009         v= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, bias, &biast);
1010
1011         if(biast != 0.0f) {
1012                 /* in soft bias area */
1013                 biasv= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, 0, 0);
1014
1015                 biast= biast*biast;
1016                 return (1.0f-biast)*v + biast*biasv;
1017         }
1018
1019         return v;
1020 }
1021
1022 /* return 1.0 : fully in light */
1023 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)     
1024 {
1025         float temp;
1026         int *rz, ofs;
1027         int zsamp=0;
1028         char *ct, *cz;
1029
1030         /* simpleclip */
1031         /* if(xs<0 || ys<0) return 1.0; */
1032         /* if(xs>=shb->size || ys>=shb->size) return 1.0; */
1033         
1034         /* always test borders of shadowbuffer */
1035         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
1036         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
1037
1038         if(shsample->deepbuf)
1039                 return readdeepshadowbuf(shb, shsample, bias, xs, ys, zs);
1040
1041         /* calc z */
1042         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1043         ct= shsample->cbuf+ofs;
1044         rz= *( (int **)(shsample->zbuf+ofs) );
1045
1046         if(*ct==3) {
1047                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1048                 cz= (char *)&zsamp;
1049                 cz[ACOMP]= ct[0];
1050                 cz[BCOMP]= ct[1];
1051                 cz[GCOMP]= ct[2];
1052         }
1053         else if(*ct==2) {
1054                 ct= ((char *)rz);
1055                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1056                 zsamp= *rz;
1057         
1058                 cz= (char *)&zsamp;
1059                 cz[BCOMP]= ct[0];
1060                 cz[GCOMP]= ct[1];
1061         }
1062         else if(*ct==1) {
1063                 ct= ((char *)rz);
1064                 ct+= 4+16*(ys & 15)+(xs & 15);
1065                 zsamp= *rz;
1066
1067                 cz= (char *)&zsamp;
1068                 cz[GCOMP]= ct[0];
1069
1070         }
1071         else {
1072                 /* got warning on this for 64 bits.... */
1073                 /* but it's working code! in this case rz is not a pointer but zvalue (ton) */
1074                  zsamp= GET_INT_FROM_POINTER(rz);
1075         }
1076
1077         /* tricky stuff here; we use ints which can overflow easily with bias values */
1078         
1079         if(zsamp > zs) return 1.0;              /* absolute no shadow */
1080         else if(zs < - 0x7FFFFE00 + bias) return 1.0;   /* extreme close to clipstart */
1081         else if(zsamp < zs-bias) return 0.0 ;   /* absolute in shadow */
1082         else {                                  /* soft area */
1083                 
1084                 temp=  ( (float)(zs- zsamp) )/(float)bias;
1085                 return 1.0 - temp*temp;
1086                         
1087         }
1088 }
1089
1090 static void shadowbuf_project_co(float *x, float *y, float *z, ShadBuf *shb, float co[3])
1091 {
1092         float hco[4], size= 0.5f*(float)shb->size;
1093
1094         copy_v3_v3(hco, co);
1095         hco[3]= 1.0f;
1096
1097         mul_m4_v4(shb->persmat, hco);
1098
1099         *x= size*(1.0f+hco[0]/hco[3]);
1100         *y= size*(1.0f+hco[1]/hco[3]);
1101         if(z) *z= (hco[2]/hco[3]);
1102 }
1103
1104 /* the externally called shadow testing (reading) function */
1105 /* return 1.0: no shadow at all */
1106 float testshadowbuf(Render *re, ShadBuf *shb, float *co, float *dxco, float *dyco, float inp, float mat_bias)
1107 {
1108         ShadSampleBuf *shsample;
1109         float fac, dco[3], dx[3], dy[3], shadfac=0.0f;
1110         float xs1, ys1, zs1, *jit, *weight, xres, yres, biasf;
1111         int xs, ys, zs, bias, *rz;
1112         short a, num;
1113         
1114         /* crash preventer */
1115         if(shb->buffers.first==NULL)
1116                 return 1.0f;
1117         
1118         /* when facing away, assume fully in shadow */
1119         if(inp <= 0.0f)
1120                 return 0.0f;
1121
1122         /* project coordinate to pixel space */
1123         shadowbuf_project_co(&xs1, &ys1, &zs1, shb, co);
1124
1125         /* clip z coordinate, z is projected so that (-1.0, 1.0) matches
1126            (clipstart, clipend), so we can do this simple test */
1127         if(zs1>=1.0f)
1128                 return 0.0f;
1129         else if(zs1<= -1.0f)
1130                 return 1.0f;
1131
1132         zs= ((float)0x7FFFFFFF)*zs1;
1133
1134         /* take num*num samples, increase area with fac */
1135         num= get_render_shadow_samples(&re->r, shb->samp);
1136         num= num*num;
1137         fac= shb->soft;
1138         
1139         /* compute z bias */
1140         if(mat_bias!=0.0f) biasf= shb->bias*mat_bias;
1141         else biasf= shb->bias;
1142         /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors 
1143            on cube edges, with one side being almost frontal lighted (ton)  */
1144         bias= (1.5f-inp*inp)*biasf;
1145         
1146         /* in case of no filtering we can do things simpler */
1147         if(num==1) {
1148                 for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
1149                         shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
1150                 
1151                 return shadfac/(float)shb->totbuf;
1152         }
1153
1154         /* calculate filter size */
1155         add_v3_v3v3(dco, co, dxco);
1156         shadowbuf_project_co(&dx[0], &dx[1], NULL, shb, dco);
1157         dx[0]= xs1 - dx[0];
1158         dx[1]= ys1 - dx[1];
1159
1160         add_v3_v3v3(dco, co, dyco);
1161         shadowbuf_project_co(&dy[0], &dy[1], NULL, shb, dco);
1162         dy[0]= xs1 - dy[0];
1163         dy[1]= ys1 - dy[1];
1164         
1165         xres= fac*(fabs(dx[0]) + fabs(dy[0]));
1166         yres= fac*(fabs(dx[1]) + fabs(dy[1]));
1167         if(xres<1.0f) xres= 1.0f;
1168         if(yres<1.0f) yres= 1.0f;
1169         
1170         /* make xs1/xs1 corner of sample area */
1171         xs1 -= xres*0.5f;
1172         ys1 -= yres*0.5f;
1173
1174         /* in case we have a constant value in a tile, we can do quicker lookup */
1175         if(xres<16.0f && yres<16.0f) {
1176                 shsample= shb->buffers.first;
1177                 if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
1178                         if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
1179                                 if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
1180                                         if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
1181                                                 return readshadowbuf(shb, shsample, bias,(int)xs1, (int)ys1, zs);
1182                                         }
1183                                 }
1184                         }
1185                 }
1186         }
1187         
1188         /* full jittered shadow buffer lookup */
1189         for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
1190                 jit= shb->jit;
1191                 weight= shb->weight;
1192                 
1193                 for(a=num; a>0; a--, jit+=2, weight++) {
1194                         /* instead of jit i tried random: ugly! */
1195                         /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
1196                         /* xs1 and ys1 are already corrected to be corner of sample area */
1197                         xs= xs1 + xres*(jit[0] + 0.5f);
1198                         ys= ys1 + yres*(jit[1] + 0.5f);
1199                         
1200                         shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
1201                 }
1202         }
1203
1204         /* Renormalizes for the sample number: */
1205         return shadfac/(float)shb->totbuf;
1206 }
1207
1208 /* different function... sampling behind clipend can be LIGHT, bias is negative! */
1209 /* return: light */
1210 static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
1211 {
1212         float temp;
1213         int *rz, ofs;
1214         int bias, zbias, zsamp;
1215         char *ct, *cz;
1216
1217         /* negative! The other side is more important */
1218         bias= -shb->bias;
1219         
1220         /* simpleclip */
1221         if(xs<0 || ys<0) return 0.0;
1222         if(xs>=shb->size || ys>=shb->size) return 0.0;
1223
1224         /* calc z */
1225         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1226         ct= shsample->cbuf+ofs;
1227         rz= *( (int **)(shsample->zbuf+ofs) );
1228
1229         if(*ct==3) {
1230                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1231                 cz= (char *)&zsamp;
1232                 zsamp= 0;
1233                 cz[ACOMP]= ct[0];
1234                 cz[BCOMP]= ct[1];
1235                 cz[GCOMP]= ct[2];
1236         }
1237         else if(*ct==2) {
1238                 ct= ((char *)rz);
1239                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1240                 zsamp= *rz;
1241         
1242                 cz= (char *)&zsamp;
1243                 cz[BCOMP]= ct[0];
1244                 cz[GCOMP]= ct[1];
1245         }
1246         else if(*ct==1) {
1247                 ct= ((char *)rz);
1248                 ct+= 4+16*(ys & 15)+(xs & 15);
1249                 zsamp= *rz;
1250
1251                 cz= (char *)&zsamp;
1252                 cz[GCOMP]= ct[0];
1253
1254         }
1255         else {
1256                 /* same as before */
1257                 /* still working code! (ton) */
1258                  zsamp= GET_INT_FROM_POINTER(rz);
1259         }
1260
1261         /* NO schadow when sampled at 'eternal' distance */
1262
1263         if(zsamp >= 0x7FFFFE00) return 1.0; 
1264
1265         if(zsamp > zs) return 1.0;              /* absolute no shadww */
1266         else {
1267                 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
1268                 zbias= 0x7fffffff - zs;
1269                 if(zbias > -bias) {
1270                         if( zsamp < zs-bias) return 0.0 ;       /* absolute in shadow */
1271                 }
1272                 else return 0.0 ;       /* absolute shadow */
1273         }
1274
1275         /* soft area */
1276         
1277         temp=  ( (float)(zs- zsamp) )/(float)bias;
1278         return 1.0 - temp*temp;
1279 }
1280
1281
1282 float shadow_halo(LampRen *lar, float *p1, float *p2)
1283 {
1284         /* p1 p2 already are rotated in spot-space */
1285         ShadBuf *shb= lar->shb;
1286         ShadSampleBuf *shsample;
1287         float co[4], siz;
1288         float labda, labdao, labdax, labday, ldx, ldy;
1289         float zf, xf1, yf1, zf1, xf2, yf2, zf2;
1290         float count, lightcount;
1291         int x, y, z, xs1, ys1;
1292         int dx = 0, dy = 0;
1293         
1294         siz= 0.5*(float)shb->size;
1295         
1296         co[0]= p1[0];
1297         co[1]= p1[1];
1298         co[2]= p1[2]/lar->sh_zfac;
1299         co[3]= 1.0;
1300         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1301         xf1= siz*(1.0+co[0]/co[3]);
1302         yf1= siz*(1.0+co[1]/co[3]);
1303         zf1= (co[2]/co[3]);
1304
1305
1306         co[0]= p2[0];
1307         co[1]= p2[1];
1308         co[2]= p2[2]/lar->sh_zfac;
1309         co[3]= 1.0;
1310         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1311         xf2= siz*(1.0+co[0]/co[3]);
1312         yf2= siz*(1.0+co[1]/co[3]);
1313         zf2= (co[2]/co[3]);
1314
1315         /* the 2dda (a pixel line formula) */
1316
1317         xs1= (int)xf1;
1318         ys1= (int)yf1;
1319
1320         if(xf1 != xf2) {
1321                 if(xf2-xf1 > 0.0) {
1322                         labdax= (xf1-xs1-1.0)/(xf1-xf2);
1323                         ldx= -shb->shadhalostep/(xf1-xf2);
1324                         dx= shb->shadhalostep;
1325                 }
1326                 else {
1327                         labdax= (xf1-xs1)/(xf1-xf2);
1328                         ldx= shb->shadhalostep/(xf1-xf2);
1329                         dx= -shb->shadhalostep;
1330                 }
1331         }
1332         else {
1333                 labdax= 1.0;
1334                 ldx= 0.0;
1335         }
1336
1337         if(yf1 != yf2) {
1338                 if(yf2-yf1 > 0.0) {
1339                         labday= (yf1-ys1-1.0)/(yf1-yf2);
1340                         ldy= -shb->shadhalostep/(yf1-yf2);
1341                         dy= shb->shadhalostep;
1342                 }
1343                 else {
1344                         labday= (yf1-ys1)/(yf1-yf2);
1345                         ldy= shb->shadhalostep/(yf1-yf2);
1346                         dy= -shb->shadhalostep;
1347                 }
1348         }
1349         else {
1350                 labday= 1.0;
1351                 ldy= 0.0;
1352         }
1353         
1354         x= xs1;
1355         y= ys1;
1356         labda= count= lightcount= 0.0;
1357
1358 /* printf("start %x %x  \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
1359
1360         while(1) {
1361                 labdao= labda;
1362                 
1363                 if(labdax==labday) {
1364                         labdax+= ldx;
1365                         x+= dx;
1366                         labday+= ldy;
1367                         y+= dy;
1368                 }
1369                 else {
1370                         if(labdax<labday) {
1371                                 labdax+= ldx;
1372                                 x+= dx;
1373                         } else {
1374                                 labday+= ldy;
1375                                 y+= dy;
1376                         }
1377                 }
1378                 
1379                 labda= MIN2(labdax, labday);
1380                 if(labda==labdao || labda>=1.0) break;
1381                 
1382                 zf= zf1 + labda*(zf2-zf1);
1383                 count+= (float)shb->totbuf;
1384
1385                 if(zf<= -1.0) lightcount += 1.0;        /* close to the spot */
1386                 else {
1387                 
1388                         /* make sure, behind the clipend we extend halolines. */
1389                         if(zf>=1.0) z= 0x7FFFF000;
1390                         else z= (int)(0x7FFFF000*zf);
1391                         
1392                         for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
1393                                 lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
1394                         
1395                 }
1396         }
1397         
1398         if(count!=0.0) return (lightcount/count);
1399         return 0.0;
1400         
1401 }
1402
1403
1404 /* ********************* Irregular Shadow Buffer (ISB) ************* */
1405 /* ********** storage of all view samples in a raster of lists ***** */
1406
1407 /* based on several articles describing this method, like:
1408 The Irregular Z-Buffer and its Application to Shadow Mapping
1409 Gregory S. Johnson - William R. Mark - Christopher A. Burns 
1410 and
1411 Alias-Free Shadow Maps
1412 Timo Aila and Samuli Laine
1413 */
1414
1415 /* bsp structure (actually kd tree) */
1416
1417 #define BSPMAX_SAMPLE   128
1418 #define BSPMAX_DEPTH    32
1419
1420 /* aligned with struct rctf */
1421 typedef struct Boxf {
1422         float xmin, xmax;
1423         float ymin, ymax;
1424         float zmin, zmax;
1425 } Boxf;
1426
1427 typedef struct ISBBranch {
1428         struct ISBBranch *left, *right;
1429         float divider[2];
1430         Boxf box;
1431         short totsamp, index, full, unused;
1432         ISBSample **samples;
1433 } ISBBranch;
1434
1435 typedef struct BSPFace {
1436         Boxf box;
1437         float *v1, *v2, *v3, *v4;
1438         int obi;                /* object for face lookup */
1439         int facenr;             /* index to retrieve VlakRen */
1440         int type;               /* only for strand now */
1441         short shad_alpha, is_full;
1442         
1443         /* strand caching data, optimize for point_behind_strand() */
1444         float radline, radline_end, len;
1445         float vec1[3], vec2[3], rc[3];
1446 } BSPFace;
1447
1448 /* boxes are in lamp projection */
1449 static void init_box(Boxf *box)
1450 {
1451         box->xmin= 1000000.0f;
1452         box->xmax= 0;
1453         box->ymin= 1000000.0f;
1454         box->ymax= 0;
1455         box->zmin= 0x7FFFFFFF;
1456         box->zmax= - 0x7FFFFFFF;
1457 }
1458
1459 /* use v1 to calculate boundbox */
1460 static void bound_boxf(Boxf *box, float *v1)
1461 {
1462         if(v1[0] < box->xmin) box->xmin= v1[0];
1463         if(v1[0] > box->xmax) box->xmax= v1[0];
1464         if(v1[1] < box->ymin) box->ymin= v1[1];
1465         if(v1[1] > box->ymax) box->ymax= v1[1];
1466         if(v1[2] < box->zmin) box->zmin= v1[2];
1467         if(v1[2] > box->zmax) box->zmax= v1[2];
1468 }
1469
1470 /* use v1 to calculate boundbox */
1471 static void bound_rectf(rctf *box, float *v1)
1472 {
1473         if(v1[0] < box->xmin) box->xmin= v1[0];
1474         if(v1[0] > box->xmax) box->xmax= v1[0];
1475         if(v1[1] < box->ymin) box->ymin= v1[1];
1476         if(v1[1] > box->ymax) box->ymax= v1[1];
1477 }
1478
1479
1480 /* halfway splitting, for initializing a more regular tree */
1481 static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
1482 {
1483         
1484         /* if level > 0 we create new branches and go deeper*/
1485         if(level > 0) {
1486                 ISBBranch *left, *right;
1487                 int i;
1488                 
1489                 /* splitpoint */
1490                 root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
1491                 root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
1492                 
1493                 /* find best splitpoint */
1494                 if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1495                         i= root->index= 0;
1496                 else
1497                         i= root->index= 1;
1498                 
1499                 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1500                 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1501                 
1502                 /* box info */
1503                 left->box= root->box;
1504                 right->box= root->box;
1505                 if(i==0) {
1506                         left->box.xmax= root->divider[0];
1507                         right->box.xmin= root->divider[0];
1508                 }
1509                 else {
1510                         left->box.ymax= root->divider[1];
1511                         right->box.ymin= root->divider[1];
1512                 }
1513                 isb_bsp_split_init(left, mem, level-1);
1514                 isb_bsp_split_init(right, mem, level-1);
1515         }
1516         else {
1517                 /* we add sample array */
1518                 root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1519         }
1520 }
1521
1522 /* note; if all samples on same location we just spread them over 2 new branches */
1523 static void isb_bsp_split(ISBBranch *root, MemArena *mem)
1524 {
1525         ISBBranch *left, *right;
1526         ISBSample *samples[BSPMAX_SAMPLE];
1527         int a, i;
1528
1529         /* splitpoint */
1530         root->divider[0]= root->divider[1]= 0.0f;
1531         for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1532                 root->divider[0]+= root->samples[a]->zco[0];
1533                 root->divider[1]+= root->samples[a]->zco[1];
1534         }
1535         root->divider[0]/= BSPMAX_SAMPLE;
1536         root->divider[1]/= BSPMAX_SAMPLE;
1537         
1538         /* find best splitpoint */
1539         if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1540                 i= root->index= 0;
1541         else
1542                 i= root->index= 1;
1543         
1544         /* new branches */
1545         left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1546         right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1547
1548         /* new sample array */
1549         left->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1550         right->samples= samples; // tmp
1551         
1552         /* split samples */
1553         for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1554                 int comp= 0;
1555                 /* this prevents adding samples all to 1 branch when divider is equal to samples */
1556                 if(root->samples[a]->zco[i] == root->divider[i])
1557                         comp= a & 1;
1558                 else if(root->samples[a]->zco[i] < root->divider[i])
1559                         comp= 1;
1560                 
1561                 if(comp==1) {
1562                         left->samples[left->totsamp]= root->samples[a];
1563                         left->totsamp++;
1564                 }
1565                 else {
1566                         right->samples[right->totsamp]= root->samples[a];
1567                         right->totsamp++;
1568                 }
1569         }
1570         
1571         /* copy samples from tmp */
1572         memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
1573         right->samples= root->samples;
1574         root->samples= NULL;
1575         
1576         /* box info */
1577         left->box= root->box;
1578         right->box= root->box;
1579         if(i==0) {
1580                 left->box.xmax= root->divider[0];
1581                 right->box.xmin= root->divider[0];
1582         }
1583         else {
1584                 left->box.ymax= root->divider[1];
1585                 right->box.ymin= root->divider[1];
1586         }
1587 }
1588
1589 /* inserts sample in main tree, also splits on threshold */
1590 /* returns 1 if error */
1591 static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
1592 {
1593         ISBBranch *bspn= root;
1594         float *zco= sample->zco;
1595         int i= 0;
1596         
1597         /* debug counter, also used to check if something was filled in ever */
1598         root->totsamp++;
1599         
1600         /* going over branches until last one found */
1601         while(bspn->left) {
1602                 if(zco[bspn->index] <= bspn->divider[bspn->index])
1603                         bspn= bspn->left;
1604                 else
1605                         bspn= bspn->right;
1606                 i++;
1607         }
1608         /* bspn now is the last branch */
1609         
1610         if(bspn->totsamp==BSPMAX_SAMPLE) {
1611                 printf("error in bsp branch\n");        /* only for debug, cannot happen */
1612                 return 1;
1613         }
1614         
1615         /* insert */
1616         bspn->samples[bspn->totsamp]= sample;
1617         bspn->totsamp++;
1618
1619         /* split if allowed and needed */
1620         if(bspn->totsamp==BSPMAX_SAMPLE) {
1621                 if(i==BSPMAX_DEPTH) {
1622                         bspn->totsamp--;        /* stop filling in... will give errors */
1623                         return 1;
1624                 }
1625                 isb_bsp_split(bspn, memarena);
1626         }
1627         return 0;
1628 }
1629
1630 static float VecLen2f( float *v1, float *v2)
1631 {
1632         float x= v1[0]-v2[0];
1633         float y= v1[1]-v2[1];
1634         return (float)sqrt(x*x+y*y);
1635 }
1636
1637 /* initialize vars in face, for optimal point-in-face test */
1638 static void bspface_init_strand(BSPFace *face) 
1639 {
1640         
1641         face->radline= 0.5f*VecLen2f(face->v1, face->v2);
1642         
1643         mid_v3_v3v3(face->vec1, face->v1, face->v2);
1644         if(face->v4)
1645                 mid_v3_v3v3(face->vec2, face->v3, face->v4);
1646         else
1647                 VECCOPY(face->vec2, face->v3);
1648         
1649         face->rc[0]= face->vec2[0]-face->vec1[0];
1650         face->rc[1]= face->vec2[1]-face->vec1[1];
1651         face->rc[2]= face->vec2[2]-face->vec1[2];
1652         
1653         face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
1654         
1655         if(face->len!=0.0f) {
1656                 face->radline_end= face->radline/sqrt(face->len);
1657                 face->len= 1.0f/face->len;
1658         }
1659 }
1660
1661 /* brought back to a simple 2d case */
1662 static int point_behind_strand(float *p, BSPFace *face)
1663 {
1664         /* v1 - v2 is radius, v1 - v3 length */
1665         float dist, rc[2], pt[2];
1666         
1667         /* using code from dist_to_line_segment_v2(), distance vec to line-piece */
1668
1669         if(face->len==0.0f) {
1670                 rc[0]= p[0]-face->vec1[0];
1671                 rc[1]= p[1]-face->vec1[1];
1672                 dist= (float)(sqrt(rc[0]*rc[0]+ rc[1]*rc[1]));
1673                 
1674                 if(dist < face->radline)
1675                         return 1;
1676         }
1677         else {
1678                 float labda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
1679                 
1680                 if(labda > -face->radline_end && labda < 1.0f+face->radline_end) { 
1681                         /* hesse for dist: */
1682                         //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len);
1683                         
1684                         pt[0]= labda*face->rc[0]+face->vec1[0];
1685                         pt[1]= labda*face->rc[1]+face->vec1[1];
1686                         
1687                         rc[0]= pt[0]-p[0];
1688                         rc[1]= pt[1]-p[1];
1689                         dist= (float)sqrt(rc[0]*rc[0]+ rc[1]*rc[1]);
1690                         
1691                         if(dist < face->radline) {
1692                                 float zval= face->vec1[2] + labda*face->rc[2];
1693                                 if(p[2] > zval)
1694                                         return 1;
1695                         }
1696                 }
1697         }
1698         return 0;
1699 }
1700
1701
/* Returns 1 if p lies inside triangle v1-v2-v3 in 2d (edges included) and the
 * z of the triangle at that location is smaller than or equal to p[2].
 * Returns 0 otherwise, also for a triangle without 2d area.
 * code derived from src/parametrizer.c */
static int point_behind_tria2d(float *p, float *v1, float *v2, float *v3)
{
	float e1[2], e2[2], rel[2];
	float det, inv, u, v, z;

	/* the two edges starting in v1 */
	e1[0] = v2[0] - v1[0];
	e1[1] = v2[1] - v1[1];
	e2[0] = v3[0] - v1[0];
	e2[1] = v3[1] - v1[1];

	det = e1[0]*e2[1] - e1[1]*e2[0];
	if(det==0.0f)
		return 0;

	/* p relative to v1 */
	rel[0] = p[0] - v1[0];
	rel[1] = p[1] - v1[1];

	inv = 1.0f/det;

	/* u and v are the weights of v2 and v3 */
	u = (rel[0]*e2[1] - rel[1]*e2[0])*inv;
	if(!(u >= 0.0f))
		return 0;

	v = (e1[0]*rel[1] - e1[1]*rel[0])*inv;
	if(!(v >= 0.0f))
		return 0;

	if(!(u + v <= 1.0f))
		return 0;

	/* inside, now check if point p is behind */
	z = (1.0f-u-v)*v1[2] + u*v2[2] + v*v3[2];

	return (z <= p[2]) ? 1 : 0;
}
1737
#if 0
/* tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */
/* (this whole section is compiled out) */

/* check if line v1-v2 has all rect points on other side of point v3 */
static int rect_outside_line(rctf *rect, float *v1, float *v2, float *v3)
{
	float a, b, c;
	int side;

	/* line formula for v1-v2 */
	a= v2[1]-v1[1];
	b= v1[0]-v2[0];
	c= -a*v1[0] - b*v1[1];

	/* 1 when v3 evaluates negative in the line formula */
	side= a*v3[0] + b*v3[1] + c < 0.0f;

	/* the four quad points, each has to evaluate non-negative exactly when v3 is negative */
	if( side!=(rect->xmin*a + rect->ymin*b + c >= 0.0f) )
		return 0;
	if( side!=(rect->xmax*a + rect->ymin*b + c >= 0.0f) )
		return 0;
	if( side!=(rect->xmax*a + rect->ymax*b + c >= 0.0f) )
		return 0;
	if( side!=(rect->xmin*a + rect->ymax*b + c >= 0.0f) )
		return 0;

	return 1;
}

/* check if one of the triangle edges separates all rect points on 1 side */
static int rect_isect_tria(rctf *rect, float *v1, float *v2, float *v3)
{
	/* try each edge, with the remaining vertex as reference point */
	if(rect_outside_line(rect, v1, v2, v3) ||
	   rect_outside_line(rect, v2, v3, v1) ||
	   rect_outside_line(rect, v3, v1, v2))
		return 0;

	return 1;
}
#endif
1774
1775 /* if face overlaps a branch, it executes func. recursive */
1776 static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
1777 {
1778         
1779         /* are we descending? */
1780         if(bspn->left) {
1781                 /* hrmf, the box struct cannot be addressed with index */
1782                 if(bspn->index==0) {
1783                         if(face->box.xmin <= bspn->divider[0])
1784                                 isb_bsp_face_inside(bspn->left, face);
1785                         if(face->box.xmax > bspn->divider[0])
1786                                 isb_bsp_face_inside(bspn->right, face);
1787                 }
1788                 else {
1789                         if(face->box.ymin <= bspn->divider[1])
1790                                 isb_bsp_face_inside(bspn->left, face);
1791                         if(face->box.ymax > bspn->divider[1])
1792                                 isb_bsp_face_inside(bspn->right, face);
1793                 }
1794         }
1795         else {
1796                 /* else: end branch reached */
1797                 int a;
1798                 
1799                 if(bspn->totsamp==0) return;
1800                 
1801                 /* check for nodes entirely in shadow, can be skipped */
1802                 if(bspn->totsamp==bspn->full)
1803                         return;
1804                 
1805                 /* if bsp node is entirely in front of face, give up */
1806                 if(bspn->box.zmax < face->box.zmin)
1807                         return;
1808                 
1809                 /* if face boundbox is outside of branch rect, give up */
1810                 if(0==BLI_isect_rctf((rctf *)&face->box, (rctf *)&bspn->box, NULL))
1811                         return;
1812                 
1813                 /* test all points inside branch */
1814                 for(a=bspn->totsamp-1; a>=0; a--) {
1815                         ISBSample *samp= bspn->samples[a];
1816                         
1817                         if((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
1818                                 if(face->box.zmin < samp->zco[2]) {
1819                                         if(BLI_in_rctf((rctf *)&face->box, samp->zco[0], samp->zco[1])) {
1820                                                 int inshadow= 0;
1821                                                 
1822                                                 if(face->type) {
1823                                                         if(point_behind_strand(samp->zco, face)) 
1824                                                                 inshadow= 1;
1825                                                 }
1826                                                 else if( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
1827                                                         inshadow= 1;
1828                                                 else if(face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
1829                                                         inshadow= 1;
1830
1831                                                 if(inshadow) {
1832                                                         *(samp->shadfac) += face->shad_alpha;
1833                                                         /* optimize; is_full means shad_alpha==4096 */
1834                                                         if(*(samp->shadfac) >= 4096 || face->is_full) {
1835                                                                 bspn->full++;
1836                                                                 samp->shadfac= NULL;
1837                                                         }
1838                                                 }
1839                                         }
1840                                 }
1841                         }
1842                 }
1843         }
1844 }
1845
1846 /* based on available samples, recalculate the bounding box for bsp nodes, recursive */
1847 static void isb_bsp_recalc_box(ISBBranch *root)
1848 {
1849         if(root->left) {
1850                 isb_bsp_recalc_box(root->left);
1851                 isb_bsp_recalc_box(root->right);
1852         }
1853         else if(root->totsamp) {
1854                 int a;
1855                 
1856                 init_box(&root->box);
1857                 for(a=root->totsamp-1; a>=0; a--)
1858                         bound_boxf(&root->box, root->samples[a]->zco);
1859         }       
1860 }
1861
1862 /* callback function for zbuf clip */
1863 static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4)
1864 {
1865         BSPFace face;
1866         
1867         face.v1= v1;
1868         face.v2= v2;
1869         face.v3= v3;
1870         face.v4= v4;
1871         face.obi= obi;
1872         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1873         face.type= R_STRAND;
1874         if(R.osa)
1875                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1876         else
1877                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1878         
1879         face.is_full= (zspan->shad_alpha==1.0f);
1880         
1881         /* setup boundbox */
1882         init_box(&face.box);
1883         bound_boxf(&face.box, v1);
1884         bound_boxf(&face.box, v2);
1885         bound_boxf(&face.box, v3);
1886         if(v4)
1887                 bound_boxf(&face.box, v4);
1888         
1889         /* optimize values */
1890         bspface_init_strand(&face);
1891         
1892         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1893         
1894 }
1895
1896 /* callback function for zbuf clip */
1897 static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4) 
1898 {
1899         BSPFace face;
1900         
1901         face.v1= v1;
1902         face.v2= v2;
1903         face.v3= v3;
1904         face.v4= v4;
1905         face.obi= obi;
1906         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1907         face.type= 0;
1908         if(R.osa)
1909                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1910         else
1911                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1912         
1913         face.is_full= (zspan->shad_alpha==1.0f);
1914         
1915         /* setup boundbox */
1916         init_box(&face.box);
1917         bound_boxf(&face.box, v1);
1918         bound_boxf(&face.box, v2);
1919         bound_boxf(&face.box, v3);
1920         if(v4)
1921                 bound_boxf(&face.box, v4);
1922
1923         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1924 }
1925
/* Clip flags for homogeneous coordinate 'ho' against the range in 'minmax'
 * (xmin, xmax, ymin, ymax), which is scaled with ho[3] before comparing.
 * Result: 1 = x above max, 2 = x below min, 4 = y above max, 8 = y below min,
 * x and y flags combined; 0 when inside. */
static int testclip_minmax(float *ho, float *minmax)
{
	const float w= ho[3];
	int xflag= 0, yflag= 0;

	if( ho[0] > minmax[1]*w)
		xflag= 1;
	else if( ho[0] < minmax[0]*w)
		xflag= 2;

	if( ho[1] > minmax[3]*w)
		yflag= 4;
	else if( ho[1] < minmax[2]*w)
		yflag= 8;

	return xflag | yflag;
}
1939
1940 /* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
1941 static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
1942 {
1943         ObjectInstanceRen *obi;
1944         ObjectRen *obr;
1945         ShadBuf *shb= lar->shb;
1946         ZSpan zspan, zspanstrand;
1947         VlakRen *vlr= NULL;
1948         Material *ma= NULL;
1949         float minmaxf[4], winmat[4][4];
1950         int size= shb->size;
1951         int i, a, ok=1, lay= -1;
1952         
1953         /* further optimize, also sets minz maxz */
1954         isb_bsp_recalc_box(root);
1955         
1956         /* extra clipping for minmax */
1957         minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
1958         minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
1959         minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
1960         minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
1961         
1962         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
1963         
1964         /* (ab)use zspan, since we use zbuffer clipping code */
1965         zbuf_alloc_span(&zspan, size, size, re->clipcrop);
1966         
1967         zspan.zmulx=  ((float)size)/2.0f;
1968         zspan.zmuly=  ((float)size)/2.0f;
1969         zspan.zofsx= -0.5f;
1970         zspan.zofsy= -0.5f;
1971         
1972         /* pass on bsp root to zspan */
1973         zspan.rectz= (int *)root;
1974         
1975         /* filling methods */
1976         zspanstrand= zspan;
1977         //      zspan.zbuflinefunc= zbufline_onlyZ;
1978         zspan.zbuffunc= isb_bsp_test_face;
1979         zspanstrand.zbuffunc= isb_bsp_test_strand;
1980         
1981         for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
1982                 obr= obi->obr;
1983
1984                 if(obi->flag & R_TRANSFORMED)
1985                         mul_m4_m4m4(winmat, obi->mat, shb->persmat);
1986                 else
1987                         copy_m4_m4(winmat, shb->persmat);
1988
1989                 for(a=0; a<obr->totvlak; a++) {
1990                         
1991                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
1992                         else vlr++;
1993                         
1994                         /* note, these conditions are copied in shadowbuf_autoclip() */
1995                         if(vlr->mat!= ma) {
1996                                 ma= vlr->mat;
1997                                 ok= 1;
1998                                 if((ma->mode & MA_SHADBUF)==0) ok= 0;
1999                                 if(ma->material_type == MA_TYPE_WIRE) ok= 0;
2000                                 zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
2001                         }
2002                         
2003                         if(ok && (obi->lay & lay)) {
2004                                 float hoco[4][4];
2005                                 int c1, c2, c3, c4=0;
2006                                 int d1, d2, d3, d4=0;
2007                                 int partclip;
2008                                 
2009                                 /* create hocos per face, it is while render */
2010                                 projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
2011                                 projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
2012                                 projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
2013                                 if(vlr->v4) {
2014                                         projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
2015                                 }
2016
2017                                 /* minmax clipping */
2018                                 if(vlr->v4) partclip= d1 & d2 & d3 & d4;
2019                                 else partclip= d1 & d2 & d3;
2020                                 
2021                                 if(partclip==0) {
2022                                         
2023                                         /* window clipping */
2024                                         c1= testclip(hoco[0]); 
2025                                         c2= testclip(hoco[1]); 
2026                                         c3= testclip(hoco[2]); 
2027                                         if(vlr->v4)
2028                                                 c4= testclip(hoco[3]); 
2029                                         
2030                                         /* ***** NO WIRE YET */                 
2031                                         if(ma->material_type == MA_TYPE_WIRE) {
2032                                                 if(vlr->v4)
2033                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2034                                                 else
2035                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], 0, c1, c2, c3, 0);
2036                                         }
2037                                         else if(vlr->v4) {
2038                                                 if(vlr->flag & R_STRAND)
2039                                                         zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2040                                                 else
2041                                                         zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2042                                         }
2043                                         else
2044                                                 zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
2045                                         
2046                                 }
2047                         }
2048                 }
2049         }
2050         
2051         zbuf_free_span(&zspan);
2052 }
2053
2054 /* returns 1 when the viewpixel is visible in lampbuffer */
2055 static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float *co)
2056 {
2057         float hoco[4], v1[3], nor[3];
2058         float dface, fac, siz;
2059         
2060         RE_vlakren_get_normal(&R, obi, vlr, nor);
2061         VECCOPY(v1, vlr->v1->co);
2062         if(obi->flag & R_TRANSFORMED)
2063                 mul_m4_v3(obi->mat, v1);
2064
2065         /* from shadepixel() */
2066         dface= v1[0]*nor[0] + v1[1]*nor[1] + v1[2]*nor[2];
2067         hoco[3]= 1.0f;
2068         
2069         /* ortho viewplane cannot intersect using view vector originating in (0,0,0) */
2070         if(R.r.mode & R_ORTHO) {
2071                 /* x and y 3d coordinate can be derived from pixel coord and winmat */
2072                 float fx= 2.0/(R.winx*R.winmat[0][0]);
2073                 float fy= 2.0/(R.winy*R.winmat[1][1]);
2074                 
2075                 hoco[0]= (x - 0.5*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
2076                 hoco[1]= (y - 0.5*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
2077                 
2078                 /* using a*x + b*y + c*z = d equation, (a b c) is normal */
2079                 if(nor[2]!=0.0f)
2080                         hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
2081                 else
2082                         hoco[2]= 0.0f;
2083         }
2084         else {
2085                 float div, view[3];
2086                 
2087                 calc_view_vector(view, x, y);
2088                 
2089                 div= nor[0]*view[0] + nor[1]*view[1] + nor[2]*view[2];
2090                 if (div==0.0f) 
2091                         return 0;
2092                 
2093                 fac= dface/div;
2094                 
2095                 hoco[0]= fac*view[0];
2096                 hoco[1]= fac*view[1];
2097                 hoco[2]= fac*view[2];
2098         }
2099         
2100         /* move 3d vector to lampbuf */
2101         mul_m4_v4(shb->persmat, hoco);  /* rational hom co */
2102         
2103         /* clip We can test for -1.0/1.0 because of the properties of the
2104          * coordinate transformations. */
2105         fac= fabs(hoco[3]);
2106         if(hoco[0]<-fac || hoco[0]>fac)
2107                 return 0;
2108         if(hoco[1]<-fac || hoco[1]>fac)
2109                 return 0;
2110         if(hoco[2]<-fac || hoco[2]>fac)
2111                 return 0;
2112         
2113         siz= 0.5f*(float)shb->size;
2114         co[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
2115         co[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
2116         co[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
2117         
2118         /* XXXX bias, much less than normal shadbuf, or do we need a constant? */
2119         co[2] -= 0.05f*shb->bias;
2120         
2121         return 1;
2122 }
2123
2124 /* storage of shadow results, solid osa and transp case */
2125 static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
2126 {
2127         ISBShadfacA *new;
2128         float shadfacf;
2129         
2130         /* in osa case, the samples were filled in with factor 1.0/R.osa. if fewer samples we have to correct */
2131         if(R.osa)
2132                 shadfacf= ((float)shadfac*R.osa)/(4096.0*samples);
2133         else
2134                 shadfacf= ((float)shadfac)/(4096.0);
2135         
2136         new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
2137         new->obi= obi;
2138         new->facenr= facenr & ~RE_QUAD_OFFS;
2139         new->shadfac= shadfacf;
2140         if(*isbsapp)
2141                 new->next= (*isbsapp);
2142         else
2143                 new->next= NULL;
2144         
2145         *isbsapp= new;
2146 }
2147
2148 /* adding samples, solid case */
2149 static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
2150 {
2151         int xi, yi, *xcos, *ycos;
2152         int sample, bsp_err= 0;
2153         
2154         /* bsp split doesn't like to handle regular sequenes */
2155         xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
2156         ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
2157         for(xi=0; xi<pa->rectx; xi++)
2158                 xcos[xi]= xi;
2159         for(yi=0; yi<pa->recty; yi++)
2160                 ycos[yi]= yi;
2161         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2162         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2163         
2164         for(sample=0; sample<(R.osa?R.osa:1); sample++) {
2165                 ISBSample *samp= samplebuf[sample], *samp1;
2166                 
2167                 for(yi=0; yi<pa->recty; yi++) {
2168                         int y= ycos[yi];
2169                         for(xi=0; xi<pa->rectx; xi++) {
2170                                 int x= xcos[xi];
2171                                 samp1= samp + y*pa->rectx + x;
2172                                 if(samp1->facenr)
2173                                         bsp_err |= isb_bsp_insert(root, memarena, samp1);
2174                         }
2175                         if(bsp_err) break;
2176                 }
2177         }       
2178         
2179         MEM_freeN(xcos);
2180         MEM_freeN(ycos);
2181
2182         return bsp_err;
2183 }
2184
2185 /* solid version */
2186 /* lar->shb, pa->rectz and pa->rectp should exist */
2187 static void isb_make_buffer(RenderPart *pa, LampRen *lar)
2188 {
2189         ShadBuf *shb= lar->shb;
2190         ISBData *isbdata;
2191         ISBSample *samp, *samplebuf[16];        /* should be RE_MAX_OSA */
2192         ISBBranch root;
2193         MemArena *memarena;
2194         intptr_t *rd;
2195         int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
2196         
2197         /* storage for shadow, per thread */
2198         isbdata= shb->isb_result[pa->thread];
2199         
2200         /* to map the shi->xs and ys coordinate */
2201         isbdata->minx= pa->disprect.xmin;
2202         isbdata->miny= pa->disprect.ymin;
2203         isbdata->rectx= pa->rectx;
2204         isbdata->recty= pa->recty;
2205         
2206         /* branches are added using memarena (32k branches) */
2207         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2208         BLI_memarena_use_calloc(memarena);
2209         
2210         /* samplebuf is in camera view space (pixels) */
2211         for(sample=0; sample<(R.osa?R.osa:1); sample++)
2212                 samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
2213         
2214         /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
2215         if(R.osa==0)
2216                 isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
2217         
2218         /* setup bsp root */
2219         memset(&root, 0, sizeof(ISBBranch));
2220         root.box.xmin= (float)shb->size;
2221         root.box.ymin= (float)shb->size;
2222         
2223         /* create the sample buffers */
2224         for(sindex=0, y=0; y<pa->recty; y++) {
2225                 for(x=0; x<pa->rectx; x++, sindex++) {
2226                         
2227                         /* this makes it a long function, but splitting it out would mean 10+ arguments */
2228                         /* first check OSA case */
2229                         if(R.osa) {
2230                                 rd= pa->rectdaps + sindex;
2231                                 if(*rd) {
2232                                         float xs= (float)(x + pa->disprect.xmin);
2233                                         float ys= (float)(y + pa->disprect.ymin);
2234                                         
2235                                         for(sample=0; sample<R.osa; sample++) {
2236                                                 PixStr *ps= (PixStr *)(*rd);
2237                                                 int mask= (1<<sample);
2238                                                 
2239                                                 while(ps) {
2240                                                         if(ps->mask & mask)
2241                                                                 break;
2242                                                         ps= ps->next;
2243                                                 }
2244                                                 if(ps && ps->facenr>0) {
2245                                                         ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
2246                                                         ObjectRen *obr= obi->obr;
2247                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
2248                                                         
2249                                                         samp= samplebuf[sample] + sindex;
2250                                                         /* convert image plane pixel location to lamp buffer space */
2251                                                         if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
2252                                                                 samp->obi= ps->obi;
2253                                                                 samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
2254                                                                 ps->shadfac= 0;
2255                                                                 samp->shadfac= &ps->shadfac;
2256                                                                 bound_rectf((rctf *)&root.box, samp->zco);
2257                                                         }
2258                                                 }
2259                                         }
2260                                 }
2261                         }
2262                         else {
2263                                 rectp= pa->rectp + sindex;
2264                                 recto= pa->recto + sindex;
2265                                 if(*rectp>0) {
2266                                         ObjectInstanceRen *obi= &R.objectinstance[*recto];
2267                                         ObjectRen *obr= obi->obr;
2268                                         VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
2269                                         float xs= (float)(x + pa->disprect.xmin);
2270                                         float ys= (float)(y + pa->disprect.ymin);
2271                                         
2272                                         samp= samplebuf[0] + sindex;
2273                                         /* convert image plane pixel location to lamp buffer space */
2274                                         if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
2275                                                 samp->obi= *recto;
2276                                                 samp->facenr= *rectp & ~RE_QUAD_OFFS;
2277                                                 samp->shadfac= isbdata->shadfacs + sindex;
2278                                                 bound_rectf((rctf *)&root.box, samp->zco);
2279                                         }
2280                                 }
2281                         }
2282                 }
2283         }
2284         
2285         /* simple method to see if we have samples */
2286         if(root.box.xmin != (float)shb->size) {
2287                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2288                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2289                 isb_bsp_split_init(&root, memarena, 8);
2290                 
2291                 /* insert all samples in BSP now */
2292                 bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
2293                         
2294                 if(bsp_err==0) {
2295                         /* go over all faces and fill in shadow values */
2296                         
2297                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2298                         
2299                         /* copy shadow samples to persistant buffer, reduce memory overhead */
2300                         if(R.osa) {
2301                                 ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2302                                 
2303                                 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2304                                 BLI_memarena_use_calloc(isbdata->memarena);
2305
2306                                 for(rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
2307                                         
2308                                         if(*rd) {
2309                                                 PixStr *ps= (PixStr *)(*rd);
2310                                                 while(ps) {
2311                                                         if(ps->shadfac)
2312                                                                 isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
2313                                                         ps= ps->next;
2314                                                 }
2315                                         }
2316                                 }
2317                         }
2318                 }
2319         }
2320         else {
2321                 if(isbdata->shadfacs) {
2322                         MEM_freeN(isbdata->shadfacs);
2323                         isbdata->shadfacs= NULL;
2324                 }
2325         }
2326
2327         /* free BSP */
2328         BLI_memarena_free(memarena);
2329         
2330         /* free samples */
2331         for(x=0; x<(R.osa?R.osa:1); x++)
2332                 MEM_freeN(samplebuf[x]);
2333         
2334         if(bsp_err) printf("error in filling bsp\n");
2335 }
2336
2337 /* add sample to buffer, isbsa is the root sample in a buffer */
2338 static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
2339 {
2340         ISBSampleA *new;
2341         
2342         new= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
2343         if(*isbsa)
2344                 new->next= (*isbsa);
2345         else
2346                 new->next= NULL;
2347         
2348         *isbsa= new;
2349         return new;
2350 }
2351
2352 /* adding samples in BSP, transparent case */
2353 static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
2354 {
2355         int xi, yi, *xcos, *ycos;
2356         int sample, bsp_err= 0;
2357         
2358         /* bsp split doesn't like to handle regular sequenes */
2359         xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
2360         ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
2361         for(xi=0; xi<pa->rectx; xi++)
2362                 xcos[xi]= xi;
2363         for(yi=0; yi<pa->recty; yi++)
2364                 ycos[yi]= yi;
2365         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2366         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2367         
2368         for(sample=0; sample<(R.osa?R.osa:1); sample++) {
2369                 ISBSampleA **samp= samplebuf[sample], *samp1;
2370                 
2371                 for(yi=0; yi<pa->recty; yi++) {
2372                         int y= ycos[yi];
2373                         for(xi=0; xi<pa->rectx; xi++) {
2374                                 int x= xcos[xi];
2375                                 
2376                                 samp1= *(samp + y*pa->rectx + x);
2377                                 while(samp1) {
2378                                         bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
2379                                         samp1= samp1->next;
2380                                 }
2381                         }
2382                         if(bsp_err) break;
2383                 }
2384         }       
2385         
2386         MEM_freeN(xcos);
2387         MEM_freeN(ycos);
2388         
2389         return bsp_err;
2390 }
2391
2392
2393 /* Ztransp version */
2394 /* lar->shb, pa->rectz and pa->rectp should exist */
2395 static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
2396 {
2397         ShadBuf *shb= lar->shb;
2398         ISBData *isbdata;
2399         ISBSampleA *samp, **samplebuf[16];      /* MAX_OSA */
2400         ISBBranch root;
2401         MemArena *memarena;
2402         APixstr *ap;
2403         int x, y, sindex, sample, bsp_err=0;
2404         
2405         /* storage for shadow, per thread */
2406         isbdata= shb->isb_result[pa->thread];
2407         
2408         /* to map the shi->xs and ys coordinate */
2409         isbdata->minx= pa->disprect.xmin;
2410         isbdata->miny= pa->disprect.ymin;
2411         isbdata->rectx= pa->rectx;
2412         isbdata->recty= pa->recty;
2413         
2414         /* branches are added using memarena (32k branches) */
2415         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2416         BLI_memarena_use_calloc(memarena);
2417         
2418         /* samplebuf is in camera view space (pixels) */
2419         for(sample=0; sample<(R.osa?R.osa:1); sample++)
2420                 samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
2421         
2422         /* setup bsp root */
2423         memset(&root, 0, sizeof(ISBBranch));
2424         root.box.xmin= (float)shb->size;
2425         root.box.ymin= (float)shb->size;
2426
2427         /* create the sample buffers */
2428         for(ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
2429                 for(x=0; x<pa->rectx; x++, sindex++, ap++) {
2430                         
2431                         if(ap->p[0]) {
2432                                 APixstr *apn;
2433                                 float xs= (float)(x + pa->disprect.xmin);
2434                                 float ys= (float)(y + pa->disprect.ymin);
2435                                 
2436                                 for(apn=ap; apn; apn= apn->next) {
2437                                         int a;
2438                                         for(a=0; a<4; a++) {
2439                                                 if(apn->p[a]) {
2440                                                         ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
2441                                                         ObjectRen *obr= obi->obr;
2442                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
2443                                                         float zco[3];
2444                                                         
2445                                                         /* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
2446                                                         apn->shadfac[a]= 0;
2447                                                         
2448                                                         if(R.osa) {
2449                                                                 for(sample=0; sample<R.osa; sample++) {
2450                                                                         int mask= (1<<sample);
2451                                                                         
2452                                                                         if(apn->mask[a] & mask) {
2453                                                                                 
2454                                                                                 /* convert image plane pixel location to lamp buffer space */
2455                                                                                 if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
2456                                                                                         samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
2457                                                                                         samp->obi= apn->obi[a];
2458                                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2459                                                                                         samp->shadfac= &apn->shadfac[a];
2460                                                                                         
2461                                                                                         VECCOPY(samp->zco, zco);
2462                                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2463                                                                                 }
2464                                                                         }
2465                                                                 }
2466                                                         }
2467                                                         else {
2468                                                                 
2469                                                                 /* convert image plane pixel location to lamp buffer space */
2470                                                                 if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
2471                                                                         
2472                                                                         samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
2473                                                                         samp->obi= apn->obi[a];
2474                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2475                                                                         samp->shadfac= &apn->shadfac[a];
2476                                                                         
2477                                                                         VECCOPY(samp->zco, zco);
2478                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2479                                                                 }
2480                                                         }
2481                                                 }
2482                                         }
2483                                 }
2484                         }
2485                 }
2486         }
2487         
2488         /* simple method to see if we have samples */
2489         if(root.box.xmin != (float)shb->size) {
2490                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2491                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2492                 isb_bsp_split_init(&root, memarena, 8);
2493                 
2494                 /* insert all samples in BSP now */
2495                 bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
2496                 
2497                 if(bsp_err==0) {
2498                         ISBShadfacA **isbsa;
2499                         
2500                         /* go over all faces and fill in shadow values */
2501                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2502                         
2503                         /* copy shadow samples to persistant buffer, reduce memory overhead */
2504                         isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2505                         
2506                         isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2507                         
2508                         for(ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
2509                                         
2510                                 if(ap->p[0]) {
2511                                         APixstr *apn;
2512                                         for(apn=ap; apn; apn= apn->next) {
2513                                                 int a;
2514                                                 for(a=0; a<4; a++) {
2515                                                         if(apn->p[a] && apn->shadfac[a]) {
2516                                                                 if(R.osa)
2517                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
2518                                                                 else
2519                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);
2520                                                         }
2521                                                 }
2522                                         }
2523                                 }
2524                         }
2525                 }
2526         }
2527
2528         /* free BSP */
2529         BLI_memarena_free(memarena);
2530
2531         /* free samples */
2532         for(x=0; x<(R.osa?R.osa:1); x++)
2533                 MEM_freeN(samplebuf[x]);
2534
2535         if(bsp_err) printf("error in filling bsp\n");
2536 }
2537
2538
2539
2540 /* exported */
2541
2542 /* returns amount of light (1.0 = no shadow) */
2543 /* note, shadepixel() rounds the coordinate, not the real sample info */
2544 float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
2545 {
2546         /* if raytracing, we can't accept irregular shadow */
2547         if(shi->depth==0) {
2548                 ISBData *isbdata= shb->isb_result[shi->thread];
2549                 
2550                 if(isbdata) {
2551                         if(isbdata->shadfacs || isbdata->shadfaca) {
2552                                 int x= shi->xs - isbdata->minx;
2553                                 
2554                                 if(x >= 0 && x < isbdata->rectx) {
2555                                         int y= shi->ys - isbdata->miny;
2556                         
2557                                         if(y >= 0 && y < isbdata->recty) {
2558                                                 if(isbdata->shadfacs) {
2559                                                         short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
2560                                                         return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
2561                                                 }
2562                                                 else {
2563                                                         int sindex= y*isbdata->rectx + x;
2564                                                         int obi= shi->obi - R.objectinstance;
2565                                                         ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex);
2566                                                         
2567                                                         while(isbsa) {
2568                                                                 if(isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
2569                                                                         return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
2570                                                                 isbsa= isbsa->next;
2571                                                         }
2572                                                 }
2573                                         }
2574                                 }
2575                         }
2576                 }
2577         }
2578         return 1.0f;
2579 }
2580
2581 /* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
2582 void ISB_create(RenderPart *pa, APixstr *apixbuf)
2583 {
2584         GroupObject *go;
2585         
2586         /* go over all lamps, and make the irregular buffers */
2587         for(go=R.lights.first; go; go= go->next) {
2588                 LampRen *lar= go->lampren;
2589                 
2590                 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2591                         
2592                         /* create storage for shadow, per thread */
2593                         lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
2594                         
2595                         if(apixbuf)
2596                                 isb_make_buffer_transp(pa, apixbuf, lar);
2597                         else
2598                                 isb_make_buffer(pa, lar);
2599                 }
2600         }
2601 }
2602
2603
2604 /* end of part rendering, free stored shadow data for this thread from all lamps */
2605 void ISB_free(RenderPart *pa)
2606 {
2607         GroupObject *go;
2608         
2609         /* go over all lamps, and free the irregular buffers */
2610         for(go=R.lights.first; go; go= go->next) {
2611                 LampRen *lar= go->lampren;
2612                 
2613                 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2614                         ISBData *isbdata= lar->shb->isb_result[pa->thread];
2615
2616                         if(isbdata) {
2617                                 if(isbdata->shadfacs)
2618                                         MEM_freeN(isbdata->shadfacs);
2619                                 if(isbdata->shadfaca)
2620                                         MEM_freeN(isbdata->shadfaca);
2621                                 
2622                                 if(isbdata->memarena)
2623                                         BLI_memarena_free(isbdata->memarena);
2624                                 
2625                                 MEM_freeN(isbdata);
2626                                 lar->shb->isb_result[pa->thread]= NULL;
2627                         }
2628                 }
2629         }
2630 }
2631