converted more mixed tab/space indentations to tabs. only whitespace changes.
[blender.git] / source / blender / render / intern / source / shadbuf.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * Contributor(s): 2004-2006, Blender Foundation
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 /** \file blender/render/intern/source/shadbuf.c
27  *  \ingroup render
28  */
29
30
31 #include <math.h>
32 #include <string.h>
33
34
35 #include "MEM_guardedalloc.h"
36
37 #include "DNA_group_types.h"
38 #include "DNA_lamp_types.h"
39 #include "DNA_material_types.h"
40
41 #include "BKE_global.h"
42 #include "BKE_scene.h"
43
44
45 #include "BLI_math.h"
46 #include "BLI_blenlib.h"
47 #include "BLI_jitter.h"
48 #include "BLI_memarena.h"
49 #include "BLI_rand.h"
50 #include "BLI_utildefines.h"
51
52 #include "PIL_time.h"
53
54 #include "renderpipeline.h"
55 #include "render_types.h"
56 #include "renderdatabase.h"
57 #include "rendercore.h"
58 #include "shadbuf.h"
59 #include "shading.h"
60 #include "zbuf.h"
61
62 /* Byte index of each component inside a 32-bit integer, selected by the
 * platform's byte order (XXX, could be better implemented).
 * In both layouts below ACOMP is the index of the most significant byte and
 * RCOMP the index of the least significant byte, so code that addresses a z
 * value byte by byte behaves the same on big- and little-endian machines.
 */
64 #if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__hppa__) || defined (__BIG_ENDIAN__)
65 #define RCOMP   3
66 #define GCOMP   2
67 #define BCOMP   1
68 #define ACOMP   0
69 #else
70 #define RCOMP   0
71 #define GCOMP   1
72 #define BCOMP   2
73 #define ACOMP   3
74 #endif
75
76 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
77 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
78 /* only to be used here in this file, it's for speed */
79 extern struct Render R;
80 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
81
82 /* ------------------------------------------------------------------------- */
83
84 /* initshadowbuf() in convertBlenderScene.c */
85
86 /* ------------------------------------------------------------------------- */
87
/* Copy a rectangular tile out of a size*size z-buffer into a packed buffer.
 *
 * rectz:  source z-buffer, `size` ints per row.
 * x1, y1: top-left corner of the tile inside rectz.
 * tile:   requested tile edge length in pixels.
 * r1:     destination; rows are written back to back, 4 bytes per pixel.
 *
 * The far edges are clamped to size-1 (not size), so a tile touching the
 * right or bottom border is copied one column / one row short.  Nothing is
 * written when the clamped tile is empty.
 */
static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
{
	const int xend = (x1 + tile >= size) ? size - 1 : x1 + tile;
	const int yend = (y1 + tile >= size) ? size - 1 : y1 + tile;
	const int *src;
	int rowbytes, row;

	if (!(x1 < xend && y1 < yend))
		return;

	rowbytes = 4 * (xend - x1);
	src = rectz + size * y1 + x1;

	for (row = y1; row < yend; row++, src += size, r1 += rowbytes)
		memcpy(r1, src, rowbytes);
}
108
#if 0
/* Disabled helper: adds up the per-tile type bytes in shb->cbuf (one byte per
 * 256 pixels of the size*size buffer) and returns 256 times that sum.
 */
static int sizeoflampbuf(ShadBuf *shb)
{
	const char *tiletype = shb->cbuf;
	int remaining = (shb->size * shb->size) / 256;
	int sum = 0;

	for (; remaining != 0; remaining--)
		sum += *tiletype++;

	return 256 * sum;
}
#endif
123
/* Return the jitter table for samp*samp samples, creating it on first use.
 *
 * samp is clamped to the range 2..16.  All tables live in one static array,
 * stored back to back: the table for k samples occupies k*k entries, so
 * 1*1 + 2*2 + ... + 16*16 = 1496 entries in total (about 12k of memory).
 * Sharing them between lamps saves memory and render time for soft shadows.
 *
 * Not threadsafe: the static table and its "initialized" flags are written
 * without any locking here.
 */
static float *give_jitter_tab(int samp)
{
	static float jit[1496][2];
	static char initialized[17] = {0};
	float *table;
	int k, start = 0;

	if (samp < 2)
		samp = 2;
	else if (samp > 16)
		samp = 16;

	/* skip over the tables of all smaller sample counts */
	for (k = 1; k < samp; k++)
		start += k * k;

	table = jit[start];

	if (!initialized[samp]) {
		initialized[samp] = 1;
		BLI_initjit(table, samp * samp);
	}

	return table;
}
149
150 static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype) 
151 {
152         float *jit, totw= 0.0f;
153         int samp= get_render_shadow_samples(&re->r, shb->samp);
154         int a, tot=samp*samp;
155         
156         shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
157         
158         for(jit= shb->jit, a=0; a<tot; a++, jit+=2) {
159                 if(filtertype==LA_SHADBUF_TENT) 
160                         shb->weight[a]= 0.71f - sqrt(jit[0]*jit[0] + jit[1]*jit[1]);
161                 else if(filtertype==LA_SHADBUF_GAUSS) 
162                         shb->weight[a]= RE_filter_value(R_FILTER_GAUSS, 1.8f*sqrt(jit[0]*jit[0] + jit[1]*jit[1]));
163                 else
164                         shb->weight[a]= 1.0f;
165                 
166                 totw+= shb->weight[a];
167         }
168         
169         totw= 1.0f/totw;
170         for(a=0; a<tot; a++) {
171                 shb->weight[a]*= totw;
172         }
173 }
174
175 static int verg_deepsample(const void *poin1, const void *poin2)
176 {
177         const DeepSample *ds1= (const DeepSample*)poin1;
178         const DeepSample *ds2= (const DeepSample*)poin2;
179
180         if(ds1->z < ds2->z) return -1;
181         else if(ds1->z == ds2->z) return 0;
182         else return 1;
183 }
184
/* Compress an array of (z, visibility) samples in place and return the new
 * number of samples.
 *
 * dsample: array of `tot` samples; the only caller in this file passes them
 *          ordered by increasing z.  The array is both read (through `ds`)
 *          and overwritten (through `newds`).
 * epsilon: maximum visibility error allowed for a dropped sample.
 *
 * The first sample is always kept.  Starting from the last kept sample, the
 * following samples are visited while a single straight line from the kept
 * sample can pass within +/- epsilon of each visited sample's visibility.
 * This is tracked as an interval [slopemin, slopemax] of allowed slopes, with
 * z differences scaled by 1/0x7FFFFFFF.  When the interval becomes empty the
 * scan steps back one sample and a new kept sample is written at that z.
 *
 * NOTE(review): for tot == 0 the final check still returns 1; the caller
 * visible in this file only passes tot >= 1.
 */
185 static int compress_deepsamples(DeepSample *dsample, int tot, float epsilon)
186 {
187         /* uses doubles to avoid overflows and other numerical issues,
188            could be improved */
189         DeepSample *ds, *newds;
190         float v;
191         double slope, slopemin, slopemax, min, max, div, newmin, newmax;
192         int a, first, z, newtot= 0;
193
194         /*if(print) {
195                 for(a=0, ds=dsample; a<tot; a++, ds++)
196                         printf("%lf,%f ", ds->z/(double)0x7FFFFFFF, ds->v);
197                 printf("\n");
198         }*/
199
200         /* read from and write into same array */
201         ds= dsample;
202         newds= dsample;
203         a= 0;
204
205         /* as long as we are not at the end of the array */
206         for(a++, ds++; a<tot; a++, ds++) {
                /* start a new segment at newds; `first` stays set until a
                 * sample with a different z has initialized the slopes */
207                 slopemin= 0.0f;
208                 slopemax= 0.0f;
209                 first= 1;
210
211                 for(; a<tot; a++, ds++) {
212                         //dz= ds->z - newds->z;
213                         if(ds->z == newds->z) {
214                                 /* still in same z position, simply check
215                                    visibility difference against epsilon */
216                                 if(!(fabs(newds->v - ds->v) <= epsilon)) {
217                                         break;
218                                 }
219                         }
220                         else {
221                                 /* compute slopes */
222                                 div= (double)0x7FFFFFFF/((double)ds->z - (double)newds->z);
223                                 min= ((ds->v - epsilon) - newds->v)*div;
224                                 max= ((ds->v + epsilon) - newds->v)*div;
225
226                                 /* adapt existing slopes */
227                                 if(first) {
228                                         newmin= min;
229                                         newmax= max;
230                                         first= 0;
231                                 }
232                                 else {
233                                         newmin= MAX2(slopemin, min);
234                                         newmax= MIN2(slopemax, max);
235
236                                         /* verify if there is still space between the slopes */
237                                         if(newmin > newmax) {
                                                /* this sample does not fit: end the
                                                 * segment at the previous sample */
238                                                 ds--;
239                                                 a--;
240                                                 break;
241                                         }
242                                 }
243
244                                 slopemin= newmin;
245                                 slopemax= newmax;
246                         }
247                 }
248
                /* inner loop ran off the end: step back to the last sample */
249                 if(a == tot) {
250                         ds--;
251                         a--;
252                 }
253
254                 /* always previous z */
255                 z= ds->z;
256
257                 if(first || a==tot-1) {
258                         /* if slopes were not initialized, use last visibility */
259                         v= ds->v;
260                 }
261                 else {
262                         /* compute visibility at center between slopes at z */
263                         slope= (slopemin+slopemax)*0.5;
264                         v= newds->v + slope*((z - newds->z)/(double)0x7FFFFFFF);
265                 }
266
                /* write the new kept sample behind the previous one */
267                 newds++;
268                 newtot++;
269
270                 newds->z= z;
271                 newds->v= v;
272         }
273
        /* newtot counted the samples written after the first one; add one more
         * unless the last written sample has the same visibility as the one
         * before it (with newtot == 0 only the first sample exists) */
274         if(newtot == 0 || (newds->v != (newds-1)->v))
275                 newtot++;
276
277         /*if(print) {
278                 for(a=0, ds=dsample; a<newtot; a++, ds++)
279                         printf("%lf,%f ", ds->z/(double)0x7FFFFFFF, ds->v);
280                 printf("\n");
281         }*/
282
283         return newtot;
284 }
285
286 static float deep_alpha(Render *re, int obinr, int facenr, int strand)
287 {
288         ObjectInstanceRen *obi= &re->objectinstance[obinr];
289         Material *ma;
290
291         if(strand) {
292                 StrandRen *strand= RE_findOrAddStrand(obi->obr, facenr-1);
293                 ma= strand->buffer->ma;
294         }
295         else {
296                 VlakRen *vlr= RE_findOrAddVlak(obi->obr, (facenr-1) & RE_QUAD_MASK);
297                 ma= vlr->mat;
298         }
299
300         return ma->shad_alpha;
301 }
302
/* Convert the per-pixel sample lists of a deep shadow render into one
 * compressed visibility function per pixel.
 *
 * A new ShadSampleBuf is allocated and appended to shb->buffers.  For each of
 * the size*size pixels:
 *  - the samples in apixbuf (faces) and, if given, apixbufstrand (strands)
 *    are counted per sub-buffer c, using bit c of the sample mask;
 *  - every sample is written as two DeepSample entries at the same z, which
 *    form a step in visibility;
 *  - each sub-buffer is sorted by z and the alpha values are replaced by the
 *    accumulated visibility;
 *  - z values are moved halfway towards the next sample (or 0x7FFFFFFF);
 *  - the sub-buffers are merged by increasing z into one list whose
 *    visibility is the average over all sub-buffers;
 *  - the merged list is reduced with compress_deepsamples() and stored in
 *    shsample->deepbuf[a], with its length in shsample->totbuf[a].
 * Pixels without samples get a NULL list and length 0.
 *
 * apixbufstrand may be NULL.  totsample/totsamplec are only used by the
 * commented-out statistics printf at the end.
 */
303 static void compress_deepshadowbuf(Render *re, ShadBuf *shb, APixstr *apixbuf, APixstrand *apixbufstrand)
304 {
305         ShadSampleBuf *shsample;
306         DeepSample *ds[RE_MAX_OSA], *sampleds[RE_MAX_OSA], *dsb, *newbuf;
307         APixstr *ap, *apn;
308         APixstrand *aps, *apns;
309         float visibility;
310
311         const int totbuf= shb->totbuf;
312         const float totbuf_f= (float)shb->totbuf;
313         const float totbuf_f_inv= 1.0f/totbuf_f;
314         const int size= shb->size;
315
316         int a, b, c, tot, minz, found, prevtot, newtot;
317         int sampletot[RE_MAX_OSA], totsample = 0, totsamplec = 0;
318         
319         shsample= MEM_callocN( sizeof(ShadSampleBuf), "shad sample buf");
320         BLI_addtail(&shb->buffers, shsample);
321
322         shsample->totbuf= MEM_callocN(sizeof(int)*size*size, "deeptotbuf");
323         shsample->deepbuf= MEM_callocN(sizeof(DeepSample*)*size*size, "deepbuf");
324
325         ap= apixbuf;
326         aps= apixbufstrand;
327         for(a=0; a<size*size; a++, ap++, aps++) {
328                 /* count number of samples */
329                 for(c=0; c<totbuf; c++)
330                         sampletot[c]= 0;
331
332                 tot= 0;
333                 for(apn=ap; apn; apn=apn->next)
334                         for(b=0; b<4; b++)
335                                 if(apn->p[b])
336                                         for(c=0; c<totbuf; c++)
337                                                 if(apn->mask[b] & (1<<c))
338                                                         sampletot[c]++;
339
340                 if(apixbufstrand) {
341                         for(apns=aps; apns; apns=apns->next)
342                                 for(b=0; b<4; b++)
343                                         if(apns->p[b])
344                                                 for(c=0; c<totbuf; c++)
345                                                         if(apns->mask[b] & (1<<c))
346                                                                 sampletot[c]++;
347                 }
348
349                 for(c=0; c<totbuf; c++)
350                         tot += sampletot[c];
351
352                 if(tot == 0) {
353                         shsample->deepbuf[a]= NULL;
354                         shsample->totbuf[a]= 0;
355                         continue;
356                 }
357
358                 /* fill samples */
                /* one allocation holds all sub-buffers back to back, two
                 * entries per sample; sampleds[c] marks the start of
                 * sub-buffer c and ds[c] is its write cursor */
359                 ds[0]= sampleds[0]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
360                 for(c=1; c<totbuf; c++)
361                         ds[c]= sampleds[c]= sampleds[c-1] + sampletot[c-1]*2;
362
363                 for(apn=ap; apn; apn=apn->next) {
364                         for(b=0; b<4; b++) {
365                                 if(apn->p[b]) {
366                                         for(c=0; c<totbuf; c++) {
367                                                 if(apn->mask[b] & (1<<c)) {
368                                                         /* two entries to create step profile */
369                                                         ds[c]->z= apn->z[b];
370                                                         ds[c]->v= 1.0f; /* not used */
371                                                         ds[c]++;
372                                                         ds[c]->z= apn->z[b];
373                                                         ds[c]->v= deep_alpha(re, apn->obi[b], apn->p[b], 0);
374                                                         ds[c]++;
375                                                 }
376                                         }
377                                 }
378                         }
379                 }
380
381                 if(apixbufstrand) {
382                         for(apns=aps; apns; apns=apns->next) {
383                                 for(b=0; b<4; b++) {
384                                         if(apns->p[b]) {
385                                                 for(c=0; c<totbuf; c++) {
386                                                         if(apns->mask[b] & (1<<c)) {
387                                                                 /* two entries to create step profile */
388                                                                 ds[c]->z= apns->z[b];
389                                                                 ds[c]->v= 1.0f; /* not used */
390                                                                 ds[c]++;
391                                                                 ds[c]->z= apns->z[b];
392                                                                 ds[c]->v= deep_alpha(re, apns->obi[b], apns->p[b], 1);
393                                                                 ds[c]++;
394                                                         }
395                                                 }
396                                         }
397                                 }
398                         }
399                 }
400
401                 for(c=0; c<totbuf; c++) {
402                         /* sort by increasing z */
                        /* the element size is two DeepSamples, so each pair
                         * moves as one unit; the compare callback reads the z
                         * of the first entry of the pair */
403                         qsort(sampleds[c], sampletot[c], sizeof(DeepSample)*2, verg_deepsample);
404
405                         /* sum visibility, replacing alpha values */
406                         visibility= 1.0f;
407                         ds[c]= sampleds[c];
408
409                         for(b=0; b<sampletot[c]; b++) {
410                                 /* two entries creating step profile */
411                                 ds[c]->v= visibility;
412                                 ds[c]++;
413
                                /* second entry still holds the alpha here */
414                                 visibility *= 1.0f-ds[c]->v;
415                                 ds[c]->v= visibility;
416                                 ds[c]++;
417                         }
418
419                         /* halfway trick, probably won't work well for volumes? */
                        /* each z becomes the average of itself and the z of
                         * the next pair, or of 0x7FFFFFFF for the last pair */
420                         ds[c]= sampleds[c];
421                         for(b=0; b<sampletot[c]; b++) {
422                                 if(b+1 < sampletot[c]) {
423                                         ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
424                                         ds[c]++;
425                                         ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
426                                         ds[c]++;
427                                 }
428                                 else {
429                                         ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
430                                         ds[c]++;
431                                         ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
432                                         ds[c]++;
433                                 }
434                         }
435
436                         /* init for merge loop */
                        /* from here on sampletot[c] counts entries, not pairs */
437                         ds[c]= sampleds[c];
438                         sampletot[c] *= 2;
439                 }
440
441                 shsample->deepbuf[a]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
442                 shsample->totbuf[a]= 0;
443
444                 /* merge buffers */
445                 dsb= shsample->deepbuf[a];
446                 while(1) {
                        /* find the smallest z among the unread heads */
447                         minz= 0;
448                         found= 0;
449
450                         for(c=0; c<totbuf; c++) {
451                                 if(sampletot[c] && (!found || ds[c]->z < minz)) {
452                                         minz= ds[c]->z;
453                                         found= 1;
454                                 }
455                         }
456
457                         if(!found)
458                                 break;
459
460                         dsb->z= minz;
461                         dsb->v= 0.0f;
462
463                         visibility= 0.0f;
464                         for(c=0; c<totbuf; c++) {
                                /* consume at most one entry with this z */
465                                 if(sampletot[c] && ds[c]->z == minz) {
466                                         ds[c]++;
467                                         sampletot[c]--;
468                                 }
469
                                /* a sub-buffer with nothing consumed yet counts
                                 * as fully visible, otherwise its last consumed
                                 * visibility is used; both scaled by 1/totbuf */
470                                 if(sampleds[c] == ds[c])
471                                         visibility += totbuf_f_inv;
472                                 else
473                                         visibility += (ds[c]-1)->v / totbuf_f;
474                         }
475
476                         dsb->v= visibility;
477                         dsb++;
478                         shsample->totbuf[a]++;
479                 }
480
481                 prevtot= shsample->totbuf[a];
482                 totsample += prevtot;
483
484                 newtot= compress_deepsamples(shsample->deepbuf[a], prevtot, shb->compressthresh);
485                 shsample->totbuf[a]= newtot;
486                 totsamplec += newtot;
487
                /* shrink the stored list to the compressed length */
488                 if(newtot < prevtot) {
489                         newbuf= MEM_mallocN(sizeof(DeepSample)*newtot, "cdeepsample");
490                         memcpy(newbuf, shsample->deepbuf[a], sizeof(DeepSample)*newtot);
491                         MEM_freeN(shsample->deepbuf[a]);
492                         shsample->deepbuf[a]= newbuf;
493                 }
494
                /* frees the temporary storage shared by all sub-buffers */
495                 MEM_freeN(sampleds[0]);
496         }
497
498         //printf("%d -> %d, ratio %f\n", totsample, totsamplec, (float)totsamplec/(float)totsample);
499 }
500
501 /* create Z tiles (for compression): this system is 24 bits!!!
 *
 * Packs the size*size z-buffer `rectz` into 16x16 pixel tiles and appends the
 * result to shb->buffers as a new ShadSampleBuf.  Per tile, one byte in cbuf
 * holds the tile type and one uintptr_t in zbuf holds either a z value or a
 * pointer to the tile's allocated data:
 *   0: all 256 values agree in their upper 24 bits; zbuf holds one z value.
 *      Also used, with z value 0, for tiles outside the spot cone when
 *      `square` is 0.
 *   1: the ACOMP and BCOMP bytes are constant; the data is the first z value
 *      (one int) followed by the GCOMP byte of every pixel.
 *   2: only the ACOMP byte is constant; the data is the first z value
 *      followed by the BCOMP and GCOMP bytes of every pixel.
 *   3: the ACOMP, BCOMP and GCOMP bytes of every pixel.
 * The RCOMP byte is never stored, hence 24 bits.
 */
502 static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
503 {
504         ShadSampleBuf *shsample;
505         float dist;
506         uintptr_t *ztile;
507         int *rz, *rz1, verg, verg1, size= shb->size;
508         int a, x, y, minx, miny, byt1, byt2;
509         char *rc, *rcline, *ctile, *zt;
510         
511         shsample= MEM_callocN( sizeof(ShadSampleBuf), "shad sample buf");
512         BLI_addtail(&shb->buffers, shsample);
513         
514         shsample->zbuf= MEM_mallocN( sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
515         shsample->cbuf= MEM_callocN( (size*size)/256, "initshadbuf3");
516         
517         ztile= (uintptr_t *)shsample->zbuf;
518         ctile= shsample->cbuf;
519         
520         /* help buffer */
        /* holds one 16x16 tile of 4-byte z values */
521         rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
522         
523         for(y=0; y<size; y+=16) {
                /* miny/minx: offset from the buffer centre of the tile edge
                 * that lies nearest to the centre */
524                 if(y< size/2) miny= y+15-size/2;
525                 else miny= y-size/2;    
526                 
527                 for(x=0; x<size; x+=16) {
528                         
529                         /* is tile within spotbundle? */
530                         a= size/2;
531                         if(x< a) minx= x+15-a;
532                         else minx= x-a; 
533                         
534                         dist= sqrt( (float)(minx*minx+miny*miny) );
535                         
536                         if(square==0 && dist>(float)(a+12)) {   /* 12, tested with a onlyshadow lamp */
                                /* outside the spot: fake a uniform tile; rz1
                                 * is set so that *(rz1-1) below reads verg */
537                                 a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
538                                 rz1= (&verg)+1;
539                         } 
540                         else {
541                                 copy_to_ztile(rectz, size, x, y, 16, rcline);
542                                 rz1= (int *)rcline;
543                                 
544                                 verg= (*rz1 & 0xFFFFFF00);
545                                 
                                /* a reaches 256 only when every value has the
                                 * same upper 24 bits as the first one */
546                                 for(a=0;a<256;a++,rz1++) {
547                                         if( (*rz1 & 0xFFFFFF00) !=verg) break;
548                                 }
549                         }
550                         if(a==256) { /* complete empty tile */
551                                 *ctile= 0;
552                                 *ztile= *(rz1-1);
553                         }
554                         else {
555                                 
556                                 /* ACOMP etc. are defined to work L/B endian */
557                                 
558                                 rc= rcline;
559                                 rz1= (int *)rcline;
560                                 verg=  rc[ACOMP];
561                                 verg1= rc[BCOMP];
562                                 rc+= 4;
563                                 byt1= 1; byt2= 1;
                                /* byt1: ACOMP byte equal in all pixels,
                                 * byt2: BCOMP byte equal in all pixels; the
                                 * scan stops once byt1 fails */
564                                 for(a=1;a<256;a++,rc+=4) {
565                                         byt1 &= (verg==rc[ACOMP]);
566                                         byt2 &= (verg1==rc[BCOMP]);
567                                         
568                                         if(byt1==0) break;
569                                 }
570                                 if(byt1 && byt2) {      /* only store byte */
571                                         *ctile= 1;
572                                         *ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
573                                         rz= (int *)*ztile;
574                                         *rz= *rz1;
575                                         
576                                         zt= (char *)(rz+1);
577                                         rc= rcline;
578                                         for(a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];       
579                                 }
580                                 else if(byt1) {         /* only store short */
581                                         *ctile= 2;
582                                         *ztile= (uintptr_t)MEM_mallocN(2*256+4,"Tile2");
583                                         rz= (int *)*ztile;
584                                         *rz= *rz1;
585                                         
586                                         zt= (char *)(rz+1);
587                                         rc= rcline;
588                                         for(a=0; a<256; a++, zt+=2, rc+=4) {
589                                                 zt[0]= rc[BCOMP];
590                                                 zt[1]= rc[GCOMP];
591                                         }
592                                 }
593                                 else {                  /* store triple */
594                                         *ctile= 3;
595                                         *ztile= (uintptr_t)MEM_mallocN(3*256,"Tile3");
596
597                                         zt= (char *)*ztile;
598                                         rc= rcline;
599                                         for(a=0; a<256; a++, zt+=3, rc+=4) {
600                                                 zt[0]= rc[ACOMP];
601                                                 zt[1]= rc[BCOMP];
602                                                 zt[2]= rc[GCOMP];
603                                         }
604                                 }
605                         }
606                         ztile++;
607                         ctile++;
608                 }
609         }
610
611         MEM_freeN(rcline);
612 }
613
614 /* sets start/end clipping. lar->shb should be initialized */
615 static void shadowbuf_autoclip(Render *re, LampRen *lar)
616 {
617         ObjectInstanceRen *obi;
618         ObjectRen *obr;
619         VlakRen *vlr= NULL;
620         VertRen *ver= NULL;
621         Material *ma= NULL;
622         float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
623         unsigned int lay = -1;
624         int i, a, maxtotvert, ok= 1;
625         char *clipflag;
626         
627         minz= 1.0e30f; maxz= -1.0e30f;
628         copy_m4_m4(viewmat, lar->shb->viewmat);
629         
630         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
631
632         maxtotvert= 0;
633         for(obr=re->objecttable.first; obr; obr=obr->next)
634                 maxtotvert= MAX2(obr->totvert, maxtotvert);
635
636         clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag");
637
638         /* set clip in vertices when face visible */
639         for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
640                 obr= obi->obr;
641
642                 if(obi->flag & R_TRANSFORMED)
643                         mul_m4_m4m4(obviewmat, obi->mat, viewmat);
644                 else
645                         copy_m4_m4(obviewmat, viewmat);
646
647                 memset(clipflag, 0, sizeof(char)*obr->totvert);
648
649                 /* clear clip, is being set if face is visible (clip is calculated for real later) */
650                 for(a=0; a<obr->totvlak; a++) {
651                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
652                         else vlr++;
653                         
654                         /* note; these conditions are copied from zbuffer_shadow() */
655                         if(vlr->mat!= ma) {
656                                 ma= vlr->mat;
657                                 ok= 1;
658                                 if((ma->mode & MA_SHADBUF)==0) ok= 0;
659                         }
660                         
661                         if(ok && (obi->lay & lay)) {
662                                 clipflag[vlr->v1->index]= 1;
663                                 clipflag[vlr->v2->index]= 1;
664                                 clipflag[vlr->v3->index]= 1;
665                                 if(vlr->v4) clipflag[vlr->v4->index]= 1;
666                         }                               
667                 }               
668                 
669                 /* calculate min and max */
670                 for(a=0; a< obr->totvert;a++) {
671                         if((a & 255)==0) ver= RE_findOrAddVert(obr, a);
672                         else ver++;
673                         
674                         if(clipflag[a]) {
675                                 VECCOPY(vec, ver->co);
676                                 mul_m4_v3(obviewmat, vec);
677                                 /* Z on visible side of lamp space */
678                                 if(vec[2] < 0.0f) {
679                                         float inpr, z= -vec[2];
680                                         
681                                         /* since vec is rotated in lampspace, this is how to get the cosine of angle */
682                                         /* precision is set 20% larger */
683                                         vec[2]*= 1.2f;
684                                         normalize_v3(vec);
685                                         inpr= - vec[2];
686
687                                         if(inpr>=lar->spotsi) {
688                                                 if(z<minz) minz= z;
689                                                 if(z>maxz) maxz= z;
690                                         }
691                                 }
692                         }
693                 }
694         }
695
696         MEM_freeN(clipflag);
697         
698         /* set clipping min and max */
699         if(minz < maxz) {
700                 float delta= (maxz - minz);     /* threshold to prevent precision issues */
701                 
702                 //printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
703                 if(lar->bufflag & LA_SHADBUF_AUTO_START)
704                         lar->shb->d= minz - delta*0.02f;        /* 0.02 is arbitrary... needs more thinking! */
705                 if(lar->bufflag & LA_SHADBUF_AUTO_END)
706                         lar->shb->clipend= maxz + delta*0.1f;
707                 
708                 /* bias was calculated as percentage, we scale it to prevent animation issues */
709                 delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
710                 //printf("bias delta %f\n", delta);
711                 lar->shb->bias= (int) (delta*(float)lar->shb->bias);
712         }
713 }
714
715 static void makeflatshadowbuf(Render *re, LampRen *lar, float *jitbuf)
716 {
717         ShadBuf *shb= lar->shb;
718         int *rectz, samples;
719
720         /* zbuffering */
721         rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
722         
723         for(samples=0; samples<shb->totbuf; samples++) {
724                 zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
725                 /* create Z tiles (for compression): this system is 24 bits!!! */
726                 compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
727
728                 if(re->test_break(re->tbh))
729                         break;
730         }
731         
732         MEM_freeN(rectz);
733 }
734
735 static void makedeepshadowbuf(Render *re, LampRen *lar, float *jitbuf)
736 {
737         ShadBuf *shb= lar->shb;
738         APixstr *apixbuf;
739         APixstrand *apixbufstrand= NULL;
740         ListBase apsmbase= {NULL, NULL};
741
742         /* zbuffering */
743         apixbuf= MEM_callocN(sizeof(APixstr)*shb->size*shb->size, "APixbuf");
744         if(re->totstrand)
745                 apixbufstrand= MEM_callocN(sizeof(APixstrand)*shb->size*shb->size, "APixbufstrand");
746
747         zbuffer_abuf_shadow(re, lar, shb->persmat, apixbuf, apixbufstrand, &apsmbase, shb->size,
748                 shb->totbuf, (float(*)[2])jitbuf);
749
750         /* create Z tiles (for compression): this system is 24 bits!!! */
751         compress_deepshadowbuf(re, shb, apixbuf, apixbufstrand);
752         
753         MEM_freeN(apixbuf);
754         if(apixbufstrand)
755                 MEM_freeN(apixbufstrand);
756         freepsA(&apsmbase);
757 }
758
759 void makeshadowbuf(Render *re, LampRen *lar)
760 {
761         ShadBuf *shb= lar->shb;
762         float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
763         
764         if(lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
765                 shadowbuf_autoclip(re, lar);
766         
767         /* just to enforce identical behaviour of all irregular buffers */
768         if(lar->buftype==LA_SHADBUF_IRREGULAR)
769                 shb->size= 1024;
770         
771         /* matrices and window: in winmat the transformation is being put,
772                 transforming from observer view to lamp view, including lamp window matrix */
773         
774         angle= saacos(lar->spotsi);
775         temp= 0.5f*shb->size*cos(angle)/sin(angle);
776         shb->pixsize= (shb->d)/temp;
777         wsize= shb->pixsize*(shb->size/2.0);
778         
779         perspective_m4( shb->winmat,-wsize, wsize, -wsize, wsize, shb->d, shb->clipend);
780         mul_m4_m4m4(shb->persmat, shb->viewmat, shb->winmat);
781
782         if(ELEM3(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) {
783                 shb->totbuf= lar->buffers;
784
785                 /* jitter, weights - not threadsafe! */
786                 BLI_lock_thread(LOCK_CUSTOM1);
787                 shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
788                 make_jitter_weight_tab(re, shb, lar->filtertype);
789                 BLI_unlock_thread(LOCK_CUSTOM1);
790                 
791                 if(shb->totbuf==4) jitbuf= give_jitter_tab(2);
792                 else if(shb->totbuf==9) jitbuf= give_jitter_tab(3);
793                 else jitbuf= twozero;
794                 
795                 /* zbuffering */
796                 if(lar->buftype == LA_SHADBUF_DEEP) {
797                         makedeepshadowbuf(re, lar, jitbuf);
798                         shb->totbuf= 1;
799                 }
800                 else
801                         makeflatshadowbuf(re, lar, jitbuf);
802
803                 /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
804         }
805 }
806
807 static void *do_shadow_thread(void *re_v)
808 {
809         Render *re= (Render*)re_v;
810         LampRen *lar;
811
812         do {
813                 BLI_lock_thread(LOCK_CUSTOM1);
814                 for(lar=re->lampren.first; lar; lar=lar->next) {
815                         if(lar->shb && !lar->thread_assigned) {
816                                 lar->thread_assigned= 1;
817                                 break;
818                         }
819                 }
820                 BLI_unlock_thread(LOCK_CUSTOM1);
821
822                 /* if type is irregular, this only sets the perspective matrix and autoclips */
823                 if(lar) {
824                         makeshadowbuf(re, lar);
825                         BLI_lock_thread(LOCK_CUSTOM1);
826                         lar->thread_ready= 1;
827                         BLI_unlock_thread(LOCK_CUSTOM1);
828                 }
829         } while(lar && !re->test_break(re->tbh));
830
831         return NULL;
832 }
833
/* break flag polled by the shadow worker threads; threaded_makeshadowbufs()
 * is the only writer in this file section */
static volatile int g_break= 0;

/* test_break replacement installed while worker threads run: it ignores its
 * argument and simply reports the shared flag */
static int thread_break(void *UNUSED(arg))
{
	const int stop= g_break;

	return stop;
}
839
840 void threaded_makeshadowbufs(Render *re)
841 {
842         ListBase threads;
843         LampRen *lar;
844         int a, totthread= 0;
845         int (*test_break)(void *);
846
847         /* count number of threads to use */
848         if(G.rendering) {
849                 for(lar=re->lampren.first; lar; lar= lar->next)
850                         if(lar->shb)
851                                 totthread++;
852                 
853                 totthread= MIN2(totthread, re->r.threads);
854         }
855         else
856                 totthread= 1; /* preview render */
857
858         if(totthread <= 1) {
859                 for(lar=re->lampren.first; lar; lar= lar->next) {
860                         if(re->test_break(re->tbh)) break;
861                         if(lar->shb) {
862                                 /* if type is irregular, this only sets the perspective matrix and autoclips */
863                                 makeshadowbuf(re, lar);
864                         }
865                 }
866         }
867         else {
868                 /* swap test break function */
869                 test_break= re->test_break;
870                 re->test_break= thread_break;
871
872                 for(lar=re->lampren.first; lar; lar= lar->next) {
873                         lar->thread_assigned= 0;
874                         lar->thread_ready= 0;
875                 }
876
877                 BLI_init_threads(&threads, do_shadow_thread, totthread);
878                 
879                 for(a=0; a<totthread; a++)
880                         BLI_insert_thread(&threads, re);
881
882                 /* keep rendering as long as there are shadow buffers not ready */
883                 do {
884                         if((g_break=test_break(re->tbh)))
885                                 break;
886
887                         PIL_sleep_ms(50);
888
889                         BLI_lock_thread(LOCK_CUSTOM1);
890                         for(lar=re->lampren.first; lar; lar= lar->next)
891                                 if(lar->shb && !lar->thread_ready)
892                                         break;
893                         BLI_unlock_thread(LOCK_CUSTOM1);
894                 } while(lar);
895         
896                 BLI_end_threads(&threads);
897
898                 /* unset threadsafety */
899                 re->test_break= test_break;
900                 g_break= 0;
901         }
902 }
903
904 void freeshadowbuf(LampRen *lar)
905 {
906         if(lar->shb) {
907                 ShadBuf *shb= lar->shb;
908                 ShadSampleBuf *shsample;
909                 int b, v;
910                 
911                 for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
912                         if(shsample->deepbuf) {
913                                 v= shb->size*shb->size;
914                                 for(b=0; b<v; b++)
915                                         if(shsample->deepbuf[b])
916                                                 MEM_freeN(shsample->deepbuf[b]);
917                                         
918                                 MEM_freeN(shsample->deepbuf);
919                                 MEM_freeN(shsample->totbuf);
920                         }
921                         else {
922                                 intptr_t *ztile= shsample->zbuf;
923                                 char *ctile= shsample->cbuf;
924                                 
925                                 v= (shb->size*shb->size)/256;
926                                 for(b=0; b<v; b++, ztile++, ctile++)
927                                         if(*ctile) MEM_freeN((void *) *ztile);
928                                 
929                                 MEM_freeN(shsample->zbuf);
930                                 MEM_freeN(shsample->cbuf);
931                         }
932                 }
933                 BLI_freelistN(&shb->buffers);
934                 
935                 if(shb->weight) MEM_freeN(shb->weight);
936                 MEM_freeN(lar->shb);
937                 
938                 lar->shb= NULL;
939         }
940 }
941
942
943 static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
944 {
945         /* return a 1 if fully compressed shadbuf-tile && z==const */
946         int ofs;
947         char *ct;
948
949         if(shsample->deepbuf)
950                 return 0;
951
952         /* always test borders of shadowbuffer */
953         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
954         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
955
956         /* calc z */
957         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
958         ct= shsample->cbuf+ofs;
959         if(*ct==0) {
960                 if(nr==0) {
961                         *rz= *( (int **)(shsample->zbuf+ofs) );
962                         return 1;
963                 }
964                 else if(*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
965                 
966                 return 1;
967         }
968         
969         return 0;
970 }
971
972 static float readdeepvisibility(DeepSample *dsample, int tot, int z, int bias, float *biast)
973 {
974         DeepSample *ds, *prevds;
975         float t;
976         int a;
977
978         /* tricky stuff here; we use ints which can overflow easily with bias values */
979
980         ds= dsample;
981         for(a=0; a<tot && (z-bias > ds->z); a++, ds++)
982                 ;
983
984         if(a == tot) {
985                 if(biast)
986                         *biast= 0.0f;
987                 return (ds-1)->v; /* completely behind all samples */
988         }
989         
990         /* check if this read needs bias blending */
991         if(biast) {
992                 if(z > ds->z)
993                         *biast= (float)(z - ds->z)/(float)bias;
994                 else
995                         *biast= 0.0f;
996         }
997
998         if(a == 0)
999                 return 1.0f; /* completely in front of all samples */
1000
1001         /* converting to float early here because ds->z - prevds->z can overflow */
1002         prevds= ds-1;
1003         t= ((float)(z-bias) - (float)prevds->z)/((float)ds->z - (float)prevds->z);
1004         return t*ds->v + (1.0f-t)*prevds->v;
1005 }
1006
1007 static float readdeepshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
1008 {
1009         float v, biasv, biast;
1010         int ofs, tot;
1011
1012         if(zs < - 0x7FFFFE00 + bias)
1013                 return 1.0;     /* extreme close to clipstart */
1014
1015         /* calc z */
1016         ofs= ys*shb->size + xs;
1017         tot= shsample->totbuf[ofs];
1018         if(tot == 0)
1019                 return 1.0f;
1020
1021         v= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, bias, &biast);
1022
1023         if(biast != 0.0f) {
1024                 /* in soft bias area */
1025                 biasv= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, 0, 0);
1026
1027                 biast= biast*biast;
1028                 return (1.0f-biast)*v + biast*biasv;
1029         }
1030
1031         return v;
1032 }
1033
1034 /* return 1.0 : fully in light */
1035 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)     
1036 {
1037         float temp;
1038         int *rz, ofs;
1039         int zsamp=0;
1040         char *ct, *cz;
1041
1042         /* simpleclip */
1043         /* if(xs<0 || ys<0) return 1.0; */
1044         /* if(xs>=shb->size || ys>=shb->size) return 1.0; */
1045         
1046         /* always test borders of shadowbuffer */
1047         if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
1048         if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
1049
1050         if(shsample->deepbuf)
1051                 return readdeepshadowbuf(shb, shsample, bias, xs, ys, zs);
1052
1053         /* calc z */
1054         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1055         ct= shsample->cbuf+ofs;
1056         rz= *( (int **)(shsample->zbuf+ofs) );
1057
1058         if(*ct==3) {
1059                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1060                 cz= (char *)&zsamp;
1061                 cz[ACOMP]= ct[0];
1062                 cz[BCOMP]= ct[1];
1063                 cz[GCOMP]= ct[2];
1064         }
1065         else if(*ct==2) {
1066                 ct= ((char *)rz);
1067                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1068                 zsamp= *rz;
1069         
1070                 cz= (char *)&zsamp;
1071                 cz[BCOMP]= ct[0];
1072                 cz[GCOMP]= ct[1];
1073         }
1074         else if(*ct==1) {
1075                 ct= ((char *)rz);
1076                 ct+= 4+16*(ys & 15)+(xs & 15);
1077                 zsamp= *rz;
1078
1079                 cz= (char *)&zsamp;
1080                 cz[GCOMP]= ct[0];
1081
1082         }
1083         else {
1084                 /* got warning on this for 64 bits.... */
1085                 /* but it's working code! in this case rz is not a pointer but zvalue (ton) */
1086                  zsamp= GET_INT_FROM_POINTER(rz);
1087         }
1088
1089         /* tricky stuff here; we use ints which can overflow easily with bias values */
1090         
1091         if(zsamp > zs) return 1.0;              /* absolute no shadow */
1092         else if(zs < - 0x7FFFFE00 + bias) return 1.0;   /* extreme close to clipstart */
1093         else if(zsamp < zs-bias) return 0.0 ;   /* absolute in shadow */
1094         else {                                  /* soft area */
1095                 
1096                 temp=  ( (float)(zs- zsamp) )/(float)bias;
1097                 return 1.0 - temp*temp;
1098                         
1099         }
1100 }
1101
1102 static void shadowbuf_project_co(float *x, float *y, float *z, ShadBuf *shb, float co[3])
1103 {
1104         float hco[4], size= 0.5f*(float)shb->size;
1105
1106         copy_v3_v3(hco, co);
1107         hco[3]= 1.0f;
1108
1109         mul_m4_v4(shb->persmat, hco);
1110
1111         *x= size*(1.0f+hco[0]/hco[3]);
1112         *y= size*(1.0f+hco[1]/hco[3]);
1113         if(z) *z= (hco[2]/hco[3]);
1114 }
1115
1116 /* the externally called shadow testing (reading) function */
1117 /* return 1.0: no shadow at all */
1118 float testshadowbuf(Render *re, ShadBuf *shb, float *co, float *dxco, float *dyco, float inp, float mat_bias)
1119 {
1120         ShadSampleBuf *shsample;
1121         float fac, dco[3], dx[3], dy[3], shadfac=0.0f;
1122         float xs1, ys1, zs1, *jit, *weight, xres, yres, biasf;
1123         int xs, ys, zs, bias, *rz;
1124         short a, num;
1125         
1126         /* crash preventer */
1127         if(shb->buffers.first==NULL)
1128                 return 1.0f;
1129         
1130         /* when facing away, assume fully in shadow */
1131         if(inp <= 0.0f)
1132                 return 0.0f;
1133
1134         /* project coordinate to pixel space */
1135         shadowbuf_project_co(&xs1, &ys1, &zs1, shb, co);
1136
1137         /* clip z coordinate, z is projected so that (-1.0, 1.0) matches
1138            (clipstart, clipend), so we can do this simple test */
1139         if(zs1>=1.0f)
1140                 return 0.0f;
1141         else if(zs1<= -1.0f)
1142                 return 1.0f;
1143
1144         zs= ((float)0x7FFFFFFF)*zs1;
1145
1146         /* take num*num samples, increase area with fac */
1147         num= get_render_shadow_samples(&re->r, shb->samp);
1148         num= num*num;
1149         fac= shb->soft;
1150         
1151         /* compute z bias */
1152         if(mat_bias!=0.0f) biasf= shb->bias*mat_bias;
1153         else biasf= shb->bias;
1154         /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors 
1155            on cube edges, with one side being almost frontal lighted (ton)  */
1156         bias= (1.5f-inp*inp)*biasf;
1157         
1158         /* in case of no filtering we can do things simpler */
1159         if(num==1) {
1160                 for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
1161                         shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
1162                 
1163                 return shadfac/(float)shb->totbuf;
1164         }
1165
1166         /* calculate filter size */
1167         add_v3_v3v3(dco, co, dxco);
1168         shadowbuf_project_co(&dx[0], &dx[1], NULL, shb, dco);
1169         dx[0]= xs1 - dx[0];
1170         dx[1]= ys1 - dx[1];
1171
1172         add_v3_v3v3(dco, co, dyco);
1173         shadowbuf_project_co(&dy[0], &dy[1], NULL, shb, dco);
1174         dy[0]= xs1 - dy[0];
1175         dy[1]= ys1 - dy[1];
1176         
1177         xres= fac*(fabs(dx[0]) + fabs(dy[0]));
1178         yres= fac*(fabs(dx[1]) + fabs(dy[1]));
1179         if(xres<1.0f) xres= 1.0f;
1180         if(yres<1.0f) yres= 1.0f;
1181         
1182         /* make xs1/xs1 corner of sample area */
1183         xs1 -= xres*0.5f;
1184         ys1 -= yres*0.5f;
1185
1186         /* in case we have a constant value in a tile, we can do quicker lookup */
1187         if(xres<16.0f && yres<16.0f) {
1188                 shsample= shb->buffers.first;
1189                 if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
1190                         if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
1191                                 if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
1192                                         if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
1193                                                 return readshadowbuf(shb, shsample, bias,(int)xs1, (int)ys1, zs);
1194                                         }
1195                                 }
1196                         }
1197                 }
1198         }
1199         
1200         /* full jittered shadow buffer lookup */
1201         for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
1202                 jit= shb->jit;
1203                 weight= shb->weight;
1204                 
1205                 for(a=num; a>0; a--, jit+=2, weight++) {
1206                         /* instead of jit i tried random: ugly! */
1207                         /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
1208                         /* xs1 and ys1 are already corrected to be corner of sample area */
1209                         xs= xs1 + xres*(jit[0] + 0.5f);
1210                         ys= ys1 + yres*(jit[1] + 0.5f);
1211                         
1212                         shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
1213                 }
1214         }
1215
1216         /* Renormalizes for the sample number: */
1217         return shadfac/(float)shb->totbuf;
1218 }
1219
1220 /* different function... sampling behind clipend can be LIGHT, bias is negative! */
1221 /* return: light */
1222 static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
1223 {
1224         float temp;
1225         int *rz, ofs;
1226         int bias, zbias, zsamp;
1227         char *ct, *cz;
1228
1229         /* negative! The other side is more important */
1230         bias= -shb->bias;
1231         
1232         /* simpleclip */
1233         if(xs<0 || ys<0) return 0.0;
1234         if(xs>=shb->size || ys>=shb->size) return 0.0;
1235
1236         /* calc z */
1237         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1238         ct= shsample->cbuf+ofs;
1239         rz= *( (int **)(shsample->zbuf+ofs) );
1240
1241         if(*ct==3) {
1242                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1243                 cz= (char *)&zsamp;
1244                 zsamp= 0;
1245                 cz[ACOMP]= ct[0];
1246                 cz[BCOMP]= ct[1];
1247                 cz[GCOMP]= ct[2];
1248         }
1249         else if(*ct==2) {
1250                 ct= ((char *)rz);
1251                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1252                 zsamp= *rz;
1253         
1254                 cz= (char *)&zsamp;
1255                 cz[BCOMP]= ct[0];
1256                 cz[GCOMP]= ct[1];
1257         }
1258         else if(*ct==1) {
1259                 ct= ((char *)rz);
1260                 ct+= 4+16*(ys & 15)+(xs & 15);
1261                 zsamp= *rz;
1262
1263                 cz= (char *)&zsamp;
1264                 cz[GCOMP]= ct[0];
1265
1266         }
1267         else {
1268                 /* same as before */
1269                 /* still working code! (ton) */
1270                  zsamp= GET_INT_FROM_POINTER(rz);
1271         }
1272
1273         /* NO schadow when sampled at 'eternal' distance */
1274
1275         if(zsamp >= 0x7FFFFE00) return 1.0; 
1276
1277         if(zsamp > zs) return 1.0;              /* absolute no shadww */
1278         else {
1279                 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
1280                 zbias= 0x7fffffff - zs;
1281                 if(zbias > -bias) {
1282                         if( zsamp < zs-bias) return 0.0 ;       /* absolute in shadow */
1283                 }
1284                 else return 0.0 ;       /* absolute shadow */
1285         }
1286
1287         /* soft area */
1288         
1289         temp=  ( (float)(zs- zsamp) )/(float)bias;
1290         return 1.0 - temp*temp;
1291 }
1292
1293
1294 float shadow_halo(LampRen *lar, float *p1, float *p2)
1295 {
1296         /* p1 p2 already are rotated in spot-space */
1297         ShadBuf *shb= lar->shb;
1298         ShadSampleBuf *shsample;
1299         float co[4], siz;
1300         float labda, labdao, labdax, labday, ldx, ldy;
1301         float zf, xf1, yf1, zf1, xf2, yf2, zf2;
1302         float count, lightcount;
1303         int x, y, z, xs1, ys1;
1304         int dx = 0, dy = 0;
1305         
1306         siz= 0.5*(float)shb->size;
1307         
1308         co[0]= p1[0];
1309         co[1]= p1[1];
1310         co[2]= p1[2]/lar->sh_zfac;
1311         co[3]= 1.0;
1312         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1313         xf1= siz*(1.0+co[0]/co[3]);
1314         yf1= siz*(1.0+co[1]/co[3]);
1315         zf1= (co[2]/co[3]);
1316
1317
1318         co[0]= p2[0];
1319         co[1]= p2[1];
1320         co[2]= p2[2]/lar->sh_zfac;
1321         co[3]= 1.0;
1322         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1323         xf2= siz*(1.0+co[0]/co[3]);
1324         yf2= siz*(1.0+co[1]/co[3]);
1325         zf2= (co[2]/co[3]);
1326
1327         /* the 2dda (a pixel line formula) */
1328
1329         xs1= (int)xf1;
1330         ys1= (int)yf1;
1331
1332         if(xf1 != xf2) {
1333                 if(xf2-xf1 > 0.0) {
1334                         labdax= (xf1-xs1-1.0)/(xf1-xf2);
1335                         ldx= -shb->shadhalostep/(xf1-xf2);
1336                         dx= shb->shadhalostep;
1337                 }
1338                 else {
1339                         labdax= (xf1-xs1)/(xf1-xf2);
1340                         ldx= shb->shadhalostep/(xf1-xf2);
1341                         dx= -shb->shadhalostep;
1342                 }
1343         }
1344         else {
1345                 labdax= 1.0;
1346                 ldx= 0.0;
1347         }
1348
1349         if(yf1 != yf2) {
1350                 if(yf2-yf1 > 0.0) {
1351                         labday= (yf1-ys1-1.0)/(yf1-yf2);
1352                         ldy= -shb->shadhalostep/(yf1-yf2);
1353                         dy= shb->shadhalostep;
1354                 }
1355                 else {
1356                         labday= (yf1-ys1)/(yf1-yf2);
1357                         ldy= shb->shadhalostep/(yf1-yf2);
1358                         dy= -shb->shadhalostep;
1359                 }
1360         }
1361         else {
1362                 labday= 1.0;
1363                 ldy= 0.0;
1364         }
1365         
1366         x= xs1;
1367         y= ys1;
1368         labda= count= lightcount= 0.0;
1369
1370 /* printf("start %x %x  \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
1371
1372         while(1) {
1373                 labdao= labda;
1374                 
1375                 if(labdax==labday) {
1376                         labdax+= ldx;
1377                         x+= dx;
1378                         labday+= ldy;
1379                         y+= dy;
1380                 }
1381                 else {
1382                         if(labdax<labday) {
1383                                 labdax+= ldx;
1384                                 x+= dx;
1385                         } else {
1386                                 labday+= ldy;
1387                                 y+= dy;
1388                         }
1389                 }
1390                 
1391                 labda= MIN2(labdax, labday);
1392                 if(labda==labdao || labda>=1.0) break;
1393                 
1394                 zf= zf1 + labda*(zf2-zf1);
1395                 count+= (float)shb->totbuf;
1396
1397                 if(zf<= -1.0) lightcount += 1.0;        /* close to the spot */
1398                 else {
1399                 
1400                         /* make sure, behind the clipend we extend halolines. */
1401                         if(zf>=1.0) z= 0x7FFFF000;
1402                         else z= (int)(0x7FFFF000*zf);
1403                         
1404                         for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
1405                                 lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
1406                         
1407                 }
1408         }
1409         
1410         if(count!=0.0) return (lightcount/count);
1411         return 0.0;
1412         
1413 }
1414
1415
1416 /* ********************* Irregular Shadow Buffer (ISB) ************* */
1417 /* ********** storage of all view samples in a raster of lists ***** */
1418
1419 /* based on several articles describing this method, like:
1420 The Irregular Z-Buffer and its Application to Shadow Mapping
1421 Gregory S. Johnson - William R. Mark - Christopher A. Burns 
1422 and
1423 Alias-Free Shadow Maps
1424 Timo Aila and Samuli Laine
1425 */
1426
1427 /* bsp structure (actually kd tree) */
1428
/* capacity of the sample pointer array of one bsp branch */
#define BSPMAX_SAMPLE	128
/* depth limit checked by isb_bsp_insert() before splitting a full branch */
#define BSPMAX_DEPTH	32
1431
/* axis aligned box of floats; aligned with struct rctf, so the member
 * order must not change */
typedef struct Boxf {
	float xmin;
	float xmax;
	float ymin;
	float ymax;
	float zmin;
	float zmax;
} Boxf;
1438
1439 typedef struct ISBBranch {
1440         struct ISBBranch *left, *right;
1441         float divider[2];
1442         Boxf box;
1443         short totsamp, index, full, unused;
1444         ISBSample **samples;
1445 } ISBBranch;
1446
1447 typedef struct BSPFace {
1448         Boxf box;
1449         float *v1, *v2, *v3, *v4;
1450         int obi;                /* object for face lookup */
1451         int facenr;             /* index to retrieve VlakRen */
1452         int type;               /* only for strand now */
1453         short shad_alpha, is_full;
1454         
1455         /* strand caching data, optimize for point_behind_strand() */
1456         float radline, radline_end, len;
1457         float vec1[3], vec2[3], rc[3];
1458 } BSPFace;
1459
1460 /* boxes are in lamp projection */
1461 static void init_box(Boxf *box)
1462 {
1463         box->xmin= 1000000.0f;
1464         box->xmax= 0;
1465         box->ymin= 1000000.0f;
1466         box->ymax= 0;
1467         box->zmin= 0x7FFFFFFF;
1468         box->zmax= - 0x7FFFFFFF;
1469 }
1470
1471 /* use v1 to calculate boundbox */
1472 static void bound_boxf(Boxf *box, float *v1)
1473 {
1474         if(v1[0] < box->xmin) box->xmin= v1[0];
1475         if(v1[0] > box->xmax) box->xmax= v1[0];
1476         if(v1[1] < box->ymin) box->ymin= v1[1];
1477         if(v1[1] > box->ymax) box->ymax= v1[1];
1478         if(v1[2] < box->zmin) box->zmin= v1[2];
1479         if(v1[2] > box->zmax) box->zmax= v1[2];
1480 }
1481
1482 /* use v1 to calculate boundbox */
1483 static void bound_rectf(rctf *box, float *v1)
1484 {
1485         if(v1[0] < box->xmin) box->xmin= v1[0];
1486         if(v1[0] > box->xmax) box->xmax= v1[0];
1487         if(v1[1] < box->ymin) box->ymin= v1[1];
1488         if(v1[1] > box->ymax) box->ymax= v1[1];
1489 }
1490
1491
1492 /* halfway splitting, for initializing a more regular tree */
1493 static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
1494 {
1495         
1496         /* if level > 0 we create new branches and go deeper*/
1497         if(level > 0) {
1498                 ISBBranch *left, *right;
1499                 int i;
1500                 
1501                 /* splitpoint */
1502                 root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
1503                 root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
1504                 
1505                 /* find best splitpoint */
1506                 if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1507                         i= root->index= 0;
1508                 else
1509                         i= root->index= 1;
1510                 
1511                 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1512                 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1513                 
1514                 /* box info */
1515                 left->box= root->box;
1516                 right->box= root->box;
1517                 if(i==0) {
1518                         left->box.xmax= root->divider[0];
1519                         right->box.xmin= root->divider[0];
1520                 }
1521                 else {
1522                         left->box.ymax= root->divider[1];
1523                         right->box.ymin= root->divider[1];
1524                 }
1525                 isb_bsp_split_init(left, mem, level-1);
1526                 isb_bsp_split_init(right, mem, level-1);
1527         }
1528         else {
1529                 /* we add sample array */
1530                 root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1531         }
1532 }
1533
1534 /* note; if all samples on same location we just spread them over 2 new branches */
1535 static void isb_bsp_split(ISBBranch *root, MemArena *mem)
1536 {
1537         ISBBranch *left, *right;
1538         ISBSample *samples[BSPMAX_SAMPLE];
1539         int a, i;
1540
1541         /* splitpoint */
1542         root->divider[0]= root->divider[1]= 0.0f;
1543         for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1544                 root->divider[0]+= root->samples[a]->zco[0];
1545                 root->divider[1]+= root->samples[a]->zco[1];
1546         }
1547         root->divider[0]/= BSPMAX_SAMPLE;
1548         root->divider[1]/= BSPMAX_SAMPLE;
1549         
1550         /* find best splitpoint */
1551         if(root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1552                 i= root->index= 0;
1553         else
1554                 i= root->index= 1;
1555         
1556         /* new branches */
1557         left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1558         right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1559
1560         /* new sample array */
1561         left->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1562         right->samples= samples; // tmp
1563         
1564         /* split samples */
1565         for(a=BSPMAX_SAMPLE-1; a>=0; a--) {
1566                 int comp= 0;
1567                 /* this prevents adding samples all to 1 branch when divider is equal to samples */
1568                 if(root->samples[a]->zco[i] == root->divider[i])
1569                         comp= a & 1;
1570                 else if(root->samples[a]->zco[i] < root->divider[i])
1571                         comp= 1;
1572                 
1573                 if(comp==1) {
1574                         left->samples[left->totsamp]= root->samples[a];
1575                         left->totsamp++;
1576                 }
1577                 else {
1578                         right->samples[right->totsamp]= root->samples[a];
1579                         right->totsamp++;
1580                 }
1581         }
1582         
1583         /* copy samples from tmp */
1584         memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
1585         right->samples= root->samples;
1586         root->samples= NULL;
1587         
1588         /* box info */
1589         left->box= root->box;
1590         right->box= root->box;
1591         if(i==0) {
1592                 left->box.xmax= root->divider[0];
1593                 right->box.xmin= root->divider[0];
1594         }
1595         else {
1596                 left->box.ymax= root->divider[1];
1597                 right->box.ymin= root->divider[1];
1598         }
1599 }
1600
1601 /* inserts sample in main tree, also splits on threshold */
1602 /* returns 1 if error */
1603 static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
1604 {
1605         ISBBranch *bspn= root;
1606         float *zco= sample->zco;
1607         int i= 0;
1608         
1609         /* debug counter, also used to check if something was filled in ever */
1610         root->totsamp++;
1611         
1612         /* going over branches until last one found */
1613         while(bspn->left) {
1614                 if(zco[bspn->index] <= bspn->divider[bspn->index])
1615                         bspn= bspn->left;
1616                 else
1617                         bspn= bspn->right;
1618                 i++;
1619         }
1620         /* bspn now is the last branch */
1621         
1622         if(bspn->totsamp==BSPMAX_SAMPLE) {
1623                 printf("error in bsp branch\n");        /* only for debug, cannot happen */
1624                 return 1;
1625         }
1626         
1627         /* insert */
1628         bspn->samples[bspn->totsamp]= sample;
1629         bspn->totsamp++;
1630
1631         /* split if allowed and needed */
1632         if(bspn->totsamp==BSPMAX_SAMPLE) {
1633                 if(i==BSPMAX_DEPTH) {
1634                         bspn->totsamp--;        /* stop filling in... will give errors */
1635                         return 1;
1636                 }
1637                 isb_bsp_split(bspn, memarena);
1638         }
1639         return 0;
1640 }
1641
1642 static float VecLen2f( float *v1, float *v2)
1643 {
1644         float x= v1[0]-v2[0];
1645         float y= v1[1]-v2[1];
1646         return (float)sqrt(x*x+y*y);
1647 }
1648
1649 /* initialize vars in face, for optimal point-in-face test */
1650 static void bspface_init_strand(BSPFace *face) 
1651 {
1652         
1653         face->radline= 0.5f*VecLen2f(face->v1, face->v2);
1654         
1655         mid_v3_v3v3(face->vec1, face->v1, face->v2);
1656         if(face->v4)
1657                 mid_v3_v3v3(face->vec2, face->v3, face->v4);
1658         else
1659                 VECCOPY(face->vec2, face->v3);
1660         
1661         face->rc[0]= face->vec2[0]-face->vec1[0];
1662         face->rc[1]= face->vec2[1]-face->vec1[1];
1663         face->rc[2]= face->vec2[2]-face->vec1[2];
1664         
1665         face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
1666         
1667         if(face->len!=0.0f) {
1668                 face->radline_end= face->radline/sqrt(face->len);
1669                 face->len= 1.0f/face->len;
1670         }
1671 }
1672
1673 /* brought back to a simple 2d case */
1674 static int point_behind_strand(float *p, BSPFace *face)
1675 {
1676         /* v1 - v2 is radius, v1 - v3 length */
1677         float dist, rc[2], pt[2];
1678         
1679         /* using code from dist_to_line_segment_v2(), distance vec to line-piece */
1680
1681         if(face->len==0.0f) {
1682                 rc[0]= p[0]-face->vec1[0];
1683                 rc[1]= p[1]-face->vec1[1];
1684                 dist= (float)(sqrt(rc[0]*rc[0]+ rc[1]*rc[1]));
1685                 
1686                 if(dist < face->radline)
1687                         return 1;
1688         }
1689         else {
1690                 float labda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
1691                 
1692                 if(labda > -face->radline_end && labda < 1.0f+face->radline_end) { 
1693                         /* hesse for dist: */
1694                         //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len);
1695                         
1696                         pt[0]= labda*face->rc[0]+face->vec1[0];
1697                         pt[1]= labda*face->rc[1]+face->vec1[1];
1698                         
1699                         rc[0]= pt[0]-p[0];
1700                         rc[1]= pt[1]-p[1];
1701                         dist= (float)sqrt(rc[0]*rc[0]+ rc[1]*rc[1]);
1702                         
1703                         if(dist < face->radline) {
1704                                 float zval= face->vec1[2] + labda*face->rc[2];
1705                                 if(p[2] > zval)
1706                                         return 1;
1707                         }
1708                 }
1709         }
1710         return 0;
1711 }
1712
1713
/* Returns 1 when p is inside triangle v1-v2-v3 in 2d (edges included) and the z of p is
 * equal to or larger than the triangle z interpolated at that location.
 * Returns 0 otherwise, also for triangles without 2d area.
 * code derived from src/parametrizer.c */
static int point_behind_tria2d(float *p, float *v1, float *v2, float *v3)
{
	float edge_a[2], edge_c[2], rel[2];
	float det, u, v, z;

	/* the two edges leaving v1 */
	edge_a[0] = v2[0] - v1[0];
	edge_a[1] = v2[1] - v1[1];
	edge_c[0] = v3[0] - v1[0];
	edge_c[1] = v3[1] - v1[1];

	det = edge_a[0]*edge_c[1] - edge_a[1]*edge_c[0];
	if (det == 0.0f)
		return 0;

	/* p relative to v1 */
	rel[0] = p[0] - v1[0];
	rel[1] = p[1] - v1[1];

	det = 1.0f/det;

	/* u and v are the weights of v2 and v3 */
	u = (rel[0]*edge_c[1] - rel[1]*edge_c[0])*det;
	if (!(u >= 0.0f))
		return 0;

	v = (edge_a[0]*rel[1] - edge_a[1]*rel[0])*det;
	if (!(v >= 0.0f))
		return 0;

	if (!(u + v <= 1.0f))
		return 0;

	/* inside, now check if point p is behind */
	z = (1.0f-u-v)*v1[2] + u*v2[2] + v*v3[2];
	return (z <= p[2]) ? 1 : 0;
}
1749
#if 0
/* disabled: tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */

/* returns 1 when all four corners of rect are on the other side of line v1-v2 than point v3 */
static int rect_outside_line(rctf *rect, float *v1, float *v2, float *v3)
{
	/* line formula for v1-v2: a*x + b*y + c = 0 */
	const float a = v2[1] - v1[1];
	const float b = v1[0] - v2[0];
	const float c = -a*v1[0] - b*v1[1];
	/* 1 when v3 is on the negative side of the line */
	const int side = a*v3[0] + b*v3[1] + c < 0.0f;

	/* the four rect corners, bail out on the first one that is not on the other side */
	if (side != (rect->xmin*a + rect->ymin*b + c >= 0.0f))
		return 0;
	if (side != (rect->xmax*a + rect->ymin*b + c >= 0.0f))
		return 0;
	if (side != (rect->xmax*a + rect->ymax*b + c >= 0.0f))
		return 0;
	if (side != (rect->xmin*a + rect->ymax*b + c >= 0.0f))
		return 0;

	return 1;
}

/* returns 0 when one of the triangle edges separates all rect points on 1 side, else 1 */
static int rect_isect_tria(rctf *rect, float *v1, float *v2, float *v3)
{
	if (rect_outside_line(rect, v1, v2, v3) ||
	    rect_outside_line(rect, v2, v3, v1) ||
	    rect_outside_line(rect, v3, v1, v2))
	{
		return 0;
	}
	return 1;
}
#endif
1786
1787 /* if face overlaps a branch, it executes func. recursive */
1788 static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
1789 {
1790         
1791         /* are we descending? */
1792         if(bspn->left) {
1793                 /* hrmf, the box struct cannot be addressed with index */
1794                 if(bspn->index==0) {
1795                         if(face->box.xmin <= bspn->divider[0])
1796                                 isb_bsp_face_inside(bspn->left, face);
1797                         if(face->box.xmax > bspn->divider[0])
1798                                 isb_bsp_face_inside(bspn->right, face);
1799                 }
1800                 else {
1801                         if(face->box.ymin <= bspn->divider[1])
1802                                 isb_bsp_face_inside(bspn->left, face);
1803                         if(face->box.ymax > bspn->divider[1])
1804                                 isb_bsp_face_inside(bspn->right, face);
1805                 }
1806         }
1807         else {
1808                 /* else: end branch reached */
1809                 int a;
1810                 
1811                 if(bspn->totsamp==0) return;
1812                 
1813                 /* check for nodes entirely in shadow, can be skipped */
1814                 if(bspn->totsamp==bspn->full)
1815                         return;
1816                 
1817                 /* if bsp node is entirely in front of face, give up */
1818                 if(bspn->box.zmax < face->box.zmin)
1819                         return;
1820                 
1821                 /* if face boundbox is outside of branch rect, give up */
1822                 if(0==BLI_isect_rctf((rctf *)&face->box, (rctf *)&bspn->box, NULL))
1823                         return;
1824                 
1825                 /* test all points inside branch */
1826                 for(a=bspn->totsamp-1; a>=0; a--) {
1827                         ISBSample *samp= bspn->samples[a];
1828                         
1829                         if((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
1830                                 if(face->box.zmin < samp->zco[2]) {
1831                                         if(BLI_in_rctf((rctf *)&face->box, samp->zco[0], samp->zco[1])) {
1832                                                 int inshadow= 0;
1833                                                 
1834                                                 if(face->type) {
1835                                                         if(point_behind_strand(samp->zco, face)) 
1836                                                                 inshadow= 1;
1837                                                 }
1838                                                 else if( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
1839                                                         inshadow= 1;
1840                                                 else if(face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
1841                                                         inshadow= 1;
1842
1843                                                 if(inshadow) {
1844                                                         *(samp->shadfac) += face->shad_alpha;
1845                                                         /* optimize; is_full means shad_alpha==4096 */
1846                                                         if(*(samp->shadfac) >= 4096 || face->is_full) {
1847                                                                 bspn->full++;
1848                                                                 samp->shadfac= NULL;
1849                                                         }
1850                                                 }
1851                                         }
1852                                 }
1853                         }
1854                 }
1855         }
1856 }
1857
1858 /* based on available samples, recalculate the bounding box for bsp nodes, recursive */
1859 static void isb_bsp_recalc_box(ISBBranch *root)
1860 {
1861         if(root->left) {
1862                 isb_bsp_recalc_box(root->left);
1863                 isb_bsp_recalc_box(root->right);
1864         }
1865         else if(root->totsamp) {
1866                 int a;
1867                 
1868                 init_box(&root->box);
1869                 for(a=root->totsamp-1; a>=0; a--)
1870                         bound_boxf(&root->box, root->samples[a]->zco);
1871         }       
1872 }
1873
1874 /* callback function for zbuf clip */
1875 static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4)
1876 {
1877         BSPFace face;
1878         
1879         face.v1= v1;
1880         face.v2= v2;
1881         face.v3= v3;
1882         face.v4= v4;
1883         face.obi= obi;
1884         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1885         face.type= R_STRAND;
1886         if(R.osa)
1887                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1888         else
1889                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1890         
1891         face.is_full= (zspan->shad_alpha==1.0f);
1892         
1893         /* setup boundbox */
1894         init_box(&face.box);
1895         bound_boxf(&face.box, v1);
1896         bound_boxf(&face.box, v2);
1897         bound_boxf(&face.box, v3);
1898         if(v4)
1899                 bound_boxf(&face.box, v4);
1900         
1901         /* optimize values */
1902         bspface_init_strand(&face);
1903         
1904         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1905         
1906 }
1907
1908 /* callback function for zbuf clip */
1909 static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr, float *v1, float *v2, float *v3, float *v4) 
1910 {
1911         BSPFace face;
1912         
1913         face.v1= v1;
1914         face.v2= v2;
1915         face.v3= v3;
1916         face.v4= v4;
1917         face.obi= obi;
1918         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1919         face.type= 0;
1920         if(R.osa)
1921                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1922         else
1923                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1924         
1925         face.is_full= (zspan->shad_alpha==1.0f);
1926         
1927         /* setup boundbox */
1928         init_box(&face.box);
1929         bound_boxf(&face.box, v1);
1930         bound_boxf(&face.box, v2);
1931         bound_boxf(&face.box, v3);
1932         if(v4)
1933                 bound_boxf(&face.box, v4);
1934
1935         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1936 }
1937
/* Clip code of homogeneous coordinate 'ho' against the range in 'minmax', which is
 * { xmin, xmax, ymin, ymax } and gets scaled with ho[3].
 * Returns flags: 1 = x above max, 2 = x below min, 4 = y above max, 8 = y below min. */
static int testclip_minmax(float *ho, float *minmax)
{
	const float w = ho[3];
	int code;

	if (ho[0] > minmax[1]*w)
		code = 1;
	else if (ho[0] < minmax[0]*w)
		code = 2;
	else
		code = 0;

	if (ho[1] > minmax[3]*w)
		code |= 4;
	else if (ho[1] < minmax[2]*w)
		code |= 8;

	return code;
}
1951
1952 /* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
1953 static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
1954 {
1955         ObjectInstanceRen *obi;
1956         ObjectRen *obr;
1957         ShadBuf *shb= lar->shb;
1958         ZSpan zspan, zspanstrand;
1959         VlakRen *vlr= NULL;
1960         Material *ma= NULL;
1961         float minmaxf[4], winmat[4][4];
1962         int size= shb->size;
1963         int i, a, ok=1, lay= -1;
1964         
1965         /* further optimize, also sets minz maxz */
1966         isb_bsp_recalc_box(root);
1967         
1968         /* extra clipping for minmax */
1969         minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
1970         minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
1971         minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
1972         minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
1973         
1974         if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
1975         
1976         /* (ab)use zspan, since we use zbuffer clipping code */
1977         zbuf_alloc_span(&zspan, size, size, re->clipcrop);
1978         
1979         zspan.zmulx=  ((float)size)/2.0f;
1980         zspan.zmuly=  ((float)size)/2.0f;
1981         zspan.zofsx= -0.5f;
1982         zspan.zofsy= -0.5f;
1983         
1984         /* pass on bsp root to zspan */
1985         zspan.rectz= (int *)root;
1986         
1987         /* filling methods */
1988         zspanstrand= zspan;
1989         //      zspan.zbuflinefunc= zbufline_onlyZ;
1990         zspan.zbuffunc= isb_bsp_test_face;
1991         zspanstrand.zbuffunc= isb_bsp_test_strand;
1992         
1993         for(i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
1994                 obr= obi->obr;
1995
1996                 if(obi->flag & R_TRANSFORMED)
1997                         mul_m4_m4m4(winmat, obi->mat, shb->persmat);
1998                 else
1999                         copy_m4_m4(winmat, shb->persmat);
2000
2001                 for(a=0; a<obr->totvlak; a++) {
2002                         
2003                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
2004                         else vlr++;
2005                         
2006                         /* note, these conditions are copied in shadowbuf_autoclip() */
2007                         if(vlr->mat!= ma) {
2008                                 ma= vlr->mat;
2009                                 ok= 1;
2010                                 if((ma->mode & MA_SHADBUF)==0) ok= 0;
2011                                 if(ma->material_type == MA_TYPE_WIRE) ok= 0;
2012                                 zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
2013                         }
2014                         
2015                         if(ok && (obi->lay & lay)) {
2016                                 float hoco[4][4];
2017                                 int c1, c2, c3, c4=0;
2018                                 int d1, d2, d3, d4=0;
2019                                 int partclip;
2020                                 
2021                                 /* create hocos per face, it is while render */
2022                                 projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
2023                                 projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
2024                                 projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
2025                                 if(vlr->v4) {
2026                                         projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
2027                                 }
2028
2029                                 /* minmax clipping */
2030                                 if(vlr->v4) partclip= d1 & d2 & d3 & d4;
2031                                 else partclip= d1 & d2 & d3;
2032                                 
2033                                 if(partclip==0) {
2034                                         
2035                                         /* window clipping */
2036                                         c1= testclip(hoco[0]); 
2037                                         c2= testclip(hoco[1]); 
2038                                         c3= testclip(hoco[2]); 
2039                                         if(vlr->v4)
2040                                                 c4= testclip(hoco[3]); 
2041                                         
2042                                         /* ***** NO WIRE YET */                 
2043                                         if(ma->material_type == MA_TYPE_WIRE) {
2044                                                 if(vlr->v4)
2045                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2046                                                 else
2047                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], 0, c1, c2, c3, 0);
2048                                         }
2049                                         else if(vlr->v4) {
2050                                                 if(vlr->flag & R_STRAND)
2051                                                         zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2052                                                 else
2053                                                         zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2054                                         }
2055                                         else
2056                                                 zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
2057                                         
2058                                 }
2059                         }
2060                 }
2061         }
2062         
2063         zbuf_free_span(&zspan);
2064 }
2065
2066 /* returns 1 when the viewpixel is visible in lampbuffer */
2067 static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float *co)
2068 {
2069         float hoco[4], v1[3], nor[3];
2070         float dface, fac, siz;
2071         
2072         RE_vlakren_get_normal(&R, obi, vlr, nor);
2073         VECCOPY(v1, vlr->v1->co);
2074         if(obi->flag & R_TRANSFORMED)
2075                 mul_m4_v3(obi->mat, v1);
2076
2077         /* from shadepixel() */
2078         dface= v1[0]*nor[0] + v1[1]*nor[1] + v1[2]*nor[2];
2079         hoco[3]= 1.0f;
2080         
2081         /* ortho viewplane cannot intersect using view vector originating in (0,0,0) */
2082         if(R.r.mode & R_ORTHO) {
2083                 /* x and y 3d coordinate can be derived from pixel coord and winmat */
2084                 float fx= 2.0/(R.winx*R.winmat[0][0]);
2085                 float fy= 2.0/(R.winy*R.winmat[1][1]);
2086                 
2087                 hoco[0]= (x - 0.5*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
2088                 hoco[1]= (y - 0.5*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
2089                 
2090                 /* using a*x + b*y + c*z = d equation, (a b c) is normal */
2091                 if(nor[2]!=0.0f)
2092                         hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
2093                 else
2094                         hoco[2]= 0.0f;
2095         }
2096         else {
2097                 float div, view[3];
2098                 
2099                 calc_view_vector(view, x, y);
2100                 
2101                 div= nor[0]*view[0] + nor[1]*view[1] + nor[2]*view[2];
2102                 if (div==0.0f) 
2103                         return 0;
2104                 
2105                 fac= dface/div;
2106                 
2107                 hoco[0]= fac*view[0];
2108                 hoco[1]= fac*view[1];
2109                 hoco[2]= fac*view[2];
2110         }
2111         
2112         /* move 3d vector to lampbuf */
2113         mul_m4_v4(shb->persmat, hoco);  /* rational hom co */
2114         
2115         /* clip We can test for -1.0/1.0 because of the properties of the
2116          * coordinate transformations. */
2117         fac= fabs(hoco[3]);
2118         if(hoco[0]<-fac || hoco[0]>fac)
2119                 return 0;
2120         if(hoco[1]<-fac || hoco[1]>fac)
2121                 return 0;
2122         if(hoco[2]<-fac || hoco[2]>fac)
2123                 return 0;
2124         
2125         siz= 0.5f*(float)shb->size;
2126         co[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
2127         co[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
2128         co[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
2129         
2130         /* XXXX bias, much less than normal shadbuf, or do we need a constant? */
2131         co[2] -= 0.05f*shb->bias;
2132         
2133         return 1;
2134 }
2135
2136 /* storage of shadow results, solid osa and transp case */
2137 static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
2138 {
2139         ISBShadfacA *new;
2140         float shadfacf;
2141         
2142         /* in osa case, the samples were filled in with factor 1.0/R.osa. if fewer samples we have to correct */
2143         if(R.osa)
2144                 shadfacf= ((float)shadfac*R.osa)/(4096.0*samples);
2145         else
2146                 shadfacf= ((float)shadfac)/(4096.0);
2147         
2148         new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
2149         new->obi= obi;
2150         new->facenr= facenr & ~RE_QUAD_OFFS;
2151         new->shadfac= shadfacf;
2152         if(*isbsapp)
2153                 new->next= (*isbsapp);
2154         else
2155                 new->next= NULL;
2156         
2157         *isbsapp= new;
2158 }
2159
2160 /* adding samples, solid case */
2161 static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
2162 {
2163         int xi, yi, *xcos, *ycos;
2164         int sample, bsp_err= 0;
2165         
2166         /* bsp split doesn't like to handle regular sequenes */
2167         xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
2168         ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
2169         for(xi=0; xi<pa->rectx; xi++)
2170                 xcos[xi]= xi;
2171         for(yi=0; yi<pa->recty; yi++)
2172                 ycos[yi]= yi;
2173         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2174         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2175         
2176         for(sample=0; sample<(R.osa?R.osa:1); sample++) {
2177                 ISBSample *samp= samplebuf[sample], *samp1;
2178                 
2179                 for(yi=0; yi<pa->recty; yi++) {
2180                         int y= ycos[yi];
2181                         for(xi=0; xi<pa->rectx; xi++) {
2182                                 int x= xcos[xi];
2183                                 samp1= samp + y*pa->rectx + x;
2184                                 if(samp1->facenr)
2185                                         bsp_err |= isb_bsp_insert(root, memarena, samp1);
2186                         }
2187                         if(bsp_err) break;
2188                 }
2189         }       
2190         
2191         MEM_freeN(xcos);
2192         MEM_freeN(ycos);
2193
2194         return bsp_err;
2195 }
2196
2197 /* solid version */
2198 /* lar->shb, pa->rectz and pa->rectp should exist */
2199 static void isb_make_buffer(RenderPart *pa, LampRen *lar)
2200 {
2201         ShadBuf *shb= lar->shb;
2202         ISBData *isbdata;
2203         ISBSample *samp, *samplebuf[16];        /* should be RE_MAX_OSA */
2204         ISBBranch root;
2205         MemArena *memarena;
2206         intptr_t *rd;
2207         int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
2208         
2209         /* storage for shadow, per thread */
2210         isbdata= shb->isb_result[pa->thread];
2211         
2212         /* to map the shi->xs and ys coordinate */
2213         isbdata->minx= pa->disprect.xmin;
2214         isbdata->miny= pa->disprect.ymin;
2215         isbdata->rectx= pa->rectx;
2216         isbdata->recty= pa->recty;
2217         
2218         /* branches are added using memarena (32k branches) */
2219         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2220         BLI_memarena_use_calloc(memarena);
2221         
2222         /* samplebuf is in camera view space (pixels) */
2223         for(sample=0; sample<(R.osa?R.osa:1); sample++)
2224                 samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
2225         
2226         /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
2227         if(R.osa==0)
2228                 isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
2229         
2230         /* setup bsp root */
2231         memset(&root, 0, sizeof(ISBBranch));
2232         root.box.xmin= (float)shb->size;
2233         root.box.ymin= (float)shb->size;
2234         
2235         /* create the sample buffers */
2236         for(sindex=0, y=0; y<pa->recty; y++) {
2237                 for(x=0; x<pa->rectx; x++, sindex++) {
2238                         
2239                         /* this makes it a long function, but splitting it out would mean 10+ arguments */
2240                         /* first check OSA case */
2241                         if(R.osa) {
2242                                 rd= pa->rectdaps + sindex;
2243                                 if(*rd) {
2244                                         float xs= (float)(x + pa->disprect.xmin);
2245                                         float ys= (float)(y + pa->disprect.ymin);
2246                                         
2247                                         for(sample=0; sample<R.osa; sample++) {
2248                                                 PixStr *ps= (PixStr *)(*rd);
2249                                                 int mask= (1<<sample);
2250                                                 
2251                                                 while(ps) {
2252                                                         if(ps->mask & mask)
2253                                                                 break;
2254                                                         ps= ps->next;
2255                                                 }
2256                                                 if(ps && ps->facenr>0) {
2257                                                         ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
2258                                                         ObjectRen *obr= obi->obr;
2259                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
2260                                                         
2261                                                         samp= samplebuf[sample] + sindex;
2262                                                         /* convert image plane pixel location to lamp buffer space */
2263                                                         if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
2264                                                                 samp->obi= ps->obi;
2265                                                                 samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
2266                                                                 ps->shadfac= 0;
2267                                                                 samp->shadfac= &ps->shadfac;
2268                                                                 bound_rectf((rctf *)&root.box, samp->zco);
2269                                                         }
2270                                                 }
2271                                         }
2272                                 }
2273                         }
2274                         else {
2275                                 rectp= pa->rectp + sindex;
2276                                 recto= pa->recto + sindex;
2277                                 if(*rectp>0) {
2278                                         ObjectInstanceRen *obi= &R.objectinstance[*recto];
2279                                         ObjectRen *obr= obi->obr;
2280                                         VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
2281                                         float xs= (float)(x + pa->disprect.xmin);
2282                                         float ys= (float)(y + pa->disprect.ymin);
2283                                         
2284                                         samp= samplebuf[0] + sindex;
2285                                         /* convert image plane pixel location to lamp buffer space */
2286                                         if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
2287                                                 samp->obi= *recto;
2288                                                 samp->facenr= *rectp & ~RE_QUAD_OFFS;
2289                                                 samp->shadfac= isbdata->shadfacs + sindex;
2290                                                 bound_rectf((rctf *)&root.box, samp->zco);
2291                                         }
2292                                 }
2293                         }
2294                 }
2295         }
2296         
2297         /* simple method to see if we have samples */
2298         if(root.box.xmin != (float)shb->size) {
2299                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2300                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2301                 isb_bsp_split_init(&root, memarena, 8);
2302                 
2303                 /* insert all samples in BSP now */
2304                 bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
2305                         
2306                 if(bsp_err==0) {
2307                         /* go over all faces and fill in shadow values */
2308                         
2309                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2310                         
2311                         /* copy shadow samples to persistant buffer, reduce memory overhead */
2312                         if(R.osa) {
2313                                 ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2314                                 
2315                                 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2316                                 BLI_memarena_use_calloc(isbdata->memarena);
2317
2318                                 for(rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
2319                                         
2320                                         if(*rd) {
2321                                                 PixStr *ps= (PixStr *)(*rd);
2322                                                 while(ps) {
2323                                                         if(ps->shadfac)
2324                                                                 isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
2325                                                         ps= ps->next;
2326                                                 }
2327                                         }
2328                                 }
2329                         }
2330                 }
2331         }
2332         else {
2333                 if(isbdata->shadfacs) {
2334                         MEM_freeN(isbdata->shadfacs);
2335                         isbdata->shadfacs= NULL;
2336                 }
2337         }
2338
2339         /* free BSP */
2340         BLI_memarena_free(memarena);
2341         
2342         /* free samples */
2343         for(x=0; x<(R.osa?R.osa:1); x++)
2344                 MEM_freeN(samplebuf[x]);
2345         
2346         if(bsp_err) printf("error in filling bsp\n");
2347 }
2348
2349 /* add sample to buffer, isbsa is the root sample in a buffer */
2350 static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
2351 {
2352         ISBSampleA *new;
2353         
2354         new= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
2355         if(*isbsa)
2356                 new->next= (*isbsa);
2357         else
2358                 new->next= NULL;
2359         
2360         *isbsa= new;
2361         return new;
2362 }
2363
2364 /* adding samples in BSP, transparent case */
2365 static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
2366 {
2367         int xi, yi, *xcos, *ycos;
2368         int sample, bsp_err= 0;
2369         
2370         /* bsp split doesn't like to handle regular sequenes */
2371         xcos= MEM_mallocN( pa->rectx*sizeof(int), "xcos");
2372         ycos= MEM_mallocN( pa->recty*sizeof(int), "ycos");
2373         for(xi=0; xi<pa->rectx; xi++)
2374                 xcos[xi]= xi;
2375         for(yi=0; yi<pa->recty; yi++)
2376                 ycos[yi]= yi;
2377         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2378         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2379         
2380         for(sample=0; sample<(R.osa?R.osa:1); sample++) {
2381                 ISBSampleA **samp= samplebuf[sample], *samp1;
2382                 
2383                 for(yi=0; yi<pa->recty; yi++) {
2384                         int y= ycos[yi];
2385                         for(xi=0; xi<pa->rectx; xi++) {
2386                                 int x= xcos[xi];
2387                                 
2388                                 samp1= *(samp + y*pa->rectx + x);
2389                                 while(samp1) {
2390                                         bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
2391                                         samp1= samp1->next;
2392                                 }
2393                         }
2394                         if(bsp_err) break;
2395                 }
2396         }       
2397         
2398         MEM_freeN(xcos);
2399         MEM_freeN(ycos);
2400         
2401         return bsp_err;
2402 }
2403
2404
2405 /* Ztransp version */
2406 /* lar->shb, pa->rectz and pa->rectp should exist */
2407 static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
2408 {
2409         ShadBuf *shb= lar->shb;
2410         ISBData *isbdata;
2411         ISBSampleA *samp, **samplebuf[16];      /* MAX_OSA */
2412         ISBBranch root;
2413         MemArena *memarena;
2414         APixstr *ap;
2415         int x, y, sindex, sample, bsp_err=0;
2416         
2417         /* storage for shadow, per thread */
2418         isbdata= shb->isb_result[pa->thread];
2419         
2420         /* to map the shi->xs and ys coordinate */
2421         isbdata->minx= pa->disprect.xmin;
2422         isbdata->miny= pa->disprect.ymin;
2423         isbdata->rectx= pa->rectx;
2424         isbdata->recty= pa->recty;
2425         
2426         /* branches are added using memarena (32k branches) */
2427         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2428         BLI_memarena_use_calloc(memarena);
2429         
2430         /* samplebuf is in camera view space (pixels) */
2431         for(sample=0; sample<(R.osa?R.osa:1); sample++)
2432                 samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
2433         
2434         /* setup bsp root */
2435         memset(&root, 0, sizeof(ISBBranch));
2436         root.box.xmin= (float)shb->size;
2437         root.box.ymin= (float)shb->size;
2438
2439         /* create the sample buffers */
2440         for(ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
2441                 for(x=0; x<pa->rectx; x++, sindex++, ap++) {
2442                         
2443                         if(ap->p[0]) {
2444                                 APixstr *apn;
2445                                 float xs= (float)(x + pa->disprect.xmin);
2446                                 float ys= (float)(y + pa->disprect.ymin);
2447                                 
2448                                 for(apn=ap; apn; apn= apn->next) {
2449                                         int a;
2450                                         for(a=0; a<4; a++) {
2451                                                 if(apn->p[a]) {
2452                                                         ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
2453                                                         ObjectRen *obr= obi->obr;
2454                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
2455                                                         float zco[3];
2456                                                         
2457                                                         /* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
2458                                                         apn->shadfac[a]= 0;
2459                                                         
2460                                                         if(R.osa) {
2461                                                                 for(sample=0; sample<R.osa; sample++) {
2462                                                                         int mask= (1<<sample);
2463                                                                         
2464                                                                         if(apn->mask[a] & mask) {
2465                                                                                 
2466                                                                                 /* convert image plane pixel location to lamp buffer space */
2467                                                                                 if(viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
2468                                                                                         samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
2469                                                                                         samp->obi= apn->obi[a];
2470                                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2471                                                                                         samp->shadfac= &apn->shadfac[a];
2472                                                                                         
2473                                                                                         VECCOPY(samp->zco, zco);
2474                                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2475                                                                                 }
2476                                                                         }
2477                                                                 }
2478                                                         }
2479                                                         else {
2480                                                                 
2481                                                                 /* convert image plane pixel location to lamp buffer space */
2482                                                                 if(viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
2483                                                                         
2484                                                                         samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
2485                                                                         samp->obi= apn->obi[a];
2486                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2487                                                                         samp->shadfac= &apn->shadfac[a];
2488                                                                         
2489                                                                         VECCOPY(samp->zco, zco);
2490                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2491                                                                 }
2492                                                         }
2493                                                 }
2494                                         }
2495                                 }
2496                         }
2497                 }
2498         }
2499         
2500         /* simple method to see if we have samples */
2501         if(root.box.xmin != (float)shb->size) {
2502                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2503                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2504                 isb_bsp_split_init(&root, memarena, 8);
2505                 
2506                 /* insert all samples in BSP now */
2507                 bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
2508                 
2509                 if(bsp_err==0) {
2510                         ISBShadfacA **isbsa;
2511                         
2512                         /* go over all faces and fill in shadow values */
2513                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2514                         
2515                         /* copy shadow samples to persistant buffer, reduce memory overhead */
2516                         isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2517                         
2518                         isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2519                         
2520                         for(ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
2521                                         
2522                                 if(ap->p[0]) {
2523                                         APixstr *apn;
2524                                         for(apn=ap; apn; apn= apn->next) {
2525                                                 int a;
2526                                                 for(a=0; a<4; a++) {
2527                                                         if(apn->p[a] && apn->shadfac[a]) {
2528                                                                 if(R.osa)
2529                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
2530                                                                 else
2531                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);
2532                                                         }
2533                                                 }
2534                                         }
2535                                 }
2536                         }
2537                 }
2538         }
2539
2540         /* free BSP */
2541         BLI_memarena_free(memarena);
2542
2543         /* free samples */
2544         for(x=0; x<(R.osa?R.osa:1); x++)
2545                 MEM_freeN(samplebuf[x]);
2546
2547         if(bsp_err) printf("error in filling bsp\n");
2548 }
2549
2550
2551
2552 /* exported */
2553
2554 /* returns amount of light (1.0 = no shadow) */
2555 /* note, shadepixel() rounds the coordinate, not the real sample info */
2556 float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
2557 {
2558         /* if raytracing, we can't accept irregular shadow */
2559         if(shi->depth==0) {
2560                 ISBData *isbdata= shb->isb_result[shi->thread];
2561                 
2562                 if(isbdata) {
2563                         if(isbdata->shadfacs || isbdata->shadfaca) {
2564                                 int x= shi->xs - isbdata->minx;
2565                                 
2566                                 if(x >= 0 && x < isbdata->rectx) {
2567                                         int y= shi->ys - isbdata->miny;
2568                         
2569                                         if(y >= 0 && y < isbdata->recty) {
2570                                                 if(isbdata->shadfacs) {
2571                                                         short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
2572                                                         return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
2573                                                 }
2574                                                 else {
2575                                                         int sindex= y*isbdata->rectx + x;
2576                                                         int obi= shi->obi - R.objectinstance;
2577                                                         ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex);
2578                                                         
2579                                                         while(isbsa) {
2580                                                                 if(isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
2581                                                                         return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
2582                                                                 isbsa= isbsa->next;
2583                                                         }
2584                                                 }
2585                                         }
2586                                 }
2587                         }
2588                 }
2589         }
2590         return 1.0f;
2591 }
2592
2593 /* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
2594 void ISB_create(RenderPart *pa, APixstr *apixbuf)
2595 {
2596         GroupObject *go;
2597         
2598         /* go over all lamps, and make the irregular buffers */
2599         for(go=R.lights.first; go; go= go->next) {
2600                 LampRen *lar= go->lampren;
2601                 
2602                 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2603                         
2604                         /* create storage for shadow, per thread */
2605                         lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
2606                         
2607                         if(apixbuf)
2608                                 isb_make_buffer_transp(pa, apixbuf, lar);
2609                         else
2610                                 isb_make_buffer(pa, lar);
2611                 }
2612         }
2613 }
2614
2615
2616 /* end of part rendering, free stored shadow data for this thread from all lamps */
2617 void ISB_free(RenderPart *pa)
2618 {
2619         GroupObject *go;
2620         
2621         /* go over all lamps, and free the irregular buffers */
2622         for(go=R.lights.first; go; go= go->next) {
2623                 LampRen *lar= go->lampren;
2624                 
2625                 if(lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2626                         ISBData *isbdata= lar->shb->isb_result[pa->thread];
2627
2628                         if(isbdata) {
2629                                 if(isbdata->shadfacs)
2630                                         MEM_freeN(isbdata->shadfacs);
2631                                 if(isbdata->shadfaca)
2632                                         MEM_freeN(isbdata->shadfaca);
2633                                 
2634                                 if(isbdata->memarena)
2635                                         BLI_memarena_free(isbdata->memarena);
2636                                 
2637                                 MEM_freeN(isbdata);
2638                                 lar->shb->isb_result[pa->thread]= NULL;
2639                         }
2640                 }
2641         }
2642 }
2643