code cleanup: use const float and define array size
[blender.git] / source / blender / render / intern / source / shadbuf.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * Contributor(s): 2004-2006, Blender Foundation
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 /** \file blender/render/intern/source/shadbuf.c
27  *  \ingroup render
28  */
29
30
31 #include <math.h>
32 #include <string.h>
33
34
35 #include "MEM_guardedalloc.h"
36
37 #include "DNA_group_types.h"
38 #include "DNA_lamp_types.h"
39 #include "DNA_material_types.h"
40
41 #include "BKE_global.h"
42 #include "BKE_scene.h"
43
44
45 #include "BLI_math.h"
46 #include "BLI_blenlib.h"
47 #include "BLI_jitter.h"
48 #include "BLI_memarena.h"
49 #include "BLI_rand.h"
50 #include "BLI_utildefines.h"
51
52 #include "PIL_time.h"
53
54 #include "renderpipeline.h"
55 #include "render_types.h"
56 #include "renderdatabase.h"
57 #include "rendercore.h"
58 #include "shadbuf.h"
59 #include "shading.h"
60 #include "zbuf.h"
61
/* Byte index of each component inside a 4-byte z value, selected per byte
 * order.  XXX, could be better implemented... this is for endian issues */
#if defined(__BIG_ENDIAN__)
#  define ACOMP 0
#  define BCOMP 1
#  define GCOMP 2
#  define RCOMP 3
#else
#  define ACOMP 3
#  define BCOMP 2
#  define GCOMP 1
#  define RCOMP 0
#endif
74
75 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
76 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
77 /* only to be used here in this file, it's for speed */
78 extern struct Render R;
79 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
80
81 /* ------------------------------------------------------------------------- */
82
83 /* initshadowbuf() in convertBlenderScene.c */
84
85 /* ------------------------------------------------------------------------- */
86
/**
 * Copy one square tile out of a square z-buffer into a packed scratch buffer.
 *
 * \param rectz: source z-buffer, \a size * \a size ints stored row by row.
 * \param size: edge length of \a rectz in pixels.
 * \param x1, y1: first column and row of the tile.
 * \param tile: edge length of the tile in pixels.
 * \param r1: destination; the copied rows are written back to back, each one
 * (clipped tile width) * sizeof(int) bytes long.
 *
 * The tile is clipped against the buffer edge; nothing is written when the
 * clipped tile is empty.
 */
static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
{
	int x2 = x1 + tile;
	int y2 = y1 + tile;
	int *rz;
	size_t row_bytes;

	/* x2/y2 are exclusive bounds, so 'size' itself is a valid value.
	 * Clamping to size-1 (as was done before) dropped the last row and
	 * column of every tile touching the buffer edge and packed its rows
	 * one pixel too narrow for readers that expect full-width tiles. */
	if (x2 > size) x2 = size;
	if (y2 > size) y2 = size;

	if (x1 >= x2 || y1 >= y2) return;

	row_bytes = sizeof(int) * (size_t)(x2 - x1);
	rz = rectz + size * y1 + x1;

	for (; y1 < y2; y1++) {
		memcpy(r1, rz, row_bytes);
		rz += size;
		r1 += row_bytes;
	}
}
107
#if 0
/* Disabled helper: adds up the per-tile bytes in shb->cbuf (one byte per
 * 256 pixels) and returns 256 times that sum. */
static int sizeoflampbuf(ShadBuf *shb)
{
	char *tile_code = shb->cbuf;
	int tiles_left = (shb->size * shb->size) / 256;
	int sum = 0;

	for (; tiles_left != 0; tiles_left--, tile_code++) {
		sum += *tile_code;
	}

	return 256 * sum;
}
#endif
122
/* Returns the jitter table for samp*samp samples (samp is clamped to 2..16).
 * Not threadsafe: the tables live in function-static storage and are filled
 * in on first request. */
static float *give_jitter_tab(int samp)
{
	/* All possible jitter tables are kept back to back in one static array;
	 * that takes up some 12k, not really bad!
	 * For soft shadows, it saves memory and render time. */
	static int tab[17]= {1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256};
	static float jit[1496][2];
	static char ctab[17]= {0};	/* non-zero once the table for that samp is initialized */
	int start = 0;
	int i;

	if (samp < 2) {
		samp = 2;
	}
	else if (samp > 16) {
		samp = 16;
	}

	/* skip over the tables of all smaller sample counts */
	for (i = 0; i < samp - 1; i++) {
		start += tab[i];
	}

	if (!ctab[samp]) {
		ctab[samp] = 1;
		BLI_jitter_init(jit[start], samp * samp);
	}

	return jit[start];
}
148
149 static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype) 
150 {
151         float *jit, totw= 0.0f;
152         int samp= get_render_shadow_samples(&re->r, shb->samp);
153         int a, tot=samp*samp;
154         
155         shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
156         
157         for (jit= shb->jit, a=0; a<tot; a++, jit+=2) {
158                 if (filtertype==LA_SHADBUF_TENT)
159                         shb->weight[a]= 0.71f - sqrt(jit[0]*jit[0] + jit[1]*jit[1]);
160                 else if (filtertype==LA_SHADBUF_GAUSS)
161                         shb->weight[a]= RE_filter_value(R_FILTER_GAUSS, 1.8f*sqrt(jit[0]*jit[0] + jit[1]*jit[1]));
162                 else
163                         shb->weight[a]= 1.0f;
164                 
165                 totw+= shb->weight[a];
166         }
167         
168         totw= 1.0f/totw;
169         for (a=0; a<tot; a++) {
170                 shb->weight[a]*= totw;
171         }
172 }
173
174 static int verg_deepsample(const void *poin1, const void *poin2)
175 {
176         const DeepSample *ds1= (const DeepSample*)poin1;
177         const DeepSample *ds2= (const DeepSample*)poin2;
178
179         if (ds1->z < ds2->z) return -1;
180         else if (ds1->z == ds2->z) return 0;
181         else return 1;
182 }
183
184 static int compress_deepsamples(DeepSample *dsample, int tot, float epsilon)
185 {
186         /* uses doubles to avoid overflows and other numerical issues,
187          * could be improved */
188         DeepSample *ds, *newds;
189         float v;
190         double slope, slopemin, slopemax, min, max, div, newmin, newmax;
191         int a, first, z, newtot= 0;
192
193 #if 0
194         if (print) {
195                 for (a=0, ds=dsample; a<tot; a++, ds++)
196                         printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v);
197                 printf("\n");
198         }
199 #endif
200
201         /* read from and write into same array */
202         ds= dsample;
203         newds= dsample;
204         a= 0;
205
206         /* as long as we are not at the end of the array */
207         for (a++, ds++; a<tot; a++, ds++) {
208                 slopemin= 0.0f;
209                 slopemax= 0.0f;
210                 first= 1;
211
212                 for (; a<tot; a++, ds++) {
213                         //dz= ds->z - newds->z;
214                         if (ds->z == newds->z) {
215                                 /* still in same z position, simply check
216                                  * visibility difference against epsilon */
217                                 if (!(fabs(newds->v - ds->v) <= epsilon)) {
218                                         break;
219                                 }
220                         }
221                         else {
222                                 /* compute slopes */
223                                 div= (double)0x7FFFFFFF/((double)ds->z - (double)newds->z);
224                                 min= ((ds->v - epsilon) - newds->v)*div;
225                                 max= ((ds->v + epsilon) - newds->v)*div;
226
227                                 /* adapt existing slopes */
228                                 if (first) {
229                                         newmin= min;
230                                         newmax= max;
231                                         first= 0;
232                                 }
233                                 else {
234                                         newmin= MAX2(slopemin, min);
235                                         newmax= MIN2(slopemax, max);
236
237                                         /* verify if there is still space between the slopes */
238                                         if (newmin > newmax) {
239                                                 ds--;
240                                                 a--;
241                                                 break;
242                                         }
243                                 }
244
245                                 slopemin= newmin;
246                                 slopemax= newmax;
247                         }
248                 }
249
250                 if (a == tot) {
251                         ds--;
252                         a--;
253                 }
254
255                 /* always previous z */
256                 z= ds->z;
257
258                 if (first || a==tot-1) {
259                         /* if slopes were not initialized, use last visibility */
260                         v= ds->v;
261                 }
262                 else {
263                         /* compute visibility at center between slopes at z */
264                         slope= (slopemin+slopemax)*0.5f;
265                         v= newds->v + slope*((z - newds->z)/(double)0x7FFFFFFF);
266                 }
267
268                 newds++;
269                 newtot++;
270
271                 newds->z= z;
272                 newds->v= v;
273         }
274
275         if (newtot == 0 || (newds->v != (newds-1)->v))
276                 newtot++;
277
278 #if 0
279         if (print) {
280                 for (a=0, ds=dsample; a<newtot; a++, ds++)
281                         printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v);
282                 printf("\n");
283         }
284 #endif
285
286         return newtot;
287 }
288
289 static float deep_alpha(Render *re, int obinr, int facenr, int strand)
290 {
291         ObjectInstanceRen *obi= &re->objectinstance[obinr];
292         Material *ma;
293
294         if (strand) {
295                 StrandRen *strand= RE_findOrAddStrand(obi->obr, facenr-1);
296                 ma= strand->buffer->ma;
297         }
298         else {
299                 VlakRen *vlr= RE_findOrAddVlak(obi->obr, (facenr-1) & RE_QUAD_MASK);
300                 ma= vlr->mat;
301         }
302
303         return ma->shad_alpha;
304 }
305
306 static void compress_deepshadowbuf(Render *re, ShadBuf *shb, APixstr *apixbuf, APixstrand *apixbufstrand)
307 {
308         ShadSampleBuf *shsample;
309         DeepSample *ds[RE_MAX_OSA], *sampleds[RE_MAX_OSA], *dsb, *newbuf;
310         APixstr *ap, *apn;
311         APixstrand *aps, *apns;
312         float visibility;
313
314         const int totbuf= shb->totbuf;
315         const float totbuf_f= (float)shb->totbuf;
316         const float totbuf_f_inv= 1.0f/totbuf_f;
317         const int size= shb->size;
318
319         int a, b, c, tot, minz, found, prevtot, newtot;
320         int sampletot[RE_MAX_OSA], totsample = 0, totsamplec = 0;
321         
322         shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
323         BLI_addtail(&shb->buffers, shsample);
324
325         shsample->totbuf= MEM_callocN(sizeof(int)*size*size, "deeptotbuf");
326         shsample->deepbuf= MEM_callocN(sizeof(DeepSample*)*size*size, "deepbuf");
327
328         ap= apixbuf;
329         aps= apixbufstrand;
330         for (a=0; a<size*size; a++, ap++, aps++) {
331                 /* count number of samples */
332                 for (c=0; c<totbuf; c++)
333                         sampletot[c]= 0;
334
335                 tot= 0;
336                 for (apn=ap; apn; apn=apn->next)
337                         for (b=0; b<4; b++)
338                                 if (apn->p[b])
339                                         for (c=0; c<totbuf; c++)
340                                                 if (apn->mask[b] & (1<<c))
341                                                         sampletot[c]++;
342
343                 if (apixbufstrand) {
344                         for (apns=aps; apns; apns=apns->next)
345                                 for (b=0; b<4; b++)
346                                         if (apns->p[b])
347                                                 for (c=0; c<totbuf; c++)
348                                                         if (apns->mask[b] & (1<<c))
349                                                                 sampletot[c]++;
350                 }
351
352                 for (c=0; c<totbuf; c++)
353                         tot += sampletot[c];
354
355                 if (tot == 0) {
356                         shsample->deepbuf[a]= NULL;
357                         shsample->totbuf[a]= 0;
358                         continue;
359                 }
360
361                 /* fill samples */
362                 ds[0]= sampleds[0]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
363                 for (c=1; c<totbuf; c++)
364                         ds[c]= sampleds[c]= sampleds[c-1] + sampletot[c-1]*2;
365
366                 for (apn=ap; apn; apn=apn->next) {
367                         for (b=0; b<4; b++) {
368                                 if (apn->p[b]) {
369                                         for (c=0; c<totbuf; c++) {
370                                                 if (apn->mask[b] & (1<<c)) {
371                                                         /* two entries to create step profile */
372                                                         ds[c]->z= apn->z[b];
373                                                         ds[c]->v= 1.0f; /* not used */
374                                                         ds[c]++;
375                                                         ds[c]->z= apn->z[b];
376                                                         ds[c]->v= deep_alpha(re, apn->obi[b], apn->p[b], 0);
377                                                         ds[c]++;
378                                                 }
379                                         }
380                                 }
381                         }
382                 }
383
384                 if (apixbufstrand) {
385                         for (apns=aps; apns; apns=apns->next) {
386                                 for (b=0; b<4; b++) {
387                                         if (apns->p[b]) {
388                                                 for (c=0; c<totbuf; c++) {
389                                                         if (apns->mask[b] & (1<<c)) {
390                                                                 /* two entries to create step profile */
391                                                                 ds[c]->z= apns->z[b];
392                                                                 ds[c]->v= 1.0f; /* not used */
393                                                                 ds[c]++;
394                                                                 ds[c]->z= apns->z[b];
395                                                                 ds[c]->v= deep_alpha(re, apns->obi[b], apns->p[b], 1);
396                                                                 ds[c]++;
397                                                         }
398                                                 }
399                                         }
400                                 }
401                         }
402                 }
403
404                 for (c=0; c<totbuf; c++) {
405                         /* sort by increasing z */
406                         qsort(sampleds[c], sampletot[c], sizeof(DeepSample)*2, verg_deepsample);
407
408                         /* sum visibility, replacing alpha values */
409                         visibility= 1.0f;
410                         ds[c]= sampleds[c];
411
412                         for (b=0; b<sampletot[c]; b++) {
413                                 /* two entries creating step profile */
414                                 ds[c]->v= visibility;
415                                 ds[c]++;
416
417                                 visibility *= 1.0f-ds[c]->v;
418                                 ds[c]->v= visibility;
419                                 ds[c]++;
420                         }
421
422                         /* halfway trick, probably won't work well for volumes? */
423                         ds[c]= sampleds[c];
424                         for (b=0; b<sampletot[c]; b++) {
425                                 if (b+1 < sampletot[c]) {
426                                         ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
427                                         ds[c]++;
428                                         ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
429                                         ds[c]++;
430                                 }
431                                 else {
432                                         ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
433                                         ds[c]++;
434                                         ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
435                                         ds[c]++;
436                                 }
437                         }
438
439                         /* init for merge loop */
440                         ds[c]= sampleds[c];
441                         sampletot[c] *= 2;
442                 }
443
444                 shsample->deepbuf[a]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
445                 shsample->totbuf[a]= 0;
446
447                 /* merge buffers */
448                 dsb= shsample->deepbuf[a];
449                 while (1) {
450                         minz= 0;
451                         found= 0;
452
453                         for (c=0; c<totbuf; c++) {
454                                 if (sampletot[c] && (!found || ds[c]->z < minz)) {
455                                         minz= ds[c]->z;
456                                         found= 1;
457                                 }
458                         }
459
460                         if (!found)
461                                 break;
462
463                         dsb->z= minz;
464                         dsb->v= 0.0f;
465
466                         visibility= 0.0f;
467                         for (c=0; c<totbuf; c++) {
468                                 if (sampletot[c] && ds[c]->z == minz) {
469                                         ds[c]++;
470                                         sampletot[c]--;
471                                 }
472
473                                 if (sampleds[c] == ds[c])
474                                         visibility += totbuf_f_inv;
475                                 else
476                                         visibility += (ds[c]-1)->v / totbuf_f;
477                         }
478
479                         dsb->v= visibility;
480                         dsb++;
481                         shsample->totbuf[a]++;
482                 }
483
484                 prevtot= shsample->totbuf[a];
485                 totsample += prevtot;
486
487                 newtot= compress_deepsamples(shsample->deepbuf[a], prevtot, shb->compressthresh);
488                 shsample->totbuf[a]= newtot;
489                 totsamplec += newtot;
490
491                 if (newtot < prevtot) {
492                         newbuf= MEM_mallocN(sizeof(DeepSample)*newtot, "cdeepsample");
493                         memcpy(newbuf, shsample->deepbuf[a], sizeof(DeepSample)*newtot);
494                         MEM_freeN(shsample->deepbuf[a]);
495                         shsample->deepbuf[a]= newbuf;
496                 }
497
498                 MEM_freeN(sampleds[0]);
499         }
500
501         //printf("%d -> %d, ratio %f\n", totsample, totsamplec, (float)totsamplec/(float)totsample);
502 }
503
504 /* create Z tiles (for compression): this system is 24 bits!!! */
505 static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
506 {
507         ShadSampleBuf *shsample;
508         float dist;
509         uintptr_t *ztile;
510         int *rz, *rz1, verg, verg1, size= shb->size;
511         int a, x, y, minx, miny, byt1, byt2;
512         char *rc, *rcline, *ctile, *zt;
513         
514         shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
515         BLI_addtail(&shb->buffers, shsample);
516         
517         shsample->zbuf= MEM_mallocN(sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
518         shsample->cbuf= MEM_callocN((size*size)/256, "initshadbuf3");
519         
520         ztile= (uintptr_t *)shsample->zbuf;
521         ctile= shsample->cbuf;
522         
523         /* help buffer */
524         rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
525         
526         for (y=0; y<size; y+=16) {
527                 if (y< size/2) miny= y+15-size/2;
528                 else miny= y-size/2;    
529                 
530                 for (x=0; x<size; x+=16) {
531                         
532                         /* is tile within spotbundle? */
533                         a= size/2;
534                         if (x< a) minx= x+15-a;
535                         else minx= x-a; 
536                         
537                         dist= sqrt( (float)(minx*minx+miny*miny) );
538                         
539                         if (square==0 && dist>(float)(a+12)) {  /* 12, tested with a onlyshadow lamp */
540                                 a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
541                                 rz1= (&verg)+1;
542                         } 
543                         else {
544                                 copy_to_ztile(rectz, size, x, y, 16, rcline);
545                                 rz1= (int *)rcline;
546                                 
547                                 verg= (*rz1 & 0xFFFFFF00);
548                                 
549                                 for (a=0;a<256;a++, rz1++) {
550                                         if ( (*rz1 & 0xFFFFFF00) !=verg) break;
551                                 }
552                         }
553                         if (a==256) { /* complete empty tile */
554                                 *ctile= 0;
555                                 *ztile= *(rz1-1);
556                         }
557                         else {
558                                 
559                                 /* ACOMP etc. are defined to work L/B endian */
560                                 
561                                 rc= rcline;
562                                 rz1= (int *)rcline;
563                                 verg=  rc[ACOMP];
564                                 verg1= rc[BCOMP];
565                                 rc+= 4;
566                                 byt1= 1; byt2= 1;
567                                 for (a=1;a<256;a++, rc+=4) {
568                                         byt1 &= (verg==rc[ACOMP]);
569                                         byt2 &= (verg1==rc[BCOMP]);
570                                         
571                                         if (byt1==0) break;
572                                 }
573                                 if (byt1 && byt2) {     /* only store byte */
574                                         *ctile= 1;
575                                         *ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
576                                         rz= (int *)*ztile;
577                                         *rz= *rz1;
578                                         
579                                         zt= (char *)(rz+1);
580                                         rc= rcline;
581                                         for (a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];
582                                 }
583                                 else if (byt1) {                /* only store short */
584                                         *ctile= 2;
585                                         *ztile= (uintptr_t)MEM_mallocN(2*256+4, "Tile2");
586                                         rz= (int *)*ztile;
587                                         *rz= *rz1;
588                                         
589                                         zt= (char *)(rz+1);
590                                         rc= rcline;
591                                         for (a=0; a<256; a++, zt+=2, rc+=4) {
592                                                 zt[0]= rc[BCOMP];
593                                                 zt[1]= rc[GCOMP];
594                                         }
595                                 }
596                                 else {                  /* store triple */
597                                         *ctile= 3;
598                                         *ztile= (uintptr_t)MEM_mallocN(3*256, "Tile3");
599
600                                         zt= (char *)*ztile;
601                                         rc= rcline;
602                                         for (a=0; a<256; a++, zt+=3, rc+=4) {
603                                                 zt[0]= rc[ACOMP];
604                                                 zt[1]= rc[BCOMP];
605                                                 zt[2]= rc[GCOMP];
606                                         }
607                                 }
608                         }
609                         ztile++;
610                         ctile++;
611                 }
612         }
613
614         MEM_freeN(rcline);
615 }
616
617 /* sets start/end clipping. lar->shb should be initialized */
618 static void shadowbuf_autoclip(Render *re, LampRen *lar)
619 {
620         ObjectInstanceRen *obi;
621         ObjectRen *obr;
622         VlakRen *vlr= NULL;
623         VertRen *ver= NULL;
624         Material *ma= NULL;
625         float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
626         unsigned int lay = -1;
627         int i, a, maxtotvert, ok= 1;
628         char *clipflag;
629         
630         minz= 1.0e30f; maxz= -1.0e30f;
631         copy_m4_m4(viewmat, lar->shb->viewmat);
632         
633         if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
634
635         maxtotvert= 0;
636         for (obr=re->objecttable.first; obr; obr=obr->next)
637                 maxtotvert= MAX2(obr->totvert, maxtotvert);
638
639         clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag");
640
641         /* set clip in vertices when face visible */
642         for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
643                 obr= obi->obr;
644
645                 if (obi->flag & R_TRANSFORMED)
646                         mult_m4_m4m4(obviewmat, viewmat, obi->mat);
647                 else
648                         copy_m4_m4(obviewmat, viewmat);
649
650                 memset(clipflag, 0, sizeof(char)*obr->totvert);
651
652                 /* clear clip, is being set if face is visible (clip is calculated for real later) */
653                 for (a=0; a<obr->totvlak; a++) {
654                         if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
655                         else vlr++;
656                         
657                         /* note; these conditions are copied from zbuffer_shadow() */
658                         if (vlr->mat!= ma) {
659                                 ma= vlr->mat;
660                                 ok= 1;
661                                 if ((ma->mode & MA_SHADBUF)==0) ok= 0;
662                         }
663                         
664                         if (ok && (obi->lay & lay)) {
665                                 clipflag[vlr->v1->index]= 1;
666                                 clipflag[vlr->v2->index]= 1;
667                                 clipflag[vlr->v3->index]= 1;
668                                 if (vlr->v4) clipflag[vlr->v4->index]= 1;
669                         }                               
670                 }               
671                 
672                 /* calculate min and max */
673                 for (a=0; a< obr->totvert;a++) {
674                         if ((a & 255)==0) ver= RE_findOrAddVert(obr, a);
675                         else ver++;
676                         
677                         if (clipflag[a]) {
678                                 copy_v3_v3(vec, ver->co);
679                                 mul_m4_v3(obviewmat, vec);
680                                 /* Z on visible side of lamp space */
681                                 if (vec[2] < 0.0f) {
682                                         float inpr, z= -vec[2];
683                                         
684                                         /* since vec is rotated in lampspace, this is how to get the cosine of angle */
685                                         /* precision is set 20% larger */
686                                         vec[2]*= 1.2f;
687                                         normalize_v3(vec);
688                                         inpr= - vec[2];
689
690                                         if (inpr>=lar->spotsi) {
691                                                 if (z<minz) minz= z;
692                                                 if (z>maxz) maxz= z;
693                                         }
694                                 }
695                         }
696                 }
697         }
698
699         MEM_freeN(clipflag);
700         
701         /* set clipping min and max */
702         if (minz < maxz) {
703                 float delta= (maxz - minz);     /* threshold to prevent precision issues */
704                 
705                 //printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
706                 if (lar->bufflag & LA_SHADBUF_AUTO_START)
707                         lar->shb->d= minz - delta*0.02f;        /* 0.02 is arbitrary... needs more thinking! */
708                 if (lar->bufflag & LA_SHADBUF_AUTO_END)
709                         lar->shb->clipend= maxz + delta*0.1f;
710                 
711                 /* bias was calculated as percentage, we scale it to prevent animation issues */
712                 delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
713                 //printf("bias delta %f\n", delta);
714                 lar->shb->bias= (int) (delta*(float)lar->shb->bias);
715         }
716 }
717
718 static void makeflatshadowbuf(Render *re, LampRen *lar, float *jitbuf)
719 {
720         ShadBuf *shb= lar->shb;
721         int *rectz, samples;
722
723         /* zbuffering */
724         rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
725         
726         for (samples=0; samples<shb->totbuf; samples++) {
727                 zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
728                 /* create Z tiles (for compression): this system is 24 bits!!! */
729                 compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
730
731                 if (re->test_break(re->tbh))
732                         break;
733         }
734         
735         MEM_freeN(rectz);
736 }
737
738 static void makedeepshadowbuf(Render *re, LampRen *lar, float *jitbuf)
739 {
740         ShadBuf *shb= lar->shb;
741         APixstr *apixbuf;
742         APixstrand *apixbufstrand= NULL;
743         ListBase apsmbase= {NULL, NULL};
744
745         /* zbuffering */
746         apixbuf= MEM_callocN(sizeof(APixstr)*shb->size*shb->size, "APixbuf");
747         if (re->totstrand)
748                 apixbufstrand= MEM_callocN(sizeof(APixstrand)*shb->size*shb->size, "APixbufstrand");
749
750         zbuffer_abuf_shadow(re, lar, shb->persmat, apixbuf, apixbufstrand, &apsmbase, shb->size,
751                 shb->totbuf, (float(*)[2])jitbuf);
752
753         /* create Z tiles (for compression): this system is 24 bits!!! */
754         compress_deepshadowbuf(re, shb, apixbuf, apixbufstrand);
755         
756         MEM_freeN(apixbuf);
757         if (apixbufstrand)
758                 MEM_freeN(apixbufstrand);
759         freepsA(&apsmbase);
760 }
761
762 void makeshadowbuf(Render *re, LampRen *lar)
763 {
764         ShadBuf *shb= lar->shb;
765         float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
766         
767         if (lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
768                 shadowbuf_autoclip(re, lar);
769         
770         /* just to enforce identical behavior of all irregular buffers */
771         if (lar->buftype==LA_SHADBUF_IRREGULAR)
772                 shb->size= 1024;
773         
774         /* matrices and window: in winmat the transformation is being put,
775          * transforming from observer view to lamp view, including lamp window matrix */
776         
777         angle= saacos(lar->spotsi);
778         temp= 0.5f*shb->size*cos(angle)/sin(angle);
779         shb->pixsize= (shb->d)/temp;
780         wsize= shb->pixsize*(shb->size/2.0f);
781         
782         perspective_m4(shb->winmat, -wsize, wsize, -wsize, wsize, shb->d, shb->clipend);
783         mult_m4_m4m4(shb->persmat, shb->winmat, shb->viewmat);
784
785         if (ELEM3(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) {
786                 shb->totbuf= lar->buffers;
787
788                 /* jitter, weights - not threadsafe! */
789                 BLI_lock_thread(LOCK_CUSTOM1);
790                 shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
791                 make_jitter_weight_tab(re, shb, lar->filtertype);
792                 BLI_unlock_thread(LOCK_CUSTOM1);
793                 
794                 if (shb->totbuf==4) jitbuf= give_jitter_tab(2);
795                 else if (shb->totbuf==9) jitbuf= give_jitter_tab(3);
796                 else jitbuf= twozero;
797                 
798                 /* zbuffering */
799                 if (lar->buftype == LA_SHADBUF_DEEP) {
800                         makedeepshadowbuf(re, lar, jitbuf);
801                         shb->totbuf= 1;
802                 }
803                 else
804                         makeflatshadowbuf(re, lar, jitbuf);
805
806                 /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
807         }
808 }
809
810 static void *do_shadow_thread(void *re_v)
811 {
812         Render *re= (Render*)re_v;
813         LampRen *lar;
814
815         do {
816                 BLI_lock_thread(LOCK_CUSTOM1);
817                 for (lar=re->lampren.first; lar; lar=lar->next) {
818                         if (lar->shb && !lar->thread_assigned) {
819                                 lar->thread_assigned= 1;
820                                 break;
821                         }
822                 }
823                 BLI_unlock_thread(LOCK_CUSTOM1);
824
825                 /* if type is irregular, this only sets the perspective matrix and autoclips */
826                 if (lar) {
827                         makeshadowbuf(re, lar);
828                         BLI_lock_thread(LOCK_CUSTOM1);
829                         lar->thread_ready= 1;
830                         BLI_unlock_thread(LOCK_CUSTOM1);
831                 }
832         } while (lar && !re->test_break(re->tbh));
833
834         return NULL;
835 }
836
/* Cancel flag read by thread_break(); threaded_makeshadowbufs() stores the
 * result of the real test_break callback here while worker threads run. */
static volatile int g_break = 0;

/* Replacement for Render.test_break during threaded shadow buffer creation:
 * returns the cached flag, the argument is ignored. */
static int thread_break(void *UNUSED(arg))
{
	return g_break;
}
842
843 void threaded_makeshadowbufs(Render *re)
844 {
845         ListBase threads;
846         LampRen *lar;
847         int a, totthread= 0;
848         int (*test_break)(void *);
849
850         /* count number of threads to use */
851         if (G.rendering) {
852                 for (lar=re->lampren.first; lar; lar= lar->next)
853                         if (lar->shb)
854                                 totthread++;
855                 
856                 totthread= MIN2(totthread, re->r.threads);
857         }
858         else
859                 totthread= 1; /* preview render */
860
861         if (totthread <= 1) {
862                 for (lar=re->lampren.first; lar; lar= lar->next) {
863                         if (re->test_break(re->tbh)) break;
864                         if (lar->shb) {
865                                 /* if type is irregular, this only sets the perspective matrix and autoclips */
866                                 makeshadowbuf(re, lar);
867                         }
868                 }
869         }
870         else {
871                 /* swap test break function */
872                 test_break= re->test_break;
873                 re->test_break= thread_break;
874
875                 for (lar=re->lampren.first; lar; lar= lar->next) {
876                         lar->thread_assigned= 0;
877                         lar->thread_ready= 0;
878                 }
879
880                 BLI_init_threads(&threads, do_shadow_thread, totthread);
881                 
882                 for (a=0; a<totthread; a++)
883                         BLI_insert_thread(&threads, re);
884
885                 /* keep rendering as long as there are shadow buffers not ready */
886                 do {
887                         if ((g_break=test_break(re->tbh)))
888                                 break;
889
890                         PIL_sleep_ms(50);
891
892                         BLI_lock_thread(LOCK_CUSTOM1);
893                         for (lar=re->lampren.first; lar; lar= lar->next)
894                                 if (lar->shb && !lar->thread_ready)
895                                         break;
896                         BLI_unlock_thread(LOCK_CUSTOM1);
897                 } while (lar);
898         
899                 BLI_end_threads(&threads);
900
901                 /* unset threadsafety */
902                 re->test_break= test_break;
903                 g_break= 0;
904         }
905 }
906
907 void freeshadowbuf(LampRen *lar)
908 {
909         if (lar->shb) {
910                 ShadBuf *shb= lar->shb;
911                 ShadSampleBuf *shsample;
912                 int b, v;
913                 
914                 for (shsample= shb->buffers.first; shsample; shsample= shsample->next) {
915                         if (shsample->deepbuf) {
916                                 v= shb->size*shb->size;
917                                 for (b=0; b<v; b++)
918                                         if (shsample->deepbuf[b])
919                                                 MEM_freeN(shsample->deepbuf[b]);
920                                         
921                                 MEM_freeN(shsample->deepbuf);
922                                 MEM_freeN(shsample->totbuf);
923                         }
924                         else {
925                                 intptr_t *ztile= shsample->zbuf;
926                                 char *ctile= shsample->cbuf;
927                                 
928                                 v= (shb->size*shb->size)/256;
929                                 for (b=0; b<v; b++, ztile++, ctile++)
930                                         if (*ctile) MEM_freeN((void *) *ztile);
931                                 
932                                 MEM_freeN(shsample->zbuf);
933                                 MEM_freeN(shsample->cbuf);
934                         }
935                 }
936                 BLI_freelistN(&shb->buffers);
937                 
938                 if (shb->weight) MEM_freeN(shb->weight);
939                 MEM_freeN(lar->shb);
940                 
941                 lar->shb= NULL;
942         }
943 }
944
945
946 static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
947 {
948         /* return a 1 if fully compressed shadbuf-tile && z==const */
949         int ofs;
950         char *ct;
951
952         if (shsample->deepbuf)
953                 return 0;
954
955         /* always test borders of shadowbuffer */
956         if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1;
957         if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1;
958
959         /* calc z */
960         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
961         ct= shsample->cbuf+ofs;
962         if (*ct==0) {
963                 if (nr==0) {
964                         *rz= *( (int **)(shsample->zbuf+ofs) );
965                         return 1;
966                 }
967                 else if (*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
968                 
969                 return 1;
970         }
971         
972         return 0;
973 }
974
975 static float readdeepvisibility(DeepSample *dsample, int tot, int z, int bias, float *biast)
976 {
977         DeepSample *ds, *prevds;
978         float t;
979         int a;
980
981         /* tricky stuff here; we use ints which can overflow easily with bias values */
982
983         ds= dsample;
984         for (a=0; a<tot && (z-bias > ds->z); a++, ds++) {}
985
986         if (a == tot) {
987                 if (biast)
988                         *biast= 0.0f;
989                 return (ds-1)->v; /* completely behind all samples */
990         }
991         
992         /* check if this read needs bias blending */
993         if (biast) {
994                 if (z > ds->z)
995                         *biast= (float)(z - ds->z)/(float)bias;
996                 else
997                         *biast= 0.0f;
998         }
999
1000         if (a == 0)
1001                 return 1.0f; /* completely in front of all samples */
1002
1003         /* converting to float early here because ds->z - prevds->z can overflow */
1004         prevds= ds-1;
1005         t= ((float)(z-bias) - (float)prevds->z)/((float)ds->z - (float)prevds->z);
1006         return t*ds->v + (1.0f-t)*prevds->v;
1007 }
1008
1009 static float readdeepshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
1010 {
1011         float v, biasv, biast;
1012         int ofs, tot;
1013
1014         if (zs < - 0x7FFFFE00 + bias)
1015                 return 1.0;     /* extreme close to clipstart */
1016
1017         /* calc z */
1018         ofs= ys*shb->size + xs;
1019         tot= shsample->totbuf[ofs];
1020         if (tot == 0)
1021                 return 1.0f;
1022
1023         v= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, bias, &biast);
1024
1025         if (biast != 0.0f) {
1026                 /* in soft bias area */
1027                 biasv= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, 0, 0);
1028
1029                 biast= biast*biast;
1030                 return (1.0f-biast)*v + biast*biasv;
1031         }
1032
1033         return v;
1034 }
1035
1036 /* return 1.0 : fully in light */
1037 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)     
1038 {
1039         float temp;
1040         int *rz, ofs;
1041         int zsamp=0;
1042         char *ct, *cz;
1043
1044         /* simpleclip */
1045         /* if (xs<0 || ys<0) return 1.0; */
1046         /* if (xs>=shb->size || ys>=shb->size) return 1.0; */
1047         
1048         /* always test borders of shadowbuffer */
1049         if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1;
1050         if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1;
1051
1052         if (shsample->deepbuf)
1053                 return readdeepshadowbuf(shb, shsample, bias, xs, ys, zs);
1054
1055         /* calc z */
1056         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1057         ct= shsample->cbuf+ofs;
1058         rz= *( (int **)(shsample->zbuf+ofs) );
1059
1060         if (*ct==3) {
1061                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1062                 cz= (char *)&zsamp;
1063                 cz[ACOMP]= ct[0];
1064                 cz[BCOMP]= ct[1];
1065                 cz[GCOMP]= ct[2];
1066         }
1067         else if (*ct==2) {
1068                 ct= ((char *)rz);
1069                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1070                 zsamp= *rz;
1071         
1072                 cz= (char *)&zsamp;
1073                 cz[BCOMP]= ct[0];
1074                 cz[GCOMP]= ct[1];
1075         }
1076         else if (*ct==1) {
1077                 ct= ((char *)rz);
1078                 ct+= 4+16*(ys & 15)+(xs & 15);
1079                 zsamp= *rz;
1080
1081                 cz= (char *)&zsamp;
1082                 cz[GCOMP]= ct[0];
1083
1084         }
1085         else {
1086                 /* got warning on this for 64 bits.... */
1087                 /* but it's working code! in this case rz is not a pointer but zvalue (ton) */
1088                 zsamp= GET_INT_FROM_POINTER(rz);
1089         }
1090
1091         /* tricky stuff here; we use ints which can overflow easily with bias values */
1092         
1093         if (zsamp > zs) return 1.0;             /* absolute no shadow */
1094         else if (zs < - 0x7FFFFE00 + bias) return 1.0;  /* extreme close to clipstart */
1095         else if (zsamp < zs-bias) return 0.0;   /* absolute in shadow */
1096         else {                                  /* soft area */
1097                 
1098                 temp=  ( (float)(zs- zsamp) )/(float)bias;
1099                 return 1.0f - temp*temp;
1100                         
1101         }
1102 }
1103
1104 static void shadowbuf_project_co(float *x, float *y, float *z, ShadBuf *shb, const float co[3])
1105 {
1106         float hco[4], size= 0.5f*(float)shb->size;
1107
1108         copy_v3_v3(hco, co);
1109         hco[3]= 1.0f;
1110
1111         mul_m4_v4(shb->persmat, hco);
1112
1113         *x= size*(1.0f+hco[0]/hco[3]);
1114         *y= size*(1.0f+hco[1]/hco[3]);
1115         if (z) *z= (hco[2]/hco[3]);
1116 }
1117
1118 /* the externally called shadow testing (reading) function */
1119 /* return 1.0: no shadow at all */
1120 float testshadowbuf(Render *re, ShadBuf *shb, const float co[3], const float dxco[3], const float dyco[3], float inp, float mat_bias)
1121 {
1122         ShadSampleBuf *shsample;
1123         float fac, dco[3], dx[3], dy[3], shadfac=0.0f;
1124         float xs1, ys1, zs1, *jit, *weight, xres, yres, biasf;
1125         int xs, ys, zs, bias, *rz;
1126         short a, num;
1127         
1128         /* crash preventer */
1129         if (shb->buffers.first==NULL)
1130                 return 1.0f;
1131         
1132         /* when facing away, assume fully in shadow */
1133         if (inp <= 0.0f)
1134                 return 0.0f;
1135
1136         /* project coordinate to pixel space */
1137         shadowbuf_project_co(&xs1, &ys1, &zs1, shb, co);
1138
1139         /* clip z coordinate, z is projected so that (-1.0, 1.0) matches
1140          * (clipstart, clipend), so we can do this simple test */
1141         if (zs1>=1.0f)
1142                 return 0.0f;
1143         else if (zs1<= -1.0f)
1144                 return 1.0f;
1145
1146         zs= ((float)0x7FFFFFFF)*zs1;
1147
1148         /* take num*num samples, increase area with fac */
1149         num= get_render_shadow_samples(&re->r, shb->samp);
1150         num= num*num;
1151         fac= shb->soft;
1152         
1153         /* compute z bias */
1154         if (mat_bias!=0.0f) biasf= shb->bias*mat_bias;
1155         else biasf= shb->bias;
1156         /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors 
1157          * on cube edges, with one side being almost frontal lighted (ton)  */
1158         bias= (1.5f-inp*inp)*biasf;
1159         
1160         /* in case of no filtering we can do things simpler */
1161         if (num==1) {
1162                 for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
1163                         shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
1164                 
1165                 return shadfac/(float)shb->totbuf;
1166         }
1167
1168         /* calculate filter size */
1169         add_v3_v3v3(dco, co, dxco);
1170         shadowbuf_project_co(&dx[0], &dx[1], NULL, shb, dco);
1171         dx[0]= xs1 - dx[0];
1172         dx[1]= ys1 - dx[1];
1173
1174         add_v3_v3v3(dco, co, dyco);
1175         shadowbuf_project_co(&dy[0], &dy[1], NULL, shb, dco);
1176         dy[0]= xs1 - dy[0];
1177         dy[1]= ys1 - dy[1];
1178         
1179         xres= fac*(fabs(dx[0]) + fabs(dy[0]));
1180         yres= fac*(fabs(dx[1]) + fabs(dy[1]));
1181         if (xres<1.0f) xres= 1.0f;
1182         if (yres<1.0f) yres= 1.0f;
1183         
1184         /* make xs1/xs1 corner of sample area */
1185         xs1 -= xres*0.5f;
1186         ys1 -= yres*0.5f;
1187
1188         /* in case we have a constant value in a tile, we can do quicker lookup */
1189         if (xres<16.0f && yres<16.0f) {
1190                 shsample= shb->buffers.first;
1191                 if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
1192                         if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
1193                                 if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
1194                                         if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
1195                                                 return readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
1196                                         }
1197                                 }
1198                         }
1199                 }
1200         }
1201         
1202         /* full jittered shadow buffer lookup */
1203         for (shsample= shb->buffers.first; shsample; shsample= shsample->next) {
1204                 jit= shb->jit;
1205                 weight= shb->weight;
1206                 
1207                 for (a=num; a>0; a--, jit+=2, weight++) {
1208                         /* instead of jit i tried random: ugly! */
1209                         /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
1210                         /* xs1 and ys1 are already corrected to be corner of sample area */
1211                         xs= xs1 + xres*(jit[0] + 0.5f);
1212                         ys= ys1 + yres*(jit[1] + 0.5f);
1213                         
1214                         shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
1215                 }
1216         }
1217
1218         /* Renormalizes for the sample number: */
1219         return shadfac/(float)shb->totbuf;
1220 }
1221
1222 /* different function... sampling behind clipend can be LIGHT, bias is negative! */
1223 /* return: light */
1224 static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
1225 {
1226         float temp;
1227         int *rz, ofs;
1228         int bias, zbias, zsamp;
1229         char *ct, *cz;
1230
1231         /* negative! The other side is more important */
1232         bias= -shb->bias;
1233         
1234         /* simpleclip */
1235         if (xs<0 || ys<0) return 0.0;
1236         if (xs>=shb->size || ys>=shb->size) return 0.0;
1237
1238         /* calc z */
1239         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1240         ct= shsample->cbuf+ofs;
1241         rz= *( (int **)(shsample->zbuf+ofs) );
1242
1243         if (*ct==3) {
1244                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1245                 cz= (char *)&zsamp;
1246                 zsamp= 0;
1247                 cz[ACOMP]= ct[0];
1248                 cz[BCOMP]= ct[1];
1249                 cz[GCOMP]= ct[2];
1250         }
1251         else if (*ct==2) {
1252                 ct= ((char *)rz);
1253                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1254                 zsamp= *rz;
1255         
1256                 cz= (char *)&zsamp;
1257                 cz[BCOMP]= ct[0];
1258                 cz[GCOMP]= ct[1];
1259         }
1260         else if (*ct==1) {
1261                 ct= ((char *)rz);
1262                 ct+= 4+16*(ys & 15)+(xs & 15);
1263                 zsamp= *rz;
1264
1265                 cz= (char *)&zsamp;
1266                 cz[GCOMP]= ct[0];
1267
1268         }
1269         else {
1270                 /* same as before */
1271                 /* still working code! (ton) */
1272                 zsamp= GET_INT_FROM_POINTER(rz);
1273         }
1274
1275         /* NO schadow when sampled at 'eternal' distance */
1276
1277         if (zsamp >= 0x7FFFFE00) return 1.0;
1278
1279         if (zsamp > zs) return 1.0;             /* absolute no shadww */
1280         else {
1281                 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
1282                 zbias= 0x7fffffff - zs;
1283                 if (zbias > -bias) {
1284                         if ( zsamp < zs-bias) return 0.0;       /* absolute in shadow */
1285                 }
1286                 else return 0.0;        /* absolute shadow */
1287         }
1288
1289         /* soft area */
1290         
1291         temp=  ( (float)(zs- zsamp) )/(float)bias;
1292         return 1.0f - temp*temp;
1293 }
1294
1295
1296 float shadow_halo(LampRen *lar, const float p1[3], const float p2[3])
1297 {
1298         /* p1 p2 already are rotated in spot-space */
1299         ShadBuf *shb= lar->shb;
1300         ShadSampleBuf *shsample;
1301         float co[4], siz;
1302         float labda, labdao, labdax, labday, ldx, ldy;
1303         float zf, xf1, yf1, zf1, xf2, yf2, zf2;
1304         float count, lightcount;
1305         int x, y, z, xs1, ys1;
1306         int dx = 0, dy = 0;
1307         
1308         siz= 0.5f*(float)shb->size;
1309         
1310         co[0]= p1[0];
1311         co[1]= p1[1];
1312         co[2]= p1[2]/lar->sh_zfac;
1313         co[3]= 1.0;
1314         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1315         xf1= siz*(1.0f+co[0]/co[3]);
1316         yf1= siz*(1.0f+co[1]/co[3]);
1317         zf1= (co[2]/co[3]);
1318
1319
1320         co[0]= p2[0];
1321         co[1]= p2[1];
1322         co[2]= p2[2]/lar->sh_zfac;
1323         co[3]= 1.0;
1324         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1325         xf2= siz*(1.0f+co[0]/co[3]);
1326         yf2= siz*(1.0f+co[1]/co[3]);
1327         zf2= (co[2]/co[3]);
1328
1329         /* the 2dda (a pixel line formula) */
1330
1331         xs1= (int)xf1;
1332         ys1= (int)yf1;
1333
1334         if (xf1 != xf2) {
1335                 if (xf2-xf1 > 0.0f) {
1336                         labdax= (xf1-xs1-1.0f)/(xf1-xf2);
1337                         ldx= -shb->shadhalostep/(xf1-xf2);
1338                         dx= shb->shadhalostep;
1339                 }
1340                 else {
1341                         labdax= (xf1-xs1)/(xf1-xf2);
1342                         ldx= shb->shadhalostep/(xf1-xf2);
1343                         dx= -shb->shadhalostep;
1344                 }
1345         }
1346         else {
1347                 labdax= 1.0;
1348                 ldx= 0.0;
1349         }
1350
1351         if (yf1 != yf2) {
1352                 if (yf2-yf1 > 0.0f) {
1353                         labday= (yf1-ys1-1.0f)/(yf1-yf2);
1354                         ldy= -shb->shadhalostep/(yf1-yf2);
1355                         dy= shb->shadhalostep;
1356                 }
1357                 else {
1358                         labday= (yf1-ys1)/(yf1-yf2);
1359                         ldy= shb->shadhalostep/(yf1-yf2);
1360                         dy= -shb->shadhalostep;
1361                 }
1362         }
1363         else {
1364                 labday= 1.0;
1365                 ldy= 0.0;
1366         }
1367         
1368         x= xs1;
1369         y= ys1;
1370         labda= count= lightcount= 0.0;
1371
1372 /* printf("start %x %x  \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
1373
1374         while (1) {
1375                 labdao= labda;
1376                 
1377                 if (labdax==labday) {
1378                         labdax+= ldx;
1379                         x+= dx;
1380                         labday+= ldy;
1381                         y+= dy;
1382                 }
1383                 else {
1384                         if (labdax<labday) {
1385                                 labdax+= ldx;
1386                                 x+= dx;
1387                         }
1388                         else {
1389                                 labday+= ldy;
1390                                 y+= dy;
1391                         }
1392                 }
1393                 
1394                 labda= MIN2(labdax, labday);
1395                 if (labda==labdao || labda>=1.0f) break;
1396                 
1397                 zf= zf1 + labda*(zf2-zf1);
1398                 count+= (float)shb->totbuf;
1399
1400                 if (zf<= -1.0f) lightcount += 1.0f;     /* close to the spot */
1401                 else {
1402                 
1403                         /* make sure, behind the clipend we extend halolines. */
1404                         if (zf>=1.0f) z= 0x7FFFF000;
1405                         else z= (int)(0x7FFFF000*zf);
1406                         
1407                         for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
1408                                 lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
1409                         
1410                 }
1411         }
1412         
1413         if (count!=0.0f) return (lightcount/count);
1414         return 0.0f;
1415         
1416 }
1417
1418
1419 /* ********************* Irregular Shadow Buffer (ISB) ************* */
1420 /* ********** storage of all view samples in a raster of lists ***** */
1421
1422 /* based on several articles describing this method, like:
1423  * The Irregular Z-Buffer and its Application to Shadow Mapping
1424  * Gregory S. Johnson - William R. Mark - Christopher A. Burns
1425  * and
1426  * Alias-Free Shadow Maps
1427  * Timo Aila and Samuli Laine
1428  */
1429
1430 /* bsp structure (actually kd tree) */
1431
1432 #define BSPMAX_SAMPLE   128
1433 #define BSPMAX_DEPTH    32
1434
/* Axis-aligned box of floats.
 * The first four members are aligned with struct rctf; keep this order. */
typedef struct Boxf {
	float xmin;
	float xmax;
	float ymin;
	float ymax;
	float zmin;
	float zmax;
} Boxf;
1441
1442 typedef struct ISBBranch {
1443         struct ISBBranch *left, *right;
1444         float divider[2];
1445         Boxf box;
1446         short totsamp, index, full, unused;
1447         ISBSample **samples;
1448 } ISBBranch;
1449
1450 typedef struct BSPFace {
1451         Boxf box;
1452         const float *v1, *v2, *v3, *v4;
1453         int obi;                /* object for face lookup */
1454         int facenr;             /* index to retrieve VlakRen */
1455         int type;               /* only for strand now */
1456         short shad_alpha, is_full;
1457         
1458         /* strand caching data, optimize for point_behind_strand() */
1459         float radline, radline_end, len;
1460         float vec1[3], vec2[3], rc[3];
1461 } BSPFace;
1462
1463 /* boxes are in lamp projection */
1464 static void init_box(Boxf *box)
1465 {
1466         box->xmin = 1000000.0f;
1467         box->xmax = 0;
1468         box->ymin = 1000000.0f;
1469         box->ymax = 0;
1470         box->zmin= 0x7FFFFFFF;
1471         box->zmax= - 0x7FFFFFFF;
1472 }
1473
1474 /* use v1 to calculate boundbox */
1475 static void bound_boxf(Boxf *box, const float v1[3])
1476 {
1477         if (v1[0] < box->xmin) box->xmin = v1[0];
1478         if (v1[0] > box->xmax) box->xmax = v1[0];
1479         if (v1[1] < box->ymin) box->ymin = v1[1];
1480         if (v1[1] > box->ymax) box->ymax = v1[1];
1481         if (v1[2] < box->zmin) box->zmin= v1[2];
1482         if (v1[2] > box->zmax) box->zmax= v1[2];
1483 }
1484
1485 /* use v1 to calculate boundbox */
1486 static void bound_rectf(rctf *box, const float v1[2])
1487 {
1488         if (v1[0] < box->xmin) box->xmin = v1[0];
1489         if (v1[0] > box->xmax) box->xmax = v1[0];
1490         if (v1[1] < box->ymin) box->ymin = v1[1];
1491         if (v1[1] > box->ymax) box->ymax = v1[1];
1492 }
1493
1494
1495 /* halfway splitting, for initializing a more regular tree */
1496 static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
1497 {
1498         
1499         /* if level > 0 we create new branches and go deeper*/
1500         if (level > 0) {
1501                 ISBBranch *left, *right;
1502                 int i;
1503                 
1504                 /* splitpoint */
1505                 root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
1506                 root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
1507                 
1508                 /* find best splitpoint */
1509                 if (root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1510                         i= root->index= 0;
1511                 else
1512                         i= root->index= 1;
1513                 
1514                 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1515                 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1516                 
1517                 /* box info */
1518                 left->box= root->box;
1519                 right->box= root->box;
1520                 if (i==0) {
1521                         left->box.xmax = root->divider[0];
1522                         right->box.xmin = root->divider[0];
1523                 }
1524                 else {
1525                         left->box.ymax = root->divider[1];
1526                         right->box.ymin = root->divider[1];
1527                 }
1528                 isb_bsp_split_init(left, mem, level-1);
1529                 isb_bsp_split_init(right, mem, level-1);
1530         }
1531         else {
1532                 /* we add sample array */
1533                 root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1534         }
1535 }
1536
1537 /* note; if all samples on same location we just spread them over 2 new branches */
1538 static void isb_bsp_split(ISBBranch *root, MemArena *mem)
1539 {
1540         ISBBranch *left, *right;
1541         ISBSample *samples[BSPMAX_SAMPLE];
1542         int a, i;
1543
1544         /* splitpoint */
1545         root->divider[0]= root->divider[1]= 0.0f;
1546         for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
1547                 root->divider[0]+= root->samples[a]->zco[0];
1548                 root->divider[1]+= root->samples[a]->zco[1];
1549         }
1550         root->divider[0]/= BSPMAX_SAMPLE;
1551         root->divider[1]/= BSPMAX_SAMPLE;
1552         
1553         /* find best splitpoint */
1554         if (root->box.xmax-root->box.xmin > root->box.ymax-root->box.ymin)
1555                 i= root->index= 0;
1556         else
1557                 i= root->index= 1;
1558         
1559         /* new branches */
1560         left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1561         right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1562
1563         /* new sample array */
1564         left->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1565         right->samples= samples; // tmp
1566         
1567         /* split samples */
1568         for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
1569                 int comp= 0;
1570                 /* this prevents adding samples all to 1 branch when divider is equal to samples */
1571                 if (root->samples[a]->zco[i] == root->divider[i])
1572                         comp= a & 1;
1573                 else if (root->samples[a]->zco[i] < root->divider[i])
1574                         comp= 1;
1575                 
1576                 if (comp==1) {
1577                         left->samples[left->totsamp]= root->samples[a];
1578                         left->totsamp++;
1579                 }
1580                 else {
1581                         right->samples[right->totsamp]= root->samples[a];
1582                         right->totsamp++;
1583                 }
1584         }
1585         
1586         /* copy samples from tmp */
1587         memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
1588         right->samples= root->samples;
1589         root->samples= NULL;
1590         
1591         /* box info */
1592         left->box= root->box;
1593         right->box= root->box;
1594         if (i==0) {
1595                 left->box.xmax = root->divider[0];
1596                 right->box.xmin = root->divider[0];
1597         }
1598         else {
1599                 left->box.ymax = root->divider[1];
1600                 right->box.ymin = root->divider[1];
1601         }
1602 }
1603
1604 /* inserts sample in main tree, also splits on threshold */
1605 /* returns 1 if error */
1606 static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
1607 {
1608         ISBBranch *bspn= root;
1609         float *zco= sample->zco;
1610         int i= 0;
1611         
1612         /* debug counter, also used to check if something was filled in ever */
1613         root->totsamp++;
1614         
1615         /* going over branches until last one found */
1616         while (bspn->left) {
1617                 if (zco[bspn->index] <= bspn->divider[bspn->index])
1618                         bspn= bspn->left;
1619                 else
1620                         bspn= bspn->right;
1621                 i++;
1622         }
1623         /* bspn now is the last branch */
1624         
1625         if (bspn->totsamp==BSPMAX_SAMPLE) {
1626                 printf("error in bsp branch\n");        /* only for debug, cannot happen */
1627                 return 1;
1628         }
1629         
1630         /* insert */
1631         bspn->samples[bspn->totsamp]= sample;
1632         bspn->totsamp++;
1633
1634         /* split if allowed and needed */
1635         if (bspn->totsamp==BSPMAX_SAMPLE) {
1636                 if (i==BSPMAX_DEPTH) {
1637                         bspn->totsamp--;        /* stop filling in... will give errors */
1638                         return 1;
1639                 }
1640                 isb_bsp_split(bspn, memarena);
1641         }
1642         return 0;
1643 }
1644
1645 /* initialize vars in face, for optimal point-in-face test */
1646 static void bspface_init_strand(BSPFace *face) 
1647 {
1648         
1649         face->radline= 0.5f* len_v2v2(face->v1, face->v2);
1650         
1651         mid_v3_v3v3(face->vec1, face->v1, face->v2);
1652         if (face->v4)
1653                 mid_v3_v3v3(face->vec2, face->v3, face->v4);
1654         else
1655                 copy_v3_v3(face->vec2, face->v3);
1656         
1657         face->rc[0]= face->vec2[0]-face->vec1[0];
1658         face->rc[1]= face->vec2[1]-face->vec1[1];
1659         face->rc[2]= face->vec2[2]-face->vec1[2];
1660         
1661         face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
1662         
1663         if (face->len!=0.0f) {
1664                 face->radline_end= face->radline/sqrt(face->len);
1665                 face->len= 1.0f/face->len;
1666         }
1667 }
1668
1669 /* brought back to a simple 2d case */
1670 static int point_behind_strand(const float p[3], BSPFace *face)
1671 {
1672         /* v1 - v2 is radius, v1 - v3 length */
1673         float dist, rc[2], pt[2];
1674         
1675         /* using code from dist_to_line_segment_v2(), distance vec to line-piece */
1676
1677         if (face->len==0.0f) {
1678                 rc[0]= p[0]-face->vec1[0];
1679                 rc[1]= p[1]-face->vec1[1];
1680                 dist= (float)(sqrt(rc[0]*rc[0]+ rc[1]*rc[1]));
1681                 
1682                 if (dist < face->radline)
1683                         return 1;
1684         }
1685         else {
1686                 float labda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
1687                 
1688                 if (labda > -face->radline_end && labda < 1.0f+face->radline_end) {
1689                         /* hesse for dist: */
1690                         //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len);
1691                         
1692                         pt[0]= labda*face->rc[0]+face->vec1[0];
1693                         pt[1]= labda*face->rc[1]+face->vec1[1];
1694                         
1695                         rc[0]= pt[0]-p[0];
1696                         rc[1]= pt[1]-p[1];
1697                         dist= (float)sqrt(rc[0]*rc[0]+ rc[1]*rc[1]);
1698                         
1699                         if (dist < face->radline) {
1700                                 float zval= face->vec1[2] + labda*face->rc[2];
1701                                 if (p[2] > zval)
1702                                         return 1;
1703                         }
1704                 }
1705         }
1706         return 0;
1707 }
1708
1709
/* Returns 1 if p is inside the 2d (x, y) projection of triangle v1-v2-v3 and
 * its z is equal to or larger than the z of the triangle at that location.
 * Returns 0 otherwise, also for triangles without area in 2d.
 * Code derived from src/parametrizer.c */
static int point_behind_tria2d(const float p[3], const float v1[3], const float v2[3], const float v3[3])
{
	float e1[2], e2[2], rel[2];
	float det, inv, u, v, z;

	/* triangle edges starting at v1 */
	e1[0] = v2[0] - v1[0];
	e1[1] = v2[1] - v1[1];
	e2[0] = v3[0] - v1[0];
	e2[1] = v3[1] - v1[1];

	det = e1[0]*e2[1] - e1[1]*e2[0];
	if (det==0.0f)
		return 0;

	/* point relative to v1 */
	rel[0] = p[0] - v1[0];
	rel[1] = p[1] - v1[1];

	inv = 1.0f/det;

	/* barycentric weights for v2 (u) and v3 (v), all three tests must pass */
	u = (rel[0]*e2[1] - rel[1]*e2[0])*inv;
	if (!(u >= 0.0f))
		return 0;

	v = (e1[0]*rel[1] - e1[1]*rel[0])*inv;
	if (!(v >= 0.0f))
		return 0;

	if (!(u + v <= 1.0f))
		return 0;

	/* inside, now check if point p is behind */
	z = (1.0f-u-v)*v1[2] + u*v2[2] + v*v3[2];

	return (z <= p[2]) ? 1 : 0;
}
1745
#if 0
/* tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */

/* returns 1 if all four corners of rect are on the other side of line v1-v2
 * than point v3 */
static int rect_outside_line(rctf *rect, const float v1[3], const float v2[3], const float v3[3])
{
	/* line formula for v1-v2 */
	const float a= v2[1]-v1[1];
	const float b= v1[0]-v2[0];
	const float c= -a*v1[0] - b*v1[1];
	const int side= a*v3[0] + b*v3[1] + c < 0.0f;
	/* the four quad points */
	const float corner[4][2]= {
		{rect->xmin, rect->ymin},
		{rect->xmax, rect->ymin},
		{rect->xmax, rect->ymax},
		{rect->xmin, rect->ymax}
	};
	int k;

	for (k= 0; k<4; k++) {
		if (side != (corner[k][0]*a + corner[k][1]*b + c >= 0.0f))
			return 0;
	}
	return 1;
}

/* returns 0 if one of the triangle edges separates all rect points on 1 side,
 * 1 otherwise */
static int rect_isect_tria(rctf *rect, const float v1[3], const float v2[3], const float v3[3])
{
	return !(rect_outside_line(rect, v1, v2, v3) ||
	         rect_outside_line(rect, v2, v3, v1) ||
	         rect_outside_line(rect, v3, v1, v2));
}
#endif
1782
1783 /* if face overlaps a branch, it executes func. recursive */
1784 static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
1785 {
1786         
1787         /* are we descending? */
1788         if (bspn->left) {
1789                 /* hrmf, the box struct cannot be addressed with index */
1790                 if (bspn->index==0) {
1791                         if (face->box.xmin <= bspn->divider[0])
1792                                 isb_bsp_face_inside(bspn->left, face);
1793                         if (face->box.xmax > bspn->divider[0])
1794                                 isb_bsp_face_inside(bspn->right, face);
1795                 }
1796                 else {
1797                         if (face->box.ymin <= bspn->divider[1])
1798                                 isb_bsp_face_inside(bspn->left, face);
1799                         if (face->box.ymax > bspn->divider[1])
1800                                 isb_bsp_face_inside(bspn->right, face);
1801                 }
1802         }
1803         else {
1804                 /* else: end branch reached */
1805                 int a;
1806                 
1807                 if (bspn->totsamp==0) return;
1808                 
1809                 /* check for nodes entirely in shadow, can be skipped */
1810                 if (bspn->totsamp==bspn->full)
1811                         return;
1812                 
1813                 /* if bsp node is entirely in front of face, give up */
1814                 if (bspn->box.zmax < face->box.zmin)
1815                         return;
1816                 
1817                 /* if face boundbox is outside of branch rect, give up */
1818                 if (0==BLI_isect_rctf((rctf *)&face->box, (rctf *)&bspn->box, NULL))
1819                         return;
1820                 
1821                 /* test all points inside branch */
1822                 for (a=bspn->totsamp-1; a>=0; a--) {
1823                         ISBSample *samp= bspn->samples[a];
1824                         
1825                         if ((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
1826                                 if (face->box.zmin < samp->zco[2]) {
1827                                         if (BLI_in_rctf((rctf *)&face->box, samp->zco[0], samp->zco[1])) {
1828                                                 int inshadow= 0;
1829                                                 
1830                                                 if (face->type) {
1831                                                         if (point_behind_strand(samp->zco, face))
1832                                                                 inshadow= 1;
1833                                                 }
1834                                                 else if ( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
1835                                                         inshadow= 1;
1836                                                 else if (face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
1837                                                         inshadow= 1;
1838
1839                                                 if (inshadow) {
1840                                                         *(samp->shadfac) += face->shad_alpha;
1841                                                         /* optimize; is_full means shad_alpha==4096 */
1842                                                         if (*(samp->shadfac) >= 4096 || face->is_full) {
1843                                                                 bspn->full++;
1844                                                                 samp->shadfac= NULL;
1845                                                         }
1846                                                 }
1847                                         }
1848                                 }
1849                         }
1850                 }
1851         }
1852 }
1853
1854 /* based on available samples, recalculate the bounding box for bsp nodes, recursive */
1855 static void isb_bsp_recalc_box(ISBBranch *root)
1856 {
1857         if (root->left) {
1858                 isb_bsp_recalc_box(root->left);
1859                 isb_bsp_recalc_box(root->right);
1860         }
1861         else if (root->totsamp) {
1862                 int a;
1863                 
1864                 init_box(&root->box);
1865                 for (a=root->totsamp-1; a>=0; a--)
1866                         bound_boxf(&root->box, root->samples[a]->zco);
1867         }       
1868 }
1869
1870 /* callback function for zbuf clip */
1871 static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr,
1872                                 const float *v1, const float *v2, const float *v3, const float *v4)
1873 {
1874         BSPFace face;
1875         
1876         face.v1= v1;
1877         face.v2= v2;
1878         face.v3= v3;
1879         face.v4= v4;
1880         face.obi= obi;
1881         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1882         face.type= R_STRAND;
1883         if (R.osa)
1884                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1885         else
1886                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1887         
1888         face.is_full= (zspan->shad_alpha==1.0f);
1889         
1890         /* setup boundbox */
1891         init_box(&face.box);
1892         bound_boxf(&face.box, v1);
1893         bound_boxf(&face.box, v2);
1894         bound_boxf(&face.box, v3);
1895         if (v4)
1896                 bound_boxf(&face.box, v4);
1897         
1898         /* optimize values */
1899         bspface_init_strand(&face);
1900         
1901         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1902         
1903 }
1904
1905 /* callback function for zbuf clip */
1906 static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr,
1907                               const float *v1, const float *v2, const float *v3, const float *v4)
1908 {
1909         BSPFace face;
1910         
1911         face.v1= v1;
1912         face.v2= v2;
1913         face.v3= v3;
1914         face.v4= v4;
1915         face.obi= obi;
1916         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1917         face.type= 0;
1918         if (R.osa)
1919                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1920         else
1921                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1922         
1923         face.is_full= (zspan->shad_alpha==1.0f);
1924         
1925         /* setup boundbox */
1926         init_box(&face.box);
1927         bound_boxf(&face.box, v1);
1928         bound_boxf(&face.box, v2);
1929         bound_boxf(&face.box, v3);
1930         if (v4)
1931                 bound_boxf(&face.box, v4);
1932
1933         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1934 }
1935
/* Clip test of homogeneous coordinate 'ho' against the range in 'minmax'
 * (xmin, xmax, ymin, ymax), scaled with the w component of 'ho'.
 * Returns a bitflag: 1 = above xmax, 2 = below xmin, 4 = above ymax,
 * 8 = below ymin, or 0 when inside. */
static int testclip_minmax(const float ho[4], const float minmax[4])
{
	const float wco= ho[3];
	int xflag= 0, yflag= 0;

	if (ho[0] > minmax[1]*wco)
		xflag= 1;
	else if (ho[0] < minmax[0]*wco)
		xflag= 2;

	if (ho[1] > minmax[3]*wco)
		yflag= 4;
	else if (ho[1] < minmax[2]*wco)
		yflag= 8;

	return xflag | yflag;
}
1949
1950 /* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
1951 static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
1952 {
1953         ObjectInstanceRen *obi;
1954         ObjectRen *obr;
1955         ShadBuf *shb= lar->shb;
1956         ZSpan zspan, zspanstrand;
1957         VlakRen *vlr= NULL;
1958         Material *ma= NULL;
1959         float minmaxf[4], winmat[4][4];
1960         int size= shb->size;
1961         int i, a, ok=1, lay= -1;
1962         
1963         /* further optimize, also sets minz maxz */
1964         isb_bsp_recalc_box(root);
1965         
1966         /* extra clipping for minmax */
1967         minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
1968         minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
1969         minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
1970         minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
1971         
1972         if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
1973         
1974         /* (ab)use zspan, since we use zbuffer clipping code */
1975         zbuf_alloc_span(&zspan, size, size, re->clipcrop);
1976         
1977         zspan.zmulx=  ((float)size)/2.0f;
1978         zspan.zmuly=  ((float)size)/2.0f;
1979         zspan.zofsx= -0.5f;
1980         zspan.zofsy= -0.5f;
1981         
1982         /* pass on bsp root to zspan */
1983         zspan.rectz= (int *)root;
1984         
1985         /* filling methods */
1986         zspanstrand= zspan;
1987         //      zspan.zbuflinefunc= zbufline_onlyZ;
1988         zspan.zbuffunc= isb_bsp_test_face;
1989         zspanstrand.zbuffunc= isb_bsp_test_strand;
1990         
1991         for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
1992                 obr= obi->obr;
1993
1994                 if (obi->flag & R_TRANSFORMED)
1995                         mult_m4_m4m4(winmat, shb->persmat, obi->mat);
1996                 else
1997                         copy_m4_m4(winmat, shb->persmat);
1998
1999                 for (a=0; a<obr->totvlak; a++) {
2000                         
2001                         if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
2002                         else vlr++;
2003                         
2004                         /* note, these conditions are copied in shadowbuf_autoclip() */
2005                         if (vlr->mat!= ma) {
2006                                 ma= vlr->mat;
2007                                 ok= 1;
2008                                 if ((ma->mode & MA_SHADBUF)==0) ok= 0;
2009                                 if (ma->material_type == MA_TYPE_WIRE) ok= 0;
2010                                 zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
2011                         }
2012                         
2013                         if (ok && (obi->lay & lay)) {
2014                                 float hoco[4][4];
2015                                 int c1, c2, c3, c4=0;
2016                                 int d1, d2, d3, d4=0;
2017                                 int partclip;
2018                                 
2019                                 /* create hocos per face, it is while render */
2020                                 projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
2021                                 projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
2022                                 projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
2023                                 if (vlr->v4) {
2024                                         projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
2025                                 }
2026
2027                                 /* minmax clipping */
2028                                 if (vlr->v4) partclip= d1 & d2 & d3 & d4;
2029                                 else partclip= d1 & d2 & d3;
2030                                 
2031                                 if (partclip==0) {
2032                                         
2033                                         /* window clipping */
2034                                         c1= testclip(hoco[0]); 
2035                                         c2= testclip(hoco[1]); 
2036                                         c3= testclip(hoco[2]); 
2037                                         if (vlr->v4)
2038                                                 c4= testclip(hoco[3]); 
2039                                         
2040                                         /* ***** NO WIRE YET */                 
2041                                         if (ma->material_type == MA_TYPE_WIRE) {
2042                                                 if (vlr->v4)
2043                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2044                                                 else
2045                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], 0, c1, c2, c3, 0);
2046                                         }
2047                                         else if (vlr->v4) {
2048                                                 if (vlr->flag & R_STRAND)
2049                                                         zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2050                                                 else
2051                                                         zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2052                                         }
2053                                         else
2054                                                 zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
2055                                         
2056                                 }
2057                         }
2058                 }
2059         }
2060         
2061         zbuf_free_span(&zspan);
2062 }
2063
2064 /* returns 1 when the viewpixel is visible in lampbuffer */
2065 static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float co_r[3])
2066 {
2067         float hoco[4], v1[3], nor[3];
2068         float dface, fac, siz;
2069         
2070         RE_vlakren_get_normal(&R, obi, vlr, nor);
2071         copy_v3_v3(v1, vlr->v1->co);
2072         if (obi->flag & R_TRANSFORMED)
2073                 mul_m4_v3(obi->mat, v1);
2074
2075         /* from shadepixel() */
2076         dface = dot_v3v3(v1, nor);
2077         hoco[3]= 1.0f;
2078         
2079         /* ortho viewplane cannot intersect using view vector originating in (0, 0, 0) */
2080         if (R.r.mode & R_ORTHO) {
2081                 /* x and y 3d coordinate can be derived from pixel coord and winmat */
2082                 float fx= 2.0f/(R.winx*R.winmat[0][0]);
2083                 float fy= 2.0f/(R.winy*R.winmat[1][1]);
2084                 
2085                 hoco[0]= (x - 0.5f*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
2086                 hoco[1]= (y - 0.5f*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
2087                 
2088                 /* using a*x + b*y + c*z = d equation, (a b c) is normal */
2089                 if (nor[2]!=0.0f)
2090                         hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
2091                 else
2092                         hoco[2]= 0.0f;
2093         }
2094         else {
2095                 float div, view[3];
2096                 
2097                 calc_view_vector(view, x, y);
2098                 
2099                 div = dot_v3v3(nor, view);
2100                 if (div==0.0f) 
2101                         return 0;
2102                 
2103                 fac= dface/div;
2104                 
2105                 hoco[0]= fac*view[0];
2106                 hoco[1]= fac*view[1];
2107                 hoco[2]= fac*view[2];
2108         }
2109         
2110         /* move 3d vector to lampbuf */
2111         mul_m4_v4(shb->persmat, hoco);  /* rational hom co */
2112         
2113         /* clip We can test for -1.0/1.0 because of the properties of the
2114          * coordinate transformations. */
2115         fac= fabs(hoco[3]);
2116         if (hoco[0]<-fac || hoco[0]>fac)
2117                 return 0;
2118         if (hoco[1]<-fac || hoco[1]>fac)
2119                 return 0;
2120         if (hoco[2]<-fac || hoco[2]>fac)
2121                 return 0;
2122         
2123         siz= 0.5f*(float)shb->size;
2124         co_r[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
2125         co_r[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
2126         co_r[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
2127         
2128         /* XXXX bias, much less than normal shadbuf, or do we need a constant? */
2129         co_r[2] -= 0.05f*shb->bias;
2130         
2131         return 1;
2132 }
2133
2134 /* storage of shadow results, solid osa and transp case */
2135 static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
2136 {
2137         ISBShadfacA *new;
2138         float shadfacf;
2139         
2140         /* in osa case, the samples were filled in with factor 1.0/R.osa. if fewer samples we have to correct */
2141         if (R.osa)
2142                 shadfacf= ((float)shadfac*R.osa)/(4096.0f*samples);
2143         else
2144                 shadfacf= ((float)shadfac)/(4096.0f);
2145         
2146         new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
2147         new->obi= obi;
2148         new->facenr= facenr & ~RE_QUAD_OFFS;
2149         new->shadfac= shadfacf;
2150         if (*isbsapp)
2151                 new->next= (*isbsapp);
2152         else
2153                 new->next= NULL;
2154         
2155         *isbsapp= new;
2156 }
2157
2158 /* adding samples, solid case */
2159 static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
2160 {
2161         int xi, yi, *xcos, *ycos;
2162         int sample, bsp_err= 0;
2163         
2164         /* bsp split doesn't like to handle regular sequences */
2165         xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos");
2166         ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos");
2167         for (xi=0; xi<pa->rectx; xi++)
2168                 xcos[xi]= xi;
2169         for (yi=0; yi<pa->recty; yi++)
2170                 ycos[yi]= yi;
2171         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2172         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2173         
2174         for (sample=0; sample<(R.osa?R.osa:1); sample++) {
2175                 ISBSample *samp= samplebuf[sample], *samp1;
2176                 
2177                 for (yi=0; yi<pa->recty; yi++) {
2178                         int y= ycos[yi];
2179                         for (xi=0; xi<pa->rectx; xi++) {
2180                                 int x= xcos[xi];
2181                                 samp1= samp + y*pa->rectx + x;
2182                                 if (samp1->facenr)
2183                                         bsp_err |= isb_bsp_insert(root, memarena, samp1);
2184                         }
2185                         if (bsp_err) break;
2186                 }
2187         }       
2188         
2189         MEM_freeN(xcos);
2190         MEM_freeN(ycos);
2191
2192         return bsp_err;
2193 }
2194
2195 /* solid version */
2196 /* lar->shb, pa->rectz and pa->rectp should exist */
2197 static void isb_make_buffer(RenderPart *pa, LampRen *lar)
2198 {
2199         ShadBuf *shb= lar->shb;
2200         ISBData *isbdata;
2201         ISBSample *samp, *samplebuf[16];        /* should be RE_MAX_OSA */
2202         ISBBranch root;
2203         MemArena *memarena;
2204         intptr_t *rd;
2205         int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
2206         
2207         /* storage for shadow, per thread */
2208         isbdata= shb->isb_result[pa->thread];
2209         
2210         /* to map the shi->xs and ys coordinate */
2211         isbdata->minx= pa->disprect.xmin;
2212         isbdata->miny= pa->disprect.ymin;
2213         isbdata->rectx= pa->rectx;
2214         isbdata->recty= pa->recty;
2215         
2216         /* branches are added using memarena (32k branches) */
2217         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2218         BLI_memarena_use_calloc(memarena);
2219         
2220         /* samplebuf is in camera view space (pixels) */
2221         for (sample=0; sample<(R.osa?R.osa:1); sample++)
2222                 samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
2223         
2224         /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
2225         if (R.osa==0)
2226                 isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
2227         
2228         /* setup bsp root */
2229         memset(&root, 0, sizeof(ISBBranch));
2230         root.box.xmin = (float)shb->size;
2231         root.box.ymin = (float)shb->size;
2232         
2233         /* create the sample buffers */
2234         for (sindex=0, y=0; y<pa->recty; y++) {
2235                 for (x=0; x<pa->rectx; x++, sindex++) {
2236                         
2237                         /* this makes it a long function, but splitting it out would mean 10+ arguments */
2238                         /* first check OSA case */
2239                         if (R.osa) {
2240                                 rd= pa->rectdaps + sindex;
2241                                 if (*rd) {
2242                                         float xs= (float)(x + pa->disprect.xmin);
2243                                         float ys= (float)(y + pa->disprect.ymin);
2244                                         
2245                                         for (sample=0; sample<R.osa; sample++) {
2246                                                 PixStr *ps= (PixStr *)(*rd);
2247                                                 int mask= (1<<sample);
2248                                                 
2249                                                 while (ps) {
2250                                                         if (ps->mask & mask)
2251                                                                 break;
2252                                                         ps= ps->next;
2253                                                 }
2254                                                 if (ps && ps->facenr>0) {
2255                                                         ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
2256                                                         ObjectRen *obr= obi->obr;
2257                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
2258                                                         
2259                                                         samp= samplebuf[sample] + sindex;
2260                                                         /* convert image plane pixel location to lamp buffer space */
2261                                                         if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
2262                                                                 samp->obi= ps->obi;
2263                                                                 samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
2264                                                                 ps->shadfac= 0;
2265                                                                 samp->shadfac= &ps->shadfac;
2266                                                                 bound_rectf((rctf *)&root.box, samp->zco);
2267                                                         }
2268                                                 }
2269                                         }
2270                                 }
2271                         }
2272                         else {
2273                                 rectp= pa->rectp + sindex;
2274                                 recto= pa->recto + sindex;
2275                                 if (*rectp>0) {
2276                                         ObjectInstanceRen *obi= &R.objectinstance[*recto];
2277                                         ObjectRen *obr= obi->obr;
2278                                         VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
2279                                         float xs= (float)(x + pa->disprect.xmin);
2280                                         float ys= (float)(y + pa->disprect.ymin);
2281                                         
2282                                         samp= samplebuf[0] + sindex;
2283                                         /* convert image plane pixel location to lamp buffer space */
2284                                         if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
2285                                                 samp->obi= *recto;
2286                                                 samp->facenr= *rectp & ~RE_QUAD_OFFS;
2287                                                 samp->shadfac= isbdata->shadfacs + sindex;
2288                                                 bound_rectf((rctf *)&root.box, samp->zco);
2289                                         }
2290                                 }
2291                         }
2292                 }
2293         }
2294         
2295         /* simple method to see if we have samples */
2296         if (root.box.xmin != (float)shb->size) {
2297                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2298                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2299                 isb_bsp_split_init(&root, memarena, 8);
2300                 
2301                 /* insert all samples in BSP now */
2302                 bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
2303                         
2304                 if (bsp_err==0) {
2305                         /* go over all faces and fill in shadow values */
2306                         
2307                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2308                         
2309                         /* copy shadow samples to persistent buffer, reduce memory overhead */
2310                         if (R.osa) {
2311                                 ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2312                                 
2313                                 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2314                                 BLI_memarena_use_calloc(isbdata->memarena);
2315
2316                                 for (rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
2317                                         
2318                                         if (*rd) {
2319                                                 PixStr *ps= (PixStr *)(*rd);
2320                                                 while (ps) {
2321                                                         if (ps->shadfac)
2322                                                                 isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
2323                                                         ps= ps->next;
2324                                                 }
2325                                         }
2326                                 }
2327                         }
2328                 }
2329         }
2330         else {
2331                 if (isbdata->shadfacs) {
2332                         MEM_freeN(isbdata->shadfacs);
2333                         isbdata->shadfacs= NULL;
2334                 }
2335         }
2336
2337         /* free BSP */
2338         BLI_memarena_free(memarena);
2339         
2340         /* free samples */
2341         for (x=0; x<(R.osa?R.osa:1); x++)
2342                 MEM_freeN(samplebuf[x]);
2343         
2344         if (bsp_err) printf("error in filling bsp\n");
2345 }
2346
2347 /* add sample to buffer, isbsa is the root sample in a buffer */
2348 static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
2349 {
2350         ISBSampleA *new;
2351         
2352         new= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
2353         if (*isbsa)
2354                 new->next= (*isbsa);
2355         else
2356                 new->next= NULL;
2357         
2358         *isbsa= new;
2359         return new;
2360 }
2361
2362 /* adding samples in BSP, transparent case */
2363 static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
2364 {
2365         int xi, yi, *xcos, *ycos;
2366         int sample, bsp_err= 0;
2367         
2368         /* bsp split doesn't like to handle regular sequences */
2369         xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos");
2370         ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos");
2371         for (xi=0; xi<pa->rectx; xi++)
2372                 xcos[xi]= xi;
2373         for (yi=0; yi<pa->recty; yi++)
2374                 ycos[yi]= yi;
2375         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2376         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2377         
2378         for (sample=0; sample<(R.osa?R.osa:1); sample++) {
2379                 ISBSampleA **samp= samplebuf[sample], *samp1;
2380                 
2381                 for (yi=0; yi<pa->recty; yi++) {
2382                         int y= ycos[yi];
2383                         for (xi=0; xi<pa->rectx; xi++) {
2384                                 int x= xcos[xi];
2385                                 
2386                                 samp1= *(samp + y*pa->rectx + x);
2387                                 while (samp1) {
2388                                         bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
2389                                         samp1= samp1->next;
2390                                 }
2391                         }
2392                         if (bsp_err) break;
2393                 }
2394         }       
2395         
2396         MEM_freeN(xcos);
2397         MEM_freeN(ycos);
2398         
2399         return bsp_err;
2400 }
2401
2402
2403 /* Ztransp version */
2404 /* lar->shb, pa->rectz and pa->rectp should exist */
2405 static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
2406 {
2407         ShadBuf *shb= lar->shb;
2408         ISBData *isbdata;
2409         ISBSampleA *samp, **samplebuf[16];      /* MAX_OSA */
2410         ISBBranch root;
2411         MemArena *memarena;
2412         APixstr *ap;
2413         int x, y, sindex, sample, bsp_err=0;
2414         
2415         /* storage for shadow, per thread */
2416         isbdata= shb->isb_result[pa->thread];
2417         
2418         /* to map the shi->xs and ys coordinate */
2419         isbdata->minx= pa->disprect.xmin;
2420         isbdata->miny= pa->disprect.ymin;
2421         isbdata->rectx= pa->rectx;
2422         isbdata->recty= pa->recty;
2423         
2424         /* branches are added using memarena (32k branches) */
2425         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2426         BLI_memarena_use_calloc(memarena);
2427         
2428         /* samplebuf is in camera view space (pixels) */
2429         for (sample=0; sample<(R.osa?R.osa:1); sample++)
2430                 samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
2431         
2432         /* setup bsp root */
2433         memset(&root, 0, sizeof(ISBBranch));
2434         root.box.xmin = (float)shb->size;
2435         root.box.ymin = (float)shb->size;
2436
2437         /* create the sample buffers */
2438         for (ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
2439                 for (x=0; x<pa->rectx; x++, sindex++, ap++) {
2440                         
2441                         if (ap->p[0]) {
2442                                 APixstr *apn;
2443                                 float xs= (float)(x + pa->disprect.xmin);
2444                                 float ys= (float)(y + pa->disprect.ymin);
2445                                 
2446                                 for (apn=ap; apn; apn= apn->next) {
2447                                         int a;
2448                                         for (a=0; a<4; a++) {
2449                                                 if (apn->p[a]) {
2450                                                         ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
2451                                                         ObjectRen *obr= obi->obr;
2452                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
2453                                                         float zco[3];
2454                                                         
2455                                                         /* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
2456                                                         apn->shadfac[a]= 0;
2457                                                         
2458                                                         if (R.osa) {
2459                                                                 for (sample=0; sample<R.osa; sample++) {
2460                                                                         int mask= (1<<sample);
2461                                                                         
2462                                                                         if (apn->mask[a] & mask) {
2463                                                                                 
2464                                                                                 /* convert image plane pixel location to lamp buffer space */
2465                                                                                 if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
2466                                                                                         samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
2467                                                                                         samp->obi= apn->obi[a];
2468                                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2469                                                                                         samp->shadfac= &apn->shadfac[a];
2470                                                                                         
2471                                                                                         copy_v3_v3(samp->zco, zco);
2472                                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2473                                                                                 }
2474                                                                         }
2475                                                                 }
2476                                                         }
2477                                                         else {
2478                                                                 
2479                                                                 /* convert image plane pixel location to lamp buffer space */
2480                                                                 if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
2481                                                                         
2482                                                                         samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
2483                                                                         samp->obi= apn->obi[a];
2484                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2485                                                                         samp->shadfac= &apn->shadfac[a];
2486                                                                         
2487                                                                         copy_v3_v3(samp->zco, zco);
2488                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2489                                                                 }
2490                                                         }
2491                                                 }
2492                                         }
2493                                 }
2494                         }
2495                 }
2496         }
2497         
2498         /* simple method to see if we have samples */
2499         if (root.box.xmin != (float)shb->size) {
2500                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2501                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2502                 isb_bsp_split_init(&root, memarena, 8);
2503                 
2504                 /* insert all samples in BSP now */
2505                 bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
2506                 
2507                 if (bsp_err==0) {
2508                         ISBShadfacA **isbsa;
2509                         
2510                         /* go over all faces and fill in shadow values */
2511                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2512                         
2513                         /* copy shadow samples to persistent buffer, reduce memory overhead */
2514                         isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2515                         
2516                         isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2517                         
2518                         for (ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
2519                                         
2520                                 if (ap->p[0]) {
2521                                         APixstr *apn;
2522                                         for (apn=ap; apn; apn= apn->next) {
2523                                                 int a;
2524                                                 for (a=0; a<4; a++) {
2525                                                         if (apn->p[a] && apn->shadfac[a]) {
2526                                                                 if (R.osa)
2527                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
2528                                                                 else
2529                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);
2530                                                         }
2531                                                 }
2532                                         }
2533                                 }
2534                         }
2535                 }
2536         }
2537
2538         /* free BSP */
2539         BLI_memarena_free(memarena);
2540
2541         /* free samples */
2542         for (x=0; x<(R.osa?R.osa:1); x++)
2543                 MEM_freeN(samplebuf[x]);
2544
2545         if (bsp_err) printf("error in filling bsp\n");
2546 }
2547
2548
2549
2550 /* exported */
2551
2552 /* returns amount of light (1.0 = no shadow) */
2553 /* note, shadepixel() rounds the coordinate, not the real sample info */
2554 float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
2555 {
2556         /* if raytracing, we can't accept irregular shadow */
2557         if (shi->depth==0) {
2558                 ISBData *isbdata= shb->isb_result[shi->thread];
2559                 
2560                 if (isbdata) {
2561                         if (isbdata->shadfacs || isbdata->shadfaca) {
2562                                 int x= shi->xs - isbdata->minx;
2563                                 
2564                                 if (x >= 0 && x < isbdata->rectx) {
2565                                         int y= shi->ys - isbdata->miny;
2566                         
2567                                         if (y >= 0 && y < isbdata->recty) {
2568                                                 if (isbdata->shadfacs) {
2569                                                         short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
2570                                                         return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
2571                                                 }
2572                                                 else {
2573                                                         int sindex= y*isbdata->rectx + x;
2574                                                         int obi= shi->obi - R.objectinstance;
2575                                                         ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex);
2576                                                         
2577                                                         while (isbsa) {
2578                                                                 if (isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
2579                                                                         return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
2580                                                                 isbsa= isbsa->next;
2581                                                         }
2582                                                 }
2583                                         }
2584                                 }
2585                         }
2586                 }
2587         }
2588         return 1.0f;
2589 }
2590
2591 /* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
2592 void ISB_create(RenderPart *pa, APixstr *apixbuf)
2593 {
2594         GroupObject *go;
2595         
2596         /* go over all lamps, and make the irregular buffers */
2597         for (go=R.lights.first; go; go= go->next) {
2598                 LampRen *lar= go->lampren;
2599                 
2600                 if (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2601                         
2602                         /* create storage for shadow, per thread */
2603                         lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
2604                         
2605                         if (apixbuf)
2606                                 isb_make_buffer_transp(pa, apixbuf, lar);
2607                         else
2608                                 isb_make_buffer(pa, lar);
2609                 }
2610         }
2611 }
2612
2613
2614 /* end of part rendering, free stored shadow data for this thread from all lamps */
2615 void ISB_free(RenderPart *pa)
2616 {
2617         GroupObject *go;
2618         
2619         /* go over all lamps, and free the irregular buffers */
2620         for (go=R.lights.first; go; go= go->next) {
2621                 LampRen *lar= go->lampren;
2622                 
2623                 if (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2624                         ISBData *isbdata= lar->shb->isb_result[pa->thread];
2625
2626                         if (isbdata) {
2627                                 if (isbdata->shadfacs)
2628                                         MEM_freeN(isbdata->shadfacs);
2629                                 if (isbdata->shadfaca)
2630                                         MEM_freeN(isbdata->shadfaca);
2631                                 
2632                                 if (isbdata->memarena)
2633                                         BLI_memarena_free(isbdata->memarena);
2634                                 
2635                                 MEM_freeN(isbdata);
2636                                 lar->shb->isb_result[pa->thread]= NULL;
2637                         }
2638                 }
2639         }
2640 }