Blender Internal: Add material property "Cast" which can disable both ray and buffer...
[blender.git] / source / blender / render / intern / source / shadbuf.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * Contributor(s): 2004-2006, Blender Foundation
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 /** \file blender/render/intern/source/shadbuf.c
27  *  \ingroup render
28  */
29
30
31 #include <math.h>
32 #include <string.h>
33
34
35 #include "MEM_guardedalloc.h"
36
37 #include "DNA_group_types.h"
38 #include "DNA_lamp_types.h"
39 #include "DNA_material_types.h"
40
41 #include "BLI_math.h"
42 #include "BLI_blenlib.h"
43 #include "BLI_jitter.h"
44 #include "BLI_memarena.h"
45 #include "BLI_rand.h"
46 #include "BLI_utildefines.h"
47
48 #include "BKE_global.h"
49 #include "BKE_scene.h"
50
51 #include "PIL_time.h"
52
53 #include "renderpipeline.h"
54 #include "render_types.h"
55 #include "renderdatabase.h"
56 #include "rendercore.h"
57 #include "shadbuf.h"
58 #include "shading.h"
59 #include "zbuf.h"
60
/* XXX, could be better implemented... this is for endian issues.
 *
 * Byte indices into a 32-bit z value when it is addressed as four chars.
 * ACOMP selects the most significant byte, BCOMP and GCOMP the next two;
 * compress_shadowbuf() uses them to pick out the upper three bytes of each
 * z value. The index of the least significant byte (RCOMP) is left
 * commented out below. */
#ifdef __BIG_ENDIAN__
//#  define RCOMP       3
#  define GCOMP 2
#  define BCOMP 1
#  define ACOMP 0
#else
//#  define RCOMP       0
#  define GCOMP 1
#  define BCOMP 2
#  define ACOMP 3
#endif
73
74 #define RCT_SIZE_X(rct)       ((rct)->xmax - (rct)->xmin)
75 #define RCT_SIZE_Y(rct)       ((rct)->ymax - (rct)->ymin)
76
77 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
78 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
79 /* only to be used here in this file, it's for speed */
80 extern struct Render R;
81 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
82
83 /* ------------------------------------------------------------------------- */
84
85 /* initshadowbuf() in convertBlenderScene.c */
86
87 /* ------------------------------------------------------------------------- */
88
/**
 * Copy one square tile out of a square z-buffer into a packed help buffer.
 *
 * \param rectz  Source z-buffer, \a size by \a size ints, row-major.
 * \param size   Width and height of \a rectz in pixels.
 * \param x1     Left column of the tile inside \a rectz.
 * \param y1     Top row of the tile inside \a rectz.
 * \param tile   Edge length of the tile in pixels.
 * \param r1     Destination; rows are written \a tile ints apart, so it must
 *               have room for tile*tile ints.
 *
 * The tile is clipped against the buffer; nothing is written when the clipped
 * tile is empty. Rows of a clipped tile keep the full \a tile stride, so pixel
 * (x, y) of the tile always lands at index y*tile + x of the destination.
 */
static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
{
	const size_t stride = sizeof(int) * (size_t)tile;
	size_t len;
	int *rz;
	int x2, y2;

	/* x2/y2 are exclusive end coordinates: the last valid value is size,
	 * not size-1. Clamping to size-1 dropped the last row and column of
	 * every tile touching the far buffer edge. */
	x2 = x1 + tile;
	y2 = y1 + tile;
	if (x2 > size) x2 = size;
	if (y2 > size) y2 = size;

	if (x1 >= x2 || y1 >= y2) return;

	len = sizeof(int) * (size_t)(x2 - x1);
	rz = rectz + size * y1 + x1;
	for (; y1 < y2; y1++) {
		memcpy(r1, rz, len);
		rz += size;
		/* fixed stride keeps the tile layout independent of clipping */
		r1 += stride;
	}
}
109
/* Disabled helper, kept for reference only (compiled out by the '#if 0').
 * It sums one byte per 256-pixel tile from shb->cbuf and returns that sum
 * multiplied by 256. */
#if 0
static int sizeoflampbuf(ShadBuf *shb)
{
	int num, count=0;
	char *cp;
	
	cp= shb->cbuf;
	num= (shb->size*shb->size)/256;

	while (num--) count+= *(cp++);
	
	return 256*count;
}
#endif
124
/**
 * Return the jitter table for \a samp x \a samp shadow samples.
 *
 * \a samp is clamped to the range 2..16. The table is generated on the first
 * request for a given sample count and cached in function-static storage, so
 * this is not threadsafe.
 *
 * \return pointer to samp*samp (x, y) float pairs inside the static store.
 */
static float *give_jitter_tab(int samp)
{
	/* All possible tables are packed back to back in one static array
	 * (about 12k in total); table_len[i] is the entry count of the table
	 * for i+1 samples. For soft shadows this saves memory and render time. */
	static int table_len[17]={1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256};
	static float store[1496][2];
	static char is_filled[17]= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int start = 0;
	int i;

	samp = (samp < 2) ? 2 : ((samp > 16) ? 16 : samp);

	/* skip over the tables of all smaller sample counts */
	for (i = 0; i < samp - 1; i++) {
		start += table_len[i];
	}

	if (!is_filled[samp]) {
		is_filled[samp] = 1;
		BLI_jitter_init((float (*)[2])store[start], samp * samp);
	}

	return store[start];
}
150
151 static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype) 
152 {
153         float *jit, totw= 0.0f;
154         int samp= get_render_shadow_samples(&re->r, shb->samp);
155         int a, tot=samp*samp;
156         
157         shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
158         
159         for (jit= shb->jit, a=0; a<tot; a++, jit+=2) {
160                 if (filtertype==LA_SHADBUF_TENT)
161                         shb->weight[a] = 0.71f - sqrtf(jit[0] * jit[0] + jit[1] * jit[1]);
162                 else if (filtertype==LA_SHADBUF_GAUSS)
163                         shb->weight[a] = RE_filter_value(R_FILTER_GAUSS, 1.8f * sqrtf(jit[0] * jit[0] + jit[1] * jit[1]));
164                 else
165                         shb->weight[a]= 1.0f;
166                 
167                 totw+= shb->weight[a];
168         }
169         
170         totw= 1.0f/totw;
171         for (a=0; a<tot; a++) {
172                 shb->weight[a]*= totw;
173         }
174 }
175
176 static int verg_deepsample(const void *poin1, const void *poin2)
177 {
178         const DeepSample *ds1= (const DeepSample*)poin1;
179         const DeepSample *ds2= (const DeepSample*)poin2;
180
181         if (ds1->z < ds2->z) return -1;
182         else if (ds1->z == ds2->z) return 0;
183         else return 1;
184 }
185
/**
 * Compress an array of deep shadow samples in place.
 *
 * Starting from the last sample that was kept, following samples are
 * skipped for as long as a single straight line through the kept sample can
 * pass within \a epsilon (in visibility) of every skipped one. When that is
 * no longer possible a new sample is written and the process restarts there.
 *
 * \param dsample  Samples to compress; overwritten with the result. The
 *                 caller in this file passes them in increasing z order.
 * \param tot      Number of samples in \a dsample.
 * \param epsilon  Allowed visibility error.
 * \return number of samples left at the start of \a dsample. Note this is
 *         at least 1, also when \a tot is 0 or 1.
 */
static int compress_deepsamples(DeepSample *dsample, int tot, float epsilon)
{
	/* uses doubles to avoid overflows and other numerical issues,
	 * could be improved */
	DeepSample *ds, *newds;
	float v;
	double slope, slopemin, slopemax, min, max, div, newmin, newmax;
	int a, first, z, newtot= 0;

#if 0
	if (print) {
		for (a=0, ds=dsample; a<tot; a++, ds++)
			printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v);
		printf("\n");
	}
#endif

	/* read from and write into same array:
	 * ds is the read cursor, newds the last sample written */
	ds= dsample;
	newds= dsample;
	a= 0;

	/* as long as we are not at the end of the array */
	for (a++, ds++; a<tot; a++, ds++) {
		/* [slopemin, slopemax] is the range of line slopes through newds
		 * that stays within epsilon of all samples visited so far */
		slopemin= 0.0f;
		slopemax= 0.0f;
		first= 1;

		for (; a<tot; a++, ds++) {
			//dz= ds->z - newds->z;
			if (ds->z == newds->z) {
				/* still in same z position, simply check
				 * visibility difference against epsilon */
				if (!(fabsf(newds->v - ds->v) <= epsilon)) {
					break;
				}
			}
			else {
				/* compute slopes, with z rescaled by 0x7FFFFFFF */
				div= (double)0x7FFFFFFF / ((double)ds->z - (double)newds->z);
				min= (double)((ds->v - epsilon) - newds->v) * div;
				max= (double)((ds->v + epsilon) - newds->v) * div;

				/* adapt existing slopes */
				if (first) {
					newmin= min;
					newmax= max;
					first= 0;
				}
				else {
					newmin= MAX2(slopemin, min);
					newmax= MIN2(slopemax, max);

					/* verify if there is still space between the slopes */
					if (newmin > newmax) {
						/* this sample does not fit anymore, step back
						 * to the last one that did */
						ds--;
						a--;
						break;
					}
				}

				slopemin= newmin;
				slopemax= newmax;
			}
		}

		/* inner loop ran off the end: step back onto the last sample */
		if (a == tot) {
			ds--;
			a--;
		}

		/* always previous z */
		z= ds->z;

		if (first || a==tot-1) {
			/* if slopes were not initialized, use last visibility
			 * (the last sample of the array also keeps its own value) */
			v= ds->v;
		}
		else {
			/* compute visibility at center between slopes at z */
			slope = (slopemin + slopemax) * 0.5;
			v = (double)newds->v + slope * ((double)(z - newds->z) / (double)0x7FFFFFFF);
		}

		/* write the new sample behind the previously written one */
		newds++;
		newtot++;

		newds->z= z;
		newds->v= v;
	}

	/* newtot counted the samples written after the first one; add one more
	 * unless the last two written samples have the same visibility
	 * (when nothing was written the result is a single sample) */
	if (newtot == 0 || (newds->v != (newds-1)->v))
		newtot++;

#if 0
	if (print) {
		for (a=0, ds=dsample; a<newtot; a++, ds++)
			printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v);
		printf("\n");
	}
#endif

	return newtot;
}
290
291 static float deep_alpha(Render *re, int obinr, int facenr, int strand)
292 {
293         ObjectInstanceRen *obi= &re->objectinstance[obinr];
294         Material *ma;
295
296         if (strand) {
297                 StrandRen *strand= RE_findOrAddStrand(obi->obr, facenr-1);
298                 ma= strand->buffer->ma;
299         }
300         else {
301                 VlakRen *vlr= RE_findOrAddVlak(obi->obr, (facenr-1) & RE_QUAD_MASK);
302                 ma= vlr->mat;
303         }
304
305         return ma->shad_alpha;
306 }
307
/**
 * Convert the A-buffer of a lamp into a compressed deep shadow buffer.
 *
 * A new ShadSampleBuf is appended to shb->buffers. For every pixel it gets
 * an array of DeepSample (z, visibility) entries in deepbuf[] and the
 * entry count in totbuf[]; pixels without any sample get NULL and 0.
 *
 * \param re             Render, used to look up material shadow alpha.
 * \param shb            Shadow buffer; reads size, totbuf and compressthresh.
 * \param apixbuf        A-buffer with face samples, size*size entries.
 * \param apixbufstrand  A-buffer with strand samples, or NULL.
 */
static void compress_deepshadowbuf(Render *re, ShadBuf *shb, APixstr *apixbuf, APixstrand *apixbufstrand)
{
	ShadSampleBuf *shsample;
	/* per sub-buffer: ds[] is a moving cursor, sampleds[] the start of its samples */
	DeepSample *ds[RE_MAX_OSA], *sampleds[RE_MAX_OSA], *dsb, *newbuf;
	APixstr *ap, *apn;
	APixstrand *aps, *apns;
	float visibility;

	const int totbuf= shb->totbuf;
	const float totbuf_f= (float)shb->totbuf;
	const float totbuf_f_inv= 1.0f/totbuf_f;
	const int size= shb->size;

	int a, b, c, tot, minz, found, prevtot, newtot;
	/* totsample/totsamplec are only read by the commented-out printf at the end */
	int sampletot[RE_MAX_OSA], totsample = 0, totsamplec = 0;
	
	shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
	BLI_addtail(&shb->buffers, shsample);

	shsample->totbuf = MEM_callocN(sizeof(int) * size * size, "deeptotbuf");
	shsample->deepbuf = MEM_callocN(sizeof(DeepSample *) * size * size, "deepbuf");

	ap= apixbuf;
	aps= apixbufstrand;
	for (a=0; a<size*size; a++, ap++, aps++) {
		/* count number of samples: bit c of an entry's mask tells whether
		 * the entry belongs to sub-buffer c */
		for (c=0; c<totbuf; c++)
			sampletot[c]= 0;

		tot= 0;
		for (apn=ap; apn; apn=apn->next)
			for (b=0; b<4; b++)
				if (apn->p[b])
					for (c=0; c<totbuf; c++)
						if (apn->mask[b] & (1<<c))
							sampletot[c]++;

		if (apixbufstrand) {
			for (apns=aps; apns; apns=apns->next)
				for (b=0; b<4; b++)
					if (apns->p[b])
						for (c=0; c<totbuf; c++)
							if (apns->mask[b] & (1<<c))
								sampletot[c]++;
		}

		for (c=0; c<totbuf; c++)
			tot += sampletot[c];

		if (tot == 0) {
			shsample->deepbuf[a]= NULL;
			shsample->totbuf[a]= 0;
			continue;
		}

		/* fill samples: one allocation holds the samples of all sub-buffers
		 * back to back, two DeepSample entries per A-buffer sample */
		ds[0]= sampleds[0]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
		for (c=1; c<totbuf; c++)
			ds[c]= sampleds[c]= sampleds[c-1] + sampletot[c-1]*2;

		for (apn=ap; apn; apn=apn->next) {
			for (b=0; b<4; b++) {
				if (apn->p[b]) {
					for (c=0; c<totbuf; c++) {
						if (apn->mask[b] & (1<<c)) {
							/* two entries to create step profile;
							 * the second one temporarily stores the alpha */
							ds[c]->z= apn->z[b];
							ds[c]->v= 1.0f; /* not used */
							ds[c]++;
							ds[c]->z= apn->z[b];
							ds[c]->v= deep_alpha(re, apn->obi[b], apn->p[b], 0);
							ds[c]++;
						}
					}
				}
			}
		}

		if (apixbufstrand) {
			for (apns=aps; apns; apns=apns->next) {
				for (b=0; b<4; b++) {
					if (apns->p[b]) {
						for (c=0; c<totbuf; c++) {
							if (apns->mask[b] & (1<<c)) {
								/* two entries to create step profile;
								 * the second one temporarily stores the alpha */
								ds[c]->z= apns->z[b];
								ds[c]->v= 1.0f; /* not used */
								ds[c]++;
								ds[c]->z= apns->z[b];
								ds[c]->v= deep_alpha(re, apns->obi[b], apns->p[b], 1);
								ds[c]++;
							}
						}
					}
				}
			}
		}

		for (c=0; c<totbuf; c++) {
			/* sort by increasing z; the element size is two DeepSamples so
			 * that each pair is moved as one unit */
			qsort(sampleds[c], sampletot[c], sizeof(DeepSample)*2, verg_deepsample);

			/* sum visibility, replacing alpha values */
			visibility= 1.0f;
			ds[c]= sampleds[c];

			for (b=0; b<sampletot[c]; b++) {
				/* two entries creating step profile:
				 * visibility in front of the sample, then behind it */
				ds[c]->v= visibility;
				ds[c]++;

				visibility *= 1.0f-ds[c]->v;
				ds[c]->v= visibility;
				ds[c]++;
			}

			/* halfway trick, probably won't work well for volumes?
			 * each z becomes the average of its own z and the z of the
			 * next pair; the last pair is averaged with the maximum z */
			ds[c]= sampleds[c];
			for (b=0; b<sampletot[c]; b++) {
				if (b+1 < sampletot[c]) {
					ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
					ds[c]++;
					ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
					ds[c]++;
				}
				else {
					ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
					ds[c]++;
					ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
					ds[c]++;
				}
			}

			/* init for merge loop: from here on sampletot[] counts the
			 * remaining DeepSample entries instead of pairs */
			ds[c]= sampleds[c];
			sampletot[c] *= 2;
		}

		shsample->deepbuf[a]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
		shsample->totbuf[a]= 0;

		/* merge buffers: repeatedly take the smallest pending z of all
		 * sub-buffers and store the visibility averaged over them */
		dsb= shsample->deepbuf[a];
		while (1) {
			minz= 0;
			found= 0;

			for (c=0; c<totbuf; c++) {
				if (sampletot[c] && (!found || ds[c]->z < minz)) {
					minz= ds[c]->z;
					found= 1;
				}
			}

			if (!found)
				break;

			dsb->z= minz;
			dsb->v= 0.0f;

			visibility= 0.0f;
			for (c=0; c<totbuf; c++) {
				/* consume one entry of each sub-buffer that is at minz */
				if (sampletot[c] && ds[c]->z == minz) {
					ds[c]++;
					sampletot[c]--;
				}

				/* a sub-buffer that has not consumed any entry yet counts
				 * as fully visible, otherwise its last consumed entry counts */
				if (sampleds[c] == ds[c])
					visibility += totbuf_f_inv;
				else
					visibility += (ds[c]-1)->v / totbuf_f;
			}

			dsb->v= visibility;
			dsb++;
			shsample->totbuf[a]++;
		}

		prevtot= shsample->totbuf[a];
		totsample += prevtot;

		newtot= compress_deepsamples(shsample->deepbuf[a], prevtot, shb->compressthresh);
		shsample->totbuf[a]= newtot;
		totsamplec += newtot;

		/* shrink the allocation when compression removed samples */
		if (newtot < prevtot) {
			newbuf= MEM_mallocN(sizeof(DeepSample)*newtot, "cdeepsample");
			memcpy(newbuf, shsample->deepbuf[a], sizeof(DeepSample)*newtot);
			MEM_freeN(shsample->deepbuf[a]);
			shsample->deepbuf[a]= newbuf;
		}

		/* sampleds[0] is the start of the shared per-pixel allocation */
		MEM_freeN(sampleds[0]);
	}

	//printf("%d -> %d, ratio %f\n", totsample, totsamplec, (float)totsamplec/(float)totsample);
}
505
/**
 * create Z tiles (for compression): this system is 24 bits!!!
 *
 * A new ShadSampleBuf is appended to shb->buffers and filled from \a rectz
 * in tiles of 16x16 pixels. Per tile one byte in cbuf tells how the tile is
 * stored and one entry in zbuf holds either the value or a pointer:
 * - 0: all pixels share the same upper three bytes, zbuf holds a z value.
 * - 1: the two highest bytes are constant; zbuf points to one int followed
 *      by one byte per pixel.
 * - 2: only the highest byte is constant; zbuf points to one int followed
 *      by two bytes per pixel.
 * - 3: zbuf points to three bytes per pixel.
 *
 * \param shb     Shadow buffer; reads size, appends to buffers.
 * \param rectz   Z-buffer of size*size ints.
 * \param square  When zero, tiles far enough from the buffer centre are
 *                stored as empty without looking at \a rectz.
 */
static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
{
	ShadSampleBuf *shsample;
	float dist;
	uintptr_t *ztile;
	int *rz, *rz1, verg, verg1, size= shb->size;
	int a, x, y, minx, miny, byt1, byt2;
	char *rc, *rcline, *ctile, *zt;
	
	shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
	BLI_addtail(&shb->buffers, shsample);
	
	/* one zbuf entry and one cbuf byte per 256-pixel tile */
	shsample->zbuf= MEM_mallocN(sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
	shsample->cbuf= MEM_callocN((size*size)/256, "initshadbuf3");
	
	ztile= (uintptr_t *)shsample->zbuf;
	ctile= shsample->cbuf;
	
	/* help buffer, holds the 256 z values of the current tile.
	 * NOTE(review): it is not cleared between tiles, so if copy_to_ztile()
	 * writes fewer than 256 values the rest is left over from before. */
	rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
	
	for (y=0; y<size; y+=16) {
		/* miny/minx: tile coordinate relative to the buffer centre, taken
		 * on the tile edge that faces the centre */
		if (y< size/2) miny= y+15-size/2;
		else miny= y-size/2;
		
		for (x=0; x<size; x+=16) {
			
			/* is tile within spotbundle? */
			a= size/2;
			if (x< a) minx= x+15-a;
			else minx= x-a;
			
			dist= sqrt( (float)(minx*minx+miny*miny) );
			
			if (square==0 && dist>(float)(a+12)) {  /* 12, tested with a onlyshadow lamp */
				a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
				/* so that *(rz1-1) below reads verg */
				rz1= (&verg)+1;
			}
			else {
				copy_to_ztile(rectz, size, x, y, 16, rcline);
				rz1= (int *)rcline;
				
				verg= (*rz1 & 0xFFFFFF00);
				
				/* a ends at 256 when all values agree in the upper three bytes */
				for (a=0;a<256;a++, rz1++) {
					if ( (*rz1 & 0xFFFFFF00) !=verg) break;
				}
			}
			if (a==256) { /* complete empty tile */
				*ctile= 0;
				*ztile= *(rz1-1);
			}
			else {
				
				/* ACOMP etc. are defined to work L/B endian */
				
				/* byt1: ACOMP byte equal for all pixels, byt2: BCOMP byte
				 * equal for the pixels checked before byt1 failed */
				rc= rcline;
				rz1= (int *)rcline;
				verg=  rc[ACOMP];
				verg1= rc[BCOMP];
				rc+= 4;
				byt1= 1; byt2= 1;
				for (a=1;a<256;a++, rc+=4) {
					byt1 &= (verg==rc[ACOMP]);
					byt2 &= (verg1==rc[BCOMP]);
					
					if (byt1==0) break;
				}
				if (byt1 && byt2) {     /* only store byte */
					*ctile= 1;
					*ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
					/* first the complete z value of pixel 0 ... */
					rz= (int *)*ztile;
					*rz= *rz1;
					
					/* ... then the GCOMP byte of every pixel */
					zt= (char *)(rz+1);
					rc= rcline;
					for (a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];
				}
				else if (byt1) {                /* only store short */
					*ctile= 2;
					*ztile= (uintptr_t)MEM_mallocN(2*256+4, "Tile2");
					/* first the complete z value of pixel 0 ... */
					rz= (int *)*ztile;
					*rz= *rz1;
					
					/* ... then the BCOMP and GCOMP bytes of every pixel */
					zt= (char *)(rz+1);
					rc= rcline;
					for (a=0; a<256; a++, zt+=2, rc+=4) {
						zt[0]= rc[BCOMP];
						zt[1]= rc[GCOMP];
					}
				}
				else {                  /* store triple */
					*ctile= 3;
					*ztile= (uintptr_t)MEM_mallocN(3*256, "Tile3");

					zt= (char *)*ztile;
					rc= rcline;
					for (a=0; a<256; a++, zt+=3, rc+=4) {
						zt[0]= rc[ACOMP];
						zt[1]= rc[BCOMP];
						zt[2]= rc[GCOMP];
					}
				}
			}
			ztile++;
			ctile++;
		}
	}

	MEM_freeN(rcline);
}
618
/**
 * sets start/end clipping. lar->shb should be initialized
 *
 * Transforms the vertices of all shadow casting faces into lamp space and
 * takes the smallest and largest distance of those that are in front of
 * the lamp and inside its (slightly enlarged) spot cone. Depending on
 * lar->bufflag these become the new clip start (shb->d) and/or clip end
 * (shb->clipend), with some margin, and shb->bias is rescaled to the new
 * range. Nothing is changed when no usable depth range was found.
 *
 * \param re   Render with the object and instance tables to scan.
 * \param lar  Lamp whose shadow buffer clipping is updated.
 */
static void shadowbuf_autoclip(Render *re, LampRen *lar)
{
	ObjectInstanceRen *obi;
	ObjectRen *obr;
	VlakRen *vlr= NULL;
	VertRen *ver= NULL;
	Material *ma= NULL;
	float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
	/* all layers, unless the lamp is restricted to its own layer below */
	unsigned int lay = -1;
	int i, a, maxtotvert, ok= 1;
	char *clipflag;
	
	minz= 1.0e30f; maxz= -1.0e30f;
	copy_m4_m4(viewmat, lar->shb->viewmat);
	
	if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;

	/* one flag array, large enough for the biggest object, is reused */
	maxtotvert= 0;
	for (obr=re->objecttable.first; obr; obr=obr->next)
		maxtotvert = max_ii(obr->totvert, maxtotvert);

	clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag");

	/* set clip in vertices when face visible */
	for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
		obr= obi->obr;

		if (obi->flag & R_TRANSFORMED)
			mul_m4_m4m4(obviewmat, viewmat, obi->mat);
		else
			copy_m4_m4(obviewmat, viewmat);

		memset(clipflag, 0, sizeof(char)*obr->totvert);

		/* clear clip, is being set if face is visible (clip is calculated for real later) */
		for (a=0; a<obr->totvlak; a++) {
			/* faces are stored in chunks of 256 */
			if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
			else vlr++;
			
			/* note; these conditions are copied from zbuffer_shadow().
			 * 'ok' is only re-evaluated when the material changes */
			if (vlr->mat!= ma) {
				ma= vlr->mat;
				ok= 1;
				if ((ma->mode2 & MA_CASTSHADOW)==0 || (ma->mode & MA_SHADBUF)==0) ok= 0;
			}
			
			if (ok && (obi->lay & lay)) {
				clipflag[vlr->v1->index]= 1;
				clipflag[vlr->v2->index]= 1;
				clipflag[vlr->v3->index]= 1;
				if (vlr->v4) clipflag[vlr->v4->index]= 1;
			}
		}
		
		/* calculate min and max */
		for (a=0; a< obr->totvert;a++) {
			/* the vertex pointer is looked up again every 256 vertices */
			if ((a & 255)==0) ver= RE_findOrAddVert(obr, a);
			else ver++;
			
			if (clipflag[a]) {
				copy_v3_v3(vec, ver->co);
				mul_m4_v3(obviewmat, vec);
				/* Z on visible side of lamp space */
				if (vec[2] < 0.0f) {
					float inpr, z= -vec[2];
					
					/* since vec is rotated in lampspace, this is how to get the cosine of angle */
					/* precision is set 20% larger */
					vec[2]*= 1.2f;
					normalize_v3(vec);
					inpr= - vec[2];

					if (inpr>=lar->spotsi) {
						if (z<minz) minz= z;
						if (z>maxz) maxz= z;
					}
				}
			}
		}
	}

	MEM_freeN(clipflag);
	
	/* set clipping min and max */
	if (minz < maxz) {
		float delta= (maxz - minz);     /* threshold to prevent precision issues */
		
		//printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
		if (lar->bufflag & LA_SHADBUF_AUTO_START)
			lar->shb->d= minz - delta*0.02f;        /* 0.02 is arbitrary... needs more thinking! */
		if (lar->bufflag & LA_SHADBUF_AUTO_END)
			lar->shb->clipend= maxz + delta*0.1f;
		
		/* bias was calculated as percentage, we scale it to prevent animation issues */
		delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
		//printf("bias delta %f\n", delta);
		lar->shb->bias= (int) (delta*(float)lar->shb->bias);
	}
}
719
720 static void makeflatshadowbuf(Render *re, LampRen *lar, float *jitbuf)
721 {
722         ShadBuf *shb= lar->shb;
723         int *rectz, samples;
724
725         /* zbuffering */
726         rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
727         
728         for (samples=0; samples<shb->totbuf; samples++) {
729                 zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
730                 /* create Z tiles (for compression): this system is 24 bits!!! */
731                 compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
732
733                 if (re->test_break(re->tbh))
734                         break;
735         }
736         
737         MEM_freeN(rectz);
738 }
739
740 static void makedeepshadowbuf(Render *re, LampRen *lar, float *jitbuf)
741 {
742         ShadBuf *shb= lar->shb;
743         APixstr *apixbuf;
744         APixstrand *apixbufstrand= NULL;
745         ListBase apsmbase= {NULL, NULL};
746
747         /* zbuffering */
748         apixbuf= MEM_callocN(sizeof(APixstr)*shb->size*shb->size, "APixbuf");
749         if (re->totstrand)
750                 apixbufstrand= MEM_callocN(sizeof(APixstrand)*shb->size*shb->size, "APixbufstrand");
751
752         zbuffer_abuf_shadow(re, lar, shb->persmat, apixbuf, apixbufstrand, &apsmbase, shb->size,
753                 shb->totbuf, (float(*)[2])jitbuf);
754
755         /* create Z tiles (for compression): this system is 24 bits!!! */
756         compress_deepshadowbuf(re, shb, apixbuf, apixbufstrand);
757         
758         MEM_freeN(apixbuf);
759         if (apixbufstrand)
760                 MEM_freeN(apixbufstrand);
761         freepsA(&apsmbase);
762 }
763
764 void makeshadowbuf(Render *re, LampRen *lar)
765 {
766         ShadBuf *shb= lar->shb;
767         float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
768         
769         if (lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
770                 shadowbuf_autoclip(re, lar);
771         
772         /* just to enforce identical behavior of all irregular buffers */
773         if (lar->buftype==LA_SHADBUF_IRREGULAR)
774                 shb->size= 1024;
775         
776         /* matrices and window: in winmat the transformation is being put,
777          * transforming from observer view to lamp view, including lamp window matrix */
778         
779         angle= saacos(lar->spotsi);
780         temp = 0.5f * shb->size * cosf(angle) / sinf(angle);
781         shb->pixsize= (shb->d)/temp;
782         wsize= shb->pixsize*(shb->size/2.0f);
783         
784         perspective_m4(shb->winmat, -wsize, wsize, -wsize, wsize, shb->d, shb->clipend);
785         mul_m4_m4m4(shb->persmat, shb->winmat, shb->viewmat);
786
787         if (ELEM3(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) {
788                 shb->totbuf= lar->buffers;
789
790                 /* jitter, weights - not threadsafe! */
791                 BLI_lock_thread(LOCK_CUSTOM1);
792                 shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
793                 make_jitter_weight_tab(re, shb, lar->filtertype);
794                 BLI_unlock_thread(LOCK_CUSTOM1);
795                 
796                 if (shb->totbuf==4) jitbuf= give_jitter_tab(2);
797                 else if (shb->totbuf==9) jitbuf= give_jitter_tab(3);
798                 else jitbuf= twozero;
799                 
800                 /* zbuffering */
801                 if (lar->buftype == LA_SHADBUF_DEEP) {
802                         makedeepshadowbuf(re, lar, jitbuf);
803                         shb->totbuf= 1;
804                 }
805                 else
806                         makeflatshadowbuf(re, lar, jitbuf);
807
808                 /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
809         }
810 }
811
812 static void *do_shadow_thread(void *re_v)
813 {
814         Render *re = (Render *)re_v;
815         LampRen *lar;
816
817         do {
818                 BLI_lock_thread(LOCK_CUSTOM1);
819                 for (lar=re->lampren.first; lar; lar=lar->next) {
820                         if (lar->shb && !lar->thread_assigned) {
821                                 lar->thread_assigned= 1;
822                                 break;
823                         }
824                 }
825                 BLI_unlock_thread(LOCK_CUSTOM1);
826
827                 /* if type is irregular, this only sets the perspective matrix and autoclips */
828                 if (lar) {
829                         makeshadowbuf(re, lar);
830                         BLI_lock_thread(LOCK_CUSTOM1);
831                         lar->thread_ready= 1;
832                         BLI_unlock_thread(LOCK_CUSTOM1);
833                 }
834         } while (lar && !re->test_break(re->tbh));
835
836         return NULL;
837 }
838
/* Cached break state; threaded_makeshadowbufs() writes the result of the
 * real test_break callback here and resets it to 0 when it is done. */
static volatile int g_break = 0;

/* Replacement test_break callback installed while shadow buffer threads run:
 * ignores its argument and only reports the cached flag. */
static int thread_break(void *UNUSED(arg))
{
	return (int)g_break;
}
844
845 void threaded_makeshadowbufs(Render *re)
846 {
847         ListBase threads;
848         LampRen *lar;
849         int a, totthread= 0;
850         int (*test_break)(void *);
851
852         /* count number of threads to use */
853         if (G.is_rendering) {
854                 for (lar=re->lampren.first; lar; lar= lar->next)
855                         if (lar->shb)
856                                 totthread++;
857                 
858                 totthread = min_ii(totthread, re->r.threads);
859         }
860         else
861                 totthread = 1; /* preview render */
862
863         if (totthread <= 1) {
864                 for (lar=re->lampren.first; lar; lar= lar->next) {
865                         if (re->test_break(re->tbh)) break;
866                         if (lar->shb) {
867                                 /* if type is irregular, this only sets the perspective matrix and autoclips */
868                                 makeshadowbuf(re, lar);
869                         }
870                 }
871         }
872         else {
873                 /* swap test break function */
874                 test_break= re->test_break;
875                 re->test_break= thread_break;
876
877                 for (lar=re->lampren.first; lar; lar= lar->next) {
878                         lar->thread_assigned= 0;
879                         lar->thread_ready= 0;
880                 }
881
882                 BLI_init_threads(&threads, do_shadow_thread, totthread);
883                 
884                 for (a=0; a<totthread; a++)
885                         BLI_insert_thread(&threads, re);
886
887                 /* keep rendering as long as there are shadow buffers not ready */
888                 do {
889                         if ((g_break=test_break(re->tbh)))
890                                 break;
891
892                         PIL_sleep_ms(50);
893
894                         BLI_lock_thread(LOCK_CUSTOM1);
895                         for (lar=re->lampren.first; lar; lar= lar->next)
896                                 if (lar->shb && !lar->thread_ready)
897                                         break;
898                         BLI_unlock_thread(LOCK_CUSTOM1);
899                 } while (lar);
900         
901                 BLI_end_threads(&threads);
902
903                 /* unset threadsafety */
904                 re->test_break= test_break;
905                 g_break= 0;
906         }
907 }
908
909 void freeshadowbuf(LampRen *lar)
910 {
911         if (lar->shb) {
912                 ShadBuf *shb= lar->shb;
913                 ShadSampleBuf *shsample;
914                 int b, v;
915                 
916                 for (shsample= shb->buffers.first; shsample; shsample= shsample->next) {
917                         if (shsample->deepbuf) {
918                                 v= shb->size*shb->size;
919                                 for (b=0; b<v; b++)
920                                         if (shsample->deepbuf[b])
921                                                 MEM_freeN(shsample->deepbuf[b]);
922                                         
923                                 MEM_freeN(shsample->deepbuf);
924                                 MEM_freeN(shsample->totbuf);
925                         }
926                         else {
927                                 intptr_t *ztile= shsample->zbuf;
928                                 char *ctile= shsample->cbuf;
929                                 
930                                 v= (shb->size*shb->size)/256;
931                                 for (b=0; b<v; b++, ztile++, ctile++)
932                                         if (*ctile) MEM_freeN((void *) *ztile);
933                                 
934                                 MEM_freeN(shsample->zbuf);
935                                 MEM_freeN(shsample->cbuf);
936                         }
937                 }
938                 BLI_freelistN(&shb->buffers);
939                 
940                 if (shb->weight) MEM_freeN(shb->weight);
941                 MEM_freeN(lar->shb);
942                 
943                 lar->shb= NULL;
944         }
945 }
946
947
948 static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
949 {
950         /* return a 1 if fully compressed shadbuf-tile && z==const */
951         int ofs;
952         char *ct;
953
954         if (shsample->deepbuf)
955                 return 0;
956
957         /* always test borders of shadowbuffer */
958         if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1;
959         if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1;
960
961         /* calc z */
962         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
963         ct= shsample->cbuf+ofs;
964         if (*ct==0) {
965                 if (nr==0) {
966                         *rz= *( (int **)(shsample->zbuf+ofs) );
967                         return 1;
968                 }
969                 else if (*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
970                 
971                 return 1;
972         }
973         
974         return 0;
975 }
976
977 static float readdeepvisibility(DeepSample *dsample, int tot, int z, int bias, float *biast)
978 {
979         DeepSample *ds, *prevds;
980         float t;
981         int a;
982
983         /* tricky stuff here; we use ints which can overflow easily with bias values */
984
985         ds= dsample;
986         for (a=0; a<tot && (z-bias > ds->z); a++, ds++) {}
987
988         if (a == tot) {
989                 if (biast)
990                         *biast= 0.0f;
991                 return (ds-1)->v; /* completely behind all samples */
992         }
993         
994         /* check if this read needs bias blending */
995         if (biast) {
996                 if (z > ds->z)
997                         *biast= (float)(z - ds->z)/(float)bias;
998                 else
999                         *biast= 0.0f;
1000         }
1001
1002         if (a == 0)
1003                 return 1.0f; /* completely in front of all samples */
1004
1005         /* converting to float early here because ds->z - prevds->z can overflow */
1006         prevds= ds-1;
1007         t= ((float)(z-bias) - (float)prevds->z)/((float)ds->z - (float)prevds->z);
1008         return t*ds->v + (1.0f-t)*prevds->v;
1009 }
1010
1011 static float readdeepshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
1012 {
1013         float v, biasv, biast;
1014         int ofs, tot;
1015
1016         if (zs < - 0x7FFFFE00 + bias)
1017                 return 1.0;     /* extreme close to clipstart */
1018
1019         /* calc z */
1020         ofs= ys*shb->size + xs;
1021         tot= shsample->totbuf[ofs];
1022         if (tot == 0)
1023                 return 1.0f;
1024
1025         v= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, bias, &biast);
1026
1027         if (biast != 0.0f) {
1028                 /* in soft bias area */
1029                 biasv = readdeepvisibility(shsample->deepbuf[ofs], tot, zs, 0, NULL);
1030
1031                 biast= biast*biast;
1032                 return (1.0f-biast)*v + biast*biasv;
1033         }
1034
1035         return v;
1036 }
1037
1038 /* return 1.0 : fully in light */
1039 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)     
1040 {
1041         float temp;
1042         int *rz, ofs;
1043         int zsamp=0;
1044         char *ct, *cz;
1045
1046         /* simpleclip */
1047         /* if (xs<0 || ys<0) return 1.0; */
1048         /* if (xs>=shb->size || ys>=shb->size) return 1.0; */
1049         
1050         /* always test borders of shadowbuffer */
1051         if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1;
1052         if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1;
1053
1054         if (shsample->deepbuf)
1055                 return readdeepshadowbuf(shb, shsample, bias, xs, ys, zs);
1056
1057         /* calc z */
1058         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1059         ct= shsample->cbuf+ofs;
1060         rz= *( (int **)(shsample->zbuf+ofs) );
1061
1062         if (*ct==3) {
1063                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1064                 cz= (char *)&zsamp;
1065                 cz[ACOMP]= ct[0];
1066                 cz[BCOMP]= ct[1];
1067                 cz[GCOMP]= ct[2];
1068         }
1069         else if (*ct==2) {
1070                 ct= ((char *)rz);
1071                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1072                 zsamp= *rz;
1073         
1074                 cz= (char *)&zsamp;
1075                 cz[BCOMP]= ct[0];
1076                 cz[GCOMP]= ct[1];
1077         }
1078         else if (*ct==1) {
1079                 ct= ((char *)rz);
1080                 ct+= 4+16*(ys & 15)+(xs & 15);
1081                 zsamp= *rz;
1082
1083                 cz= (char *)&zsamp;
1084                 cz[GCOMP]= ct[0];
1085
1086         }
1087         else {
1088                 /* got warning on this for 64 bits.... */
1089                 /* but it's working code! in this case rz is not a pointer but zvalue (ton) */
1090                 zsamp= GET_INT_FROM_POINTER(rz);
1091         }
1092
1093         /* tricky stuff here; we use ints which can overflow easily with bias values */
1094         
1095         if (zsamp > zs) return 1.0;             /* absolute no shadow */
1096         else if (zs < - 0x7FFFFE00 + bias) return 1.0;  /* extreme close to clipstart */
1097         else if (zsamp < zs-bias) return 0.0;   /* absolute in shadow */
1098         else {                                  /* soft area */
1099                 
1100                 temp=  ( (float)(zs- zsamp) )/(float)bias;
1101                 return 1.0f - temp*temp;
1102                         
1103         }
1104 }
1105
1106 static void shadowbuf_project_co(float *x, float *y, float *z, ShadBuf *shb, const float co[3])
1107 {
1108         float hco[4], size= 0.5f*(float)shb->size;
1109
1110         copy_v3_v3(hco, co);
1111         hco[3]= 1.0f;
1112
1113         mul_m4_v4(shb->persmat, hco);
1114
1115         *x= size*(1.0f+hco[0]/hco[3]);
1116         *y= size*(1.0f+hco[1]/hco[3]);
1117         if (z) *z= (hco[2]/hco[3]);
1118 }
1119
1120 /* the externally called shadow testing (reading) function */
1121 /* return 1.0: no shadow at all */
1122 float testshadowbuf(Render *re, ShadBuf *shb, const float co[3], const float dxco[3], const float dyco[3], float inp, float mat_bias)
1123 {
1124         ShadSampleBuf *shsample;
1125         float fac, dco[3], dx[3], dy[3], shadfac=0.0f;
1126         float xs1, ys1, zs1, *jit, *weight, xres, yres, biasf;
1127         int xs, ys, zs, bias, *rz;
1128         short a, num;
1129         
1130         /* crash preventer */
1131         if (shb->buffers.first==NULL)
1132                 return 1.0f;
1133         
1134         /* when facing away, assume fully in shadow */
1135         if (inp <= 0.0f)
1136                 return 0.0f;
1137
1138         /* project coordinate to pixel space */
1139         shadowbuf_project_co(&xs1, &ys1, &zs1, shb, co);
1140
1141         /* clip z coordinate, z is projected so that (-1.0, 1.0) matches
1142          * (clipstart, clipend), so we can do this simple test */
1143         if (zs1>=1.0f)
1144                 return 0.0f;
1145         else if (zs1<= -1.0f)
1146                 return 1.0f;
1147
1148         zs= ((float)0x7FFFFFFF)*zs1;
1149
1150         /* take num*num samples, increase area with fac */
1151         num= get_render_shadow_samples(&re->r, shb->samp);
1152         num= num*num;
1153         fac= shb->soft;
1154         
1155         /* compute z bias */
1156         if (mat_bias!=0.0f) biasf= shb->bias*mat_bias;
1157         else biasf= shb->bias;
1158         /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors 
1159          * on cube edges, with one side being almost frontal lighted (ton)  */
1160         bias= (1.5f-inp*inp)*biasf;
1161         
1162         /* in case of no filtering we can do things simpler */
1163         if (num==1) {
1164                 for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
1165                         shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
1166                 
1167                 return shadfac/(float)shb->totbuf;
1168         }
1169
1170         /* calculate filter size */
1171         add_v3_v3v3(dco, co, dxco);
1172         shadowbuf_project_co(&dx[0], &dx[1], NULL, shb, dco);
1173         dx[0]= xs1 - dx[0];
1174         dx[1]= ys1 - dx[1];
1175
1176         add_v3_v3v3(dco, co, dyco);
1177         shadowbuf_project_co(&dy[0], &dy[1], NULL, shb, dco);
1178         dy[0]= xs1 - dy[0];
1179         dy[1]= ys1 - dy[1];
1180         
1181         xres = fac * (fabsf(dx[0]) + fabsf(dy[0]));
1182         yres = fac * (fabsf(dx[1]) + fabsf(dy[1]));
1183         if (xres<1.0f) xres= 1.0f;
1184         if (yres<1.0f) yres= 1.0f;
1185         
1186         /* make xs1/xs1 corner of sample area */
1187         xs1 -= xres*0.5f;
1188         ys1 -= yres*0.5f;
1189
1190         /* in case we have a constant value in a tile, we can do quicker lookup */
1191         if (xres<16.0f && yres<16.0f) {
1192                 shsample= shb->buffers.first;
1193                 if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
1194                         if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
1195                                 if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
1196                                         if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
1197                                                 return readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
1198                                         }
1199                                 }
1200                         }
1201                 }
1202         }
1203         
1204         /* full jittered shadow buffer lookup */
1205         for (shsample= shb->buffers.first; shsample; shsample= shsample->next) {
1206                 jit= shb->jit;
1207                 weight= shb->weight;
1208                 
1209                 for (a=num; a>0; a--, jit+=2, weight++) {
1210                         /* instead of jit i tried random: ugly! */
1211                         /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
1212                         /* xs1 and ys1 are already corrected to be corner of sample area */
1213                         xs= xs1 + xres*(jit[0] + 0.5f);
1214                         ys= ys1 + yres*(jit[1] + 0.5f);
1215                         
1216                         shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
1217                 }
1218         }
1219
1220         /* Renormalizes for the sample number: */
1221         return shadfac/(float)shb->totbuf;
1222 }
1223
1224 /* different function... sampling behind clipend can be LIGHT, bias is negative! */
1225 /* return: light */
1226 static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
1227 {
1228         float temp;
1229         int *rz, ofs;
1230         int bias, zbias, zsamp;
1231         char *ct, *cz;
1232
1233         /* negative! The other side is more important */
1234         bias= -shb->bias;
1235         
1236         /* simpleclip */
1237         if (xs<0 || ys<0) return 0.0;
1238         if (xs>=shb->size || ys>=shb->size) return 0.0;
1239
1240         /* calc z */
1241         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1242         ct= shsample->cbuf+ofs;
1243         rz= *( (int **)(shsample->zbuf+ofs) );
1244
1245         if (*ct==3) {
1246                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1247                 cz= (char *)&zsamp;
1248                 zsamp= 0;
1249                 cz[ACOMP]= ct[0];
1250                 cz[BCOMP]= ct[1];
1251                 cz[GCOMP]= ct[2];
1252         }
1253         else if (*ct==2) {
1254                 ct= ((char *)rz);
1255                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1256                 zsamp= *rz;
1257         
1258                 cz= (char *)&zsamp;
1259                 cz[BCOMP]= ct[0];
1260                 cz[GCOMP]= ct[1];
1261         }
1262         else if (*ct==1) {
1263                 ct= ((char *)rz);
1264                 ct+= 4+16*(ys & 15)+(xs & 15);
1265                 zsamp= *rz;
1266
1267                 cz= (char *)&zsamp;
1268                 cz[GCOMP]= ct[0];
1269
1270         }
1271         else {
1272                 /* same as before */
1273                 /* still working code! (ton) */
1274                 zsamp= GET_INT_FROM_POINTER(rz);
1275         }
1276
1277         /* NO schadow when sampled at 'eternal' distance */
1278
1279         if (zsamp >= 0x7FFFFE00) return 1.0;
1280
1281         if (zsamp > zs) return 1.0;             /* absolute no shadww */
1282         else {
1283                 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
1284                 zbias= 0x7fffffff - zs;
1285                 if (zbias > -bias) {
1286                         if ( zsamp < zs-bias) return 0.0;       /* absolute in shadow */
1287                 }
1288                 else return 0.0;        /* absolute shadow */
1289         }
1290
1291         /* soft area */
1292         
1293         temp=  ( (float)(zs- zsamp) )/(float)bias;
1294         return 1.0f - temp*temp;
1295 }
1296
1297
1298 float shadow_halo(LampRen *lar, const float p1[3], const float p2[3])
1299 {
1300         /* p1 p2 already are rotated in spot-space */
1301         ShadBuf *shb= lar->shb;
1302         ShadSampleBuf *shsample;
1303         float co[4], siz;
1304         float lambda, lambda_o, lambda_x, lambda_y, ldx, ldy;
1305         float zf, xf1, yf1, zf1, xf2, yf2, zf2;
1306         float count, lightcount;
1307         int x, y, z, xs1, ys1;
1308         int dx = 0, dy = 0;
1309         
1310         siz= 0.5f*(float)shb->size;
1311         
1312         co[0]= p1[0];
1313         co[1]= p1[1];
1314         co[2]= p1[2]/lar->sh_zfac;
1315         co[3]= 1.0;
1316         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1317         xf1= siz*(1.0f+co[0]/co[3]);
1318         yf1= siz*(1.0f+co[1]/co[3]);
1319         zf1= (co[2]/co[3]);
1320
1321
1322         co[0]= p2[0];
1323         co[1]= p2[1];
1324         co[2]= p2[2]/lar->sh_zfac;
1325         co[3]= 1.0;
1326         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1327         xf2= siz*(1.0f+co[0]/co[3]);
1328         yf2= siz*(1.0f+co[1]/co[3]);
1329         zf2= (co[2]/co[3]);
1330
1331         /* the 2dda (a pixel line formula) */
1332
1333         xs1= (int)xf1;
1334         ys1= (int)yf1;
1335
1336         if (xf1 != xf2) {
1337                 if (xf2-xf1 > 0.0f) {
1338                         lambda_x= (xf1-xs1-1.0f)/(xf1-xf2);
1339                         ldx= -shb->shadhalostep/(xf1-xf2);
1340                         dx= shb->shadhalostep;
1341                 }
1342                 else {
1343                         lambda_x= (xf1-xs1)/(xf1-xf2);
1344                         ldx= shb->shadhalostep/(xf1-xf2);
1345                         dx= -shb->shadhalostep;
1346                 }
1347         }
1348         else {
1349                 lambda_x= 1.0;
1350                 ldx= 0.0;
1351         }
1352
1353         if (yf1 != yf2) {
1354                 if (yf2-yf1 > 0.0f) {
1355                         lambda_y= (yf1-ys1-1.0f)/(yf1-yf2);
1356                         ldy= -shb->shadhalostep/(yf1-yf2);
1357                         dy= shb->shadhalostep;
1358                 }
1359                 else {
1360                         lambda_y= (yf1-ys1)/(yf1-yf2);
1361                         ldy= shb->shadhalostep/(yf1-yf2);
1362                         dy= -shb->shadhalostep;
1363                 }
1364         }
1365         else {
1366                 lambda_y= 1.0;
1367                 ldy= 0.0;
1368         }
1369         
1370         x= xs1;
1371         y= ys1;
1372         lambda= count= lightcount= 0.0;
1373
1374 /* printf("start %x %x  \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
1375
1376         do {
1377                 lambda_o= lambda;
1378                 
1379                 if (lambda_x==lambda_y) {
1380                         lambda_x+= ldx;
1381                         x+= dx;
1382                         lambda_y+= ldy;
1383                         y+= dy;
1384                 }
1385                 else {
1386                         if (lambda_x<lambda_y) {
1387                                 lambda_x+= ldx;
1388                                 x+= dx;
1389                         }
1390                         else {
1391                                 lambda_y+= ldy;
1392                                 y+= dy;
1393                         }
1394                 }
1395                 
1396                 lambda = min_ff(lambda_x, lambda_y);
1397
1398                 /* not making any progress? */
1399                 if (lambda==lambda_o) break;
1400
1401                 /* clip to end of volume */
1402                 lambda = min_ff(lambda, 1.0f);
1403
1404                 zf= zf1 + lambda*(zf2-zf1);
1405                 count+= (float)shb->totbuf;
1406
1407                 if (zf<= -1.0f) lightcount += 1.0f;     /* close to the spot */
1408                 else {
1409                 
1410                         /* make sure, behind the clipend we extend halolines. */
1411                         if (zf>=1.0f) z= 0x7FFFF000;
1412                         else z= (int)(0x7FFFF000*zf);
1413                         
1414                         for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
1415                                 lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
1416                         
1417                 }
1418         }
1419         while (lambda < 1.0f);
1420
1421         if (count!=0.0f) return (lightcount/count);
1422         return 0.0f;
1423         
1424 }
1425
1426
1427 /* ********************* Irregular Shadow Buffer (ISB) ************* */
1428 /* ********** storage of all view samples in a raster of lists ***** */
1429
1430 /* based on several articles describing this method, like:
1431  * The Irregular Z-Buffer and its Application to Shadow Mapping
1432  * Gregory S. Johnson - William R. Mark - Christopher A. Burns
1433  * and
1434  * Alias-Free Shadow Maps
1435  * Timo Aila and Samuli Laine
1436  */
1437
/* bsp structure (actually kd tree) */

/* number of sample pointers a leaf holds; isb_bsp_split() redistributes
 * exactly this many samples over two new branches */
#define BSPMAX_SAMPLE	128
/* depth limit of the tree (not referenced in the functions right below) */
#define BSPMAX_DEPTH	32
1442
/* Axis aligned box with a depth range.
 * aligned with struct rctf: keep the x/y members first and in this order,
 * the RCT_SIZE_X/Y macros are applied to this struct as well */
typedef struct Boxf {
	float xmin;
	float xmax;
	float ymin;
	float ymax;
	float zmin;
	float zmax;
} Boxf;
1449
1450 typedef struct ISBBranch {
1451         struct ISBBranch *left, *right;
1452         float divider[2];
1453         Boxf box;
1454         short totsamp, index, full, unused;
1455         ISBSample **samples;
1456 } ISBBranch;
1457
1458 typedef struct BSPFace {
1459         Boxf box;
1460         const float *v1, *v2, *v3, *v4;
1461         int obi;                /* object for face lookup */
1462         int facenr;             /* index to retrieve VlakRen */
1463         int type;               /* only for strand now */
1464         short shad_alpha, is_full;
1465         
1466         /* strand caching data, optimize for point_behind_strand() */
1467         float radline, radline_end, len;
1468         float vec1[3], vec2[3], rc[3];
1469 } BSPFace;
1470
1471 /* boxes are in lamp projection */
1472 static void init_box(Boxf *box)
1473 {
1474         box->xmin = 1000000.0f;
1475         box->xmax = 0;
1476         box->ymin = 1000000.0f;
1477         box->ymax = 0;
1478         box->zmin= 0x7FFFFFFF;
1479         box->zmax= - 0x7FFFFFFF;
1480 }
1481
1482 /* use v1 to calculate boundbox */
1483 static void bound_boxf(Boxf *box, const float v1[3])
1484 {
1485         if (v1[0] < box->xmin) box->xmin = v1[0];
1486         if (v1[0] > box->xmax) box->xmax = v1[0];
1487         if (v1[1] < box->ymin) box->ymin = v1[1];
1488         if (v1[1] > box->ymax) box->ymax = v1[1];
1489         if (v1[2] < box->zmin) box->zmin= v1[2];
1490         if (v1[2] > box->zmax) box->zmax= v1[2];
1491 }
1492
1493 /* use v1 to calculate boundbox */
1494 static void bound_rectf(rctf *box, const float v1[2])
1495 {
1496         if (v1[0] < box->xmin) box->xmin = v1[0];
1497         if (v1[0] > box->xmax) box->xmax = v1[0];
1498         if (v1[1] < box->ymin) box->ymin = v1[1];
1499         if (v1[1] > box->ymax) box->ymax = v1[1];
1500 }
1501
1502
1503 /* halfway splitting, for initializing a more regular tree */
1504 static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
1505 {
1506         
1507         /* if level > 0 we create new branches and go deeper */
1508         if (level > 0) {
1509                 ISBBranch *left, *right;
1510                 int i;
1511                 
1512                 /* splitpoint */
1513                 root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
1514                 root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
1515                 
1516                 /* find best splitpoint */
1517                 if (RCT_SIZE_X(&root->box) > RCT_SIZE_Y(&root->box))
1518                         i = root->index = 0;
1519                 else
1520                         i = root->index = 1;
1521                 
1522                 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1523                 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1524                 
1525                 /* box info */
1526                 left->box= root->box;
1527                 right->box= root->box;
1528                 if (i==0) {
1529                         left->box.xmax = root->divider[0];
1530                         right->box.xmin = root->divider[0];
1531                 }
1532                 else {
1533                         left->box.ymax = root->divider[1];
1534                         right->box.ymin = root->divider[1];
1535                 }
1536                 isb_bsp_split_init(left, mem, level-1);
1537                 isb_bsp_split_init(right, mem, level-1);
1538         }
1539         else {
1540                 /* we add sample array */
1541                 root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1542         }
1543 }
1544
1545 /* note; if all samples on same location we just spread them over 2 new branches */
1546 static void isb_bsp_split(ISBBranch *root, MemArena *mem)
1547 {
1548         ISBBranch *left, *right;
1549         ISBSample *samples[BSPMAX_SAMPLE];
1550         int a, i;
1551
1552         /* splitpoint */
1553         root->divider[0]= root->divider[1]= 0.0f;
1554         for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
1555                 root->divider[0]+= root->samples[a]->zco[0];
1556                 root->divider[1]+= root->samples[a]->zco[1];
1557         }
1558         root->divider[0]/= BSPMAX_SAMPLE;
1559         root->divider[1]/= BSPMAX_SAMPLE;
1560         
1561         /* find best splitpoint */
1562         if (RCT_SIZE_X(&root->box) > RCT_SIZE_Y(&root->box))
1563                 i = root->index = 0;
1564         else
1565                 i = root->index = 1;
1566         
1567         /* new branches */
1568         left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1569         right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1570
1571         /* new sample array */
1572         left->samples = BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1573         right->samples = samples;  /* tmp */
1574
1575         /* split samples */
1576         for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
1577                 int comp= 0;
1578                 /* this prevents adding samples all to 1 branch when divider is equal to samples */
1579                 if (root->samples[a]->zco[i] == root->divider[i])
1580                         comp= a & 1;
1581                 else if (root->samples[a]->zco[i] < root->divider[i])
1582                         comp= 1;
1583                 
1584                 if (comp==1) {
1585                         left->samples[left->totsamp]= root->samples[a];
1586                         left->totsamp++;
1587                 }
1588                 else {
1589                         right->samples[right->totsamp]= root->samples[a];
1590                         right->totsamp++;
1591                 }
1592         }
1593         
1594         /* copy samples from tmp */
1595         memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
1596         right->samples= root->samples;
1597         root->samples= NULL;
1598         
1599         /* box info */
1600         left->box= root->box;
1601         right->box= root->box;
1602         if (i==0) {
1603                 left->box.xmax = root->divider[0];
1604                 right->box.xmin = root->divider[0];
1605         }
1606         else {
1607                 left->box.ymax = root->divider[1];
1608                 right->box.ymin = root->divider[1];
1609         }
1610 }
1611
1612 /* inserts sample in main tree, also splits on threshold */
1613 /* returns 1 if error */
1614 static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
1615 {
1616         ISBBranch *bspn= root;
1617         float *zco= sample->zco;
1618         int i= 0;
1619         
1620         /* debug counter, also used to check if something was filled in ever */
1621         root->totsamp++;
1622         
1623         /* going over branches until last one found */
1624         while (bspn->left) {
1625                 if (zco[bspn->index] <= bspn->divider[bspn->index])
1626                         bspn= bspn->left;
1627                 else
1628                         bspn= bspn->right;
1629                 i++;
1630         }
1631         /* bspn now is the last branch */
1632         
1633         if (bspn->totsamp==BSPMAX_SAMPLE) {
1634                 printf("error in bsp branch\n");        /* only for debug, cannot happen */
1635                 return 1;
1636         }
1637         
1638         /* insert */
1639         bspn->samples[bspn->totsamp]= sample;
1640         bspn->totsamp++;
1641
1642         /* split if allowed and needed */
1643         if (bspn->totsamp==BSPMAX_SAMPLE) {
1644                 if (i==BSPMAX_DEPTH) {
1645                         bspn->totsamp--;        /* stop filling in... will give errors */
1646                         return 1;
1647                 }
1648                 isb_bsp_split(bspn, memarena);
1649         }
1650         return 0;
1651 }
1652
1653 /* initialize vars in face, for optimal point-in-face test */
1654 static void bspface_init_strand(BSPFace *face) 
1655 {
1656         
1657         face->radline= 0.5f* len_v2v2(face->v1, face->v2);
1658         
1659         mid_v3_v3v3(face->vec1, face->v1, face->v2);
1660         if (face->v4)
1661                 mid_v3_v3v3(face->vec2, face->v3, face->v4);
1662         else
1663                 copy_v3_v3(face->vec2, face->v3);
1664         
1665         face->rc[0]= face->vec2[0]-face->vec1[0];
1666         face->rc[1]= face->vec2[1]-face->vec1[1];
1667         face->rc[2]= face->vec2[2]-face->vec1[2];
1668         
1669         face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
1670         
1671         if (face->len != 0.0f) {
1672                 face->radline_end = face->radline / sqrtf(face->len);
1673                 face->len = 1.0f / face->len;
1674         }
1675 }
1676
1677 /* brought back to a simple 2d case */
1678 static int point_behind_strand(const float p[3], BSPFace *face)
1679 {
1680         /* v1 - v2 is radius, v1 - v3 length */
1681         float dist, rc[2], pt[2];
1682         
1683         /* using code from dist_to_line_segment_v2(), distance vec to line-piece */
1684
1685         if (face->len==0.0f) {
1686                 rc[0]= p[0]-face->vec1[0];
1687                 rc[1]= p[1]-face->vec1[1];
1688                 dist= (float)(sqrt(rc[0]*rc[0]+ rc[1]*rc[1]));
1689                 
1690                 if (dist < face->radline)
1691                         return 1;
1692         }
1693         else {
1694                 float lambda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
1695                 
1696                 if (lambda > -face->radline_end && lambda < 1.0f+face->radline_end) {
1697                         /* hesse for dist: */
1698                         //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len);
1699                         
1700                         pt[0]= lambda*face->rc[0]+face->vec1[0];
1701                         pt[1]= lambda*face->rc[1]+face->vec1[1];
1702                         
1703                         rc[0]= pt[0]-p[0];
1704                         rc[1]= pt[1]-p[1];
1705                         dist= sqrtf(rc[0]*rc[0]+ rc[1]*rc[1]);
1706                         
1707                         if (dist < face->radline) {
1708                                 float zval= face->vec1[2] + lambda*face->rc[2];
1709                                 if (p[2] > zval)
1710                                         return 1;
1711                         }
1712                 }
1713         }
1714         return 0;
1715 }
1716
1717
/**
 * Point in triangle test in 2d (x, y), followed by a depth comparison.
 *
 * \return 1 when \a p is inside triangle v1-v2-v3 (edges included) and the z of the
 * triangle at that location is smaller than or equal to p[2]. Returns 0 otherwise,
 * also for triangles without area in 2d.
 */
static int point_behind_tria2d(const float p[3], const float v1[3], const float v2[3], const float v3[3])
{
	float edge1[2], edge2[2], rel[2];
	float det, u, v, z;

	edge1[0] = v2[0] - v1[0];
	edge1[1] = v2[1] - v1[1];
	edge2[0] = v3[0] - v1[0];
	edge2[1] = v3[1] - v1[1];

	det = edge1[0] * edge2[1] - edge1[1] * edge2[0];
	if (det == 0.0f)
		return 0;

	rel[0] = p[0] - v1[0];
	rel[1] = p[1] - v1[1];

	det = 1.0f / det;

	/* u and v are the weights of v2 and v3 */
	u = (rel[0] * edge2[1] - rel[1] * edge2[0]) * det;
	if (!(u >= 0.0f))
		return 0;

	v = (edge1[0] * rel[1] - edge1[1] * rel[0]) * det;
	if (!(v >= 0.0f))
		return 0;

	if (!(u + v <= 1.0f))
		return 0;

	/* inside, now check if point p is behind */
	z = (1.0f - u - v) * v1[2] + u * v2[2] + v * v3[2];

	return (z <= p[2]) ? 1 : 0;
}
1753
#if 0
/* disabled: tested these calls, but it gives inaccuracy, 'side' cannot be found reliably using v3 */

/* returns 1 when line v1-v2 has all four rect corners on the other side than point v3 */
static int rect_outside_line(rctf *rect, const float v1[3], const float v2[3], const float v3[3])
{
	/* line formula for v1-v2: a*x + b*y + c */
	const float a = v2[1] - v1[1];
	const float b = v1[0] - v2[0];
	const float c = -a * v1[0] - b * v1[1];
	const int side = a * v3[0] + b * v3[1] + c < 0.0f;

	/* the four quad points */
	if (side != (rect->xmin * a + rect->ymin * b + c >= 0.0f))
		return 0;
	if (side != (rect->xmax * a + rect->ymin * b + c >= 0.0f))
		return 0;
	if (side != (rect->xmax * a + rect->ymax * b + c >= 0.0f))
		return 0;
	if (side != (rect->xmin * a + rect->ymax * b + c >= 0.0f))
		return 0;

	return 1;
}

/* returns 0 when one of the triangle edges separates all rect points on 1 side, else 1 */
static int rect_isect_tria(rctf *rect, const float v1[3], const float v2[3], const float v3[3])
{
	const int separated = rect_outside_line(rect, v1, v2, v3) ||
	                      rect_outside_line(rect, v2, v3, v1) ||
	                      rect_outside_line(rect, v3, v1, v2);

	return separated ? 0 : 1;
}
#endif
1790
1791 /* if face overlaps a branch, it executes func. recursive */
1792 static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
1793 {
1794         
1795         /* are we descending? */
1796         if (bspn->left) {
1797                 /* hrmf, the box struct cannot be addressed with index */
1798                 if (bspn->index==0) {
1799                         if (face->box.xmin <= bspn->divider[0])
1800                                 isb_bsp_face_inside(bspn->left, face);
1801                         if (face->box.xmax > bspn->divider[0])
1802                                 isb_bsp_face_inside(bspn->right, face);
1803                 }
1804                 else {
1805                         if (face->box.ymin <= bspn->divider[1])
1806                                 isb_bsp_face_inside(bspn->left, face);
1807                         if (face->box.ymax > bspn->divider[1])
1808                                 isb_bsp_face_inside(bspn->right, face);
1809                 }
1810         }
1811         else {
1812                 /* else: end branch reached */
1813                 int a;
1814                 
1815                 if (bspn->totsamp==0) return;
1816                 
1817                 /* check for nodes entirely in shadow, can be skipped */
1818                 if (bspn->totsamp==bspn->full)
1819                         return;
1820                 
1821                 /* if bsp node is entirely in front of face, give up */
1822                 if (bspn->box.zmax < face->box.zmin)
1823                         return;
1824                 
1825                 /* if face boundbox is outside of branch rect, give up */
1826                 if (0==BLI_rctf_isect((rctf *)&face->box, (rctf *)&bspn->box, NULL))
1827                         return;
1828                 
1829                 /* test all points inside branch */
1830                 for (a=bspn->totsamp-1; a>=0; a--) {
1831                         ISBSample *samp= bspn->samples[a];
1832                         
1833                         if ((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
1834                                 if (face->box.zmin < samp->zco[2]) {
1835                                         if (BLI_rctf_isect_pt_v((rctf *)&face->box, samp->zco)) {
1836                                                 int inshadow= 0;
1837                                                 
1838                                                 if (face->type) {
1839                                                         if (point_behind_strand(samp->zco, face))
1840                                                                 inshadow= 1;
1841                                                 }
1842                                                 else if ( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
1843                                                         inshadow= 1;
1844                                                 else if (face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
1845                                                         inshadow= 1;
1846
1847                                                 if (inshadow) {
1848                                                         *(samp->shadfac) += face->shad_alpha;
1849                                                         /* optimize; is_full means shad_alpha==4096 */
1850                                                         if (*(samp->shadfac) >= 4096 || face->is_full) {
1851                                                                 bspn->full++;
1852                                                                 samp->shadfac= NULL;
1853                                                         }
1854                                                 }
1855                                         }
1856                                 }
1857                         }
1858                 }
1859         }
1860 }
1861
1862 /* based on available samples, recalculate the bounding box for bsp nodes, recursive */
1863 static void isb_bsp_recalc_box(ISBBranch *root)
1864 {
1865         if (root->left) {
1866                 isb_bsp_recalc_box(root->left);
1867                 isb_bsp_recalc_box(root->right);
1868         }
1869         else if (root->totsamp) {
1870                 int a;
1871                 
1872                 init_box(&root->box);
1873                 for (a=root->totsamp-1; a>=0; a--)
1874                         bound_boxf(&root->box, root->samples[a]->zco);
1875         }
1876 }
1877
1878 /* callback function for zbuf clip */
1879 static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr,
1880                                 const float *v1, const float *v2, const float *v3, const float *v4)
1881 {
1882         BSPFace face;
1883         
1884         face.v1= v1;
1885         face.v2= v2;
1886         face.v3= v3;
1887         face.v4= v4;
1888         face.obi= obi;
1889         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1890         face.type= R_STRAND;
1891         if (R.osa)
1892                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1893         else
1894                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1895         
1896         face.is_full= (zspan->shad_alpha==1.0f);
1897         
1898         /* setup boundbox */
1899         init_box(&face.box);
1900         bound_boxf(&face.box, v1);
1901         bound_boxf(&face.box, v2);
1902         bound_boxf(&face.box, v3);
1903         if (v4)
1904                 bound_boxf(&face.box, v4);
1905         
1906         /* optimize values */
1907         bspface_init_strand(&face);
1908         
1909         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1910         
1911 }
1912
1913 /* callback function for zbuf clip */
1914 static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr,
1915                               const float *v1, const float *v2, const float *v3, const float *v4)
1916 {
1917         BSPFace face;
1918         
1919         face.v1= v1;
1920         face.v2= v2;
1921         face.v3= v3;
1922         face.v4= v4;
1923         face.obi= obi;
1924         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1925         face.type= 0;
1926         if (R.osa)
1927                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1928         else
1929                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1930         
1931         face.is_full= (zspan->shad_alpha==1.0f);
1932         
1933         /* setup boundbox */
1934         init_box(&face.box);
1935         bound_boxf(&face.box, v1);
1936         bound_boxf(&face.box, v2);
1937         bound_boxf(&face.box, v3);
1938         if (v4)
1939                 bound_boxf(&face.box, v4);
1940
1941         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1942 }
1943
/**
 * Clip test of homogeneous coordinate \a ho against the rectangle \a minmax,
 * stored as (xmin, xmax, ymin, ymax) and scaled with ho[3].
 *
 * \return clip flags: 1 = x above xmax, 2 = x below xmin, 4 = y above ymax,
 * 8 = y below ymin; 0 when inside.
 */
static int testclip_minmax(const float ho[4], const float minmax[4])
{
	const float w = ho[3];
	int flag;

	if (ho[0] > minmax[1] * w) {
		flag = 1;
	}
	else if (ho[0] < minmax[0] * w) {
		flag = 2;
	}
	else {
		flag = 0;
	}

	if (ho[1] > minmax[3] * w) {
		flag |= 4;
	}
	else if (ho[1] < minmax[2] * w) {
		flag |= 8;
	}

	return flag;
}
1957
1958 /* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
1959 static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
1960 {
1961         ObjectInstanceRen *obi;
1962         ObjectRen *obr;
1963         ShadBuf *shb= lar->shb;
1964         ZSpan zspan, zspanstrand;
1965         VlakRen *vlr= NULL;
1966         Material *ma= NULL;
1967         float minmaxf[4], winmat[4][4];
1968         int size= shb->size;
1969         int i, a, ok=1, lay= -1;
1970         
1971         /* further optimize, also sets minz maxz */
1972         isb_bsp_recalc_box(root);
1973         
1974         /* extra clipping for minmax */
1975         minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
1976         minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
1977         minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
1978         minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
1979         
1980         if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
1981         
1982         /* (ab)use zspan, since we use zbuffer clipping code */
1983         zbuf_alloc_span(&zspan, size, size, re->clipcrop);
1984         
1985         zspan.zmulx=  ((float)size)/2.0f;
1986         zspan.zmuly=  ((float)size)/2.0f;
1987         zspan.zofsx= -0.5f;
1988         zspan.zofsy= -0.5f;
1989         
1990         /* pass on bsp root to zspan */
1991         zspan.rectz= (int *)root;
1992         
1993         /* filling methods */
1994         zspanstrand= zspan;
1995         //      zspan.zbuflinefunc= zbufline_onlyZ;
1996         zspan.zbuffunc= isb_bsp_test_face;
1997         zspanstrand.zbuffunc= isb_bsp_test_strand;
1998         
1999         for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
2000                 obr= obi->obr;
2001
2002                 if (obi->flag & R_TRANSFORMED)
2003                         mul_m4_m4m4(winmat, shb->persmat, obi->mat);
2004                 else
2005                         copy_m4_m4(winmat, shb->persmat);
2006
2007                 for (a=0; a<obr->totvlak; a++) {
2008                         
2009                         if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
2010                         else vlr++;
2011                         
2012                         /* note, these conditions are copied in shadowbuf_autoclip() */
2013                         if (vlr->mat!= ma) {
2014                                 ma= vlr->mat;
2015                                 ok= 1;
2016                                 if ((ma->mode2 & MA_CASTSHADOW)==0 || (ma->mode & MA_SHADBUF)==0) ok= 0;
2017                                 if (ma->material_type == MA_TYPE_WIRE) ok= 0;
2018                                 zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
2019                         }
2020                         
2021                         if (ok && (obi->lay & lay)) {
2022                                 float hoco[4][4];
2023                                 int c1, c2, c3, c4=0;
2024                                 int d1, d2, d3, d4=0;
2025                                 int partclip;
2026                                 
2027                                 /* create hocos per face, it is while render */
2028                                 projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
2029                                 projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
2030                                 projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
2031                                 if (vlr->v4) {
2032                                         projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
2033                                 }
2034
2035                                 /* minmax clipping */
2036                                 if (vlr->v4) partclip= d1 & d2 & d3 & d4;
2037                                 else partclip= d1 & d2 & d3;
2038                                 
2039                                 if (partclip==0) {
2040                                         
2041                                         /* window clipping */
2042                                         c1= testclip(hoco[0]); 
2043                                         c2= testclip(hoco[1]); 
2044                                         c3= testclip(hoco[2]); 
2045                                         if (vlr->v4)
2046                                                 c4= testclip(hoco[3]); 
2047                                         
2048                                         /* ***** NO WIRE YET */
2049                                         if (ma->material_type == MA_TYPE_WIRE) {
2050                                                 if (vlr->v4)
2051                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2052                                                 else
2053                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], NULL, c1, c2, c3, 0);
2054                                         }
2055                                         else if (vlr->v4) {
2056                                                 if (vlr->flag & R_STRAND)
2057                                                         zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2058                                                 else
2059                                                         zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2060                                         }
2061                                         else
2062                                                 zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
2063                                         
2064                                 }
2065                         }
2066                 }
2067         }
2068         
2069         zbuf_free_span(&zspan);
2070 }
2071
2072 /* returns 1 when the viewpixel is visible in lampbuffer */
2073 static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float co_r[3])
2074 {
2075         float hoco[4], v1[3], nor[3];
2076         float dface, fac, siz;
2077         
2078         RE_vlakren_get_normal(&R, obi, vlr, nor);
2079         copy_v3_v3(v1, vlr->v1->co);
2080         if (obi->flag & R_TRANSFORMED)
2081                 mul_m4_v3(obi->mat, v1);
2082
2083         /* from shadepixel() */
2084         dface = dot_v3v3(v1, nor);
2085         hoco[3]= 1.0f;
2086         
2087         /* ortho viewplane cannot intersect using view vector originating in (0, 0, 0) */
2088         if (R.r.mode & R_ORTHO) {
2089                 /* x and y 3d coordinate can be derived from pixel coord and winmat */
2090                 float fx= 2.0f/(R.winx*R.winmat[0][0]);
2091                 float fy= 2.0f/(R.winy*R.winmat[1][1]);
2092                 
2093                 hoco[0]= (x - 0.5f*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
2094                 hoco[1]= (y - 0.5f*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
2095                 
2096                 /* using a*x + b*y + c*z = d equation, (a b c) is normal */
2097                 if (nor[2]!=0.0f)
2098                         hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
2099                 else
2100                         hoco[2]= 0.0f;
2101         }
2102         else {
2103                 float div, view[3];
2104                 
2105                 calc_view_vector(view, x, y);
2106                 
2107                 div = dot_v3v3(nor, view);
2108                 if (div==0.0f) 
2109                         return 0;
2110                 
2111                 fac= dface/div;
2112                 
2113                 hoco[0]= fac*view[0];
2114                 hoco[1]= fac*view[1];
2115                 hoco[2]= fac*view[2];
2116         }
2117         
2118         /* move 3d vector to lampbuf */
2119         mul_m4_v4(shb->persmat, hoco);  /* rational hom co */
2120         
2121         /* clip We can test for -1.0/1.0 because of the properties of the
2122          * coordinate transformations. */
2123         fac = fabsf(hoco[3]);
2124         if (hoco[0]<-fac || hoco[0]>fac)
2125                 return 0;
2126         if (hoco[1]<-fac || hoco[1]>fac)
2127                 return 0;
2128         if (hoco[2]<-fac || hoco[2]>fac)
2129                 return 0;
2130         
2131         siz= 0.5f*(float)shb->size;
2132         co_r[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
2133         co_r[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
2134         co_r[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
2135         
2136         /* XXXX bias, much less than normal shadbuf, or do we need a constant? */
2137         co_r[2] -= 0.05f*shb->bias;
2138         
2139         return 1;
2140 }
2141
2142 /* storage of shadow results, solid osa and transp case */
2143 static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
2144 {
2145         ISBShadfacA *new;
2146         float shadfacf;
2147         
2148         /* in osa case, the samples were filled in with factor 1.0/R.osa. if fewer samples we have to correct */
2149         if (R.osa)
2150                 shadfacf= ((float)shadfac*R.osa)/(4096.0f*samples);
2151         else
2152                 shadfacf= ((float)shadfac)/(4096.0f);
2153         
2154         new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
2155         new->obi= obi;
2156         new->facenr= facenr & ~RE_QUAD_OFFS;
2157         new->shadfac= shadfacf;
2158         if (*isbsapp)
2159                 new->next= (*isbsapp);
2160         else
2161                 new->next= NULL;
2162         
2163         *isbsapp= new;
2164 }
2165
2166 /* adding samples, solid case */
2167 static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
2168 {
2169         int xi, yi, *xcos, *ycos;
2170         int sample, bsp_err= 0;
2171         
2172         /* bsp split doesn't like to handle regular sequences */
2173         xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos");
2174         ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos");
2175         for (xi=0; xi<pa->rectx; xi++)
2176                 xcos[xi]= xi;
2177         for (yi=0; yi<pa->recty; yi++)
2178                 ycos[yi]= yi;
2179         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2180         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2181         
2182         for (sample=0; sample<(R.osa?R.osa:1); sample++) {
2183                 ISBSample *samp= samplebuf[sample], *samp1;
2184                 
2185                 for (yi=0; yi<pa->recty; yi++) {
2186                         int y= ycos[yi];
2187                         for (xi=0; xi<pa->rectx; xi++) {
2188                                 int x= xcos[xi];
2189                                 samp1= samp + y*pa->rectx + x;
2190                                 if (samp1->facenr)
2191                                         bsp_err |= isb_bsp_insert(root, memarena, samp1);
2192                         }
2193                         if (bsp_err) break;
2194                 }
2195         }
2196         
2197         MEM_freeN(xcos);
2198         MEM_freeN(ycos);
2199
2200         return bsp_err;
2201 }
2202
2203 /* solid version */
2204 /* lar->shb, pa->rectz and pa->rectp should exist */
2205 static void isb_make_buffer(RenderPart *pa, LampRen *lar)
2206 {
2207         ShadBuf *shb= lar->shb;
2208         ISBData *isbdata;
2209         ISBSample *samp, *samplebuf[16];        /* should be RE_MAX_OSA */
2210         ISBBranch root;
2211         MemArena *memarena;
2212         intptr_t *rd;
2213         int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
2214         
2215         /* storage for shadow, per thread */
2216         isbdata= shb->isb_result[pa->thread];
2217         
2218         /* to map the shi->xs and ys coordinate */
2219         isbdata->minx= pa->disprect.xmin;
2220         isbdata->miny= pa->disprect.ymin;
2221         isbdata->rectx= pa->rectx;
2222         isbdata->recty= pa->recty;
2223         
2224         /* branches are added using memarena (32k branches) */
2225         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2226         BLI_memarena_use_calloc(memarena);
2227         
2228         /* samplebuf is in camera view space (pixels) */
2229         for (sample=0; sample<(R.osa?R.osa:1); sample++)
2230                 samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
2231         
2232         /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
2233         if (R.osa==0)
2234                 isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
2235         
2236         /* setup bsp root */
2237         memset(&root, 0, sizeof(ISBBranch));
2238         root.box.xmin = (float)shb->size;
2239         root.box.ymin = (float)shb->size;
2240         
2241         /* create the sample buffers */
2242         for (sindex=0, y=0; y<pa->recty; y++) {
2243                 for (x=0; x<pa->rectx; x++, sindex++) {
2244                         
2245                         /* this makes it a long function, but splitting it out would mean 10+ arguments */
2246                         /* first check OSA case */
2247                         if (R.osa) {
2248                                 rd= pa->rectdaps + sindex;
2249                                 if (*rd) {
2250                                         float xs= (float)(x + pa->disprect.xmin);
2251                                         float ys= (float)(y + pa->disprect.ymin);
2252                                         
2253                                         for (sample=0; sample<R.osa; sample++) {
2254                                                 PixStr *ps= (PixStr *)(*rd);
2255                                                 int mask= (1<<sample);
2256                                                 
2257                                                 while (ps) {
2258                                                         if (ps->mask & mask)
2259                                                                 break;
2260                                                         ps= ps->next;
2261                                                 }
2262                                                 if (ps && ps->facenr>0) {
2263                                                         ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
2264                                                         ObjectRen *obr= obi->obr;
2265                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
2266                                                         
2267                                                         samp= samplebuf[sample] + sindex;
2268                                                         /* convert image plane pixel location to lamp buffer space */
2269                                                         if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
2270                                                                 samp->obi= ps->obi;
2271                                                                 samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
2272                                                                 ps->shadfac= 0;
2273                                                                 samp->shadfac= &ps->shadfac;
2274                                                                 bound_rectf((rctf *)&root.box, samp->zco);
2275                                                         }
2276                                                 }
2277                                         }
2278                                 }
2279                         }
2280                         else {
2281                                 rectp= pa->rectp + sindex;
2282                                 recto= pa->recto + sindex;
2283                                 if (*rectp>0) {
2284                                         ObjectInstanceRen *obi= &R.objectinstance[*recto];
2285                                         ObjectRen *obr= obi->obr;
2286                                         VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
2287                                         float xs= (float)(x + pa->disprect.xmin);
2288                                         float ys= (float)(y + pa->disprect.ymin);
2289                                         
2290                                         samp= samplebuf[0] + sindex;
2291                                         /* convert image plane pixel location to lamp buffer space */
2292                                         if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
2293                                                 samp->obi= *recto;
2294                                                 samp->facenr= *rectp & ~RE_QUAD_OFFS;
2295                                                 samp->shadfac= isbdata->shadfacs + sindex;
2296                                                 bound_rectf((rctf *)&root.box, samp->zco);
2297                                         }
2298                                 }
2299                         }
2300                 }
2301         }
2302         
2303         /* simple method to see if we have samples */
2304         if (root.box.xmin != (float)shb->size) {
2305                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2306                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2307                 isb_bsp_split_init(&root, memarena, 8);
2308                 
2309                 /* insert all samples in BSP now */
2310                 bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
2311                         
2312                 if (bsp_err==0) {
2313                         /* go over all faces and fill in shadow values */
2314                         
2315                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2316                         
2317                         /* copy shadow samples to persistent buffer, reduce memory overhead */
2318                         if (R.osa) {
2319                                 ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2320                                 
2321                                 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2322                                 BLI_memarena_use_calloc(isbdata->memarena);
2323
2324                                 for (rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
2325                                         
2326                                         if (*rd) {
2327                                                 PixStr *ps= (PixStr *)(*rd);
2328                                                 while (ps) {
2329                                                         if (ps->shadfac)
2330                                                                 isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
2331                                                         ps= ps->next;
2332                                                 }
2333                                         }
2334                                 }
2335                         }
2336                 }
2337         }
2338         else {
2339                 if (isbdata->shadfacs) {
2340                         MEM_freeN(isbdata->shadfacs);
2341                         isbdata->shadfacs= NULL;
2342                 }
2343         }
2344
2345         /* free BSP */
2346         BLI_memarena_free(memarena);
2347         
2348         /* free samples */
2349         for (x=0; x<(R.osa?R.osa:1); x++)
2350                 MEM_freeN(samplebuf[x]);
2351         
2352         if (bsp_err) printf("error in filling bsp\n");
2353 }
2354
2355 /* add sample to buffer, isbsa is the root sample in a buffer */
2356 static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
2357 {
2358         ISBSampleA *new;
2359         
2360         new= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
2361         if (*isbsa)
2362                 new->next= (*isbsa);
2363         else
2364                 new->next= NULL;
2365         
2366         *isbsa= new;
2367         return new;
2368 }
2369
2370 /* adding samples in BSP, transparent case */
2371 static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
2372 {
2373         int xi, yi, *xcos, *ycos;
2374         int sample, bsp_err= 0;
2375         
2376         /* bsp split doesn't like to handle regular sequences */
2377         xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos");
2378         ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos");
2379         for (xi=0; xi<pa->rectx; xi++)
2380                 xcos[xi]= xi;
2381         for (yi=0; yi<pa->recty; yi++)
2382                 ycos[yi]= yi;
2383         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2384         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2385         
2386         for (sample=0; sample<(R.osa?R.osa:1); sample++) {
2387                 ISBSampleA **samp= samplebuf[sample], *samp1;
2388                 
2389                 for (yi=0; yi<pa->recty; yi++) {
2390                         int y= ycos[yi];
2391                         for (xi=0; xi<pa->rectx; xi++) {
2392                                 int x= xcos[xi];
2393                                 
2394                                 samp1= *(samp + y*pa->rectx + x);
2395                                 while (samp1) {
2396                                         bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
2397                                         samp1= samp1->next;
2398                                 }
2399                         }
2400                         if (bsp_err) break;
2401                 }
2402         }
2403         
2404         MEM_freeN(xcos);
2405         MEM_freeN(ycos);
2406         
2407         return bsp_err;
2408 }
2409
2410
2411 /* Ztransp version */
2412 /* lar->shb, pa->rectz and pa->rectp should exist */
2413 static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
2414 {
2415         ShadBuf *shb= lar->shb;
2416         ISBData *isbdata;
2417         ISBSampleA *samp, **samplebuf[16];      /* MAX_OSA */
2418         ISBBranch root;
2419         MemArena *memarena;
2420         APixstr *ap;
2421         int x, y, sindex, sample, bsp_err=0;
2422         
2423         /* storage for shadow, per thread */
2424         isbdata= shb->isb_result[pa->thread];
2425         
2426         /* to map the shi->xs and ys coordinate */
2427         isbdata->minx= pa->disprect.xmin;
2428         isbdata->miny= pa->disprect.ymin;
2429         isbdata->rectx= pa->rectx;
2430         isbdata->recty= pa->recty;
2431         
2432         /* branches are added using memarena (32k branches) */
2433         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2434         BLI_memarena_use_calloc(memarena);
2435         
2436         /* samplebuf is in camera view space (pixels) */
2437         for (sample=0; sample<(R.osa?R.osa:1); sample++)
2438                 samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
2439         
2440         /* setup bsp root */
2441         memset(&root, 0, sizeof(ISBBranch));
2442         root.box.xmin = (float)shb->size;
2443         root.box.ymin = (float)shb->size;
2444
2445         /* create the sample buffers */
2446         for (ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
2447                 for (x=0; x<pa->rectx; x++, sindex++, ap++) {
2448                         
2449                         if (ap->p[0]) {
2450                                 APixstr *apn;
2451                                 float xs= (float)(x + pa->disprect.xmin);
2452                                 float ys= (float)(y + pa->disprect.ymin);
2453                                 
2454                                 for (apn=ap; apn; apn= apn->next) {
2455                                         int a;
2456                                         for (a=0; a<4; a++) {
2457                                                 if (apn->p[a]) {
2458                                                         ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
2459                                                         ObjectRen *obr= obi->obr;
2460                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
2461                                                         float zco[3];
2462                                                         
2463                                                         /* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
2464                                                         apn->shadfac[a]= 0;
2465                                                         
2466                                                         if (R.osa) {
2467                                                                 for (sample=0; sample<R.osa; sample++) {
2468                                                                         int mask= (1<<sample);
2469                                                                         
2470                                                                         if (apn->mask[a] & mask) {
2471                                                                                 
2472                                                                                 /* convert image plane pixel location to lamp buffer space */
2473                                                                                 if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
2474                                                                                         samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
2475                                                                                         samp->obi= apn->obi[a];
2476                                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2477                                                                                         samp->shadfac= &apn->shadfac[a];
2478                                                                                         
2479                                                                                         copy_v3_v3(samp->zco, zco);
2480                                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2481                                                                                 }
2482                                                                         }
2483                                                                 }
2484                                                         }
2485                                                         else {
2486                                                                 
2487                                                                 /* convert image plane pixel location to lamp buffer space */
2488                                                                 if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
2489                                                                         
2490                                                                         samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
2491                                                                         samp->obi= apn->obi[a];
2492                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2493                                                                         samp->shadfac= &apn->shadfac[a];
2494                                                                         
2495                                                                         copy_v3_v3(samp->zco, zco);
2496                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2497                                                                 }
2498                                                         }
2499                                                 }
2500                                         }
2501                                 }
2502                         }
2503                 }
2504         }
2505         
2506         /* simple method to see if we have samples */
2507         if (root.box.xmin != (float)shb->size) {
2508                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2509                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2510                 isb_bsp_split_init(&root, memarena, 8);
2511                 
2512                 /* insert all samples in BSP now */
2513                 bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
2514                 
2515                 if (bsp_err==0) {
2516                         ISBShadfacA **isbsa;
2517                         
2518                         /* go over all faces and fill in shadow values */
2519                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2520                         
2521                         /* copy shadow samples to persistent buffer, reduce memory overhead */
2522                         isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2523                         
2524                         isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2525                         
2526                         for (ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
2527                                         
2528                                 if (ap->p[0]) {
2529                                         APixstr *apn;
2530                                         for (apn=ap; apn; apn= apn->next) {
2531                                                 int a;
2532                                                 for (a=0; a<4; a++) {
2533                                                         if (apn->p[a] && apn->shadfac[a]) {
2534                                                                 if (R.osa)
2535                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
2536                                                                 else
2537                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);
2538                                                         }
2539                                                 }
2540                                         }
2541                                 }
2542                         }
2543                 }
2544         }
2545
2546         /* free BSP */
2547         BLI_memarena_free(memarena);
2548
2549         /* free samples */
2550         for (x=0; x<(R.osa?R.osa:1); x++)
2551                 MEM_freeN(samplebuf[x]);
2552
2553         if (bsp_err) printf("error in filling bsp\n");
2554 }
2555
2556
2557
2558 /* exported */
2559
2560 /* returns amount of light (1.0 = no shadow) */
2561 /* note, shadepixel() rounds the coordinate, not the real sample info */
2562 float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
2563 {
2564         /* if raytracing, we can't accept irregular shadow */
2565         if (shi->depth==0) {
2566                 ISBData *isbdata= shb->isb_result[shi->thread];
2567                 
2568                 if (isbdata) {
2569                         if (isbdata->shadfacs || isbdata->shadfaca) {
2570                                 int x= shi->xs - isbdata->minx;
2571                                 
2572                                 if (x >= 0 && x < isbdata->rectx) {
2573                                         int y= shi->ys - isbdata->miny;
2574                         
2575                                         if (y >= 0 && y < isbdata->recty) {
2576                                                 if (isbdata->shadfacs) {
2577                                                         short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
2578                                                         return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
2579                                                 }
2580                                                 else {
2581                                                         int sindex= y*isbdata->rectx + x;
2582                                                         int obi= shi->obi - R.objectinstance;
2583                                                         ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex);
2584                                                         
2585                                                         while (isbsa) {
2586                                                                 if (isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
2587                                                                         return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
2588                                                                 isbsa= isbsa->next;
2589                                                         }
2590                                                 }
2591                                         }
2592                                 }
2593                         }
2594                 }
2595         }
2596         return 1.0f;
2597 }
2598
2599 /* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
2600 void ISB_create(RenderPart *pa, APixstr *apixbuf)
2601 {
2602         GroupObject *go;
2603         
2604         /* go over all lamps, and make the irregular buffers */
2605         for (go=R.lights.first; go; go= go->next) {
2606                 LampRen *lar= go->lampren;
2607                 
2608                 if (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2609                         
2610                         /* create storage for shadow, per thread */
2611                         lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
2612                         
2613                         if (apixbuf)
2614                                 isb_make_buffer_transp(pa, apixbuf, lar);
2615                         else
2616                                 isb_make_buffer(pa, lar);
2617                 }
2618         }
2619 }
2620
2621
2622 /* end of part rendering, free stored shadow data for this thread from all lamps */
2623 void ISB_free(RenderPart *pa)
2624 {
2625         GroupObject *go;
2626         
2627         /* go over all lamps, and free the irregular buffers */
2628         for (go=R.lights.first; go; go= go->next) {
2629                 LampRen *lar= go->lampren;
2630                 
2631                 if (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2632                         ISBData *isbdata= lar->shb->isb_result[pa->thread];
2633
2634                         if (isbdata) {
2635                                 if (isbdata->shadfacs)
2636                                         MEM_freeN(isbdata->shadfacs);
2637                                 if (isbdata->shadfaca)
2638                                         MEM_freeN(isbdata->shadfaca);
2639                                 
2640                                 if (isbdata->memarena)
2641                                         BLI_memarena_free(isbdata->memarena);
2642                                 
2643                                 MEM_freeN(isbdata);
2644                                 lar->shb->isb_result[pa->thread]= NULL;
2645                         }
2646                 }
2647         }
2648 }