ce89ebd4c340b4ee6e95a3bd99f0f5a25560c5ad
[blender.git] / source / blender / render / intern / source / shadbuf.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * Contributor(s): 2004-2006, Blender Foundation
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 /** \file blender/render/intern/source/shadbuf.c
27  *  \ingroup render
28  */
29
30
31 #include <math.h>
32 #include <string.h>
33
34
35 #include "MEM_guardedalloc.h"
36
37 #include "DNA_group_types.h"
38 #include "DNA_lamp_types.h"
39 #include "DNA_material_types.h"
40
41 #include "BKE_global.h"
42 #include "BKE_scene.h"
43
44
45 #include "BLI_math.h"
46 #include "BLI_blenlib.h"
47 #include "BLI_jitter.h"
48 #include "BLI_memarena.h"
49 #include "BLI_rand.h"
50 #include "BLI_utildefines.h"
51
52 #include "PIL_time.h"
53
54 #include "renderpipeline.h"
55 #include "render_types.h"
56 #include "renderdatabase.h"
57 #include "rendercore.h"
58 #include "shadbuf.h"
59 #include "shading.h"
60 #include "zbuf.h"
61
/* XXX, could be better implemented... this is for endian issues */
/* Byte offsets used to pick individual bytes out of a 4-byte value that is
 * addressed through a char pointer (see the rc[ACOMP] / rc[BCOMP] / rc[GCOMP]
 * reads in compress_shadowbuf()). The offsets are mirrored when
 * __BIG_ENDIAN__ is defined so that each name refers to the same logical
 * byte on both byte orders. */
#ifdef __BIG_ENDIAN__
#  define RCOMP 3
#  define GCOMP 2
#  define BCOMP 1
#  define ACOMP 0
#else
#  define RCOMP 0
#  define GCOMP 1
#  define BCOMP 2
#  define ACOMP 3
#endif

/* Extent of a rectangle along X / Y. 'rct' is a pointer to any struct that
 * has xmin/xmax (resp. ymin/ymax) members. */
#define RCT_SIZE_X(rct)       ((rct)->xmax - (rct)->xmin)
#define RCT_SIZE_Y(rct)       ((rct)->ymax - (rct)->ymin)
77
78 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
/* defined in pipeline.c: a hard copy of the active, dynamically allocated Render */
/* only to be used within this file, it exists for speed */
81 extern struct Render R;
82 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
83
84 /* ------------------------------------------------------------------------- */
85
86 /* initshadowbuf() in convertBlenderScene.c */
87
88 /* ------------------------------------------------------------------------- */
89
/**
 * Copy one square tile out of a square z-buffer into a tightly packed
 * scratch buffer.
 *
 * \param rectz  source z-buffer, 'size' x 'size' ints, row-major.
 * \param size   edge length of the source buffer in pixels.
 * \param x1,y1  top-left pixel of the tile inside the source buffer.
 * \param tile   edge length of the tile in pixels.
 * \param r1     destination; receives the tile rows one after another
 *               without padding, so it must have room for tile*tile ints.
 *
 * The tile is clipped against the buffer. x2/y2 are exclusive upper bounds
 * (the row loop runs while y < y2 and the row width is x2 - x1), so they are
 * clamped to 'size'. Clamping to size-1, as was done before, silently dropped
 * the last row and column of every tile touching the far buffer edge and left
 * the tail of the destination unwritten.
 *
 * Nothing is written when the tile lies completely outside the buffer.
 */
static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
{
	const int *src;
	size_t rowbytes;
	int x2, y2, y;

	x2= x1 + tile;
	y2= y1 + tile;
	if (x2 > size) x2= size;
	if (y2 > size) y2= size;

	if (x1 >= x2 || y1 >= y2) return;

	rowbytes= sizeof(int) * (size_t)(x2 - x1);
	src= rectz + size*y1 + x1;

	for (y= y1; y < y2; y++) {
		memcpy(r1, src, rowbytes);
		src += size;        /* next source row */
		r1 += rowbytes;     /* destination rows are packed */
	}
}
110
#if 0
/* Disabled helper (compiled out by the surrounding #if 0).
 * Sums the (size*size)/256 one-byte entries of shb->cbuf and returns that
 * sum multiplied by 256.
 * NOTE(review): compress_shadowbuf() stores 'cbuf' on ShadSampleBuf, not on
 * ShadBuf, so this probably no longer compiles when enabled - confirm before
 * reviving it. */
static int sizeoflampbuf(ShadBuf *shb)
{
	int num, count=0;
	char *cp;
	
	cp= shb->cbuf;
	/* one cbuf entry per 256 pixels */
	num= (shb->size*shb->size)/256;

	while (num--) count+= *(cp++);
	
	return 256*count;
}
#endif
125
/**
 * Return the jitter table for samp x samp samples, creating it on first use.
 *
 * All tables for 'samp' 1..16 live behind each other in one static array
 * (1496 entries = 1 + 4 + 9 + ... + 256, about 12k of floats), which saves
 * memory and render time for soft shadows. 'samp' is clamped to 2..16.
 *
 * Not threadsafe: the tables and their "initialized" flags are static and
 * unprotected.
 */
static float *give_jitter_tab(int samp)
{
	static float jit[1496][2];
	static char ctab[17]= {0};
	int k, offset= 0;

	if (samp > 16) samp= 16;
	else if (samp < 2) samp= 2;

	/* the table for 'samp' starts after the tables of all smaller sizes */
	for (k= 1; k < samp; k++)
		offset += k*k;

	if (!ctab[samp]) {
		ctab[samp]= 1;
		BLI_jitter_init(jit[offset], samp*samp);
	}

	return jit[offset];
}
151
152 static void make_jitter_weight_tab(Render *re, ShadBuf *shb, short filtertype) 
153 {
154         float *jit, totw= 0.0f;
155         int samp= get_render_shadow_samples(&re->r, shb->samp);
156         int a, tot=samp*samp;
157         
158         shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
159         
160         for (jit= shb->jit, a=0; a<tot; a++, jit+=2) {
161                 if (filtertype==LA_SHADBUF_TENT)
162                         shb->weight[a]= 0.71f - sqrt(jit[0]*jit[0] + jit[1]*jit[1]);
163                 else if (filtertype==LA_SHADBUF_GAUSS)
164                         shb->weight[a]= RE_filter_value(R_FILTER_GAUSS, 1.8f*sqrt(jit[0]*jit[0] + jit[1]*jit[1]));
165                 else
166                         shb->weight[a]= 1.0f;
167                 
168                 totw+= shb->weight[a];
169         }
170         
171         totw= 1.0f/totw;
172         for (a=0; a<tot; a++) {
173                 shb->weight[a]*= totw;
174         }
175 }
176
177 static int verg_deepsample(const void *poin1, const void *poin2)
178 {
179         const DeepSample *ds1= (const DeepSample*)poin1;
180         const DeepSample *ds2= (const DeepSample*)poin2;
181
182         if (ds1->z < ds2->z) return -1;
183         else if (ds1->z == ds2->z) return 0;
184         else return 1;
185 }
186
187 static int compress_deepsamples(DeepSample *dsample, int tot, float epsilon)
188 {
189         /* uses doubles to avoid overflows and other numerical issues,
190          * could be improved */
191         DeepSample *ds, *newds;
192         float v;
193         double slope, slopemin, slopemax, min, max, div, newmin, newmax;
194         int a, first, z, newtot= 0;
195
196 #if 0
197         if (print) {
198                 for (a=0, ds=dsample; a<tot; a++, ds++)
199                         printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v);
200                 printf("\n");
201         }
202 #endif
203
204         /* read from and write into same array */
205         ds= dsample;
206         newds= dsample;
207         a= 0;
208
209         /* as long as we are not at the end of the array */
210         for (a++, ds++; a<tot; a++, ds++) {
211                 slopemin= 0.0f;
212                 slopemax= 0.0f;
213                 first= 1;
214
215                 for (; a<tot; a++, ds++) {
216                         //dz= ds->z - newds->z;
217                         if (ds->z == newds->z) {
218                                 /* still in same z position, simply check
219                                  * visibility difference against epsilon */
220                                 if (!(fabs(newds->v - ds->v) <= epsilon)) {
221                                         break;
222                                 }
223                         }
224                         else {
225                                 /* compute slopes */
226                                 div= (double)0x7FFFFFFF/((double)ds->z - (double)newds->z);
227                                 min= ((ds->v - epsilon) - newds->v)*div;
228                                 max= ((ds->v + epsilon) - newds->v)*div;
229
230                                 /* adapt existing slopes */
231                                 if (first) {
232                                         newmin= min;
233                                         newmax= max;
234                                         first= 0;
235                                 }
236                                 else {
237                                         newmin= MAX2(slopemin, min);
238                                         newmax= MIN2(slopemax, max);
239
240                                         /* verify if there is still space between the slopes */
241                                         if (newmin > newmax) {
242                                                 ds--;
243                                                 a--;
244                                                 break;
245                                         }
246                                 }
247
248                                 slopemin= newmin;
249                                 slopemax= newmax;
250                         }
251                 }
252
253                 if (a == tot) {
254                         ds--;
255                         a--;
256                 }
257
258                 /* always previous z */
259                 z= ds->z;
260
261                 if (first || a==tot-1) {
262                         /* if slopes were not initialized, use last visibility */
263                         v= ds->v;
264                 }
265                 else {
266                         /* compute visibility at center between slopes at z */
267                         slope= (slopemin+slopemax)*0.5f;
268                         v= newds->v + slope*((z - newds->z)/(double)0x7FFFFFFF);
269                 }
270
271                 newds++;
272                 newtot++;
273
274                 newds->z= z;
275                 newds->v= v;
276         }
277
278         if (newtot == 0 || (newds->v != (newds-1)->v))
279                 newtot++;
280
281 #if 0
282         if (print) {
283                 for (a=0, ds=dsample; a<newtot; a++, ds++)
284                         printf("%lf, %f ", ds->z/(double)0x7FFFFFFF, ds->v);
285                 printf("\n");
286         }
287 #endif
288
289         return newtot;
290 }
291
292 static float deep_alpha(Render *re, int obinr, int facenr, int strand)
293 {
294         ObjectInstanceRen *obi= &re->objectinstance[obinr];
295         Material *ma;
296
297         if (strand) {
298                 StrandRen *strand= RE_findOrAddStrand(obi->obr, facenr-1);
299                 ma= strand->buffer->ma;
300         }
301         else {
302                 VlakRen *vlr= RE_findOrAddVlak(obi->obr, (facenr-1) & RE_QUAD_MASK);
303                 ma= vlr->mat;
304         }
305
306         return ma->shad_alpha;
307 }
308
/**
 * Convert the accumulation buffers of a deep shadow pass into one compressed
 * visibility function per pixel.
 *
 * A new ShadSampleBuf is appended to shb->buffers. For every pixel it gets
 * - deepbuf[pixel]: array of DeepSample (z, visibility), or NULL when empty,
 * - totbuf[pixel]:  number of entries in that array.
 *
 * \param apixbuf        size*size list heads with face samples.
 * \param apixbufstrand  same for strand samples, may be NULL.
 *
 * Per pixel the work is: count the samples for each of the shb->totbuf
 * sub-sample masks, build a sorted step function of visibility per
 * sub-sample, merge those into a single averaged function and finally run
 * compress_deepsamples() with shb->compressthresh on the result.
 */
static void compress_deepshadowbuf(Render *re, ShadBuf *shb, APixstr *apixbuf, APixstrand *apixbufstrand)
{
	ShadSampleBuf *shsample;
	/* ds[c] is a moving cursor, sampleds[c] the start of sub-sample c's
	 * section inside the per-pixel scratch array */
	DeepSample *ds[RE_MAX_OSA], *sampleds[RE_MAX_OSA], *dsb, *newbuf;
	APixstr *ap, *apn;
	APixstrand *aps, *apns;
	float visibility;

	const int totbuf= shb->totbuf;
	const float totbuf_f= (float)shb->totbuf;
	const float totbuf_f_inv= 1.0f/totbuf_f;
	const int size= shb->size;

	int a, b, c, tot, minz, found, prevtot, newtot;
	/* totsample/totsamplec are only statistics for the commented-out
	 * printf at the end of this function */
	int sampletot[RE_MAX_OSA], totsample = 0, totsamplec = 0;
	
	shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
	BLI_addtail(&shb->buffers, shsample);

	shsample->totbuf = MEM_callocN(sizeof(int) * size * size, "deeptotbuf");
	shsample->deepbuf = MEM_callocN(sizeof(DeepSample *) * size * size, "deepbuf");

	ap= apixbuf;
	aps= apixbufstrand;
	for (a=0; a<size*size; a++, ap++, aps++) {
		/* count number of samples */
		for (c=0; c<totbuf; c++)
			sampletot[c]= 0;

		tot= 0;
		/* each list node holds up to 4 entries; bit c of mask[b] tells
		 * whether entry b covers sub-sample c */
		for (apn=ap; apn; apn=apn->next)
			for (b=0; b<4; b++)
				if (apn->p[b])
					for (c=0; c<totbuf; c++)
						if (apn->mask[b] & (1<<c))
							sampletot[c]++;

		if (apixbufstrand) {
			for (apns=aps; apns; apns=apns->next)
				for (b=0; b<4; b++)
					if (apns->p[b])
						for (c=0; c<totbuf; c++)
							if (apns->mask[b] & (1<<c))
								sampletot[c]++;
		}

		for (c=0; c<totbuf; c++)
			tot += sampletot[c];

		/* nothing covers this pixel */
		if (tot == 0) {
			shsample->deepbuf[a]= NULL;
			shsample->totbuf[a]= 0;
			continue;
		}

		/* fill samples */
		/* one scratch allocation of 2 entries per sample, split into
		 * consecutive sections, one per sub-sample */
		ds[0]= sampleds[0]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
		for (c=1; c<totbuf; c++)
			ds[c]= sampleds[c]= sampleds[c-1] + sampletot[c-1]*2;

		for (apn=ap; apn; apn=apn->next) {
			for (b=0; b<4; b++) {
				if (apn->p[b]) {
					for (c=0; c<totbuf; c++) {
						if (apn->mask[b] & (1<<c)) {
							/* two entries to create step profile */
							ds[c]->z= apn->z[b];
							ds[c]->v= 1.0f; /* not used */
							ds[c]++;
							ds[c]->z= apn->z[b];
							/* second entry temporarily holds the alpha, it is
							 * replaced by the visibility further down */
							ds[c]->v= deep_alpha(re, apn->obi[b], apn->p[b], 0);
							ds[c]++;
						}
					}
				}
			}
		}

		if (apixbufstrand) {
			for (apns=aps; apns; apns=apns->next) {
				for (b=0; b<4; b++) {
					if (apns->p[b]) {
						for (c=0; c<totbuf; c++) {
							if (apns->mask[b] & (1<<c)) {
								/* two entries to create step profile */
								ds[c]->z= apns->z[b];
								ds[c]->v= 1.0f; /* not used */
								ds[c]++;
								ds[c]->z= apns->z[b];
								ds[c]->v= deep_alpha(re, apns->obi[b], apns->p[b], 1);
								ds[c]++;
							}
						}
					}
				}
			}
		}

		for (c=0; c<totbuf; c++) {
			/* sort by increasing z */
			/* element size is two DeepSamples: the pairs are moved as a unit
			 * and the comparator only looks at the first entry of each pair */
			qsort(sampleds[c], sampletot[c], sizeof(DeepSample)*2, verg_deepsample);

			/* sum visibility, replacing alpha values */
			visibility= 1.0f;
			ds[c]= sampleds[c];

			for (b=0; b<sampletot[c]; b++) {
				/* two entries creating step profile */
				/* first entry: visibility in front of the sample */
				ds[c]->v= visibility;
				ds[c]++;

				/* second entry: visibility behind it (alpha is read here
				 * before being overwritten) */
				visibility *= 1.0f-ds[c]->v;
				ds[c]->v= visibility;
				ds[c]++;
			}

			/* halfway trick, probably won't work well for volumes? */
			/* move each step to the middle between its own z and the z of
			 * the next pair, or 0x7FFFFFFF for the last pair */
			ds[c]= sampleds[c];
			for (b=0; b<sampletot[c]; b++) {
				if (b+1 < sampletot[c]) {
					ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
					ds[c]++;
					ds[c]->z= (ds[c]->z>>1) + ((ds[c]+2)->z>>1);
					ds[c]++;
				}
				else {
					ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
					ds[c]++;
					ds[c]->z= (ds[c]->z>>1) + (0x7FFFFFFF>>1);
					ds[c]++;
				}
			}

			/* init for merge loop */
			/* from here on sampletot[c] counts remaining entries, not pairs */
			ds[c]= sampleds[c];
			sampletot[c] *= 2;
		}

		shsample->deepbuf[a]= MEM_callocN(sizeof(DeepSample)*tot*2, "deepsample");
		shsample->totbuf[a]= 0;

		/* merge buffers */
		dsb= shsample->deepbuf[a];
		while (1) {
			minz= 0;
			found= 0;

			/* smallest z among the current heads of all sub-samples */
			for (c=0; c<totbuf; c++) {
				if (sampletot[c] && (!found || ds[c]->z < minz)) {
					minz= ds[c]->z;
					found= 1;
				}
			}

			/* all sub-sample sections are exhausted */
			if (!found)
				break;

			dsb->z= minz;
			dsb->v= 0.0f;

			visibility= 0.0f;
			for (c=0; c<totbuf; c++) {
				/* consume at most one entry per sub-sample per output sample */
				if (sampletot[c] && ds[c]->z == minz) {
					ds[c]++;
					sampletot[c]--;
				}

				/* average over the sub-samples: full visibility when nothing
				 * was consumed yet, else the value of the last consumed entry */
				if (sampleds[c] == ds[c])
					visibility += totbuf_f_inv;
				else
					visibility += (ds[c]-1)->v / totbuf_f;
			}

			dsb->v= visibility;
			dsb++;
			shsample->totbuf[a]++;
		}

		prevtot= shsample->totbuf[a];
		totsample += prevtot;

		newtot= compress_deepsamples(shsample->deepbuf[a], prevtot, shb->compressthresh);
		shsample->totbuf[a]= newtot;
		totsamplec += newtot;

		/* shrink the allocation when compression removed samples */
		if (newtot < prevtot) {
			newbuf= MEM_mallocN(sizeof(DeepSample)*newtot, "cdeepsample");
			memcpy(newbuf, shsample->deepbuf[a], sizeof(DeepSample)*newtot);
			MEM_freeN(shsample->deepbuf[a]);
			shsample->deepbuf[a]= newbuf;
		}

		/* sampleds[0] is the start of the whole scratch allocation */
		MEM_freeN(sampleds[0]);
	}

	//printf("%d -> %d, ratio %f\n", totsample, totsamplec, (float)totsamplec/(float)totsample);
}
506
/* create Z tiles (for compression): this system is 24 bits!!! */
/**
 * Compress a full z-buffer into 16x16 tiles and append the result as a new
 * ShadSampleBuf to shb->buffers.
 *
 * For every tile one byte in 'cbuf' tells how the matching 'zbuf' entry is
 * to be read:
 * - 0: no allocation, the zbuf entry itself holds a single z value,
 * - 1: pointer to one int followed by 256 single bytes (GCOMP),
 * - 2: pointer to one int followed by 256 byte pairs (BCOMP, GCOMP),
 * - 3: pointer to 256 byte triples (ACOMP, BCOMP, GCOMP).
 * The RCOMP byte of the z values is never stored.
 *
 * \param rectz   z-buffer of shb->size * shb->size ints.
 * \param square  when zero, tiles further from the buffer center than
 *                size/2 + 12 are stored as empty without looking at rectz.
 */
static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
{
	ShadSampleBuf *shsample;
	float dist;
	uintptr_t *ztile;
	int *rz, *rz1, verg, verg1, size= shb->size;
	int a, x, y, minx, miny, byt1, byt2;
	char *rc, *rcline, *ctile, *zt;
	
	shsample= MEM_callocN(sizeof(ShadSampleBuf), "shad sample buf");
	BLI_addtail(&shb->buffers, shsample);
	
	/* one entry per 16x16 tile in both arrays */
	shsample->zbuf= MEM_mallocN(sizeof(uintptr_t)*(size*size)/256, "initshadbuf2");
	shsample->cbuf= MEM_callocN((size*size)/256, "initshadbuf3");
	
	ztile= (uintptr_t *)shsample->zbuf;
	ctile= shsample->cbuf;
	
	/* help buffer */
	/* scratch space for the 256 z values of the tile being processed */
	rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
	
	for (y=0; y<size; y+=16) {
		/* tile edge closest to the buffer center, relative to the center */
		if (y< size/2) miny= y+15-size/2;
		else miny= y-size/2;
		
		for (x=0; x<size; x+=16) {
			
			/* is tile within spotbundle? */
			a= size/2;
			if (x< a) minx= x+15-a;
			else minx= x-a;
			
			dist= sqrt( (float)(minx*minx+miny*miny) );
			
			if (square==0 && dist>(float)(a+12)) {  /* 12, tested with a onlyshadow lamp */
				/* outside: fake a uniform tile with value 0; rz1 is set up so
				 * that *(rz1-1) below reads 'verg' */
				a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
				rz1= (&verg)+1;
			}
			else {
				copy_to_ztile(rectz, size, x, y, 16, rcline);
				rz1= (int *)rcline;
				
				/* compare all values with the lowest 8 bits masked out */
				verg= (*rz1 & 0xFFFFFF00);
				
				for (a=0;a<256;a++, rz1++) {
					if ( (*rz1 & 0xFFFFFF00) !=verg) break;
				}
			}
			if (a==256) { /* complete empty tile */
				/* all 256 values matched: store one value in place of a pointer */
				*ctile= 0;
				*ztile= *(rz1-1);
			}
			else {
				
				/* ACOMP etc. are defined to work L/B endian */
				
				rc= rcline;
				rz1= (int *)rcline;
				verg=  rc[ACOMP];
				verg1= rc[BCOMP];
				rc+= 4;
				/* byt1: ACOMP byte identical in the whole tile,
				 * byt2: BCOMP byte identical (only meaningful while byt1 holds,
				 * the loop stops as soon as byt1 fails) */
				byt1= 1; byt2= 1;
				for (a=1;a<256;a++, rc+=4) {
					byt1 &= (verg==rc[ACOMP]);
					byt2 &= (verg1==rc[BCOMP]);
					
					if (byt1==0) break;
				}
				if (byt1 && byt2) {     /* only store byte */
					*ctile= 1;
					*ztile= (uintptr_t)MEM_mallocN(256+4, "tile1");
					rz= (int *)*ztile;
					/* header: first z value of the tile, carries the shared bytes */
					*rz= *rz1;
					
					zt= (char *)(rz+1);
					rc= rcline;
					for (a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];
				}
				else if (byt1) {                /* only store short */
					*ctile= 2;
					*ztile= (uintptr_t)MEM_mallocN(2*256+4, "Tile2");
					rz= (int *)*ztile;
					/* header: first z value of the tile, carries the shared byte */
					*rz= *rz1;
					
					zt= (char *)(rz+1);
					rc= rcline;
					for (a=0; a<256; a++, zt+=2, rc+=4) {
						zt[0]= rc[BCOMP];
						zt[1]= rc[GCOMP];
					}
				}
				else {                  /* store triple */
					/* no header here, every pixel keeps its own three bytes */
					*ctile= 3;
					*ztile= (uintptr_t)MEM_mallocN(3*256, "Tile3");

					zt= (char *)*ztile;
					rc= rcline;
					for (a=0; a<256; a++, zt+=3, rc+=4) {
						zt[0]= rc[ACOMP];
						zt[1]= rc[BCOMP];
						zt[2]= rc[GCOMP];
					}
				}
			}
			ztile++;
			ctile++;
		}
	}

	MEM_freeN(rcline);
}
619
620 /* sets start/end clipping. lar->shb should be initialized */
621 static void shadowbuf_autoclip(Render *re, LampRen *lar)
622 {
623         ObjectInstanceRen *obi;
624         ObjectRen *obr;
625         VlakRen *vlr= NULL;
626         VertRen *ver= NULL;
627         Material *ma= NULL;
628         float minz, maxz, vec[3], viewmat[4][4], obviewmat[4][4];
629         unsigned int lay = -1;
630         int i, a, maxtotvert, ok= 1;
631         char *clipflag;
632         
633         minz= 1.0e30f; maxz= -1.0e30f;
634         copy_m4_m4(viewmat, lar->shb->viewmat);
635         
636         if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
637
638         maxtotvert= 0;
639         for (obr=re->objecttable.first; obr; obr=obr->next)
640                 maxtotvert= MAX2(obr->totvert, maxtotvert);
641
642         clipflag= MEM_callocN(sizeof(char)*maxtotvert, "autoclipflag");
643
644         /* set clip in vertices when face visible */
645         for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
646                 obr= obi->obr;
647
648                 if (obi->flag & R_TRANSFORMED)
649                         mult_m4_m4m4(obviewmat, viewmat, obi->mat);
650                 else
651                         copy_m4_m4(obviewmat, viewmat);
652
653                 memset(clipflag, 0, sizeof(char)*obr->totvert);
654
655                 /* clear clip, is being set if face is visible (clip is calculated for real later) */
656                 for (a=0; a<obr->totvlak; a++) {
657                         if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
658                         else vlr++;
659                         
660                         /* note; these conditions are copied from zbuffer_shadow() */
661                         if (vlr->mat!= ma) {
662                                 ma= vlr->mat;
663                                 ok= 1;
664                                 if ((ma->mode & MA_SHADBUF)==0) ok= 0;
665                         }
666                         
667                         if (ok && (obi->lay & lay)) {
668                                 clipflag[vlr->v1->index]= 1;
669                                 clipflag[vlr->v2->index]= 1;
670                                 clipflag[vlr->v3->index]= 1;
671                                 if (vlr->v4) clipflag[vlr->v4->index]= 1;
672                         }
673                 }
674                 
675                 /* calculate min and max */
676                 for (a=0; a< obr->totvert;a++) {
677                         if ((a & 255)==0) ver= RE_findOrAddVert(obr, a);
678                         else ver++;
679                         
680                         if (clipflag[a]) {
681                                 copy_v3_v3(vec, ver->co);
682                                 mul_m4_v3(obviewmat, vec);
683                                 /* Z on visible side of lamp space */
684                                 if (vec[2] < 0.0f) {
685                                         float inpr, z= -vec[2];
686                                         
687                                         /* since vec is rotated in lampspace, this is how to get the cosine of angle */
688                                         /* precision is set 20% larger */
689                                         vec[2]*= 1.2f;
690                                         normalize_v3(vec);
691                                         inpr= - vec[2];
692
693                                         if (inpr>=lar->spotsi) {
694                                                 if (z<minz) minz= z;
695                                                 if (z>maxz) maxz= z;
696                                         }
697                                 }
698                         }
699                 }
700         }
701
702         MEM_freeN(clipflag);
703         
704         /* set clipping min and max */
705         if (minz < maxz) {
706                 float delta= (maxz - minz);     /* threshold to prevent precision issues */
707                 
708                 //printf("minz %f maxz %f delta %f\n", minz, maxz, delta);
709                 if (lar->bufflag & LA_SHADBUF_AUTO_START)
710                         lar->shb->d= minz - delta*0.02f;        /* 0.02 is arbitrary... needs more thinking! */
711                 if (lar->bufflag & LA_SHADBUF_AUTO_END)
712                         lar->shb->clipend= maxz + delta*0.1f;
713                 
714                 /* bias was calculated as percentage, we scale it to prevent animation issues */
715                 delta= (lar->clipend-lar->clipsta)/(lar->shb->clipend-lar->shb->d);
716                 //printf("bias delta %f\n", delta);
717                 lar->shb->bias= (int) (delta*(float)lar->shb->bias);
718         }
719 }
720
721 static void makeflatshadowbuf(Render *re, LampRen *lar, float *jitbuf)
722 {
723         ShadBuf *shb= lar->shb;
724         int *rectz, samples;
725
726         /* zbuffering */
727         rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
728         
729         for (samples=0; samples<shb->totbuf; samples++) {
730                 zbuffer_shadow(re, shb->persmat, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
731                 /* create Z tiles (for compression): this system is 24 bits!!! */
732                 compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
733
734                 if (re->test_break(re->tbh))
735                         break;
736         }
737         
738         MEM_freeN(rectz);
739 }
740
741 static void makedeepshadowbuf(Render *re, LampRen *lar, float *jitbuf)
742 {
743         ShadBuf *shb= lar->shb;
744         APixstr *apixbuf;
745         APixstrand *apixbufstrand= NULL;
746         ListBase apsmbase= {NULL, NULL};
747
748         /* zbuffering */
749         apixbuf= MEM_callocN(sizeof(APixstr)*shb->size*shb->size, "APixbuf");
750         if (re->totstrand)
751                 apixbufstrand= MEM_callocN(sizeof(APixstrand)*shb->size*shb->size, "APixbufstrand");
752
753         zbuffer_abuf_shadow(re, lar, shb->persmat, apixbuf, apixbufstrand, &apsmbase, shb->size,
754                 shb->totbuf, (float(*)[2])jitbuf);
755
756         /* create Z tiles (for compression): this system is 24 bits!!! */
757         compress_deepshadowbuf(re, shb, apixbuf, apixbufstrand);
758         
759         MEM_freeN(apixbuf);
760         if (apixbufstrand)
761                 MEM_freeN(apixbufstrand);
762         freepsA(&apsmbase);
763 }
764
765 void makeshadowbuf(Render *re, LampRen *lar)
766 {
767         ShadBuf *shb= lar->shb;
768         float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f}, angle, temp;
769         
770         if (lar->bufflag & (LA_SHADBUF_AUTO_START|LA_SHADBUF_AUTO_END))
771                 shadowbuf_autoclip(re, lar);
772         
773         /* just to enforce identical behavior of all irregular buffers */
774         if (lar->buftype==LA_SHADBUF_IRREGULAR)
775                 shb->size= 1024;
776         
777         /* matrices and window: in winmat the transformation is being put,
778          * transforming from observer view to lamp view, including lamp window matrix */
779         
780         angle= saacos(lar->spotsi);
781         temp= 0.5f*shb->size*cos(angle)/sin(angle);
782         shb->pixsize= (shb->d)/temp;
783         wsize= shb->pixsize*(shb->size/2.0f);
784         
785         perspective_m4(shb->winmat, -wsize, wsize, -wsize, wsize, shb->d, shb->clipend);
786         mult_m4_m4m4(shb->persmat, shb->winmat, shb->viewmat);
787
788         if (ELEM3(lar->buftype, LA_SHADBUF_REGULAR, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP)) {
789                 shb->totbuf= lar->buffers;
790
791                 /* jitter, weights - not threadsafe! */
792                 BLI_lock_thread(LOCK_CUSTOM1);
793                 shb->jit= give_jitter_tab(get_render_shadow_samples(&re->r, shb->samp));
794                 make_jitter_weight_tab(re, shb, lar->filtertype);
795                 BLI_unlock_thread(LOCK_CUSTOM1);
796                 
797                 if (shb->totbuf==4) jitbuf= give_jitter_tab(2);
798                 else if (shb->totbuf==9) jitbuf= give_jitter_tab(3);
799                 else jitbuf= twozero;
800                 
801                 /* zbuffering */
802                 if (lar->buftype == LA_SHADBUF_DEEP) {
803                         makedeepshadowbuf(re, lar, jitbuf);
804                         shb->totbuf= 1;
805                 }
806                 else
807                         makeflatshadowbuf(re, lar, jitbuf);
808
809                 /* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
810         }
811 }
812
813 static void *do_shadow_thread(void *re_v)
814 {
815         Render *re= (Render*)re_v;
816         LampRen *lar;
817
818         do {
819                 BLI_lock_thread(LOCK_CUSTOM1);
820                 for (lar=re->lampren.first; lar; lar=lar->next) {
821                         if (lar->shb && !lar->thread_assigned) {
822                                 lar->thread_assigned= 1;
823                                 break;
824                         }
825                 }
826                 BLI_unlock_thread(LOCK_CUSTOM1);
827
828                 /* if type is irregular, this only sets the perspective matrix and autoclips */
829                 if (lar) {
830                         makeshadowbuf(re, lar);
831                         BLI_lock_thread(LOCK_CUSTOM1);
832                         lar->thread_ready= 1;
833                         BLI_unlock_thread(LOCK_CUSTOM1);
834                 }
835         } while (lar && !re->test_break(re->tbh));
836
837         return NULL;
838 }
839
/* Break flag shared with the worker threads; written by
 * threaded_makeshadowbufs() and read through thread_break(). */
static volatile int g_break= 0;

/* Break callback installed while shadow buffers are built in threads:
 * ignores its argument and reports the current value of g_break. */
static int thread_break(void *UNUSED(arg))
{
	const int stop_requested= g_break;

	return stop_requested;
}
845
846 void threaded_makeshadowbufs(Render *re)
847 {
848         ListBase threads;
849         LampRen *lar;
850         int a, totthread= 0;
851         int (*test_break)(void *);
852
853         /* count number of threads to use */
854         if (G.is_rendering) {
855                 for (lar=re->lampren.first; lar; lar= lar->next)
856                         if (lar->shb)
857                                 totthread++;
858                 
859                 totthread= MIN2(totthread, re->r.threads);
860         }
861         else
862                 totthread= 1; /* preview render */
863
864         if (totthread <= 1) {
865                 for (lar=re->lampren.first; lar; lar= lar->next) {
866                         if (re->test_break(re->tbh)) break;
867                         if (lar->shb) {
868                                 /* if type is irregular, this only sets the perspective matrix and autoclips */
869                                 makeshadowbuf(re, lar);
870                         }
871                 }
872         }
873         else {
874                 /* swap test break function */
875                 test_break= re->test_break;
876                 re->test_break= thread_break;
877
878                 for (lar=re->lampren.first; lar; lar= lar->next) {
879                         lar->thread_assigned= 0;
880                         lar->thread_ready= 0;
881                 }
882
883                 BLI_init_threads(&threads, do_shadow_thread, totthread);
884                 
885                 for (a=0; a<totthread; a++)
886                         BLI_insert_thread(&threads, re);
887
888                 /* keep rendering as long as there are shadow buffers not ready */
889                 do {
890                         if ((g_break=test_break(re->tbh)))
891                                 break;
892
893                         PIL_sleep_ms(50);
894
895                         BLI_lock_thread(LOCK_CUSTOM1);
896                         for (lar=re->lampren.first; lar; lar= lar->next)
897                                 if (lar->shb && !lar->thread_ready)
898                                         break;
899                         BLI_unlock_thread(LOCK_CUSTOM1);
900                 } while (lar);
901         
902                 BLI_end_threads(&threads);
903
904                 /* unset threadsafety */
905                 re->test_break= test_break;
906                 g_break= 0;
907         }
908 }
909
910 void freeshadowbuf(LampRen *lar)
911 {
912         if (lar->shb) {
913                 ShadBuf *shb= lar->shb;
914                 ShadSampleBuf *shsample;
915                 int b, v;
916                 
917                 for (shsample= shb->buffers.first; shsample; shsample= shsample->next) {
918                         if (shsample->deepbuf) {
919                                 v= shb->size*shb->size;
920                                 for (b=0; b<v; b++)
921                                         if (shsample->deepbuf[b])
922                                                 MEM_freeN(shsample->deepbuf[b]);
923                                         
924                                 MEM_freeN(shsample->deepbuf);
925                                 MEM_freeN(shsample->totbuf);
926                         }
927                         else {
928                                 intptr_t *ztile= shsample->zbuf;
929                                 char *ctile= shsample->cbuf;
930                                 
931                                 v= (shb->size*shb->size)/256;
932                                 for (b=0; b<v; b++, ztile++, ctile++)
933                                         if (*ctile) MEM_freeN((void *) *ztile);
934                                 
935                                 MEM_freeN(shsample->zbuf);
936                                 MEM_freeN(shsample->cbuf);
937                         }
938                 }
939                 BLI_freelistN(&shb->buffers);
940                 
941                 if (shb->weight) MEM_freeN(shb->weight);
942                 MEM_freeN(lar->shb);
943                 
944                 lar->shb= NULL;
945         }
946 }
947
948
949 static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
950 {
951         /* return a 1 if fully compressed shadbuf-tile && z==const */
952         int ofs;
953         char *ct;
954
955         if (shsample->deepbuf)
956                 return 0;
957
958         /* always test borders of shadowbuffer */
959         if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1;
960         if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1;
961
962         /* calc z */
963         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
964         ct= shsample->cbuf+ofs;
965         if (*ct==0) {
966                 if (nr==0) {
967                         *rz= *( (int **)(shsample->zbuf+ofs) );
968                         return 1;
969                 }
970                 else if (*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
971                 
972                 return 1;
973         }
974         
975         return 0;
976 }
977
978 static float readdeepvisibility(DeepSample *dsample, int tot, int z, int bias, float *biast)
979 {
980         DeepSample *ds, *prevds;
981         float t;
982         int a;
983
984         /* tricky stuff here; we use ints which can overflow easily with bias values */
985
986         ds= dsample;
987         for (a=0; a<tot && (z-bias > ds->z); a++, ds++) {}
988
989         if (a == tot) {
990                 if (biast)
991                         *biast= 0.0f;
992                 return (ds-1)->v; /* completely behind all samples */
993         }
994         
995         /* check if this read needs bias blending */
996         if (biast) {
997                 if (z > ds->z)
998                         *biast= (float)(z - ds->z)/(float)bias;
999                 else
1000                         *biast= 0.0f;
1001         }
1002
1003         if (a == 0)
1004                 return 1.0f; /* completely in front of all samples */
1005
1006         /* converting to float early here because ds->z - prevds->z can overflow */
1007         prevds= ds-1;
1008         t= ((float)(z-bias) - (float)prevds->z)/((float)ds->z - (float)prevds->z);
1009         return t*ds->v + (1.0f-t)*prevds->v;
1010 }
1011
1012 static float readdeepshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
1013 {
1014         float v, biasv, biast;
1015         int ofs, tot;
1016
1017         if (zs < - 0x7FFFFE00 + bias)
1018                 return 1.0;     /* extreme close to clipstart */
1019
1020         /* calc z */
1021         ofs= ys*shb->size + xs;
1022         tot= shsample->totbuf[ofs];
1023         if (tot == 0)
1024                 return 1.0f;
1025
1026         v= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, bias, &biast);
1027
1028         if (biast != 0.0f) {
1029                 /* in soft bias area */
1030                 biasv= readdeepvisibility(shsample->deepbuf[ofs], tot, zs, 0, 0);
1031
1032                 biast= biast*biast;
1033                 return (1.0f-biast)*v + biast*biasv;
1034         }
1035
1036         return v;
1037 }
1038
1039 /* return 1.0 : fully in light */
1040 static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)     
1041 {
1042         float temp;
1043         int *rz, ofs;
1044         int zsamp=0;
1045         char *ct, *cz;
1046
1047         /* simpleclip */
1048         /* if (xs<0 || ys<0) return 1.0; */
1049         /* if (xs>=shb->size || ys>=shb->size) return 1.0; */
1050         
1051         /* always test borders of shadowbuffer */
1052         if (xs<0) xs= 0; else if (xs>=shb->size) xs= shb->size-1;
1053         if (ys<0) ys= 0; else if (ys>=shb->size) ys= shb->size-1;
1054
1055         if (shsample->deepbuf)
1056                 return readdeepshadowbuf(shb, shsample, bias, xs, ys, zs);
1057
1058         /* calc z */
1059         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1060         ct= shsample->cbuf+ofs;
1061         rz= *( (int **)(shsample->zbuf+ofs) );
1062
1063         if (*ct==3) {
1064                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1065                 cz= (char *)&zsamp;
1066                 cz[ACOMP]= ct[0];
1067                 cz[BCOMP]= ct[1];
1068                 cz[GCOMP]= ct[2];
1069         }
1070         else if (*ct==2) {
1071                 ct= ((char *)rz);
1072                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1073                 zsamp= *rz;
1074         
1075                 cz= (char *)&zsamp;
1076                 cz[BCOMP]= ct[0];
1077                 cz[GCOMP]= ct[1];
1078         }
1079         else if (*ct==1) {
1080                 ct= ((char *)rz);
1081                 ct+= 4+16*(ys & 15)+(xs & 15);
1082                 zsamp= *rz;
1083
1084                 cz= (char *)&zsamp;
1085                 cz[GCOMP]= ct[0];
1086
1087         }
1088         else {
1089                 /* got warning on this for 64 bits.... */
1090                 /* but it's working code! in this case rz is not a pointer but zvalue (ton) */
1091                 zsamp= GET_INT_FROM_POINTER(rz);
1092         }
1093
1094         /* tricky stuff here; we use ints which can overflow easily with bias values */
1095         
1096         if (zsamp > zs) return 1.0;             /* absolute no shadow */
1097         else if (zs < - 0x7FFFFE00 + bias) return 1.0;  /* extreme close to clipstart */
1098         else if (zsamp < zs-bias) return 0.0;   /* absolute in shadow */
1099         else {                                  /* soft area */
1100                 
1101                 temp=  ( (float)(zs- zsamp) )/(float)bias;
1102                 return 1.0f - temp*temp;
1103                         
1104         }
1105 }
1106
1107 static void shadowbuf_project_co(float *x, float *y, float *z, ShadBuf *shb, const float co[3])
1108 {
1109         float hco[4], size= 0.5f*(float)shb->size;
1110
1111         copy_v3_v3(hco, co);
1112         hco[3]= 1.0f;
1113
1114         mul_m4_v4(shb->persmat, hco);
1115
1116         *x= size*(1.0f+hco[0]/hco[3]);
1117         *y= size*(1.0f+hco[1]/hco[3]);
1118         if (z) *z= (hco[2]/hco[3]);
1119 }
1120
1121 /* the externally called shadow testing (reading) function */
1122 /* return 1.0: no shadow at all */
1123 float testshadowbuf(Render *re, ShadBuf *shb, const float co[3], const float dxco[3], const float dyco[3], float inp, float mat_bias)
1124 {
1125         ShadSampleBuf *shsample;
1126         float fac, dco[3], dx[3], dy[3], shadfac=0.0f;
1127         float xs1, ys1, zs1, *jit, *weight, xres, yres, biasf;
1128         int xs, ys, zs, bias, *rz;
1129         short a, num;
1130         
1131         /* crash preventer */
1132         if (shb->buffers.first==NULL)
1133                 return 1.0f;
1134         
1135         /* when facing away, assume fully in shadow */
1136         if (inp <= 0.0f)
1137                 return 0.0f;
1138
1139         /* project coordinate to pixel space */
1140         shadowbuf_project_co(&xs1, &ys1, &zs1, shb, co);
1141
1142         /* clip z coordinate, z is projected so that (-1.0, 1.0) matches
1143          * (clipstart, clipend), so we can do this simple test */
1144         if (zs1>=1.0f)
1145                 return 0.0f;
1146         else if (zs1<= -1.0f)
1147                 return 1.0f;
1148
1149         zs= ((float)0x7FFFFFFF)*zs1;
1150
1151         /* take num*num samples, increase area with fac */
1152         num= get_render_shadow_samples(&re->r, shb->samp);
1153         num= num*num;
1154         fac= shb->soft;
1155         
1156         /* compute z bias */
1157         if (mat_bias!=0.0f) biasf= shb->bias*mat_bias;
1158         else biasf= shb->bias;
1159         /* with inp==1.0, bias is half the size. correction value was 1.1, giving errors 
1160          * on cube edges, with one side being almost frontal lighted (ton)  */
1161         bias= (1.5f-inp*inp)*biasf;
1162         
1163         /* in case of no filtering we can do things simpler */
1164         if (num==1) {
1165                 for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
1166                         shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
1167                 
1168                 return shadfac/(float)shb->totbuf;
1169         }
1170
1171         /* calculate filter size */
1172         add_v3_v3v3(dco, co, dxco);
1173         shadowbuf_project_co(&dx[0], &dx[1], NULL, shb, dco);
1174         dx[0]= xs1 - dx[0];
1175         dx[1]= ys1 - dx[1];
1176
1177         add_v3_v3v3(dco, co, dyco);
1178         shadowbuf_project_co(&dy[0], &dy[1], NULL, shb, dco);
1179         dy[0]= xs1 - dy[0];
1180         dy[1]= ys1 - dy[1];
1181         
1182         xres = fac * (fabsf(dx[0]) + fabsf(dy[0]));
1183         yres = fac * (fabsf(dx[1]) + fabsf(dy[1]));
1184         if (xres<1.0f) xres= 1.0f;
1185         if (yres<1.0f) yres= 1.0f;
1186         
1187         /* make xs1/xs1 corner of sample area */
1188         xs1 -= xres*0.5f;
1189         ys1 -= yres*0.5f;
1190
1191         /* in case we have a constant value in a tile, we can do quicker lookup */
1192         if (xres<16.0f && yres<16.0f) {
1193                 shsample= shb->buffers.first;
1194                 if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
1195                         if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
1196                                 if (firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
1197                                         if (firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
1198                                                 return readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
1199                                         }
1200                                 }
1201                         }
1202                 }
1203         }
1204         
1205         /* full jittered shadow buffer lookup */
1206         for (shsample= shb->buffers.first; shsample; shsample= shsample->next) {
1207                 jit= shb->jit;
1208                 weight= shb->weight;
1209                 
1210                 for (a=num; a>0; a--, jit+=2, weight++) {
1211                         /* instead of jit i tried random: ugly! */
1212                         /* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
1213                         /* xs1 and ys1 are already corrected to be corner of sample area */
1214                         xs= xs1 + xres*(jit[0] + 0.5f);
1215                         ys= ys1 + yres*(jit[1] + 0.5f);
1216                         
1217                         shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
1218                 }
1219         }
1220
1221         /* Renormalizes for the sample number: */
1222         return shadfac/(float)shb->totbuf;
1223 }
1224
1225 /* different function... sampling behind clipend can be LIGHT, bias is negative! */
1226 /* return: light */
1227 static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
1228 {
1229         float temp;
1230         int *rz, ofs;
1231         int bias, zbias, zsamp;
1232         char *ct, *cz;
1233
1234         /* negative! The other side is more important */
1235         bias= -shb->bias;
1236         
1237         /* simpleclip */
1238         if (xs<0 || ys<0) return 0.0;
1239         if (xs>=shb->size || ys>=shb->size) return 0.0;
1240
1241         /* calc z */
1242         ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
1243         ct= shsample->cbuf+ofs;
1244         rz= *( (int **)(shsample->zbuf+ofs) );
1245
1246         if (*ct==3) {
1247                 ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
1248                 cz= (char *)&zsamp;
1249                 zsamp= 0;
1250                 cz[ACOMP]= ct[0];
1251                 cz[BCOMP]= ct[1];
1252                 cz[GCOMP]= ct[2];
1253         }
1254         else if (*ct==2) {
1255                 ct= ((char *)rz);
1256                 ct+= 4+2*16*(ys & 15)+2*(xs & 15);
1257                 zsamp= *rz;
1258         
1259                 cz= (char *)&zsamp;
1260                 cz[BCOMP]= ct[0];
1261                 cz[GCOMP]= ct[1];
1262         }
1263         else if (*ct==1) {
1264                 ct= ((char *)rz);
1265                 ct+= 4+16*(ys & 15)+(xs & 15);
1266                 zsamp= *rz;
1267
1268                 cz= (char *)&zsamp;
1269                 cz[GCOMP]= ct[0];
1270
1271         }
1272         else {
1273                 /* same as before */
1274                 /* still working code! (ton) */
1275                 zsamp= GET_INT_FROM_POINTER(rz);
1276         }
1277
1278         /* NO schadow when sampled at 'eternal' distance */
1279
1280         if (zsamp >= 0x7FFFFE00) return 1.0;
1281
1282         if (zsamp > zs) return 1.0;             /* absolute no shadww */
1283         else {
1284                 /* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
1285                 zbias= 0x7fffffff - zs;
1286                 if (zbias > -bias) {
1287                         if ( zsamp < zs-bias) return 0.0;       /* absolute in shadow */
1288                 }
1289                 else return 0.0;        /* absolute shadow */
1290         }
1291
1292         /* soft area */
1293         
1294         temp=  ( (float)(zs- zsamp) )/(float)bias;
1295         return 1.0f - temp*temp;
1296 }
1297
1298
1299 float shadow_halo(LampRen *lar, const float p1[3], const float p2[3])
1300 {
1301         /* p1 p2 already are rotated in spot-space */
1302         ShadBuf *shb= lar->shb;
1303         ShadSampleBuf *shsample;
1304         float co[4], siz;
1305         float labda, labdao, labdax, labday, ldx, ldy;
1306         float zf, xf1, yf1, zf1, xf2, yf2, zf2;
1307         float count, lightcount;
1308         int x, y, z, xs1, ys1;
1309         int dx = 0, dy = 0;
1310         
1311         siz= 0.5f*(float)shb->size;
1312         
1313         co[0]= p1[0];
1314         co[1]= p1[1];
1315         co[2]= p1[2]/lar->sh_zfac;
1316         co[3]= 1.0;
1317         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1318         xf1= siz*(1.0f+co[0]/co[3]);
1319         yf1= siz*(1.0f+co[1]/co[3]);
1320         zf1= (co[2]/co[3]);
1321
1322
1323         co[0]= p2[0];
1324         co[1]= p2[1];
1325         co[2]= p2[2]/lar->sh_zfac;
1326         co[3]= 1.0;
1327         mul_m4_v4(shb->winmat, co);     /* rational hom co */
1328         xf2= siz*(1.0f+co[0]/co[3]);
1329         yf2= siz*(1.0f+co[1]/co[3]);
1330         zf2= (co[2]/co[3]);
1331
1332         /* the 2dda (a pixel line formula) */
1333
1334         xs1= (int)xf1;
1335         ys1= (int)yf1;
1336
1337         if (xf1 != xf2) {
1338                 if (xf2-xf1 > 0.0f) {
1339                         labdax= (xf1-xs1-1.0f)/(xf1-xf2);
1340                         ldx= -shb->shadhalostep/(xf1-xf2);
1341                         dx= shb->shadhalostep;
1342                 }
1343                 else {
1344                         labdax= (xf1-xs1)/(xf1-xf2);
1345                         ldx= shb->shadhalostep/(xf1-xf2);
1346                         dx= -shb->shadhalostep;
1347                 }
1348         }
1349         else {
1350                 labdax= 1.0;
1351                 ldx= 0.0;
1352         }
1353
1354         if (yf1 != yf2) {
1355                 if (yf2-yf1 > 0.0f) {
1356                         labday= (yf1-ys1-1.0f)/(yf1-yf2);
1357                         ldy= -shb->shadhalostep/(yf1-yf2);
1358                         dy= shb->shadhalostep;
1359                 }
1360                 else {
1361                         labday= (yf1-ys1)/(yf1-yf2);
1362                         ldy= shb->shadhalostep/(yf1-yf2);
1363                         dy= -shb->shadhalostep;
1364                 }
1365         }
1366         else {
1367                 labday= 1.0;
1368                 ldy= 0.0;
1369         }
1370         
1371         x= xs1;
1372         y= ys1;
1373         labda= count= lightcount= 0.0;
1374
1375 /* printf("start %x %x  \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
1376
1377         while (1) {
1378                 labdao= labda;
1379                 
1380                 if (labdax==labday) {
1381                         labdax+= ldx;
1382                         x+= dx;
1383                         labday+= ldy;
1384                         y+= dy;
1385                 }
1386                 else {
1387                         if (labdax<labday) {
1388                                 labdax+= ldx;
1389                                 x+= dx;
1390                         }
1391                         else {
1392                                 labday+= ldy;
1393                                 y+= dy;
1394                         }
1395                 }
1396                 
1397                 labda = minf(labdax, labday);
1398                 if (labda==labdao || labda>=1.0f) break;
1399                 
1400                 zf= zf1 + labda*(zf2-zf1);
1401                 count+= (float)shb->totbuf;
1402
1403                 if (zf<= -1.0f) lightcount += 1.0f;     /* close to the spot */
1404                 else {
1405                 
1406                         /* make sure, behind the clipend we extend halolines. */
1407                         if (zf>=1.0f) z= 0x7FFFF000;
1408                         else z= (int)(0x7FFFF000*zf);
1409                         
1410                         for (shsample= shb->buffers.first; shsample; shsample= shsample->next)
1411                                 lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
1412                         
1413                 }
1414         }
1415         
1416         if (count!=0.0f) return (lightcount/count);
1417         return 0.0f;
1418         
1419 }
1420
1421
1422 /* ********************* Irregular Shadow Buffer (ISB) ************* */
1423 /* ********** storage of all view samples in a raster of lists ***** */
1424
1425 /* based on several articles describing this method, like:
1426  * The Irregular Z-Buffer and its Application to Shadow Mapping
1427  * Gregory S. Johnson - William R. Mark - Christopher A. Burns
1428  * and
1429  * Alias-Free Shadow Maps
1430  * Timo Aila and Samuli Laine
1431  */
1432
1433 /* bsp structure (actually kd tree) */
1434
1435 #define BSPMAX_SAMPLE   128
1436 #define BSPMAX_DEPTH    32
1437
1438 /* aligned with struct rctf */
1439 typedef struct Boxf {
1440         float xmin, xmax;
1441         float ymin, ymax;
1442         float zmin, zmax;
1443 } Boxf;
1444
1445 typedef struct ISBBranch {
1446         struct ISBBranch *left, *right;
1447         float divider[2];
1448         Boxf box;
1449         short totsamp, index, full, unused;
1450         ISBSample **samples;
1451 } ISBBranch;
1452
1453 typedef struct BSPFace {
1454         Boxf box;
1455         const float *v1, *v2, *v3, *v4;
1456         int obi;                /* object for face lookup */
1457         int facenr;             /* index to retrieve VlakRen */
1458         int type;               /* only for strand now */
1459         short shad_alpha, is_full;
1460         
1461         /* strand caching data, optimize for point_behind_strand() */
1462         float radline, radline_end, len;
1463         float vec1[3], vec2[3], rc[3];
1464 } BSPFace;
1465
1466 /* boxes are in lamp projection */
1467 static void init_box(Boxf *box)
1468 {
1469         box->xmin = 1000000.0f;
1470         box->xmax = 0;
1471         box->ymin = 1000000.0f;
1472         box->ymax = 0;
1473         box->zmin= 0x7FFFFFFF;
1474         box->zmax= - 0x7FFFFFFF;
1475 }
1476
1477 /* use v1 to calculate boundbox */
1478 static void bound_boxf(Boxf *box, const float v1[3])
1479 {
1480         if (v1[0] < box->xmin) box->xmin = v1[0];
1481         if (v1[0] > box->xmax) box->xmax = v1[0];
1482         if (v1[1] < box->ymin) box->ymin = v1[1];
1483         if (v1[1] > box->ymax) box->ymax = v1[1];
1484         if (v1[2] < box->zmin) box->zmin= v1[2];
1485         if (v1[2] > box->zmax) box->zmax= v1[2];
1486 }
1487
1488 /* use v1 to calculate boundbox */
1489 static void bound_rectf(rctf *box, const float v1[2])
1490 {
1491         if (v1[0] < box->xmin) box->xmin = v1[0];
1492         if (v1[0] > box->xmax) box->xmax = v1[0];
1493         if (v1[1] < box->ymin) box->ymin = v1[1];
1494         if (v1[1] > box->ymax) box->ymax = v1[1];
1495 }
1496
1497
1498 /* halfway splitting, for initializing a more regular tree */
1499 static void isb_bsp_split_init(ISBBranch *root, MemArena *mem, int level)
1500 {
1501         
1502         /* if level > 0 we create new branches and go deeper */
1503         if (level > 0) {
1504                 ISBBranch *left, *right;
1505                 int i;
1506                 
1507                 /* splitpoint */
1508                 root->divider[0]= 0.5f*(root->box.xmin+root->box.xmax);
1509                 root->divider[1]= 0.5f*(root->box.ymin+root->box.ymax);
1510                 
1511                 /* find best splitpoint */
1512                 if (RCT_SIZE_X(&root->box) > RCT_SIZE_Y(&root->box))
1513                         i = root->index = 0;
1514                 else
1515                         i = root->index = 1;
1516                 
1517                 left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1518                 right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1519                 
1520                 /* box info */
1521                 left->box= root->box;
1522                 right->box= root->box;
1523                 if (i==0) {
1524                         left->box.xmax = root->divider[0];
1525                         right->box.xmin = root->divider[0];
1526                 }
1527                 else {
1528                         left->box.ymax = root->divider[1];
1529                         right->box.ymin = root->divider[1];
1530                 }
1531                 isb_bsp_split_init(left, mem, level-1);
1532                 isb_bsp_split_init(right, mem, level-1);
1533         }
1534         else {
1535                 /* we add sample array */
1536                 root->samples= BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1537         }
1538 }
1539
1540 /* note; if all samples on same location we just spread them over 2 new branches */
1541 static void isb_bsp_split(ISBBranch *root, MemArena *mem)
1542 {
1543         ISBBranch *left, *right;
1544         ISBSample *samples[BSPMAX_SAMPLE];
1545         int a, i;
1546
1547         /* splitpoint */
1548         root->divider[0]= root->divider[1]= 0.0f;
1549         for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
1550                 root->divider[0]+= root->samples[a]->zco[0];
1551                 root->divider[1]+= root->samples[a]->zco[1];
1552         }
1553         root->divider[0]/= BSPMAX_SAMPLE;
1554         root->divider[1]/= BSPMAX_SAMPLE;
1555         
1556         /* find best splitpoint */
1557         if (RCT_SIZE_X(&root->box) > RCT_SIZE_Y(&root->box))
1558                 i = root->index = 0;
1559         else
1560                 i = root->index = 1;
1561         
1562         /* new branches */
1563         left= root->left= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1564         right= root->right= BLI_memarena_alloc(mem, sizeof(ISBBranch));
1565
1566         /* new sample array */
1567         left->samples = BLI_memarena_alloc(mem, BSPMAX_SAMPLE*sizeof(void *));
1568         right->samples = samples;  /* tmp */
1569
1570         /* split samples */
1571         for (a=BSPMAX_SAMPLE-1; a>=0; a--) {
1572                 int comp= 0;
1573                 /* this prevents adding samples all to 1 branch when divider is equal to samples */
1574                 if (root->samples[a]->zco[i] == root->divider[i])
1575                         comp= a & 1;
1576                 else if (root->samples[a]->zco[i] < root->divider[i])
1577                         comp= 1;
1578                 
1579                 if (comp==1) {
1580                         left->samples[left->totsamp]= root->samples[a];
1581                         left->totsamp++;
1582                 }
1583                 else {
1584                         right->samples[right->totsamp]= root->samples[a];
1585                         right->totsamp++;
1586                 }
1587         }
1588         
1589         /* copy samples from tmp */
1590         memcpy(root->samples, samples, right->totsamp*(sizeof(void *)));
1591         right->samples= root->samples;
1592         root->samples= NULL;
1593         
1594         /* box info */
1595         left->box= root->box;
1596         right->box= root->box;
1597         if (i==0) {
1598                 left->box.xmax = root->divider[0];
1599                 right->box.xmin = root->divider[0];
1600         }
1601         else {
1602                 left->box.ymax = root->divider[1];
1603                 right->box.ymin = root->divider[1];
1604         }
1605 }
1606
1607 /* inserts sample in main tree, also splits on threshold */
1608 /* returns 1 if error */
1609 static int isb_bsp_insert(ISBBranch *root, MemArena *memarena, ISBSample *sample)
1610 {
1611         ISBBranch *bspn= root;
1612         float *zco= sample->zco;
1613         int i= 0;
1614         
1615         /* debug counter, also used to check if something was filled in ever */
1616         root->totsamp++;
1617         
1618         /* going over branches until last one found */
1619         while (bspn->left) {
1620                 if (zco[bspn->index] <= bspn->divider[bspn->index])
1621                         bspn= bspn->left;
1622                 else
1623                         bspn= bspn->right;
1624                 i++;
1625         }
1626         /* bspn now is the last branch */
1627         
1628         if (bspn->totsamp==BSPMAX_SAMPLE) {
1629                 printf("error in bsp branch\n");        /* only for debug, cannot happen */
1630                 return 1;
1631         }
1632         
1633         /* insert */
1634         bspn->samples[bspn->totsamp]= sample;
1635         bspn->totsamp++;
1636
1637         /* split if allowed and needed */
1638         if (bspn->totsamp==BSPMAX_SAMPLE) {
1639                 if (i==BSPMAX_DEPTH) {
1640                         bspn->totsamp--;        /* stop filling in... will give errors */
1641                         return 1;
1642                 }
1643                 isb_bsp_split(bspn, memarena);
1644         }
1645         return 0;
1646 }
1647
1648 /* initialize vars in face, for optimal point-in-face test */
1649 static void bspface_init_strand(BSPFace *face) 
1650 {
1651         
1652         face->radline= 0.5f* len_v2v2(face->v1, face->v2);
1653         
1654         mid_v3_v3v3(face->vec1, face->v1, face->v2);
1655         if (face->v4)
1656                 mid_v3_v3v3(face->vec2, face->v3, face->v4);
1657         else
1658                 copy_v3_v3(face->vec2, face->v3);
1659         
1660         face->rc[0]= face->vec2[0]-face->vec1[0];
1661         face->rc[1]= face->vec2[1]-face->vec1[1];
1662         face->rc[2]= face->vec2[2]-face->vec1[2];
1663         
1664         face->len= face->rc[0]*face->rc[0]+ face->rc[1]*face->rc[1];
1665         
1666         if (face->len!=0.0f) {
1667                 face->radline_end= face->radline/sqrt(face->len);
1668                 face->len= 1.0f/face->len;
1669         }
1670 }
1671
1672 /* brought back to a simple 2d case */
1673 static int point_behind_strand(const float p[3], BSPFace *face)
1674 {
1675         /* v1 - v2 is radius, v1 - v3 length */
1676         float dist, rc[2], pt[2];
1677         
1678         /* using code from dist_to_line_segment_v2(), distance vec to line-piece */
1679
1680         if (face->len==0.0f) {
1681                 rc[0]= p[0]-face->vec1[0];
1682                 rc[1]= p[1]-face->vec1[1];
1683                 dist= (float)(sqrt(rc[0]*rc[0]+ rc[1]*rc[1]));
1684                 
1685                 if (dist < face->radline)
1686                         return 1;
1687         }
1688         else {
1689                 float labda= ( face->rc[0]*(p[0]-face->vec1[0]) + face->rc[1]*(p[1]-face->vec1[1]) )*face->len;
1690                 
1691                 if (labda > -face->radline_end && labda < 1.0f+face->radline_end) {
1692                         /* hesse for dist: */
1693                         //dist= (float)(fabs( (p[0]-vec2[0])*rc[1] + (p[1]-vec2[1])*rc[0])/len);
1694                         
1695                         pt[0]= labda*face->rc[0]+face->vec1[0];
1696                         pt[1]= labda*face->rc[1]+face->vec1[1];
1697                         
1698                         rc[0]= pt[0]-p[0];
1699                         rc[1]= pt[1]-p[1];
1700                         dist= (float)sqrt(rc[0]*rc[0]+ rc[1]*rc[1]);
1701                         
1702                         if (dist < face->radline) {
1703                                 float zval= face->vec1[2] + labda*face->rc[2];
1704                                 if (p[2] > zval)
1705                                         return 1;
1706                         }
1707                 }
1708         }
1709         return 0;
1710 }
1711
1712
/* Returns 1 when p lies inside triangle v1-v2-v3 in 2d (x, y; edges included)
 * and the z of the triangle at that location is smaller than or equal to p[2].
 * Degenerate triangles (zero 2d area) always return 0.
 * Code derived from src/parametrizer.c */
static int point_behind_tria2d(const float p[3], const float v1[3], const float v2[3], const float v3[3])
{
	const float e1x = v2[0] - v1[0];
	const float e1y = v2[1] - v1[1];
	const float e2x = v3[0] - v1[0];
	const float e2y = v3[1] - v1[1];
	float det, inv, hx, hy, u, v, z;

	det = e1x * e2y - e1y * e2x;
	if (det == 0.0f)
		return 0;

	hx = p[0] - v1[0];
	hy = p[1] - v1[1];

	inv = 1.0f / det;

	/* barycentric weights for v2 (u) and v3 (v) */
	u = (hx * e2y - hy * e2x) * inv;
	if (!(u >= 0.0f))
		return 0;

	v = (e1x * hy - e1y * hx) * inv;
	if (!(v >= 0.0f))
		return 0;

	if (!(u + v <= 1.0f))
		return 0;

	/* inside, now check if point p is behind */
	z = (1.0f - u - v) * v1[2] + u * v2[2] + v * v3[2];

	return (z <= p[2]) ? 1 : 0;
}
1748
#if 0
/* Disabled: these calls were tested, but give inaccuracy; the 'side' cannot be
 * found reliably using v3. */

/* returns 1 when all four corners of rect are on the other side of line v1-v2 than point v3 */
static int rect_outside_line(rctf *rect, const float v1[3], const float v2[3], const float v3[3])
{
	/* line formula for v1-v2: a*x + b*y + c = 0 */
	const float a = v2[1] - v1[1];
	const float b = v1[0] - v2[0];
	const float c = -a * v1[0] - b * v1[1];
	const int side = a * v3[0] + b * v3[1] + c < 0.0f;

	/* the four rect corners */
	return (side == (rect->xmin * a + rect->ymin * b + c >= 0.0f) &&
	        side == (rect->xmax * a + rect->ymin * b + c >= 0.0f) &&
	        side == (rect->xmax * a + rect->ymax * b + c >= 0.0f) &&
	        side == (rect->xmin * a + rect->ymax * b + c >= 0.0f));
}

/* returns 0 when one of the triangle edges has all rect corners on its outside, else 1 */
static int rect_isect_tria(rctf *rect, const float v1[3], const float v2[3], const float v3[3])
{
	return !(rect_outside_line(rect, v1, v2, v3) ||
	         rect_outside_line(rect, v2, v3, v1) ||
	         rect_outside_line(rect, v3, v1, v2));
}
#endif
1785
1786 /* if face overlaps a branch, it executes func. recursive */
1787 static void isb_bsp_face_inside(ISBBranch *bspn, BSPFace *face)
1788 {
1789         
1790         /* are we descending? */
1791         if (bspn->left) {
1792                 /* hrmf, the box struct cannot be addressed with index */
1793                 if (bspn->index==0) {
1794                         if (face->box.xmin <= bspn->divider[0])
1795                                 isb_bsp_face_inside(bspn->left, face);
1796                         if (face->box.xmax > bspn->divider[0])
1797                                 isb_bsp_face_inside(bspn->right, face);
1798                 }
1799                 else {
1800                         if (face->box.ymin <= bspn->divider[1])
1801                                 isb_bsp_face_inside(bspn->left, face);
1802                         if (face->box.ymax > bspn->divider[1])
1803                                 isb_bsp_face_inside(bspn->right, face);
1804                 }
1805         }
1806         else {
1807                 /* else: end branch reached */
1808                 int a;
1809                 
1810                 if (bspn->totsamp==0) return;
1811                 
1812                 /* check for nodes entirely in shadow, can be skipped */
1813                 if (bspn->totsamp==bspn->full)
1814                         return;
1815                 
1816                 /* if bsp node is entirely in front of face, give up */
1817                 if (bspn->box.zmax < face->box.zmin)
1818                         return;
1819                 
1820                 /* if face boundbox is outside of branch rect, give up */
1821                 if (0==BLI_rctf_isect((rctf *)&face->box, (rctf *)&bspn->box, NULL))
1822                         return;
1823                 
1824                 /* test all points inside branch */
1825                 for (a=bspn->totsamp-1; a>=0; a--) {
1826                         ISBSample *samp= bspn->samples[a];
1827                         
1828                         if ((samp->facenr!=face->facenr || samp->obi!=face->obi) && samp->shadfac) {
1829                                 if (face->box.zmin < samp->zco[2]) {
1830                                         if (BLI_rctf_isect_pt_v((rctf *)&face->box, samp->zco)) {
1831                                                 int inshadow= 0;
1832                                                 
1833                                                 if (face->type) {
1834                                                         if (point_behind_strand(samp->zco, face))
1835                                                                 inshadow= 1;
1836                                                 }
1837                                                 else if ( point_behind_tria2d(samp->zco, face->v1, face->v2, face->v3))
1838                                                         inshadow= 1;
1839                                                 else if (face->v4 && point_behind_tria2d(samp->zco, face->v1, face->v3, face->v4))
1840                                                         inshadow= 1;
1841
1842                                                 if (inshadow) {
1843                                                         *(samp->shadfac) += face->shad_alpha;
1844                                                         /* optimize; is_full means shad_alpha==4096 */
1845                                                         if (*(samp->shadfac) >= 4096 || face->is_full) {
1846                                                                 bspn->full++;
1847                                                                 samp->shadfac= NULL;
1848                                                         }
1849                                                 }
1850                                         }
1851                                 }
1852                         }
1853                 }
1854         }
1855 }
1856
1857 /* based on available samples, recalculate the bounding box for bsp nodes, recursive */
1858 static void isb_bsp_recalc_box(ISBBranch *root)
1859 {
1860         if (root->left) {
1861                 isb_bsp_recalc_box(root->left);
1862                 isb_bsp_recalc_box(root->right);
1863         }
1864         else if (root->totsamp) {
1865                 int a;
1866                 
1867                 init_box(&root->box);
1868                 for (a=root->totsamp-1; a>=0; a--)
1869                         bound_boxf(&root->box, root->samples[a]->zco);
1870         }
1871 }
1872
1873 /* callback function for zbuf clip */
1874 static void isb_bsp_test_strand(ZSpan *zspan, int obi, int zvlnr,
1875                                 const float *v1, const float *v2, const float *v3, const float *v4)
1876 {
1877         BSPFace face;
1878         
1879         face.v1= v1;
1880         face.v2= v2;
1881         face.v3= v3;
1882         face.v4= v4;
1883         face.obi= obi;
1884         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1885         face.type= R_STRAND;
1886         if (R.osa)
1887                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1888         else
1889                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1890         
1891         face.is_full= (zspan->shad_alpha==1.0f);
1892         
1893         /* setup boundbox */
1894         init_box(&face.box);
1895         bound_boxf(&face.box, v1);
1896         bound_boxf(&face.box, v2);
1897         bound_boxf(&face.box, v3);
1898         if (v4)
1899                 bound_boxf(&face.box, v4);
1900         
1901         /* optimize values */
1902         bspface_init_strand(&face);
1903         
1904         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1905         
1906 }
1907
1908 /* callback function for zbuf clip */
1909 static void isb_bsp_test_face(ZSpan *zspan, int obi, int zvlnr,
1910                               const float *v1, const float *v2, const float *v3, const float *v4)
1911 {
1912         BSPFace face;
1913         
1914         face.v1= v1;
1915         face.v2= v2;
1916         face.v3= v3;
1917         face.v4= v4;
1918         face.obi= obi;
1919         face.facenr= zvlnr & ~RE_QUAD_OFFS;
1920         face.type= 0;
1921         if (R.osa)
1922                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha/(float)R.osa);
1923         else
1924                 face.shad_alpha= (short)ceil(4096.0f*zspan->shad_alpha);
1925         
1926         face.is_full= (zspan->shad_alpha==1.0f);
1927         
1928         /* setup boundbox */
1929         init_box(&face.box);
1930         bound_boxf(&face.box, v1);
1931         bound_boxf(&face.box, v2);
1932         bound_boxf(&face.box, v3);
1933         if (v4)
1934                 bound_boxf(&face.box, v4);
1935
1936         isb_bsp_face_inside((ISBBranch *)zspan->rectz, &face);
1937 }
1938
/* Clip test of homogeneous coordinate ho against a 2d range.
 * minmax holds (xmin, xmax, ymin, ymax), each is scaled with ho[3] before comparing.
 *
 * Returns clip flags: 1 = x over max, 2 = x under min, 4 = y over max,
 * 8 = y under min; 0 when inside the range. */
static int testclip_minmax(const float ho[4], const float minmax[4])
{
	const float w = ho[3];
	int clip;

	clip = (ho[0] > minmax[1] * w) ? 1 :
	       (ho[0] < minmax[0] * w) ? 2 : 0;

	clip |= (ho[1] > minmax[3] * w) ? 4 :
	        (ho[1] < minmax[2] * w) ? 8 : 0;

	return clip;
}
1952
1953 /* main loop going over all faces and check in bsp overlaps, fill in shadfac values */
1954 static void isb_bsp_fillfaces(Render *re, LampRen *lar, ISBBranch *root)
1955 {
1956         ObjectInstanceRen *obi;
1957         ObjectRen *obr;
1958         ShadBuf *shb= lar->shb;
1959         ZSpan zspan, zspanstrand;
1960         VlakRen *vlr= NULL;
1961         Material *ma= NULL;
1962         float minmaxf[4], winmat[4][4];
1963         int size= shb->size;
1964         int i, a, ok=1, lay= -1;
1965         
1966         /* further optimize, also sets minz maxz */
1967         isb_bsp_recalc_box(root);
1968         
1969         /* extra clipping for minmax */
1970         minmaxf[0]= (2.0f*root->box.xmin - size-2.0f)/size;
1971         minmaxf[1]= (2.0f*root->box.xmax - size+2.0f)/size;
1972         minmaxf[2]= (2.0f*root->box.ymin - size-2.0f)/size;
1973         minmaxf[3]= (2.0f*root->box.ymax - size+2.0f)/size;
1974         
1975         if (lar->mode & (LA_LAYER|LA_LAYER_SHADOW)) lay= lar->lay;
1976         
1977         /* (ab)use zspan, since we use zbuffer clipping code */
1978         zbuf_alloc_span(&zspan, size, size, re->clipcrop);
1979         
1980         zspan.zmulx=  ((float)size)/2.0f;
1981         zspan.zmuly=  ((float)size)/2.0f;
1982         zspan.zofsx= -0.5f;
1983         zspan.zofsy= -0.5f;
1984         
1985         /* pass on bsp root to zspan */
1986         zspan.rectz= (int *)root;
1987         
1988         /* filling methods */
1989         zspanstrand= zspan;
1990         //      zspan.zbuflinefunc= zbufline_onlyZ;
1991         zspan.zbuffunc= isb_bsp_test_face;
1992         zspanstrand.zbuffunc= isb_bsp_test_strand;
1993         
1994         for (i=0, obi=re->instancetable.first; obi; i++, obi=obi->next) {
1995                 obr= obi->obr;
1996
1997                 if (obi->flag & R_TRANSFORMED)
1998                         mult_m4_m4m4(winmat, shb->persmat, obi->mat);
1999                 else
2000                         copy_m4_m4(winmat, shb->persmat);
2001
2002                 for (a=0; a<obr->totvlak; a++) {
2003                         
2004                         if ((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
2005                         else vlr++;
2006                         
2007                         /* note, these conditions are copied in shadowbuf_autoclip() */
2008                         if (vlr->mat!= ma) {
2009                                 ma= vlr->mat;
2010                                 ok= 1;
2011                                 if ((ma->mode & MA_SHADBUF)==0) ok= 0;
2012                                 if (ma->material_type == MA_TYPE_WIRE) ok= 0;
2013                                 zspanstrand.shad_alpha= zspan.shad_alpha= ma->shad_alpha;
2014                         }
2015                         
2016                         if (ok && (obi->lay & lay)) {
2017                                 float hoco[4][4];
2018                                 int c1, c2, c3, c4=0;
2019                                 int d1, d2, d3, d4=0;
2020                                 int partclip;
2021                                 
2022                                 /* create hocos per face, it is while render */
2023                                 projectvert(vlr->v1->co, winmat, hoco[0]); d1= testclip_minmax(hoco[0], minmaxf);
2024                                 projectvert(vlr->v2->co, winmat, hoco[1]); d2= testclip_minmax(hoco[1], minmaxf);
2025                                 projectvert(vlr->v3->co, winmat, hoco[2]); d3= testclip_minmax(hoco[2], minmaxf);
2026                                 if (vlr->v4) {
2027                                         projectvert(vlr->v4->co, winmat, hoco[3]); d4= testclip_minmax(hoco[3], minmaxf);
2028                                 }
2029
2030                                 /* minmax clipping */
2031                                 if (vlr->v4) partclip= d1 & d2 & d3 & d4;
2032                                 else partclip= d1 & d2 & d3;
2033                                 
2034                                 if (partclip==0) {
2035                                         
2036                                         /* window clipping */
2037                                         c1= testclip(hoco[0]); 
2038                                         c2= testclip(hoco[1]); 
2039                                         c3= testclip(hoco[2]); 
2040                                         if (vlr->v4)
2041                                                 c4= testclip(hoco[3]); 
2042                                         
2043                                         /* ***** NO WIRE YET */
2044                                         if (ma->material_type == MA_TYPE_WIRE) {
2045                                                 if (vlr->v4)
2046                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2047                                                 else
2048                                                         zbufclipwire(&zspan, i, a+1, vlr->ec, hoco[0], hoco[1], hoco[2], 0, c1, c2, c3, 0);
2049                                         }
2050                                         else if (vlr->v4) {
2051                                                 if (vlr->flag & R_STRAND)
2052                                                         zbufclip4(&zspanstrand, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2053                                                 else
2054                                                         zbufclip4(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], hoco[3], c1, c2, c3, c4);
2055                                         }
2056                                         else
2057                                                 zbufclip(&zspan, i, a+1, hoco[0], hoco[1], hoco[2], c1, c2, c3);
2058                                         
2059                                 }
2060                         }
2061                 }
2062         }
2063         
2064         zbuf_free_span(&zspan);
2065 }
2066
2067 /* returns 1 when the viewpixel is visible in lampbuffer */
2068 static int viewpixel_to_lampbuf(ShadBuf *shb, ObjectInstanceRen *obi, VlakRen *vlr, float x, float y, float co_r[3])
2069 {
2070         float hoco[4], v1[3], nor[3];
2071         float dface, fac, siz;
2072         
2073         RE_vlakren_get_normal(&R, obi, vlr, nor);
2074         copy_v3_v3(v1, vlr->v1->co);
2075         if (obi->flag & R_TRANSFORMED)
2076                 mul_m4_v3(obi->mat, v1);
2077
2078         /* from shadepixel() */
2079         dface = dot_v3v3(v1, nor);
2080         hoco[3]= 1.0f;
2081         
2082         /* ortho viewplane cannot intersect using view vector originating in (0, 0, 0) */
2083         if (R.r.mode & R_ORTHO) {
2084                 /* x and y 3d coordinate can be derived from pixel coord and winmat */
2085                 float fx= 2.0f/(R.winx*R.winmat[0][0]);
2086                 float fy= 2.0f/(R.winy*R.winmat[1][1]);
2087                 
2088                 hoco[0]= (x - 0.5f*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
2089                 hoco[1]= (y - 0.5f*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
2090                 
2091                 /* using a*x + b*y + c*z = d equation, (a b c) is normal */
2092                 if (nor[2]!=0.0f)
2093                         hoco[2]= (dface - nor[0]*hoco[0] - nor[1]*hoco[1])/nor[2];
2094                 else
2095                         hoco[2]= 0.0f;
2096         }
2097         else {
2098                 float div, view[3];
2099                 
2100                 calc_view_vector(view, x, y);
2101                 
2102                 div = dot_v3v3(nor, view);
2103                 if (div==0.0f) 
2104                         return 0;
2105                 
2106                 fac= dface/div;
2107                 
2108                 hoco[0]= fac*view[0];
2109                 hoco[1]= fac*view[1];
2110                 hoco[2]= fac*view[2];
2111         }
2112         
2113         /* move 3d vector to lampbuf */
2114         mul_m4_v4(shb->persmat, hoco);  /* rational hom co */
2115         
2116         /* clip We can test for -1.0/1.0 because of the properties of the
2117          * coordinate transformations. */
2118         fac= fabs(hoco[3]);
2119         if (hoco[0]<-fac || hoco[0]>fac)
2120                 return 0;
2121         if (hoco[1]<-fac || hoco[1]>fac)
2122                 return 0;
2123         if (hoco[2]<-fac || hoco[2]>fac)
2124                 return 0;
2125         
2126         siz= 0.5f*(float)shb->size;
2127         co_r[0]= siz*(1.0f+hoco[0]/hoco[3]) -0.5f;
2128         co_r[1]= siz*(1.0f+hoco[1]/hoco[3]) -0.5f;
2129         co_r[2]= ((float)0x7FFFFFFF)*(hoco[2]/hoco[3]);
2130         
2131         /* XXXX bias, much less than normal shadbuf, or do we need a constant? */
2132         co_r[2] -= 0.05f*shb->bias;
2133         
2134         return 1;
2135 }
2136
2137 /* storage of shadow results, solid osa and transp case */
2138 static void isb_add_shadfac(ISBShadfacA **isbsapp, MemArena *mem, int obi, int facenr, short shadfac, short samples)
2139 {
2140         ISBShadfacA *new;
2141         float shadfacf;
2142         
2143         /* in osa case, the samples were filled in with factor 1.0/R.osa. if fewer samples we have to correct */
2144         if (R.osa)
2145                 shadfacf= ((float)shadfac*R.osa)/(4096.0f*samples);
2146         else
2147                 shadfacf= ((float)shadfac)/(4096.0f);
2148         
2149         new= BLI_memarena_alloc(mem, sizeof(ISBShadfacA));
2150         new->obi= obi;
2151         new->facenr= facenr & ~RE_QUAD_OFFS;
2152         new->shadfac= shadfacf;
2153         if (*isbsapp)
2154                 new->next= (*isbsapp);
2155         else
2156                 new->next= NULL;
2157         
2158         *isbsapp= new;
2159 }
2160
2161 /* adding samples, solid case */
2162 static int isb_add_samples(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSample **samplebuf)
2163 {
2164         int xi, yi, *xcos, *ycos;
2165         int sample, bsp_err= 0;
2166         
2167         /* bsp split doesn't like to handle regular sequences */
2168         xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos");
2169         ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos");
2170         for (xi=0; xi<pa->rectx; xi++)
2171                 xcos[xi]= xi;
2172         for (yi=0; yi<pa->recty; yi++)
2173                 ycos[yi]= yi;
2174         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2175         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2176         
2177         for (sample=0; sample<(R.osa?R.osa:1); sample++) {
2178                 ISBSample *samp= samplebuf[sample], *samp1;
2179                 
2180                 for (yi=0; yi<pa->recty; yi++) {
2181                         int y= ycos[yi];
2182                         for (xi=0; xi<pa->rectx; xi++) {
2183                                 int x= xcos[xi];
2184                                 samp1= samp + y*pa->rectx + x;
2185                                 if (samp1->facenr)
2186                                         bsp_err |= isb_bsp_insert(root, memarena, samp1);
2187                         }
2188                         if (bsp_err) break;
2189                 }
2190         }
2191         
2192         MEM_freeN(xcos);
2193         MEM_freeN(ycos);
2194
2195         return bsp_err;
2196 }
2197
2198 /* solid version */
2199 /* lar->shb, pa->rectz and pa->rectp should exist */
2200 static void isb_make_buffer(RenderPart *pa, LampRen *lar)
2201 {
2202         ShadBuf *shb= lar->shb;
2203         ISBData *isbdata;
2204         ISBSample *samp, *samplebuf[16];        /* should be RE_MAX_OSA */
2205         ISBBranch root;
2206         MemArena *memarena;
2207         intptr_t *rd;
2208         int *recto, *rectp, x, y, sindex, sample, bsp_err=0;
2209         
2210         /* storage for shadow, per thread */
2211         isbdata= shb->isb_result[pa->thread];
2212         
2213         /* to map the shi->xs and ys coordinate */
2214         isbdata->minx= pa->disprect.xmin;
2215         isbdata->miny= pa->disprect.ymin;
2216         isbdata->rectx= pa->rectx;
2217         isbdata->recty= pa->recty;
2218         
2219         /* branches are added using memarena (32k branches) */
2220         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2221         BLI_memarena_use_calloc(memarena);
2222         
2223         /* samplebuf is in camera view space (pixels) */
2224         for (sample=0; sample<(R.osa?R.osa:1); sample++)
2225                 samplebuf[sample]= MEM_callocN(sizeof(ISBSample)*pa->rectx*pa->recty, "isb samplebuf");
2226         
2227         /* for end result, ISBSamples point to this in non OSA case, otherwise to pixstruct->shadfac */
2228         if (R.osa==0)
2229                 isbdata->shadfacs= MEM_callocN(pa->rectx*pa->recty*sizeof(short), "isb shadfacs");
2230         
2231         /* setup bsp root */
2232         memset(&root, 0, sizeof(ISBBranch));
2233         root.box.xmin = (float)shb->size;
2234         root.box.ymin = (float)shb->size;
2235         
2236         /* create the sample buffers */
2237         for (sindex=0, y=0; y<pa->recty; y++) {
2238                 for (x=0; x<pa->rectx; x++, sindex++) {
2239                         
2240                         /* this makes it a long function, but splitting it out would mean 10+ arguments */
2241                         /* first check OSA case */
2242                         if (R.osa) {
2243                                 rd= pa->rectdaps + sindex;
2244                                 if (*rd) {
2245                                         float xs= (float)(x + pa->disprect.xmin);
2246                                         float ys= (float)(y + pa->disprect.ymin);
2247                                         
2248                                         for (sample=0; sample<R.osa; sample++) {
2249                                                 PixStr *ps= (PixStr *)(*rd);
2250                                                 int mask= (1<<sample);
2251                                                 
2252                                                 while (ps) {
2253                                                         if (ps->mask & mask)
2254                                                                 break;
2255                                                         ps= ps->next;
2256                                                 }
2257                                                 if (ps && ps->facenr>0) {
2258                                                         ObjectInstanceRen *obi= &R.objectinstance[ps->obi];
2259                                                         ObjectRen *obr= obi->obr;
2260                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
2261                                                         
2262                                                         samp= samplebuf[sample] + sindex;
2263                                                         /* convert image plane pixel location to lamp buffer space */
2264                                                         if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], samp->zco)) {
2265                                                                 samp->obi= ps->obi;
2266                                                                 samp->facenr= ps->facenr & ~RE_QUAD_OFFS;
2267                                                                 ps->shadfac= 0;
2268                                                                 samp->shadfac= &ps->shadfac;
2269                                                                 bound_rectf((rctf *)&root.box, samp->zco);
2270                                                         }
2271                                                 }
2272                                         }
2273                                 }
2274                         }
2275                         else {
2276                                 rectp= pa->rectp + sindex;
2277                                 recto= pa->recto + sindex;
2278                                 if (*rectp>0) {
2279                                         ObjectInstanceRen *obi= &R.objectinstance[*recto];
2280                                         ObjectRen *obr= obi->obr;
2281                                         VlakRen *vlr= RE_findOrAddVlak(obr, (*rectp-1) & RE_QUAD_MASK);
2282                                         float xs= (float)(x + pa->disprect.xmin);
2283                                         float ys= (float)(y + pa->disprect.ymin);
2284                                         
2285                                         samp= samplebuf[0] + sindex;
2286                                         /* convert image plane pixel location to lamp buffer space */
2287                                         if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, samp->zco)) {
2288                                                 samp->obi= *recto;
2289                                                 samp->facenr= *rectp & ~RE_QUAD_OFFS;
2290                                                 samp->shadfac= isbdata->shadfacs + sindex;
2291                                                 bound_rectf((rctf *)&root.box, samp->zco);
2292                                         }
2293                                 }
2294                         }
2295                 }
2296         }
2297         
2298         /* simple method to see if we have samples */
2299         if (root.box.xmin != (float)shb->size) {
2300                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2301                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2302                 isb_bsp_split_init(&root, memarena, 8);
2303                 
2304                 /* insert all samples in BSP now */
2305                 bsp_err= isb_add_samples(pa, &root, memarena, samplebuf);
2306                         
2307                 if (bsp_err==0) {
2308                         /* go over all faces and fill in shadow values */
2309                         
2310                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2311                         
2312                         /* copy shadow samples to persistent buffer, reduce memory overhead */
2313                         if (R.osa) {
2314                                 ISBShadfacA **isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2315                                 
2316                                 isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2317                                 BLI_memarena_use_calloc(isbdata->memarena);
2318
2319                                 for (rd= pa->rectdaps, x=pa->rectx*pa->recty; x>0; x--, rd++, isbsa++) {
2320                                         
2321                                         if (*rd) {
2322                                                 PixStr *ps= (PixStr *)(*rd);
2323                                                 while (ps) {
2324                                                         if (ps->shadfac)
2325                                                                 isb_add_shadfac(isbsa, isbdata->memarena, ps->obi, ps->facenr, ps->shadfac, count_mask(ps->mask));
2326                                                         ps= ps->next;
2327                                                 }
2328                                         }
2329                                 }
2330                         }
2331                 }
2332         }
2333         else {
2334                 if (isbdata->shadfacs) {
2335                         MEM_freeN(isbdata->shadfacs);
2336                         isbdata->shadfacs= NULL;
2337                 }
2338         }
2339
2340         /* free BSP */
2341         BLI_memarena_free(memarena);
2342         
2343         /* free samples */
2344         for (x=0; x<(R.osa?R.osa:1); x++)
2345                 MEM_freeN(samplebuf[x]);
2346         
2347         if (bsp_err) printf("error in filling bsp\n");
2348 }
2349
2350 /* add sample to buffer, isbsa is the root sample in a buffer */
2351 static ISBSampleA *isb_alloc_sample_transp(ISBSampleA **isbsa, MemArena *mem)
2352 {
2353         ISBSampleA *new;
2354         
2355         new= BLI_memarena_alloc(mem, sizeof(ISBSampleA));
2356         if (*isbsa)
2357                 new->next= (*isbsa);
2358         else
2359                 new->next= NULL;
2360         
2361         *isbsa= new;
2362         return new;
2363 }
2364
2365 /* adding samples in BSP, transparent case */
2366 static int isb_add_samples_transp(RenderPart *pa, ISBBranch *root, MemArena *memarena, ISBSampleA ***samplebuf)
2367 {
2368         int xi, yi, *xcos, *ycos;
2369         int sample, bsp_err= 0;
2370         
2371         /* bsp split doesn't like to handle regular sequences */
2372         xcos= MEM_mallocN(pa->rectx*sizeof(int), "xcos");
2373         ycos= MEM_mallocN(pa->recty*sizeof(int), "ycos");
2374         for (xi=0; xi<pa->rectx; xi++)
2375                 xcos[xi]= xi;
2376         for (yi=0; yi<pa->recty; yi++)
2377                 ycos[yi]= yi;
2378         BLI_array_randomize(xcos, sizeof(int), pa->rectx, 12345);
2379         BLI_array_randomize(ycos, sizeof(int), pa->recty, 54321);
2380         
2381         for (sample=0; sample<(R.osa?R.osa:1); sample++) {
2382                 ISBSampleA **samp= samplebuf[sample], *samp1;
2383                 
2384                 for (yi=0; yi<pa->recty; yi++) {
2385                         int y= ycos[yi];
2386                         for (xi=0; xi<pa->rectx; xi++) {
2387                                 int x= xcos[xi];
2388                                 
2389                                 samp1= *(samp + y*pa->rectx + x);
2390                                 while (samp1) {
2391                                         bsp_err |= isb_bsp_insert(root, memarena, (ISBSample *)samp1);
2392                                         samp1= samp1->next;
2393                                 }
2394                         }
2395                         if (bsp_err) break;
2396                 }
2397         }
2398         
2399         MEM_freeN(xcos);
2400         MEM_freeN(ycos);
2401         
2402         return bsp_err;
2403 }
2404
2405
2406 /* Ztransp version */
2407 /* lar->shb, pa->rectz and pa->rectp should exist */
2408 static void isb_make_buffer_transp(RenderPart *pa, APixstr *apixbuf, LampRen *lar)
2409 {
2410         ShadBuf *shb= lar->shb;
2411         ISBData *isbdata;
2412         ISBSampleA *samp, **samplebuf[16];      /* MAX_OSA */
2413         ISBBranch root;
2414         MemArena *memarena;
2415         APixstr *ap;
2416         int x, y, sindex, sample, bsp_err=0;
2417         
2418         /* storage for shadow, per thread */
2419         isbdata= shb->isb_result[pa->thread];
2420         
2421         /* to map the shi->xs and ys coordinate */
2422         isbdata->minx= pa->disprect.xmin;
2423         isbdata->miny= pa->disprect.ymin;
2424         isbdata->rectx= pa->rectx;
2425         isbdata->recty= pa->recty;
2426         
2427         /* branches are added using memarena (32k branches) */
2428         memarena = BLI_memarena_new(0x8000 * sizeof(ISBBranch), "isb arena");
2429         BLI_memarena_use_calloc(memarena);
2430         
2431         /* samplebuf is in camera view space (pixels) */
2432         for (sample=0; sample<(R.osa?R.osa:1); sample++)
2433                 samplebuf[sample]= MEM_callocN(sizeof(void *)*pa->rectx*pa->recty, "isb alpha samplebuf");
2434         
2435         /* setup bsp root */
2436         memset(&root, 0, sizeof(ISBBranch));
2437         root.box.xmin = (float)shb->size;
2438         root.box.ymin = (float)shb->size;
2439
2440         /* create the sample buffers */
2441         for (ap= apixbuf, sindex=0, y=0; y<pa->recty; y++) {
2442                 for (x=0; x<pa->rectx; x++, sindex++, ap++) {
2443                         
2444                         if (ap->p[0]) {
2445                                 APixstr *apn;
2446                                 float xs= (float)(x + pa->disprect.xmin);
2447                                 float ys= (float)(y + pa->disprect.ymin);
2448                                 
2449                                 for (apn=ap; apn; apn= apn->next) {
2450                                         int a;
2451                                         for (a=0; a<4; a++) {
2452                                                 if (apn->p[a]) {
2453                                                         ObjectInstanceRen *obi= &R.objectinstance[apn->obi[a]];
2454                                                         ObjectRen *obr= obi->obr;
2455                                                         VlakRen *vlr= RE_findOrAddVlak(obr, (apn->p[a]-1) & RE_QUAD_MASK);
2456                                                         float zco[3];
2457                                                         
2458                                                         /* here we store shadfac, easier to create the end storage buffer. needs zero'ed, multiple shadowbufs use it */
2459                                                         apn->shadfac[a]= 0;
2460                                                         
2461                                                         if (R.osa) {
2462                                                                 for (sample=0; sample<R.osa; sample++) {
2463                                                                         int mask= (1<<sample);
2464                                                                         
2465                                                                         if (apn->mask[a] & mask) {
2466                                                                                 
2467                                                                                 /* convert image plane pixel location to lamp buffer space */
2468                                                                                 if (viewpixel_to_lampbuf(shb, obi, vlr, xs + R.jit[sample][0], ys + R.jit[sample][1], zco)) {
2469                                                                                         samp= isb_alloc_sample_transp(samplebuf[sample] + sindex, memarena);
2470                                                                                         samp->obi= apn->obi[a];
2471                                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2472                                                                                         samp->shadfac= &apn->shadfac[a];
2473                                                                                         
2474                                                                                         copy_v3_v3(samp->zco, zco);
2475                                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2476                                                                                 }
2477                                                                         }
2478                                                                 }
2479                                                         }
2480                                                         else {
2481                                                                 
2482                                                                 /* convert image plane pixel location to lamp buffer space */
2483                                                                 if (viewpixel_to_lampbuf(shb, obi, vlr, xs, ys, zco)) {
2484                                                                         
2485                                                                         samp= isb_alloc_sample_transp(samplebuf[0] + sindex, memarena);
2486                                                                         samp->obi= apn->obi[a];
2487                                                                         samp->facenr= apn->p[a] & ~RE_QUAD_OFFS;
2488                                                                         samp->shadfac= &apn->shadfac[a];
2489                                                                         
2490                                                                         copy_v3_v3(samp->zco, zco);
2491                                                                         bound_rectf((rctf *)&root.box, samp->zco);
2492                                                                 }
2493                                                         }
2494                                                 }
2495                                         }
2496                                 }
2497                         }
2498                 }
2499         }
2500         
2501         /* simple method to see if we have samples */
2502         if (root.box.xmin != (float)shb->size) {
2503                 /* now create a regular split, root.box has the initial bounding box of all pixels */
2504                 /* split bsp 8 levels deep, in regular grid (16 x 16) */
2505                 isb_bsp_split_init(&root, memarena, 8);
2506                 
2507                 /* insert all samples in BSP now */
2508                 bsp_err= isb_add_samples_transp(pa, &root, memarena, samplebuf);
2509                 
2510                 if (bsp_err==0) {
2511                         ISBShadfacA **isbsa;
2512                         
2513                         /* go over all faces and fill in shadow values */
2514                         isb_bsp_fillfaces(&R, lar, &root);      /* shb->persmat should have been calculated */
2515                         
2516                         /* copy shadow samples to persistent buffer, reduce memory overhead */
2517                         isbsa= isbdata->shadfaca= MEM_callocN(pa->rectx*pa->recty*sizeof(void *), "isb shadfacs");
2518                         
2519                         isbdata->memarena = BLI_memarena_new(0x8000 * sizeof(ISBSampleA), "isb arena");
2520                         
2521                         for (ap= apixbuf, x=pa->rectx*pa->recty; x>0; x--, ap++, isbsa++) {
2522                                         
2523                                 if (ap->p[0]) {
2524                                         APixstr *apn;
2525                                         for (apn=ap; apn; apn= apn->next) {
2526                                                 int a;
2527                                                 for (a=0; a<4; a++) {
2528                                                         if (apn->p[a] && apn->shadfac[a]) {
2529                                                                 if (R.osa)
2530                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], count_mask(apn->mask[a]));
2531                                                                 else
2532                                                                         isb_add_shadfac(isbsa, isbdata->memarena, apn->obi[a], apn->p[a], apn->shadfac[a], 0);
2533                                                         }
2534                                                 }
2535                                         }
2536                                 }
2537                         }
2538                 }
2539         }
2540
2541         /* free BSP */
2542         BLI_memarena_free(memarena);
2543
2544         /* free samples */
2545         for (x=0; x<(R.osa?R.osa:1); x++)
2546                 MEM_freeN(samplebuf[x]);
2547
2548         if (bsp_err) printf("error in filling bsp\n");
2549 }
2550
2551
2552
2553 /* exported */
2554
2555 /* returns amount of light (1.0 = no shadow) */
2556 /* note, shadepixel() rounds the coordinate, not the real sample info */
2557 float ISB_getshadow(ShadeInput *shi, ShadBuf *shb)
2558 {
2559         /* if raytracing, we can't accept irregular shadow */
2560         if (shi->depth==0) {
2561                 ISBData *isbdata= shb->isb_result[shi->thread];
2562                 
2563                 if (isbdata) {
2564                         if (isbdata->shadfacs || isbdata->shadfaca) {
2565                                 int x= shi->xs - isbdata->minx;
2566                                 
2567                                 if (x >= 0 && x < isbdata->rectx) {
2568                                         int y= shi->ys - isbdata->miny;
2569                         
2570                                         if (y >= 0 && y < isbdata->recty) {
2571                                                 if (isbdata->shadfacs) {
2572                                                         short *sp= isbdata->shadfacs + y*isbdata->rectx + x;
2573                                                         return *sp>=4096?0.0f:1.0f - ((float)*sp)/4096.0f;
2574                                                 }
2575                                                 else {
2576                                                         int sindex= y*isbdata->rectx + x;
2577                                                         int obi= shi->obi - R.objectinstance;
2578                                                         ISBShadfacA *isbsa= *(isbdata->shadfaca + sindex);
2579                                                         
2580                                                         while (isbsa) {
2581                                                                 if (isbsa->facenr==shi->facenr+1 && isbsa->obi==obi)
2582                                                                         return isbsa->shadfac>=1.0f?0.0f:1.0f - isbsa->shadfac;
2583                                                                 isbsa= isbsa->next;
2584                                                         }
2585                                                 }
2586                                         }
2587                                 }
2588                         }
2589                 }
2590         }
2591         return 1.0f;
2592 }
2593
2594 /* part is supposed to be solid zbuffered (apixbuf==NULL) or transparent zbuffered */
2595 void ISB_create(RenderPart *pa, APixstr *apixbuf)
2596 {
2597         GroupObject *go;
2598         
2599         /* go over all lamps, and make the irregular buffers */
2600         for (go=R.lights.first; go; go= go->next) {
2601                 LampRen *lar= go->lampren;
2602                 
2603                 if (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2604                         
2605                         /* create storage for shadow, per thread */
2606                         lar->shb->isb_result[pa->thread]= MEM_callocN(sizeof(ISBData), "isb data");
2607                         
2608                         if (apixbuf)
2609                                 isb_make_buffer_transp(pa, apixbuf, lar);
2610                         else
2611                                 isb_make_buffer(pa, lar);
2612                 }
2613         }
2614 }
2615
2616
2617 /* end of part rendering, free stored shadow data for this thread from all lamps */
2618 void ISB_free(RenderPart *pa)
2619 {
2620         GroupObject *go;
2621         
2622         /* go over all lamps, and free the irregular buffers */
2623         for (go=R.lights.first; go; go= go->next) {
2624                 LampRen *lar= go->lampren;
2625                 
2626                 if (lar->type==LA_SPOT && lar->shb && lar->buftype==LA_SHADBUF_IRREGULAR) {
2627                         ISBData *isbdata= lar->shb->isb_result[pa->thread];
2628
2629                         if (isbdata) {
2630                                 if (isbdata->shadfacs)
2631                                         MEM_freeN(isbdata->shadfacs);
2632                                 if (isbdata->shadfaca)
2633                                         MEM_freeN(isbdata->shadfaca);
2634                                 
2635                                 if (isbdata->memarena)
2636                                         BLI_memarena_free(isbdata->memarena);
2637                                 
2638                                 MEM_freeN(isbdata);
2639                                 lar->shb->isb_result[pa->thread]= NULL;
2640                         }
2641                 }
2642         }
2643 }