Fixes for SSS with render layers. Now in the preprocessing pass
[blender.git] / source / blender / render / intern / source / rendercore.c
1 /**
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
21  * All rights reserved.
22  *
23  * Contributors: Hos, Robert Wenzlaff.
24  * Contributors: 2004/2005/2006 Blender Foundation, full recode
25  *
26  * ***** END GPL LICENSE BLOCK *****
27  */
28
29 /* system includes */
30 #include <stdio.h>
31 #include <math.h>
32 #include <string.h>
33
34 /* External modules: */
35 #include "MEM_guardedalloc.h"
36
37 #include "BLI_arithb.h"
38 #include "BLI_blenlib.h"
39 #include "BLI_jitter.h"
40 #include "BLI_rand.h"
41 #include "BLI_threads.h"
42
43 #include "BKE_utildefines.h"
44
45 #include "DNA_image_types.h"
46 #include "DNA_lamp_types.h"
47 #include "DNA_material_types.h"
48 #include "DNA_meshdata_types.h"
49
50 #include "BKE_global.h"
51 #include "BKE_image.h"
52 #include "BKE_main.h"
53 #include "BKE_node.h"
54 #include "BKE_texture.h"
55
56 #include "IMB_imbuf_types.h"
57 #include "IMB_imbuf.h"
58
59 /* local include */
60 #include "renderpipeline.h"
61 #include "render_types.h"
62 #include "renderdatabase.h"
63 #include "occlusion.h"
64 #include "pixelblending.h"
65 #include "pixelshading.h"
66 #include "shadbuf.h"
67 #include "shading.h"
68 #include "sss.h"
69 #include "zbuf.h"
70 #include "RE_raytrace.h"
71
72 #include "PIL_time.h"
73
74 /* own include */
75 #include "rendercore.h"
76
77
78 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
79 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
80 /* only to be used here in this file, it's for speed */
81 extern struct Render R;
82 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
83
84 /* x and y are current pixels in rect to be rendered */
85 /* do not normalize! */
86 void calc_view_vector(float *view, float x, float y)
87 {
88
89         view[2]= -ABS(R.clipsta);
90         
91         if(R.r.mode & R_ORTHO) {
92                 view[0]= view[1]= 0.0f;
93         }
94         else {
95                 
96                 if(R.r.mode & R_PANORAMA)
97                         x-= R.panodxp;
98                 
99                 /* move x and y to real viewplane coords */
100                 x= (x/(float)R.winx);
101                 view[0]= R.viewplane.xmin + x*(R.viewplane.xmax - R.viewplane.xmin);
102                 
103                 y= (y/(float)R.winy);
104                 view[1]= R.viewplane.ymin + y*(R.viewplane.ymax - R.viewplane.ymin);
105                 
106 //              if(R.flag & R_SEC_FIELD) {
107 //                      if(R.r.mode & R_ODDFIELD) view[1]= (y+R.ystart)*R.ycor;
108 //                      else view[1]= (y+R.ystart+1.0)*R.ycor;
109 //              }
110 //              else view[1]= (y+R.ystart+R.bluroffsy+0.5)*R.ycor;
111         
112                 if(R.r.mode & R_PANORAMA) {
113                         float u= view[0] + R.panodxv; float v= view[2];
114                         view[0]= R.panoco*u + R.panosi*v;
115                         view[2]= -R.panosi*u + R.panoco*v;
116                 }
117         }
118 }
119
120 void calc_renderco_ortho(float *co, float x, float y, int z)
121 {
122         /* x and y 3d coordinate can be derived from pixel coord and winmat */
123         float fx= 2.0f/(R.winx*R.winmat[0][0]);
124         float fy= 2.0f/(R.winy*R.winmat[1][1]);
125         float zco;
126         
127         co[0]= (x - 0.5f*R.winx)*fx - R.winmat[3][0]/R.winmat[0][0];
128         co[1]= (y - 0.5f*R.winy)*fy - R.winmat[3][1]/R.winmat[1][1];
129         
130         zco= ((float)z)/2147483647.0f;
131         co[2]= R.winmat[3][2]/( R.winmat[2][3]*zco - R.winmat[2][2] );
132 }
133
134 void calc_renderco_zbuf(float *co, float *view, int z)
135 {
136         float fac, zco;
137         
138         /* inverse of zbuf calc: zbuf = MAXZ*hoco_z/hoco_w */
139         zco= ((float)z)/2147483647.0f;
140         co[2]= R.winmat[3][2]/( R.winmat[2][3]*zco - R.winmat[2][2] );
141
142         fac= co[2]/view[2];
143         co[0]= fac*view[0];
144         co[1]= fac*view[1];
145 }
146
147 /* also used in zbuf.c and shadbuf.c */
148 int count_mask(unsigned short mask)
149 {
150         if(R.samples)
151                 return (R.samples->cmask[mask & 255]+R.samples->cmask[mask>>8]);
152         return 0;
153 }
154
155 static int calchalo_z(HaloRen *har, int zz)
156 {
157         
158         if(har->type & HA_ONLYSKY) {
159                 if(zz!=0x7FFFFFFF) zz= - 0x7FFFFF;
160         }
161         else {
162                 zz= (zz>>8);
163         }
164         return zz;
165 }
166
167 static void halo_pixelstruct(HaloRen *har, float *rb, float dist, float xn, float yn, PixStr *ps)
168 {
169         float col[4], accol[4];
170         int amount, amountm, zz, flarec;
171         
172         amount= 0;
173         accol[0]=accol[1]=accol[2]=accol[3]= 0.0f;
174         flarec= har->flarec;
175         
176         while(ps) {
177                 amountm= count_mask(ps->mask);
178                 amount+= amountm;
179                 
180                 zz= calchalo_z(har, ps->z);
181                 if(zz> har->zs) {
182                         float fac;
183                         
184                         shadeHaloFloat(har, col, zz, dist, xn, yn, flarec);
185                         fac= ((float)amountm)/(float)R.osa;
186                         accol[0]+= fac*col[0];
187                         accol[1]+= fac*col[1];
188                         accol[2]+= fac*col[2];
189                         accol[3]+= fac*col[3];
190                         flarec= 0;
191                 }
192                 
193                 ps= ps->next;
194         }
195         /* now do the sky sub-pixels */
196         amount= R.osa-amount;
197         if(amount) {
198                 float fac;
199
200                 shadeHaloFloat(har, col, 0x7FFFFF, dist, xn, yn, flarec);
201                 fac= ((float)amount)/(float)R.osa;
202                 accol[0]+= fac*col[0];
203                 accol[1]+= fac*col[1];
204                 accol[2]+= fac*col[2];
205                 accol[3]+= fac*col[3];
206         }
207         col[0]= accol[0];
208         col[1]= accol[1];
209         col[2]= accol[2];
210         col[3]= accol[3];
211         
212         addalphaAddfacFloat(rb, col, har->add);
213         
214 }
215
216 static void halo_tile(RenderPart *pa, float *pass, unsigned int lay)
217 {
218         HaloRen *har;
219         rcti disprect= pa->disprect, testrect= pa->disprect;
220         float dist, xsq, ysq, xn, yn, *rb;
221         float col[4];
222         long *rd= NULL;
223         int a, *rz, zz, y;
224         short minx, maxx, miny, maxy, x;
225
226         /* we don't render halos in the cropped area, gives errors in flare counter */
227         if(pa->crop) {
228                 testrect.xmin+= pa->crop;
229                 testrect.xmax-= pa->crop;
230                 testrect.ymin+= pa->crop;
231                 testrect.ymax-= pa->crop;
232         }
233         
234         for(a=0; a<R.tothalo; a++) {
235                 har= R.sortedhalos[a];
236
237                 /* layer test, clip halo with y */
238                 if((har->lay & lay)==0);
239                 else if(testrect.ymin > har->maxy);
240                 else if(testrect.ymax < har->miny);
241                 else {
242                         
243                         minx= floor(har->xs-har->rad);
244                         maxx= ceil(har->xs+har->rad);
245                         
246                         if(testrect.xmin > maxx);
247                         else if(testrect.xmax < minx);
248                         else {
249                                 
250                                 minx= MAX2(minx, testrect.xmin);
251                                 maxx= MIN2(maxx, testrect.xmax);
252                         
253                                 miny= MAX2(har->miny, testrect.ymin);
254                                 maxy= MIN2(har->maxy, testrect.ymax);
255                         
256                                 for(y=miny; y<maxy; y++) {
257                                         int rectofs= (y-disprect.ymin)*pa->rectx + (minx - disprect.xmin);
258                                         rb= pass + 4*rectofs;
259                                         rz= pa->rectz + rectofs;
260                                         
261                                         if(pa->rectdaps)
262                                                 rd= pa->rectdaps + rectofs;
263                                         
264                                         yn= (y-har->ys)*R.ycor;
265                                         ysq= yn*yn;
266                                         
267                                         for(x=minx; x<maxx; x++, rb+=4, rz++) {
268                                                 xn= x- har->xs;
269                                                 xsq= xn*xn;
270                                                 dist= xsq+ysq;
271                                                 if(dist<har->radsq) {
272                                                         if(rd && *rd) {
273                                                                 halo_pixelstruct(har, rb, dist, xn, yn, (PixStr *)*rd);
274                                                         }
275                                                         else {
276                                                                 zz= calchalo_z(har, *rz);
277                                                                 if(zz> har->zs) {
278                                                                         shadeHaloFloat(har, col, zz, dist, xn, yn, har->flarec);
279                                                                         addalphaAddfacFloat(rb, col, har->add);
280                                                                 }
281                                                         }
282                                                 }
283                                                 if(rd) rd++;
284                                         }
285                                 }
286                         }
287                 }
288                 if(R.test_break() ) break; 
289         }
290 }
291
292 static void lamphalo_tile(RenderPart *pa, RenderLayer *rl)
293 {
294         ShadeInput shi;
295         float *pass= rl->rectf;
296         float fac;
297         long *rd= pa->rectdaps;
298         int x, y, *rz= pa->rectz;
299         
300         shade_input_initialize(&shi, pa, rl, 0); /* this zero's ShadeInput for us */
301         
302         for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
303                 for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, rz++, pass+=4) {
304                         
305                         calc_view_vector(shi.view, x, y);
306                         
307                         if(rd && *rd) {
308                                 PixStr *ps= (PixStr *)*rd;
309                                 int samp, totsamp= 0;
310                                 
311                                 while(ps) {
312                                         if(R.r.mode & R_ORTHO)
313                                                 calc_renderco_ortho(shi.co, (float)x, (float)y, ps->z);
314                                         else
315                                                 calc_renderco_zbuf(shi.co, shi.view, ps->z);
316                                         
317                                         totsamp+= samp= count_mask(ps->mask);
318                                         fac= ((float)samp)/(float)R.osa;
319                                         renderspothalo(&shi, pass, fac);
320                                         ps= ps->next;
321                                 }
322                                 if(totsamp<R.osa) {
323                                         fac= ((float)R.osa-totsamp)/(float)R.osa;
324                                         shi.co[2]= 0.0f;
325                                         renderspothalo(&shi, pass, fac);
326                                 }
327                         }
328                         else {
329                                 if(R.r.mode & R_ORTHO)
330                                         calc_renderco_ortho(shi.co, (float)x, (float)y, *rz);
331                                 else
332                                         calc_renderco_zbuf(shi.co, shi.view, *rz);
333                                 
334                                 renderspothalo(&shi, pass, 1.0f);
335                         }
336                         
337                         if(rd) rd++;
338                 }
339                 if(y&1)
340                         if(R.test_break()) break; 
341         }
342 }                               
343
344
345 /* ********************* MAINLOOPS ******************** */
346
347 /* osa version */
348 static void add_filt_passes(RenderLayer *rl, int curmask, int rectx, int offset, ShadeInput *shi, ShadeResult *shr)
349 {
350         RenderPass *rpass;
351         
352         for(rpass= rl->passes.first; rpass; rpass= rpass->next) {
353                 float *fp, *col= NULL;
354                 int pixsize= 3;
355                 
356                 switch(rpass->passtype) {
357                         case SCE_PASS_RGBA:
358                                 col= shr->col;
359                                 pixsize= 4;
360                                 break;
361                         case SCE_PASS_DIFFUSE:
362                                 col= shr->diff;
363                                 break;
364                         case SCE_PASS_SPEC:
365                                 col= shr->spec;
366                                 break;
367                         case SCE_PASS_SHADOW:
368                                 col= shr->shad;
369                                 break;
370                         case SCE_PASS_AO:
371                                 col= shr->ao;
372                                 break;
373                         case SCE_PASS_REFLECT:
374                                 col= shr->refl;
375                                 break;
376                         case SCE_PASS_REFRACT:
377                                 col= shr->refr;
378                                 break;
379                         case SCE_PASS_RADIO:
380                                 col= shr->rad;
381                                 break;
382                         case SCE_PASS_NORMAL:
383                                 col= shr->nor;
384                                 break;
385                         case SCE_PASS_UV:
386                                 /* box filter only, gauss will screwup UV too much */
387                                 if(shi->totuv) {
388                                         float mult= (float)count_mask(curmask)/(float)R.osa;
389                                         fp= rpass->rect + 3*offset;
390                                         fp[0]+= mult*(0.5f + 0.5f*shi->uv[shi->actuv].uv[0]);
391                                         fp[1]+= mult*(0.5f + 0.5f*shi->uv[shi->actuv].uv[1]);
392                                         fp[2]+= mult;
393                                 }
394                                 break;
395                         case SCE_PASS_INDEXOB:
396                                 /* no filter */
397                                 if(shi->vlr) {
398                                         fp= rpass->rect + offset;
399                                         if(*fp==0.0f)
400                                                 *fp= (float)shi->obr->ob->index;
401                                 }
402                                 break;
403                         case SCE_PASS_MIST:
404                                 /*  */
405                                 col= &shr->mist;
406                                 pixsize= 1;
407                                 break;
408                         
409                         case SCE_PASS_VECTOR:
410                         {
411                                 /* add minimum speed in pixel, no filter */
412                                 fp= rpass->rect + 4*offset;
413                                 if( (ABS(shr->winspeed[0]) + ABS(shr->winspeed[1]))< (ABS(fp[0]) + ABS(fp[1])) ) {
414                                         fp[0]= shr->winspeed[0];
415                                         fp[1]= shr->winspeed[1];
416                                 }
417                                 if( (ABS(shr->winspeed[2]) + ABS(shr->winspeed[3]))< (ABS(fp[2]) + ABS(fp[3])) ) {
418                                         fp[2]= shr->winspeed[2];
419                                         fp[3]= shr->winspeed[3];
420                                 }
421                         }
422                                 break;
423                 }
424                 if(col) {
425                         fp= rpass->rect + pixsize*offset;
426                         add_filt_fmask_pixsize(curmask, col, fp, rectx, pixsize);
427                 }
428         }
429 }
430
431 /* non-osa version */
432 static void add_passes(RenderLayer *rl, int offset, ShadeInput *shi, ShadeResult *shr)
433 {
434         RenderPass *rpass;
435         
436         for(rpass= rl->passes.first; rpass; rpass= rpass->next) {
437                 float *fp, *col= NULL, uvcol[3];
438                 int a, pixsize= 3;
439                 
440                 switch(rpass->passtype) {
441                         case SCE_PASS_RGBA:
442                                 col= shr->col;
443                                 pixsize= 4;
444                                 break;
445                         case SCE_PASS_DIFFUSE:
446                                 col= shr->diff;
447                                 break;
448                         case SCE_PASS_SPEC:
449                                 col= shr->spec;
450                                 break;
451                         case SCE_PASS_SHADOW:
452                                 col= shr->shad;
453                                 break;
454                         case SCE_PASS_AO:
455                                 col= shr->ao;
456                                 break;
457                         case SCE_PASS_REFLECT:
458                                 col= shr->refl;
459                                 break;
460                         case SCE_PASS_REFRACT:
461                                 col= shr->refr;
462                                 break;
463                         case SCE_PASS_RADIO:
464                                 col= shr->rad;
465                                 break;
466                         case SCE_PASS_NORMAL:
467                                 col= shr->nor;
468                                 break;
469                         case SCE_PASS_UV:
470                                 if(shi->totuv) {
471                                         uvcol[0]= 0.5f + 0.5f*shi->uv[shi->actuv].uv[0];
472                                         uvcol[1]= 0.5f + 0.5f*shi->uv[shi->actuv].uv[1];
473                                         uvcol[2]= 1.0f;
474                                         col= uvcol;
475                                 }
476                                 break;
477                         case SCE_PASS_VECTOR:
478                                 col= shr->winspeed;
479                                 pixsize= 4;
480                                 break;
481                         case SCE_PASS_INDEXOB:
482                                 if(shi->vlr) {
483                                         fp= rpass->rect + offset;
484                                         *fp= (float)shi->obr->ob->index;
485                                 }
486                                 break;
487                         case SCE_PASS_MIST:
488                                 fp= rpass->rect + offset;
489                                 *fp= shr->mist;
490                                 break;
491                 }
492                 if(col) {
493                         fp= rpass->rect + pixsize*offset;
494                         for(a=0; a<pixsize; a++)
495                                 fp[a]= col[a];
496                 }
497         }
498 }
499
500 /* only do sky, is default in the solid layer (shade_tile) btw */
501 static void sky_tile(RenderPart *pa, float *pass)
502 {
503         float col[4];
504         int x, y;
505         
506         if(R.r.alphamode!=R_ADDSKY)
507                 return;
508         
509         for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
510                 for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, pass+=4) {
511                         if(pass[3]<1.0f) {
512                                 if(pass[3]==0.0f)
513                                         shadeSkyPixel(pass, x, y);
514                                 else {
515                                         shadeSkyPixel(col, x, y);
516                                         addAlphaOverFloat(col, pass);
517                                         QUATCOPY(pass, col);
518                                 }
519                         }
520                 }
521                 
522                 if(y&1)
523                         if(R.test_break()) break; 
524         }
525 }
526
527 static void shadeDA_tile(RenderPart *pa, RenderLayer *rl)
528 {
529         RenderResult *rr= pa->result;
530         ShadeSample ssamp;
531         float *fcol, *rf, *rectf= rl->rectf;
532         long *rd, *rectdaps= pa->rectdaps;
533         int samp;
534         int x, y, seed, crop=0, offs=0, od, addpassflag;
535         
536         if(R.test_break()) return; 
537         
538         /* irregular shadowb buffer creation */
539         if(R.r.mode & R_SHADOW)
540                 ISB_create(pa, NULL);
541         
542         /* we set per pixel a fixed seed, for random AO and shadow samples */
543         seed= pa->rectx*pa->disprect.ymin;
544         
545         /* general shader info, passes */
546         shade_sample_initialize(&ssamp, pa, rl);
547         addpassflag= rl->passflag & ~(SCE_PASS_Z|SCE_PASS_COMBINED);
548
549         /* occlusion caching */
550         if(R.occlusiontree)
551                 cache_occ_samples(&R, pa, &ssamp);
552                 
553         /* filtered render, for now we assume only 1 filter size */
554         if(pa->crop) {
555                 crop= 1;
556                 rectf+= 4*(pa->rectx + 1);
557                 rectdaps+= pa->rectx + 1;
558                 offs= pa->rectx + 1;
559         }
560         
561         /* scanline updates have to be 2 lines behind */
562         rr->renrect.ymin= 0;
563         rr->renrect.ymax= -2*crop;
564         rr->renlay= rl;
565                                 
566         for(y=pa->disprect.ymin+crop; y<pa->disprect.ymax-crop; y++, rr->renrect.ymax++) {
567                 rf= rectf;
568                 rd= rectdaps;
569                 od= offs;
570                 
571                 for(x=pa->disprect.xmin+crop; x<pa->disprect.xmax-crop; x++, rd++, rf+=4, od++) {
572                         BLI_thread_srandom(pa->thread, seed++);
573                         
574                         if(*rd) {
575                                 if(shade_samples(&ssamp, (PixStr *)(*rd), x, y)) {
576                                         for(samp=0; samp<ssamp.tot; samp++) {
577                                                 
578                                                 fcol= ssamp.shr[samp].combined;
579                                                 add_filt_fmask(ssamp.shi[samp].mask, fcol, rf, pa->rectx);
580                                                 
581                                                 if(addpassflag)
582                                                         add_filt_passes(rl, ssamp.shi[samp].mask, pa->rectx, od, &ssamp.shi[samp], &ssamp.shr[samp]);
583                                         }
584                                 }
585                         }
586                 }
587                 
588                 rectf+= 4*pa->rectx;
589                 rectdaps+= pa->rectx;
590                 offs+= pa->rectx;
591                 
592                 if(y&1) if(R.test_break()) break; 
593         }
594         
595         /* disable scanline updating */
596         rr->renlay= NULL;
597         
598         if(R.r.mode & R_SHADOW)
599                 ISB_free(pa);
600
601         if(R.occlusiontree)
602                 free_occ_samples(&R, pa);
603 }
604
605 /* ************* pixel struct ******** */
606
607
608 static PixStrMain *addpsmain(ListBase *lb)
609 {
610         PixStrMain *psm;
611         
612         psm= (PixStrMain *)MEM_mallocN(sizeof(PixStrMain),"pixstrMain");
613         BLI_addtail(lb, psm);
614         
615         psm->ps= (PixStr *)MEM_mallocN(4096*sizeof(PixStr),"pixstr");
616         psm->counter= 0;
617         
618         return psm;
619 }
620
621 static void freeps(ListBase *lb)
622 {
623         PixStrMain *psm, *psmnext;
624         
625         for(psm= lb->first; psm; psm= psmnext) {
626                 psmnext= psm->next;
627                 if(psm->ps)
628                         MEM_freeN(psm->ps);
629                 MEM_freeN(psm);
630         }
631         lb->first= lb->last= NULL;
632 }
633
634 void addps(ListBase *lb, long *rd, int obi, int facenr, int z, unsigned short mask)
635 {
636         PixStrMain *psm;
637         PixStr *ps, *last= NULL;
638         
639         if(*rd) {       
640                 ps= (PixStr *)(*rd);
641                 
642                 while(ps) {
643                         if( ps->obi == obi && ps->facenr == facenr ) {
644                                 ps->mask |= mask;
645                                 return;
646                         }
647                         last= ps;
648                         ps= ps->next;
649                 }
650         }
651         
652         /* make new PS (pixel struct) */
653         psm= lb->last;
654         
655         if(psm->counter==4095)
656                 psm= addpsmain(lb);
657         
658         ps= psm->ps + psm->counter++;
659         
660         if(last) last->next= ps;
661         else *rd= (long)ps;
662         
663         ps->next= NULL;
664         ps->obi= obi;
665         ps->facenr= facenr;
666         ps->z= z;
667         ps->mask = mask;
668         ps->shadfac= 0;
669 }
670
671 static void edge_enhance_add(RenderPart *pa, float *rectf, float *arect)
672 {
673         float addcol[4];
674         int pix;
675         
676         if(arect==NULL)
677                 return;
678         
679         for(pix= pa->rectx*pa->recty; pix>0; pix--, arect++, rectf+=4) {
680                 if(*arect != 0.0f) {
681                         addcol[0]= *arect * R.r.edgeR;
682                         addcol[1]= *arect * R.r.edgeG;
683                         addcol[2]= *arect * R.r.edgeB;
684                         addcol[3]= *arect;
685                         addAlphaOverFloat(rectf, addcol);
686                 }
687         }
688 }
689
690
691 static void convert_to_key_alpha(RenderPart *pa, float *rectf)
692 {
693         int y;
694         
695         for(y= pa->rectx*pa->recty; y>0; y--, rectf+=4) {
696                 if(rectf[3] >= 1.0f);
697                 else if(rectf[3] > 0.0f) {
698                         rectf[0] /= rectf[3];
699                         rectf[1] /= rectf[3];
700                         rectf[2] /= rectf[3];
701                 }
702         }
703 }
704
705 /* adds only alpha values */
706 void edge_enhance_tile(RenderPart *pa, float *rectf)    
707 {
708         /* use zbuffer to define edges, add it to the image */
709         int y, x, col, *rz, *rz1, *rz2, *rz3;
710         int zval1, zval2, zval3;
711         float *rf;
712         
713         /* shift values in zbuffer 4 to the right (anti overflows), for filter we need multiplying with 12 max */
714         rz= pa->rectz;
715         if(rz==NULL) return;
716         
717         for(y=0; y<pa->recty; y++)
718                 for(x=0; x<pa->rectx; x++, rz++) (*rz)>>= 4;
719         
720         rz1= pa->rectz;
721         rz2= rz1+pa->rectx;
722         rz3= rz2+pa->rectx;
723         
724         rf= rectf+pa->rectx+1;
725         
726         for(y=0; y<pa->recty-2; y++) {
727                 for(x=0; x<pa->rectx-2; x++, rz1++, rz2++, rz3++, rf++) {
728                         
729                         /* prevent overflow with sky z values */
730                         zval1=   rz1[0] + 2*rz1[1] +   rz1[2];
731                         zval2=  2*rz2[0]           + 2*rz2[2];
732                         zval3=   rz3[0] + 2*rz3[1] +   rz3[2];
733                         
734                         col= ( 4*rz2[1] - (zval1 + zval2 + zval3)/3 );
735                         if(col<0) col= -col;
736                         
737                         col >>= 5;
738                         if(col > (1<<16)) col= (1<<16);
739                         else col= (R.r.edgeint*col)>>8;
740                         
741                         if(col>0) {
742                                 float fcol;
743                                 
744                                 if(col>255) fcol= 1.0f;
745                                 else fcol= (float)col/255.0f;
746                                 
747                                 if(R.osa)
748                                         *rf+= fcol/(float)R.osa;
749                                 else
750                                         *rf= fcol;
751                         }
752                 }
753                 rz1+= 2;
754                 rz2+= 2;
755                 rz3+= 2;
756                 rf+= 2;
757         }
758         
759         /* shift back zbuf values, we might need it still */
760         rz= pa->rectz;
761         for(y=0; y<pa->recty; y++)
762                 for(x=0; x<pa->rectx; x++, rz++) (*rz)<<= 4;
763         
764 }
765
766 static void reset_sky_speed(RenderPart *pa, RenderLayer *rl)
767 {
768         /* for all pixels with max speed, set to zero */
769         float *fp;
770         int a;
771         
772         fp= RE_RenderLayerGetPass(rl, SCE_PASS_VECTOR);
773         if(fp==NULL) return;
774         
775         for(a= 4*pa->rectx*pa->recty - 1; a>=0; a--)
776                 if(fp[a] == PASS_VECTOR_MAX) fp[a]= 0.0f;
777 }
778
779
780 static unsigned short *make_solid_mask(RenderPart *pa)
781
782         long *rd= pa->rectdaps;
783         unsigned short *solidmask, *sp;
784         int x;
785         
786         if(rd==NULL) return NULL;
787         
788         sp=solidmask= MEM_mallocN(sizeof(short)*pa->rectx*pa->recty, "solidmask");
789         
790         for(x=pa->rectx*pa->recty; x>0; x--, rd++, sp++) {
791                 if(*rd) {
792                         PixStr *ps= (PixStr *)*rd;
793                         
794                         *sp= ps->mask;
795                         for(ps= ps->next; ps; ps= ps->next)
796                                 *sp |= ps->mask;
797                 }
798                 else
799                         *sp= 0;
800         }
801                         
802         return solidmask;
803 }
804
/* Composite the premultiplied RGBA pixel 'source' onto 'dest', using the
 * sample masks of both pixels to choose the operation:
 *  - both masks set but sharing no bit: plain add (premul only)
 *  - masks overlap and the shared bits differ from dmask: a mixture of
 *    'add' and 'over', weighted by the bit counts from count_mask()
 *  - every other case: regular alpha over with factor (1 - source alpha) */
static void addAlphaOverFloatMask(float *dest, float *source, unsigned short dmask, unsigned short smask)
{
	const unsigned short overlap= dmask & smask;
	float keep= 1.0 - source[3];
	int c;

	/* disjoint, non-empty masks: samples do not cover each other */
	if(!overlap && dmask && smask) {
		for(c=0; c<4; c++)
			dest[c]+= source[c];
		return;
	}

	/* partial overlap: raise the keep factor towards 1 (= add) */
	if(overlap && overlap!=dmask) {
		float over_bits= (float)count_mask(overlap);		/* alpha over */
		float all_bits= (float)count_mask(smask|dmask);		/* alpha add */
		float add= (all_bits - over_bits)/all_bits;		/* add level */

		keep= add + (1.0f-add)*keep;
	}

	for(c=0; c<4; c++)
		dest[c]= (keep*dest[c]) + source[c];
}
836
837 typedef struct ZbufSolidData {
838         RenderLayer *rl;
839         ListBase *psmlist;
840         float *edgerect;
841 } ZbufSolidData;
842
843 void make_pixelstructs(RenderPart *pa, ZSpan *zspan, int sample, void *data)
844 {
845         ZbufSolidData *sdata= (ZbufSolidData*)data;
846         ListBase *lb= sdata->psmlist;
847         long *rd= pa->rectdaps;
848         int *ro= zspan->recto;
849         int *rp= zspan->rectp;
850         int *rz= zspan->rectz;
851         int x, y;
852         int mask= 1<<sample;
853
854         for(y=0; y<pa->recty; y++) {
855                 for(x=0; x<pa->rectx; x++, rd++, rp++, ro++) {
856                         if(*rp) {
857                                 addps(lb, rd, *ro, *rp, *(rz+x), mask);
858                         }
859                 }
860                 rz+= pa->rectx;
861         }
862
863         if(sdata->rl->layflag & SCE_LAY_EDGE) 
864                 if(R.r.mode & R_EDGE) 
865                         edge_enhance_tile(pa, sdata->edgerect);
866 }
867
868 /* main call for shading Delta Accum, for OSA */
869 /* supposed to be fully threadable! */
870 void zbufshadeDA_tile(RenderPart *pa)
871 {
872         RenderResult *rr= pa->result;
873         RenderLayer *rl;
874         ListBase psmlist= {NULL, NULL};
875         float *edgerect= NULL;
876         
877         /* allocate the necessary buffers */
878                                 /* zbuffer inits these rects */
879         pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto");
880         pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp");
881         pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz");
882         
883         for(rl= rr->layers.first; rl; rl= rl->next) {
884
885                 /* initialize pixelstructs and edge buffer */
886                 addpsmain(&psmlist);
887                 pa->rectdaps= MEM_callocN(sizeof(long)*pa->rectx*pa->recty+4, "zbufDArectd");
888                 
889                 if(rl->layflag & SCE_LAY_EDGE) 
890                         if(R.r.mode & R_EDGE) 
891                                 edgerect= MEM_callocN(sizeof(float)*pa->rectx*pa->recty, "rectedge");
892                 
893                 /* always fill visibility */
894                 for(pa->sample=0; pa->sample<R.osa; pa->sample+=4) {
895                         ZbufSolidData sdata;
896
897                         sdata.rl= rl;
898                         sdata.psmlist= &psmlist;
899                         sdata.edgerect= edgerect;
900                         zbuffer_solid(pa, rl, make_pixelstructs, &sdata);
901                         if(R.test_break()) break; 
902                 }
903                 
904                 /* shades solid */
905                 if(rl->layflag & SCE_LAY_SOLID) 
906                         shadeDA_tile(pa, rl);
907                 
908                 /* lamphalo after solid, before ztra, looks nicest because ztra does own halo */
909                 if(R.flag & R_LAMPHALO)
910                         if(rl->layflag & SCE_LAY_HALO)
911                                 lamphalo_tile(pa, rl);
912                 
913                 /* halo before ztra, because ztra fills in zbuffer now */
914                 if(R.flag & R_HALO)
915                         if(rl->layflag & SCE_LAY_HALO)
916                                 halo_tile(pa, rl->rectf, rl->lay);
917
918                 /* transp layer */
919                 if(R.flag & R_ZTRA) {
920                         if(rl->layflag & SCE_LAY_ZTRA) {
921                                 unsigned short *ztramask, *solidmask= NULL; /* 16 bits, MAX_OSA */
922                                 
923                                 /* allocate, but not free here, for asynchronous display of this rect in main thread */
924                                 rl->acolrect= MEM_callocN(4*sizeof(float)*pa->rectx*pa->recty, "alpha layer");
925                                 
926                                 /* swap for live updates, and it is used in zbuf.c!!! */
927                                 SWAP(float *, rl->acolrect, rl->rectf);
928                                 ztramask= zbuffer_transp_shade(pa, rl, rl->rectf, &psmlist);
929                                 SWAP(float *, rl->acolrect, rl->rectf);
930                                 
931                                 /* zbuffer transp only returns ztramask if there's solid rendered */
932                                 if(ztramask)
933                                         solidmask= make_solid_mask(pa);
934
935                                 if(ztramask && solidmask) {
936                                         unsigned short *sps= solidmask, *spz= ztramask;
937                                         unsigned short fullmask= (1<<R.osa)-1;
938                                         float *fcol= rl->rectf; float *acol= rl->acolrect;
939                                         int x;
940                                         
941                                         for(x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4, sps++, spz++) {
942                                                 if(*sps == fullmask)
943                                                         addAlphaOverFloat(fcol, acol);
944                                                 else
945                                                         addAlphaOverFloatMask(fcol, acol, *sps, *spz);
946                                         }
947                                 }
948                                 else {
949                                         float *fcol= rl->rectf; float *acol= rl->acolrect;
950                                         int x;
951                                         for(x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4) {
952                                                 addAlphaOverFloat(fcol, acol);
953                                         }
954                                 }
955                                 if(solidmask) MEM_freeN(solidmask);
956                                 if(ztramask) MEM_freeN(ztramask);
957                         }
958                 }
959
960                 /* strand rendering */
961                 if((rl->layflag & SCE_LAY_STRAND) && R.totstrand) {
962                         float *fcol, *scol;
963                         unsigned short *strandmask, *solidmask= NULL; /* 16 bits, MAX_OSA */
964                         int x;
965                         
966                         /* allocate, but not free here, for asynchronous display of this rect in main thread */
967                         rl->scolrect= MEM_callocN(4*sizeof(float)*pa->rectx*pa->recty, "strand layer");
968
969                         /* swap for live updates, and it is used in zbuf.c!!! */
970                         SWAP(float*, rl->scolrect, rl->rectf);
971                         strandmask= zbuffer_strands_shade(&R, pa, rl, rl->rectf);
972                         SWAP(float*, rl->scolrect, rl->rectf);
973
974                         /* zbuffer strands only returns strandmask if there's solid rendered */
975                         if(strandmask)
976                                 solidmask= make_solid_mask(pa);
977                         
978                         if(strandmask && solidmask) {
979                                 unsigned short *sps= solidmask, *spz= strandmask;
980                                 unsigned short fullmask= (1<<R.osa)-1;
981
982                                 fcol= rl->rectf; scol= rl->scolrect;
983                                 for(x=pa->rectx*pa->recty; x>0; x--, scol+=4, fcol+=4, sps++, spz++) {
984                                         if(*sps == fullmask)
985                                                 addAlphaOverFloat(fcol, scol);
986                                         else
987                                                 addAlphaOverFloatMask(fcol, scol, *sps, *spz);
988                                 }
989                         }
990                         else {
991                                 fcol= rl->rectf; scol= rl->scolrect;
992                                 for(x=pa->rectx*pa->recty; x>0; x--, scol+=4, fcol+=4)
993                                         addAlphaOverFloat(fcol, scol);
994                         }
995
996                         if(solidmask) MEM_freeN(solidmask);
997                         if(strandmask) MEM_freeN(strandmask);
998                 }
999
1000                 /* sky before edge */
1001                 if(rl->layflag & SCE_LAY_SKY)
1002                         sky_tile(pa, rl->rectf);
1003
1004                 /* extra layers */
1005                 if(rl->layflag & SCE_LAY_EDGE) 
1006                         if(R.r.mode & R_EDGE) 
1007                                 edge_enhance_add(pa, rl->rectf, edgerect);
1008                 
1009                 if(rl->passflag & SCE_PASS_Z)
1010                         convert_zbuf_to_distbuf(pa, rl);
1011                 
1012                 if(rl->passflag & SCE_PASS_VECTOR)
1013                         reset_sky_speed(pa, rl);
1014                 
1015                 /* de-premul alpha */
1016                 if(R.r.alphamode & R_ALPHAKEY)
1017                         convert_to_key_alpha(pa, rl->rectf);
1018                 
1019                 /* free stuff within loop! */
1020                 MEM_freeN(pa->rectdaps); pa->rectdaps= NULL;
1021                 freeps(&psmlist);
1022                 
1023                 if(edgerect) MEM_freeN(edgerect);
1024                 edgerect= NULL;
1025         }
1026         
1027         /* free all */
1028         MEM_freeN(pa->recto); pa->recto= NULL;
1029         MEM_freeN(pa->rectp); pa->rectp= NULL;
1030         MEM_freeN(pa->rectz); pa->rectz= NULL;
1031         
1032         /* display active layer */
1033         rr->renrect.ymin=rr->renrect.ymax= 0;
1034         rr->renlay= render_get_active_layer(&R, rr);
1035 }
1036
1037
1038 /* ------------------------------------------------------------------------ */
1039
1040 /* non OSA case, full tile render */
1041 /* supposed to be fully threadable! */
1042 void zbufshade_tile(RenderPart *pa)
1043 {
1044         ShadeSample ssamp;
1045         RenderResult *rr= pa->result;
1046         RenderLayer *rl;
1047         PixStr ps;
1048         float *edgerect= NULL;
1049         int addpassflag;
1050         
1051         /* fake pixel struct, to comply to osa render */
1052         ps.next= NULL;
1053         ps.mask= 0xFFFF;
1054         
1055         /* zbuffer code clears/inits rects */
1056         pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto");
1057         pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp");
1058         pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz");
1059
1060         for(rl= rr->layers.first; rl; rl= rl->next) {
1061                 
1062                 /* general shader info, passes */
1063                 shade_sample_initialize(&ssamp, pa, rl);
1064                 addpassflag= rl->passflag & ~(SCE_PASS_Z|SCE_PASS_COMBINED);
1065                 
1066                 zbuffer_solid(pa, rl, NULL, NULL);
1067                 
1068                 if(!R.test_break()) {   /* NOTE: this if() is not consistant */
1069                         
1070                         /* edges only for solid part, ztransp doesn't support it yet anti-aliased */
1071                         if(rl->layflag & SCE_LAY_EDGE) {
1072                                 if(R.r.mode & R_EDGE) {
1073                                         edgerect= MEM_callocN(sizeof(float)*pa->rectx*pa->recty, "rectedge");
1074                                         edge_enhance_tile(pa, edgerect);
1075                                 }
1076                         }
1077                         
1078                         /* initialize scanline updates for main thread */
1079                         rr->renrect.ymin= 0;
1080                         rr->renlay= rl;
1081                         
1082                         if(rl->layflag & SCE_LAY_SOLID) {
1083                                 float *fcol= rl->rectf;
1084                                 int *ro= pa->recto, *rp= pa->rectp, *rz= pa->rectz;
1085                                 int x, y, offs=0, seed;
1086                                 
1087                                 /* we set per pixel a fixed seed, for random AO and shadow samples */
1088                                 seed= pa->rectx*pa->disprect.ymin;
1089                                 
1090                                 /* irregular shadowb buffer creation */
1091                                 if(R.r.mode & R_SHADOW)
1092                                         ISB_create(pa, NULL);
1093
1094                                 if(R.occlusiontree)
1095                                         cache_occ_samples(&R, pa, &ssamp);
1096                                 
1097                                 for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++, rr->renrect.ymax++) {
1098                                         for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, ro++, rz++, rp++, fcol+=4, offs++) {
1099                                                 /* per pixel fixed seed */
1100                                                 BLI_thread_srandom(pa->thread, seed++);
1101                                                 
1102                                                 if(*rp) {
1103                                                         ps.obi= *ro;
1104                                                         ps.facenr= *rp;
1105                                                         ps.z= *rz;
1106                                                         if(shade_samples(&ssamp, &ps, x, y)) {
1107                                                                 QUATCOPY(fcol, ssamp.shr[0].combined);
1108
1109                                                                 /* passes */
1110                                                                 if(addpassflag)
1111                                                                         add_passes(rl, offs, ssamp.shi, ssamp.shr);
1112                                                         }
1113                                                 }
1114                                         }
1115                                         if(y&1)
1116                                                 if(R.test_break()) break; 
1117                                 }
1118                                 
1119                                 if(R.occlusiontree)
1120                                         free_occ_samples(&R, pa);
1121                                 
1122                                 if(R.r.mode & R_SHADOW)
1123                                         ISB_free(pa);
1124                         }
1125                         
1126                         /* disable scanline updating */
1127                         rr->renlay= NULL;
1128                 }
1129                 
1130                 /* lamphalo after solid, before ztra, looks nicest because ztra does own halo */
1131                 if(R.flag & R_LAMPHALO)
1132                         if(rl->layflag & SCE_LAY_HALO)
1133                                 lamphalo_tile(pa, rl);
1134                 
1135                 /* halo before ztra, because ztra fills in zbuffer now */
1136                 if(R.flag & R_HALO)
1137                         if(rl->layflag & SCE_LAY_HALO)
1138                                 halo_tile(pa, rl->rectf, rl->lay);
1139                 
1140                 if(R.flag & R_ZTRA) {
1141                         if(rl->layflag & SCE_LAY_ZTRA) {
1142                                 float *fcol, *acol;
1143                                 int x;
1144                                 
1145                                 /* allocate, but not free here, for asynchronous display of this rect in main thread */
1146                                 rl->acolrect= MEM_callocN(4*sizeof(float)*pa->rectx*pa->recty, "alpha layer");
1147                                 
1148                                 /* swap for live updates */
1149                                 SWAP(float *, rl->acolrect, rl->rectf);
1150                                 zbuffer_transp_shade(pa, rl, rl->rectf, NULL);
1151                                 SWAP(float *, rl->acolrect, rl->rectf);
1152                                 
1153                                 fcol= rl->rectf; acol= rl->acolrect;
1154                                 for(x=pa->rectx*pa->recty; x>0; x--, acol+=4, fcol+=4) {
1155                                         addAlphaOverFloat(fcol, acol);
1156                                 }
1157                         }
1158                 }
1159
1160                 /* strand rendering */
1161                 if((rl->layflag & SCE_LAY_STRAND) && R.totstrand) {
1162                         float *fcol, *scol;
1163                         int x;
1164                         
1165                         /* allocate, but not free here, for asynchronous display of this rect in main thread */
1166                         rl->scolrect= MEM_callocN(4*sizeof(float)*pa->rectx*pa->recty, "strand layer");
1167
1168                         /* swap for live updates */
1169                         SWAP(float*, rl->scolrect, rl->rectf);
1170                         zbuffer_strands_shade(&R, pa, rl, rl->rectf);
1171                         SWAP(float*, rl->scolrect, rl->rectf);
1172
1173                         fcol= rl->rectf; scol= rl->scolrect;
1174                         for(x=pa->rectx*pa->recty; x>0; x--, scol+=4, fcol+=4)
1175                                 addAlphaOverFloat(fcol, scol);
1176                 }
1177                 
1178                 /* sky before edge */
1179                 if(rl->layflag & SCE_LAY_SKY)
1180                         sky_tile(pa, rl->rectf);
1181                 
1182                 if(!R.test_break()) {
1183                         if(rl->layflag & SCE_LAY_EDGE) 
1184                                 if(R.r.mode & R_EDGE)
1185                                         edge_enhance_add(pa, rl->rectf, edgerect);
1186                 }
1187                 
1188                 if(rl->passflag & SCE_PASS_Z)
1189                         convert_zbuf_to_distbuf(pa, rl);
1190                 
1191                 if(rl->passflag & SCE_PASS_VECTOR)
1192                         reset_sky_speed(pa, rl);
1193                 
1194                 /* de-premul alpha */
1195                 if(R.r.alphamode & R_ALPHAKEY)
1196                         convert_to_key_alpha(pa, rl->rectf);
1197                 
1198                 if(edgerect) MEM_freeN(edgerect);
1199                 edgerect= NULL;
1200         }
1201
1202         /* display active layer */
1203         rr->renrect.ymin=rr->renrect.ymax= 0;
1204         rr->renlay= render_get_active_layer(&R, rr);
1205         
1206         MEM_freeN(pa->recto); pa->recto= NULL;
1207         MEM_freeN(pa->rectp); pa->rectp= NULL;
1208         MEM_freeN(pa->rectz); pa->rectz= NULL;
1209 }
1210
1211 /* SSS preprocess tile render, fully threadable */
1212 typedef struct ZBufSSSHandle {
1213         RenderPart *pa;
1214         ListBase psmlist;
1215         int totps;
1216 } ZBufSSSHandle;
1217
1218 static void addps_sss(void *cb_handle, int obi, int facenr, int x, int y, int z)
1219 {
1220         ZBufSSSHandle *handle = cb_handle;
1221         RenderPart *pa= handle->pa;
1222
1223         /* extra border for filter gives double samples on part edges,
1224            don't use those */
1225         if(x<pa->crop || x>=pa->rectx-pa->crop)
1226                 return;
1227         if(y<pa->crop || y>=pa->recty-pa->crop)
1228                 return;
1229         
1230         if(pa->rectall) {
1231                 long *rs= pa->rectall + pa->rectx*y + x;
1232
1233                 addps(&handle->psmlist, rs, obi, facenr, z, 0);
1234                 handle->totps++;
1235         }
1236         if(pa->rectz) {
1237                 int *rz= pa->rectz + pa->rectx*y + x;
1238                 int *rp= pa->rectp + pa->rectx*y + x;
1239                 int *ro= pa->recto + pa->rectx*y + x;
1240
1241                 if(z < *rz) {
1242                         if(*rp == 0)
1243                                 handle->totps++;
1244                         *rz= z;
1245                         *rp= facenr;
1246                         *ro= obi;
1247                 }
1248         }
1249         if(pa->rectbackz) {
1250                 int *rz= pa->rectbackz + pa->rectx*y + x;
1251                 int *rp= pa->rectbackp + pa->rectx*y + x;
1252                 int *ro= pa->rectbacko + pa->rectx*y + x;
1253
1254                 if(z >= *rz) {
1255                         if(*rp == 0)
1256                                 handle->totps++;
1257                         *rz= z;
1258                         *rp= facenr;
1259                         *ro= obi;
1260                 }
1261         }
1262 }
1263
1264 static void shade_sample_sss(ShadeSample *ssamp, Material *mat, ObjectInstanceRen *obi, VlakRen *vlr, int quad, float x, float y, float z, float *co, float *color, float *area)
1265 {
1266         ShadeInput *shi= ssamp->shi;
1267         ShadeResult shr;
1268         float texfac, orthoarea, nor[3];
1269
1270         /* cache for shadow */
1271         shi->samplenr++;
1272         
1273         if(quad) 
1274                 shade_input_set_triangle_i(shi, obi, vlr, 0, 2, 3);
1275         else
1276                 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
1277
1278         /* center pixel */
1279         x += 0.5f;
1280         y += 0.5f;
1281
1282         /* we estimate the area here using shi->dxco and shi->dyco. we need to
1283            enabled shi->osatex these are filled. we compute two areas, one with
1284            the normal pointed at the camera and one with the original normal, and
1285            then clamp to avoid a too large contribution from a single pixel */
1286         shi->osatex= 1;
1287
1288         VECCOPY(nor, shi->facenor);
1289         calc_view_vector(shi->facenor, x, y);
1290         Normalize(shi->facenor);
1291         shade_input_set_viewco(shi, x, y, z);
1292         orthoarea= VecLength(shi->dxco)*VecLength(shi->dyco);
1293
1294         VECCOPY(shi->facenor, nor);
1295         shade_input_set_viewco(shi, x, y, z);
1296         *area= VecLength(shi->dxco)*VecLength(shi->dyco);
1297         *area= MIN2(*area, 2.0f*orthoarea);
1298
1299         shade_input_set_uv(shi);
1300         shade_input_set_normals(shi);
1301
1302         /* we don't want flipped normals, they screw up back scattering */
1303         if(shi->flippednor)
1304                 shade_input_flip_normals(shi);
1305
1306         /* not a pretty solution, but fixes common cases */
1307         if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
1308                 VecMulf(shi->vn, -1.0f);
1309                 VecMulf(shi->vno, -1.0f);
1310         }
1311
1312         /* if nodetree, use the material that we are currently preprocessing
1313            instead of the node material */
1314         if(shi->mat->nodetree && shi->mat->use_nodes)
1315                 shi->mat= mat;
1316
1317         /* init material vars */
1318         // note, keep this synced with render_types.h
1319         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));
1320         shi->har= shi->mat->har;
1321         
1322         /* render */
1323         shade_input_set_shade_texco(shi);
1324         
1325         shade_samples_do_AO(ssamp);
1326         shade_material_loop(shi, &shr);
1327         
1328         VECCOPY(co, shi->co);
1329         VECCOPY(color, shr.combined);
1330
1331         /* texture blending */
1332         texfac= shi->mat->sss_texfac;
1333
1334         if(texfac == 0.0f) {
1335                 if(shr.col[0]!=0.0f) color[0] /= shr.col[0];
1336                 if(shr.col[1]!=0.0f) color[1] /= shr.col[1];
1337                 if(shr.col[2]!=0.0f) color[2] /= shr.col[2];
1338         }
1339         else if(texfac != 1.0f) {
1340                 if(shr.col[0]!=0.0f) color[0] *= pow(shr.col[0], texfac)/shr.col[0];
1341                 if(shr.col[1]!=0.0f) color[1] *= pow(shr.col[1], texfac)/shr.col[1];
1342                 if(shr.col[2]!=0.0f) color[2] *= pow(shr.col[2], texfac)/shr.col[2];
1343         }
1344 }
1345
1346 static void zbufshade_sss_free(RenderPart *pa)
1347 {
1348 #if 0
1349         MEM_freeN(pa->rectall); pa->rectall= NULL;
1350         freeps(&handle.psmlist);
1351 #else
1352         MEM_freeN(pa->rectz); pa->rectz= NULL;
1353         MEM_freeN(pa->rectp); pa->rectp= NULL;
1354         MEM_freeN(pa->recto); pa->recto= NULL;
1355         MEM_freeN(pa->rectbackz); pa->rectbackz= NULL;
1356         MEM_freeN(pa->rectbackp); pa->rectbackp= NULL;
1357         MEM_freeN(pa->rectbacko); pa->rectbacko= NULL;
1358 #endif
1359 }
1360
1361 void zbufshade_sss_tile(RenderPart *pa)
1362 {
1363         Render *re= &R;
1364         ShadeSample ssamp;
1365         ZBufSSSHandle handle;
1366         RenderResult *rr= pa->result;
1367         RenderLayer *rl;
1368         VlakRen *vlr;
1369         Material *mat= re->sss_mat;
1370         float (*co)[3], (*color)[3], *area, *fcol;
1371         int x, y, seed, quad, totpoint, display = !(re->r.scemode & R_PREVIEWBUTS);
1372         int *ro, *rz, *rp, *rbo, *rbz, *rbp, lay;
1373 #if 0
1374         PixStr *ps;
1375         long *rs;
1376         int z;
1377 #endif
1378
1379         /* setup pixelstr list and buffer for zbuffering */
1380         handle.pa= pa;
1381         handle.totps= 0;
1382
1383 #if 0
1384         handle.psmlist.first= handle.psmlist.last= NULL;
1385         addpsmain(&handle.psmlist);
1386
1387         pa->rectall= MEM_callocN(sizeof(long)*pa->rectx*pa->recty+4, "rectall");
1388 #else
1389         pa->recto= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "recto");
1390         pa->rectp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectp");
1391         pa->rectz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectz");
1392         pa->rectbacko= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbacko");
1393         pa->rectbackp= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbackp");
1394         pa->rectbackz= MEM_mallocN(sizeof(int)*pa->rectx*pa->recty, "rectbackz");
1395 #endif
1396
1397         /* setup shade sample with correct passes */
1398         memset(&ssamp, 0, sizeof(ssamp));
1399         shade_sample_initialize(&ssamp, pa, rr->layers.first);
1400         ssamp.tot= 1;
1401         
1402         for(rl=rr->layers.first; rl; rl=rl->next) {
1403                 ssamp.shi[0].lay |= rl->lay;
1404                 ssamp.shi[0].layflag |= rl->layflag;
1405                 ssamp.shi[0].passflag |= rl->passflag;
1406                 ssamp.shi[0].combinedflag |= ~rl->pass_xor;
1407         }
1408
1409         rl= rr->layers.first;
1410         ssamp.shi[0].passflag |= SCE_PASS_RGBA|SCE_PASS_COMBINED;
1411         ssamp.shi[0].combinedflag &= ~(SCE_PASS_SPEC);
1412         lay= ssamp.shi[0].lay;
1413
1414         /* create the pixelstrs to be used later */
1415         zbuffer_sss(pa, lay, &handle, addps_sss);
1416
1417         if(handle.totps==0) {
1418                 zbufshade_sss_free(pa);
1419                 return;
1420         }
1421         
1422         fcol= rl->rectf;
1423
1424         co= MEM_mallocN(sizeof(float)*3*handle.totps, "SSSCo");
1425         color= MEM_mallocN(sizeof(float)*3*handle.totps, "SSSColor");
1426         area= MEM_mallocN(sizeof(float)*handle.totps, "SSSArea");
1427
1428 #if 0
1429         /* create ISB (does not work currently!) */
1430         if(re->r.mode & R_SHADOW)
1431                 ISB_create(pa, NULL);
1432 #endif
1433
1434         if(display) {
1435                 /* initialize scanline updates for main thread */
1436                 rr->renrect.ymin= 0;
1437                 rr->renlay= rl;
1438         }
1439         
1440         seed= pa->rectx*pa->disprect.ymin;
1441 #if 0
1442         rs= pa->rectall;
1443 #else
1444         rz= pa->rectz;
1445         rp= pa->rectp;
1446         ro= pa->recto;
1447         rbz= pa->rectbackz;
1448         rbp= pa->rectbackp;
1449         rbo= pa->rectbacko;
1450 #endif
1451         totpoint= 0;
1452
1453         for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++, rr->renrect.ymax++) {
1454                 for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, fcol+=4) {
1455                         /* per pixel fixed seed */
1456                         BLI_thread_srandom(pa->thread, seed++);
1457                         
1458 #if 0
1459                         if(rs) {
1460                                 /* for each sample in this pixel, shade it */
1461                                 for(ps=(PixStr*)*rs; ps; ps=ps->next) {
1462                                         ObjectInstanceRen *obi= &re->objectinstance[ps->obi];
1463                                         ObjectRen *obr= obi->obr;
1464                                         vlr= RE_findOrAddVlak(obr, (ps->facenr-1) & RE_QUAD_MASK);
1465                                         quad= (ps->facenr & RE_QUAD_OFFS);
1466                                         z= ps->z;
1467
1468                                         shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, z,
1469                                                 co[totpoint], color[totpoint], &area[totpoint]);
1470
1471                                         totpoint++;
1472
1473                                         VECADD(fcol, fcol, color);
1474                                         fcol[3]= 1.0f;
1475                                 }
1476
1477                                 rs++;
1478                         }
1479 #else
1480                         if(rp) {
1481                                 if(*rp != 0) {
1482                                         ObjectInstanceRen *obi= &re->objectinstance[*ro];
1483                                         ObjectRen *obr= obi->obr;
1484
1485                                         /* shade front */
1486                                         vlr= RE_findOrAddVlak(obr, (*rp-1) & RE_QUAD_MASK);
1487                                         quad= ((*rp) & RE_QUAD_OFFS);
1488
1489                                         shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, *rz,
1490                                                 co[totpoint], color[totpoint], &area[totpoint]);
1491                                         
1492                                         VECADD(fcol, fcol, color[totpoint]);
1493                                         fcol[3]= 1.0f;
1494                                         totpoint++;
1495                                 }
1496
1497                                 rp++; rz++; ro++;
1498                         }
1499
1500                         if(rbp) {
1501                                 if(*rbp != 0 && !(*rbp == *(rp-1) && *rbo == *(ro-1))) {
1502                                         ObjectInstanceRen *obi= &re->objectinstance[*rbo];
1503                                         ObjectRen *obr= obi->obr;
1504
1505                                         /* shade back */
1506                                         vlr= RE_findOrAddVlak(obr, (*rbp-1) & RE_QUAD_MASK);
1507                                         quad= ((*rbp) & RE_QUAD_OFFS);
1508
1509                                         shade_sample_sss(&ssamp, mat, obi, vlr, quad, x, y, *rbz,
1510                                                 co[totpoint], color[totpoint], &area[totpoint]);
1511                                         
1512                                         /* to indicate this is a back sample */
1513                                         area[totpoint]= -area[totpoint];
1514
1515                                         VECADD(fcol, fcol, color[totpoint]);
1516                                         fcol[3]= 1.0f;
1517                                         totpoint++;
1518                                 }
1519
1520                                 rbz++; rbp++; rbo++;
1521                         }
1522 #endif
1523                 }
1524
1525                 if(y&1)
1526                         if(re->test_break()) break; 
1527         }
1528
1529         /* note: after adding we do not free these arrays, sss keeps them */
1530         if(totpoint > 0) {
1531                 sss_add_points(re, co, color, area, totpoint);
1532         }
1533         else {
1534                 MEM_freeN(co);
1535                 MEM_freeN(color);
1536                 MEM_freeN(area);
1537         }
1538         
1539 #if 0
1540         if(re->r.mode & R_SHADOW)
1541                 ISB_free(pa);
1542 #endif
1543                 
1544         if(display) {
1545                 /* display active layer */
1546                 rr->renrect.ymin=rr->renrect.ymax= 0;
1547                 rr->renlay= render_get_active_layer(&R, rr);
1548         }
1549         
1550         zbufshade_sss_free(pa);
1551 }
1552
1553 /* ------------------------------------------------------------------------ */
1554
1555 static void renderhalo_post(RenderResult *rr, float *rectf, HaloRen *har)       /* postprocess version */
1556 {
1557         float dist, xsq, ysq, xn, yn, colf[4], *rectft, *rtf;
1558         float haloxs, haloys;
1559         int minx, maxx, miny, maxy, x, y;
1560
1561         /* calculate the disprect mapped coordinate for halo. note: rectx is disprect corrected */
1562         haloxs= har->xs - R.disprect.xmin;
1563         haloys= har->ys - R.disprect.ymin;
1564         
1565         har->miny= miny= haloys - har->rad/R.ycor;
1566         har->maxy= maxy= haloys + har->rad/R.ycor;
1567         
1568         if(maxy<0);
1569         else if(rr->recty<miny);
1570         else {
1571                 minx= floor(haloxs-har->rad);
1572                 maxx= ceil(haloxs+har->rad);
1573                         
1574                 if(maxx<0);
1575                 else if(rr->rectx<minx);
1576                 else {
1577                 
1578                         if(minx<0) minx= 0;
1579                         if(maxx>=rr->rectx) maxx= rr->rectx-1;
1580                         if(miny<0) miny= 0;
1581                         if(maxy>rr->recty) maxy= rr->recty;
1582         
1583                         rectft= rectf+ 4*rr->rectx*miny;
1584
1585                         for(y=miny; y<maxy; y++) {
1586         
1587                                 rtf= rectft+4*minx;
1588                                 
1589                                 yn= (y - haloys)*R.ycor;
1590                                 ysq= yn*yn;
1591                                 
1592                                 for(x=minx; x<=maxx; x++) {
1593                                         xn= x - haloxs;
1594                                         xsq= xn*xn;
1595                                         dist= xsq+ysq;
1596                                         if(dist<har->radsq) {
1597                                                 
1598                                                 shadeHaloFloat(har, colf, 0x7FFFFF, dist, xn, yn, har->flarec);
1599                                                 addalphaAddfacFloat(rtf, colf, har->add);
1600                                         }
1601                                         rtf+=4;
1602                                 }
1603         
1604                                 rectft+= 4*rr->rectx;
1605                                 
1606                                 if(R.test_break()) break; 
1607                         }
1608                 }
1609         }
1610
1611 /* ------------------------------------------------------------------------ */
1612
1613 static void renderflare(RenderResult *rr, float *rectf, HaloRen *har)
1614 {
1615         extern float hashvectf[];
1616         HaloRen fla;
1617         Material *ma;
1618         float *rc, rad, alfa, visifac, vec[3];
1619         int b, type;
1620         
1621         fla= *har;
1622         fla.linec= fla.ringc= fla.flarec= 0;
1623         
1624         rad= har->rad;
1625         alfa= har->alfa;
1626         
1627         visifac= R.ycor*(har->pixels);
1628         /* all radials added / r^3  == 1.0f! */
1629         visifac /= (har->rad*har->rad*har->rad);
1630         visifac*= visifac;
1631
1632         ma= har->mat;
1633         
1634         /* first halo: just do */
1635         
1636         har->rad= rad*ma->flaresize*visifac;
1637         har->radsq= har->rad*har->rad;
1638         har->zs= fla.zs= 0;
1639         
1640         har->alfa= alfa*visifac;
1641
1642         renderhalo_post(rr, rectf, har);
1643         
1644         /* next halo's: the flares */
1645         rc= hashvectf + ma->seed2;
1646         
1647         for(b=1; b<har->flarec; b++) {
1648                 
1649                 fla.r= fabs(rc[0]);
1650                 fla.g= fabs(rc[1]);
1651                 fla.b= fabs(rc[2]);
1652                 fla.alfa= ma->flareboost*fabs(alfa*visifac*rc[3]);
1653                 fla.hard= 20.0f + fabs(70*rc[7]);
1654                 fla.tex= 0;
1655                 
1656                 type= (int)(fabs(3.9*rc[6]));
1657
1658                 fla.rad= ma->subsize*sqrt(fabs(2.0f*har->rad*rc[4]));
1659                 
1660                 if(type==3) {
1661                         fla.rad*= 3.0f;
1662                         fla.rad+= R.rectx/10;
1663                 }
1664                 
1665                 fla.radsq= fla.rad*fla.rad;
1666                 
1667                 vec[0]= 1.4*rc[5]*(har->xs-R.winx/2);
1668                 vec[1]= 1.4*rc[5]*(har->ys-R.winy/2);
1669                 vec[2]= 32.0f*sqrt(vec[0]*vec[0] + vec[1]*vec[1] + 1.0f);
1670                 
1671                 fla.xs= R.winx/2 + vec[0] + (1.2+rc[8])*R.rectx*vec[0]/vec[2];
1672                 fla.ys= R.winy/2 + vec[1] + (1.2+rc[8])*R.rectx*vec[1]/vec[2];
1673
1674                 if(R.flag & R_SEC_FIELD) {
1675                         if(R.r.mode & R_ODDFIELD) fla.ys += 0.5;
1676                         else fla.ys -= 0.5;
1677                 }
1678                 if(type & 1) fla.type= HA_FLARECIRC;
1679                 else fla.type= 0;
1680                 renderhalo_post(rr, rectf, &fla);
1681
1682                 fla.alfa*= 0.5;
1683                 if(type & 2) fla.type= HA_FLARECIRC;
1684                 else fla.type= 0;
1685                 renderhalo_post(rr, rectf, &fla);
1686                 
1687                 rc+= 7;
1688         }
1689 }
1690
1691 /* needs recode... integrate this better! */
1692 void add_halo_flare(Render *re)
1693 {
1694         RenderResult *rr= re->result;
1695         RenderLayer *rl;
1696         HaloRen *har;
1697         int a, mode, do_draw=0;
1698         
1699         /* for now, we get the first renderlayer in list with halos set */
1700         for(rl= rr->layers.first; rl; rl= rl->next)
1701                 if(rl->layflag & SCE_LAY_HALO)
1702                         break;
1703
1704         if(rl==NULL || rl->rectf==NULL)
1705                 return;
1706         
1707         mode= R.r.mode;
1708         R.r.mode &= ~R_PANORAMA;
1709         
1710         project_renderdata(&R, projectverto, 0, 0, 0);
1711         
1712         for(a=0; a<R.tothalo; a++) {
1713                 har= R.sortedhalos[a];
1714                 
1715                 if(har->flarec) {
1716                         do_draw= 1;
1717                         renderflare(rr, rl->rectf, har);
1718                 }
1719         }
1720
1721         if(do_draw) {
1722                 /* weak... the display callback wants an active renderlayer pointer... */
1723                 rr->renlay= rl;
1724                 re->display_draw(rr, NULL);
1725         }
1726         
1727         R.r.mode= mode; 
1728 }
1729
1730 /* ************************* used for shaded view ************************ */
1731
1732 /* if *re, then initialize, otherwise execute */
1733 void RE_shade_external(Render *re, ShadeInput *shi, ShadeResult *shr)
1734 {
1735         static VlakRen vlr;
1736         static ObjectRen obr;
1737         static ObjectInstanceRen obi;
1738         
1739         /* init */
1740         if(re) {
1741                 R= *re;
1742                 
1743                 /* fake render face */
1744                 memset(&vlr, 0, sizeof(VlakRen));
1745                 memset(&obr, 0, sizeof(ObjectRen));
1746                 memset(&obi, 0, sizeof(ObjectInstanceRen));
1747                 obr.lay= -1;
1748                 obi.obr= &obr;
1749                 
1750                 return;
1751         }
1752         shi->vlr= &vlr;
1753         shi->obr= &obr;
1754         shi->obi= &obi;
1755         
1756         if(shi->mat->nodetree && shi->mat->use_nodes)
1757                 ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
1758         else {
1759                 /* copy all relevant material vars, note, keep this synced with render_types.h */
1760                 memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));
1761                 shi->har= shi->mat->har;
1762                 
1763                 shade_material_loop(shi, shr);
1764         }
1765 }
1766
1767 /* ************************* bake ************************ */
1768
/* Converts a float channel value to a byte value: <=0.0 gives 0, >=1.0 gives 255,
 * anything in between is 255*val truncated. The argument and the whole expansion
 * are parenthesized so expressions can be passed in safely, and the cast goes to
 * unsigned char so values above 127 stay in range of the target type.
 * Note: val is evaluated more than once, don't pass expressions with side effects. */
#define FTOCHAR(val) ((val)<=0.0f? 0: ((val)>=1.0f? 255: (unsigned char)(255.0f*(val))))
1770
1771 typedef struct BakeShade {
1772         ShadeSample ssamp;
1773         ObjectInstanceRen *obi;
1774         VlakRen *vlr;
1775         
1776         ZSpan *zspan;
1777         Image *ima;
1778         ImBuf *ibuf;
1779         
1780         int rectx, recty, quad, type, vdone, ready;
1781
1782         float dir[3];
1783         Object *actob;
1784         
1785         unsigned int *rect;
1786         float *rect_float;
1787 } BakeShade;
1788
1789 static void bake_set_shade_input(ObjectInstanceRen *obi, VlakRen *vlr, ShadeInput *shi, int quad, int isect, int x, int y, float u, float v)
1790 {
1791         if(isect) {
1792                 /* raytrace intersection with different u,v than scanconvert */
1793                 if(vlr->v4) {
1794                         if(quad)
1795                                 shade_input_set_triangle_i(shi, obi, vlr, 2, 1, 3);
1796                         else
1797                                 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 3);
1798                 }
1799                 else
1800                         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
1801         }
1802         else {
1803                 /* regular scanconvert */
1804                 if(quad) 
1805                         shade_input_set_triangle_i(shi, obi, vlr, 0, 2, 3);
1806                 else
1807                         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
1808         }
1809                 
1810         /* set up view vector */
1811         VECCOPY(shi->view, shi->co);
1812         Normalize(shi->view);
1813         
1814         /* cache for shadow */
1815         shi->samplenr++;
1816         
1817         shi->u= -u;
1818         shi->v= -v;
1819         shi->xs= x;
1820         shi->ys= y;
1821         
1822         shade_input_set_normals(shi);
1823
1824         /* no normal flip */
1825         if(shi->flippednor)
1826                 shade_input_flip_normals(shi);
1827 }
1828
1829 static void bake_shade(void *handle, Object *ob, ShadeInput *shi, int quad, int x, int y, float u, float v, float *tvn, float *ttang)
1830 {
1831         BakeShade *bs= handle;
1832         ShadeSample *ssamp= &bs->ssamp;
1833         ShadeResult shr;
1834         VlakRen *vlr= shi->vlr;
1835         
1836         /* init material vars */
1837         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));        // note, keep this synced with render_types.h
1838         shi->har= shi->mat->har;
1839         
1840         if(bs->type==RE_BAKE_AO) {
1841                 ambient_occlusion(shi);
1842                 ambient_occlusion_to_diffuse(shi, shr.combined);
1843         }
1844         else {
1845                 shade_input_set_shade_texco(shi);
1846                 
1847                 shade_samples_do_AO(ssamp);
1848                 
1849                 if(shi->mat->nodetree && shi->mat->use_nodes) {
1850                         ntreeShaderExecTree(shi->mat->nodetree, shi, &shr);
1851                         shi->mat= vlr->mat;             /* shi->mat is being set in nodetree */
1852                 }
1853                 else
1854                         shade_material_loop(shi, &shr);
1855                 
1856                 if(bs->type==RE_BAKE_NORMALS) {
1857                         float nor[3];
1858
1859                         VECCOPY(nor, shi->vn);
1860
1861                         if(R.r.bake_normal_space == R_BAKE_SPACE_CAMERA);
1862                         else if(R.r.bake_normal_space == R_BAKE_SPACE_TANGENT) {
1863                                 float mat[3][3], imat[3][3];
1864
1865                                 /* bitangent */
1866                                 if(tvn && ttang) {
1867                                         VECCOPY(mat[0], ttang);
1868                                         Crossf(mat[1], tvn, ttang);
1869                                         VECCOPY(mat[2], tvn);
1870                                 }
1871                                 else {
1872                                         VECCOPY(mat[0], shi->tang);
1873                                         Crossf(mat[1], shi->vn, shi->tang);
1874                                         VECCOPY(mat[2], shi->vn);
1875                                 }
1876
1877                                 Mat3Inv(imat, mat);
1878                                 Mat3MulVecfl(imat, nor);
1879                         }
1880                         else if(R.r.bake_normal_space == R_BAKE_SPACE_OBJECT)
1881                                 Mat4Mul3Vecfl(ob->imat, nor); /* ob->imat includes viewinv! */
1882                         else if(R.r.bake_normal_space == R_BAKE_SPACE_WORLD)
1883                                 Mat4Mul3Vecfl(R.viewinv, nor);
1884
1885                         Normalize(nor); /* in case object has scaling */
1886
1887                         shr.combined[0]= nor[0]/2.0f + 0.5f;
1888                         shr.combined[1]= 0.5f - nor[1]/2.0f;
1889                         shr.combined[2]= nor[2]/2.0f + 0.5f;
1890                 }
1891                 else if(bs->type==RE_BAKE_TEXTURE) {
1892                         shr.combined[0]= shi->r;
1893                         shr.combined[1]= shi->g;
1894                         shr.combined[2]= shi->b;
1895                 }
1896         }
1897         
1898         if(bs->rect) {
1899                 char *col= (char *)(bs->rect + bs->rectx*y + x);
1900                 col[0]= FTOCHAR(shr.combined[0]);
1901                 col[1]= FTOCHAR(shr.combined[1]);
1902                 col[2]= FTOCHAR(shr.combined[2]);
1903                 col[3]= 255;
1904         }
1905         else {
1906                 float *col= bs->rect_float + 4*(bs->rectx*y + x);
1907                 VECCOPY(col, shr.combined);
1908                 col[3]= 1.0f;
1909         }
1910 }
1911
1912 static void bake_displacement(void *handle, ShadeInput *shi, Isect *isec, int dir, int x, int y)
1913 {
1914         BakeShade *bs= handle;
1915         float disp;
1916         
1917         disp = 0.5 + (isec->labda*VecLength(isec->vec) * -dir);
1918         
1919         if(bs->rect_float) {
1920                 float *col= bs->rect_float + 4*(bs->rectx*y + x);
1921                 col[0] = col[1] = col[2] = disp;
1922                 col[3]= 1.0f;
1923         } else {        
1924                 char *col= (char *)(bs->rect + bs->rectx*y + x);
1925                 col[0]= FTOCHAR(disp);
1926                 col[1]= FTOCHAR(disp);
1927                 col[2]= FTOCHAR(disp);
1928                 col[3]= 255;
1929         }
1930 }
1931
1932 static int bake_check_intersect(Isect *is, int ob, RayFace *face)
1933 {
1934         BakeShade *bs = (BakeShade*)is->userdata;
1935         
1936         /* no direction checking for now, doesn't always improve the result
1937          * (INPR(shi->facenor, bs->dir) > 0.0f); */
1938
1939         return (R.objectinstance[ob].obr->ob != bs->actob);
1940 }
1941
1942 static int bake_intersect_tree(RayTree* raytree, Isect* isect, float *dir, float sign, float *hitco)
1943 {
1944         float maxdist;
1945         int hit;
1946
1947         /* might be useful to make a user setting for maxsize*/
1948         if(R.r.bake_maxdist > 0.0f)
1949                 maxdist= R.r.bake_maxdist;
1950         else
1951                 maxdist= RE_ray_tree_max_size(R.raytree);
1952
1953         isect->end[0] = isect->start[0] + dir[0]*maxdist*sign;
1954         isect->end[1] = isect->start[1] + dir[1]*maxdist*sign;
1955         isect->end[2] = isect->start[2] + dir[2]*maxdist*sign;
1956
1957         hit = RE_ray_tree_intersect_check(R.raytree, isect, bake_check_intersect);
1958         if(hit) {
1959                 hitco[0] = isect->start[0] + isect->labda*isect->vec[0];
1960                 hitco[1] = isect->start[1] + isect->labda*isect->vec[1];
1961                 hitco[2] = isect->start[2] + isect->labda*isect->vec[2];
1962         }
1963
1964         return hit;
1965 }
1966
1967 static void do_bake_shade(void *handle, int x, int y, float u, float v)
1968 {
1969         BakeShade *bs= handle;
1970         VlakRen *vlr= bs->vlr;
1971         ObjectInstanceRen *obi= bs->obi;
1972         Object *ob= obi->obr->ob;
1973         float l, *v1, *v2, *v3, tvn[3], ttang[3];
1974         int quad;
1975         ShadeSample *ssamp= &bs->ssamp;
1976         ShadeInput *shi= ssamp->shi;
1977         
1978         /* fast threadsafe break test */
1979         if(R.test_break())
1980                 return;
1981         
1982         /* setup render coordinates */
1983         if(bs->quad) {
1984                 v1= vlr->v1->co;
1985                 v2= vlr->v3->co;
1986                 v3= vlr->v4->co;
1987         }
1988         else {
1989                 v1= vlr->v1->co;
1990                 v2= vlr->v2->co;
1991                 v3= vlr->v3->co;
1992         }
1993         
1994         /* renderco */
1995         l= 1.0f-u-v;
1996         
1997         shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0];
1998         shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1];
1999         shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2];
2000         
2001         if(obi->flag & R_TRANSFORMED)
2002                 Mat4MulVecfl(obi->mat, shi->co);
2003         
2004         quad= bs->quad;
2005         bake_set_shade_input(obi, vlr, shi, quad, 0, x, y, u, v);
2006
2007         if(bs->type==RE_BAKE_NORMALS && R.r.bake_normal_space==R_BAKE_SPACE_TANGENT) {
2008                 shade_input_set_shade_texco(shi);
2009                 VECCOPY(tvn, shi->vn);
2010                 VECCOPY(ttang, shi->tang);
2011         }
2012
2013         /* if we are doing selected to active baking, find point on other face */
2014         if(bs->actob) {
2015                 Isect isec, minisec;
2016                 float co[3], minco[3];
2017                 int hit, sign, dir=1;
2018                 
2019                 /* intersect with ray going forward and backward*/
2020                 hit= 0;
2021                 memset(&minisec, 0, sizeof(minisec));
2022                 minco[0]= minco[1]= minco[2]= 0.0f;
2023                 
2024                 VECCOPY(bs->dir, shi->vn);
2025                 
2026                 for(sign=-1; sign<=1; sign+=2) {
2027                         memset(&isec, 0, sizeof(isec));
2028                         VECCOPY(isec.start, shi->co);
2029                         isec.mode= RE_RAY_MIRROR;
2030                         isec.faceorig= (RayFace*)vlr;
2031                         isec.oborig= RAY_OBJECT_SET(&R, obi);
2032                         isec.userdata= bs;
2033                         
2034                         if(bake_intersect_tree(R.raytree, &isec, shi->vn, sign, co)) {
2035                                 if(!hit || VecLenf(shi->co, co) < VecLenf(shi->co, minco)) {
2036                                         minisec= isec;
2037                                         VECCOPY(minco, co);
2038                                         hit= 1;
2039                                         dir = sign;
2040                                 }
2041                         }
2042                 }
2043
2044                 if (hit && bs->type==RE_BAKE_DISPLACEMENT) {;
2045                         bake_displacement(handle, shi, &minisec, dir, x, y);
2046                         return;
2047                 }
2048
2049                 /* if hit, we shade from the new point, otherwise from point one starting face */
2050                 if(hit) {
2051                         vlr= (VlakRen*)minisec.face;
2052                         obi= RAY_OBJECT_GET(&R, minisec.ob);
2053                         quad= (minisec.isect == 2);
2054                         VECCOPY(shi->co, minco);
2055                         
2056                         u= -minisec.u;
2057                         v= -minisec.v;
2058                         bake_set_shade_input(obi, vlr, shi, quad, 1, x, y, u, v);
2059                 }
2060         }
2061
2062         if(bs->type==RE_BAKE_NORMALS && R.r.bake_normal_space==R_BAKE_SPACE_TANGENT)
2063                 bake_shade(handle, ob, shi, quad, x, y, u, v, tvn, ttang);
2064         else
2065                 bake_shade(handle, ob, shi, quad, x, y, u, v, 0, 0);
2066 }
2067
2068 static int get_next_bake_face(BakeShade *bs)
2069 {
2070         ObjectRen *obr;
2071         VlakRen *vlr;
2072         MTFace *tface;
2073         static int v= 0, vdone= 0;
2074         static ObjectInstanceRen *obi= NULL;
2075         
2076         if(bs==NULL) {
2077                 vlr= NULL;
2078                 v= vdone= 0;
2079                 obi= R.instancetable.first;
2080                 return 0;
2081         }
2082         
2083         BLI_lock_thread(LOCK_CUSTOM1);  
2084
2085         for(; obi; obi=obi->next, v=0) {
2086                 obr= obi->obr;
2087
2088                 for(; v<obr->totvlak; v++) {
2089                         vlr= RE_findOrAddVlak(obr, v);
2090
2091                         if((bs->actob && bs->actob == obr->ob) || (!bs->actob && (obr->ob->flag & SELECT))) {
2092                                 tface= RE_vlakren_get_tface(obr, vlr, obr->actmtface, NULL, 0);
2093
2094                                 if(tface && tface->tpage) {
2095                                         Image *ima= tface->tpage;
2096                                         ImBuf *ibuf= BKE_image_get_ibuf(ima, NULL);
2097                                         float vec[4]= {0.0f, 0.0f, 0.0f, 0.0f};
2098                                         
2099                                         if(ibuf==NULL)
2100                                                 continue;
2101                                         
2102                                         if(ibuf->rect==NULL && ibuf->rect_float==NULL)
2103                                                 continue;
2104                                         
2105                                         if(ibuf->rect_float && !(ibuf->channels==0 || ibuf->channels==4))
2106                                                 continue;
2107                                         
2108                                         /* find the image for the first time? */
2109                                         if(ima->id.flag & LIB_DOIT) {
2110                                                 ima->id.flag &= ~LIB_DOIT;
2111                                                 
2112                                                 /* we either fill in float or char, this ensures things go fine */
2113                                                 if(ibuf->rect_float)
2114                                                         imb_freerectImBuf(ibuf);
2115                                                 /* clear image */
2116                                                 if(R.r.bake_flag & R_BAKE_CLEAR)
2117                                                         IMB_rectfill(ibuf, vec);
2118                                         
2119                                                 /* might be read by UI to set active image for display */
2120                                                 R.bakebuf= ima;
2121                                         }                               
2122                                         
2123                                         bs->obi= obi;
2124                                         bs->vlr= vlr;
2125                                         
2126                                         bs->vdone++;    /* only for error message if nothing was rendered */
2127                                         v++;
2128                                         
2129                                         BLI_unlock_thread(LOCK_CUSTOM1);
2130                                         return 1;
2131                                 }
2132                         }
2133                 }
2134         }
2135         
2136         BLI_unlock_thread(LOCK_CUSTOM1);
2137         return 0;
2138 }
2139
2140 /* already have tested for tface and ima and zspan */
2141 static void shade_tface(BakeShade *bs)
2142 {
2143         VlakRen *vlr= bs->vlr;
2144         ObjectInstanceRen *obi= bs->obi;
2145         ObjectRen *obr= obi->obr;
2146         MTFace *tface= RE_vlakren_get_tface(obr, vlr, obr->actmtface, NULL, 0);
2147         Image *ima= tface->tpage;
2148         float vec[4][2];
2149         int a, i1, i2, i3;
2150         
2151         /* check valid zspan */
2152         if(ima!=bs->ima) {
2153                 bs->ima= ima;
2154                 bs->ibuf= BKE_image_get_ibuf(ima, NULL);
2155                 /* note, these calls only free/fill contents of zspan struct, not zspan itself */
2156                 zbuf_free_span(bs->zspan);
2157                 zbuf_alloc_span(bs->zspan, bs->ibuf->x, bs->ibuf->y, R.clipcrop);
2158         }                               
2159         
2160         bs->rectx= bs->ibuf->x;
2161         bs->recty= bs->ibuf->y;
2162         bs->rect= bs->ibuf->rect;
2163         bs->rect_float= bs->ibuf->rect_float;
2164         bs->quad= 0;
2165         
2166         /* get pixel level vertex coordinates */
2167         for(a=0; a<4; a++) {
2168                 vec[a][0]= tface->uv[a][0]*(float)bs->rectx - 0.5f;
2169                 vec[a][1]= tface->uv[a][1]*(float)bs->recty - 0.5f;
2170         }
2171         
2172         /* UV indices have to be corrected for possible quad->tria splits */
2173         i1= 0; i2= 1; i3= 2;
2174         vlr_set_uv_indices(vlr, &i1, &i2, &i3);
2175         zspan_scanconvert(bs->zspan, bs, vec[i1], vec[i2], vec[i3], do_bake_shade);
2176         
2177         if(vlr->v4) {
2178                 bs->quad= 1;
2179                 zspan_scanconvert(bs->zspan, bs, vec[0], vec[2], vec[3], do_bake_shade);
2180         }
2181 }
2182
2183 static void *do_bake_thread(void *bs_v)
2184 {
2185         BakeShade *bs= bs_v;
2186         
2187         while(get_next_bake_face(bs)) {
2188                 shade_tface(bs);
2189                 
2190                 /* fast threadsafe break test */
2191                 if(R.test_break())
2192                         break;
2193         }
2194         bs->ready= 1;
2195         
2196         return NULL;
2197 }
2198
2199 /* using object selection tags, the faces with UV maps get baked */
2200 /* render should have been setup */
2201 /* returns 0 if nothing was handled */
2202 int RE_bake_shade_all_selected(Render *re, int type, Object *actob)
2203 {
2204         BakeShade handles[BLENDER_MAX_THREADS];
2205         ListBase threads;
2206         Image *ima;
2207         int a, vdone=0;
2208
2209         /* initialize render global */
2210         R= *re;
2211         R.bakebuf= NULL;
2212         
2213         /* initialize static vars */
2214         get_next_bake_face(NULL);
2215         
2216         /* baker uses this flag to detect if image was initialized */
2217         for(ima= G.main->image.first; ima; ima= ima->id.next)
2218                 ima->id.flag |= LIB_DOIT;
2219         
2220         BLI_init_threads(&threads, do_bake_thread, re->r.threads);
2221
2222         /* get the threads running */
2223         for(a=0; a<re->r.threads; a++) {
2224                 /* set defaults in handles */
2225                 memset(&handles[a], 0, sizeof(BakeShade));
2226                 
2227                 handles[a].ssamp.shi[0].lay= re->scene->lay;
2228                 handles[a].ssamp.shi[0].passflag= SCE_PASS_COMBINED;
2229                 handles[a].ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC);
2230                 handles[a].ssamp.shi[0].thread= a;
2231                 handles[a].ssamp.tot= 1;
2232                 
2233                 handles[a].type= type;
2234                 handles[a].actob= actob;
2235                 handles[a].zspan= MEM_callocN(sizeof(ZSpan), "zspan for bake");
2236                 
2237                 BLI_insert_thread(&threads, &handles[a]);
2238         }
2239         
2240         /* wait for everything to be done */
2241         a= 0;
2242         while(a!=re->r.threads) {
2243                 
2244                 PIL_sleep_ms(50);
2245
2246                 for(a=0; a<re->r.threads; a++)
2247                         if(handles[a].ready==0)
2248                                 break;
2249         }
2250         
2251         /* filter and refresh images */
2252         for(ima= G.main->image.first; ima; ima= ima->id.next) {
2253                 if((ima->id.flag & LIB_DOIT)==0) {
2254                         ImBuf *ibuf= BKE_image_get_ibuf(ima, NULL);
2255                         for(a=0; a<re->r.bake_filter; a++)
2256                                 IMB_filter_extend(ibuf);
2257                         ibuf->userflags |= IB_BITMAPDIRTY;
2258                         
2259                         if (ibuf->rect_float) IMB_rect_from_float(ibuf);
2260                 }
2261         }
2262         
2263         /* calculate return value */
2264         for(a=0; a<re->r.threads; a++) {
2265                 vdone+= handles[a].vdone;
2266                 
2267                 zbuf_free_span(handles[a].zspan);
2268                 MEM_freeN(handles[a].zspan);
2269         }
2270         
2271         BLI_end_threads(&threads);
2272         return vdone;
2273 }
2274
2275 struct Image *RE_bake_shade_get_image(void)
2276 {
2277         return R.bakebuf;
2278 }
2279