1b82da372ec76005e6d1adf504ccec5805601931
[blender.git] / source / blender / nodes / composite / nodes / node_composite_defocus.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version. 
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2006 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): none yet.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/nodes/composite/nodes/node_composite_defocus.c
29  *  \ingroup cmpnodes
30  */
31
32
33 #include "node_composite_util.h"
34
35 /* ************ qdn: Defocus node ****************** */
36 static bNodeSocketTemplate cmp_node_defocus_in[]= {
37         {       SOCK_RGBA, 1, N_("Image"),                      1.0f, 1.0f, 1.0f, 1.0f},
38         {       SOCK_FLOAT, 1, N_("Z"),                 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, PROP_FACTOR},
39         {       -1, 0, ""       }
40 };
41 static bNodeSocketTemplate cmp_node_defocus_out[]= {
42         {       SOCK_RGBA, 0, N_("Image")},
43         {       -1, 0, ""       }
44 };
45
46
/* Per-vertex/per-edge data of the polygonal bokeh shape. The same record
 * serves the point-in-shape test and the scan conversion of the shape. */
typedef struct BokehCoeffs {
	float x0, y0;        /* vertex position */
	float dx, dy;        /* normalized direction of the edge starting at this vertex */
	float ls_x, ls_y;    /* x change per unit y of the edge ending at this vertex, and its reciprocal */
	float min_x, min_y;  /* lower corner of the bound of the edge ending at this vertex */
	float max_x, max_y;  /* upper corner of that bound */
} BokehCoeffs;
53
54 // returns array of BokehCoeffs
55 // returns length of array in 'len_bkh',
56 // radius squared of inscribed disk in 'inradsq', needed in getWeight() test,
57 // BKH[8] is the data returned for the bokeh shape & bkh_b[4] is it's 2d bound
58 static void makeBokeh(char bktype, char ro, int* len_bkh, float* inradsq, BokehCoeffs BKH[8], float bkh_b[4])
59 {
60         float x0, x1, y0, y1, dx, dy, iDxy;
61         /* ro now is in radians. */
62         float w = MAX2(1e-6f, ro);  // never reported stangely enough, but a zero offset causes missing center line...
63         float wi = DEG2RADF(360.f/bktype);
64         int i, ov, nv;
65         
66         // bktype must be at least 3 & <= 8
67         bktype = (bktype<3) ? 3 : ((bktype>8) ? 8 : bktype);
68         *len_bkh = bktype;
69         *inradsq = -1.f;
70
71         for (i=0; i<(*len_bkh); i++) {
72                 x0 = cos(w);
73                 y0 = sin(w);
74                 w += wi;
75                 x1 = cos(w);
76                 y1 = sin(w);
77                 if ((*inradsq)<0.f) {
78                         // radius squared of inscribed disk
79                         float idx=(x0+x1)*0.5f, idy=(y0+y1)*0.5f;
80                         *inradsq = idx*idx + idy*idy;
81                 }
82                 BKH[i].x0 = x0;
83                 BKH[i].y0 = y0;
84                 dx = x1-x0, dy = y1-y0;
85                 iDxy = 1.f / sqrtf(dx*dx + dy*dy);
86                 dx *= iDxy;
87                 dy *= iDxy;
88                 BKH[i].dx = dx;
89                 BKH[i].dy = dy;
90         }
91
92         // precalc scanconversion data
93         // bokeh bound, not transformed, for scanconvert
94         bkh_b[0] = bkh_b[2] = 1e10f;    // xmin/ymin
95         bkh_b[1] = bkh_b[3] = -1e10f;   // xmax/ymax
96         ov = (*len_bkh) - 1;
97         for (nv=0; nv<(*len_bkh); nv++) {
98                 bkh_b[0] = MIN2(bkh_b[0], BKH[nv].x0);  // xmin
99                 bkh_b[1] = MAX2(bkh_b[1], BKH[nv].x0);  // xmax
100                 bkh_b[2] = MIN2(bkh_b[2], BKH[nv].y0);  // ymin
101                 bkh_b[3] = MAX2(bkh_b[3], BKH[nv].y0);  // ymax
102                 BKH[nv].min_x = MIN2(BKH[ov].x0, BKH[nv].x0);
103                 BKH[nv].max_x = MAX2(BKH[ov].x0, BKH[nv].x0);
104                 BKH[nv].min_y = MIN2(BKH[ov].y0, BKH[nv].y0);
105                 BKH[nv].max_y = MAX2(BKH[ov].y0, BKH[nv].y0);
106                 dy = BKH[nv].y0 - BKH[ov].y0;
107                 BKH[nv].ls_x = (BKH[nv].x0 - BKH[ov].x0) / ((dy==0.f) ? 1.f : dy);
108                 BKH[nv].ls_y = (BKH[nv].ls_x==0.f) ? 1.f : (1.f/BKH[nv].ls_x);
109                 ov = nv;
110         }
111 }
112
/* Tests whether the offset (u, v) falls inside the bokeh shape described by
 * the first 'len_bkh' entries of BKH.
 * The offset is divided by 'rad' first (a radius of zero leaves it unscaled),
 * 'inradsq' is the squared radius of the disk inscribed in the shape.
 * Returns a weight of 1 for a point inside the shape and 0 otherwise. */
static float getWeight(BokehCoeffs* BKH, int len_bkh, float u, float v, float rad, float inradsq)
{
	float inv_rad, dist_sq;
	int edge;

	if (rad == 0.f)
		inv_rad = 1.f;
	else
		inv_rad = 1.f/rad;
	u *= inv_rad;
	v *= inv_rad;

	dist_sq = u*u + v*v;

	// quick reject: a point beyond the outer unit disk can never be in the shape
	if (dist_sq > 1.f) return 0.f;

	// quick accept: a point within or on the inner disk always is in the shape
	if (dist_sq <= inradsq) return 1.f;

	// in between the two disks every edge has to be checked,
	// a point on the positive side of any edge is outside
	for (edge = 0; edge < len_bkh; edge++) {
		const BokehCoeffs* bc = &BKH[edge];
		if ((bc->dy*(u - bc->x0) - bc->dx*(v - bc->y0)) > 0.f) return 0.f;
	}
	return 1.f;
}
134
// QMC.seq. for sampling, A.Keller, EMS
// Mirrors the lower 32 bits of 'bits', xors the result with 'r' and
// returns that value divided by 2^32.
// The masks below assume a 32 bit unsigned int.
static float RI_vdC(unsigned int bits, unsigned int r)
{
	unsigned int mirrored = bits;

	// swap ever larger groups: single bits, pairs, nibbles, bytes, half words
	mirrored = ((mirrored >> 1) & 0x55555555) | ((mirrored & 0x55555555) << 1);
	mirrored = ((mirrored >> 2) & 0x33333333) | ((mirrored & 0x33333333) << 2);
	mirrored = ((mirrored >> 4) & 0x0f0f0f0f) | ((mirrored & 0x0f0f0f0f) << 4);
	mirrored = ((mirrored >> 8) & 0x00ff00ff) | ((mirrored & 0x00ff00ff) << 8);
	mirrored = (mirrored >> 16) | (mirrored << 16);

	mirrored ^= r;

	// division in double precision, only the final result is narrowed to float
	return (float)((double)mirrored / 4294967296.0);
}
146
147 // single channel IIR gaussian filtering
148 // much faster than anything else, constant time independent of width
149 // should extend to multichannel and make this a node, could be useful
150 // note: this is an almost exact copy of 'IIR_gauss'
151 static void IIR_gauss_single(CompBuf* buf, float sigma)
152 {
153         double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
154         float *X, *Y, *W;
155         int i, x, y, sz;
156
157         // single channel only for now
158         if (buf->type != CB_VAL) return;
159
160         // <0.5 not valid, though can have a possibly useful sort of sharpening effect
161         if (sigma < 0.5f) return;
162         
163         // see "Recursive Gabor Filtering" by Young/VanVliet
164         // all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
165         if (sigma >= 3.556f)
166                 q = 0.9804f*(sigma - 3.556f) + 2.5091f;
167         else // sigma >= 0.5
168                 q = (0.0561f*sigma + 0.5784f)*sigma - 0.2568f;
169         q2 = q*q;
170         sc = (1.1668 + q)*(3.203729649  + (2.21566 + q)*q);
171         // no gabor filtering here, so no complex multiplies, just the regular coefs.
172         // all negated here, so as not to have to recalc Triggs/Sdika matrix
173         cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc;
174         cf[2] = -q2*(3.38246 + 3.0*q)/sc;
175         // 0 & 3 unchanged
176         cf[3] = q2*q/sc;
177         cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
178
179         // Triggs/Sdika border corrections,
180         // it seems to work, not entirely sure if it is actually totally correct,
181         // Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
182         // found one other implementation by Cristoph Lampert,
183         // but neither seem to be quite the same, result seems to be ok sofar anyway.
184         // Extra scale factor here to not have to do it in filter,
185         // though maybe this had something to with the precision errors
186         sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3]));
187         tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]);
188         tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1]));
189         tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
190         tsM[3] = sc*(cf[1] + cf[3]*cf[2]);
191         tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1]));
192         tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]);
193         tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]);
194         tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]);
195         tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
196
197 #define YVV(L)\
198 {\
199         W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0];\
200         W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0];\
201         W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0];\
202         for (i=3; i<L; i++)\
203                 W[i] = cf[0]*X[i] + cf[1]*W[i-1] + cf[2]*W[i-2] + cf[3]*W[i-3];\
204         tsu[0] = W[L-1] - X[L-1];\
205         tsu[1] = W[L-2] - X[L-1];\
206         tsu[2] = W[L-3] - X[L-1];\
207         tsv[0] = tsM[0]*tsu[0] + tsM[1]*tsu[1] + tsM[2]*tsu[2] + X[L-1];\
208         tsv[1] = tsM[3]*tsu[0] + tsM[4]*tsu[1] + tsM[5]*tsu[2] + X[L-1];\
209         tsv[2] = tsM[6]*tsu[0] + tsM[7]*tsu[1] + tsM[8]*tsu[2] + X[L-1];\
210         Y[L-1] = cf[0]*W[L-1] + cf[1]*tsv[0] + cf[2]*tsv[1] + cf[3]*tsv[2];\
211         Y[L-2] = cf[0]*W[L-2] + cf[1]*Y[L-1] + cf[2]*tsv[0] + cf[3]*tsv[1];\
212         Y[L-3] = cf[0]*W[L-3] + cf[1]*Y[L-2] + cf[2]*Y[L-1] + cf[3]*tsv[0];\
213         for (i=L-4; i>=0; i--)\
214                 Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3];\
215 }
216
217         // intermediate buffers
218         sz = MAX2(buf->x, buf->y);
219         Y = MEM_callocN(sz*sizeof(float), "IIR_gauss Y buf");
220         W = MEM_callocN(sz*sizeof(float), "IIR_gauss W buf");
221         // H
222         for (y=0; y<buf->y; y++) {
223                 X = &buf->rect[y*buf->x];
224                 YVV(buf->x);
225                 memcpy(X, Y, sizeof(float)*buf->x);
226         }
227         // V
228         X = MEM_callocN(buf->y*sizeof(float), "IIR_gauss X buf");
229         for (x=0; x<buf->x; x++) {
230                 for (y=0; y<buf->y; y++)
231                         X[y] = buf->rect[x + y*buf->x];
232                 YVV(buf->y);
233                 for (y=0; y<buf->y; y++)
234                         buf->rect[x + y*buf->x] = Y[y];
235         }
236         MEM_freeN(X);
237
238         MEM_freeN(W);
239         MEM_freeN(Y);
240 #undef YVV
241 }
242
243 static void defocus_blur(bNode *node, CompBuf *new, CompBuf *img, CompBuf *zbuf, float inpval, int no_zbuf)
244 {
245         NodeDefocus *nqd = node->storage;
246         CompBuf *wts;           // weights buffer
247         CompBuf *crad;          // CoC radius buffer
248         BokehCoeffs BKH[8];     // bokeh shape data, here never > 8 pts.
249         float bkh_b[4] = {0};   // shape 2D bound
250         float cam_fdist=1, cam_invfdist=1, cam_lens=35;
251         float dof_sp, maxfgc, bk_hn_theta=0, inradsq=0;
252         int y, len_bkh=0, ydone = FALSE;
253         float aspect, aperture;
254         int minsz;
255         //float bcrad, nmaxc, scf;
256         
257         // get some required params from the current scene camera
258         // (ton) this is wrong, needs fixed
259         Scene *scene= (Scene*)node->id;
260         Object* camob = (scene)? scene->camera: NULL;
261         if (camob && camob->type==OB_CAMERA) {
262                 Camera* cam = (Camera*)camob->data;
263                 cam_lens = cam->lens;
264                 cam_fdist = BKE_camera_object_dof_distance(camob);
265                 if (cam_fdist==0.0f) cam_fdist = 1e10f; /* if the dof is 0.0 then set it be be far away */
266                 cam_invfdist = 1.f/cam_fdist;
267         }
268
269         // guess work here.. best match with raytraced result
270         minsz = MIN2(img->x, img->y);
271         dof_sp = (float)minsz / (16.f / cam_lens);      // <- == aspect * MIN2(img->x, img->y) / tan(0.5f * fov);
272         
273         // aperture
274         aspect = (img->x > img->y) ? (img->y / (float)img->x) : (img->x / (float)img->y);
275         aperture = 0.5f*(cam_lens / (aspect*32.f)) / nqd->fstop;
276         
277         // if not disk, make bokeh coefficients and other needed data
278         if (nqd->bktype!=0) {
279                 makeBokeh(nqd->bktype, nqd->rotation, &len_bkh, &inradsq, BKH, bkh_b);
280                 bk_hn_theta = 0.5 * nqd->bktype * sin(2.0 * M_PI / nqd->bktype);        // weight factor
281         }
282         
283         // accumulated weights
284         wts = alloc_compbuf(img->x, img->y, CB_VAL, 1);
285         // CoC radius buffer
286         crad = alloc_compbuf(img->x, img->y, CB_VAL, 1);
287
288         // if 'no_zbuf' flag set (which is always set if input is not an image),
289         // values are instead interpreted directly as blur radius values
290         if (no_zbuf) {
291                 // to prevent *reaaallly* big radius values and impossible calculation times,
292                 // limit the maximum to half the image width or height, whichever is smaller
293                 float maxr = 0.5f*(float)MIN2(img->x, img->y);
294                 unsigned int p;
295
296                 for (p=0; p<(unsigned int)(img->x*img->y); p++) {
297                         crad->rect[p] = zbuf ? (zbuf->rect[p]*nqd->scale) : inpval;
298                         // bug #5921, limit minimum
299                         crad->rect[p] = MAX2(1e-5f, crad->rect[p]);
300                         crad->rect[p] = MIN2(crad->rect[p], maxr);
301                         // if maxblur!=0, limit maximum
302                         if (nqd->maxblur != 0.f) crad->rect[p] = MIN2(crad->rect[p], nqd->maxblur);
303                 }
304         }
305         else {
306                 float wt;
307
308                 // actual zbuffer.
309                 // separate foreground from background CoC's
310                 // then blur background and blend in again with foreground,
311                 // improves the 'blurred foreground overlapping in-focus midground' sharp boundary problem.
312                 // wts buffer here used for blendmask
313                 maxfgc = 0.f; // maximum foreground CoC radius
314                 for (y=0; y<img->y; y++) {
315                         unsigned int p = y * img->x;
316                         int x;
317                         for (x=0; x<img->x; x++) {
318                                 unsigned int px = p + x;
319                                 float iZ = (zbuf->rect[px]==0.f) ? 0.f : (1.f/zbuf->rect[px]);
320                                 crad->rect[px] = 0.5f*(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
321                                 if (crad->rect[px] <= 0.f) {
322                                         wts->rect[px] = 1.f;
323                                         crad->rect[px] = -crad->rect[px];
324                                         if (crad->rect[px] > maxfgc) maxfgc = crad->rect[px];
325                                 }
326                                 else crad->rect[px] = wts->rect[px] = 0;
327                         }
328                 }
329                 
330                 // fast blur...
331                 // bug #6656 part 1, probably when previous node_composite.c was split into separate files, it was not properly updated
332                 // to include recent cvs commits (well, at least not defocus node), so this part was missing...
333                 wt = aperture*128.f;
334                 IIR_gauss_single(crad, wt);
335                 IIR_gauss_single(wts, wt);
336                 
337                 // bug #6656 part 2a, although foreground blur is not based anymore on closest object,
338                 // the rescaling op below was still based on that anyway, and unlike the comment in below code,
339                 // the difference is therefore not always that small at all...
340                 // so for now commented out, not sure if this is going to cause other future problems, lets just wait and see...
341                 /*
342                 // find new maximum to scale it back to original
343                 // (could skip this, not strictly necessary, in general, difference is quite small, but just in case...)
344                 nmaxc = 0;
345                 for (p=0; p<(img->x*img->y); p++)
346                         if (crad->rect[p] > nmaxc) nmaxc = crad->rect[p];
347                 // rescale factor
348                 scf = (nmaxc==0.f) ? 1.f: (maxfgc / nmaxc);
349                 */
350
351                 // and blend...
352                 for (y=0; y<img->y; y++) {
353                         unsigned int p = y*img->x;
354                         int x;
355
356                         for (x=0; x<img->x; x++) {
357                                 unsigned px = p + x;
358                                 if (zbuf->rect[px]!=0.f) {
359                                         float iZ = (zbuf->rect[px]==0.f) ? 0.f : (1.f/zbuf->rect[px]);
360                                         
361                                         // bug #6656 part 2b, do not rescale
362                                         /*
363                                         bcrad = 0.5f*fabs(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
364                                         // scale crad back to original maximum and blend
365                                         crad->rect[px] = bcrad + wts->rect[px]*(scf*crad->rect[px] - bcrad);
366                                         */
367                                         crad->rect[px] = 0.5f*fabsf(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
368                                         
369                                         // 'bug' #6615, limit minimum radius to 1 pixel, not really a solution, but somewhat mitigates the problem
370                                         crad->rect[px] = MAX2(crad->rect[px], 0.5f);
371                                         // if maxblur!=0, limit maximum
372                                         if (nqd->maxblur != 0.f) crad->rect[px] = MIN2(crad->rect[px], nqd->maxblur);
373                                 }
374                                 else crad->rect[px] = 0.f;
375                                 // clear weights for next part
376                                 wts->rect[px] = 0.f;
377                         }
378                         // esc set by main calling process
379                         if (node->exec & NODE_BREAK)
380                                 break;
381                 }
382         }
383
384         //------------------------------------------------------------------
385         // main loop
386 #ifndef __APPLE__ /* can crash on Mac, see bug #22856, disabled for now */
387 #ifdef __INTEL_COMPILER /* icc doesn't like the compound statement -- internal error: 0_1506 */
388         #pragma omp parallel for private(y) if (!nqd->preview) schedule(guided)
389 #else
390         #pragma omp parallel for private(y) if (!nqd->preview && img->y*img->x > 16384) schedule(guided)
391 #endif
392 #endif
393         for (y=0; y<img->y; y++) {
394                 unsigned int p, p4, zp, cp, cp4;
395                 float *ctcol, u, v, ct_crad, cR2=0;
396                 int x, sx, sy;
397
398                 // some sort of visual feedback would be nice, or at least this text in the renderwin header
399                 // but for now just print some info in the console every 8 scanlines.
400                 #pragma omp critical
401                 {
402                         if (((ydone & 7)==0) || (ydone==(img->y-1))) {
403                                 if (G.background==0) {
404                                         printf("\rdefocus: Processing Line %d of %d ... ", ydone+1, img->y);
405                                         fflush(stdout);
406                                 }
407                         }
408
409                         ydone++;
410                 }
411
412                 // esc set by main calling process. don't break because openmp doesn't
413                 // allow it, just continue and do nothing 
414                 if (node->exec & NODE_BREAK)
415                         continue;
416
417                 zp = y * img->x;
418                 for (x=0; x<img->x; x++) {
419                         cp = zp + x;
420                         cp4 = cp * img->type;
421
422                         // Circle of Confusion radius for current pixel
423                         cR2 = ct_crad = crad->rect[cp];
424                         // skip if zero (border render)
425                         if (ct_crad==0.f) {
426                                 // related to bug #5921, forgot output image when skipping 0 radius values
427                                 new->rect[cp4] = img->rect[cp4];
428                                 if (new->type != CB_VAL) {
429                                         new->rect[cp4+1] = img->rect[cp4+1];
430                                         new->rect[cp4+2] = img->rect[cp4+2];
431                                         new->rect[cp4+3] = img->rect[cp4+3];
432                                 }
433                                 continue;
434                         }
435                         cR2 *= cR2;
436                         
437                         // pixel color
438                         ctcol = &img->rect[cp4];
439                         
440                         if (!nqd->preview) {
441                                 int xs, xe, ys, ye;
442                                 float lwt, wtcol[4] = {0}, aacol[4] = {0};
443                                 float wt;
444
445                                 // shape weight
446                                 if (nqd->bktype==0)     // disk
447                                         wt = 1.f/((float)M_PI*cR2);
448                                 else
449                                         wt = 1.f/(cR2*bk_hn_theta);
450
451                                 // weighted color
452                                 wtcol[0] = wt*ctcol[0];
453                                 if (new->type != CB_VAL) {
454                                         wtcol[1] = wt*ctcol[1];
455                                         wtcol[2] = wt*ctcol[2];
456                                         wtcol[3] = wt*ctcol[3];
457                                 }
458
459                                 // macro for background blur overlap test
460                                 // unfortunately, since this is done per pixel,
461                                 // it has a very significant negative impact on processing time...
462                                 // (eg. aa disk blur without test: 112 sec, vs with test: 176 sec...)
463                                 // iff center blur radius > threshold
464                                 // and if overlap pixel in focus, do nothing, else add color/weigbt
465                                 // (threshold constant is dependent on amount of blur)
466                                 #define TESTBG1(c, w) {\
467                                         if (ct_crad > nqd->bthresh) {\
468                                                 if (crad->rect[p] > nqd->bthresh) {\
469                                                         new->rect[p] += c[0];\
470                                                         wts->rect[p] += w;\
471                                                 }\
472                                         }\
473                                         else {\
474                                                 new->rect[p] += c[0];\
475                                                 wts->rect[p] += w;\
476                                         }\
477                                 }
478                                 #define TESTBG4(c, w) {\
479                                         if (ct_crad > nqd->bthresh) {\
480                                                 if (crad->rect[p] > nqd->bthresh) {\
481                                                         new->rect[p4] += c[0];\
482                                                         new->rect[p4+1] += c[1];\
483                                                         new->rect[p4+2] += c[2];\
484                                                         new->rect[p4+3] += c[3];\
485                                                         wts->rect[p] += w;\
486                                                 }\
487                                         }\
488                                         else {\
489                                                 new->rect[p4] += c[0];\
490                                                 new->rect[p4+1] += c[1];\
491                                                 new->rect[p4+2] += c[2];\
492                                                 new->rect[p4+3] += c[3];\
493                                                 wts->rect[p] += w;\
494                                         }\
495                                 }
496                                 if (nqd->bktype == 0) {
497                                         // Disk
498                                         int _x, i, j, di;
499                                         float Dj, T;
500                                         // AA pixel
501                                         #define AAPIX(a, b) {\
502                                                 int _ny = b;\
503                                                 if ((_ny >= 0) && (_ny < new->y)) {\
504                                                         int _nx = a;\
505                                                         if ((_nx >=0) && (_nx < new->x)) {\
506                                                                 p = _ny*new->x + _nx;\
507                                                                 if (new->type==CB_VAL) {\
508                                                                         TESTBG1(aacol, lwt);\
509                                                                 }\
510                                                                 else {\
511                                                                         p4 = p * new->type;\
512                                                                         TESTBG4(aacol, lwt);\
513                                                                 }\
514                                                         }\
515                                                 }\
516                                         }
517                                         // circle scanline
518                                         #define CSCAN(a, b) {\
519                                                 int _ny = y + b;\
520                                                 if ((_ny >= 0) && (_ny < new->y)) {\
521                                                         xs = x - a + 1;\
522                                                         if (xs < 0) xs = 0;\
523                                                         xe = x + a;\
524                                                         if (xe > new->x) xe = new->x;\
525                                                         p = _ny*new->x + xs;\
526                                                         if (new->type==CB_VAL) {\
527                                                                 for (_x=xs; _x<xe; _x++, p++) TESTBG1(wtcol, wt);\
528                                                         }\
529                                                         else {\
530                                                                 p4 = p * new->type;\
531                                                                 for (_x=xs; _x<xe; _x++, p++, p4+=new->type) TESTBG4(wtcol, wt);\
532                                                         }\
533                                                 }\
534                                         }
535
536                                         i = ceil(ct_crad);
537                                         j = 0;
538                                         T = 0;
539                                         while (i > j) {
540                                                 Dj = sqrt(cR2 - j*j);
541                                                 Dj -= floorf(Dj);
542                                                 di = 0;
543                                                 if (Dj > T) { i--;  di = 1; }
544                                                 T = Dj;
545                                                 aacol[0] = wtcol[0]*Dj;
546                                                 if (new->type != CB_VAL) {
547                                                         aacol[1] = wtcol[1]*Dj;
548                                                         aacol[2] = wtcol[2]*Dj;
549                                                         aacol[3] = wtcol[3]*Dj;
550                                                 }
551                                                 lwt = wt*Dj;
552                                                 if (i!=j) {
553                                                         // outer pixels
554                                                         AAPIX(x+j, y+i)
555                                                         AAPIX(x+j, y-i)
556                                                         if (j) {
557                                                                 AAPIX(x-j, y+i) // BL
558                                                                 AAPIX(x-j, y-i) // TL
559                                                         }
560                                                         if (di) { // only when i changed, interior of outer section
561                                                                 CSCAN(j, i) // bottom
562                                                                 CSCAN(j, -i) // top
563                                                         }
564                                                 }
565                                                 // lower mid section
566                                                 AAPIX(x+i, y+j)
567                                                 if (i) AAPIX(x-i, y+j)
568                                                 CSCAN(i, j)
569                                                 // upper mid section
570                                                 if (j) {
571                                                         AAPIX(x+i, y-j)
572                                                         if (i) AAPIX(x-i, y-j)
573                                                         CSCAN(i, -j)
574                                                 }
575                                                 j++;
576                                         }
577                                         #undef CSCAN
578                                         #undef AAPIX
579                                 }
580                                 else {
581                                         // n-agonal
582                                         int ov, nv;
583                                         float mind, maxd, lwt;
584                                         ys = MAX2((int)floor(bkh_b[2]*ct_crad + y), 0);
585                                         ye = MIN2((int)ceil(bkh_b[3]*ct_crad + y), new->y - 1);
586                                         for (sy=ys; sy<=ye; sy++) {
587                                                 float fxs = 1e10f, fxe = -1e10f;
588                                                 float yf = (sy - y)/ct_crad;
589                                                 int found = 0;
590                                                 ov = len_bkh - 1;
591                                                 mind = maxd = 0;
592                                                 for (nv=0; nv<len_bkh; nv++) {
593                                                         if ((BKH[nv].max_y >= yf) && (BKH[nv].min_y <= yf)) {
594                                                                 float tx = BKH[ov].x0 + BKH[nv].ls_x*(yf - BKH[ov].y0);
595                                                                 if (tx < fxs) { fxs = tx;  mind = BKH[nv].ls_x; }
596                                                                 if (tx > fxe) { fxe = tx;  maxd = BKH[nv].ls_x; }
597                                                                 if (++found == 2) break;
598                                                         }
599                                                         ov = nv;
600                                                 }
601                                                 if (found) {
602                                                         fxs = fxs*ct_crad + x;
603                                                         fxe = fxe*ct_crad + x;
604                                                         xs = (int)floor(fxs), xe = (int)ceil(fxe);
605                                                         // AA hack for first and last x pixel, near vertical edges only
606                                                         if (fabsf(mind) <= 1.f) {
607                                                                 if ((xs >= 0) && (xs < new->x)) {
608                                                                         lwt = 1.f-(fxs - xs);
609                                                                         aacol[0] = wtcol[0]*lwt;
610                                                                         p = xs + sy*new->x;
611                                                                         if (new->type==CB_VAL) {
612                                                                                 lwt *= wt;
613                                                                                 TESTBG1(aacol, lwt);
614                                                                         }
615                                                                         else {
616                                                                                 p4 = p * new->type;
617                                                                                 aacol[1] = wtcol[1]*lwt;
618                                                                                 aacol[2] = wtcol[2]*lwt;
619                                                                                 aacol[3] = wtcol[3]*lwt;
620                                                                                 lwt *= wt;
621                                                                                 TESTBG4(aacol, lwt);
622                                                                         }
623                                                                 }
624                                                         }
625                                                         if (fabsf(maxd) <= 1.f) {
626                                                                 if ((xe >= 0) && (xe < new->x)) {
627                                                                         lwt = 1.f-(xe - fxe);
628                                                                         aacol[0] = wtcol[0]*lwt;
629                                                                         p = xe + sy*new->x;
630                                                                         if (new->type==CB_VAL) {
631                                                                                 lwt *= wt;
632                                                                                 TESTBG1(aacol, lwt);
633                                                                         }
634                                                                         else {
635                                                                                 p4 = p * new->type;
636                                                                                 aacol[1] = wtcol[1]*lwt;
637                                                                                 aacol[2] = wtcol[2]*lwt;
638                                                                                 aacol[3] = wtcol[3]*lwt;
639                                                                                 lwt *= wt;
640                                                                                 TESTBG4(aacol, lwt);
641                                                                         }
642                                                                 }
643                                                         }
644                                                         xs = MAX2(xs+1, 0);
645                                                         xe = MIN2(xe, new->x);
646                                                         // remaining interior scanline
647                                                         p = sy*new->x + xs;
648                                                         if (new->type==CB_VAL) {
649                                                                 for (sx=xs; sx<xe; sx++, p++) TESTBG1(wtcol, wt);
650                                                         }
651                                                         else {
652                                                                 p4 = p * new->type;
653                                                                 for (sx=xs; sx<xe; sx++, p++, p4+=new->type) TESTBG4(wtcol, wt);
654                                                         }
655                                                 }
656                                         }
657
658                                         // now traverse in opposite direction, y scanlines,
659                                         // but this time only draw the near horizontal edges,
660                                         // applying same AA hack as above
661                                         xs = MAX2((int)floor(bkh_b[0]*ct_crad + x), 0);
662                                         xe = MIN2((int)ceil(bkh_b[1]*ct_crad + x), img->x - 1);
663                                         for (sx=xs; sx<=xe; sx++) {
664                                                 float xf = (sx - x)/ct_crad;
665                                                 float fys = 1e10f, fye = -1e10f;
666                                                 int found = 0;
667                                                 ov = len_bkh - 1;
668                                                 mind = maxd = 0;
669                                                 for (nv=0; nv<len_bkh; nv++) {
670                                                         if ((BKH[nv].max_x >= xf) && (BKH[nv].min_x <= xf)) {
671                                                                 float ty = BKH[ov].y0 + BKH[nv].ls_y*(xf - BKH[ov].x0);
672                                                                 if (ty < fys) { fys = ty;  mind = BKH[nv].ls_y; }
673                                                                 if (ty > fye) { fye = ty;  maxd = BKH[nv].ls_y; }
674                                                                 if (++found == 2) break;
675                                                         }
676                                                         ov = nv;
677                                                 }
678                                                 if (found) {
679                                                         fys = fys*ct_crad + y;
680                                                         fye = fye*ct_crad + y;
681                                                         // near horizontal edges only, line slope <= 1
682                                                         if (fabsf(mind) <= 1.f) {
683                                                                 int iys = (int)floor(fys);
684                                                                 if ((iys >= 0) && (iys < new->y)) {
685                                                                         lwt = 1.f - (fys - iys);
686                                                                         aacol[0] = wtcol[0]*lwt;
687                                                                         p = sx + iys*new->x;
688                                                                         if (new->type==CB_VAL) {
689                                                                                 lwt *= wt;
690                                                                                 TESTBG1(aacol, lwt);
691                                                                         }
692                                                                         else {
693                                                                                 p4 = p * new->type;
694                                                                                 aacol[1] = wtcol[1]*lwt;
695                                                                                 aacol[2] = wtcol[2]*lwt;
696                                                                                 aacol[3] = wtcol[3]*lwt;
697                                                                                 lwt *= wt;
698                                                                                 TESTBG4(aacol, lwt);
699                                                                         }
700                                                                 }
701                                                         }
702                                                         if (fabsf(maxd) <= 1.f) {
703                                                                 int iye = ceil(fye);
704                                                                 if ((iye >= 0) && (iye < new->y)) {
705                                                                         lwt = 1.f - (iye - fye);
706                                                                         aacol[0] = wtcol[0]*lwt;
707                                                                         p = sx + iye*new->x;
708                                                                         if (new->type==CB_VAL) {
709                                                                                 lwt *= wt;
710                                                                                 TESTBG1(aacol, lwt);
711                                                                         }
712                                                                         else {
713                                                                                 p4 = p * new->type;
714                                                                                 aacol[1] = wtcol[1]*lwt;
715                                                                                 aacol[2] = wtcol[2]*lwt;
716                                                                                 aacol[3] = wtcol[3]*lwt;
717                                                                                 lwt *= wt;
718                                                                                 TESTBG4(aacol, lwt);
719                                                                         }
720                                                                 }
721                                                         }
722                                                 }
723                                         }
724
725                                 }
726                                 #undef TESTBG4
727                                 #undef TESTBG1
728
729                         }
730                         else {
731                                 // sampled, simple rejection sampling here, good enough
732                                 unsigned int maxsam, s, ui = BLI_rand()*BLI_rand();
733                                 float wcor, cpr = BLI_frand(), lwt;
734                                 if (no_zbuf)
735                                         maxsam = nqd->samples;  // no zbuffer input, use sample value directly
736                                 else {
737                                         // depth adaptive sampling hack, the more out of focus, the more samples taken, 16 minimum.
738                                         maxsam = (int)(0.5f + nqd->samples*(1.f-(float)exp(-fabs(zbuf->rect[cp] - cam_fdist))));
739                                         if (maxsam < 16) maxsam = 16;
740                                 }
741                                 wcor = 1.f/(float)maxsam;
742                                 for (s=0; s<maxsam; ++s) {
743                                         u = ct_crad*(2.f*RI_vdC(s, ui) - 1.f);
744                                         v = ct_crad*(2.f*(s + cpr)/(float)maxsam - 1.f);
745                                         sx = (int)(x + u + 0.5f), sy = (int)(y + v + 0.5f);
746                                         if ((sx<0) || (sx >= new->x) || (sy<0) || (sy >= new->y)) continue;
747                                         p = sx + sy*new->x;
748                                         p4 = p * new->type;
749                                         if (nqd->bktype==0)     // Disk
750                                                 lwt = ((u*u + v*v)<=cR2) ? wcor : 0.f;
751                                         else  /* AA not needed here */
752                                                 lwt = wcor * getWeight(BKH, len_bkh, u, v, ct_crad, inradsq);
753                                         // prevent background bleeding onto in-focus pixels, user-option
754                                         if (ct_crad > nqd->bthresh) {  // if center blur > threshold
755                                                 if (crad->rect[p] > nqd->bthresh) { // if overlap pixel in focus, do nothing, else add color/weigbt
756                                                         new->rect[p4] += ctcol[0] * lwt;
757                                                         if (new->type != CB_VAL) {
758                                                                 new->rect[p4+1] += ctcol[1] * lwt;
759                                                                 new->rect[p4+2] += ctcol[2] * lwt;
760                                                                 new->rect[p4+3] += ctcol[3] * lwt;
761                                                         }
762                                                         wts->rect[p] += lwt;
763                                                 }
764                                         }
765                                         else {
766                                                 new->rect[p4] += ctcol[0] * lwt;
767                                                 if (new->type != CB_VAL) {
768                                                         new->rect[p4+1] += ctcol[1] * lwt;
769                                                         new->rect[p4+2] += ctcol[2] * lwt;
770                                                         new->rect[p4+3] += ctcol[3] * lwt;
771                                                 }
772                                                 wts->rect[p] += lwt;
773                                         }
774                                 }
775                         }
776
777                 }
778         }
779         
780         // finally, normalize
781         for (y=0; y<new->y; y++) {
782                 unsigned int p = y * new->x;
783                 unsigned int p4 = p * new->type;
784                 int x;
785
786                 for (x=0; x<new->x; x++) {
787                         float dv = (wts->rect[p]==0.f) ? 1.f : (1.f/wts->rect[p]);
788                         new->rect[p4] *= dv;
789                         if (new->type!=CB_VAL) {
790                                 new->rect[p4+1] *= dv;
791                                 new->rect[p4+2] *= dv;
792                                 new->rect[p4+3] *= dv;
793                         }
794                         p++;
795                         p4 += new->type;
796                 }
797         }
798
799         free_compbuf(crad);
800         free_compbuf(wts);
801         
802         printf("Done\n");
803 }
804
805
806 static void node_composit_exec_defocus(void *UNUSED(data), bNode *node, bNodeStack **in, bNodeStack **out)
807 {
808         CompBuf *new, *old, *zbuf_use = NULL, *img = in[0]->data, *zbuf = in[1]->data;
809         NodeDefocus *nqd = node->storage;
810         int no_zbuf = nqd->no_zbuf;
811         
812         if ((img==NULL) || (out[0]->hasoutput==0)) return;
813         
814         // if image not valid type or fstop==infinite (128), nothing to do, pass in to out
815         if (((img->type!=CB_RGBA) && (img->type!=CB_VAL)) || ((no_zbuf==0) && (nqd->fstop==128.f))) {
816                 out[0]->data = pass_on_compbuf(img);
817                 return;
818         }
819         
820         if (zbuf!=NULL) {
821                 // Zbuf input, check to make sure, single channel, same size
822                 // doesn't have to be actual zbuffer, but must be value type
823                 if ((zbuf->x != img->x) || (zbuf->y != img->y)) {
824                         // could do a scale here instead...
825                         printf("Z input must be same size as image !\n");
826                         return;
827                 }
828                 zbuf_use = typecheck_compbuf(zbuf, CB_VAL);
829         }
830         else no_zbuf = 1;       // no zbuffer input
831                 
832         // ok, process
833         old = img;
834         if (nqd->gamco) {
835                 // gamma correct, blender func is simplified, fixed value & RGBA only,
836                 // should make user param. also depremul and premul afterwards, gamma
837                 // correction can't work with premul alpha
838                 old = dupalloc_compbuf(img);
839                 premul_compbuf(old, 1);
840                 gamma_correct_compbuf(old, 0);
841                 premul_compbuf(old, 0);
842         }
843         
844         new = alloc_compbuf(old->x, old->y, old->type, 1);
845         defocus_blur(node, new, old, zbuf_use, in[1]->vec[0]*nqd->scale, no_zbuf);
846         
847         if (nqd->gamco) {
848                 premul_compbuf(new, 1);
849                 gamma_correct_compbuf(new, 1);
850                 premul_compbuf(new, 0);
851                 free_compbuf(old);
852         }
853         if (node->exec & NODE_BREAK) {
854                 free_compbuf(new);
855                 new= NULL;
856         }       
857         out[0]->data = new;
858         if (zbuf_use && (zbuf_use != zbuf)) free_compbuf(zbuf_use);
859 }
860
861 static void node_composit_init_defocus(bNodeTree *UNUSED(ntree), bNode* node, bNodeTemplate *UNUSED(ntemp))
862 {
863         /* qdn: defocus node */
864         NodeDefocus *nbd = MEM_callocN(sizeof(NodeDefocus), "node defocus data");
865         nbd->bktype = 0;
866         nbd->rotation = 0.0f;
867         nbd->preview = 1;
868         nbd->gamco = 0;
869         nbd->samples = 16;
870         nbd->fstop = 128.f;
871         nbd->maxblur = 16;
872         nbd->bthresh = 1.f;
873         nbd->scale = 1.f;
874         nbd->no_zbuf = 1;
875         node->storage = nbd;
876 }
877
878 void register_node_type_cmp_defocus(bNodeTreeType *ttype)
879 {
880         static bNodeType ntype;
881
882         node_type_base(ttype, &ntype, CMP_NODE_DEFOCUS, "Defocus", NODE_CLASS_OP_FILTER, NODE_OPTIONS);
883         node_type_socket_templates(&ntype, cmp_node_defocus_in, cmp_node_defocus_out);
884         node_type_size(&ntype, 150, 120, 200);
885         node_type_init(&ntype, node_composit_init_defocus);
886         node_type_storage(&ntype, "NodeDefocus", node_free_standard_storage, node_copy_standard_storage);
887         node_type_exec(&ntype, node_composit_exec_defocus);
888
889         nodeRegisterType(ttype, &ntype);
890 }