2ae3cd6ba5637eda9cf4e9e39a89e2339c6a4105
[blender.git] / source / blender / nodes / composite / nodes / node_composite_defocus.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version. 
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2006 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): none yet.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/nodes/composite/nodes/node_composite_defocus.c
29  *  \ingroup cmpnodes
30  */
31
32 #include "node_composite_util.h"
33
34 #include <limits.h>
35
36 /* ************ qdn: Defocus node ****************** */
37 static bNodeSocketTemplate cmp_node_defocus_in[]= {
38         {       SOCK_RGBA, 1, N_("Image"),                      1.0f, 1.0f, 1.0f, 1.0f},
39         {       SOCK_FLOAT, 1, N_("Z"),                 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, PROP_FACTOR},
40         {       -1, 0, ""       }
41 };
42 static bNodeSocketTemplate cmp_node_defocus_out[]= {
43         {       SOCK_RGBA, 0, N_("Image")},
44         {       -1, 0, ""       }
45 };
46
47
48 // line coefs for point sampling & scancon. data.
49 typedef struct BokehCoeffs {
50         float x0, y0, dx, dy;
51         float ls_x, ls_y;
52         float min_x, min_y, max_x, max_y;
53 } BokehCoeffs;
54
55 // returns array of BokehCoeffs
56 // returns length of array in 'len_bkh',
57 // radius squared of inscribed disk in 'inradsq', needed in getWeight() test,
58 // BKH[8] is the data returned for the bokeh shape & bkh_b[4] is it's 2d bound
59 static void makeBokeh(char bktype, char ro, int* len_bkh, float* inradsq, BokehCoeffs BKH[8], float bkh_b[4])
60 {
61         float x0, x1, y0, y1, dx, dy, iDxy;
62         /* ro now is in radians. */
63         float w = MAX2(1e-6f, ro);  // never reported stangely enough, but a zero offset causes missing center line...
64         float wi = DEG2RADF(360.f/bktype);
65         int i, ov, nv;
66         
67         // bktype must be at least 3 & <= 8
68         bktype = (bktype<3) ? 3 : ((bktype>8) ? 8 : bktype);
69         *len_bkh = bktype;
70         *inradsq = -1.f;
71
72         for (i=0; i<(*len_bkh); i++) {
73                 x0 = cos(w);
74                 y0 = sin(w);
75                 w += wi;
76                 x1 = cos(w);
77                 y1 = sin(w);
78                 if ((*inradsq)<0.f) {
79                         // radius squared of inscribed disk
80                         float idx=(x0+x1)*0.5f, idy=(y0+y1)*0.5f;
81                         *inradsq = idx*idx + idy*idy;
82                 }
83                 BKH[i].x0 = x0;
84                 BKH[i].y0 = y0;
85                 dx = x1-x0, dy = y1-y0;
86                 iDxy = 1.f / sqrtf(dx*dx + dy*dy);
87                 dx *= iDxy;
88                 dy *= iDxy;
89                 BKH[i].dx = dx;
90                 BKH[i].dy = dy;
91         }
92
93         // precalc scanconversion data
94         // bokeh bound, not transformed, for scanconvert
95         bkh_b[0] = bkh_b[2] = 1e10f;    // xmin/ymin
96         bkh_b[1] = bkh_b[3] = -1e10f;   // xmax/ymax
97         ov = (*len_bkh) - 1;
98         for (nv=0; nv<(*len_bkh); nv++) {
99                 bkh_b[0] = MIN2(bkh_b[0], BKH[nv].x0);  // xmin
100                 bkh_b[1] = MAX2(bkh_b[1], BKH[nv].x0);  // xmax
101                 bkh_b[2] = MIN2(bkh_b[2], BKH[nv].y0);  // ymin
102                 bkh_b[3] = MAX2(bkh_b[3], BKH[nv].y0);  // ymax
103                 BKH[nv].min_x = MIN2(BKH[ov].x0, BKH[nv].x0);
104                 BKH[nv].max_x = MAX2(BKH[ov].x0, BKH[nv].x0);
105                 BKH[nv].min_y = MIN2(BKH[ov].y0, BKH[nv].y0);
106                 BKH[nv].max_y = MAX2(BKH[ov].y0, BKH[nv].y0);
107                 dy = BKH[nv].y0 - BKH[ov].y0;
108                 BKH[nv].ls_x = (BKH[nv].x0 - BKH[ov].x0) / ((dy==0.f) ? 1.f : dy);
109                 BKH[nv].ls_y = (BKH[nv].ls_x==0.f) ? 1.f : (1.f/BKH[nv].ls_x);
110                 ov = nv;
111         }
112 }
113
114 // test if u/v inside shape & returns weight value
115 static float getWeight(BokehCoeffs* BKH, int len_bkh, float u, float v, float rad, float inradsq)
116 {
117         BokehCoeffs* bc = BKH;
118         float cdist, irad = (rad==0.f) ? 1.f : (1.f/rad);
119         u *= irad;
120         v *= irad;
121  
122         // early out test1: if point outside outer unit disk, it cannot be inside shape
123         cdist = u*u + v*v;
124         if (cdist>1.f) return 0.f;
125         
126         // early out test2: if point inside or on inner disk, point must be inside shape
127         if (cdist<=inradsq) return 1.f;
128         
129         while (len_bkh--) {
130                 if ((bc->dy*(u - bc->x0) - bc->dx*(v - bc->y0)) > 0.f) return 0.f;
131                 bc++;
132         }
133         return 1.f;
134 }
135
136 // QMC.seq. for sampling, A.Keller, EMS
137 static float RI_vdC(unsigned int bits, unsigned int r)
138 {
139         bits = ( bits << 16) | ( bits >> 16);
140         bits = ((bits & 0x00ff00ff) << 8) | ((bits & 0xff00ff00) >> 8);
141         bits = ((bits & 0x0f0f0f0f) << 4) | ((bits & 0xf0f0f0f0) >> 4);
142         bits = ((bits & 0x33333333) << 2) | ((bits & 0xcccccccc) >> 2);
143         bits = ((bits & 0x55555555) << 1) | ((bits & 0xaaaaaaaa) >> 1);
144         bits ^= r;
145         return (float)((double)bits / 4294967296.0);
146 }
147
148 // single channel IIR gaussian filtering
149 // much faster than anything else, constant time independent of width
150 // should extend to multichannel and make this a node, could be useful
151 // note: this is an almost exact copy of 'IIR_gauss'
152 static void IIR_gauss_single(CompBuf *buf, float sigma)
153 {
154         double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
155         float *X, *Y, *W;
156         const unsigned int src_width = buf->x;
157         const unsigned int src_height = buf->y;
158         unsigned int i, x, y, sz;
159
160         // single channel only for now
161         if (buf->type != CB_VAL) return;
162
163         // <0.5 not valid, though can have a possibly useful sort of sharpening effect
164         if (sigma < 0.5f) return;
165         
166         // see "Recursive Gabor Filtering" by Young/VanVliet
167         // all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
168         if (sigma >= 3.556f)
169                 q = 0.9804f*(sigma - 3.556f) + 2.5091f;
170         else // sigma >= 0.5
171                 q = (0.0561f*sigma + 0.5784f)*sigma - 0.2568f;
172         q2 = q*q;
173         sc = (1.1668 + q)*(3.203729649  + (2.21566 + q)*q);
174         // no gabor filtering here, so no complex multiplies, just the regular coefs.
175         // all negated here, so as not to have to recalc Triggs/Sdika matrix
176         cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc;
177         cf[2] = -q2*(3.38246 + 3.0*q)/sc;
178         // 0 & 3 unchanged
179         cf[3] = q2*q/sc;
180         cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
181
182         // Triggs/Sdika border corrections,
183         // it seems to work, not entirely sure if it is actually totally correct,
184         // Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
185         // found one other implementation by Cristoph Lampert,
186         // but neither seem to be quite the same, result seems to be ok so far anyway.
187         // Extra scale factor here to not have to do it in filter,
188         // though maybe this had something to with the precision errors
189         sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3]));
190         tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]);
191         tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1]));
192         tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
193         tsM[3] = sc * (cf[1] + cf[3] * cf[2]);
194         tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1]));
195         tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]);
196         tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]);
197         tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + cf[3]);
198         tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
199
200 #define YVV(L)                                                                          \
201 {                                                                                       \
202         W[0] = cf[0] * X[0] + cf[1] * X[0] + cf[2] * X[0] + cf[3] * X[0];                   \
203         W[1] = cf[0] * X[1] + cf[1] * W[0] + cf[2] * X[0] + cf[3] * X[0];                   \
204         W[2] = cf[0] * X[2] + cf[1] * W[1] + cf[2] * W[0] + cf[3] * X[0];                   \
205         for (i = 3; i < L; i++) {                                                           \
206                 W[i] = cf[0] * X[i] + cf[1] * W[i - 1] + cf[2] * W[i - 2] + cf[3] * W[i - 3];   \
207         }                                                                                   \
208         tsu[0] = W[L - 1] - X[L - 1];                                                       \
209         tsu[1] = W[L - 2] - X[L - 1];                                                       \
210         tsu[2] = W[L - 3] - X[L - 1];                                                       \
211         tsv[0] = tsM[0] * tsu[0] + tsM[1] * tsu[1] + tsM[2] * tsu[2] + X[L - 1];            \
212         tsv[1] = tsM[3] * tsu[0] + tsM[4] * tsu[1] + tsM[5] * tsu[2] + X[L - 1];            \
213         tsv[2] = tsM[6] * tsu[0] + tsM[7] * tsu[1] + tsM[8] * tsu[2] + X[L - 1];            \
214         Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2];     \
215         Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1];   \
216         Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
217         /* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */   \
218         for (i = L - 4; i != UINT_MAX; i--) {                                               \
219                 Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3];   \
220         }                                                                                   \
221 } (void)0
222
223         // intermediate buffers
224         sz = MAX2(src_width, src_height);
225         Y = MEM_callocN(sz * sizeof(float), "IIR_gauss Y buf");
226         W = MEM_callocN(sz * sizeof(float), "IIR_gauss W buf");
227         // H
228         for (y = 0; y < src_height; y++) {
229                 X = &buf->rect[y * src_width];
230                 YVV(src_width);
231                 memcpy(X, Y, sizeof(float) * src_width);
232         }
233         // V
234         X = MEM_callocN(src_height * sizeof(float), "IIR_gauss X buf");
235         for (x = 0; x < src_width; x++) {
236                 for (y = 0; y < src_height; y++)
237                         X[y] = buf->rect[x + y * src_width];
238                 YVV(src_height);
239                 for (y = 0; y < src_height; y++)
240                         buf->rect[x + y * src_width] = Y[y];
241         }
242         MEM_freeN(X);
243
244         MEM_freeN(W);
245         MEM_freeN(Y);
246 #undef YVV
247 }
248
249 static void defocus_blur(bNode *node, CompBuf *new, CompBuf *img, CompBuf *zbuf, float inpval, int no_zbuf)
250 {
251         NodeDefocus *nqd = node->storage;
252         CompBuf *wts;           // weights buffer
253         CompBuf *crad;          // CoC radius buffer
254         BokehCoeffs BKH[8];     // bokeh shape data, here never > 8 pts.
255         float bkh_b[4] = {0};   // shape 2D bound
256         float cam_fdist=1, cam_invfdist=1, cam_lens=35;
257         float dof_sp, maxfgc, bk_hn_theta=0, inradsq=0;
258         int y, len_bkh=0, ydone = FALSE;
259         float aspect, aperture;
260         int minsz;
261         //float bcrad, nmaxc, scf;
262         
263         // get some required params from the current scene camera
264         // (ton) this is wrong, needs fixed
265         Scene *scene= (Scene*)node->id;
266         Object* camob = (scene)? scene->camera: NULL;
267         if (camob && camob->type==OB_CAMERA) {
268                 Camera* cam = (Camera*)camob->data;
269                 cam_lens = cam->lens;
270                 cam_fdist = BKE_camera_object_dof_distance(camob);
271                 if (cam_fdist==0.0f) cam_fdist = 1e10f; /* if the dof is 0.0 then set it be be far away */
272                 cam_invfdist = 1.f/cam_fdist;
273         }
274
275         // guess work here.. best match with raytraced result
276         minsz = MIN2(img->x, img->y);
277         dof_sp = (float)minsz / (16.f / cam_lens);      // <- == aspect * MIN2(img->x, img->y) / tan(0.5f * fov);
278         
279         // aperture
280         aspect = (img->x > img->y) ? (img->y / (float)img->x) : (img->x / (float)img->y);
281         aperture = 0.5f*(cam_lens / (aspect*32.f)) / nqd->fstop;
282         
283         // if not disk, make bokeh coefficients and other needed data
284         if (nqd->bktype!=0) {
285                 makeBokeh(nqd->bktype, nqd->rotation, &len_bkh, &inradsq, BKH, bkh_b);
286                 bk_hn_theta = 0.5 * nqd->bktype * sin(2.0 * M_PI / nqd->bktype);        // weight factor
287         }
288         
289         // accumulated weights
290         wts = alloc_compbuf(img->x, img->y, CB_VAL, 1);
291         // CoC radius buffer
292         crad = alloc_compbuf(img->x, img->y, CB_VAL, 1);
293
294         // if 'no_zbuf' flag set (which is always set if input is not an image),
295         // values are instead interpreted directly as blur radius values
296         if (no_zbuf) {
297                 // to prevent *reaaallly* big radius values and impossible calculation times,
298                 // limit the maximum to half the image width or height, whichever is smaller
299                 float maxr = 0.5f*(float)MIN2(img->x, img->y);
300                 unsigned int p;
301
302                 for (p=0; p<(unsigned int)(img->x*img->y); p++) {
303                         crad->rect[p] = zbuf ? (zbuf->rect[p]*nqd->scale) : inpval;
304                         // bug #5921, limit minimum
305                         crad->rect[p] = MAX2(1e-5f, crad->rect[p]);
306                         crad->rect[p] = MIN2(crad->rect[p], maxr);
307                         // if maxblur!=0, limit maximum
308                         if (nqd->maxblur != 0.f) crad->rect[p] = MIN2(crad->rect[p], nqd->maxblur);
309                 }
310         }
311         else {
312                 float wt;
313
314                 // actual zbuffer.
315                 // separate foreground from background CoC's
316                 // then blur background and blend in again with foreground,
317                 // improves the 'blurred foreground overlapping in-focus midground' sharp boundary problem.
318                 // wts buffer here used for blendmask
319                 maxfgc = 0.f; // maximum foreground CoC radius
320                 for (y=0; y<img->y; y++) {
321                         unsigned int p = y * img->x;
322                         int x;
323                         for (x=0; x<img->x; x++) {
324                                 unsigned int px = p + x;
325                                 float iZ = (zbuf->rect[px]==0.f) ? 0.f : (1.f/zbuf->rect[px]);
326                                 crad->rect[px] = 0.5f*(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
327                                 if (crad->rect[px] <= 0.f) {
328                                         wts->rect[px] = 1.f;
329                                         crad->rect[px] = -crad->rect[px];
330                                         if (crad->rect[px] > maxfgc) maxfgc = crad->rect[px];
331                                 }
332                                 else crad->rect[px] = wts->rect[px] = 0;
333                         }
334                 }
335                 
336                 // fast blur...
337                 // bug #6656 part 1, probably when previous node_composite.c was split into separate files, it was not properly updated
338                 // to include recent cvs commits (well, at least not defocus node), so this part was missing...
339                 wt = aperture*128.f;
340                 IIR_gauss_single(crad, wt);
341                 IIR_gauss_single(wts, wt);
342                 
343                 // bug #6656 part 2a, although foreground blur is not based anymore on closest object,
344                 // the rescaling op below was still based on that anyway, and unlike the comment in below code,
345                 // the difference is therefore not always that small at all...
346                 // so for now commented out, not sure if this is going to cause other future problems, lets just wait and see...
347                 /*
348                 // find new maximum to scale it back to original
349                 // (could skip this, not strictly necessary, in general, difference is quite small, but just in case...)
350                 nmaxc = 0;
351                 for (p=0; p<(img->x*img->y); p++)
352                         if (crad->rect[p] > nmaxc) nmaxc = crad->rect[p];
353                 // rescale factor
354                 scf = (nmaxc==0.f) ? 1.f: (maxfgc / nmaxc);
355                 */
356
357                 // and blend...
358                 for (y=0; y<img->y; y++) {
359                         unsigned int p = y*img->x;
360                         int x;
361
362                         for (x=0; x<img->x; x++) {
363                                 unsigned px = p + x;
364                                 if (zbuf->rect[px]!=0.f) {
365                                         float iZ = (zbuf->rect[px]==0.f) ? 0.f : (1.f/zbuf->rect[px]);
366                                         
367                                         // bug #6656 part 2b, do not rescale
368                                         /*
369                                         bcrad = 0.5f*fabs(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
370                                         // scale crad back to original maximum and blend
371                                         crad->rect[px] = bcrad + wts->rect[px]*(scf*crad->rect[px] - bcrad);
372                                         */
373                                         crad->rect[px] = 0.5f*fabsf(aperture*(dof_sp*(cam_invfdist - iZ) - 1.f));
374                                         
375                                         // 'bug' #6615, limit minimum radius to 1 pixel, not really a solution, but somewhat mitigates the problem
376                                         crad->rect[px] = MAX2(crad->rect[px], 0.5f);
377                                         // if maxblur!=0, limit maximum
378                                         if (nqd->maxblur != 0.f) crad->rect[px] = MIN2(crad->rect[px], nqd->maxblur);
379                                 }
380                                 else crad->rect[px] = 0.f;
381                                 // clear weights for next part
382                                 wts->rect[px] = 0.f;
383                         }
384                         // esc set by main calling process
385                         if (node->exec & NODE_BREAK)
386                                 break;
387                 }
388         }
389
390         //------------------------------------------------------------------
391         // main loop
392 #ifndef __APPLE__ /* can crash on Mac, see bug #22856, disabled for now */
393 #ifdef __INTEL_COMPILER /* icc doesn't like the compound statement -- internal error: 0_1506 */
394         #pragma omp parallel for private(y) if (!nqd->preview) schedule(guided)
395 #else
396         #pragma omp parallel for private(y) if (!nqd->preview && img->y*img->x > 16384) schedule(guided)
397 #endif
398 #endif
399         for (y=0; y<img->y; y++) {
400                 unsigned int p, p4, zp, cp, cp4;
401                 float *ctcol, u, v, ct_crad, cR2=0;
402                 int x, sx, sy;
403
404                 // some sort of visual feedback would be nice, or at least this text in the renderwin header
405                 // but for now just print some info in the console every 8 scanlines.
406                 #pragma omp critical
407                 {
408                         if (((ydone & 7)==0) || (ydone==(img->y-1))) {
409                                 if (G.background==0) {
410                                         printf("\rdefocus: Processing Line %d of %d ... ", ydone+1, img->y);
411                                         fflush(stdout);
412                                 }
413                         }
414
415                         ydone++;
416                 }
417
418                 // esc set by main calling process. don't break because openmp doesn't
419                 // allow it, just continue and do nothing 
420                 if (node->exec & NODE_BREAK)
421                         continue;
422
423                 zp = y * img->x;
424                 for (x=0; x<img->x; x++) {
425                         cp = zp + x;
426                         cp4 = cp * img->type;
427
428                         // Circle of Confusion radius for current pixel
429                         cR2 = ct_crad = crad->rect[cp];
430                         // skip if zero (border render)
431                         if (ct_crad==0.f) {
432                                 // related to bug #5921, forgot output image when skipping 0 radius values
433                                 new->rect[cp4] = img->rect[cp4];
434                                 if (new->type != CB_VAL) {
435                                         new->rect[cp4+1] = img->rect[cp4+1];
436                                         new->rect[cp4+2] = img->rect[cp4+2];
437                                         new->rect[cp4+3] = img->rect[cp4+3];
438                                 }
439                                 continue;
440                         }
441                         cR2 *= cR2;
442                         
443                         // pixel color
444                         ctcol = &img->rect[cp4];
445                         
446                         if (!nqd->preview) {
447                                 int xs, xe, ys, ye;
448                                 float lwt, wtcol[4] = {0}, aacol[4] = {0};
449                                 float wt;
450
451                                 // shape weight
452                                 if (nqd->bktype==0)     // disk
453                                         wt = 1.f/((float)M_PI*cR2);
454                                 else
455                                         wt = 1.f/(cR2*bk_hn_theta);
456
457                                 // weighted color
458                                 wtcol[0] = wt*ctcol[0];
459                                 if (new->type != CB_VAL) {
460                                         wtcol[1] = wt*ctcol[1];
461                                         wtcol[2] = wt*ctcol[2];
462                                         wtcol[3] = wt*ctcol[3];
463                                 }
464
465                                 // macro for background blur overlap test
466                                 // unfortunately, since this is done per pixel,
467                                 // it has a very significant negative impact on processing time...
468                                 // (eg. aa disk blur without test: 112 sec, vs with test: 176 sec...)
469                                 // iff center blur radius > threshold
470                                 // and if overlap pixel in focus, do nothing, else add color/weigbt
471                                 // (threshold constant is dependent on amount of blur)
472                                 #define TESTBG1(c, w) {\
473                                         if (ct_crad > nqd->bthresh) {\
474                                                 if (crad->rect[p] > nqd->bthresh) {\
475                                                         new->rect[p] += c[0];\
476                                                         wts->rect[p] += w;\
477                                                 }\
478                                         }\
479                                         else {\
480                                                 new->rect[p] += c[0];\
481                                                 wts->rect[p] += w;\
482                                         }\
483                                 }
484                                 #define TESTBG4(c, w) {\
485                                         if (ct_crad > nqd->bthresh) {\
486                                                 if (crad->rect[p] > nqd->bthresh) {\
487                                                         new->rect[p4] += c[0];\
488                                                         new->rect[p4+1] += c[1];\
489                                                         new->rect[p4+2] += c[2];\
490                                                         new->rect[p4+3] += c[3];\
491                                                         wts->rect[p] += w;\
492                                                 }\
493                                         }\
494                                         else {\
495                                                 new->rect[p4] += c[0];\
496                                                 new->rect[p4+1] += c[1];\
497                                                 new->rect[p4+2] += c[2];\
498                                                 new->rect[p4+3] += c[3];\
499                                                 wts->rect[p] += w;\
500                                         }\
501                                 }
502                                 if (nqd->bktype == 0) {
503                                         // Disk
504                                         int _x, i, j, di;
505                                         float Dj, T;
506                                         // AA pixel
507                                         #define AAPIX(a, b) {\
508                                                 int _ny = b;\
509                                                 if ((_ny >= 0) && (_ny < new->y)) {\
510                                                         int _nx = a;\
511                                                         if ((_nx >=0) && (_nx < new->x)) {\
512                                                                 p = _ny*new->x + _nx;\
513                                                                 if (new->type==CB_VAL) {\
514                                                                         TESTBG1(aacol, lwt);\
515                                                                 }\
516                                                                 else {\
517                                                                         p4 = p * new->type;\
518                                                                         TESTBG4(aacol, lwt);\
519                                                                 }\
520                                                         }\
521                                                 }\
522                                         }
523                                         // circle scanline
524                                         #define CSCAN(a, b) {\
525                                                 int _ny = y + b;\
526                                                 if ((_ny >= 0) && (_ny < new->y)) {\
527                                                         xs = x - a + 1;\
528                                                         if (xs < 0) xs = 0;\
529                                                         xe = x + a;\
530                                                         if (xe > new->x) xe = new->x;\
531                                                         p = _ny*new->x + xs;\
532                                                         if (new->type==CB_VAL) {\
533                                                                 for (_x=xs; _x<xe; _x++, p++) TESTBG1(wtcol, wt);\
534                                                         }\
535                                                         else {\
536                                                                 p4 = p * new->type;\
537                                                                 for (_x=xs; _x<xe; _x++, p++, p4+=new->type) TESTBG4(wtcol, wt);\
538                                                         }\
539                                                 }\
540                                         }
541
542                                         i = ceil(ct_crad);
543                                         j = 0;
544                                         T = 0;
545                                         while (i > j) {
546                                                 Dj = sqrt(cR2 - j*j);
547                                                 Dj -= floorf(Dj);
548                                                 di = 0;
549                                                 if (Dj > T) { i--;  di = 1; }
550                                                 T = Dj;
551                                                 aacol[0] = wtcol[0]*Dj;
552                                                 if (new->type != CB_VAL) {
553                                                         aacol[1] = wtcol[1]*Dj;
554                                                         aacol[2] = wtcol[2]*Dj;
555                                                         aacol[3] = wtcol[3]*Dj;
556                                                 }
557                                                 lwt = wt*Dj;
558                                                 if (i!=j) {
559                                                         // outer pixels
560                                                         AAPIX(x+j, y+i)
561                                                         AAPIX(x+j, y-i)
562                                                         if (j) {
563                                                                 AAPIX(x-j, y+i) // BL
564                                                                 AAPIX(x-j, y-i) // TL
565                                                         }
566                                                         if (di) { // only when i changed, interior of outer section
567                                                                 CSCAN(j, i) // bottom
568                                                                 CSCAN(j, -i) // top
569                                                         }
570                                                 }
571                                                 // lower mid section
572                                                 AAPIX(x+i, y+j)
573                                                 if (i) AAPIX(x-i, y+j)
574                                                 CSCAN(i, j)
575                                                 // upper mid section
576                                                 if (j) {
577                                                         AAPIX(x+i, y-j)
578                                                         if (i) AAPIX(x-i, y-j)
579                                                         CSCAN(i, -j)
580                                                 }
581                                                 j++;
582                                         }
583                                         #undef CSCAN
584                                         #undef AAPIX
585                                 }
586                                 else {
587                                         // n-agonal
588                                         int ov, nv;
589                                         float mind, maxd, lwt;
590                                         ys = MAX2((int)floor(bkh_b[2]*ct_crad + y), 0);
591                                         ye = MIN2((int)ceil(bkh_b[3]*ct_crad + y), new->y - 1);
592                                         for (sy=ys; sy<=ye; sy++) {
593                                                 float fxs = 1e10f, fxe = -1e10f;
594                                                 float yf = (sy - y)/ct_crad;
595                                                 int found = 0;
596                                                 ov = len_bkh - 1;
597                                                 mind = maxd = 0;
598                                                 for (nv=0; nv<len_bkh; nv++) {
599                                                         if ((BKH[nv].max_y >= yf) && (BKH[nv].min_y <= yf)) {
600                                                                 float tx = BKH[ov].x0 + BKH[nv].ls_x*(yf - BKH[ov].y0);
601                                                                 if (tx < fxs) { fxs = tx;  mind = BKH[nv].ls_x; }
602                                                                 if (tx > fxe) { fxe = tx;  maxd = BKH[nv].ls_x; }
603                                                                 if (++found == 2) break;
604                                                         }
605                                                         ov = nv;
606                                                 }
607                                                 if (found) {
608                                                         fxs = fxs*ct_crad + x;
609                                                         fxe = fxe*ct_crad + x;
610                                                         xs = (int)floor(fxs), xe = (int)ceil(fxe);
611                                                         // AA hack for first and last x pixel, near vertical edges only
612                                                         if (fabsf(mind) <= 1.f) {
613                                                                 if ((xs >= 0) && (xs < new->x)) {
614                                                                         lwt = 1.f-(fxs - xs);
615                                                                         aacol[0] = wtcol[0]*lwt;
616                                                                         p = xs + sy*new->x;
617                                                                         if (new->type==CB_VAL) {
618                                                                                 lwt *= wt;
619                                                                                 TESTBG1(aacol, lwt);
620                                                                         }
621                                                                         else {
622                                                                                 p4 = p * new->type;
623                                                                                 aacol[1] = wtcol[1]*lwt;
624                                                                                 aacol[2] = wtcol[2]*lwt;
625                                                                                 aacol[3] = wtcol[3]*lwt;
626                                                                                 lwt *= wt;
627                                                                                 TESTBG4(aacol, lwt);
628                                                                         }
629                                                                 }
630                                                         }
631                                                         if (fabsf(maxd) <= 1.f) {
632                                                                 if ((xe >= 0) && (xe < new->x)) {
633                                                                         lwt = 1.f-(xe - fxe);
634                                                                         aacol[0] = wtcol[0]*lwt;
635                                                                         p = xe + sy*new->x;
636                                                                         if (new->type==CB_VAL) {
637                                                                                 lwt *= wt;
638                                                                                 TESTBG1(aacol, lwt);
639                                                                         }
640                                                                         else {
641                                                                                 p4 = p * new->type;
642                                                                                 aacol[1] = wtcol[1]*lwt;
643                                                                                 aacol[2] = wtcol[2]*lwt;
644                                                                                 aacol[3] = wtcol[3]*lwt;
645                                                                                 lwt *= wt;
646                                                                                 TESTBG4(aacol, lwt);
647                                                                         }
648                                                                 }
649                                                         }
650                                                         xs = MAX2(xs+1, 0);
651                                                         xe = MIN2(xe, new->x);
652                                                         // remaining interior scanline
653                                                         p = sy*new->x + xs;
654                                                         if (new->type==CB_VAL) {
655                                                                 for (sx=xs; sx<xe; sx++, p++) TESTBG1(wtcol, wt);
656                                                         }
657                                                         else {
658                                                                 p4 = p * new->type;
659                                                                 for (sx=xs; sx<xe; sx++, p++, p4+=new->type) TESTBG4(wtcol, wt);
660                                                         }
661                                                 }
662                                         }
663
664                                         // now traverse in opposite direction, y scanlines,
665                                         // but this time only draw the near horizontal edges,
666                                         // applying same AA hack as above
667                                         xs = MAX2((int)floor(bkh_b[0]*ct_crad + x), 0);
668                                         xe = MIN2((int)ceil(bkh_b[1]*ct_crad + x), img->x - 1);
669                                         for (sx=xs; sx<=xe; sx++) {
670                                                 float xf = (sx - x)/ct_crad;
671                                                 float fys = 1e10f, fye = -1e10f;
672                                                 int found = 0;
673                                                 ov = len_bkh - 1;
674                                                 mind = maxd = 0;
675                                                 for (nv=0; nv<len_bkh; nv++) {
676                                                         if ((BKH[nv].max_x >= xf) && (BKH[nv].min_x <= xf)) {
677                                                                 float ty = BKH[ov].y0 + BKH[nv].ls_y*(xf - BKH[ov].x0);
678                                                                 if (ty < fys) { fys = ty;  mind = BKH[nv].ls_y; }
679                                                                 if (ty > fye) { fye = ty;  maxd = BKH[nv].ls_y; }
680                                                                 if (++found == 2) break;
681                                                         }
682                                                         ov = nv;
683                                                 }
684                                                 if (found) {
685                                                         fys = fys*ct_crad + y;
686                                                         fye = fye*ct_crad + y;
687                                                         // near horizontal edges only, line slope <= 1
688                                                         if (fabsf(mind) <= 1.f) {
689                                                                 int iys = (int)floor(fys);
690                                                                 if ((iys >= 0) && (iys < new->y)) {
691                                                                         lwt = 1.f - (fys - iys);
692                                                                         aacol[0] = wtcol[0]*lwt;
693                                                                         p = sx + iys*new->x;
694                                                                         if (new->type==CB_VAL) {
695                                                                                 lwt *= wt;
696                                                                                 TESTBG1(aacol, lwt);
697                                                                         }
698                                                                         else {
699                                                                                 p4 = p * new->type;
700                                                                                 aacol[1] = wtcol[1]*lwt;
701                                                                                 aacol[2] = wtcol[2]*lwt;
702                                                                                 aacol[3] = wtcol[3]*lwt;
703                                                                                 lwt *= wt;
704                                                                                 TESTBG4(aacol, lwt);
705                                                                         }
706                                                                 }
707                                                         }
708                                                         if (fabsf(maxd) <= 1.f) {
709                                                                 int iye = ceil(fye);
710                                                                 if ((iye >= 0) && (iye < new->y)) {
711                                                                         lwt = 1.f - (iye - fye);
712                                                                         aacol[0] = wtcol[0]*lwt;
713                                                                         p = sx + iye*new->x;
714                                                                         if (new->type==CB_VAL) {
715                                                                                 lwt *= wt;
716                                                                                 TESTBG1(aacol, lwt);
717                                                                         }
718                                                                         else {
719                                                                                 p4 = p * new->type;
720                                                                                 aacol[1] = wtcol[1]*lwt;
721                                                                                 aacol[2] = wtcol[2]*lwt;
722                                                                                 aacol[3] = wtcol[3]*lwt;
723                                                                                 lwt *= wt;
724                                                                                 TESTBG4(aacol, lwt);
725                                                                         }
726                                                                 }
727                                                         }
728                                                 }
729                                         }
730
731                                 }
732                                 #undef TESTBG4
733                                 #undef TESTBG1
734
735                         }
736                         else {
737                                 // sampled, simple rejection sampling here, good enough
738                                 unsigned int maxsam, s, ui = BLI_rand()*BLI_rand();
739                                 float wcor, cpr = BLI_frand(), lwt;
740                                 if (no_zbuf)
741                                         maxsam = nqd->samples;  // no zbuffer input, use sample value directly
742                                 else {
743                                         // depth adaptive sampling hack, the more out of focus, the more samples taken, 16 minimum.
744                                         maxsam = (int)(0.5f + nqd->samples*(1.f-(float)exp(-fabs(zbuf->rect[cp] - cam_fdist))));
745                                         if (maxsam < 16) maxsam = 16;
746                                 }
747                                 wcor = 1.f/(float)maxsam;
748                                 for (s=0; s<maxsam; ++s) {
749                                         u = ct_crad*(2.f*RI_vdC(s, ui) - 1.f);
750                                         v = ct_crad*(2.f*(s + cpr)/(float)maxsam - 1.f);
751                                         sx = (int)(x + u + 0.5f), sy = (int)(y + v + 0.5f);
752                                         if ((sx<0) || (sx >= new->x) || (sy<0) || (sy >= new->y)) continue;
753                                         p = sx + sy*new->x;
754                                         p4 = p * new->type;
755                                         if (nqd->bktype==0)     // Disk
756                                                 lwt = ((u*u + v*v)<=cR2) ? wcor : 0.f;
757                                         else  /* AA not needed here */
758                                                 lwt = wcor * getWeight(BKH, len_bkh, u, v, ct_crad, inradsq);
759                                         // prevent background bleeding onto in-focus pixels, user-option
760                                         if (ct_crad > nqd->bthresh) {  // if center blur > threshold
761                                                 if (crad->rect[p] > nqd->bthresh) { // if overlap pixel in focus, do nothing, else add color/weigbt
762                                                         new->rect[p4] += ctcol[0] * lwt;
763                                                         if (new->type != CB_VAL) {
764                                                                 new->rect[p4+1] += ctcol[1] * lwt;
765                                                                 new->rect[p4+2] += ctcol[2] * lwt;
766                                                                 new->rect[p4+3] += ctcol[3] * lwt;
767                                                         }
768                                                         wts->rect[p] += lwt;
769                                                 }
770                                         }
771                                         else {
772                                                 new->rect[p4] += ctcol[0] * lwt;
773                                                 if (new->type != CB_VAL) {
774                                                         new->rect[p4+1] += ctcol[1] * lwt;
775                                                         new->rect[p4+2] += ctcol[2] * lwt;
776                                                         new->rect[p4+3] += ctcol[3] * lwt;
777                                                 }
778                                                 wts->rect[p] += lwt;
779                                         }
780                                 }
781                         }
782
783                 }
784         }
785         
786         // finally, normalize
787         for (y=0; y<new->y; y++) {
788                 unsigned int p = y * new->x;
789                 unsigned int p4 = p * new->type;
790                 int x;
791
792                 for (x=0; x<new->x; x++) {
793                         float dv = (wts->rect[p]==0.f) ? 1.f : (1.f/wts->rect[p]);
794                         new->rect[p4] *= dv;
795                         if (new->type!=CB_VAL) {
796                                 new->rect[p4+1] *= dv;
797                                 new->rect[p4+2] *= dv;
798                                 new->rect[p4+3] *= dv;
799                         }
800                         p++;
801                         p4 += new->type;
802                 }
803         }
804
805         free_compbuf(crad);
806         free_compbuf(wts);
807         
808         printf("Done\n");
809 }
810
811
812 static void node_composit_exec_defocus(void *UNUSED(data), bNode *node, bNodeStack **in, bNodeStack **out)
813 {
814         CompBuf *new, *old, *zbuf_use = NULL, *img = in[0]->data, *zbuf = in[1]->data;
815         NodeDefocus *nqd = node->storage;
816         int no_zbuf = nqd->no_zbuf;
817         
818         if ((img==NULL) || (out[0]->hasoutput==0)) return;
819         
820         // if image not valid type or fstop==infinite (128), nothing to do, pass in to out
821         if (((img->type!=CB_RGBA) && (img->type!=CB_VAL)) || ((no_zbuf==0) && (nqd->fstop==128.f))) {
822                 out[0]->data = pass_on_compbuf(img);
823                 return;
824         }
825         
826         if (zbuf!=NULL) {
827                 // Zbuf input, check to make sure, single channel, same size
828                 // doesn't have to be actual zbuffer, but must be value type
829                 if ((zbuf->x != img->x) || (zbuf->y != img->y)) {
830                         // could do a scale here instead...
831                         printf("Z input must be same size as image !\n");
832                         return;
833                 }
834                 zbuf_use = typecheck_compbuf(zbuf, CB_VAL);
835         }
836         else no_zbuf = 1;       // no zbuffer input
837                 
838         // ok, process
839         old = img;
840         if (nqd->gamco) {
841                 // gamma correct, blender func is simplified, fixed value & RGBA only,
842                 // should make user param. also depremul and premul afterwards, gamma
843                 // correction can't work with premul alpha
844                 old = dupalloc_compbuf(img);
845                 premul_compbuf(old, 1);
846                 gamma_correct_compbuf(old, 0);
847                 premul_compbuf(old, 0);
848         }
849         
850         new = alloc_compbuf(old->x, old->y, old->type, 1);
851         defocus_blur(node, new, old, zbuf_use, in[1]->vec[0]*nqd->scale, no_zbuf);
852         
853         if (nqd->gamco) {
854                 premul_compbuf(new, 1);
855                 gamma_correct_compbuf(new, 1);
856                 premul_compbuf(new, 0);
857                 free_compbuf(old);
858         }
859         if (node->exec & NODE_BREAK) {
860                 free_compbuf(new);
861                 new= NULL;
862         }       
863         out[0]->data = new;
864         if (zbuf_use && (zbuf_use != zbuf)) free_compbuf(zbuf_use);
865 }
866
867 static void node_composit_init_defocus(bNodeTree *UNUSED(ntree), bNode* node, bNodeTemplate *UNUSED(ntemp))
868 {
869         /* qdn: defocus node */
870         NodeDefocus *nbd = MEM_callocN(sizeof(NodeDefocus), "node defocus data");
871         nbd->bktype = 0;
872         nbd->rotation = 0.0f;
873         nbd->preview = 1;
874         nbd->gamco = 0;
875         nbd->samples = 16;
876         nbd->fstop = 128.f;
877         nbd->maxblur = 16;
878         nbd->bthresh = 1.f;
879         nbd->scale = 1.f;
880         nbd->no_zbuf = 1;
881         node->storage = nbd;
882 }
883
884 void register_node_type_cmp_defocus(bNodeTreeType *ttype)
885 {
886         static bNodeType ntype;
887
888         node_type_base(ttype, &ntype, CMP_NODE_DEFOCUS, "Defocus", NODE_CLASS_OP_FILTER, NODE_OPTIONS);
889         node_type_socket_templates(&ntype, cmp_node_defocus_in, cmp_node_defocus_out);
890         node_type_size(&ntype, 150, 120, 200);
891         node_type_init(&ntype, node_composit_init_defocus);
892         node_type_storage(&ntype, "NodeDefocus", node_free_standard_storage, node_copy_standard_storage);
893         node_type_exec(&ntype, node_composit_exec_defocus);
894
895         nodeRegisterType(ttype, &ntype);
896 }