7830eef829cc2c850a6236ed0deb424fcb069100
[blender.git] / source / blender / compositor / operations / COM_FastGaussianBlurOperation.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * Contributor: 
19  *              Jeroen Bakker 
20  *              Monique Dewanchand
21  */
22
23 #include "COM_FastGaussianBlurOperation.h"
24 #include "MEM_guardedalloc.h"
25 #include "BLI_utildefines.h"
26
27 FastGaussianBlurOperation::FastGaussianBlurOperation() : BlurBaseOperation()
28 {
29         this->iirgaus = NULL;
30 }
31
32 void FastGaussianBlurOperation::executePixel(float *color, int x, int y, MemoryBuffer *inputBuffers[], void *data)
33 {
34         MemoryBuffer *newData = (MemoryBuffer *)data;
35         newData->read(color, x, y);     
36 }
37
38 bool FastGaussianBlurOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
39 {
40         rcti newInput;
41         rcti sizeInput;
42         sizeInput.xmin = 0;
43         sizeInput.ymin = 0;
44         sizeInput.xmax = 5;
45         sizeInput.ymax = 5;
46         
47         NodeOperation *operation = this->getInputOperation(1);
48         if (operation->determineDependingAreaOfInterest(&sizeInput, readOperation, output)) {
49                 return true;
50         }
51         else {
52                 if (this->iirgaus) {
53                         return false;
54                 }
55                 else {
56                         newInput.xmin = 0;
57                         newInput.ymin = 0;
58                         newInput.xmax = this->getWidth();
59                         newInput.ymax = this->getHeight();
60                 }
61                 return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
62         }
63 }
64
65 void FastGaussianBlurOperation::initExecution()
66 {
67         BlurBaseOperation::initExecution();
68         BlurBaseOperation::initMutex();
69 }
70
71 void FastGaussianBlurOperation::deinitExecution() 
72 {
73         if (this->iirgaus) {
74                 delete this->iirgaus;
75                 this->iirgaus = NULL;
76         }
77         BlurBaseOperation::deinitMutex();       
78 }
79
80 void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers)
81 {
82         lockMutex();
83         if (!iirgaus) {
84                 MemoryBuffer *newBuf = (MemoryBuffer *)this->inputProgram->initializeTileData(rect, memoryBuffers);
85                 MemoryBuffer *copy = newBuf->duplicate();
86                 updateSize(memoryBuffers);
87                 
88                 int c;
89                 sx = data->sizex * this->size / 2.0f;
90                 sy = data->sizey * this->size / 2.0f;
91                 
92                 if ((sx == sy) && (sx > 0.f)) {
93                         for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
94                                 IIR_gauss(copy, sx, c, 3);
95                 }
96                 else {
97                         if (sx > 0.f) {
98                                 for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
99                                         IIR_gauss(copy, sx, c, 1);
100                         }
101                         if (sy > 0.f) {
102                                 for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
103                                         IIR_gauss(copy, sy, c, 2);
104                         }
105                 }
106                 this->iirgaus = copy;
107         }
108         unlockMutex();
109         return iirgaus;
110 }
111
112 void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int chan, int xy)
113 {
114         double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
115         double *X, *Y, *W;
116         int i, x, y, sz;
117         float *buffer = src->getBuffer();
118         
119         // <0.5 not valid, though can have a possibly useful sort of sharpening effect
120         if (sigma < 0.5f) return;
121         
122         if ((xy < 1) || (xy > 3)) xy = 3;
123         
124         // XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
125         //     so just skiping blur along faulty direction if src's def is below that limit!
126         if (src->getWidth() < 3) xy &= ~(int) 1;
127         if (src->getHeight() < 3) xy &= ~(int) 2;
128         if (xy < 1) return;
129         
130         // see "Recursive Gabor Filtering" by Young/VanVliet
131         // all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
132         if (sigma >= 3.556f)
133                 q = 0.9804f * (sigma - 3.556f) + 2.5091f;
134         else // sigma >= 0.5
135                 q = (0.0561f * sigma + 0.5784f) * sigma - 0.2568f;
136         q2 = q * q;
137         sc = (1.1668 + q) * (3.203729649  + (2.21566 + q) * q);
138         // no gabor filtering here, so no complex multiplies, just the regular coefs.
139         // all negated here, so as not to have to recalc Triggs/Sdika matrix
140         cf[1] = q * (5.788961737 + (6.76492 + 3.0 * q) * q) / sc;
141         cf[2] = -q2 * (3.38246 + 3.0 * q) / sc;
142         // 0 & 3 unchanged
143         cf[3] = q2 * q / sc;
144         cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
145         
146         // Triggs/Sdika border corrections,
147         // it seems to work, not entirely sure if it is actually totally correct,
148         // Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
149         // found one other implementation by Cristoph Lampert,
150         // but neither seem to be quite the same, result seems to be ok so far anyway.
151         // Extra scale factor here to not have to do it in filter,
152         // though maybe this had something to with the precision errors
153         sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3]));
154         tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]);
155         tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1]));
156         tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
157         tsM[3] = sc * (cf[1] + cf[3] * cf[2]);
158         tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1]));
159         tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]);
160         tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]);
161         tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + cf[3]);
162         tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
163         
164 #define YVV(L)                                                                          \
165 {                                                                                       \
166         W[0] = cf[0] * X[0] + cf[1] * X[0] + cf[2] * X[0] + cf[3] * X[0];                   \
167         W[1] = cf[0] * X[1] + cf[1] * W[0] + cf[2] * X[0] + cf[3] * X[0];                   \
168         W[2] = cf[0] * X[2] + cf[1] * W[1] + cf[2] * W[0] + cf[3] * X[0];                   \
169         for (i = 3; i < L; i++) {                                                           \
170                 W[i] = cf[0] * X[i] + cf[1] * W[i - 1] + cf[2] * W[i - 2] + cf[3] * W[i - 3];   \
171         }                                                                                   \
172         tsu[0] = W[L - 1] - X[L - 1];                                                       \
173         tsu[1] = W[L - 2] - X[L - 1];                                                       \
174         tsu[2] = W[L - 3] - X[L - 1];                                                       \
175         tsv[0] = tsM[0] * tsu[0] + tsM[1] * tsu[1] + tsM[2] * tsu[2] + X[L - 1];            \
176         tsv[1] = tsM[3] * tsu[0] + tsM[4] * tsu[1] + tsM[5] * tsu[2] + X[L - 1];            \
177         tsv[2] = tsM[6] * tsu[0] + tsM[7] * tsu[1] + tsM[8] * tsu[2] + X[L - 1];            \
178         Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2];     \
179         Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1];   \
180         Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
181         for (i = L - 4; i >= 0; i--) {                                                      \
182                 Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3];   \
183         }                                                                                   \
184 } (void)0
185         
186         // intermediate buffers
187         sz = MAX2(src->getWidth(), src->getHeight());
188         X = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss X buf");
189         Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
190         W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
191         if (xy & 1) {   // H
192                 for (y = 0; y < src->getHeight(); ++y) {
193                         const int yx = y * src->getWidth();
194                         for (x = 0; x < src->getWidth(); ++x)
195                                 X[x] = buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan];
196                         YVV(src->getWidth());
197                         for (x = 0; x < src->getWidth(); ++x)
198                                 buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan] = Y[x];
199                 }
200         }
201         if (xy & 2) {   // V
202                 for (x = 0; x < src->getWidth(); ++x) {
203                         for (y = 0; y < src->getHeight(); ++y)
204                                 X[y] = buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan];
205                         YVV(src->getHeight());
206                         for (y = 0; y < src->getHeight(); ++y)
207                                 buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan] = Y[y];
208                 }
209         }
210         
211         MEM_freeN(X);
212         MEM_freeN(W);
213         MEM_freeN(Y);
214 #undef YVV
215         
216 }