aec1e1387c3765ae5acd9851e01aea0f1fd9d6f2
[blender.git] / source / blender / compositor / operations / COM_FastGaussianBlurOperation.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * Contributor: 
19  *              Jeroen Bakker 
20  *              Monique Dewanchand
21  */
22
23 #include <limits.h>
24
25 #include "COM_FastGaussianBlurOperation.h"
26 #include "MEM_guardedalloc.h"
27 #include "BLI_utildefines.h"
28
29 FastGaussianBlurOperation::FastGaussianBlurOperation() : BlurBaseOperation(COM_DT_COLOR)
30 {
31         this->m_iirgaus = NULL;
32         this->m_chunksize = 256;
33 }
34
35 void FastGaussianBlurOperation::executePixel(float output[4], int x, int y, void *data)
36 {
37         MemoryBuffer *newData = (MemoryBuffer *)data;
38         newData->read(output, x, y);
39 }
40
41 // Calculate the depending area of interest. This depends on the
42 // size of the blur operation; if the blur is large it is faster
43 // to just calculate the whole image at once.
44 // Returns true if the area is just a tile and false if it is
45 // the whole image.
46 bool FastGaussianBlurOperation::getDAI(rcti *rect, rcti *output)
47 {
48         // m_data->sizex * m_size should be enough? For some reason there
49         // seem to be errors in the boundary between tiles.
50         int sx = this->m_data->sizex * this->m_size * 2;
51         if (sx < 1)
52                 sx = 1;
53         int sy = this->m_data->sizey * this->m_size * 2;
54         if (sy < 1)
55                 sy = 1;
56
57         if (sx >= this->m_chunksize || sy >= this->m_chunksize) {
58                 output->xmin = 0;
59                 output->xmax = this->getWidth();
60                 output->ymin = 0;
61                 output->ymax = this->getHeight();
62                 return false;
63         }
64         else {
65                 output->xmin = rect->xmin - sx - 1;
66                 output->xmax = rect->xmax + sx + 1;
67                 output->ymin = rect->ymin - sy - 1;
68                 output->ymax = rect->ymax + sy + 1;
69                 return true;
70         }
71 }
72
73 bool FastGaussianBlurOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
74 {
75         rcti newInput;
76         
77         if (!this->m_sizeavailable) {
78                 rcti sizeInput;
79                 sizeInput.xmin = 0;
80                 sizeInput.ymin = 0;
81                 sizeInput.xmax = 5;
82                 sizeInput.ymax = 5;
83                 NodeOperation *operation = this->getInputOperation(1);
84                 if (operation->determineDependingAreaOfInterest(&sizeInput, readOperation, output)) {
85                         return true;
86                 }
87         }
88         {
89                 if (this->m_sizeavailable) {
90                         getDAI(input, &newInput);
91                 }
92                 else {
93                         newInput.xmin = 0;
94                         newInput.ymin = 0;
95                         newInput.xmax = this->getWidth();
96                         newInput.ymax = this->getHeight();
97                 }
98                 return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
99         }
100 }
101
102 void FastGaussianBlurOperation::initExecution()
103 {
104         BlurBaseOperation::initExecution();
105         BlurBaseOperation::initMutex();
106 }
107
108 void FastGaussianBlurOperation::deinitExecution() 
109 {
110         if (this->m_iirgaus) {
111                 delete this->m_iirgaus;
112                 this->m_iirgaus = NULL;
113         }
114         BlurBaseOperation::deinitMutex();
115 }
116
117 void *FastGaussianBlurOperation::initializeTileData(rcti *rect)
118 {
119 /*      lockMutex();
120     if (!this->m_iirgaus) {
121         MemoryBuffer *newBuf = (MemoryBuffer *)this->m_inputProgram->initializeTileData(rect);
122                 MemoryBuffer *copy = newBuf->duplicate();
123                 updateSize();
124
125                 int c;
126                 this->m_sx = this->m_data->sizex * this->m_size / 2.0f;
127                 this->m_sy = this->m_data->sizey * this->m_size / 2.0f;
128                 
129                 if ((this->m_sx == this->m_sy) && (this->m_sx > 0.f)) {
130                         for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
131                                 IIR_gauss(copy, this->m_sx, c, 3);
132                 }
133                 else {
134                         if (this->m_sx > 0.0f) {
135                                 for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
136                                         IIR_gauss(copy, this->m_sx, c, 1);
137                         }
138                         if (this->m_sy > 0.0f) {
139                                 for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
140                                         IIR_gauss(copy, this->m_sy, c, 2);
141                         }
142                 }
143                 this->m_iirgaus = copy;
144         }
145         unlockMutex();
146         return this->m_iirgaus;
147 }*/
148
149         lockMutex();
150         if (this->m_iirgaus) {
151                 // if this->m_iirgaus is set, we don't do tile rendering, so
152                 // we can return the already calculated cache
153                 unlockMutex();
154                 return this->m_iirgaus;
155         }
156         updateSize();
157         rcti dai;
158         bool use_tiles = getDAI(rect, &dai);
159         if (use_tiles) {
160                 unlockMutex();
161         }
162  
163         MemoryBuffer *buffer = (MemoryBuffer *)this->m_inputProgram->initializeTileData(NULL);
164         rcti *buf_rect = buffer->getRect();
165
166         dai.xmin = max(dai.xmin, buf_rect->xmin);
167         dai.xmax = min(dai.xmax, buf_rect->xmax);
168         dai.ymin = max(dai.ymin, buf_rect->ymin);
169         dai.ymax = min(dai.ymax, buf_rect->ymax);
170
171         MemoryBuffer *tile = new MemoryBuffer(NULL, &dai);
172         tile->copyContentFrom(buffer);
173
174         int c;
175         float sx = this->m_data->sizex * this->m_size / 2.0f;
176         float sy = this->m_data->sizey * this->m_size / 2.0f;
177
178         if ((sx == sy) && (sx > 0.f)) {
179                 for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
180                         IIR_gauss(tile, sx, c, 3);
181         }
182         else {
183                 if (sx > 0.0f) {
184                         for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
185                                 IIR_gauss(tile, sx, c, 1);
186                 }
187                 if (sy > 0.0f) {
188                         for (c = 0; c < COM_NUMBER_OF_CHANNELS; ++c)
189                                 IIR_gauss(tile, sy, c, 2);
190                 }
191         }
192         if (!use_tiles) {
193                 this->m_iirgaus = tile;
194                 unlockMutex();
195         }
196         return tile;
197 }
198
199 void FastGaussianBlurOperation::deinitializeTileData(rcti *rect, void *data)
200 {
201         if (!this->m_iirgaus && data) {
202                 MemoryBuffer *tile = (MemoryBuffer *)data;
203                 delete tile;
204         }
205 }
206
207
208 void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, unsigned int chan, unsigned int xy)
209 {
210         double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
211         double *X, *Y, *W;
212         const unsigned int src_width = src->getWidth();
213         const unsigned int src_height = src->getHeight();
214         unsigned int x, y, sz;
215         unsigned int i;
216         float *buffer = src->getBuffer();
217         
218         // <0.5 not valid, though can have a possibly useful sort of sharpening effect
219         if (sigma < 0.5f) return;
220         
221         if ((xy < 1) || (xy > 3)) xy = 3;
222         
223         // XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
224         //     so just skiping blur along faulty direction if src's def is below that limit!
225         if (src_width < 3) xy &= ~1;
226         if (src_height < 3) xy &= ~2;
227         if (xy < 1) return;
228         
229         // see "Recursive Gabor Filtering" by Young/VanVliet
230         // all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
231         if (sigma >= 3.556f)
232                 q = 0.9804f * (sigma - 3.556f) + 2.5091f;
233         else // sigma >= 0.5
234                 q = (0.0561f * sigma + 0.5784f) * sigma - 0.2568f;
235         q2 = q * q;
236         sc = (1.1668 + q) * (3.203729649  + (2.21566 + q) * q);
237         // no gabor filtering here, so no complex multiplies, just the regular coefs.
238         // all negated here, so as not to have to recalc Triggs/Sdika matrix
239         cf[1] = q * (5.788961737 + (6.76492 + 3.0 * q) * q) / sc;
240         cf[2] = -q2 * (3.38246 + 3.0 * q) / sc;
241         // 0 & 3 unchanged
242         cf[3] = q2 * q / sc;
243         cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
244         
245         // Triggs/Sdika border corrections,
246         // it seems to work, not entirely sure if it is actually totally correct,
247         // Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
248         // found one other implementation by Cristoph Lampert,
249         // but neither seem to be quite the same, result seems to be ok so far anyway.
250         // Extra scale factor here to not have to do it in filter,
251         // though maybe this had something to with the precision errors
252         sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3]));
253         tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]);
254         tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1]));
255         tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
256         tsM[3] = sc * (cf[1] + cf[3] * cf[2]);
257         tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1]));
258         tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]);
259         tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]);
260         tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + cf[3]);
261         tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
262         
263 #define YVV(L)                                                                          \
264 {                                                                                       \
265         W[0] = cf[0] * X[0] + cf[1] * X[0] + cf[2] * X[0] + cf[3] * X[0];                   \
266         W[1] = cf[0] * X[1] + cf[1] * W[0] + cf[2] * X[0] + cf[3] * X[0];                   \
267         W[2] = cf[0] * X[2] + cf[1] * W[1] + cf[2] * W[0] + cf[3] * X[0];                   \
268         for (i = 3; i < L; i++) {                                                           \
269                 W[i] = cf[0] * X[i] + cf[1] * W[i - 1] + cf[2] * W[i - 2] + cf[3] * W[i - 3];   \
270         }                                                                                   \
271         tsu[0] = W[L - 1] - X[L - 1];                                                       \
272         tsu[1] = W[L - 2] - X[L - 1];                                                       \
273         tsu[2] = W[L - 3] - X[L - 1];                                                       \
274         tsv[0] = tsM[0] * tsu[0] + tsM[1] * tsu[1] + tsM[2] * tsu[2] + X[L - 1];            \
275         tsv[1] = tsM[3] * tsu[0] + tsM[4] * tsu[1] + tsM[5] * tsu[2] + X[L - 1];            \
276         tsv[2] = tsM[6] * tsu[0] + tsM[7] * tsu[1] + tsM[8] * tsu[2] + X[L - 1];            \
277         Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2];     \
278         Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1];   \
279         Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
280         /* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */   \
281         for (i = L - 4; i != UINT_MAX; i--) {                                               \
282                 Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3];   \
283         }                                                                                   \
284 } (void)0
285         
286         // intermediate buffers
287         sz = max(src_width, src_height);
288         X = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss X buf");
289         Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
290         W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
291     if (xy & 1) {   // H
292         int offset;
293                 for (y = 0; y < src_height; ++y) {
294             const int yx = y * src_width;
295             offset = yx*COM_NUMBER_OF_CHANNELS + chan;
296             for (x = 0; x < src_width; ++x) {
297                 X[x] = buffer[offset];
298                 offset += COM_NUMBER_OF_CHANNELS;
299             }
300                         YVV(src_width);
301             offset = yx*COM_NUMBER_OF_CHANNELS + chan;
302             for (x = 0; x < src_width; ++x) {
303                 buffer[offset] = Y[x];
304                 offset += COM_NUMBER_OF_CHANNELS;
305             }
306         }
307         }
308     if (xy & 2) {   // V
309         int offset;
310         const int add = src_width * COM_NUMBER_OF_CHANNELS;
311
312         for (x = 0; x < src_width; ++x) {
313             offset = x * COM_NUMBER_OF_CHANNELS + chan;
314             for (y = 0; y < src_height; ++y) {
315                 X[y] = buffer[offset];
316                 offset += add;
317             }
318                         YVV(src_height);
319             offset = x * COM_NUMBER_OF_CHANNELS + chan;
320             for (y = 0; y < src_height; ++y) {
321                 buffer[offset] = Y[y];
322                 offset += add;
323             }
324         }
325     }
326         
327         MEM_freeN(X);
328         MEM_freeN(W);
329         MEM_freeN(Y);
330 #undef YVV
331         
332 }
333
334
335 ///
336 FastGaussianBlurValueOperation::FastGaussianBlurValueOperation() : NodeOperation()
337 {
338         this->addInputSocket(COM_DT_VALUE);
339         this->addOutputSocket(COM_DT_VALUE);
340         this->m_iirgaus = NULL;
341         this->m_inputprogram = NULL;
342         this->m_sigma = 1.0f;
343         this->m_overlay = 0;
344         setComplex(true);
345 }
346
347 void FastGaussianBlurValueOperation::executePixel(float output[4], int x, int y, void *data)
348 {
349         MemoryBuffer *newData = (MemoryBuffer *)data;
350         newData->read(output, x, y);
351 }
352
353 bool FastGaussianBlurValueOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
354 {
355         rcti newInput;
356         
357         if (this->m_iirgaus) {
358                 return false;
359         }
360         else {
361                 newInput.xmin = 0;
362                 newInput.ymin = 0;
363                 newInput.xmax = this->getWidth();
364                 newInput.ymax = this->getHeight();
365         }
366         return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
367 }
368
369 void FastGaussianBlurValueOperation::initExecution()
370 {
371         this->m_inputprogram = getInputSocketReader(0);
372         initMutex();
373 }
374
375 void FastGaussianBlurValueOperation::deinitExecution() 
376 {
377         if (this->m_iirgaus) {
378                 delete this->m_iirgaus;
379                 this->m_iirgaus = NULL;
380         }
381         deinitMutex();
382 }
383
384 void *FastGaussianBlurValueOperation::initializeTileData(rcti *rect)
385 {
386         lockMutex();
387         if (!this->m_iirgaus) {
388                 MemoryBuffer *newBuf = (MemoryBuffer *)this->m_inputprogram->initializeTileData(rect);
389                 MemoryBuffer *copy = newBuf->duplicate();
390                 FastGaussianBlurOperation::IIR_gauss(copy, this->m_sigma, 0, 3);
391
392                 if (this->m_overlay == FAST_GAUSS_OVERLAY_MIN) {
393                         float *src = newBuf->getBuffer();
394                         float *dst = copy->getBuffer();
395                         for (int i = copy->getWidth() * copy->getHeight(); i != 0; i--, src += COM_NUMBER_OF_CHANNELS, dst += COM_NUMBER_OF_CHANNELS) {
396                                 if (*src < *dst) {
397                                         *dst = *src;
398                                 }
399                         }
400                 }
401                 else if (this->m_overlay == FAST_GAUSS_OVERLAY_MAX) {
402                         float *src = newBuf->getBuffer();
403                         float *dst = copy->getBuffer();
404                         for (int i = copy->getWidth() * copy->getHeight(); i != 0; i--, src += COM_NUMBER_OF_CHANNELS, dst += COM_NUMBER_OF_CHANNELS) {
405                                 if (*src > *dst) {
406                                         *dst = *src;
407                                 }
408                         }
409                 }
410
411 //              newBuf->
412
413                 this->m_iirgaus = copy;
414         }
415         unlockMutex();
416         return this->m_iirgaus;
417 }
418