/* ***** BEGIN GPL LICENSE BLOCK *****
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is Copyright (C) 2006 Blender Foundation.
 * All rights reserved.
 *
 * The Original Code is: all of this file.
 *
 * Contributor(s): none yet.
 *
 * ***** END GPL LICENSE BLOCK *****
 */
CompBuf *alloc_compbuf(int sizex, int sizey, int type, int alloc)
{
	CompBuf *cbuf= MEM_callocN(sizeof(CompBuf), "compbuf");
	
	cbuf->x= sizex;
	cbuf->y= sizey;
	cbuf->xrad= sizex/2;
	cbuf->yrad= sizey/2;
	
	cbuf->type= type;
	if(alloc) {
		if(cbuf->type==CB_RGBA)
			cbuf->rect= MEM_mapallocN(4*sizeof(float)*sizex*sizey, "compbuf RGBA rect");
		else if(cbuf->type==CB_VEC3)
			cbuf->rect= MEM_mapallocN(3*sizeof(float)*sizex*sizey, "compbuf Vector3 rect");
		else if(cbuf->type==CB_VEC2)
			cbuf->rect= MEM_mapallocN(2*sizeof(float)*sizex*sizey, "compbuf Vector2 rect");
		else
			cbuf->rect= MEM_mapallocN(sizeof(float)*sizex*sizey, "compbuf Fac rect");
		cbuf->malloc= 1;
	}
	cbuf->disprect.xmin= 0;
	cbuf->disprect.ymin= 0;
	cbuf->disprect.xmax= sizex;
	cbuf->disprect.ymax= sizey;
	
	return cbuf;
}
CompBuf *dupalloc_compbuf(CompBuf *cbuf)
{
	CompBuf *dupbuf= alloc_compbuf(cbuf->x, cbuf->y, cbuf->type, 1);
	if(dupbuf) {
		memcpy(dupbuf->rect, cbuf->rect, cbuf->type*sizeof(float)*cbuf->x*cbuf->y);
		
		dupbuf->xof= cbuf->xof;
		dupbuf->yof= cbuf->yof;
	}
	return dupbuf;
}
/* instead of reference counting, we create a list */
CompBuf *pass_on_compbuf(CompBuf *cbuf)
{
	CompBuf *dupbuf= (cbuf)? alloc_compbuf(cbuf->x, cbuf->y, cbuf->type, 0): NULL;
	CompBuf *lastbuf;
	
	if(dupbuf) {
		dupbuf->rect= cbuf->rect;
		dupbuf->xof= cbuf->xof;
		dupbuf->yof= cbuf->yof;
		
		/* get last buffer in list, and append dupbuf */
		for(lastbuf= cbuf; lastbuf; lastbuf= lastbuf->next)
			if(lastbuf->next==NULL)
				break;
		lastbuf->next= dupbuf;
		dupbuf->prev= lastbuf;
	}
	return dupbuf;
}
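
/* Editorial sketch of the intended ownership model (not part of the original
 * file): several users can share one pixel rect without reference counting;
 * free_compbuf() below unlinks a member and hands the malloc tag to a
 * surviving neighbour, so the rect is released exactly once.
 *
 *   CompBuf *a= alloc_compbuf(64, 64, CB_RGBA, 1);  // owns its rect
 *   CompBuf *b= pass_on_compbuf(a);                 // shares a->rect
 *   free_compbuf(a);   // b inherits the malloc tag, rect stays valid
 *   free_compbuf(b);   // last user, rect is freed here
 */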
void free_compbuf(CompBuf *cbuf)
{
	/* check referencing, then remove from list and set malloc tag */
	if(cbuf->prev || cbuf->next) {
		if(cbuf->prev)
			cbuf->prev->next= cbuf->next;
		if(cbuf->next)
			cbuf->next->prev= cbuf->prev;
		if(cbuf->malloc) {
			if(cbuf->prev)
				cbuf->prev->malloc= 1;
			else
				cbuf->next->malloc= 1;
			cbuf->malloc= 0;
		}
	}
	
	if(cbuf->malloc && cbuf->rect)
		MEM_freeN(cbuf->rect);
	
	MEM_freeN(cbuf);
}
void print_compbuf(char *str, CompBuf *cbuf)
{
	printf("Compbuf %s %d %d %p\n", str, cbuf->x, cbuf->y, cbuf->rect);
}

void compbuf_set_node(CompBuf *cbuf, bNode *node)
{
	if (cbuf) cbuf->node = node;
}
/* used for disabling node (similar code in drawnode.c for disable line) */
void node_compo_pass_on(bNode *node, bNodeStack **nsin, bNodeStack **nsout)
{
	CompBuf *valbuf= NULL, *colbuf= NULL, *vecbuf= NULL;
	bNodeSocket *sock;
	int a;
	
	/* connect the first value buffer in with first value out */
	/* connect the first RGBA buffer in with first RGBA out */
	
	/* test the inputs */
	for(a=0, sock= node->inputs.first; sock; sock= sock->next, a++) {
		if(nsin[a]->data) {
			CompBuf *cbuf= nsin[a]->data;
			if(cbuf->type==CB_VAL && valbuf==NULL) valbuf= cbuf;
			if(cbuf->type==CB_VEC3 && vecbuf==NULL) vecbuf= cbuf;
			if(cbuf->type==CB_RGBA && colbuf==NULL) colbuf= cbuf;
		}
	}
	
	/* pass the buffers on to the outputs */
	if(valbuf || colbuf || vecbuf) {
		for(a=0, sock= node->outputs.first; sock; sock= sock->next, a++) {
			if(nsout[a]->hasoutput) {
				if(sock->type==SOCK_VALUE && valbuf) {
					nsout[a]->data= pass_on_compbuf(valbuf);
					valbuf= NULL;
				}
				if(sock->type==SOCK_VECTOR && vecbuf) {
					nsout[a]->data= pass_on_compbuf(vecbuf);
					vecbuf= NULL;
				}
				if(sock->type==SOCK_RGBA && colbuf) {
					nsout[a]->data= pass_on_compbuf(colbuf);
					colbuf= NULL;
				}
			}
		}
	}
}
CompBuf *get_cropped_compbuf(rcti *drect, float *rectf, int rectx, int recty, int type)
{
	CompBuf *cbuf;
	rcti disprect= *drect;
	float *outfp;
	int dx, y;
	
	if(disprect.xmax>rectx) disprect.xmax= rectx;
	if(disprect.ymax>recty) disprect.ymax= recty;
	if(disprect.xmin>= disprect.xmax) return NULL;
	if(disprect.ymin>= disprect.ymax) return NULL;
	
	cbuf= alloc_compbuf(disprect.xmax-disprect.xmin, disprect.ymax-disprect.ymin, type, 1);
	outfp= cbuf->rect;
	rectf += type*(disprect.ymin*rectx + disprect.xmin);
	dx= type*cbuf->x;
	for(y=cbuf->y; y>0; y--, outfp+=dx, rectf+=type*rectx)
		memcpy(outfp, rectf, sizeof(float)*dx);
	
	return cbuf;
}
CompBuf *scalefast_compbuf(CompBuf *inbuf, int newx, int newy)
{
	CompBuf *outbuf;
	float *rectf, *newrectf, *rf;
	int x, y, c, pixsize= inbuf->type;
	int ofsx, ofsy, stepx, stepy;
	
	if(inbuf->x==newx && inbuf->y==newy)
		return dupalloc_compbuf(inbuf);
	
	outbuf= alloc_compbuf(newx, newy, inbuf->type, 1);
	newrectf= outbuf->rect;
	
	/* nearest-neighbour sampling with 16.16 fixed-point stepping */
	stepx = (65536.0 * (inbuf->x - 1.0) / (newx - 1.0)) + 0.5;
	stepy = (65536.0 * (inbuf->y - 1.0) / (newy - 1.0)) + 0.5;
	ofsy = 32768;
	for (y = newy; y > 0 ; y--){
		rectf = inbuf->rect;
		rectf += pixsize * (ofsy >> 16) * inbuf->x;
		
		ofsy += stepy;
		ofsx = 32768;
		
		for (x = newx ; x>0 ; x--) {
			rf= rectf + pixsize*(ofsx >> 16);
			for(c=0; c<pixsize; c++)
				newrectf[c] = rf[c];
			
			newrectf+= pixsize;
			ofsx += stepx;
		}
	}
	
	return outbuf;
}
void typecheck_compbuf_color(float *out, float *in, int outtype, int intype)
{
	if(intype == outtype) {
		memcpy(out, in, sizeof(float)*outtype);
	}
	else if(outtype==CB_VAL) {
		if(intype==CB_VEC2) {
			*out= 0.5f*(in[0]+in[1]);
		}
		else if(intype==CB_VEC3) {
			*out= 0.333333f*(in[0]+in[1]+in[2]);
		}
		else if(intype==CB_RGBA) {
			/* weights roughly approximate a luminance conversion */
			*out= in[0]*0.35f + in[1]*0.45f + in[2]*0.2f;
		}
	}
	else if(outtype==CB_VEC2) {
		if(intype==CB_VAL) {
			out[0]= out[1]= in[0];
		}
		else if(intype==CB_VEC3) {
			out[0]= in[0]; out[1]= in[1];
		}
		else if(intype==CB_RGBA) {
			out[0]= in[0]; out[1]= in[1];
		}
	}
	else if(outtype==CB_VEC3) {
		if(intype==CB_VAL) {
			out[0]= out[1]= out[2]= in[0];
		}
		else if(intype==CB_VEC2) {
			out[0]= in[0]; out[1]= in[1]; out[2]= 0.0f;
		}
		else if(intype==CB_RGBA) {
			out[0]= in[0]; out[1]= in[1]; out[2]= in[2];
		}
	}
	else if(outtype==CB_RGBA) {
		if(intype==CB_VAL) {
			out[0]= out[1]= out[2]= out[3]= in[0];
		}
		else if(intype==CB_VEC2) {
			out[0]= in[0]; out[1]= in[1]; out[2]= 0.0f; out[3]= 1.0f;
		}
		else if(intype==CB_VEC3) {
			out[0]= in[0]; out[1]= in[1]; out[2]= in[2]; out[3]= 1.0f;
		}
	}
}
CompBuf *typecheck_compbuf(CompBuf *inbuf, int type)
{
	if(inbuf && inbuf->type!=type) {
		CompBuf *outbuf;
		float *inrf, *outrf;
		int x;
		
		outbuf= alloc_compbuf(inbuf->x, inbuf->y, type, 1);
		
		/* warning note: xof and yof are applied in pixelprocessor, but should be copied otherwise? */
		outbuf->xof= inbuf->xof;
		outbuf->yof= inbuf->yof;
		
		if(inbuf->rect_procedural) {
			outbuf->rect_procedural= inbuf->rect_procedural;
			VECCOPY(outbuf->procedural_size, inbuf->procedural_size);
			VECCOPY(outbuf->procedural_offset, inbuf->procedural_offset);
			outbuf->procedural_type= inbuf->procedural_type;
			outbuf->node= inbuf->node;
			return outbuf;
		}
		
		inrf= inbuf->rect;
		outrf= outbuf->rect;
		x= inbuf->x*inbuf->y;
		
		if(type==CB_VAL) {
			if(inbuf->type==CB_VEC2) {
				for(; x>0; x--, outrf+= 1, inrf+= 2)
					*outrf= 0.5f*(inrf[0]+inrf[1]);
			}
			else if(inbuf->type==CB_VEC3) {
				for(; x>0; x--, outrf+= 1, inrf+= 3)
					*outrf= 0.333333f*(inrf[0]+inrf[1]+inrf[2]);
			}
			else if(inbuf->type==CB_RGBA) {
				for(; x>0; x--, outrf+= 1, inrf+= 4)
					*outrf= inrf[0]*0.35f + inrf[1]*0.45f + inrf[2]*0.2f;
			}
		}
		else if(type==CB_VEC2) {
			if(inbuf->type==CB_VAL) {
				for(; x>0; x--, outrf+= 2, inrf+= 1) {
					outrf[0]= outrf[1]= inrf[0];
				}
			}
			else if(inbuf->type==CB_VEC3) {
				for(; x>0; x--, outrf+= 2, inrf+= 3) {
					outrf[0]= inrf[0]; outrf[1]= inrf[1];
				}
			}
			else if(inbuf->type==CB_RGBA) {
				for(; x>0; x--, outrf+= 2, inrf+= 4) {
					outrf[0]= inrf[0]; outrf[1]= inrf[1];
				}
			}
		}
		else if(type==CB_VEC3) {
			if(inbuf->type==CB_VAL) {
				for(; x>0; x--, outrf+= 3, inrf+= 1) {
					outrf[0]= outrf[1]= outrf[2]= inrf[0];
				}
			}
			else if(inbuf->type==CB_VEC2) {
				for(; x>0; x--, outrf+= 3, inrf+= 2) {
					outrf[0]= inrf[0]; outrf[1]= inrf[1]; outrf[2]= 0.0f;
				}
			}
			else if(inbuf->type==CB_RGBA) {
				for(; x>0; x--, outrf+= 3, inrf+= 4) {
					outrf[0]= inrf[0]; outrf[1]= inrf[1]; outrf[2]= inrf[2];
				}
			}
		}
		else if(type==CB_RGBA) {
			if(inbuf->type==CB_VAL) {
				for(; x>0; x--, outrf+= 4, inrf+= 1) {
					outrf[0]= outrf[1]= outrf[2]= outrf[3]= inrf[0];
				}
			}
			else if(inbuf->type==CB_VEC2) {
				for(; x>0; x--, outrf+= 4, inrf+= 2) {
					outrf[0]= inrf[0]; outrf[1]= inrf[1]; outrf[2]= 0.0f; outrf[3]= 1.0f;
				}
			}
			else if(inbuf->type==CB_VEC3) {
				for(; x>0; x--, outrf+= 4, inrf+= 3) {
					outrf[0]= inrf[0]; outrf[1]= inrf[1]; outrf[2]= inrf[2]; outrf[3]= 1.0f;
				}
			}
		}
		
		return outbuf;
	}
	return inbuf;
}
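
/* Illustrative call pattern (editorial note, mirrors the pixel processors
 * below): coerce an input to the type a node expects, then free the
 * temporary only if a conversion actually happened:
 *
 *   CompBuf *use= typecheck_compbuf(in, CB_RGBA);
 *   ... process use ...
 *   if(use!=in) free_compbuf(use);
 */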
float *compbuf_get_pixel(CompBuf *cbuf, float *rectf, int x, int y, int xrad, int yrad)
{
	if(cbuf) {
		if(cbuf->rect_procedural) {
			cbuf->rect_procedural(cbuf, rectf, (float)x/(float)xrad, (float)y/(float)yrad);
			return rectf;
		}
		else {
			static float col[4]= {0.0f, 0.0f, 0.0f, 0.0f};
			
			/* map the centered x/y coordinates into the buffer, taking its offset into account */
			x-= cbuf->xof;
			y-= cbuf->yof;
			
			if(y<-cbuf->yrad || y>= -cbuf->yrad+cbuf->y) return col;
			if(x<-cbuf->xrad || x>= -cbuf->xrad+cbuf->x) return col;
			
			return cbuf->rect + cbuf->type*( (cbuf->yrad+y)*cbuf->x + (cbuf->xrad+x) );
		}
	}
	else return rectf;
}
/* **************************************************** */

/* Pixel-to-Pixel operation, 1 Image in, 1 out */
void composit1_pixel_processor(bNode *node, CompBuf *out, CompBuf *src_buf, float *src_col,
									  void (*func)(bNode *, float *, float *),
									  int src_type)
{
	CompBuf *src_use;
	float *outfp=out->rect, *srcfp;
	int xrad, yrad, x, y;
	
	src_use= typecheck_compbuf(src_buf, src_type);
	
	xrad= out->xrad;
	yrad= out->yrad;
	
	for(y= -yrad; y<-yrad+out->y; y++) {
		for(x= -xrad; x<-xrad+out->x; x++, outfp+=out->type) {
			srcfp= compbuf_get_pixel(src_use, src_col, x, y, xrad, yrad);
			func(node, outfp, srcfp);
		}
	}
	
	if(src_use!=src_buf)
		free_compbuf(src_use);
}
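
/* Illustrative sketch (editorial note, assumed node exec callback): a
 * typical node allocates its output and runs a per-pixel op over the input:
 *
 *   CompBuf *out= alloc_compbuf(cbuf->x, cbuf->y, CB_RGBA, 1);
 *   composit1_pixel_processor(node, out, cbuf, nsin[0]->vec,
 *                             do_rgba_to_yuva, CB_RGBA);
 *   nsout[0]->data= out;
 */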
/* Pixel-to-Pixel operation, 2 Images in, 1 out */
void composit2_pixel_processor(bNode *node, CompBuf *out, CompBuf *src_buf, float *src_col,
									  CompBuf *fac_buf, float *fac, void (*func)(bNode *, float *, float *, float *),
									  int src_type, int fac_type)
{
	CompBuf *src_use, *fac_use;
	float *outfp=out->rect, *srcfp, *facfp;
	int xrad, yrad, x, y;
	
	src_use= typecheck_compbuf(src_buf, src_type);
	fac_use= typecheck_compbuf(fac_buf, fac_type);
	
	xrad= out->xrad;
	yrad= out->yrad;
	
	for(y= -yrad; y<-yrad+out->y; y++) {
		for(x= -xrad; x<-xrad+out->x; x++, outfp+=out->type) {
			srcfp= compbuf_get_pixel(src_use, src_col, x, y, xrad, yrad);
			facfp= compbuf_get_pixel(fac_use, fac, x, y, xrad, yrad);
			
			func(node, outfp, srcfp, facfp);
		}
	}
	
	if(src_use!=src_buf)
		free_compbuf(src_use);
	if(fac_use!=fac_buf)
		free_compbuf(fac_use);
}
/* Pixel-to-Pixel operation, 3 Images in, 1 out */
void composit3_pixel_processor(bNode *node, CompBuf *out, CompBuf *src1_buf, float *src1_col, CompBuf *src2_buf, float *src2_col,
									  CompBuf *fac_buf, float *fac, void (*func)(bNode *, float *, float *, float *, float *),
									  int src1_type, int src2_type, int fac_type)
{
	CompBuf *src1_use, *src2_use, *fac_use;
	float *outfp=out->rect, *src1fp, *src2fp, *facfp;
	int xrad, yrad, x, y;
	
	src1_use= typecheck_compbuf(src1_buf, src1_type);
	src2_use= typecheck_compbuf(src2_buf, src2_type);
	fac_use= typecheck_compbuf(fac_buf, fac_type);
	
	xrad= out->xrad;
	yrad= out->yrad;
	
	for(y= -yrad; y<-yrad+out->y; y++) {
		for(x= -xrad; x<-xrad+out->x; x++, outfp+=out->type) {
			src1fp= compbuf_get_pixel(src1_use, src1_col, x, y, xrad, yrad);
			src2fp= compbuf_get_pixel(src2_use, src2_col, x, y, xrad, yrad);
			facfp= compbuf_get_pixel(fac_use, fac, x, y, xrad, yrad);
			
			func(node, outfp, src1fp, src2fp, facfp);
		}
	}
	
	if(src1_use!=src1_buf)
		free_compbuf(src1_use);
	if(src2_use!=src2_buf)
		free_compbuf(src2_use);
	if(fac_use!=fac_buf)
		free_compbuf(fac_use);
}
/* Pixel-to-Pixel operation, 4 Images in, 1 out */
void composit4_pixel_processor(bNode *node, CompBuf *out, CompBuf *src1_buf, float *src1_col, CompBuf *fac1_buf, float *fac1,
									  CompBuf *src2_buf, float *src2_col, CompBuf *fac2_buf, float *fac2,
									  void (*func)(bNode *, float *, float *, float *, float *, float *),
									  int src1_type, int fac1_type, int src2_type, int fac2_type)
{
	CompBuf *src1_use, *src2_use, *fac1_use, *fac2_use;
	float *outfp=out->rect, *src1fp, *src2fp, *fac1fp, *fac2fp;
	int xrad, yrad, x, y;
	
	src1_use= typecheck_compbuf(src1_buf, src1_type);
	src2_use= typecheck_compbuf(src2_buf, src2_type);
	fac1_use= typecheck_compbuf(fac1_buf, fac1_type);
	fac2_use= typecheck_compbuf(fac2_buf, fac2_type);
	
	xrad= out->xrad;
	yrad= out->yrad;
	
	for(y= -yrad; y<-yrad+out->y; y++) {
		for(x= -xrad; x<-xrad+out->x; x++, outfp+=out->type) {
			src1fp= compbuf_get_pixel(src1_use, src1_col, x, y, xrad, yrad);
			src2fp= compbuf_get_pixel(src2_use, src2_col, x, y, xrad, yrad);
			fac1fp= compbuf_get_pixel(fac1_use, fac1, x, y, xrad, yrad);
			fac2fp= compbuf_get_pixel(fac2_use, fac2, x, y, xrad, yrad);
			
			func(node, outfp, src1fp, fac1fp, src2fp, fac2fp);
		}
	}
	
	if(src1_use!=src1_buf)
		free_compbuf(src1_use);
	if(src2_use!=src2_buf)
		free_compbuf(src2_use);
	if(fac1_use!=fac1_buf)
		free_compbuf(fac1_use);
	if(fac2_use!=fac2_buf)
		free_compbuf(fac2_use);
}
CompBuf *valbuf_from_rgbabuf(CompBuf *cbuf, int channel)
{
	CompBuf *valbuf= alloc_compbuf(cbuf->x, cbuf->y, CB_VAL, 1);
	float *valf, *rectf;
	int tot;
	
	/* warning note: xof and yof are applied in pixelprocessor, but should be copied otherwise? */
	valbuf->xof= cbuf->xof;
	valbuf->yof= cbuf->yof;
	
	valf= valbuf->rect;
	
	/* defaults to returning alpha channel */
	if ((channel < CHAN_R) || (channel > CHAN_A)) channel = CHAN_A;
	
	rectf= cbuf->rect + channel;
	
	for(tot= cbuf->x*cbuf->y; tot>0; tot--, valf++, rectf+=4)
		*valf= *rectf;
	
	return valbuf;
}
static CompBuf *generate_procedural_preview(CompBuf *cbuf, int newx, int newy)
{
	CompBuf *outbuf;
	float *outfp;
	int xrad, yrad, x, y;
	
	outbuf= alloc_compbuf(newx, newy, CB_RGBA, 1);
	
	outfp= outbuf->rect;
	xrad= outbuf->xrad;
	yrad= outbuf->yrad;
	
	for(y= -yrad; y<-yrad+outbuf->y; y++)
		for(x= -xrad; x<-xrad+outbuf->x; x++, outfp+=outbuf->type)
			cbuf->rect_procedural(cbuf, outfp, (float)x/(float)xrad, (float)y/(float)yrad);
	
	return outbuf;
}
void generate_preview(void *data, bNode *node, CompBuf *stackbuf)
{
	RenderData *rd= data;
	bNodePreview *preview= node->preview;
	int xsize, ysize;
	int color_manage= rd->color_mgt_flag & R_COLOR_MANAGEMENT;
	unsigned char *rect;
	
	if(preview && stackbuf) {
		CompBuf *cbuf, *stackbuf_use;
		
		if(stackbuf->rect==NULL && stackbuf->rect_procedural==NULL) return;
		
		stackbuf_use= typecheck_compbuf(stackbuf, CB_RGBA);
		
		if(stackbuf->x > stackbuf->y) {
			xsize= 140;
			ysize= (140*stackbuf->y)/stackbuf->x;
		}
		else {
			ysize= 140;
			xsize= (140*stackbuf->x)/stackbuf->y;
		}
		
		if(stackbuf_use->rect_procedural)
			cbuf= generate_procedural_preview(stackbuf_use, xsize, ysize);
		else
			cbuf= scalefast_compbuf(stackbuf_use, xsize, ysize);
		
		/* convert to byte for preview */
		rect= MEM_callocN(sizeof(unsigned char)*4*xsize*ysize, "bNodePreview.rect");
		
		if(color_manage)
			floatbuf_to_srgb_byte(cbuf->rect, rect, 0, xsize, 0, ysize, xsize);
		else
			floatbuf_to_byte(cbuf->rect, rect, 0, xsize, 0, ysize, xsize);
		
		free_compbuf(cbuf);
		if(stackbuf_use!=stackbuf)
			free_compbuf(stackbuf_use);
		
		BLI_lock_thread(LOCK_PREVIEW);
		
		if(preview->rect)
			MEM_freeN(preview->rect);
		preview->xsize= xsize;
		preview->ysize= ysize;
		preview->rect= rect;
		
		BLI_unlock_thread(LOCK_PREVIEW);
	}
}
void do_rgba_to_yuva(bNode *node, float *out, float *in)
{
	rgb_to_yuv(in[0],in[1],in[2], &out[0], &out[1], &out[2]);
	out[3]=in[3];
}

void do_rgba_to_hsva(bNode *node, float *out, float *in)
{
	rgb_to_hsv(in[0],in[1],in[2], &out[0], &out[1], &out[2]);
	out[3]=in[3];
}

void do_rgba_to_ycca(bNode *node, float *out, float *in)
{
	rgb_to_ycc(in[0],in[1],in[2], &out[0], &out[1], &out[2], BLI_YCC_ITU_BT601);
	out[3]=in[3];
}

void do_yuva_to_rgba(bNode *node, float *out, float *in)
{
	yuv_to_rgb(in[0],in[1],in[2], &out[0], &out[1], &out[2]);
	out[3]=in[3];
}

void do_hsva_to_rgba(bNode *node, float *out, float *in)
{
	hsv_to_rgb(in[0],in[1],in[2], &out[0], &out[1], &out[2]);
	out[3]=in[3];
}

void do_ycca_to_rgba(bNode *node, float *out, float *in)
{
	ycc_to_rgb(in[0],in[1],in[2], &out[0], &out[1], &out[2], BLI_YCC_ITU_BT601);
	out[3]=in[3];
}

void do_copy_rgba(bNode *node, float *out, float *in)
{
	QUATCOPY(out, in);
}

void do_copy_rgb(bNode *node, float *out, float *in)
{
	VECCOPY(out, in);
	out[3]= 1.0f;
}

void do_copy_value(bNode *node, float *out, float *in)
{
	out[0]= in[0];
}

void do_copy_a_rgba(bNode *node, float *out, float *in, float *fac)
{
	VECCOPY(out, in);
	out[3]= *fac;
}
/* only accepts RGBA buffers */
void gamma_correct_compbuf(CompBuf *img, int inversed)
{
	float *drect;
	int x;
	
	if(img->type!=CB_RGBA) return;
	
	drect= img->rect;
	if(inversed) {
		for(x=img->x*img->y; x>0; x--, drect+=4) {
			if(drect[0]>0.0f) drect[0]= sqrt(drect[0]); else drect[0]= 0.0f;
			if(drect[1]>0.0f) drect[1]= sqrt(drect[1]); else drect[1]= 0.0f;
			if(drect[2]>0.0f) drect[2]= sqrt(drect[2]); else drect[2]= 0.0f;
		}
	}
	else {
		for(x=img->x*img->y; x>0; x--, drect+=4) {
			if(drect[0]>0.0f) drect[0]*= drect[0]; else drect[0]= 0.0f;
			if(drect[1]>0.0f) drect[1]*= drect[1]; else drect[1]= 0.0f;
			if(drect[2]>0.0f) drect[2]*= drect[2]; else drect[2]= 0.0f;
		}
	}
}
void premul_compbuf(CompBuf *img, int inversed)
{
	float *drect;
	int x;
	
	if(img->type!=CB_RGBA) return;
	
	drect= img->rect;
	if(inversed) {
		for(x=img->x*img->y; x>0; x--, drect+=4) {
			if(fabs(drect[3]) < 1e-5f) {
				drect[0]= 0.0f;
				drect[1]= 0.0f;
				drect[2]= 0.0f;
			}
			else {
				drect[0] /= drect[3];
				drect[1] /= drect[3];
				drect[2] /= drect[3];
			}
		}
	}
	else {
		for(x=img->x*img->y; x>0; x--, drect+=4) {
			drect[0] *= drect[3];
			drect[1] *= drect[3];
			drect[2] *= drect[3];
		}
	}
}
/*
 *
 * 2D Fast Hartley Transform, used for convolution
 *
 */

typedef float fREAL;

// returns the next highest power of 2 of x, as well as its log2 in L2
static unsigned int nextPow2(unsigned int x, unsigned int* L2)
{
	unsigned int pw, x_notpow2 = x & (x-1);
	*L2 = 0;
	while (x>>=1) ++(*L2);
	pw = 1 << (*L2);
	if (x_notpow2) { (*L2)++; pw<<=1; }
	return pw;
}
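
/* Worked example (editorial note): nextPow2(100, &L2) returns 128 with
 * L2 == 7; an exact power of two is returned unchanged, e.g.
 * nextPow2(64, &L2) gives 64 with L2 == 6. */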
//------------------------------------------------------------------------------

// from the FXT library by Joerg Arndt, fast in-order bit reversal
// use: r = revbin_upd(r, h) where h = N>>1
static unsigned int revbin_upd(unsigned int r, unsigned int h)
{
	while (!((r^=h)&h)) h >>= 1;
	return r;
}
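
/* Worked example (editorial note): for len = 8 (h = N>>1 = 4), repeatedly
 * updating r starting from 0 yields 4, 2, 6, 1, 5, 3, 7 -- the bit-reversed
 * visiting order used by the permutation loop in FHT() below. */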
//------------------------------------------------------------------------------
static void FHT(fREAL* data, unsigned int M, unsigned int inverse)
{
	double tt, fc, dc, fs, ds, a = M_PI;
	fREAL t1, t2;
	int n2, bd, bl, istep, k, len = 1 << M, n = 1;
	int i, j = 0;
	
	/* bit-reversal permutation */
	unsigned int Nh = len >> 1;
	for (i=1;i<(len-1);++i) {
		j = revbin_upd(j, Nh);
		if (j>i) { t1=data[i], data[i]=data[j], data[j]=t1; }
	}
	
	do {
		fREAL* data_n = &data[n];
		
		istep = n << 1;
		for (k=0; k<len; k+=istep) {
			t1 = data_n[k];
			data_n[k] = data[k] - t1;
			data[k] += t1;
		}
		
		n2 = n >> 1;
		if (n2 > 1) {
			fc = dc = cos(a);
			fs = ds = sqrt(1.0 - fc*fc); //sin(a);
			bd = n - 2;
			for (bl=1; bl<n2; bl++) {
				fREAL* data_nbd = &data_n[bd];
				fREAL* data_bd = &data[bd];
				for (k=bl; k<len; k+=istep) {
					t1 = fc*data_n[k] + fs*data_nbd[k];
					t2 = fs*data_n[k] - fc*data_nbd[k];
					data_n[k] = data[k] - t1;
					data_nbd[k] = data_bd[k] - t2;
					data[k] += t1;
					data_bd[k] += t2;
				}
				tt = fc*dc - fs*ds;
				fs = fs*dc + fc*ds;
				fc = tt;
				bd -= 2;
			}
		}
		
		if (n > 1) {
			for (k=n2; k<len; k+=istep) {
				t1 = data_n[k];
				data_n[k] = data[k] - t1;
				data[k] += t1;
			}
		}
		
		n = istep;
		a *= 0.5;
	} while (n < len);
	
	if (inverse) {
		fREAL sc = (fREAL)1 / (fREAL)len;
		for (k=0; k<len; ++k)
			data[k] *= sc;
	}
}
//------------------------------------------------------------------------------
/* 2D Fast Hartley Transform, Mx/My -> log2 of width/height,
	nzp -> the row where zero pad data starts,
	inverse -> see above */
static void FHT2D(fREAL *data, unsigned int Mx, unsigned int My,
		unsigned int nzp, unsigned int inverse)
{
	unsigned int i, j, Nx, Ny, maxy;
	fREAL t;
	
	Nx = 1 << Mx;
	Ny = 1 << My;
	
	// rows (forward transform skips 0 pad data)
	maxy = inverse ? Ny : nzp;
	for (j=0; j<maxy; ++j)
		FHT(&data[Nx*j], Mx, inverse);
	
	// transpose data
	if (Nx==Ny) { // square
		for (j=0; j<Nx; ++j)
			for (i=j+1; i<Nx; ++i) {
				unsigned int op = i + (j << Mx), np = j + (i << My);
				t=data[op], data[op]=data[np], data[np]=t;
			}
	}
	else { // rectangular, cycle-leader transposition
		unsigned int k, Nym = Ny-1, stm = 1 << (Mx + My);
		for (i=0; stm>0; i++) {
			#define pred(k) (((k & Nym) << Mx) + (k >> My))
			for (j=pred(i); j>i; j=pred(j));
			if (j < i) continue;
			for (k=i, j=pred(i); j!=i; k=j, j=pred(j), stm--)
				{ t=data[j], data[j]=data[k], data[k]=t; }
			#undef pred
			stm--;
		}
	}
	
	// swap Mx/My & Nx/Ny
	i = Nx, Nx = Ny, Ny = i;
	i = Mx, Mx = My, My = i;
	
	// now columns == transposed rows
	for (j=0; j<Ny; ++j)
		FHT(&data[Nx*j], Mx, inverse);
	
	// combine the separate row/column transforms into the true 2D transform
	for (j=0; j<=(Ny >> 1); j++) {
		unsigned int jm = (Ny - j) & (Ny-1);
		unsigned int ji = j << Mx;
		unsigned int jmi = jm << Mx;
		for (i=0; i<=(Nx >> 1); i++) {
			unsigned int im = (Nx - i) & (Nx-1);
			fREAL A = data[ji + i];
			fREAL B = data[jmi + i];
			fREAL C = data[ji + im];
			fREAL D = data[jmi + im];
			fREAL E = (fREAL)0.5*((A + D) - (B + C));
			data[ji + i] = A - E;
			data[jmi + i] = B + E;
			data[ji + im] = C + E;
			data[jmi + im] = D - E;
		}
	}
}
//------------------------------------------------------------------------------
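
/* Editorial note: the Hartley transform of a convolution is not a plain
 * per-bin product. With F, G the FHTs of the inputs and k the mirrored
 * index of i, it is
 *   H[i] = 0.5*( (F[i]G[i] - F[k]G[k]) + (F[k]G[i] + F[i]G[k]) )
 *   H[k] = 0.5*( (F[k]G[i] + F[i]G[k]) - (F[i]G[i] - F[k]G[k]) )
 * which is exactly the a/b pairing with (b+a)*0.5 and (b-a)*0.5 used in
 * fht_convolve() below; the four self-mirrored bins reduce to plain
 * products. */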
/* 2D convolution calc, d1 *= d2, M/N -> log2 of width/height */
static void fht_convolve(fREAL* d1, fREAL* d2, unsigned int M, unsigned int N)
{
	fREAL a, b;
	unsigned int i, j, k, L, mj, mL;
	unsigned int m = 1 << M, n = 1 << N;
	unsigned int m2 = 1 << (M-1), n2 = 1 << (N-1);
	unsigned int mn2 = m << (N-1);
	
	/* the four self-mirrored bins are plain products */
	d1[0] *= d2[0];
	d1[mn2] *= d2[mn2];
	d1[m2] *= d2[m2];
	d1[m2 + mn2] *= d2[m2 + mn2];
	for (i=1; i<m2; i++) {
		k = m - i;
		a = d1[i]*d2[i] - d1[k]*d2[k];
		b = d1[k]*d2[i] + d1[i]*d2[k];
		d1[i] = (b + a)*(fREAL)0.5;
		d1[k] = (b - a)*(fREAL)0.5;
		a = d1[i + mn2]*d2[i + mn2] - d1[k + mn2]*d2[k + mn2];
		b = d1[k + mn2]*d2[i + mn2] + d1[i + mn2]*d2[k + mn2];
		d1[i + mn2] = (b + a)*(fREAL)0.5;
		d1[k + mn2] = (b - a)*(fREAL)0.5;
	}
	for (j=1; j<n2; j++) {
		L = n - j;
		mj = j << M;
		mL = L << M;
		a = d1[mj]*d2[mj] - d1[mL]*d2[mL];
		b = d1[mL]*d2[mj] + d1[mj]*d2[mL];
		d1[mj] = (b + a)*(fREAL)0.5;
		d1[mL] = (b - a)*(fREAL)0.5;
		a = d1[m2 + mj]*d2[m2 + mj] - d1[m2 + mL]*d2[m2 + mL];
		b = d1[m2 + mL]*d2[m2 + mj] + d1[m2 + mj]*d2[m2 + mL];
		d1[m2 + mj] = (b + a)*(fREAL)0.5;
		d1[m2 + mL] = (b - a)*(fREAL)0.5;
	}
	for (i=1; i<m2; i++) {
		k = m - i;
		for (j=1; j<n2; j++) {
			L = n - j;
			mj = j << M;
			mL = L << M;
			a = d1[i + mj]*d2[i + mj] - d1[k + mL]*d2[k + mL];
			b = d1[k + mL]*d2[i + mj] + d1[i + mj]*d2[k + mL];
			d1[i + mj] = (b + a)*(fREAL)0.5;
			d1[k + mL] = (b - a)*(fREAL)0.5;
			a = d1[i + mL]*d2[i + mL] - d1[k + mj]*d2[k + mj];
			b = d1[k + mj]*d2[i + mL] + d1[i + mL]*d2[k + mj];
			d1[i + mL] = (b + a)*(fREAL)0.5;
			d1[k + mj] = (b - a)*(fREAL)0.5;
		}
	}
}
1000 //------------------------------------------------------------------------------
1002 void convolve(CompBuf* dst, CompBuf* in1, CompBuf* in2)
1004 fREAL *data1, *data2, *fp;
1005 unsigned int w2, h2, hw, hh, log2_w, log2_h;
1008 int xbl, ybl, nxb, nyb, xbsz, ybsz;
1011 CompBuf* rdst = alloc_compbuf(in1->x, in1->y, in1->type, 1);
1013 // convolution result width & height
1016 // FFT pow2 required size & log2
1017 w2 = nextPow2(w2, &log2_w);
1018 h2 = nextPow2(h2, &log2_h);
1021 data1 = (fREAL*)MEM_callocN(3*w2*h2*sizeof(fREAL), "convolve_fast FHT data1");
1022 data2 = (fREAL*)MEM_callocN(w2*h2*sizeof(fREAL), "convolve_fast FHT data2");
1024 // normalize convolutor
1025 wt[0] = wt[1] = wt[2] = 0.f;
1026 for (y=0; y<in2->y; y++) {
1027 colp = (fRGB*)&in2->rect[y*in2->x*in2->type];
1028 for (x=0; x<in2->x; x++)
1029 fRGB_add(wt, colp[x]);
1031 if (wt[0] != 0.f) wt[0] = 1.f/wt[0];
1032 if (wt[1] != 0.f) wt[1] = 1.f/wt[1];
1033 if (wt[2] != 0.f) wt[2] = 1.f/wt[2];
1034 for (y=0; y<in2->y; y++) {
1035 colp = (fRGB*)&in2->rect[y*in2->x*in2->type];
1036 for (x=0; x<in2->x; x++)
1037 fRGB_colormult(colp[x], wt);
1040 // copy image data, unpacking interleaved RGBA into separate channels
1041 // only need to calc data1 once
1043 // block add-overlap
1046 xbsz = (w2 + 1) - in2->x;
1047 ybsz = (h2 + 1) - in2->y;
1048 nxb = in1->x / xbsz;
1049 if (in1->x % xbsz) nxb++;
1050 nyb = in1->y / ybsz;
1051 if (in1->y % ybsz) nyb++;
1052 for (ybl=0; ybl<nyb; ybl++) {
1053 for (xbl=0; xbl<nxb; xbl++) {
1055 // each channel one by one
1056 for (ch=0; ch<3; ch++) {
1057 fREAL* data1ch = &data1[ch*w2*h2];
1059 // only need to calc fht data from in2 once, can re-use for every block
1061 // in2, channel ch -> data1
1062 for (y=0; y<in2->y; y++) {
1063 fp = &data1ch[y*w2];
1064 colp = (fRGB*)&in2->rect[y*in2->x*in2->type];
1065 for (x=0; x<in2->x; x++)
1066 fp[x] = colp[x][ch];
1070 // in1, channel ch -> data2
1071 memset(data2, 0, w2*h2*sizeof(fREAL));
1072 for (y=0; y<ybsz; y++) {
1073 int yy = ybl*ybsz + y;
1074 if (yy >= in1->y) continue;
1076 colp = (fRGB*)&in1->rect[yy*in1->x*in1->type];
1077 for (x=0; x<xbsz; x++) {
1078 int xx = xbl*xbsz + x;
1079 if (xx >= in1->x) continue;
1080 fp[x] = colp[xx][ch];
1085 // zero pad data start is different for each == height+1
1086 if (!in2done) FHT2D(data1ch, log2_w, log2_h, in2->y+1, 0);
1087 FHT2D(data2, log2_w, log2_h, in2->y+1, 0);
1089 // FHT2D transposed data, row/col now swapped
1090 // convolve & inverse FHT
1091 fht_convolve(data2, data1ch, log2_h, log2_w);
1092 FHT2D(data2, log2_h, log2_w, 0, 1);
1093 // data again transposed, so in order again
1095 // overlap-add result
1096 for (y=0; y<(int)h2; y++) {
1097 const int yy = ybl*ybsz + y - hh;
1098 if ((yy < 0) || (yy >= in1->y)) continue;
1100 colp = (fRGB*)&rdst->rect[yy*in1->x*in1->type];
1101 for (x=0; x<(int)w2; x++) {
1102 const int xx = xbl*xbsz + x - hw;
1103 if ((xx < 0) || (xx >= in1->x)) continue;
1104 colp[xx][ch] += fp[x];
1115 memcpy(dst->rect, rdst->rect, sizeof(float)*dst->x*dst->y*dst->type);
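
/* Illustrative sketch (editorial note, assumed caller in the style of the
 * glare node): convolve an image with a small RGBA kernel buffer; dst must
 * already be allocated with in1's size and type.
 *
 *   CompBuf *res= alloc_compbuf(img->x, img->y, CB_RGBA, 1);
 *   convolve(res, img, kernel);   // kernel: a CB_RGBA CompBuf
 */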
/*
 *
 * Utility functions qd_* should probably be integrated better with the other functions here.
 *
 */
// sets col to the pixel color at (x, y)
void qd_getPixel(CompBuf* src, int x, int y, float* col)
{
	if(src->rect_procedural) {
		float bc[4];
		src->rect_procedural(src, bc, (float)x/(float)src->xrad, (float)y/(float)src->yrad);
		
		switch(src->type){
			/* these fall through to get all the channels */
			case CB_RGBA: col[3]=bc[3];
			case CB_VEC3: col[2]=bc[2];
			case CB_VEC2: col[1]=bc[1];
			case CB_VAL: col[0]=bc[0];
		}
	}
	else if ((x >= 0) && (x < src->x) && (y >= 0) && (y < src->y)) {
		float* bc = &src->rect[(x + y*src->x)*src->type];
		switch(src->type){
			/* these fall through to get all the channels */
			case CB_RGBA: col[3]=bc[3];
			case CB_VEC3: col[2]=bc[2];
			case CB_VEC2: col[1]=bc[1];
			case CB_VAL: col[0]=bc[0];
		}
	}
	else {
		switch(src->type){
			/* these fall through to zero all the channels */
			case CB_RGBA: col[3]=0.0;
			case CB_VEC3: col[2]=0.0;
			case CB_VEC2: col[1]=0.0;
			case CB_VAL: col[0]=0.0;
		}
	}
}
// sets pixel (x, y) to color col
void qd_setPixel(CompBuf* src, int x, int y, float* col)
{
	if ((x >= 0) && (x < src->x) && (y >= 0) && (y < src->y)) {
		float* bc = &src->rect[(x + y*src->x)*src->type];
		switch(src->type){
			/* these fall through to set all the channels */
			case CB_RGBA: bc[3]=col[3];
			case CB_VEC3: bc[2]=col[2];
			case CB_VEC2: bc[1]=col[1];
			case CB_VAL: bc[0]=col[0];
		}
	}
}
// adds col to the pixel color at (x, y)
void qd_addPixel(CompBuf* src, int x, int y, float* col)
{
	if ((x >= 0) && (x < src->x) && (y >= 0) && (y < src->y)) {
		float* bc = &src->rect[(x + y*src->x)*src->type];
		bc[0] += col[0], bc[1] += col[1], bc[2] += col[2];
	}
}
// multiplies the pixel at (x, y) by factor value f
void qd_multPixel(CompBuf* src, int x, int y, float f)
{
	if ((x >= 0) && (x < src->x) && (y >= 0) && (y < src->y)) {
		float* bc = &src->rect[(x + y*src->x)*src->type];
		bc[0] *= f, bc[1] *= f, bc[2] *= f;
	}
}
// bilinear interpolation with wraparound
void qd_getPixelLerpWrap(CompBuf* src, float u, float v, float* col)
{
	const float ufl = floor(u), vfl = floor(v);
	const int nx = (int)ufl % src->x, ny = (int)vfl % src->y;
	const int x1 = (nx < 0) ? (nx + src->x) : nx;
	const int y1 = (ny < 0) ? (ny + src->y) : ny;
	const int x2 = (x1 + 1) % src->x, y2 = (y1 + 1) % src->y;
	const float* c00 = &src->rect[(x1 + y1*src->x)*src->type];
	const float* c10 = &src->rect[(x2 + y1*src->x)*src->type];
	const float* c01 = &src->rect[(x1 + y2*src->x)*src->type];
	const float* c11 = &src->rect[(x2 + y2*src->x)*src->type];
	const float uf = u - ufl, vf = v - vfl;
	const float w00=(1.f-uf)*(1.f-vf), w10=uf*(1.f-vf), w01=(1.f-uf)*vf, w11=uf*vf;
	col[0] = w00*c00[0] + w10*c10[0] + w01*c01[0] + w11*c11[0];
	if (src->type != CB_VAL) {
		col[1] = w00*c00[1] + w10*c10[1] + w01*c01[1] + w11*c11[1];
		col[2] = w00*c00[2] + w10*c10[2] + w01*c01[2] + w11*c11[2];
		col[3] = w00*c00[3] + w10*c10[3] + w01*c01[3] + w11*c11[3];
	}
}
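
/* Worked example (editorial note): for u = -0.25 with src->x = 4,
 * ufl = -1, so nx = -1 (C's truncating %), x1 = 3 and x2 = 0;
 * uf = 0.75, i.e. the sample falls between the last and first column,
 * weighted 0.25 : 0.75 -- the expected wraparound behaviour. */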
// as above, without wraparound
void qd_getPixelLerp(CompBuf* src, float u, float v, float* col)
{
	const float ufl = floor(u), vfl = floor(v);
	const int x1 = (int)ufl, y1 = (int)vfl;
	const int x2 = (int)ceil(u), y2 = (int)ceil(v);
	if ((x2 >= 0) && (y2 >= 0) && (x1 < src->x) && (y1 < src->y)) {
		const float B[4] = {0,0,0,0};
		const int ox1 = (x1 < 0), oy1 = (y1 < 0), ox2 = (x2 >= src->x), oy2 = (y2 >= src->y);
		const float* c00 = (ox1 || oy1) ? B : &src->rect[(x1 + y1*src->x)*src->type];
		const float* c10 = (ox2 || oy1) ? B : &src->rect[(x2 + y1*src->x)*src->type];
		const float* c01 = (ox1 || oy2) ? B : &src->rect[(x1 + y2*src->x)*src->type];
		const float* c11 = (ox2 || oy2) ? B : &src->rect[(x2 + y2*src->x)*src->type];
		const float uf = u - ufl, vf = v - vfl;
		const float w00=(1.f-uf)*(1.f-vf), w10=uf*(1.f-vf), w01=(1.f-uf)*vf, w11=uf*vf;
		col[0] = w00*c00[0] + w10*c10[0] + w01*c01[0] + w11*c11[0];
		if (src->type != CB_VAL) {
			col[1] = w00*c00[1] + w10*c10[1] + w01*c01[1] + w11*c11[1];
			col[2] = w00*c00[2] + w10*c10[2] + w01*c01[2] + w11*c11[2];
			col[3] = w00*c00[3] + w10*c10[3] + w01*c01[3] + w11*c11[3];
		}
	}
	else col[0] = col[1] = col[2] = col[3] = 0.f;
}
// as above, sampling only one channel
void qd_getPixelLerpChan(CompBuf* src, float u, float v, int chan, float* out)
{
	const float ufl = floor(u), vfl = floor(v);
	const int x1 = (int)ufl, y1 = (int)vfl;
	const int x2 = (int)ceil(u), y2 = (int)ceil(v);
	if (chan >= src->type) chan = 0;
	if ((x2 >= 0) && (y2 >= 0) && (x1 < src->x) && (y1 < src->y)) {
		const float B[4] = {0,0,0,0};
		const int ox1 = (x1 < 0), oy1 = (y1 < 0), ox2 = (x2 >= src->x), oy2 = (y2 >= src->y);
		const float* c00 = (ox1 || oy1) ? B : &src->rect[(x1 + y1*src->x)*src->type + chan];
		const float* c10 = (ox2 || oy1) ? B : &src->rect[(x2 + y1*src->x)*src->type + chan];
		const float* c01 = (ox1 || oy2) ? B : &src->rect[(x1 + y2*src->x)*src->type + chan];
		const float* c11 = (ox2 || oy2) ? B : &src->rect[(x2 + y2*src->x)*src->type + chan];
		const float uf = u - ufl, vf = v - vfl;
		const float w00=(1.f-uf)*(1.f-vf), w10=uf*(1.f-vf), w01=(1.f-uf)*vf, w11=uf*vf;
		out[0] = w00*c00[0] + w10*c10[0] + w01*c01[0] + w11*c11[0];
	}
	else *out = 0.f;
}
CompBuf* qd_downScaledCopy(CompBuf* src, int scale)
{
	CompBuf* fbuf;
	if (scale <= 1)
		fbuf = dupalloc_compbuf(src);
	else {
		int nw = src->x/scale, nh = src->y/scale;
		if ((2*(src->x % scale)) > scale) nw++;
		if ((2*(src->y % scale)) > scale) nh++;
		fbuf = alloc_compbuf(nw, nh, src->type, 1);
		{
			int x, y, xx, yy, sx, sy, mx, my;
			float colsum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
			float fscale = 1.f/(float)(scale*scale);
			for (y=0; y<nh; y++) {
				fRGB* fcolp = (fRGB*)&fbuf->rect[y*fbuf->x*fbuf->type];
				yy = y*scale;
				my = yy + scale;
				if (my > src->y) my = src->y;
				for (x=0; x<nw; x++) {
					xx = x*scale;
					mx = xx + scale;
					if (mx > src->x) mx = src->x;
					colsum[0] = colsum[1] = colsum[2] = 0.f;
					for (sy=yy; sy<my; sy++) {
						fRGB* scolp = (fRGB*)&src->rect[sy*src->x*src->type];
						for (sx=xx; sx<mx; sx++)
							fRGB_add(colsum, scolp[sx]);
					}
					fRGB_mult(colsum, fscale);
					fRGB_copy(fcolp[x], colsum);
				}
			}
		}
	}
	return fbuf;
}
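
/* Editorial note on the rounding rule above: an extra destination pixel is
 * added only when the leftover strip is wider than half a block, e.g. with
 * scale = 4: src->x = 10 (remainder 2) keeps nw = 2, while src->x = 11
 * (remainder 3) bumps nw to 3. */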
// fast g.blur, per channel
// xy var. bits 1 & 2 can be used to blur in x or y direction separately
void IIR_gauss(CompBuf* src, float sigma, int chan, int xy)
{
	double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
	float *X, *Y, *W;
	int i, x, y, sz;
	
	// <0.5 not valid, though can have a possibly useful sort of sharpening effect
	if (sigma < 0.5) return;
	
	if ((xy < 1) || (xy > 3)) xy = 3;
	
	// see "Recursive Gabor Filtering" by Young/VanVliet
	// all factors here in double precision; required, since single precision seems to blow up if sigma > ~200
	if (sigma >= 3.556)
		q = 0.9804*(sigma - 3.556) + 2.5091;
	else // sigma >= 0.5
		q = (0.0561*sigma + 0.5784)*sigma - 0.2568;
	q2 = q*q;
	sc = (1.1668 + q)*(3.203729649 + (2.21566 + q)*q);
	// no gabor filtering here, so no complex multiplies, just the regular coefs.
	// all negated here, so as not to have to recalc Triggs/Sdika matrix
	cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc;
	cf[2] = -q2*(3.38246 + 3.0*q)/sc;
	cf[3] = q2*q/sc;
	cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
	
	// Triggs/Sdika border corrections,
	// it seems to work, not entirely sure if it is actually totally correct.
	// Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
	// found one other implementation by Christoph Lampert,
	// but neither seem to be quite the same; result seems to be ok so far anyway.
	// Extra scale factor here to not have to do it in filter,
	// though maybe this had something to do with the precision errors
	sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3]));
	tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]);
	tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1]));
	tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
	tsM[3] = sc*(cf[1] + cf[3]*cf[2]);
	tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1]));
	tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]);
	tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]);
	tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]);
	tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
	
#define YVV(L)\
{\
	W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0];\
	W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0];\
	W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0];\
	for (i=3; i<L; i++)\
		W[i] = cf[0]*X[i] + cf[1]*W[i-1] + cf[2]*W[i-2] + cf[3]*W[i-3];\
	tsu[0] = W[L-1] - X[L-1];\
	tsu[1] = W[L-2] - X[L-1];\
	tsu[2] = W[L-3] - X[L-1];\
	tsv[0] = tsM[0]*tsu[0] + tsM[1]*tsu[1] + tsM[2]*tsu[2] + X[L-1];\
	tsv[1] = tsM[3]*tsu[0] + tsM[4]*tsu[1] + tsM[5]*tsu[2] + X[L-1];\
	tsv[2] = tsM[6]*tsu[0] + tsM[7]*tsu[1] + tsM[8]*tsu[2] + X[L-1];\
	Y[L-1] = cf[0]*W[L-1] + cf[1]*tsv[0] + cf[2]*tsv[1] + cf[3]*tsv[2];\
	Y[L-2] = cf[0]*W[L-2] + cf[1]*Y[L-1] + cf[2]*tsv[0] + cf[3]*tsv[1];\
	Y[L-3] = cf[0]*W[L-3] + cf[1]*Y[L-2] + cf[2]*Y[L-1] + cf[3]*tsv[0];\
	for (i=L-4; i>=0; i--)\
		Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3];\
}
	
	// intermediate buffers
	sz = MAX2(src->x, src->y);
	X = MEM_callocN(sz*sizeof(float), "IIR_gauss X buf");
	Y = MEM_callocN(sz*sizeof(float), "IIR_gauss Y buf");
	W = MEM_callocN(sz*sizeof(float), "IIR_gauss W buf");
	if (xy & 1) {	// horizontal
		for (y=0; y<src->y; ++y) {
			const int yx = y*src->x;
			for (x=0; x<src->x; ++x)
				X[x] = src->rect[(x + yx)*src->type + chan];
			YVV(src->x);
			for (x=0; x<src->x; ++x)
				src->rect[(x + yx)*src->type + chan] = Y[x];
		}
	}
	if (xy & 2) {	// vertical
		for (x=0; x<src->x; ++x) {
			for (y=0; y<src->y; ++y)
				X[y] = src->rect[(x + y*src->x)*src->type + chan];
			YVV(src->y);
			for (y=0; y<src->y; ++y)
				src->rect[(x + y*src->x)*src->type + chan] = Y[y];
		}
	}
	
	MEM_freeN(X);
	MEM_freeN(W);
	MEM_freeN(Y);
#undef YVV
}
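
/* Illustrative usage sketch (editorial note, assumed caller): blur every
 * channel of an RGBA buffer in both directions (xy = 3):
 *
 *   IIR_gauss(buf, sigma, CHAN_R, 3);
 *   IIR_gauss(buf, sigma, CHAN_G, 3);
 *   IIR_gauss(buf, sigma, CHAN_B, 3);
 *   IIR_gauss(buf, sigma, CHAN_A, 3);
 */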