add Anti-Aliasing (very rough draft algorithm, NOT FINAL version) to raskter lib...
[blender.git] / intern / raskter / raskter.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2012 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Peter Larabell.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27 /** \file raskter.c
28  *  \ingroup RASKTER
29  */
30
31 #include <stdlib.h>
32 #include "raskter.h"
33
/*
 * Local copies of the helper macros from BLI_utildefines.h.
 * Each argument may be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define MIN2(x, y)  (((x) < (y)) ? (x) : (y))
#define MAX2(x, y)  (((x) > (y)) ? (x) : (y))
#define ABS(a)      (((a) < 0) ? (-(a)) : (a))
38
/* Per-edge bookkeeping used while scan converting a polygon. */
struct e_status {
	int x;                    /* X position of the edge on the current scan line */
	int ybeg;                 /* scan line on which the edge starts */
	int xshift;               /* whole-pixel X step applied on every scan line */
	int xdir;                 /* +1 or -1: direction of the extra one-pixel step */
	int drift;                /* running error term for the incremental stepping */
	int drift_inc;            /* added to drift on every scan line */
	int drift_dec;            /* subtracted from drift whenever the extra step is taken */
	int num;                  /* scan lines remaining before the edge is finished */
	struct e_status *e_next;  /* next edge in whichever list this edge is linked into */
};
50
/* Description of the float pixel buffer that is drawn into. */
struct r_buffer_stats {
	float *buf;   /* first pixel; rows are addressed as buf + y * sizex */
	int sizex;    /* buffer width in pixels */
	int sizey;    /* buffer height in pixels */
};
56
57 struct r_fill_context {
58         struct e_status *all_edges, *possible_edges;
59         struct r_buffer_stats rb;
60 };
61
62 /*
63  * Sort all the edges of the input polygon by Y, then by X, of the "first" vertex encountered.
64  * This will ensure we can scan convert the entire poly in one pass.
65  *
66  * Really the poly should be clipped to the frame buffer's dimensions here for speed of drawing
67  * just the poly. Since the DEM code could end up being coupled with this, we'll keep it separate
68  * for now.
69  */
70 static void preprocess_all_edges(struct r_fill_context *ctx, struct poly_vert *verts, int num_verts, struct e_status *open_edge) {
71         int i;
72         int xbeg;
73         int ybeg;
74         int xend;
75         int yend;
76         int dx;
77         int dy;
78         int temp_pos;
79         int xdist;
80         struct e_status *e_new;
81         struct e_status *next_edge;
82         struct e_status **next_edge_ref;
83         struct poly_vert *v;
84         /* set up pointers */
85         v = verts;
86         ctx->all_edges = NULL;
87         /* loop all verts */
88         for (i = 0; i < num_verts; i++) {
89                 /* determine beginnings and endings of edges, linking last vertex to first vertex */
90                 xbeg = v[i].x;
91                 ybeg = v[i].y;
92                 if (i) {
93                         /* we're not at the last vert, so end of the edge is the previous vertex */
94                         xend = v[i - 1].x;
95                         yend = v[i - 1].y;
96                 } else {
97                         /* we're at the first vertex, so the "end" of this edge is the last vertex */
98                         xend = v[num_verts - 1].x;
99                         yend = v[num_verts - 1].y;
100                 }
101                 /* make sure our edges are facing the correct direction */
102                 if (ybeg > yend) {
103                         /* flip the Xs */
104                         temp_pos = xbeg;
105                         xbeg = xend;
106                         xend = temp_pos;
107                         /* flip the Ys */
108                         temp_pos = ybeg;
109                         ybeg = yend;
110                         yend = temp_pos;
111                 }
112
113                 /* calculate y delta */
114                 dy = yend - ybeg;
115                 /* dont draw horizontal lines directly, they are scanned as part of the edges they connect, so skip em. :) */
116                 if (dy) {
117                         /* create the edge and determine it's slope (for incremental line drawing) */
118                         e_new = open_edge++;
119
120                         /* calculate x delta */
121                         dx = xend - xbeg;
122                         if (dx > 0) {
123                                 e_new->xdir = 1;
124                                 xdist = dx;
125                         } else {
126                                 e_new->xdir = -1;
127                                 xdist = -dx;
128                         }
129
130                         e_new->x = xbeg;
131                         e_new->ybeg = ybeg;
132                         e_new->num = dy;
133                         e_new->drift_dec = dy;
134
135                         /* calculate deltas for incremental drawing */
136                         if (dx >= 0) {
137                                 e_new->drift = 0;
138                         } else {
139                                 e_new->drift = -dy + 1;
140                         }
141                         if (dy >= xdist) {
142                                 e_new->drift_inc = xdist;
143                                 e_new->xshift = 0;
144                         } else {
145                                 e_new->drift_inc = xdist % dy;
146                                 e_new->xshift = (xdist / dy) * e_new->xdir;
147                         }
148                         next_edge_ref = &ctx->all_edges;
149                         /* link in all the edges, in sorted order */
150                         for (;; ) {
151                                 next_edge = *next_edge_ref;
152                                 if (!next_edge || (next_edge->ybeg > ybeg) || ((next_edge->ybeg == ybeg) && (next_edge->x >= xbeg))) {
153                                         e_new->e_next = next_edge;
154                                         *next_edge_ref = e_new;
155                                         break;
156                                 }
157                                 next_edge_ref = &next_edge->e_next;
158                         }
159                 }
160         }
161 }
162
163 /*
164  * This function clips drawing to the frame buffer. That clipping will likely be moved into the preprocessor
165  * for speed, but waiting on final design choices for curve-data before eliminating data the DEM code will need
166  * if it ends up being coupled with this function.
167  */
168 static int rast_scan_fill(struct r_fill_context *ctx, struct poly_vert *verts, int num_verts, float intensity) {
169         int x_curr;                 /* current pixel position in X */
170         int y_curr;                 /* current scan line being drawn */
171         int yp;                     /* y-pixel's position in frame buffer */
172         int swixd = 0;              /* whether or not edges switched position in X */
173         float *cpxl;                /* pixel pointers... */
174         float *mpxl;
175         float *spxl;
176         struct e_status *e_curr;    /* edge pointers... */
177         struct e_status *e_temp;
178         struct e_status *edgbuf;
179         struct e_status **edgec;
180
181
182         /*
183          * If the number of verts specified to render as a polygon is less than 3,
184          * return immediately. Obviously we cant render a poly with sides < 3. The
185          * return for this we set to 1, simply so it can be distinguished from the
186          * next place we could return, /home/guest/blender-svn/soc-2011-tomato/intern/raskter/raskter.
187          * which is a failure to allocate memory.
188          */
189         if (num_verts < 3) {
190                 return(1);
191         }
192
193         /*
194          * Try to allocate an edge buffer in memory. needs to be the size of the edge tracking data
195          * multiplied by the number of edges, which is always equal to the number of verts in
196          * a 2D polygon. Here we return 0 to indicate a memory allocation failure, as opposed to a 1 for
197          * the preceeding error, which was a rasterization request on a 2D poly with less than
198          * 3 sides.
199          */
200         if ((edgbuf = (struct e_status *)(malloc(sizeof(struct e_status) * num_verts))) == NULL) {
201                 return(0);
202         }
203
204         /*
205          * Do some preprocessing on all edges. This constructs a table structure in memory of all
206          * the edge properties and can "flip" some edges so sorting works correctly.
207          */
208         preprocess_all_edges(ctx, verts, num_verts, edgbuf);
209
210         /*
211          * Set the pointer for tracking the edges currently in processing to NULL to make sure
212          * we don't get some crazy value after initialization.
213          */
214         ctx->possible_edges = NULL;
215
216         /*
217          * Loop through all scan lines to be drawn. Since we sorted by Y values during
218          * preprocess_all_edges(), we can already exact values for the lowest and
219          * highest Y values we could possibly need by induction. The preprocessing sorted
220          * out edges by Y position, we can cycle the current edge being processed once
221          * it runs out of Y pixels. When we have no more edges, meaning the current edge
222          * is NULL after setting the "current" edge to be the previous current edge's
223          * "next" edge in the Y sorted edge connection chain, we can stop looping Y values,
224          * since we can't possibly have more scan lines if we ran out of edges. :)
225          *
226          * TODO: This clips Y to the frame buffer, which should be done in the preprocessor, but for now is done here.
227          *       Will get changed once DEM code gets in.
228          */
229         for (y_curr = ctx->all_edges->ybeg; (ctx->all_edges || ctx->possible_edges); y_curr++) {
230
231                 /*
232                  * Link any edges that start on the current scan line into the list of
233                  * edges currently needed to draw at least this, if not several, scan lines.
234                  */
235
236                 /*
237                  * Set the current edge to the beginning of the list of edges to be rasterized
238                  * into this scan line.
239                  *
240                  * We could have lots of edge here, so iterate over all the edges needed. The
241                  * preprocess_all_edges() function sorted edges by X within each chunk of Y sorting
242                  * so we safely cycle edges to thier own "next" edges in order.
243                  *
244                  * At each iteration, make sure we still have a non-NULL edge.
245                  */
246                 for (edgec = &ctx->possible_edges; ctx->all_edges && (ctx->all_edges->ybeg == y_curr); ) {
247                         x_curr = ctx->all_edges->x;                  /* Set current X position. */
248                         for (;; ) {                                  /* Start looping edges. Will break when edges run out. */
249                                 e_curr = *edgec;                         /* Set up a current edge pointer. */
250                                 if (!e_curr || (e_curr->x >= x_curr)) {  /* If we have an no edge, or we need to skip some X-span, */
251                                         e_temp = ctx->all_edges->e_next;     /* set a temp "next" edge to test. */
252                                         *edgec = ctx->all_edges;             /* Add this edge to the list to be scanned. */
253                                         ctx->all_edges->e_next = e_curr;     /* Set up the next edge. */
254                                         edgec = &ctx->all_edges->e_next;     /* Set our list to the next edge's location in memory. */
255                                         ctx->all_edges = e_temp;             /* Skip the NULL or bad X edge, set pointer to next edge. */
256                                         break;                               /* Stop looping edges (since we ran out or hit empty X span. */
257                                 } else {
258                                         edgec = &e_curr->e_next;             /* Set the pointer to the edge list the "next" edge. */
259                                 }
260                         }
261                 }
262
263                 /*
264                  * Determine the current scan line's offset in the pixel buffer based on its Y position.
265                  * Basically we just multiply the current scan line's Y value by the number of pixels in each line.
266                  */
267                 yp = y_curr * ctx->rb.sizex;
268                 /*
269                  * Set a "scan line pointer" in memory. The location of the buffer plus the row offset.
270                  */
271                 spxl = ctx->rb.buf + (yp);
272                 /*
273                  * Set up the current edge to the first (in X) edge. The edges which could possibly be in this
274                  * list were determined in the preceeding edge loop above. They were already sorted in X by the
275                  * initial processing function.
276                  *
277                  * At each iteration, test for a NULL edge. Since we'll keep cycling edge's to their own "next" edge
278                  * we will eventually hit a NULL when the list runs out.
279                  */
280                 for (e_curr = ctx->possible_edges; e_curr; e_curr = e_curr->e_next) {
281                         /*
282                          * Calculate a span of pixels to fill on the current scan line.
283                          *
284                          * Set the current pixel pointer by adding the X offset to the scan line's start offset.
285                          * Cycle the current edge the next edge.
286                          * Set the max X value to draw to be one less than the next edge's first pixel. This way we are
287                          * sure not to ever get into a situation where we have overdraw. (drawing the same pixel more than
288                          * one time because it's on a vertex connecting two edges)
289                          *
290                          * Then blast through all the pixels in the span, advancing the pointer and setting the color to white.
291                          *
292                          * TODO: Here we clip to the scan line, this is not efficient, and should be done in the preprocessor,
293                          *       but for now it is done here until the DEM code comes in.
294                          */
295
296                         /* set up xmin and xmax bounds on this scan line */
297                         cpxl = spxl + MAX2(e_curr->x, 0);
298                         e_curr = e_curr->e_next;
299                         mpxl = spxl + MIN2(e_curr->x, ctx->rb.sizex) - 1;
300
301                         if ((y_curr >= 0) && (y_curr < ctx->rb.sizey)) {
302                                 /* draw the pixels. */
303                                 for(; cpxl <= mpxl; *cpxl++ += intensity);
304                         }
305                 }
306
307                 /*
308                  * Loop through all edges of polygon that could be hit by this scan line,
309                  * and figure out their x-intersections with the next scan line.
310                  *
311                  * Either A.) we wont have any more edges to test, or B.) we just add on the
312                  * slope delta computed in preprocessing step. Since this draws non-antialiased
313                  * polygons, we dont have fractional positions, so we only move in x-direction
314                  * when needed to get all the way to the next pixel over...
315                  */
316                 for (edgec = &ctx->possible_edges; (e_curr = *edgec); ) {
317                         if (!(--(e_curr->num))) {
318                                 *edgec = e_curr->e_next;
319                         } else {
320                                 e_curr->x += e_curr->xshift;
321                                 if ((e_curr->drift += e_curr->drift_inc) > 0) {
322                                         e_curr->x += e_curr->xdir;
323                                         e_curr->drift -= e_curr->drift_dec;
324                                 }
325                                 edgec = &e_curr->e_next;
326                         }
327                 }
328                 /*
329                  * It's possible that some edges may have crossed during the last step, so we'll be sure
330                  * that we ALWAYS intersect scan lines in order by shuffling if needed to make all edges
331                  * sorted by x-intersection coordinate. We'll always scan through at least once to see if
332                  * edges crossed, and if so, we set the 'swixd' flag. If 'swixd' gets set on the initial
333                  * pass, then we know we need to sort by x, so then cycle through edges again and perform
334                  * the sort.-
335                  */
336                 if (ctx->possible_edges) {
337                         for (edgec = &ctx->possible_edges; (e_curr = *edgec)->e_next; edgec = &(*edgec)->e_next) {
338                                 /* if the current edge hits scan line at greater X than the next edge, we need to exchange the edges */
339                                 if (e_curr->x > e_curr->e_next->x) {
340                                         *edgec = e_curr->e_next;
341                                         /* exchange the pointers */
342                                         e_temp = e_curr->e_next->e_next;
343                                         e_curr->e_next->e_next = e_curr;
344                                         e_curr->e_next = e_temp;
345                                         /* set flag that we had at least one switch */
346                                         swixd = 1;
347                                 }
348                         }
349                         /* if we did have a switch, look for more (there will more if there was one) */
350                         for (;; ) {
351                                 /* reset exchange flag so it's only set if we encounter another one */
352                                 swixd = 0;
353                                 for (edgec = &ctx->possible_edges; (e_curr = *edgec)->e_next; edgec = &(*edgec)->e_next) {
354                                         /* again, if current edge hits scan line at higher X than next edge, exchange the edges and set flag */
355                                         if (e_curr->x > e_curr->e_next->x) {
356                                                 *edgec = e_curr->e_next;
357                                                 /* exchange the pointers */
358                                                 e_temp = e_curr->e_next->e_next;
359                                                 e_curr->e_next->e_next = e_curr;
360                                                 e_curr->e_next = e_temp;
361                                                 /* flip the exchanged flag */
362                                                 swixd = 1;
363                                         }
364                                 }
365                                 /* if we had no exchanges, we're done reshuffling the pointers */
366                                 if (!swixd) {
367                                         break;
368                                 }
369                         }
370                 }
371         }
372
373         free(edgbuf);
374         return 1;
375 }
376
377 int PLX_raskterize(float (*base_verts)[2], int num_base_verts,
378                                    float *buf, int buf_x, int buf_y, int do_mask_AA) {
379         int subdiv_AA = (do_mask_AA != 0)? 8:0;
380         int i;                                   /* i: Loop counter. */
381         int sAx;
382         int sAy;
383         struct poly_vert *ply;                   /* ply: Pointer to a list of integer buffer-space vertex coordinates. */
384         struct r_fill_context ctx = {0};
385         const float buf_x_f = (float)(buf_x);
386         const float buf_y_f = (float)(buf_y);
387         float div_offset=(1.0f / (float)(subdiv_AA));
388         float div_offset_static = 0.5f * (float)(subdiv_AA) * div_offset;
389         /*
390          * Allocate enough memory for our poly_vert list. It'll be the size of the poly_vert
391          * data structure multiplied by the number of base_verts.
392          *
393          * In the event of a failure to allocate the memory, return 0, so this error can
394          * be distinguished as a memory allocation error.
395          */
396         if ((ply = (struct poly_vert *)(malloc(sizeof(struct poly_vert) * num_base_verts))) == NULL) {
397                 return(0);
398         }
399
400         ctx.rb.buf = buf;                            /* Set the output buffer pointer. */
401         ctx.rb.sizex = buf_x;                        /* Set the output buffer size in X. (width) */
402         ctx.rb.sizey = buf_y;                        /* Set the output buffer size in Y. (height) */
403         /*
404          * Loop over all verts passed in to be rasterized. Each vertex's X and Y coordinates are
405          * then converted from normalized screen space (0.0 <= POS <= 1.0) to integer coordinates
406          * in the buffer-space coordinates passed in inside buf_x and buf_y.
407          *
408          * It's worth noting that this function ONLY outputs fully white pixels in a mask. Every pixel
409          * drawn will be 1.0f in value, there is no anti-aliasing.
410          */
411
412         if(!subdiv_AA) {
413         for (i = 0; i < num_base_verts; i++) {                          /* Loop over all base_verts. */
414                         ply[i].x = (int)((base_verts[i][0] * buf_x_f) + 0.5f);       /* Range expand normalized X to integer buffer-space X. */
415                         ply[i].y = (int)((base_verts[i][1] * buf_y_f) + 0.5f); /* Range expand normalized Y to integer buffer-space Y. */
416         }
417
418                 i = rast_scan_fill(&ctx, ply, num_base_verts,1.0f);  /* Call our rasterizer, passing in the integer coords for each vert. */
419         } else {
420                 for(sAx=0; sAx < subdiv_AA; sAx++) {
421                         for(sAy=0; sAy < subdiv_AA; sAy++) {
422                                 for(i=0; i < num_base_verts; i++) {
423                                         ply[i].x = (int)((base_verts[i][0]*buf_x_f)+0.5f - div_offset_static + (div_offset*(float)(sAx)));
424                                         ply[i].y = (int)((base_verts[i][1]*buf_y_f)+0.5f - div_offset_static + (div_offset*(float)(sAy)));
425                                 }
426                                 i = rast_scan_fill(&ctx, ply, num_base_verts,(1.0f / (float)(subdiv_AA*subdiv_AA)));
427                         }
428                 }
429         }
430         free(ply);                                      /* Free the memory allocated for the integer coordinate table. */
431         return(i);                                      /* Return the value returned by the rasterizer. */
432 }
433
434 /*
435  * This function clips drawing to the frame buffer. That clipping will likely be moved into the preprocessor
436  * for speed, but waiting on final design choices for curve-data before eliminating data the DEM code will need
437  * if it ends up being coupled with this function.
438  */
439 static int rast_scan_feather(struct r_fill_context *ctx,
440                              float (*base_verts_f)[2], int num_base_verts,
441                                                          struct poly_vert *feather_verts, float(*feather_verts_f)[2], int num_feather_verts) {
442         int x_curr;                 /* current pixel position in X */
443         int y_curr;                 /* current scan line being drawn */
444         int yp;                     /* y-pixel's position in frame buffer */
445         int swixd = 0;              /* whether or not edges switched position in X */
446         float *cpxl;                /* pixel pointers... */
447         float *mpxl;
448         float *spxl;
449         struct e_status *e_curr;    /* edge pointers... */
450         struct e_status *e_temp;
451         struct e_status *edgbuf;
452         struct e_status **edgec;
453
454         /* from dem */
455         int a;                          // a = temporary pixel index buffer loop counter
456         float fsz;                        // size of the frame
457         unsigned int rsl;               // long used for finding fast 1.0/sqrt
458         float rsf;                      // float used for finding fast 1.0/sqrt
459         const float rsopf = 1.5f;       // constant float used for finding fast 1.0/sqrt
460
461         //unsigned int gradientFillOffset;
462         float t;
463         float ud;                // ud = unscaled edge distance
464         float dmin;              // dmin = minimun edge distance
465         float odist;                    // odist = current outer edge distance
466         float idist;                    // idist = current inner edge distance
467         float dx;                         // dx = X-delta (used for distance proportion calculation)
468         float dy;                         // dy = Y-delta (used for distance proportion calculation)
469         float xpxw = (1.0f / (float)(ctx->rb.sizex));  // xpxw = normalized pixel width
470         float ypxh = (1.0f / (float)(ctx->rb.sizey));  // ypxh = normalized pixel height
471
472         /*
473          * If the number of verts specified to render as a polygon is less than 3,
474          * return immediately. Obviously we cant render a poly with sides < 3. The
475          * return for this we set to 1, simply so it can be distinguished from the
476          * next place we could return, /home/guest/blender-svn/soc-2011-tomato/intern/raskter/raskter
477          * which is a failure to allocate memory.
478          */
479         if (num_feather_verts < 3) {
480                 return(1);
481         }
482
483         /*
484          * Try to allocate an edge buffer in memory. needs to be the size of the edge tracking data
485          * multiplied by the number of edges, which is always equal to the number of verts in
486          * a 2D polygon. Here we return 0 to indicate a memory allocation failure, as opposed to a 1 for
487          * the preceeding error, which was a rasterization request on a 2D poly with less than
488          * 3 sides.
489          */
490         if ((edgbuf = (struct e_status *)(malloc(sizeof(struct e_status) * num_feather_verts))) == NULL) {
491                 return(0);
492         }
493
494         /*
495          * Do some preprocessing on all edges. This constructs a table structure in memory of all
496          * the edge properties and can "flip" some edges so sorting works correctly.
497          */
498         preprocess_all_edges(ctx, feather_verts, num_feather_verts, edgbuf);
499
500         /*
501          * Set the pointer for tracking the edges currently in processing to NULL to make sure
502          * we don't get some crazy value after initialization.
503          */
504         ctx->possible_edges = NULL;
505
506         /*
507          * Loop through all scan lines to be drawn. Since we sorted by Y values during
508          * preprocess_all_edges(), we can already exact values for the lowest and
509          * highest Y values we could possibly need by induction. The preprocessing sorted
510          * out edges by Y position, we can cycle the current edge being processed once
511          * it runs out of Y pixels. When we have no more edges, meaning the current edge
512          * is NULL after setting the "current" edge to be the previous current edge's
513          * "next" edge in the Y sorted edge connection chain, we can stop looping Y values,
514          * since we can't possibly have more scan lines if we ran out of edges. :)
515          *
516          * TODO: This clips Y to the frame buffer, which should be done in the preprocessor, but for now is done here.
517          *       Will get changed once DEM code gets in.
518          */
519         for (y_curr = ctx->all_edges->ybeg; (ctx->all_edges || ctx->possible_edges); y_curr++) {
520
521                 /*
522                  * Link any edges that start on the current scan line into the list of
523                  * edges currently needed to draw at least this, if not several, scan lines.
524                  */
525
526                 /*
527                  * Set the current edge to the beginning of the list of edges to be rasterized
528                  * into this scan line.
529                  *
530                  * We could have lots of edge here, so iterate over all the edges needed. The
531                  * preprocess_all_edges() function sorted edges by X within each chunk of Y sorting
532                  * so we safely cycle edges to thier own "next" edges in order.
533                  *
534                  * At each iteration, make sure we still have a non-NULL edge.
535                  */
536                 for (edgec = &ctx->possible_edges; ctx->all_edges && (ctx->all_edges->ybeg == y_curr); ) {
537                         x_curr = ctx->all_edges->x;                  /* Set current X position. */
538                         for (;; ) {                                  /* Start looping edges. Will break when edges run out. */
539                                 e_curr = *edgec;                         /* Set up a current edge pointer. */
540                                 if (!e_curr || (e_curr->x >= x_curr)) {  /* If we have an no edge, or we need to skip some X-span, */
541                                         e_temp = ctx->all_edges->e_next;     /* set a temp "next" edge to test. */
542                                         *edgec = ctx->all_edges;             /* Add this edge to the list to be scanned. */
543                                         ctx->all_edges->e_next = e_curr;     /* Set up the next edge. */
544                                         edgec = &ctx->all_edges->e_next;     /* Set our list to the next edge's location in memory. */
545                                         ctx->all_edges = e_temp;             /* Skip the NULL or bad X edge, set pointer to next edge. */
546                                         break;                               /* Stop looping edges (since we ran out or hit empty X span. */
547                                 } else {
548                                         edgec = &e_curr->e_next;             /* Set the pointer to the edge list the "next" edge. */
549                                 }
550                         }
551                 }
552
553                 /*
554                  * Determine the current scan line's offset in the pixel buffer based on its Y position.
555                  * Basically we just multiply the current scan line's Y value by the number of pixels in each line.
556                  */
557                 yp = y_curr * ctx->rb.sizex;
558                 /*
559                  * Set a "scan line pointer" in memory. The location of the buffer plus the row offset.
560                  */
561                 spxl = ctx->rb.buf + (yp);
562                 /*
563                  * Set up the current edge to the first (in X) edge. The edges which could possibly be in this
564                  * list were determined in the preceeding edge loop above. They were already sorted in X by the
565                  * initial processing function.
566                  *
567                  * At each iteration, test for a NULL edge. Since we'll keep cycling edge's to their own "next" edge
568                  * we will eventually hit a NULL when the list runs out.
569                  */
570                 for (e_curr = ctx->possible_edges; e_curr; e_curr = e_curr->e_next) {
571                         /*
572                          * Calculate a span of pixels to fill on the current scan line.
573                          *
574                          * Set the current pixel pointer by adding the X offset to the scan line's start offset.
575                          * Cycle the current edge the next edge.
576                          * Set the max X value to draw to be one less than the next edge's first pixel. This way we are
577                          * sure not to ever get into a situation where we have overdraw. (drawing the same pixel more than
578                          * one time because it's on a vertex connecting two edges)
579                          *
580                          * Then blast through all the pixels in the span, advancing the pointer and setting the color to white.
581                          *
582                          * TODO: Here we clip to the scan line, this is not efficient, and should be done in the preprocessor,
583                          *       but for now it is done here until the DEM code comes in.
584                          */
585
586                         /* set up xmin and xmax bounds on this scan line */
587                         cpxl = spxl + MAX2(e_curr->x, 0);
588                         e_curr = e_curr->e_next;
589                         mpxl = spxl + MIN2(e_curr->x, ctx->rb.sizex) - 1;
590
591                         if ((y_curr >= 0) && (y_curr < ctx->rb.sizey)) {
592                                 t = ((float)((cpxl - spxl) % ctx->rb.sizex) + 0.5f) * xpxw;
593                                 fsz = ((float)(y_curr) + 0.5f) * ypxh;
594                                 /* draw the pixels. */
595                                 for (; cpxl <= mpxl; cpxl++, t += xpxw) {
596                                         //do feather check
597                                         // first check that pixel isn't already full, and only operate if it is not
598                                         if (*cpxl < 0.9999f) {
599
600                                                 dmin = 2.0f;                        // reset min distance to edge pixel
601                                                 for (a = 0; a < num_feather_verts; a++) { // loop through all outer edge buffer pixels
602                                                         dy = t - feather_verts_f[a][0];          // set dx to gradient pixel column - outer edge pixel row
603                                                         dx = fsz - feather_verts_f[a][1];        // set dy to gradient pixel row - outer edge pixel column
604                                                         ud = dx * dx + dy * dy;               // compute sum of squares
605                                                         if (ud < dmin) {                      // if our new sum of squares is less than the current minimum
606                                                                 dmin = ud;                        // set a new minimum equal to the new lower value
607                                                         }
608                                                 }
609                                                 odist = dmin;                    // cast outer min to a float
610                                                 rsf = odist * 0.5f;                       //
611                                                 rsl = *(unsigned int *)&odist;            // use some peculiar properties of the way bits are stored
612                                                 rsl = 0x5f3759df - (rsl >> 1);            // in floats vs. unsigned ints to compute an approximate
613                                                 odist = *(float *)&rsl;                   // reciprocal square root
614                                                 odist = odist * (rsopf - (rsf * odist * odist));  // -- ** this line can be iterated for more accuracy ** --
615                                                 odist = odist * (rsopf - (rsf * odist * odist));
616                                                 dmin = 2.0f;                        // reset min distance to edge pixel
617                                                 for (a = 0; a < num_base_verts; a++) {    // loop through all inside edge pixels
618                                                         dy = t - base_verts_f[a][0];             // compute delta in Y from gradient pixel to inside edge pixel
619                                                         dx = fsz - base_verts_f[a][1];           // compute delta in X from gradient pixel to inside edge pixel
620                                                         ud = dx * dx + dy * dy;   // compute sum of squares
621                                                         if (ud < dmin) {          // if our new sum of squares is less than the current minimum we've found
622                                                                 dmin = ud;            // set a new minimum equal to the new lower value
623                                                         }
624                                                 }
625                                                 idist = dmin;                    // cast inner min to a float
626                                                 rsf = idist * 0.5f;                       //
627                                                 rsl = *(unsigned int *)&idist;            //
628                                                 rsl = 0x5f3759df - (rsl >> 1);            // see notes above
629                                                 idist = *(float *)&rsl;                   //
630                                                 idist = idist * (rsopf - (rsf * idist * idist));  //
631                                                 idist = idist * (rsopf - (rsf * idist * idist));
632                                                 /*
633                                                  * Note once again that since we are using reciprocals of distance values our
634                                                  * proportion is already the correct intensity, and does not need to be
635                                                  * subracted from 1.0 like it would have if we used real distances.
636                                                  */
637
638                                                 /* set intensity, do the += so overlapping gradients are additive */
639                                                 *cpxl = (idist / (idist + odist));
640                                         }
641                                 }
642                         }
643                 }
644
645                 /*
646                  * Loop through all edges of polygon that could be hit by this scan line,
647                  * and figure out their x-intersections with the next scan line.
648                  *
649                  * Either A.) we wont have any more edges to test, or B.) we just add on the
650                  * slope delta computed in preprocessing step. Since this draws non-antialiased
651                  * polygons, we dont have fractional positions, so we only move in x-direction
652                  * when needed to get all the way to the next pixel over...
653                  */
654                 for (edgec = &ctx->possible_edges; (e_curr = *edgec); ) {
655                         if (!(--(e_curr->num))) {
656                                 *edgec = e_curr->e_next;
657                         } else {
658                                 e_curr->x += e_curr->xshift;
659                                 if ((e_curr->drift += e_curr->drift_inc) > 0) {
660                                         e_curr->x += e_curr->xdir;
661                                         e_curr->drift -= e_curr->drift_dec;
662                                 }
663                                 edgec = &e_curr->e_next;
664                         }
665                 }
666                 /*
667                  * It's possible that some edges may have crossed during the last step, so we'll be sure
668                  * that we ALWAYS intersect scan lines in order by shuffling if needed to make all edges
669                  * sorted by x-intersection coordinate. We'll always scan through at least once to see if
670                  * edges crossed, and if so, we set the 'swixd' flag. If 'swixd' gets set on the initial
671                  * pass, then we know we need to sort by x, so then cycle through edges again and perform
672                  * the sort.-
673                  */
674                 if (ctx->possible_edges) {
675                         for (edgec = &ctx->possible_edges; (e_curr = *edgec)->e_next; edgec = &(*edgec)->e_next) {
676                                 /* if the current edge hits scan line at greater X than the next edge, we need to exchange the edges */
677                                 if (e_curr->x > e_curr->e_next->x) {
678                                         *edgec = e_curr->e_next;
679                                         /* exchange the pointers */
680                                         e_temp = e_curr->e_next->e_next;
681                                         e_curr->e_next->e_next = e_curr;
682                                         e_curr->e_next = e_temp;
683                                         /* set flag that we had at least one switch */
684                                         swixd = 1;
685                                 }
686                         }
687                         /* if we did have a switch, look for more (there will more if there was one) */
688                         for (;; ) {
689                                 /* reset exchange flag so it's only set if we encounter another one */
690                                 swixd = 0;
691                                 for (edgec = &ctx->possible_edges; (e_curr = *edgec)->e_next; edgec = &(*edgec)->e_next) {
692                                         /* again, if current edge hits scan line at higher X than next edge,
693                                          * exchange the edges and set flag */
694                                         if (e_curr->x > e_curr->e_next->x) {
695                                                 *edgec = e_curr->e_next;
696                                                 /* exchange the pointers */
697                                                 e_temp = e_curr->e_next->e_next;
698                                                 e_curr->e_next->e_next = e_curr;
699                                                 e_curr->e_next = e_temp;
700                                                 /* flip the exchanged flag */
701                                                 swixd = 1;
702                                         }
703                                 }
704                                 /* if we had no exchanges, we're done reshuffling the pointers */
705                                 if (!swixd) {
706                                         break;
707                                 }
708                         }
709                 }
710         }
711
712         free(edgbuf);
713         return 1;
714 }
715
716 int PLX_raskterize_feather(float (*base_verts)[2], int num_base_verts, float (*feather_verts)[2], int num_feather_verts,
717                                                    float *buf, int buf_x, int buf_y) {
718         int i;                            /* i: Loop counter. */
719         struct poly_vert *fe;             /* fe: Pointer to a list of integer buffer-space feather vertex coords. */
720         struct r_fill_context ctx = {0};
721
722         /* for faster multiply */
723         const float buf_x_f = (float)buf_x;
724         const float buf_y_f = (float)buf_y;
725
726         /*
727          * Allocate enough memory for our poly_vert list. It'll be the size of the poly_vert
728          * data structure multiplied by the number of verts.
729          *
730          * In the event of a failure to allocate the memory, return 0, so this error can
731          * be distinguished as a memory allocation error.
732          */
733         if ((fe = (struct poly_vert *)(malloc(sizeof(struct poly_vert) * num_feather_verts))) == NULL) {
734                 return(0);
735         }
736
737         /*
738          * Loop over all verts passed in to be rasterized. Each vertex's X and Y coordinates are
739          * then converted from normalized screen space (0.0 <= POS <= 1.0) to integer coordinates
740          * in the buffer-space coordinates passed in inside buf_x and buf_y.
741          *
742          * It's worth noting that this function ONLY outputs fully white pixels in a mask. Every pixel
743          * drawn will be 1.0f in value, there is no anti-aliasing.
744          */
745         for (i = 0; i < num_feather_verts; i++) {            /* Loop over all verts. */
746                 fe[i].x = (int)((feather_verts[i][0] * buf_x_f) + 0.5f);  /* Range expand normalized X to integer buffer-space X. */
747                 fe[i].y = (int)((feather_verts[i][1] * buf_y_f) + 0.5f);  /* Range expand normalized Y to integer buffer-space Y. */
748         }
749
750         ctx.rb.buf = buf;                            /* Set the output buffer pointer. */
751         ctx.rb.sizex = buf_x;                        /* Set the output buffer size in X. (width) */
752         ctx.rb.sizey = buf_y;                        /* Set the output buffer size in Y. (height) */
753
754         /* Call our rasterizer, passing in the integer coords for each vert. */
755         i = rast_scan_feather(&ctx, base_verts, num_base_verts, fe, feather_verts, num_feather_verts);
756         free(fe);
757         return i;                                   /* Return the value returned by the rasterizer. */
758 }
759
/*
 * Scale a normalized coordinate by max_value and convert it to an integer.
 *
 * 0.5 is added before the cast. The cast truncates toward zero, so non-negative results
 * are rounded to the nearest integer while negative results are not.
 */
int get_range_expanded_pixel_coord(float normalized_value, int max_value) {
        const int pixel_coord = (int)((normalized_value * (float)(max_value)) + 0.5f);

        return pixel_coord;
}
763
/*
 * Read the value of one pixel from a row-major float buffer.
 *
 * buf:          pixel buffer holding buf_y rows of buf_x values each.
 * buf_x, buf_y: buffer width and height in pixels.
 * pos_x, pos_y: column and row of the pixel to read.
 *
 * Returns the stored value, or 0.0f when the position lies outside the buffer.
 */
float get_pixel_intensity(float *buf, int buf_x, int buf_y, int pos_x, int pos_y) {
        if(pos_x < 0 || pos_x >= buf_x || pos_y < 0 || pos_y >= buf_y) {
                return 0.0f;
        }
        /*
         * A row is buf_x pixels long, so the row offset is pos_y * buf_x and the column
         * offset is pos_x. The offset is computed as size_t so large buffers cannot
         * overflow int arithmetic; both values are known to be non-negative here.
         */
        return buf[((size_t)pos_y * (size_t)buf_x) + (size_t)pos_x];
}
770
/*
 * Sample a row-major float buffer with bilinear filtering.
 *
 * buf:          pixel buffer holding buf_y rows of buf_x values each.
 * buf_x, buf_y: buffer width and height in pixels.
 * u, v:         normalized sample position; pixel (i, j) has its centre at
 *               ((i + 0.5) / buf_x, (j + 0.5) / buf_y).
 *
 * Returns the interpolated value. Samples that fall between an edge pixel's centre and
 * the buffer border take the edge pixel's value. Returns 0.0f when u or v is outside
 * [0, 1] or is NaN, or when the buffer has no pixels.
 */
float get_pixel_intensity_bilinear(float *buf, int buf_x, int buf_y, float u, float v) {
        int a;
        int b;
        int a_plus_1;
        int b_plus_1;
        float prop_u;
        float prop_v;
        float inv_prop_u;
        float inv_prop_v;
        float row_lo;
        float row_hi;
        const float *lo;
        const float *hi;

        /* Nothing to sample from an empty buffer (also keeps the clamps below non-negative). */
        if(buf_x <= 0 || buf_y <= 0) {
                return 0.0f;
        }
        /* Written as a negated "inside" test so that NaN coordinates are rejected too. */
        if(!(u >= 0.0f && u <= 1.0f && v >= 0.0f && v <= 1.0f)) {
                return 0.0f;
        }

        /* Move to pixel space with pixel centres on integer coordinates. */
        u = u * (float)(buf_x) - 0.5f;
        v = v * (float)(buf_y) - 0.5f;
        /*
         * The half pixel before the first centre would give a negative weight and
         * extrapolate; clamp it the same way the far edge is clamped below.
         */
        if(u < 0.0f) {
                u = 0.0f;
        }
        if(v < 0.0f) {
                v = 0.0f;
        }

        a = (int)(u);
        b = (int)(v);
        /* Guard against float rounding pushing the base texel past the last column/row. */
        if(a > buf_x - 1) {
                a = buf_x - 1;
        }
        if(b > buf_y - 1) {
                b = buf_y - 1;
        }
        prop_u = u - (float)(a);
        prop_v = v - (float)(b);
        inv_prop_u = 1.0f - prop_u;
        inv_prop_v = 1.0f - prop_v;

        /* Neighbouring texel, clamped to the last column/row. */
        a_plus_1 = (a + 1 < buf_x) ? a + 1 : buf_x - 1;
        b_plus_1 = (b + 1 < buf_y) ? b + 1 : buf_y - 1;

        /* A row is buf_x pixels long, so rows are addressed with a stride of buf_x. */
        lo = buf + ((size_t)b * (size_t)buf_x);
        hi = buf + ((size_t)b_plus_1 * (size_t)buf_x);

        /* Blend horizontally inside each row, then vertically between the two rows. */
        row_lo = lo[a] * inv_prop_u + lo[a_plus_1] * prop_u;
        row_hi = hi[a] * inv_prop_u + hi[a_plus_1] * prop_u;
        return row_lo * inv_prop_v + row_hi * prop_v;
}
796
/*
 * Store a value into one pixel of a row-major float buffer.
 *
 * buf:          pixel buffer holding buf_y rows of buf_x values each.
 * buf_x, buf_y: buffer width and height in pixels.
 * pos_x, pos_y: column and row of the pixel to write.
 * intensity:    value to store.
 *
 * Positions outside the buffer are ignored and nothing is written.
 */
void set_pixel_intensity(float *buf, int buf_x, int buf_y, int pos_x, int pos_y, float intensity) {
        if(pos_x < 0 || pos_x >= buf_x || pos_y < 0 || pos_y >= buf_y) {
                return;
        }
        /*
         * A row is buf_x pixels long, so the row offset is pos_y * buf_x and the column
         * offset is pos_x. The offset is computed as size_t so large buffers cannot
         * overflow int arithmetic; both values are known to be non-negative here.
         */
        buf[((size_t)pos_y * (size_t)buf_x) + (size_t)pos_x] = intensity;
}
803 #define __PLX__FAKE_AA__
804 int PLX_antialias_buffer(float *buf, int buf_x, int buf_y) {
805 #ifdef __PLX__FAKE_AA__
806 #ifdef __PLX_GREY_AA__
807         int i=0;
808         int sz = buf_x * buf_y;
809         for(i=0; i<sz; i++) {
810                 buf[i] *= 0.5f;
811         }
812 #endif
813         return 1;
814 #else
815         /*XXX - TODO: THIS IS NOT FINAL CODE - IT DOES NOT WORK - DO NOT ENABLE IT */
816         const float p0 = 1.0f;
817         const float p1 = 1.0f;
818         const float p2 = 1.0f;
819         const float p3 = 1.0f;
820         const float p4 = 1.0f;
821         const float p5 = 1.5f;
822         const float p6 = 2.0f;
823         const float p7 = 2.0f;
824         const float p8 = 2.0f;
825         const float p9 = 2.0f;
826         const float p10 = 4.0f;
827         const float p11 = 8.0f;
828
829         const float edge_threshold = 0.063f;
830         const float edge_threshold_min = 0.0312f;
831         const float quality_subpix = 1.0f;
832 //      int px_x;
833 //      int px_y;
834
835         float posM_x,posM_y;
836         float posB_x,posB_y;
837         float posN_x,posN_y;
838         float posP_x,posP_y;
839         float offNP_x,offNP_y;
840         float lumaM;
841         float lumaS;
842         float lumaE;
843         float lumaN;
844         float lumaW;
845         float lumaNW;
846         float lumaSE;
847         float lumaNE;
848         float lumaSW;
849         float lumaNS;
850         float lumaWE;
851         float lumaNESE;
852         float lumaNWNE;
853         float lumaNWSW;
854         float lumaSWSE;
855         float lumaNN;
856         float lumaSS;
857         float lumaEndN;
858         float lumaEndP;
859         float lumaMM;
860         float lumaMLTZero;
861         float subpixNWSWNESE;
862         float subpixRcpRange;
863         float subpixNSWE;
864         float maxSM;
865         float minSM;
866         float maxESM;
867         float minESM;
868         float maxWN;
869         float minWN;
870         float rangeMax;
871         float rangeMin;
872         float rangeMaxScaled;
873         float range;
874         float rangeMaxClamped;
875         float edgeHorz;
876         float edgeVert;
877         float edgeHorz1;
878         float edgeVert1;
879         float edgeHorz2;
880         float edgeVert2;
881         float edgeHorz3;
882         float edgeVert3;
883         float edgeHorz4;
884         float edgeVert4;
885         float lengthSign;
886         float subpixA;
887         float subpixB;
888         float subpixC;
889         float subpixD;
890         float subpixE;
891         float subpixF;
892         float subpixG;
893         float subpixH;
894         float gradientN;
895         float gradientS;
896         float gradient;
897         float gradientScaled;
898         float dstN;
899         float dstP;
900         float dst;
901         float spanLength;
902         float spanLengthRcp;
903         float pixelOffset;
904         float pixelOffsetGood;
905         float pixelOffsetSubpix;
906         int directionN;
907         int goodSpan;
908         int goodSpanN;
909         int goodSpanP;
910         int horzSpan;
911         int earlyExit;
912         int pairN;
913         int doneN;
914         int doneP;
915         int doneNP;
916         int curr_x=0;
917         int curr_y=0;
918         for(curr_y=0; curr_y < buf_y; curr_y++) {
919                 for(curr_x=0; curr_x < buf_x; curr_x++) {
920                         posM_x = ((float)(curr_x) + 0.5f) * (1.0f/(float)(buf_x));
921                         posM_y = ((float)(curr_y) + 0.5f) * (1.0f/(float)(buf_y));
922
923                         lumaM = get_pixel_intensity(buf, buf_x, buf_y, curr_x, curr_y);
924                         lumaS = get_pixel_intensity(buf, buf_x, buf_y, curr_x, curr_y - 1);
925                         lumaE = get_pixel_intensity(buf, buf_x, buf_y, curr_x + 1, curr_y);
926                         lumaN = get_pixel_intensity(buf, buf_x, buf_y, curr_x, curr_y + 1);
927                         lumaW = get_pixel_intensity(buf, buf_x, buf_y, curr_x - 1, curr_y);
928
929                         maxSM = MAX2(lumaS, lumaM);
930                         minSM = MIN2(lumaS, lumaM);
931                         maxESM = MAX2(lumaE, maxSM);
932                         minESM = MIN2(lumaE, minSM);
933                         maxWN = MAX2(lumaN, lumaW);
934                         minWN = MIN2(lumaN, lumaW);
935                         rangeMax = MAX2(maxWN, maxESM);
936                         rangeMin = MIN2(minWN, minESM);
937                         rangeMaxScaled = rangeMax * edge_threshold;
938                         range = rangeMax - rangeMin;
939                         rangeMaxClamped = MAX2(edge_threshold_min, rangeMaxScaled);
940
941                         earlyExit = range < rangeMaxClamped ? 1:0;
942                         if(earlyExit) {
943                                 set_pixel_intensity(buf, buf_x, buf_y, curr_x, curr_y, lumaM);
944                         }
945
946                         lumaNW = get_pixel_intensity(buf, buf_x, buf_y, curr_x + 1, curr_y - 1);
947                         lumaSE = get_pixel_intensity(buf, buf_x, buf_y, curr_x - 1, curr_y + 1);
948                         lumaNE = get_pixel_intensity(buf, buf_x, buf_y, curr_x + 1, curr_y + 1);
949                         lumaSW = get_pixel_intensity(buf, buf_x, buf_y, curr_x - 1, curr_y - 1);
950
951                         lumaNS = lumaN + lumaS;
952                         lumaWE = lumaW + lumaE;
953                         subpixRcpRange = 1.0f/range;
954                         subpixNSWE = lumaNS + lumaWE;
955                         edgeHorz1 = (-2.0f * lumaM) + lumaNS;
956                         edgeVert1 = (-2.0f * lumaM) + lumaWE;
957
958                         lumaNESE = lumaNE + lumaSE;
959                         lumaNWNE = lumaNW + lumaNE;
960                         edgeHorz2 = (-2.0f * lumaE) + lumaNESE;
961                         edgeVert2 = (-2.0f * lumaN) + lumaNWNE;
962
963                         lumaNWSW = lumaNW + lumaSW;
964                         lumaSWSE = lumaSW + lumaSE;
965                         edgeHorz4 = (ABS(edgeHorz1) * 2.0f) + ABS(edgeHorz2);
966                         edgeVert4 = (ABS(edgeVert1) * 2.0f) + ABS(edgeVert2);
967                         edgeHorz3 = (-2.0f * lumaW) + lumaNWSW;
968                         edgeVert3 = (-2.0f * lumaS) + lumaSWSE;
969                         edgeHorz = ABS(edgeHorz3) + edgeHorz4;
970                         edgeVert = ABS(edgeVert3) + edgeVert4;
971
972                         subpixNWSWNESE = lumaNWSW + lumaNESE;
973                         lengthSign = 1.0f / (float)(buf_x);
974                         horzSpan = edgeHorz >= edgeVert ? 1:0;
975                         subpixA = subpixNSWE * 2.0f + subpixNWSWNESE;
976
977                         if(!horzSpan) {
978                                 lumaN = lumaW;
979                                 lumaS = lumaE;
980                         } else {
981                                 lengthSign = 1.0f / (float)(buf_y);
982                         }
983                         subpixB = (subpixA * (1.0f/12.0f)) - lumaM;
984
985                         gradientN = lumaN - lumaM;
986                         gradientS = lumaS - lumaM;
987                         lumaNN = lumaN + lumaM;
988                         lumaSS = lumaS + lumaM;
989                         pairN = (ABS(gradientN)) >= (ABS(gradientS)) ? 1:0;
990                         gradient = MAX2(ABS(gradientN), ABS(gradientS));
991                         if(pairN) {
992                                 lengthSign = -lengthSign;
993                         }
994                         subpixC = MAX2(MIN2(ABS(subpixB) * subpixRcpRange,1.0f),0.0f);
995
996                         posB_x = posM_x;
997                         posB_y = posM_y;
998                         offNP_x = (!horzSpan) ? 0.0f:(1.0f / (float)(buf_x));
999                         offNP_y = (horzSpan) ? 0.0f:(1.0f / (float)(buf_y));
1000                         if(!horzSpan) {
1001                                 posB_x += lengthSign * 0.5f;
1002                         } else {
1003                                 posB_y += lengthSign * 0.5f;
1004                         }
1005
1006                         posN_x = posB_x - offNP_x * p0;
1007                         posN_y = posB_y - offNP_y * p0;
1008                         posP_x = posB_x + offNP_x * p0;
1009                         posP_y = posB_y + offNP_y * p0;
1010                         subpixD = ((-2.0f)*subpixC) + 3.0f;
1011                         //may need bilinear filtered get_pixel_intensity() here...done
1012                         lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1013                         subpixE = subpixC * subpixC;
1014                         //may need bilinear filtered get_pixel_intensity() here...done
1015                         lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1016
1017                         if(!pairN) {
1018                                 lumaNN = lumaSS;
1019                         }
1020                         gradientScaled = gradient * 1.0f/4.0f;
1021                         lumaMM =lumaM - lumaNN * 0.5f;
1022                         subpixF = subpixD * subpixE;
1023                         lumaMLTZero = lumaMM < 0.0f ? 1:0;
1024
1025                         lumaEndN -= lumaNN * 0.5f;
1026                         lumaEndP -= lumaNN * 0.5f;
1027                         doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1028                         doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1029                         if(!doneN) {
1030                                 posN_x -= offNP_x * p1;
1031                                 posN_y -= offNP_y * p1;
1032                         }
1033                         doneNP = (!doneN) || (!doneP) ? 1:0;
1034                         if(!doneP) {
1035                                 posP_x += offNP_x * p1;
1036                                 posP_y += offNP_y * p1;
1037                         }
1038
1039                         if(doneNP) {
1040                                 if(!doneN) {
1041                                         lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posN_x,posN_y);
1042                                 }
1043                                 if(!doneP) {
1044                                         lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x, posP_y);
1045                                 }
1046                                 if(!doneN) {
1047                                         lumaEndN = lumaEndN - lumaNN * 0.5;
1048                                 }
1049                                 if(!doneP) {
1050                                         lumaEndP = lumaEndP - lumaNN * 0.5;
1051                                 }
1052                                 doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1053                                 doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1054                                 if(!doneN) {
1055                                         posN_x -= offNP_x * p2;
1056                                         posN_y -= offNP_y * p2;
1057                                 }
1058                                 doneNP = (!doneN) || (!doneP) ? 1:0;
1059                                 if(!doneP) {
1060                                         posP_x += offNP_x * p2;
1061                                         posP_y += offNP_y * p2;
1062                                 }
1063                                 if(doneNP) {
1064                                         if(!doneN) {
1065                                                 lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1066                                         }
1067                                         if(!doneP) {
1068                                                 lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1069                                         }
1070                                         if(!doneN) {
1071                                                 lumaEndN = lumaEndN - lumaNN * 0.5;
1072                                         }
1073                                         if(!doneP) {
1074                                                 lumaEndP = lumaEndP - lumaNN * 0.5;
1075                                         }
1076                                         doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1077                                         doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1078                                         if(!doneN) {
1079                                                 posN_x -= offNP_x * p3;
1080                                                 posN_y -= offNP_y * p3;
1081                                         }
1082                                         doneNP = (!doneN) || (!doneP) ? 1:0;
1083                                         if(!doneP) {
1084                                                 posP_x += offNP_x * p3;
1085                                                 posP_y += offNP_y * p3;
1086                                         }
1087                                         if(doneNP) {
1088                                                 if(!doneN) {
1089                                                         lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1090                                                 }
1091                                                 if(!doneP) {
1092                                                         lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1093                                                 }
1094                                                 if(!doneN) {
1095                                                         lumaEndN = lumaEndN - lumaNN * 0.5;
1096                                                 }
1097                                                 if(!doneP) {
1098                                                         lumaEndP = lumaEndP - lumaNN * 0.5;
1099                                                 }
1100                                                 doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1101                                                 doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1102                                                 if(!doneN) {
1103                                                         posN_x -= offNP_x * p4;
1104                                                         posN_y -= offNP_y * p4;
1105                                                 }
1106                                                 doneNP = (!doneN) || (!doneP) ? 1:0;
1107                                                 if(!doneP) {
1108                                                         posP_x += offNP_x * p4;
1109                                                         posP_y += offNP_y * p4;
1110                                                 }
1111                                                 if(doneNP) {
1112                                                         if(!doneN) {
1113                                                                 lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1114                                                         }
1115                                                         if(!doneP) {
1116                                                                 lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1117                                                         }
1118                                                         if(!doneN) {
1119                                                                 lumaEndN = lumaEndN - lumaNN * 0.5;
1120                                                         }
1121                                                         if(!doneP) {
1122                                                                 lumaEndP = lumaEndP - lumaNN * 0.5;
1123                                                         }
1124                                                         doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1125                                                         doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1126                                                         if(!doneN) {
1127                                                                 posN_x -= offNP_x * p5;
1128                                                                 posN_y -= offNP_y * p5;
1129                                                         }
1130                                                         doneNP = (!doneN) || (!doneP) ? 1:0;
1131                                                         if(!doneP) {
1132                                                                 posP_x += offNP_x * p5;
1133                                                                 posP_y += offNP_y * p5;
1134                                                         }
1135                                                         if(doneNP) {
1136                                                                 if(!doneN) {
1137                                                                         lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1138                                                                 }
1139                                                                 if(!doneP) {
1140                                                                         lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1141                                                                 }
1142                                                                 if(!doneN) {
1143                                                                         lumaEndN = lumaEndN - lumaNN * 0.5;
1144                                                                 }
1145                                                                 if(!doneP) {
1146                                                                         lumaEndP = lumaEndP - lumaNN * 0.5;
1147                                                                 }
1148                                                                 doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1149                                                                 doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1150                                                                 if(!doneN) {
1151                                                                         posN_x -= offNP_x * p6;
1152                                                                         posN_y -= offNP_y * p6;
1153                                                                 }
1154                                                                 doneNP = (!doneN) || (!doneP) ? 1:0;
1155                                                                 if(!doneP) {
1156                                                                         posP_x += offNP_x * p6;
1157                                                                         posP_y += offNP_y * p6;
1158                                                                 }
1159                                                                 if(doneNP) {
1160                                                                         if(!doneN) {
1161                                                                                 lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1162                                                                         }
1163                                                                         if(!doneP) {
1164                                                                                 lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1165                                                                         }
1166                                                                         if(!doneN) {
1167                                                                                 lumaEndN = lumaEndN - lumaNN * 0.5;
1168                                                                         }
1169                                                                         if(!doneP) {
1170                                                                                 lumaEndP = lumaEndP - lumaNN * 0.5;
1171                                                                         }
1172                                                                         doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1173                                                                         doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1174                                                                         if(!doneN) {
1175                                                                                 posN_x -= offNP_x * p7;
1176                                                                                 posN_y -= offNP_y * p7;
1177                                                                         }
1178                                                                         doneNP = (!doneN) || (!doneP) ? 1:0;
1179                                                                         if(!doneP) {
1180                                                                                 posP_x += offNP_x * p7;
1181                                                                                 posP_y += offNP_y * p7;
1182                                                                         }
1183                                                                         if(doneNP) {
1184                                                                                 if(!doneN) {
1185                                                                                         lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1186                                                                                 }
1187                                                                                 if(!doneP) {
1188                                                                                         lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1189                                                                                 }
1190                                                                                 if(!doneN) {
1191                                                                                         lumaEndN = lumaEndN - lumaNN * 0.5;
1192                                                                                 }
1193                                                                                 if(!doneP) {
1194                                                                                         lumaEndP = lumaEndP - lumaNN * 0.5;
1195                                                                                 }
1196                                                                                 doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1197                                                                                 doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1198                                                                                 if(!doneN) {
1199                                                                                         posN_x -= offNP_x * p8;
1200                                                                                         posN_y -= offNP_y * p8;
1201                                                                                 }
1202                                                                                 doneNP = (!doneN) || (!doneP) ? 1:0;
1203                                                                                 if(!doneP) {
1204                                                                                         posP_x += offNP_x * p8;
1205                                                                                         posP_y += offNP_y * p8;
1206                                                                                 }
1207                                                                                 if(doneNP) {
1208                                                                                         if(!doneN) {
1209                                                                                                 lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1210                                                                                         }
1211                                                                                         if(!doneP) {
1212                                                                                                 lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1213                                                                                         }
1214                                                                                         if(!doneN) {
1215                                                                                                 lumaEndN = lumaEndN - lumaNN * 0.5;
1216                                                                                         }
1217                                                                                         if(!doneP) {
1218                                                                                                 lumaEndP = lumaEndP - lumaNN * 0.5;
1219                                                                                         }
1220                                                                                         doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1221                                                                                         doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1222                                                                                         if(!doneN) {
1223                                                                                                 posN_x -= offNP_x * p9;
1224                                                                                                 posN_y -= offNP_y * p9;
1225                                                                                         }
1226                                                                                         doneNP = (!doneN) || (!doneP) ? 1:0;
1227                                                                                         if(!doneP) {
1228                                                                                                 posP_x += offNP_x * p9;
1229                                                                                                 posP_y += offNP_y * p9;
1230                                                                                         }
1231                                                                                         if(doneNP) {
1232                                                                                                 if(!doneN) {
1233                                                                                                         lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1234                                                                                                 }
1235                                                                                                 if(!doneP) {
1236                                                                                                         lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1237                                                                                                 }
1238                                                                                                 if(!doneN) {
1239                                                                                                         lumaEndN = lumaEndN - lumaNN * 0.5;
1240                                                                                                 }
1241                                                                                                 if(!doneP) {
1242                                                                                                         lumaEndP = lumaEndP - lumaNN * 0.5;
1243                                                                                                 }
1244                                                                                                 doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1245                                                                                                 doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1246                                                                                                 if(!doneN) {
1247                                                                                                         posN_x -= offNP_x * p10;
1248                                                                                                         posN_y -= offNP_y * p10;
1249                                                                                                 }
1250                                                                                                 doneNP = (!doneN) || (!doneP) ? 1:0;
1251                                                                                                 if(!doneP) {
1252                                                                                                         posP_x += offNP_x * p10;
1253                                                                                                         posP_y += offNP_y * p10;
1254                                                                                                 }
1255                                                                                                 if(doneNP) {
1256                                                                                                         if(!doneN) {
1257                                                                                                                 lumaEndN = get_pixel_intensity_bilinear(buf, buf_x, buf_y,posN_x,posN_y);
1258                                                                                                         }
1259                                                                                                         if(!doneP) {
1260                                                                                                                 lumaEndP = get_pixel_intensity_bilinear(buf, buf_x, buf_y, posP_x,posP_y);
1261                                                                                                         }
1262                                                                                                         if(!doneN) {
1263                                                                                                                 lumaEndN = lumaEndN - lumaNN * 0.5;
1264                                                                                                         }
1265                                                                                                         if(!doneP) {
1266                                                                                                                 lumaEndP = lumaEndP - lumaNN * 0.5;
1267                                                                                                         }
1268                                                                                                         doneN = (ABS(lumaEndN)) >= gradientScaled ? 1:0;
1269                                                                                                         doneP = (ABS(lumaEndP)) >= gradientScaled ? 1:0;
1270                                                                                                         if(!doneN) {
1271                                                                                                                 posN_x -= offNP_x * p11;
1272                                                                                                                 posN_y -= offNP_y * p11;
1273                                                                                                         }
1274                                                                                                         doneNP = (!doneN) || (!doneP) ? 1:0;
1275                                                                                                         if(!doneP) {
1276                                                                                                                 posP_x += offNP_x * p11;
1277                                                                                                                 posP_y += offNP_y * p11;
1278                                                                                                         }
1279                                                                                                 }
1280                                                                                         }
1281                                                                                 }
1282                                                                         }
1283                                                                 }
1284                                                         }
1285                                                 }
1286                                         }
1287                                 }
1288                         }
1289                         dstN = posM_x - posN_x;
1290                         dstP = posP_x - posM_x;
1291                         if(!horzSpan) {
1292                                 dstN = posM_y - posN_y;
1293                                 dstP = posP_y - posM_y;
1294                         }
1295
1296                         goodSpanN = ((lumaEndN < 0.0f) ? 1:0) != lumaMLTZero ? 1:0;
1297                         spanLength = (dstP + dstN);
1298                         goodSpanP = ((lumaEndP < 0.0f) ? 1:0) != lumaMLTZero ? 1:0;
1299                         spanLengthRcp = 1.0f/spanLength;
1300
1301                         directionN = dstN < dstP ? 1:0;
1302                         dst = MIN2(dstN, dstP);
1303                         goodSpan = (directionN==1) ? goodSpanN:goodSpanP;
1304                         subpixG = subpixF * subpixF;
1305                         pixelOffset = (dst * (-spanLengthRcp)) + 0.5f;
1306                         subpixH = subpixG * quality_subpix;
1307
1308                         pixelOffsetGood = (goodSpan==1) ? pixelOffset : 0.0f;
1309                         pixelOffsetSubpix = MAX2(pixelOffsetGood, subpixH);
1310                         if(!horzSpan) {
1311                                 posM_x += pixelOffsetSubpix * lengthSign;
1312                         } else {
1313                                 posM_y += pixelOffsetSubpix * lengthSign;
1314                         }
1315                         //may need bilinear filtered get_pixel_intensity() here...
1316                         set_pixel_intensity(buf,buf_x,buf_y,curr_x,curr_y,get_pixel_intensity_bilinear(buf, buf_x, buf_y, posM_x,posM_y)* lumaM);
1317
1318                 }
1319         }
1320         return 1;
1321
1322 #endif
1323 }
1324