From c024262eae4e00567ccb66a59b4d572621233cbc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 3 Oct 2005 10:20:29 +0000 Subject: [PATCH] Merge r1.2 of fbedge.c from xserver CVS: Optimize spans where the same value is being added to multiple pixels. This improves the speed of rendering wide trapezoids. I tested this with a small set of xlibs cairo-benchmarks with Xvfb and saw a 4% decrease in time taken to run them. --- fb/fbedge.c | 229 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 204 insertions(+), 25 deletions(-) diff --git a/fb/fbedge.c b/fb/fbedge.c index fc52bc975..b6ca829a4 100644 --- a/fb/fbedge.c +++ b/fb/fbedge.c @@ -22,6 +22,8 @@ * PERFORMANCE OF THIS SOFTWARE. */ +#include + #ifdef HAVE_DIX_CONFIG_H #include #endif @@ -35,31 +37,6 @@ #include "renderedge.h" #include "fbpict.h" -/* - * 8 bit alpha - */ - -#define N_BITS 8 -#define rasterizeEdges fbRasterizeEdges8 - -#define DefineAlpha(line,x) \ - CARD8 *__ap = (CARD8 *) line + (x) - -#define StepAlpha __ap++ - -#define AddAlpha(a) { \ - CARD16 __a = a + *__ap; \ - *__ap = ((CARD8) ((__a) | (0 - ((__a) >> 8)))); \ -} - -#include "fbedgeimp.h" - -#undef AddAlpha -#undef StepAlpha -#undef DefineAlpha -#undef rasterizeEdges -#undef N_BITS - /* * 4 bit alpha */ @@ -109,6 +86,208 @@ #undef rasterizeEdges #undef N_BITS +/* + * 8 bit alpha + */ + +static INLINE CARD8 +clip255 (int x) +{ + if (x > 255) return 255; + return x; +} + +static INLINE void +add_saturate_8 (CARD8 *buf, int value, int length) +{ + while (length--) + { + *buf = clip255 (*buf + value); + buf++; + } +} + +/* + * We want to detect the case where we add the same value to a long + * span of pixels. The triangles on the end are filled in while we + * count how many sub-pixel scanlines contribute to the middle section. + * + * +--------------------------+ + * fill_height =| \ / + * +------------------+ + * |================| + * fill_start fill_end + */ +static void +fbRasterizeEdges8 (FbBits *buf, + int width, + int stride, + RenderEdge *l, + RenderEdge *r, + xFixed t, + xFixed b) +{ + xFixed y = t; + FbBits *line; + int fill_start = -1, fill_end = -1; + int fill_size = 0; + + line = buf + xFixedToInt (y) * stride; + + for (;;) + { + CARD8 *ap = (CARD8 *) line; + xFixed lx, rx; + int lxi, rxi; + + /* clip X */ + lx = l->x; + if (lx < 0) + lx = 0; + rx = r->x; + if (xFixedToInt (rx) >= width) + rx = IntToxFixed (width); + + /* Skip empty (or backwards) sections */ + if (rx > lx) + { + int lxs, rxs; + + /* Find pixel bounds for span. */ + lxi = xFixedToInt (lx); + rxi = xFixedToInt (rx); + + /* Sample coverage for edge pixels */ + lxs = RenderSamplesX (lx, 8); + rxs = RenderSamplesX (rx, 8); + + /* Add coverage across row */ + if (lxi == rxi) + { + ap[lxi] = clip255 (ap[lxi] + rxs - lxs); + } + else + { + ap[lxi] = clip255 (ap[lxi] + N_X_FRAC(8) - lxs); + + /* Move forward so that lxi/rxi is the pixel span */ + lxi++; + + /* Don't bother trying to optimize the fill unless + * the span is longer than 4 pixels. */ + if (rxi - lxi > 4) + { + if (fill_start < 0) + { + fill_start = lxi; + fill_end = rxi; + fill_size++; + } + else + { + if (lxi >= fill_end || rxi < fill_start) + { + /* We're beyond what we saved, just fill it */ + add_saturate_8 (ap + fill_start, + fill_size * N_X_FRAC(8), + fill_end - fill_start); + fill_start = lxi; + fill_end = rxi; + fill_size = 1; + } + else + { + /* Update fill_start */ + if (lxi > fill_start) + { + add_saturate_8 (ap + fill_start, + fill_size * N_X_FRAC(8), + lxi - fill_start); + fill_start = lxi; + } + else if (lxi < fill_start) + { + add_saturate_8 (ap + lxi, N_X_FRAC(8), + fill_start - lxi); + } + + /* Update fill_end */ + if (rxi < fill_end) + { + add_saturate_8 (ap + rxi, + fill_size * N_X_FRAC(8), + fill_end - rxi); + fill_end = rxi; + } + else if (fill_end < rxi) + { + add_saturate_8 (ap + fill_end, + N_X_FRAC(8), + rxi - fill_end); + } + fill_size++; + } + } + } + else + { + add_saturate_8 (ap + lxi, N_X_FRAC(8), rxi - lxi); + } + + /* Do not add in a 0 alpha here. This check is + * necessary to avoid a buffer overrun, (when rx + * is exactly on a pixel boundary). */ + if (rxs) + ap[rxi] = clip255 (ap[rxi] + rxs); + } + } + + if (y == b) { + /* We're done, make sure we clean up any remaining fill. */ + if (fill_start != fill_end) { + if (fill_size == N_Y_FRAC(8)) + { + memset (ap + fill_start, 0xff, fill_end - fill_start); + } + else + { + add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8), + fill_end - fill_start); + } + } + break; + } + + if (xFixedFrac (y) != Y_FRAC_LAST(8)) + { + RenderEdgeStepSmall (l); + RenderEdgeStepSmall (r); + y += STEP_Y_SMALL(8); + } + else + { + RenderEdgeStepBig (l); + RenderEdgeStepBig (r); + y += STEP_Y_BIG(8); + if (fill_start != fill_end) + { + if (fill_size == N_Y_FRAC(8)) + { + memset (ap + fill_start, 0xff, fill_end - fill_start); + } + else + { + add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8), + fill_end - fill_start); + } + fill_start = fill_end = -1; + fill_size = 0; + } + line += stride; + } + } +} + void fbRasterizeEdges (FbBits *buf, int bpp,