Merge r1.2 of fbedge.c from xserver CVS: Optimize spans where the same

value is being added to multiple pixels. This improves the speed of
    rendering wide trapezoids. I tested this with a small set of xlibs
    cairo-benchmarks with Xvfb and saw a 4% decrease in time taken to run
    them.
This commit is contained in:
Eric Anholt 2005-10-03 10:20:29 +00:00
parent a7e3c6fa8c
commit c024262eae

View File

@ -22,6 +22,8 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
#include <string.h>
#ifdef HAVE_DIX_CONFIG_H
#include <dix-config.h>
#endif
@ -35,31 +37,6 @@
#include "renderedge.h"
#include "fbpict.h"
/*
* 8 bit alpha
*/
#define N_BITS 8
#define rasterizeEdges fbRasterizeEdges8
#define DefineAlpha(line,x) \
CARD8 *__ap = (CARD8 *) line + (x)
#define StepAlpha __ap++
#define AddAlpha(a) { \
CARD16 __a = a + *__ap; \
*__ap = ((CARD8) ((__a) | (0 - ((__a) >> 8)))); \
}
#include "fbedgeimp.h"
#undef AddAlpha
#undef StepAlpha
#undef DefineAlpha
#undef rasterizeEdges
#undef N_BITS
/*
* 4 bit alpha
*/
@ -109,6 +86,208 @@
#undef rasterizeEdges
#undef N_BITS
/*
* 8 bit alpha
*/
static INLINE CARD8
clip255 (int x)
{
if (x > 255) return 255;
return x;
}
static INLINE void
add_saturate_8 (CARD8 *buf, int value, int length)
{
while (length--)
{
*buf = clip255 (*buf + value);
buf++;
}
}
/*
* We want to detect the case where we add the same value to a long
* span of pixels. The triangles on the end are filled in while we
* count how many sub-pixel scanlines contribute to the middle section.
*
* +--------------------------+
* fill_height =| \ /
* +------------------+
* |================|
* fill_start fill_end
*/
static void
fbRasterizeEdges8 (FbBits *buf,
int width,
int stride,
RenderEdge *l,
RenderEdge *r,
xFixed t,
xFixed b)
{
xFixed y = t;
FbBits *line;
int fill_start = -1, fill_end = -1;
int fill_size = 0;
line = buf + xFixedToInt (y) * stride;
for (;;)
{
CARD8 *ap = (CARD8 *) line;
xFixed lx, rx;
int lxi, rxi;
/* clip X */
lx = l->x;
if (lx < 0)
lx = 0;
rx = r->x;
if (xFixedToInt (rx) >= width)
rx = IntToxFixed (width);
/* Skip empty (or backwards) sections */
if (rx > lx)
{
int lxs, rxs;
/* Find pixel bounds for span. */
lxi = xFixedToInt (lx);
rxi = xFixedToInt (rx);
/* Sample coverage for edge pixels */
lxs = RenderSamplesX (lx, 8);
rxs = RenderSamplesX (rx, 8);
/* Add coverage across row */
if (lxi == rxi)
{
ap[lxi] = clip255 (ap[lxi] + rxs - lxs);
}
else
{
ap[lxi] = clip255 (ap[lxi] + N_X_FRAC(8) - lxs);
/* Move forward so that lxi/rxi is the pixel span */
lxi++;
/* Don't bother trying to optimize the fill unless
* the span is longer than 4 pixels. */
if (rxi - lxi > 4)
{
if (fill_start < 0)
{
fill_start = lxi;
fill_end = rxi;
fill_size++;
}
else
{
if (lxi >= fill_end || rxi < fill_start)
{
/* We're beyond what we saved, just fill it */
add_saturate_8 (ap + fill_start,
fill_size * N_X_FRAC(8),
fill_end - fill_start);
fill_start = lxi;
fill_end = rxi;
fill_size = 1;
}
else
{
/* Update fill_start */
if (lxi > fill_start)
{
add_saturate_8 (ap + fill_start,
fill_size * N_X_FRAC(8),
lxi - fill_start);
fill_start = lxi;
}
else if (lxi < fill_start)
{
add_saturate_8 (ap + lxi, N_X_FRAC(8),
fill_start - lxi);
}
/* Update fill_end */
if (rxi < fill_end)
{
add_saturate_8 (ap + rxi,
fill_size * N_X_FRAC(8),
fill_end - rxi);
fill_end = rxi;
}
else if (fill_end < rxi)
{
add_saturate_8 (ap + fill_end,
N_X_FRAC(8),
rxi - fill_end);
}
fill_size++;
}
}
}
else
{
add_saturate_8 (ap + lxi, N_X_FRAC(8), rxi - lxi);
}
/* Do not add in a 0 alpha here. This check is
* necessary to avoid a buffer overrun, (when rx
* is exactly on a pixel boundary). */
if (rxs)
ap[rxi] = clip255 (ap[rxi] + rxs);
}
}
if (y == b) {
/* We're done, make sure we clean up any remaining fill. */
if (fill_start != fill_end) {
if (fill_size == N_Y_FRAC(8))
{
memset (ap + fill_start, 0xff, fill_end - fill_start);
}
else
{
add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8),
fill_end - fill_start);
}
}
break;
}
if (xFixedFrac (y) != Y_FRAC_LAST(8))
{
RenderEdgeStepSmall (l);
RenderEdgeStepSmall (r);
y += STEP_Y_SMALL(8);
}
else
{
RenderEdgeStepBig (l);
RenderEdgeStepBig (r);
y += STEP_Y_BIG(8);
if (fill_start != fill_end)
{
if (fill_size == N_Y_FRAC(8))
{
memset (ap + fill_start, 0xff, fill_end - fill_start);
}
else
{
add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8),
fill_end - fill_start);
}
fill_start = fill_end = -1;
fill_size = 0;
}
line += stride;
}
}
}
void
fbRasterizeEdges (FbBits *buf,
int bpp,