Merge r1.2 of fbedge.c from xserver CVS: Optimize spans where the same
value is being added to multiple pixels. This improves the speed of rendering wide trapezoids. I tested this with a small set of xlibs cairo-benchmarks with Xvfb and saw a 4% decrease in time taken to run them.
This commit is contained in:
parent
a7e3c6fa8c
commit
c024262eae
229
fb/fbedge.c
229
fb/fbedge.c
|
@ -22,6 +22,8 @@
|
|||
* PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef HAVE_DIX_CONFIG_H
|
||||
#include <dix-config.h>
|
||||
#endif
|
||||
|
@ -35,31 +37,6 @@
|
|||
#include "renderedge.h"
|
||||
#include "fbpict.h"
|
||||
|
||||
/*
|
||||
* 8 bit alpha
|
||||
*/
|
||||
|
||||
#define N_BITS 8
|
||||
#define rasterizeEdges fbRasterizeEdges8
|
||||
|
||||
#define DefineAlpha(line,x) \
|
||||
CARD8 *__ap = (CARD8 *) line + (x)
|
||||
|
||||
#define StepAlpha __ap++
|
||||
|
||||
#define AddAlpha(a) { \
|
||||
CARD16 __a = a + *__ap; \
|
||||
*__ap = ((CARD8) ((__a) | (0 - ((__a) >> 8)))); \
|
||||
}
|
||||
|
||||
#include "fbedgeimp.h"
|
||||
|
||||
#undef AddAlpha
|
||||
#undef StepAlpha
|
||||
#undef DefineAlpha
|
||||
#undef rasterizeEdges
|
||||
#undef N_BITS
|
||||
|
||||
/*
|
||||
* 4 bit alpha
|
||||
*/
|
||||
|
@ -109,6 +86,208 @@
|
|||
#undef rasterizeEdges
|
||||
#undef N_BITS
|
||||
|
||||
/*
|
||||
* 8 bit alpha
|
||||
*/
|
||||
|
||||
static INLINE CARD8
|
||||
clip255 (int x)
|
||||
{
|
||||
if (x > 255) return 255;
|
||||
return x;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
add_saturate_8 (CARD8 *buf, int value, int length)
|
||||
{
|
||||
while (length--)
|
||||
{
|
||||
*buf = clip255 (*buf + value);
|
||||
buf++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to detect the case where we add the same value to a long
|
||||
* span of pixels. The triangles on the end are filled in while we
|
||||
* count how many sub-pixel scanlines contribute to the middle section.
|
||||
*
|
||||
* +--------------------------+
|
||||
* fill_height =| \ /
|
||||
* +------------------+
|
||||
* |================|
|
||||
* fill_start fill_end
|
||||
*/
|
||||
static void
|
||||
fbRasterizeEdges8 (FbBits *buf,
|
||||
int width,
|
||||
int stride,
|
||||
RenderEdge *l,
|
||||
RenderEdge *r,
|
||||
xFixed t,
|
||||
xFixed b)
|
||||
{
|
||||
xFixed y = t;
|
||||
FbBits *line;
|
||||
int fill_start = -1, fill_end = -1;
|
||||
int fill_size = 0;
|
||||
|
||||
line = buf + xFixedToInt (y) * stride;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
CARD8 *ap = (CARD8 *) line;
|
||||
xFixed lx, rx;
|
||||
int lxi, rxi;
|
||||
|
||||
/* clip X */
|
||||
lx = l->x;
|
||||
if (lx < 0)
|
||||
lx = 0;
|
||||
rx = r->x;
|
||||
if (xFixedToInt (rx) >= width)
|
||||
rx = IntToxFixed (width);
|
||||
|
||||
/* Skip empty (or backwards) sections */
|
||||
if (rx > lx)
|
||||
{
|
||||
int lxs, rxs;
|
||||
|
||||
/* Find pixel bounds for span. */
|
||||
lxi = xFixedToInt (lx);
|
||||
rxi = xFixedToInt (rx);
|
||||
|
||||
/* Sample coverage for edge pixels */
|
||||
lxs = RenderSamplesX (lx, 8);
|
||||
rxs = RenderSamplesX (rx, 8);
|
||||
|
||||
/* Add coverage across row */
|
||||
if (lxi == rxi)
|
||||
{
|
||||
ap[lxi] = clip255 (ap[lxi] + rxs - lxs);
|
||||
}
|
||||
else
|
||||
{
|
||||
ap[lxi] = clip255 (ap[lxi] + N_X_FRAC(8) - lxs);
|
||||
|
||||
/* Move forward so that lxi/rxi is the pixel span */
|
||||
lxi++;
|
||||
|
||||
/* Don't bother trying to optimize the fill unless
|
||||
* the span is longer than 4 pixels. */
|
||||
if (rxi - lxi > 4)
|
||||
{
|
||||
if (fill_start < 0)
|
||||
{
|
||||
fill_start = lxi;
|
||||
fill_end = rxi;
|
||||
fill_size++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (lxi >= fill_end || rxi < fill_start)
|
||||
{
|
||||
/* We're beyond what we saved, just fill it */
|
||||
add_saturate_8 (ap + fill_start,
|
||||
fill_size * N_X_FRAC(8),
|
||||
fill_end - fill_start);
|
||||
fill_start = lxi;
|
||||
fill_end = rxi;
|
||||
fill_size = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Update fill_start */
|
||||
if (lxi > fill_start)
|
||||
{
|
||||
add_saturate_8 (ap + fill_start,
|
||||
fill_size * N_X_FRAC(8),
|
||||
lxi - fill_start);
|
||||
fill_start = lxi;
|
||||
}
|
||||
else if (lxi < fill_start)
|
||||
{
|
||||
add_saturate_8 (ap + lxi, N_X_FRAC(8),
|
||||
fill_start - lxi);
|
||||
}
|
||||
|
||||
/* Update fill_end */
|
||||
if (rxi < fill_end)
|
||||
{
|
||||
add_saturate_8 (ap + rxi,
|
||||
fill_size * N_X_FRAC(8),
|
||||
fill_end - rxi);
|
||||
fill_end = rxi;
|
||||
}
|
||||
else if (fill_end < rxi)
|
||||
{
|
||||
add_saturate_8 (ap + fill_end,
|
||||
N_X_FRAC(8),
|
||||
rxi - fill_end);
|
||||
}
|
||||
fill_size++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
add_saturate_8 (ap + lxi, N_X_FRAC(8), rxi - lxi);
|
||||
}
|
||||
|
||||
/* Do not add in a 0 alpha here. This check is
|
||||
* necessary to avoid a buffer overrun, (when rx
|
||||
* is exactly on a pixel boundary). */
|
||||
if (rxs)
|
||||
ap[rxi] = clip255 (ap[rxi] + rxs);
|
||||
}
|
||||
}
|
||||
|
||||
if (y == b) {
|
||||
/* We're done, make sure we clean up any remaining fill. */
|
||||
if (fill_start != fill_end) {
|
||||
if (fill_size == N_Y_FRAC(8))
|
||||
{
|
||||
memset (ap + fill_start, 0xff, fill_end - fill_start);
|
||||
}
|
||||
else
|
||||
{
|
||||
add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8),
|
||||
fill_end - fill_start);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (xFixedFrac (y) != Y_FRAC_LAST(8))
|
||||
{
|
||||
RenderEdgeStepSmall (l);
|
||||
RenderEdgeStepSmall (r);
|
||||
y += STEP_Y_SMALL(8);
|
||||
}
|
||||
else
|
||||
{
|
||||
RenderEdgeStepBig (l);
|
||||
RenderEdgeStepBig (r);
|
||||
y += STEP_Y_BIG(8);
|
||||
if (fill_start != fill_end)
|
||||
{
|
||||
if (fill_size == N_Y_FRAC(8))
|
||||
{
|
||||
memset (ap + fill_start, 0xff, fill_end - fill_start);
|
||||
}
|
||||
else
|
||||
{
|
||||
add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8),
|
||||
fill_end - fill_start);
|
||||
}
|
||||
fill_start = fill_end = -1;
|
||||
fill_size = 0;
|
||||
}
|
||||
line += stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fbRasterizeEdges (FbBits *buf,
|
||||
int bpp,
|
||||
|
|
Loading…
Reference in New Issue
Block a user