diff --git a/fb/fbpict.c b/fb/fbpict.c index 072f8d891..660112c7d 100644 --- a/fb/fbpict.c +++ b/fb/fbpict.c @@ -105,6 +105,51 @@ fbIn (CARD32 x, CARD8 y) return m|n|o|p; } +#define genericCombine24(a,b,c,d) (((a)*(c)+(b)*(d))) + +#define fastcombine32(alpha, source, destval, destptr, dstrb, dstag, drb, dag) \ + dstrb=destval&0xFF00FF; dstag=(destval>>8)&0xFF00FF; \ + drb=((source&0xFF00FF)-dstrb)*alpha; dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \ + *destptr++=((((drb>>8) + dstrb) & 0x00FF00FF) | ((((dag>>8) + dstag) << 8) & 0xFF00FF00)); \ + +#define fastcombine32(alpha, source, destval, destptr, dstrb, dstag, drb, dag) \ + dstrb=destval&0xFF00FF; dstag=(destval>>8)&0xFF00FF; \ + drb=((source&0xFF00FF)-dstrb)*alpha; dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \ + *destptr++=((((drb>>8) + dstrb) & 0x00FF00FF) | ((((dag>>8) + dstag) << 8) & 0xFF00FF00)); \ + +// Note: this macro expects 6 bits of alpha, not 8! +#define fastCombine0565(alpha, source, destval, destptr) { \ + CARD16 dstrb = destval & 0xf81f; CARD16 dstg = destval & 0x7e0; \ + CARD32 drb = ((source&0xf81f)-dstrb)*alpha; CARD32 dg=((source & 0x7e0)-dstg)*alpha; \ + destptr= ((((drb>>6) + dstrb)&0xf81f) | (((dg>>6) + dstg) & 0x7e0)); \ + } + +#if IMAGE_BYTE_ORDER == LSBFirst + #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \ + temp=count&3; \ + where-=temp; \ + workingWhere=(CARD32 *)where; \ + workingVal=*workingWhere++; \ + count=4-temp; \ + workingVal>>=(8*temp) + #define readPacked(where,x,y,z) {if(!(x)) { (x)=4; y=*z++; } where=(y)&0xff; (y)>>=8; (x)--;} + #define readPackedSource(where) readPacked(where,ws,workingSource,wsrc) + #define readPackedDest(where) readPacked(where,wd,workingiDest,widst) + #define writePacked(what) workingoDest>>=8; workingoDest|=(what<<24); ww--; if(!ww) { ww=4; *wodst++=workingoDest; } +#else + #warning "I havn't tested fbCompositeTrans_0888xnx0888() on big endian yet!" + #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \ + temp=count&3; \ + where-=temp; \ + workingWhere=(CARD32 *)where; \ + workingVal=*workingWhere++; \ + count=4-temp; \ + workingVal<<=(8*temp) + #define readPacked(where,x,y,z) {if(!(x)) { (x)=4; y=*z++; } where=(y)>>24; (y)<<=8; (x)--;} + #define readPackedSource(where) readPacked(where,ws,workingSource,wsrc) + #define readPackedDest(where) readPacked(where,wd,workingiDest,widst) + #define writePacked(what) workingoDest<<=8; workingoDest|=what; ww--; if(!ww) { ww=4; *wodst++=workingoDest; } +#endif /* * Naming convention: * @@ -248,6 +293,7 @@ fbCompositeSolidMask_nx8888x8888C (CARD8 op, fbFinishAccess (pDst->pDrawable); } +#define srcAlphaCombine24(a,b) genericCombine24(a,b,srca,srcia) void fbCompositeSolidMask_nx8x0888 (CARD8 op, PicturePtr pSrc, @@ -262,52 +308,86 @@ fbCompositeSolidMask_nx8x0888 (CARD8 op, CARD16 width, CARD16 height) { - CARD32 src, srca; - CARD8 *dstLine, *dst; + CARD32 src, srca, srcia; + CARD8 *dstLine, *dst, *edst; CARD32 d; CARD8 *maskLine, *mask, m; FbStride dstStride, maskStride; CARD16 w; + CARD32 rs,gs,bs,rd,gd,bd; fbComposeGetSolid(pSrc, src, pDst->format); srca = src >> 24; + srcia = 255-srca; if (src == 0) return; + rs=src&0xff; + gs=(src>>8)&0xff; + bs=(src>>16)&0xff; + fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3); fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); - while (height--) - { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - while (w--) - { - m = READ(mask++); - if (m == 0xff) - { - if (srca == 0xff) - d = src; - else - { - d = Fetch24(dst); - d = fbOver24 (src, d); + while (height--) + { + // fixme: cleanup unused + unsigned int wt,wd; + CARD32 workingiDest; + CARD32 *widst; + + edst=dst = dstLine; + dstLine += dstStride; + mask = maskLine; + maskLine += maskStride; + w = width; + +#ifndef NO_MASKED_PACKED_READ + setupPackedReader(wd,wt,edst,widst,workingiDest); +#endif + + while (w--) + { +#ifndef NO_MASKED_PACKED_READ + readPackedDest(rd); + readPackedDest(gd); + readPackedDest(bd); +#else + rd= *edst++; + gd= *edst++; + bd= *edst++; +#endif + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + *dst++=rs; + *dst++=gs; + *dst++=bs; + } + else + { + *dst++=(srcAlphaCombine24(rs, rd)>>8); + *dst++=(srcAlphaCombine24(gs, gd)>>8); + *dst++=(srcAlphaCombine24(bs, bd)>>8); + } + } + else if (m) + { + int na=(srca*(int)m)>>8; + int nia=255-na; + *dst++=(genericCombine24(rs, rd, na, nia)>>8); + *dst++=(genericCombine24(gs, gd, na, nia)>>8); + *dst++=(genericCombine24(bs, bd, na, nia)>>8); + } + else + { + dst+=3; + } } - Store24(dst,d); - } - else if (m) - { - d = fbOver24 (fbIn(src,m), Fetch24(dst)); - Store24(dst,d); - } - dst += 3; } - } fbFinishAccess (pMask->pDrawable); fbFinishAccess (pDst->pDrawable); @@ -327,54 +407,57 @@ fbCompositeSolidMask_nx8x0565 (CARD8 op, CARD16 width, CARD16 height) { - CARD32 src, srca; - CARD16 *dstLine, *dst; - CARD32 d; - CARD8 *maskLine, *mask, m; - FbStride dstStride, maskStride; - CARD16 w; - - fbComposeGetSolid(pSrc, src, pDst->format); - - srca = src >> 24; - if (src == 0) - return; - - fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); - - while (height--) - { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - while (w--) - { - m = READ(mask++); - if (m == 0xff) - { - if (srca == 0xff) - d = src; - else - { - d = READ(dst); - d = fbOver24 (src, cvt0565to8888(d)); - } - WRITE(dst, cvt8888to0565(d)); - } - else if (m) - { - d = READ(dst); - d = fbOver24 (fbIn(src,m), cvt0565to8888(d)); - WRITE(dst, cvt8888to0565(d)); - } - dst++; - } - } - + CARD32 src, srca,na, rsrca; + CARD16 *dstLine, *dst; + CARD16 d; + CARD8 *maskLine, *mask, m; + FbStride dstStride, maskStride; + CARD16 w,src16; + + fbComposeGetSolid(pSrc, src, pDst->format); + src16 = cvt8888to0565(src); + + rsrca = src >> 24; + srca=rsrca>>2; + if (src == 0) + return; + + fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); + fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + mask = maskLine; + maskLine += maskStride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + *dst=src16; + } + else + { + d = *dst; + fastCombine0565(srca, src16, d, *dst++); + } + } + else if (m) + { + na=(rsrca*(int)m)>>10; + d = *dst; + fastCombine0565(na, src16, d, *dst++); + } + else + dst++; + } + } fbFinishAccess (pMask->pDrawable); fbFinishAccess (pDst->pDrawable); } @@ -906,7 +989,7 @@ fbCompositeTrans_0565xnx0565(CARD8 op, CARD32 s_32, d_32, i_32, r_32; fbComposeGetSolid (pMask, mask, pDst->format); - maskAlpha = mask >> 24; + maskAlpha = mask >> 26; if (!maskAlpha) return; @@ -922,31 +1005,276 @@ fbCompositeTrans_0565xnx0565(CARD8 op, fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - while (w--) - { - s_16 = READ(src++); - s_32 = cvt0565to8888(s_16); - d_16 = READ(dst); - d_32 = cvt0565to8888(d_16); - - i_32 = fbIn24 (s_32, maskAlpha); - r_32 = fbOver24 (i_32, d_32); - r_16 = cvt8888to0565(r_32); - WRITE(dst++, r_16); - } - } + { + CARD32 *isrc; + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + if(((int)src&1)==1) + { + s_16 = *src++; + d_16 = *dst; + fastCombine0565(maskAlpha, s_16, d_16, *dst++); + w--; + } + isrc=(CARD32 *)src; + while (w>1) + { + s_32=*isrc++; +#if IMAGE_BYTE_ORDER == LSBFirst + s_16=s_32&0xffff; +#else + s_16=s_32>>16; +#endif + d_16 = *dst; + fastCombine0565(maskAlpha, s_16, d_16, *dst++); + #if IMAGE_BYTE_ORDER == LSBFirst + s_16=s_32>>16; + #else + s_16=s_32&0xffff; + #endif + d_16 = *dst; + fastCombine0565(maskAlpha, s_16, d_16, *dst++); + w-=2; + } + src=(CARD16 *)isrc; + if(w!=0) + { + s_16 = *src; + d_16 = *dst; + fastCombine0565(maskAlpha, s_16, d_16, *dst); + } + } fbFinishAccess (pSrc->pDrawable); fbFinishAccess (pDst->pDrawable); } +// macros for "i can't believe it's not fast" packed pixel handling +#define alphamaskCombine24(a,b) genericCombine24(a,b,maskAlpha,maskiAlpha) +static void +fbCompositeTrans_0888xnx0888(CARD8 op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) + { + CARD8 *dstLine, *dst,*idst; + CARD8 *srcLine, *src; + FbStride dstStride, srcStride; + CARD16 w; + FbBits mask; + CARD16 maskAlpha,maskiAlpha; + + fbComposeGetSolid (pMask, mask, pDst->format); + maskAlpha = mask >> 24; + maskiAlpha= 255-maskAlpha; + + if (!maskAlpha) + return; + //if (maskAlpha == 0xff) + //{ + //fbCompositeSrc_0888x0888 (op, pSrc, pMask, pDst, + // xSrc, ySrc, xMask, yMask, xDst, yDst, + // width, height); + //return; + //} + + fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 3); + fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3); + + { + unsigned int ws,wt,wd,ww; + CARD32 workingSource; + CARD32 *wsrc; + CARD32 rs,gs,bs; + CARD32 rd,gd,bd; + + CARD32 workingiDest,workingoDest; + CARD32 *widst,*wodst; + + + // are xSrc and xDst at the same alignment? if not, we need to be complicated :) + //if(0==0) + if( (((xSrc*3)&3)!=((xDst*3)&3)) || (srcStride&3)!=0 || (dstStride&3)!=0) + { + while (height--) + { + idst=dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width*3; + + setupPackedReader(wd,wt,idst,widst,workingiDest); + ww=(int)dst; + wt=ww&3; + dst-=wt; + wodst=(CARD32 *)dst; + workingoDest=*wodst; + ww=4-wt; +#if IMAGE_BYTE_ORDER == LSBFirst + workingoDest<<=(8*(ww+1)); +#else + workingoDest>>=(8*(ww+1)); +#endif + + // get to word aligned + switch(!(int)src&3) + { + case 1: + readPackedDest(rd); + rd=alphamaskCombine24(*src++, rd)>>8; + writePacked(rd); + w--; if(w==0) break; + case 2: + readPackedDest(rd); + rd=alphamaskCombine24(*src++, rd)>>8; + writePacked(rd); + w--; if(w==0) break; + case 3: + readPackedDest(rd); + rd=alphamaskCombine24(*src++, rd)>>8; + writePacked(rd); + w--; if(w==0) break; + } + wsrc=(CARD32 *)src; + while (w>3) + { + rs=*wsrc++; + // FIXME: write a version of readPackedDest() which + // can collect 4 bytes at once if we're on a boundry (which we're + // actually guarenteed not to be in this version, but do it anyhow), and can + // collect as 2 16bit words on a 2byte boundry, and then use the 32bit combine here + #if IMAGE_BYTE_ORDER == LSBFirst + readPackedDest(rd); + rd=alphamaskCombine24(rs&0xff, rd)>>8; + writePacked(rd); + + readPackedDest(rd); + rd=alphamaskCombine24((rs>>8)&0xff, rd)>>8; + writePacked(rd); + + readPackedDest(rd); + rd=alphamaskCombine24((rs>>16)&0xff, rd)>>8; + writePacked(rd); + + readPackedDest(rd); + rd=alphamaskCombine24(rs>>24, rd)>>8; + writePacked(rd); + #else + readPackedDest(rd); + rd=alphamaskCombine24(rs>>24, rd)>>8; + writePacked(rd); + + readPackedDest(rd); + rd=alphamaskCombine24((rs>>16)&0xff, rd)>>8; + writePacked(rd); + + readPackedDest(rd); + rd=alphamaskCombine24((rs>>8)&0xff, rd)>>8; + writePacked(rd); + + readPackedDest(rd); + rd=alphamaskCombine24(rs&0xff, rd)>>8; + writePacked(rd); + #endif + w-=4; + } + src=(CARD8 *)wsrc; + switch(w) + { + case 3: + readPackedDest(rd); + rd=alphamaskCombine24(*src++, rd)>>8; + writePacked(rd); + case 2: + readPackedDest(rd); + rd=alphamaskCombine24(*src++, rd)>>8; + writePacked(rd); + case 1: + readPackedDest(rd); + rd=alphamaskCombine24(*src++, rd)>>8; + writePacked(rd); + } + dst=(CARD8 *)wodst; + switch(ww) + { + case 1: + dst[2]=(workingoDest>>8)&0xff; + case 2: + dst[1]=(workingoDest>>16)&0xff; + case 3: + dst[0]=workingoDest>>24; + } + } + } + else + { + while (height--) + { + idst=dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width*3; + // get to word aligned + switch(!(int)src&3) + { + case 1: + rd=alphamaskCombine24(*src++, *dst)>>8; + *dst++=rd; + w--; if(w==0) break; + case 2: + rd=alphamaskCombine24(*src++, *dst)>>8; + *dst++=rd; + w--; if(w==0) break; + case 3: + rd=alphamaskCombine24(*src++, *dst)>>8; + *dst++=rd; + w--; if(w==0) break; + } + wsrc=(CARD32 *)src; + widst=(CARD32 *)dst; + + register CARD32 t1, t2, t3, t4; + while(w>3) + { + rs = *wsrc++; + rd = *widst; + fastcombine32(maskAlpha, rs, rd, widst, t1, t2, t3, t4); + w-=4; + } + src=(CARD8 *)wsrc; + dst=(CARD8 *)widst; + switch(w) + { + case 3: + rd=alphamaskCombine24(*src++, *dst)>>8; + *dst++=rd; + case 2: + rd=alphamaskCombine24(*src++, *dst)>>8; + *dst++=rd; + case 1: + rd=alphamaskCombine24(*src++, *dst)>>8; + *dst++=rd; + } + } + } + } + } + + /* * Simple bitblt */ @@ -972,30 +1300,81 @@ fbCompositeSrcSrc_nxn (CARD8 op, int dstXoff, dstYoff; int srcBpp; int dstBpp; + // these need to be signed now! + int iwidth=width; + int iheight=height; Bool reverse = FALSE; Bool upsidedown = FALSE; + int initialWidth=width; + int initialX=xDst; + + // FIXME: this is possibly the worst piece of code I've ever written. + // My main objection to it, is that it is incrfedibly slow in a few cases, due to the + // call-per-repeat structure of it - the *correct* solution is to implement + // repeat into fbBlt(), but that's a nontrivial job, and it's far more + // important to get the "requireRepeat" stuff implented functionally + // first, *then* make it fast. + // -- jj + Bool srcRepeat=pSrc->repeat; + CARD32 srcHeight=pSrc->pDrawable->height; + CARD32 srcWidth=pSrc->pDrawable->width; + + fbGetDrawable(pSrc->pDrawable,src,srcStride,srcBpp,srcXoff,srcYoff); + fbGetDrawable(pDst->pDrawable,dst,dstStride,dstBpp,dstXoff,dstYoff); + + if(srcRepeat) + { + xSrc%=srcWidth; + ySrc%=srcHeight; + } + + while(iheight>0) + { + int wheight=iheight; + if(wheight>(srcHeight-ySrc)) + wheight=(srcHeight-ySrc); + iwidth=initialWidth; + xDst=initialX; + while(iwidth>0) + { + int wwidth=iwidth; + if(wwidth>(srcWidth-xSrc)) + wwidth=(srcWidth-xSrc); + + fbBlt (src + (ySrc + srcYoff) * srcStride, + srcStride, + (xSrc + srcXoff) * srcBpp, + + dst + (yDst + dstYoff) * dstStride, + dstStride, + (xDst + dstXoff) * dstBpp, + + (wwidth) * dstBpp, + (wheight), + + GXcopy, + FB_ALLONES, + dstBpp, + + reverse, + upsidedown); + if(!srcRepeat) + iwidth=0; + else + { + xDst+=wwidth; + iwidth-=wwidth; + } + } + if(!srcRepeat) + iheight=0; + else + { + yDst+=wheight; + iheight-=wheight; + } + } - fbGetDrawable(pSrc->pDrawable,src,srcStride,srcBpp,srcXoff,srcYoff); - fbGetDrawable(pDst->pDrawable,dst,dstStride,dstBpp,dstXoff,dstYoff); - - fbBlt (src + (ySrc + srcYoff) * srcStride, - srcStride, - (xSrc + srcXoff) * srcBpp, - - dst + (yDst + dstYoff) * dstStride, - dstStride, - (xDst + dstXoff) * dstBpp, - - (width) * dstBpp, - (height), - - GXcopy, - FB_ALLONES, - dstBpp, - - reverse, - upsidedown); - fbFinishAccess(pSrc->pDrawable); fbFinishAccess(pDst->pDrawable); } @@ -1294,6 +1673,11 @@ fbComposite (CARD8 op, if (pDst->format == pSrc->format) func = fbCompositeTrans_0565xnx0565; break; + case PICT_r8g8b8: + case PICT_b8g8r8: + if (pDst->format == pSrc->format) + func = fbCompositeTrans_0888xnx0888; + break; default: break; } @@ -1524,6 +1908,14 @@ fbComposite (CARD8 op, n = REGION_NUM_RECTS (®ion); pbox = REGION_RECTS (®ion); + // FIXME: this is bascially a "white list" of composites that work + // with repeat until they are all implented. Once that's done, we + // remove the checks below entirely + if(func==fbCompositeSrcSrc_nxn) + { + srcRepeat=maskRepeat=FALSE; + } + while (n--) { h = pbox->y2 - pbox->y1;