xserver-multidpi/hw/kdrive/ati/ati_draw.c
2004-01-11 00:10:34 +00:00

607 lines
15 KiB
C

/*
* $Id$
*
* Copyright © 2003 Eric Anholt
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Eric Anholt not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Eric Anholt makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* ERIC ANHOLT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL ERIC ANHOLT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Header$ */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "ati.h"
#include "ati_reg.h"
#include "ati_draw.h"
#ifdef USE_DRI
#include "radeon_common.h"
#include "r128_common.h"
#include "ati_sarea.h"
#endif /* USE_DRI */
/* Hardware raster-op codes for solid fills, indexed by the X11 GC function
 * in the order listed below (GXclear first, GXset last).
 *
 * Each entry equals the boolean expression in its trailing comment evaluated
 * bitwise with src = 0xf0 and dst = 0xaa (see the GXcopy and GXnoop rows).
 * NOTE(review): the code that reads this table is not in this file --
 * presumably the solid-fill paths in ati_drawtmp.h; confirm there.
 */
CARD8 ATISolidRop[16] = {
/* GXclear */ 0x00, /* 0 */
/* GXand */ 0xa0, /* src AND dst */
/* GXandReverse */ 0x50, /* src AND NOT dst */
/* GXcopy */ 0xf0, /* src */
/* GXandInverted*/ 0x0a, /* NOT src AND dst */
/* GXnoop */ 0xaa, /* dst */
/* GXxor */ 0x5a, /* src XOR dst */
/* GXor */ 0xfa, /* src OR dst */
/* GXnor */ 0x05, /* NOT src AND NOT dst */
/* GXequiv */ 0xa5, /* NOT src XOR dst */
/* GXinvert */ 0x55, /* NOT dst */
/* GXorReverse */ 0xf5, /* src OR NOT dst */
/* GXcopyInverted*/ 0x0f, /* NOT src */
/* GXorInverted */ 0xaf, /* NOT src OR dst */
/* GXnand */ 0x5f, /* NOT src OR NOT dst */
/* GXset */ 0xff, /* 1 */
};
/* Hardware raster-op codes for blits, indexed by the X11 GC function in the
 * order listed below (GXclear first, GXset last).
 *
 * Each entry equals the boolean expression in its trailing comment evaluated
 * bitwise with src = 0xcc and dst = 0xaa (see the GXcopy and GXnoop rows).
 * NOTE(review): the code that reads this table is not in this file --
 * presumably the copy paths in ati_drawtmp.h; confirm there.
 */
CARD8 ATIBltRop[16] = {
/* GXclear */ 0x00, /* 0 */
/* GXand */ 0x88, /* src AND dst */
/* GXandReverse */ 0x44, /* src AND NOT dst */
/* GXcopy */ 0xcc, /* src */
/* GXandInverted*/ 0x22, /* NOT src AND dst */
/* GXnoop */ 0xaa, /* dst */
/* GXxor */ 0x66, /* src XOR dst */
/* GXor */ 0xee, /* src OR dst */
/* GXnor */ 0x11, /* NOT src AND NOT dst */
/* GXequiv */ 0x99, /* NOT src XOR dst */
/* GXinvert */ 0x55, /* NOT dst */
/* GXorReverse */ 0xdd, /* src OR NOT dst */
/* GXcopyInverted*/ 0x33, /* NOT src */
/* GXorInverted */ 0xbb, /* NOT src OR dst */
/* GXnand */ 0x77, /* NOT src OR NOT dst */
/* GXset */ 0xff, /* 1 */
};
/* Source/destination blend-factor pairs for the R128 alpha blender, one entry
 * per compositing operator in the order given by the per-entry comments
 * (Clear first, Add last).  Every entry ORs exactly one
 * R128_ALPHA_BLEND_SRC_* factor with one R128_ALPHA_BLEND_DST_* factor.
 * NOTE(review): the table is not referenced in this file; presumably it is
 * indexed by the Render operator code inside r128_blendtmp.h -- confirm there.
 */
static CARD32 R128BlendOp[] = {
/* Clear */
R128_ALPHA_BLEND_SRC_ZERO | R128_ALPHA_BLEND_DST_ZERO,
/* Src */
R128_ALPHA_BLEND_SRC_ONE | R128_ALPHA_BLEND_DST_ZERO,
/* Dst */
R128_ALPHA_BLEND_SRC_ZERO | R128_ALPHA_BLEND_DST_ONE,
/* Over */
R128_ALPHA_BLEND_SRC_ONE | R128_ALPHA_BLEND_DST_INVSRCALPHA,
/* OverReverse */
R128_ALPHA_BLEND_SRC_INVDSTALPHA | R128_ALPHA_BLEND_DST_ONE,
/* In */
R128_ALPHA_BLEND_SRC_DSTALPHA | R128_ALPHA_BLEND_DST_ZERO,
/* InReverse */
R128_ALPHA_BLEND_SRC_ZERO | R128_ALPHA_BLEND_DST_SRCALPHA,
/* Out */
R128_ALPHA_BLEND_SRC_INVDSTALPHA | R128_ALPHA_BLEND_DST_ZERO,
/* OutReverse */
R128_ALPHA_BLEND_SRC_ZERO | R128_ALPHA_BLEND_DST_INVSRCALPHA,
/* Atop */
R128_ALPHA_BLEND_SRC_DSTALPHA | R128_ALPHA_BLEND_DST_INVSRCALPHA,
/* AtopReverse */
R128_ALPHA_BLEND_SRC_INVDSTALPHA | R128_ALPHA_BLEND_DST_SRCALPHA,
/* Xor */
R128_ALPHA_BLEND_SRC_INVDSTALPHA | R128_ALPHA_BLEND_DST_INVSRCALPHA,
/* Add */
R128_ALPHA_BLEND_SRC_ONE | R128_ALPHA_BLEND_DST_ONE,
};
/* File-scope acceleration state.  None of these are static.
 * NOTE(review): several are not referenced in this file at all and are
 * presumably set and read by the code pulled in from ati_drawtmp.h and
 * r128_blendtmp.h -- confirm there before changing any of them.
 */
/* NOTE(review): presumably the x/y direction of the copy currently set up. */
int copydx, copydy;
/* Cached number of free command-FIFO entries; refreshed from the hardware
 * and decremented by ATIWaitAvailMMIO().
 */
int fifo_size;
/* Screen whose accelerator is being driven; set by ATIDrawSync() and read by
 * the wait-for-idle and DMA helpers.
 */
ATIScreenInfo *accel_atis;
/* NOTE(review): presumably the pitch, offset and bits per pixel of the source
 * pixmap of the operation in progress -- confirm in the template headers.
 */
int src_pitch;
int src_offset;
int src_bpp;
/* If is_24bpp is set, then we are using the accelerator in 8-bit mode due
 * to it being broken for 24bpp, so coordinates have to be multiplied by 3.
 * ATIGetDatatypeBpp() updates this flag on every call.
 */
Bool is_24bpp;
/* For r128 Blend, tells whether to force src x/y offset to (0,0). */
Bool is_repeat;
/* Reserve n entries in the command FIFO.
 *
 * The cached free count in fifo_size is used when it is large enough.
 * Otherwise the chip's status register is polled, with no timeout, until at
 * least n entries are reported free.  In both cases n is then subtracted
 * from the cached count.
 */
static void
ATIWaitAvailMMIO(int n)
{
	ATICardInfo *atic = accel_atis->atic;
	char *mmio = atic->reg_base;

	if (fifo_size < n) {
		if (atic->is_radeon) {
			for (;;) {
				fifo_size = MMIO_IN32(mmio, RADEON_REG_RBBM_STATUS) &
				    RADEON_RBBM_FIFOCNT_MASK;
				if (fifo_size >= n)
					break;
			}
		} else {
			for (;;) {
				fifo_size = MMIO_IN32(mmio, R128_REG_GUI_STAT) & 0xfff;
				if (fifo_size >= n)
					break;
			}
		}
	}

	fifo_size -= n;
}
/* Block until the Radeon engine is idle and its 2D destination cache has been
 * flushed.
 *
 * When DMA is in use the kernel is first asked to idle the CP, retrying while
 * it answers -EBUSY.  The register polls below have no timeout.
 */
static void
RadeonWaitIdle(void)
{
	ATIScreenInfo *atis = accel_atis;
	ATICardInfo *atic = atis->atic;
	char *mmio = atic->reg_base;
	CARD32 temp;

#ifdef USE_DRI
	if (atis->using_dma) {
		int ret;

		for (;;) {
			ret = drmCommandNone(atic->drmFd, DRM_RADEON_CP_IDLE);
			if (ret != -EBUSY)
				break;
		}
		if (ret != 0)
			ErrorF("Failed to idle DMA, returned %d\n", ret);
	}
#endif /* USE_DRI */

	/* Wait for the engine to go idle */
	ATIWaitAvailMMIO(64);
	for (;;) {
		if ((MMIO_IN32(mmio, RADEON_REG_RBBM_STATUS) &
		    RADEON_RBBM_ACTIVE) == 0)
			break;
	}

	/* Flush pixel cache, then wait for the flush to complete */
	temp = MMIO_IN32(mmio, RADEON_REG_RB2D_DSTCACHE_CTLSTAT);
	MMIO_OUT32(mmio, RADEON_REG_RB2D_DSTCACHE_CTLSTAT,
	    temp | RADEON_RB2D_DC_FLUSH_ALL);
	for (;;) {
		if ((MMIO_IN32(mmio, RADEON_REG_RB2D_DSTCACHE_CTLSTAT) &
		    RADEON_RB2D_DC_BUSY) == 0)
			break;
	}
}
/* Wait for the Rage 128 engine to go idle and then flush its pixel cache.
 *
 * When DMA is in use the kernel is first asked to idle the CCE, retrying
 * while it answers -EBUSY.  Each of the two register polls gives up silently
 * after 1000000 reads.
 */
static void
R128WaitIdle(void)
{
	ATIScreenInfo *atis = accel_atis;
	ATICardInfo *atic = atis->atic;
	char *mmio = atic->reg_base;
	CARD32 temp;
	int tries;

#ifdef USE_DRI
	if (atis->using_dma) {
		int ret;

		for (;;) {
			ret = drmCommandNone(atic->drmFd, DRM_R128_CCE_IDLE);
			if (ret != -EBUSY)
				break;
		}
		if (ret != 0)
			ErrorF("Failed to idle DMA, returned %d\n", ret);
	}
#endif /* USE_DRI */

	ATIWaitAvailMMIO(64);

	/* Poll until the GUI engine reports itself inactive. */
	for (tries = 1000000; tries != 0; tries--) {
		if ((MMIO_IN32(mmio, R128_REG_GUI_STAT) & R128_GUI_ACTIVE) == 0)
			break;
	}

	/* Set the low eight bits of PC_NGUI_CTLSTAT.
	 * NOTE(review): presumably the pixel-cache flush bits -- confirm
	 * against the register documentation.
	 */
	temp = MMIO_IN32(mmio, R128_REG_PC_NGUI_CTLSTAT);
	MMIO_OUT32(mmio, R128_REG_PC_NGUI_CTLSTAT, temp | 0xff);

	/* Poll until the cache no longer reports busy. */
	for (tries = 1000000; tries != 0; tries--) {
		if ((MMIO_IN32(mmio, R128_REG_PC_NGUI_CTLSTAT) & R128_PC_BUSY) !=
		    R128_PC_BUSY)
			break;
	}
}
/* Wait for the accelerator of the screen in accel_atis to become idle.
 *
 * With DMA active, any commands still sitting in the indirect buffer are
 * submitted first (without discarding the buffer).  The chip-specific idle
 * routine is then run.
 */
void
ATIWaitIdle(void)
{
	ATIScreenInfo *atis = accel_atis;
	ATICardInfo *atic = atis->atic;

#ifdef USE_DRI
	/* Dispatch any accumulated commands first. */
	if (atis->using_dma) {
		if (atis->indirectBuffer != NULL)
			ATIDMAFlushIndirect(0);
	}
#endif /* USE_DRI */

	if (!atic->is_radeon) {
		R128WaitIdle();
		return;
	}
	RadeonWaitIdle();
}
#ifdef USE_DRI
/* Ask the kernel to start the command processor (CP on Radeon, CCE on R128).
 * On success the screen is marked as using DMA; on failure the error code is
 * logged and the flag is left untouched.
 */
void ATIDMAStart(ScreenPtr pScreen)
{
	KdScreenPriv(pScreen);
	ATICardInfo(pScreenPriv);
	ATIScreenInfo(pScreenPriv);
	int cmd;
	int ret;

	cmd = atic->is_radeon ? DRM_RADEON_CP_START : DRM_R128_CCE_START;
	ret = drmCommandNone(atic->drmFd, cmd);
	if (ret != 0) {
		ErrorF("%s: DMA start returned %d\n", __FUNCTION__, ret);
		return;
	}

	atis->using_dma = TRUE;
}
/* Attempts to idle the DMA engine, and stops it. Note that the ioctl is the
 * same for both R128 and Radeon, so we can just use the name of one of them.
 *
 * The first request asks the kernel to flush and idle before stopping.  If
 * that fails with EBUSY, the stop is retried without waiting for idle.  Any
 * failure that remains afterwards is logged rather than silently dropped.
 * The screen is marked as no longer using DMA in every case.
 */
void ATIDMAStop(ScreenPtr pScreen)
{
	KdScreenPriv(pScreen);
	ATICardInfo(pScreenPriv);
	ATIScreenInfo(pScreenPriv);
	drmRadeonCPStop stop;
	int ret;

	stop.flush = 1;
	stop.idle = 1;
	ret = drmCommandWrite(atic->drmFd, DRM_RADEON_CP_STOP, &stop,
	    sizeof(drmRadeonCPStop));
	if (ret != 0 && errno == EBUSY) {
		ErrorF("Failed to idle the DMA engine\n");
		/* Stop anyway, this time without waiting for idle. */
		stop.idle = 0;
		ret = drmCommandWrite(atic->drmFd, DRM_RADEON_CP_STOP, &stop,
		    sizeof(drmRadeonCPStop));
	}

	/* Report whatever error is left, matching ATIDMAStart()'s message. */
	if (ret != 0)
		ErrorF("%s: DMA stop returned %d\n", __FUNCTION__, ret);

	atis->using_dma = FALSE;
}
/* Submit the pending part of the current indirect buffer to the kernel.
 *
 * The R128 and Radeon Indirect ioctls differ only in the ioctl number, so the
 * R128 request structure is used for both.  The range submitted runs from
 * atis->indirectStart to buffer->used.
 *
 * discard: passed through to the kernel in the request's discard field.
 *
 * The caller must ensure atis->indirectBuffer is non-NULL.  A failing ioctl
 * is logged instead of being ignored.
 */
void ATIDMADispatchIndirect(Bool discard)
{
	ATIScreenInfo *atis = accel_atis;
	ATICardInfo *atic = atis->atic;
	drmBufPtr buffer = atis->indirectBuffer;
	drmR128Indirect indirect;
	int cmd;
	int ret;

	indirect.idx = buffer->idx;
	indirect.start = atis->indirectStart;
	indirect.end = buffer->used;
	indirect.discard = discard;

	cmd = atic->is_radeon ? DRM_RADEON_INDIRECT : DRM_R128_INDIRECT;
	ret = drmCommandWriteRead(atic->drmFd, cmd, &indirect,
	    sizeof(drmR128Indirect));
	if (ret != 0)
		ErrorF("%s: indirect dispatch returned %d\n", __FUNCTION__,
		    ret);
}
/* Flush the indirect buffer to the kernel for submission to the card.
 *
 * Does nothing when there is no buffer, or when nothing has been added since
 * the last dispatch and the buffer is being kept.  After a discarding flush a
 * fresh buffer is fetched; otherwise the write position is rounded up to the
 * next multiple of 8 and becomes the new start.
 */
void ATIDMAFlushIndirect(Bool discard)
{
	ATIScreenInfo *atis = accel_atis;
	drmBufPtr buffer = atis->indirectBuffer;

	if (buffer == NULL)
		return;
	if (!discard && atis->indirectStart == buffer->used)
		return;

	ATIDMADispatchIndirect(discard);

	if (!discard) {
		/* Start on a double word boundary */
		buffer->used = (buffer->used + 7) & ~7;
		atis->indirectStart = buffer->used;
		return;
	}

	atis->indirectBuffer = ATIDMAGetBuffer();
	atis->indirectStart = 0;
}
/* Get an indirect buffer for the DMA 2D acceleration commands */
drmBufPtr ATIDMAGetBuffer()
{
ATIScreenInfo *atis = accel_atis;
ATICardInfo *atic = atis->atic;
drmDMAReq dma;
drmBufPtr buf = NULL;
int indx = 0;
int size = 0;
int ret;
dma.context = atis->serverContext;
dma.send_count = 0;
dma.send_list = NULL;
dma.send_sizes = NULL;
dma.flags = 0;
dma.request_count = 1;
if (atis->atic->is_radeon)
dma.request_size = RADEON_BUFFER_SIZE;
else
dma.request_size = R128_BUFFER_SIZE;
dma.request_list = &indx;
dma.request_sizes = &size;
dma.granted_count = 0;
do {
ret = drmDMA(atic->drmFd, &dma);
} while (ret != 0);
buf = &atis->buffers->list[indx];
buf->used = 0;
return buf;
}
/* The hardware has a cache on the memory controller for writes to the
 * destination, which I guess is separate for 2d and 3d. So, when switching
 * between 2d and 3d you need to wait for idle and for the cache to clean.
 *
 * This variant queues two ring words: a DMA_PACKET0 header for
 * RADEON_REG_WAIT_UNTIL, then the host and 3D idle-clean wait flags.
 */
void
RadeonSwitchTo2D(void)
{
/* NOTE(review): atis is not referenced directly below; presumably the ring
 * macros expand to code that uses it -- confirm before renaming or removing.
 */
ATIScreenInfo *atis = accel_atis;
RING_LOCALS;
BEGIN_RING(2);
OUT_RING(DMA_PACKET0(RADEON_REG_WAIT_UNTIL, 0));
OUT_RING(RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
ADVANCE_RING();
}
/* Queue two ring words: a DMA_PACKET0 header for RADEON_REG_WAIT_UNTIL, then
 * the host and 2D idle-clean wait flags.
 * NOTE(review): presumably called before handing the engine to 3D work --
 * confirm at the call sites.
 */
void
RadeonSwitchTo3D(void)
{
/* NOTE(review): atis is not referenced directly below; presumably the ring
 * macros expand to code that uses it -- confirm before renaming or removing.
 */
ATIScreenInfo *atis = accel_atis;
RING_LOCALS;
BEGIN_RING(2);
OUT_RING(DMA_PACKET0(RADEON_REG_WAIT_UNTIL, 0));
OUT_RING(RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_2D_IDLECLEAN);
ADVANCE_RING();
}
#endif /* USE_DRI */
/* Copy an image from system memory into the storage of pDst.
 *
 * pDst:      destination pixmap; devPrivate.ptr and devKind give its base
 *            address and pitch.
 * src:       first byte of the source image.
 * src_pitch: bytes between consecutive source rows.
 *
 * One row is copied per line of pDst's height, and each row copies the
 * smaller of the two pitches.  KdCheckSync() is called on the pixmap's
 * screen before the first write.  Always returns TRUE.
 */
static Bool
ATIUploadToScreen(PixmapPtr pDst, char *src, int src_pitch)
{
	char *dst_row = pDst->devPrivate.ptr;
	int dst_stride = pDst->devKind;
	int row_bytes = dst_stride;
	int rows_left;

	if (src_pitch < dst_stride)
		row_bytes = src_pitch;

	KdCheckSync(pDst->drawable.pScreen);

	for (rows_left = pDst->drawable.height; rows_left > 0; rows_left--) {
		memcpy(dst_row, src, row_bytes);
		dst_row += dst_stride;
		src += src_pitch;
	}

	return TRUE;
}
/* Copy pSrc into the screen's scratch area and describe the copy in *pDst.
 *
 * pSrc: pixmap to upload.
 * pDst: caller-provided pixmap record.  It is overwritten with a copy of
 *       *pSrc whose devKind and devPrivate.ptr are redirected to the scratch
 *       area (scratch_offset bytes past the screen's memory_base).
 *
 * The scratch pitch is the row size in bytes rounded up to the offscreen
 * pitch alignment.  The row size itself is rounded up to a whole byte, so
 * depths that are not a multiple of 8 bits no longer lose the final partial
 * byte of each row.
 *
 * Takes the ATI_FALLBACK path when the image does not fit in the scratch
 * area; otherwise returns the result of ATIUploadToScreen().
 * NOTE(review): ATI_FALLBACK is defined outside this file; this code relies
 * on it returning FALSE from the function -- confirm in ati_draw.h.
 */
static Bool
ATIUploadToScratch(PixmapPtr pSrc, PixmapPtr pDst)
{
	KdScreenPriv(pSrc->drawable.pScreen);
	ATIScreenInfo(pScreenPriv);
	int row_bytes;
	int dst_pitch;

	/* Bits per row, rounded up to whole bytes. */
	row_bytes = (pSrc->drawable.width * pSrc->drawable.bitsPerPixel + 7) / 8;
	dst_pitch = (row_bytes + atis->kaa.offscreenPitch - 1) &
	    ~(atis->kaa.offscreenPitch - 1);

	if (dst_pitch * pSrc->drawable.height > atis->scratch_size)
		ATI_FALLBACK(("Pixmap too large for scratch (%d,%d)\n",
		    pSrc->drawable.width, pSrc->drawable.height));

	/* Start from a copy of the source record, then retarget its storage. */
	memcpy(pDst, pSrc, sizeof(*pDst));
	pDst->devKind = dst_pitch;
	pDst->devPrivate.ptr = atis->scratch_offset +
	    pScreenPriv->screen->memory_base;

	return ATIUploadToScreen(pDst, pSrc->devPrivate.ptr, pSrc->devKind);
}
/* Translate a Render picture format into an R128 datatype code.
 *
 * format: picture format; only PICT_a8r8g8b8 and PICT_r5g6b5 are accepted.
 * type:   receives the matching R128_DATATYPE_* value on success.
 *
 * Returns TRUE on success.  Any other format goes through ATI_FALLBACK and
 * yields FALSE, leaving *type untouched.
 */
static Bool
R128GetDatatypePict(CARD32 format, CARD32 *type)
{
	if (format == PICT_a8r8g8b8) {
		*type = R128_DATATYPE_ARGB_8888;
		return TRUE;
	}
	if (format == PICT_r5g6b5) {
		*type = R128_DATATYPE_RGB_565;
		return TRUE;
	}

	ATI_FALLBACK(("Unsupported format: %x\n", format));
	return FALSE;
}
/* Translate a bits-per-pixel value into an R128 datatype code.
 *
 * Assumes that depth 15 and 16 can be used as depth 16, which is okay since we
 * require src and dest datatypes to be equal.
 *
 * bpp:  8, 16, 24 or 32.
 * type: receives the R128_DATATYPE_* value on success.
 *
 * Side effect: the global is_24bpp is rewritten on every call.  It is TRUE
 * only for bpp == 24, which is driven through the 8-bit datatype.
 *
 * Returns TRUE on success.  Any other bpp goes through ATI_FALLBACK and
 * yields FALSE, leaving *type untouched.
 */
static Bool
ATIGetDatatypeBpp(int bpp, CARD32 *type)
{
	is_24bpp = (bpp == 24);

	switch (bpp) {
	case 8:
	case 24:
		*type = R128_DATATYPE_C8;
		break;
	case 16:
		*type = R128_DATATYPE_RGB_565;
		break;
	case 32:
		*type = R128_DATATYPE_ARGB_8888;
		break;
	default:
		ATI_FALLBACK(("Unsupported bpp: %x\n", bpp));
		return FALSE;
	}

	return TRUE;
}
/* The two template headers are included twice: once with USE_DMA defined
 * (DRI builds only) and once with it undefined.
 * NOTE(review): presumably each pass generates one family of the hooks that
 * ATIDrawInit() installs (the ...DMA and ...MMIO names, which are not defined
 * in this file) -- confirm in ati_drawtmp.h and r128_blendtmp.h.
 */
#ifdef USE_DRI
#define USE_DMA
#include "ati_drawtmp.h"
#include "r128_blendtmp.h"
#endif /* USE_DRI */
#undef USE_DMA
#include "ati_drawtmp.h"
#include "r128_blendtmp.h"
/* Intentionally empty; installed as the kaa DoneSolid hook by ATIDrawInit(). */
static void
ATIDoneSolid(void)
{
}
/* Intentionally empty; installed as the kaa DoneCopy hook by ATIDrawInit(). */
static void
ATIDoneCopy(void)
{
}
/* Set up acceleration for a screen: bring up DRI/DMA where available, fill in
 * the screen's kaa hook table and hand it to kaaDrawInit().
 *
 * Returns TRUE when kaaDrawInit() succeeds and FALSE otherwise.
 */
Bool
ATIDrawInit(ScreenPtr pScreen)
{
KdScreenPriv(pScreen);
ATIScreenInfo(pScreenPriv);
ATICardInfo(pScreenPriv);
ErrorF("Screen: %d/%d depth/bpp\n", pScreenPriv->screen->fb[0].depth,
pScreenPriv->screen->fb[0].bitsPerPixel);
#ifdef USE_DRI
/* If DRI is already marked active, only the DMA engine is started.
 * Otherwise DRI screen init is attempted (never on r300) and success is
 * recorded in using_dri.
 */
if (atis->using_dri)
ATIDMAStart(pScreen);
else {
if (!atic->is_r300 && ATIDRIScreenInit(pScreen))
atis->using_dri = TRUE;
}
#endif /* USE_DRI */
/* Start from an all-zero hook table; hooks not assigned below stay NULL. */
memset(&atis->kaa, 0, sizeof(KaaScreenInfoRec));
#ifdef USE_DRI
if (atis->using_dma) {
/* DMA variants of solid and copy for every chip. */
atis->kaa.PrepareSolid = ATIPrepareSolidDMA;
atis->kaa.Solid = ATISolidDMA;
atis->kaa.PrepareCopy = ATIPrepareCopyDMA;
atis->kaa.Copy = ATICopyDMA;
if (!atic->is_radeon) {
/* R128: blend only. */
atis->kaa.PrepareBlend = R128PrepareBlendDMA;
atis->kaa.Blend = R128BlendDMA;
atis->kaa.DoneBlend = R128DoneBlendDMA;
} else if (!atic->is_r200) {
/* Radeon other than r200: blend and composite. */
atis->kaa.PrepareBlend = RadeonPrepareBlend;
atis->kaa.Blend = RadeonBlend;
atis->kaa.DoneBlend = RadeonDoneBlend;
atis->kaa.PrepareComposite = RadeonPrepareComposite;
atis->kaa.Composite = RadeonComposite;
atis->kaa.DoneComposite = RadeonDoneComposite;
}
} else {
#else
/* Non-DRI builds: this bare brace opens the block that the shared closing
 * brace after the MMIO assignments ends.
 */
{
#endif /* USE_DRI */
/* MMIO variants; blend is only hooked up for R128 here. */
atis->kaa.PrepareSolid = ATIPrepareSolidMMIO;
atis->kaa.Solid = ATISolidMMIO;
atis->kaa.PrepareCopy = ATIPrepareCopyMMIO;
atis->kaa.Copy = ATICopyMMIO;
if (!atic->is_radeon) {
atis->kaa.PrepareBlend = R128PrepareBlendMMIO;
atis->kaa.Blend = R128BlendMMIO;
atis->kaa.DoneBlend = R128DoneBlendMMIO;
}
}
atis->kaa.UploadToScreen = ATIUploadToScreen;
/* Scratch uploads are only offered when a scratch area exists. */
if (atis->scratch_size != 0)
atis->kaa.UploadToScratch = ATIUploadToScratch;
atis->kaa.DoneSolid = ATIDoneSolid;
atis->kaa.DoneCopy = ATIDoneCopy;
atis->kaa.flags = KAA_OFFSCREEN_PIXMAPS;
if (atic->is_radeon) {
atis->kaa.offscreenByteAlign = 1024;
atis->kaa.offscreenPitch = 64;
} else {
atis->kaa.offscreenByteAlign = 32;
/* Pitch alignment is in sets of 8 pixels, and we need to cover
 * 32bpp, so 32 bytes.
 */
atis->kaa.offscreenPitch = 32;
}
if (!kaaDrawInit(pScreen, &atis->kaa))
return FALSE;
return TRUE;
}
/* Enable hook: only calls KdMarkSync() on the screen.
 * NOTE(review): presumably this flags the accelerator as needing a sync
 * before the next CPU access -- confirm against the kdrive sync helpers.
 */
void
ATIDrawEnable(ScreenPtr pScreen)
{
KdMarkSync(pScreen);
}
/* Disable hook: intentionally does nothing. */
void
ATIDrawDisable(ScreenPtr pScreen)
{
}
/* Tear down acceleration for a screen.
 *
 * In DRI builds the DMA engine is stopped if it is running and the DRI
 * screen is closed if it was opened.  kaaDrawFini() runs last in every
 * build.
 */
void
ATIDrawFini(ScreenPtr pScreen)
{
#ifdef USE_DRI
	KdScreenPriv(pScreen);
	ATIScreenInfo(pScreenPriv);

	if (atis->using_dma) {
		ATIDMAStop(pScreen);
	}
	if (atis->using_dri) {
		ATIDRICloseScreen(pScreen);
	}
#endif /* USE_DRI */

	kaaDrawFini(pScreen);
}
/* Sync hook: make this screen the current accelerator target by storing its
 * ATIScreenInfo in accel_atis, then block in ATIWaitIdle() until the engine
 * is idle.
 */
void
ATIDrawSync(ScreenPtr pScreen)
{
KdScreenPriv(pScreen);
ATIScreenInfo(pScreenPriv);
/* The wait helpers take no arguments and read the screen from this global. */
accel_atis = atis;
ATIWaitIdle();
}