Windows2003-3790/enduser/netmeeting/av/codecs/dec/dech261/idct.c
2020-09-30 16:53:55 +02:00

968 lines
24 KiB
C

/*
* @DEC_COPYRIGHT@
*/
/*
* HISTORY
* $Log: sc_idct.c,v $
* Revision 1.1.4.3 1996/03/20 22:32:42 Hans_Graves
* Moved ScScaleIDCT8x8i_C to sc_idct_scaled.c
* [1996/03/20 22:13:55 Hans_Graves]
*
* Revision 1.1.4.2 1996/03/08 18:46:17 Hans_Graves
* Changed ScScaleIDCT8x8i_C() back to 20-bit
* [1996/03/08 18:31:42 Hans_Graves]
*
* Revision 1.1.2.6 1996/02/21 22:52:40 Hans_Graves
* Changed precision of ScScaleIDCT8x8i_C() from 20 to 19 bits
* [1996/02/21 22:45:34 Hans_Graves]
*
* Revision 1.1.2.5 1996/01/26 19:01:34 Hans_Graves
* Fix bug in ScScaleIDCT8x8i_C()
* [1996/01/26 18:59:08 Hans_Graves]
*
* Revision 1.1.2.4 1996/01/24 19:33:15 Hans_Graves
* Optimization of ScScaleIDCT8x8i_C
* [1996/01/24 18:09:55 Hans_Graves]
*
* Revision 1.1.2.3 1996/01/08 20:19:31 Bjorn_Engberg
* Removed unused local variable to get rid of a warning on NT.
* [1996/01/08 20:17:34 Bjorn_Engberg]
*
* Revision 1.1.2.2 1996/01/08 16:41:17 Hans_Graves
* Moved IDCT routines from sc_dct.c
* [1996/01/08 15:30:46 Hans_Graves]
*
* $EndLog$
*/
/*****************************************************************************
** Copyright (c) Digital Equipment Corporation, 1995 **
** **
** All Rights Reserved. Unpublished rights reserved under the copyright **
** laws of the United States. **
** **
** The software contained on this media is proprietary to and embodies **
** the confidential technology of Digital Equipment Corporation. **
** Possession, use, duplication or dissemination of the software and **
** media is authorized only pursuant to a valid written license from **
** Digital Equipment Corporation. **
** **
** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. **
** Government is subject to restrictions as set forth in Subparagraph **
** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. **
******************************************************************************/
/*
** Filename: sc_idct.c
** Inverse DCT related functions.
*/
/*
#define _SLIBDEBUG_
*/
#include <math.h>
#include "SC.h"
#ifdef _SLIBDEBUG_
#define _DEBUG_ 1 /* detailed debuging statements */
#define _VERBOSE_ 1 /* show progress */
#define _VERIFY_ 1 /* verify correct operation */
#define _WARN_ 1 /* warnings about strange behavior */
#endif
#define F (float)
#define RSQ2 F 0.7071067811865
#define COSM1P3 F 1.3065629648764
#define COS1M3 F 0.5411961001462
#define COS3 F 0.3826834323651
#define Point 14
/*
** Name: ScIDCT8x8
** Purpose: 2-d Inverse DCT. Customized for (8x8) blocks
**
** Note: This scheme uses the direct transposition of the forward
** DCT. This may not be the preferred way in Hardware
** Implementations
**
** Reference: FEIGs
**
*/
void ScIDCT8x8(int *outbuf)
{
register int *outptr, itmp, *spptr, *interptr;
register int t0, t1, t2, t3, t4, t5, t6, t7, tmp, mtmp;
int i;
static int tempptr[64];
spptr = outbuf;
interptr = tempptr;
/*
** Row Computations:
*/
for (i = 0; i < 8; i++) {
/*
** Check for zeros:
*/
t0 = spptr[0];
t1 = spptr[32];
t2 = spptr[16];
t3 = spptr[48];
t4 = spptr[40];
t5 = spptr[8];
t6 = spptr[56];
t7 = spptr[24];
if (!(t1|t2|t3|t4|t5|t6|t7)) {
interptr[0] = t0;
interptr[1] = t0;
interptr[2] = t0;
interptr[3] = t0;
interptr[4] = t0;
interptr[5] = t0;
interptr[6] = t0;
interptr[7] = t0;
interptr += 8;
}
else {
/* Compute B1-t P' */
tmp = t4;
t4 -= t7;
t7 += tmp;
tmp = t6;
t6 = t5 -t6;
t5 += tmp;
/* Compute B2-t */
tmp = t3;
t3 += t2;
t2 -= tmp;
tmp = t7;
t7 += t5;
t5 -= tmp;
/* Compute M */
tmp = t2 + (t2 >> 2);
tmp += (tmp >> 3);
t2 = (tmp + (t2 >> 7)) >> 1;
tmp = t5 + (t5 >> 2);
tmp += (tmp >> 3);
t5 = (tmp + (t5 >> 7)) >> 1;
tmp = t6 - t4;
mtmp = tmp + (tmp >> 1) + (tmp >> 5) - (tmp >> 11);
tmp = mtmp >> 2;
mtmp = t4 + (t4 >> 2) + (t4 >> 4) - (t4 >> 7) + (t4 >> 9);
t4 = -mtmp - tmp;
mtmp = (t6 + (t6 >> 4) + (t6 >> 6) + (t6 >> 8)) >> 1;
t6 = mtmp + tmp;
/* Compute A1-t */
tmp = t0;
t0 += t1;
t1 = tmp - t1;
t3 = t2 + t3;
/* Compute A2-t */
tmp = t0;
t0 += t3;
t3 = tmp - t3;
tmp = t1;
t1 += t2;
t2 = tmp - t2;
t7 += t6;
t6 += t5;
t5 -= t4;
/* Compute A3-t */
interptr[0] = t0 + t7;
interptr[1] = t1 + t6;
interptr[2] = t2 + t5;
interptr[3] = t3 - t4; /* Note in the prev. stage no
t4 = -t4 */
interptr[4] = t3 + t4;
interptr[5] = t2 - t5;
interptr[6] = t1 - t6;
interptr[7] = t0 - t7;
interptr += 8;
}
spptr++;
}
spptr = tempptr;
outptr = outbuf;
/*
** Column Computations
*/
for (i = 0; i < 8; i++) {
/* Check for zeros */
t0 = spptr[0];
t1 = spptr[32];
t2 = spptr[16];
t3 = spptr[48];
t4 = spptr[40];
t5 = spptr[8];
t6 = spptr[56];
t7 = spptr[24];
if (!(t1|t2|t3|t4|t5|t6|t7)) {
itmp = (t0 >> Point) + 128;
outptr[0] = itmp;
outptr[1] = itmp;
outptr[2] = itmp;
outptr[3] = itmp;
outptr[4] = itmp;
outptr[5] = itmp;
outptr[6] = itmp;
outptr[7] = itmp;
outptr += 8;
}
else
{
/* Compute B1-t P' */
tmp = t4;
t4 -= t7;
t7 += tmp;
tmp = t6;
t6 = t5 -t6;
t5 += tmp;
/* Compute B2-tilde */
tmp = t3;
t3 += t2;
t2 -= tmp;
tmp = t7;
t7 += t5;
t5 -= tmp;
/* Compute M-Tilde */
tmp = t2 + (t2 >> 2);
tmp += (tmp >> 3);
t2 = (tmp + (t2 >> 7)) >> 1;
tmp = t5 + (t5 >> 2);
tmp += (tmp >> 3);
t5 = (tmp + (t5 >> 7)) >> 1;
tmp = t6 - t4;
mtmp = tmp + (tmp >> 1) + (tmp >> 5) - (tmp >> 11);
tmp = mtmp >> 2;
mtmp = t4 + (t4 >> 2) + (t4 >> 4) - (t4 >> 7) + (t4 >> 9);
t4 = -mtmp - tmp;
mtmp = (t6 + (t6 >> 4) + (t6 >> 6) + (t6 >> 8)) >> 1;
t6 = mtmp + tmp;
/* Compute A1-t */
tmp = t0;
t0 += t1;
t1 = tmp - t1;
t3 = t2 + t3;
/* Compute A2-t */
tmp = t0;
t0 += t3;
t3 = tmp - t3;
tmp = t1;
t1 += t2;
t2 = tmp - t2;
t7 += t6;
t6 += t5;
t5 -= t4;
/* Compute A3-t */
outptr[0] = ((t0 + t7) >> Point) + 128;
outptr[1] = ((t1 + t6) >> Point) + 128;
outptr[2] = ((t2 + t5) >> Point) + 128;
outptr[3] = ((t3 - t4) >> Point) + 128;
outptr[4] = ((t3 + t4) >> Point) + 128;
outptr[5] = ((t2 - t5) >> Point) + 128;
outptr[6] = ((t1 - t6) >> Point) + 128;
outptr[7] = ((t0 - t7) >> Point) + 128;
outptr += 8;
}
spptr++;
}
}
/*
** Function: ScScaleIDCT8x8
** Note: This scheme uses the direct transposition of the forward
** DCT. This may not be the preferred way in Hardware
** Implementations
*/
void ScScaleIDCT8x8_C(float *ipbuf, int *outbuf)
{
int i;
int *outptr;
register int itmp;
register float t0, t1, t2, t3, t4, t5, t6, t7, tmp;
float *spptr, *interptr;
float tempptr[64];
spptr = ipbuf;
interptr = tempptr;
/* Perform Row Computations */
for (i=0; i<8; i++)
{
/* Check for zeros */
t0 = spptr[0];
t1 = spptr[4];
t2 = spptr[2];
t3 = spptr[6];
t4 = spptr[5];
t5 = spptr[1];
t6 = spptr[7];
t7 = spptr[3];
if (!(t1||t2||t3||t4||t5||t6||t7))
{
interptr[0] = t0;
interptr[8] = t0;
interptr[16] = t0;
interptr[24] = t0;
interptr[32] = t0;
interptr[40] = t0;
interptr[48] = t0;
interptr[56] = t0;
}
else
{
/* Compute B1-t P' */
tmp = t4;
t4 -= t7;
t7 += tmp;
tmp = t6;
t6 = t5 -t6;
t5 += tmp;
/* Compute B2-t */
tmp = t3;
t3 += t2;
t2 -= tmp;
tmp = t7;
t7 += t5;
t5 -= tmp;
/* Compute M */
t2 = t2*RSQ2;
t5 = t5*RSQ2;
tmp = (t6 - t4)*COS3;
t4 = -t4*COSM1P3 - tmp;
t6 = COS1M3*t6 + tmp;
/* Compute A1-t */
tmp = t0;
t0 += t1;
t1 = tmp - t1;
t3 = t2 + t3;
/* Compute A2-t */
tmp = t0;
t0 += t3;
t3 = tmp - t3;
tmp = t1;
t1 += t2;
t2 = tmp - t2;
t7 += t6;
t6 += t5;
t5 -= t4;
/* Compute A3-t */
interptr[0] = t0 + t7;
interptr[56] = t0 - t7;
interptr[8] = t1 + t6;
interptr[48] = t1 - t6;
interptr[16] = t2 + t5;
interptr[40] = t2 - t5;
interptr[24] = t3 - t4; /* Note in the prev. stage no
t4 = -t4 */
interptr[32] = t3 + t4;
}
spptr += 8;
interptr++;
}
spptr = tempptr;
outptr = outbuf;
/* Perform Column Computations */
for (i=0; i<8; i++)
{
/* Check for zeros */
t0 = spptr[0];
t1 = spptr[4];
t2 = spptr[2];
t3 = spptr[6];
t4 = spptr[5];
t5 = spptr[1];
t6 = spptr[7];
t7 = spptr[3];
if (!(t1||t2||t3||t4||t5||t6||t7))
{
itmp = (int) (t0);
outptr[0] = itmp;
outptr[8] = itmp;
outptr[16] = itmp;
outptr[24] = itmp;
outptr[32] = itmp;
outptr[40] = itmp;
outptr[48] = itmp;
outptr[56] = itmp;
}
else
{
/* Compute B1-t P' */
tmp = t4;
t4 -= t7;
t7 += tmp;
tmp = t6;
t6 = t5 -t6;
t5 += tmp;
/* Compute B2-tilde */
tmp = t3;
t3 += t2;
t2 -= tmp;
tmp = t7;
t7 += t5;
t5 -= tmp;
/* Compute M-Tilde */
t2 = t2*RSQ2 ;
t5 = t5*RSQ2 ;
tmp = (t6 - t4)*COS3;
t4 = -t4*COSM1P3 - tmp;
t6 = COS1M3*t6 + tmp ;
/* Compute A1-t */
tmp = t0;
t0 += t1;
t1 = tmp - t1;
t3 = t2 + t3;
/* Compute A2-t */
tmp = t0;
t0 += t3;
t3 = tmp - t3;
tmp = t1;
t1 += t2;
t2 = tmp - t2;
t7 += t6;
t6 += t5;
t5 -= t4;
/* Compute A3-t */
outptr[0] = (int)(t0+t7);
outptr[56] = (int)(t0-t7);
outptr[8] = (int)(t1+t6);
outptr[48] = (int)(t1-t6);
outptr[16] = (int)(t2+t5);
outptr[40] = (int)(t2-t5);
outptr[24] = (int)(t3-t4);
outptr[32] = (int)(t3+t4);
}
outptr++;
spptr += 8;
}
}
/*
** Function: ScIDCT8x8s
** Note: This scheme uses the direct transposition of the forward
** DCT. This may not be the preferred way in Hardware
** Implementations
*/
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */
#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */
#define IDCTSHIFTR 8
#define IDCTSHIFTC (14+0)
#if 1
#define limit(var, min, max) (var<=min ? min : (var>=max ? max : var))
#else
#define limit(var, min, max) var
#endif
void ScIDCT8x8s_C(short *inbuf, short *outbuf)
{
int i;
register tmp0, tmp1, tmp2, tmp3, x0, x1, x2, x3, x4, x5, x6, x7, x8;
register short *inblk, *outblk;
register int *tmpblk;
int tmpbuf[64];
inblk = inbuf;
tmpblk = tmpbuf;
for (i=0; i<8; i++, inblk+=8, tmpblk+=8)
{
x0 = inblk[0];
x1 = inblk[4];
x1 = x1<<11;
x2 = inblk[6];
x3 = inblk[2];
x4 = inblk[1];
x5 = inblk[7];
x6 = inblk[5];
x7 = inblk[3];
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
{
tmpblk[0]=tmpblk[1]=tmpblk[2]=tmpblk[3]=tmpblk[4]=tmpblk[5]=tmpblk[6]=
tmpblk[7]=x0<<3;
}
else
{
tmp0 = x4 + x5;
tmp0 = W7*tmp0;
x0 = x0<<11;
x0 = x0 + 128;
x8 = x0 + x1;
tmp1 = x6 + x7;
x0 = x0 - x1;
tmp1 = W3*tmp1;
tmp2 = (W2+W6)*x2;
tmp3 = (W2-W6)*x3;
x4 = (W1-W7)*x4;
x5 = (W1+W7)*x5;
x4 = tmp0 + x4;
x1 = x3 + x2;
x5 = tmp0 - x5;
x1 = W6*x1;
tmp0 = (W3-W5)*x6;
x7 = (W3+W5)*x7;
x2 = x1 - tmp2;
x3 = x1 + tmp3;
tmp0 = tmp1 - tmp0;
x7 = tmp1 - x7;
x1 = x4 + tmp0;
x4 = x4 - tmp0;
x6 = x5 + x7; /* F */
x5 = x5 - x7; /* F */
tmp0 = x4 + x5;
tmp0 = 181*tmp0;
x7 = x8 + x3; /* F */
tmp1 = x4 - x5;
x8 = x8 - x3; /* F */
tmp1 = 181*tmp1;
x3 = x0 + x2; /* F */
x0 = x0 - x2; /* F */
x2 = tmp0 + 128;
x4 = tmp1 + 128;
x2 = x2>>8; /* F */
x4 = x4>>8; /* F */
tmp0 = x7+x1;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x3+x2;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x0+x4;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x8+x6;
tmp3 = tmp3>>IDCTSHIFTR;
tmpblk[0] = tmp0;
tmpblk[1] = tmp1;
tmpblk[2] = tmp2;
tmpblk[3] = tmp3;
tmp0 = x8-x6;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x0-x4;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x3-x2;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x7-x1;
tmp3 = tmp3>>IDCTSHIFTR;
tmpblk[4] = tmp0;
tmpblk[5] = tmp1;
tmpblk[6] = tmp2;
tmpblk[7] = tmp3;
}
}
tmpblk = tmpbuf;
outblk = outbuf;
for (i=0; i<8; i++, tmpblk++, outblk++)
{
/* shortcut */
x0 = tmpblk[8*0];
x1 = tmpblk[4*8]<<8;
x2 = tmpblk[6*8];
x3 = tmpblk[2*8];
x4 = tmpblk[1*8];
x5 = tmpblk[7*8];
x6 = tmpblk[5*8];
x7 = tmpblk[3*8];
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
{
tmp0=(x0+32)>>6;
outblk[8*0]=outblk[8*1]=outblk[8*2]=outblk[8*3]=outblk[8*4]=outblk[8*5]=
outblk[8*6]=outblk[8*7]=limit(tmp0, -256, 255);
}
else
{
x0 = tmpblk[8*0];
tmp0 = x4+x5;
x0 = x0<<8;
tmp0 = W7*tmp0;
x0 = x0 + 8192;
tmp1 = x6+x7;
tmp0 = tmp0 + 4;
tmp1 = W3*tmp1;
tmp1 = tmp1 + 4;
x8 = x0 + x1;
tmp2 = (W2+W6)*x2;
x0 = x0 - x1;
x1 = x3 + x2;
x1 = W6*x1;
tmp3 = (W2-W6)*x3;
x1 = x1 + 4;
x4 = (W1-W7)*x4;
x4 = tmp0 + x4;
x4 = x4>>3;
x5 = (W1+W7)*x5;
x2 = x1 - tmp2;
x3 = x1 + tmp3;
x6 = (W3-W5)*x6;
x2 = x2>>3;
x5 = tmp0 - x5;
x5 = x5>>3;
x6 = tmp1 - x6;
x6 = x6>>3;
x7 = (W3+W5)*x7;
x7 = tmp1 - x7;
x3 = x3>>3;
x7 = x7>>3;
x1 = x4 + x6; /* F */
x4 = x4 - x6;
x6 = x5 + x7; /* F */
x5 = x5 - x7; /* F */
tmp1 = x4 + x5;
x7 = x8 + x3; /* F */
tmp1 = 181*tmp1;
x8 = x8 - x3; /* F */
x3 = x0 + x2; /* F */
tmp2 = x4 - x5;
x0 = x0 - x2; /* F */
tmp2 = 181*tmp2;
x2 = tmp1+128;
x4 = tmp2+128;
x2 = x2>>8; /* F */
x4 = x4>>8; /* F */
/* fourth stage */
tmp0=x7+x1;
tmp1=x3+x2;
tmp0=tmp0>>IDCTSHIFTC;
tmp2=x0+x4;
tmp1=tmp1>>IDCTSHIFTC;
tmp3=x8+x6;
tmp2=tmp2>>IDCTSHIFTC;
tmp3=tmp3>>IDCTSHIFTC;
outblk[8*0] = limit(tmp0, -256, 255);
outblk[8*1] = limit(tmp1, -256, 255);
outblk[8*2] = limit(tmp2, -256, 255);
outblk[8*3] = limit(tmp3, -256, 255);
tmp0=x8-x6;
tmp1=x0-x4;
tmp0=tmp0>>IDCTSHIFTC;
tmp2=x3-x2;
tmp1=tmp1>>IDCTSHIFTC;
tmp3=x7-x1;
tmp2=tmp2>>IDCTSHIFTC;
tmp3=tmp3>>IDCTSHIFTC;
outblk[8*4] = limit(tmp0, -256, 255);
outblk[8*5] = limit(tmp1, -256, 255);
outblk[8*6] = limit(tmp2, -256, 255);
outblk[8*7] = limit(tmp3, -256, 255);
}
}
}
#if 0
void ScIDCT8x8s_C(short *inbuf, short *outbuf)
{
register int i, tmp, x0, x1, x2, x3, x4, x5, x6, x7, x8;
register short *inblk, *outblk;
register int *tmpblk;
int tmpbuf[64];
inblk = inbuf;
tmpblk = tmpbuf;
for (i=0; i<8; i++, inblk+=8, tmpblk+=8)
{
if (!((x1 = inblk[4]<<11) | (x2 = inblk[6]) | (x3 = inblk[2]) |
(x4 = inblk[1]) | (x5 = inblk[7]) | (x6 = inblk[5]) | (x7 = inblk[3])))
{
tmpblk[0]=tmpblk[1]=tmpblk[2]=tmpblk[3]=tmpblk[4]=tmpblk[5]=tmpblk[6]=
tmpblk[7]=inblk[0]<<3;
}
else
{
x0 = (inblk[0]<<11) + 128; /* for proper rounding in the fourth stage */
/* first stage */
x8 = W7*(x4+x5);
x4 = x8 + (W1-W7)*x4;
x5 = x8 - (W1+W7)*x5;
x8 = W3*(x6+x7);
x6 = x8 - (W3-W5)*x6;
x7 = x8 - (W3+W5)*x7;
/* second stage */
x8 = x0 + x1;
x0 -= x1;
x1 = W6*(x3+x2);
x2 = x1 - (W2+W6)*x2;
x3 = x1 + (W2-W6)*x3;
x1 = x4 + x6;
x4 -= x6;
x6 = x5 + x7;
x5 -= x7;
/* third stage */
x7 = x8 + x3;
x8 -= x3;
x3 = x0 + x2;
x0 -= x2;
x2 = (181*(x4+x5)+128)>>8;
x4 = (181*(x4-x5)+128)>>8;
/* fourth stage */
tmpblk[0] = (x7+x1)>>8;
tmpblk[1] = (x3+x2)>>8;
tmpblk[2] = (x0+x4)>>8;
tmpblk[3] = (x8+x6)>>8;
tmpblk[4] = (x8-x6)>>8;
tmpblk[5] = (x0-x4)>>8;
tmpblk[6] = (x3-x2)>>8;
tmpblk[7] = (x7-x1)>>8;
}
}
tmpblk = tmpbuf;
outblk = outbuf;
for (i=0; i<8; i++, tmpblk++, outblk++)
{
/* shortcut */
if (!((x1 = (tmpblk[4*8]<<8)) | (x2 = tmpblk[6*8]) | (x3 = tmpblk[2*8]) |
(x4 = tmpblk[1*8]) | (x5 = tmpblk[7*8]) | (x6 = tmpblk[5*8]) |
(x7 = tmpblk[3*8])))
{
tmp=(tmpblk[8*0]+32)>>6;
if (tmp<-256) tmp=-256; else if (tmp>255) tmp=255;
outblk[8*0]=outblk[8*1]=outblk[8*2]=outblk[8*3]=outblk[8*4]=outblk[8*5]=
outblk[8*6]=outblk[8*7]=tmp;
}
else
{
x0 = (tmpblk[8*0]<<8) + 8192;
/* first stage */
x8 = W7*(x4+x5) + 4;
x4 = (x8+((W1-W7)*x4))>>3;
x5 = (x8-((W1+W7)*x5))>>3;
x8 = W3*(x6+x7) + 4;
x6 = (x8-((W3-W5)*x6))>>3;
x7 = (x8-((W3+W5)*x7))>>3;
/* second stage */
x8 = x0 + x1;
x0 -= x1;
x1 = W6*(x3+x2) + 4;
x2 = (x1-((W2+W6)*x2))>>3;
x3 = (x1+((W2-W6)*x3))>>3;
x1 = x4 + x6;
x4 -= x6;
x6 = x5 + x7;
x5 -= x7;
/* third stage */
x7 = x8 + x3;
x8 -= x3;
x3 = x0 + x2;
x0 -= x2;
x2 = ((181*(x4+x5))+128)>>8;
x4 = ((181*(x4-x5))+128)>>8;
/* fourth stage */
outblk[8*0] = ((tmp=(x7+x1)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
outblk[8*1] = ((tmp=(x3+x2)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
outblk[8*2] = ((tmp=(x0+x4)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
outblk[8*3] = ((tmp=(x8+x6)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
outblk[8*4] = ((tmp=(x8-x6)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
outblk[8*5] = ((tmp=(x0-x4)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
outblk[8*6] = ((tmp=(x3-x2)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
outblk[8*7] = ((tmp=(x7-x1)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
}
}
}
#endif
#if 0
/* row (horizontal) IDCT
*
* 7 pi 1
* dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l )
* l=0 8 2
*
* where: c[0] = 128
* c[1..7] = 128*sqrt(2)
*/
static void idctrow(short *inblk, short *outblk)
{
int x0, x1, x2, x3, x4, x5, x6, x7, x8;
/* shortcut */
if (!((x1 = inblk[4]<<11) | (x2 = inblk[6]) | (x3 = inblk[2]) |
(x4 = inblk[1]) | (x5 = inblk[7]) | (x6 = inblk[5]) | (x7 = inblk[3])))
{
outblk[0]=outblk[1]=outblk[2]=outblk[3]=outblk[4]=outblk[5]=outblk[6]=
outblk[7]=inblk[0]<<3;
return;
}
x0 = (inblk[0]<<11) + 128; /* for proper rounding in the fourth stage */
/* first stage */
x8 = W7*(x4+x5);
x4 = x8 + (W1-W7)*x4;
x5 = x8 - (W1+W7)*x5;
x8 = W3*(x6+x7);
x6 = x8 - (W3-W5)*x6;
x7 = x8 - (W3+W5)*x7;
/* second stage */
x8 = x0 + x1;
x0 -= x1;
x1 = W6*(x3+x2);
x2 = x1 - (W2+W6)*x2;
x3 = x1 + (W2-W6)*x3;
x1 = x4 + x6;
x4 -= x6;
x6 = x5 + x7;
x5 -= x7;
/* third stage */
x7 = x8 + x3;
x8 -= x3;
x3 = x0 + x2;
x0 -= x2;
x2 = (181*(x4+x5)+128)>>8;
x4 = (181*(x4-x5)+128)>>8;
/* fourth stage */
outblk[0] = (x7+x1)>>8;
outblk[1] = (x3+x2)>>8;
outblk[2] = (x0+x4)>>8;
outblk[3] = (x8+x6)>>8;
outblk[4] = (x8-x6)>>8;
outblk[5] = (x0-x4)>>8;
outblk[6] = (x3-x2)>>8;
outblk[7] = (x7-x1)>>8;
}
/* column (vertical) IDCT
*
* 7 pi 1
* dst[8*k] = sum c[l] * src[8*l] * cos( -- * ( k + - ) * l )
* l=0 8 2
*
* where: c[0] = 1/1024
* c[1..7] = (1/1024)*sqrt(2)
*/
static void idctcol(short *inblk, short *outblk)
{
int tmp, x0, x1, x2, x3, x4, x5, x6, x7, x8;
/* shortcut */
if (!((x1 = (inblk[8*4]<<8)) | (x2 = inblk[8*6]) | (x3 = inblk[8*2]) |
(x4 = inblk[8*1]) | (x5 = inblk[8*7]) | (x6 = inblk[8*5]) |
(x7 = inblk[8*3])))
{
tmp=(inblk[8*0]+32)>>6;
if (tmp<-256) tmp=-256; else if (tmp>255) tmp=255;
outblk[8*0]=outblk[8*1]=outblk[8*2]=outblk[8*3]=outblk[8*4]=outblk[8*5]=
outblk[8*6]=outblk[8*7]=tmp;
return;
}
x0 = (inblk[8*0]<<8) + 8192;
/* first stage */
x8 = W7*(x4+x5) + 4;
x4 = (x8+(W1-W7)*x4)>>3;
x5 = (x8-(W1+W7)*x5)>>3;
x8 = W3*(x6+x7) + 4;
x6 = (x8-(W3-W5)*x6)>>3;
x7 = (x8-(W3+W5)*x7)>>3;
/* second stage */
x8 = x0 + x1;
x0 -= x1;
x1 = W6*(x3+x2) + 4;
x2 = (x1-(W2+W6)*x2)>>3;
x3 = (x1+(W2-W6)*x3)>>3;
x1 = x4 + x6;
x4 -= x6;
x6 = x5 + x7;
x5 -= x7;
/* third stage */
x7 = x8 + x3;
x8 -= x3;
x3 = x0 + x2;
x0 -= x2;
x2 = (181*(x4+x5)+128)>>8;
x4 = (181*(x4-x5)+128)>>8;
/* fourth stage */
tmp=(x7+x1)>>14;
outblk[8*0] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
tmp=(x3+x2)>>14;
outblk[8*1] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
tmp=(x0+x4)>>14;
outblk[8*2] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
tmp=(x8+x6)>>14;
outblk[8*3] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
tmp=(x8-x6)>>14;
outblk[8*4] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
tmp=(x0-x4)>>14;
outblk[8*5] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
tmp=(x3-x2)>>14;
outblk[8*6] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
tmp=(x7-x1)>>14;
outblk[8*7] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
}
#endif