968 lines
24 KiB
C
968 lines
24 KiB
C
/*
|
|
* @DEC_COPYRIGHT@
|
|
*/
|
|
/*
|
|
* HISTORY
|
|
* $Log: sc_idct.c,v $
|
|
* Revision 1.1.4.3 1996/03/20 22:32:42 Hans_Graves
|
|
* Moved ScScaleIDCT8x8i_C to sc_idct_scaled.c
|
|
* [1996/03/20 22:13:55 Hans_Graves]
|
|
*
|
|
* Revision 1.1.4.2 1996/03/08 18:46:17 Hans_Graves
|
|
* Changed ScScaleIDCT8x8i_C() back to 20-bit
|
|
* [1996/03/08 18:31:42 Hans_Graves]
|
|
*
|
|
* Revision 1.1.2.6 1996/02/21 22:52:40 Hans_Graves
|
|
* Changed precision of ScScaleIDCT8x8i_C() from 20 to 19 bits
|
|
* [1996/02/21 22:45:34 Hans_Graves]
|
|
*
|
|
* Revision 1.1.2.5 1996/01/26 19:01:34 Hans_Graves
|
|
* Fix bug in ScScaleIDCT8x8i_C()
|
|
* [1996/01/26 18:59:08 Hans_Graves]
|
|
*
|
|
* Revision 1.1.2.4 1996/01/24 19:33:15 Hans_Graves
|
|
* Optimization of ScScaleIDCT8x8i_C
|
|
* [1996/01/24 18:09:55 Hans_Graves]
|
|
*
|
|
* Revision 1.1.2.3 1996/01/08 20:19:31 Bjorn_Engberg
|
|
* Removed unused local variable to get rid of a warning on NT.
|
|
* [1996/01/08 20:17:34 Bjorn_Engberg]
|
|
*
|
|
* Revision 1.1.2.2 1996/01/08 16:41:17 Hans_Graves
|
|
* Moved IDCT routines from sc_dct.c
|
|
* [1996/01/08 15:30:46 Hans_Graves]
|
|
*
|
|
* $EndLog$
|
|
*/
|
|
/*****************************************************************************
|
|
** Copyright (c) Digital Equipment Corporation, 1995 **
|
|
** **
|
|
** All Rights Reserved. Unpublished rights reserved under the copyright **
|
|
** laws of the United States. **
|
|
** **
|
|
** The software contained on this media is proprietary to and embodies **
|
|
** the confidential technology of Digital Equipment Corporation. **
|
|
** Possession, use, duplication or dissemination of the software and **
|
|
** media is authorized only pursuant to a valid written license from **
|
|
** Digital Equipment Corporation. **
|
|
** **
|
|
** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. **
|
|
** Government is subject to restrictions as set forth in Subparagraph **
|
|
** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. **
|
|
******************************************************************************/
|
|
/*
|
|
** Filename: sc_idct.c
|
|
** Inverse DCT related functions.
|
|
*/
|
|
|
|
|
|
/*
|
|
#define _SLIBDEBUG_
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include "SC.h"
|
|
|
|
#ifdef _SLIBDEBUG_
|
|
#define _DEBUG_ 1 /* detailed debuging statements */
|
|
#define _VERBOSE_ 1 /* show progress */
|
|
#define _VERIFY_ 1 /* verify correct operation */
|
|
#define _WARN_ 1 /* warnings about strange behavior */
|
|
#endif
|
|
|
|
#define F (float)
|
|
#define RSQ2 F 0.7071067811865
|
|
#define COSM1P3 F 1.3065629648764
|
|
#define COS1M3 F 0.5411961001462
|
|
#define COS3 F 0.3826834323651
|
|
|
|
#define Point 14
|
|
|
|
/*
|
|
** Name: ScIDCT8x8
|
|
** Purpose: 2-d Inverse DCT. Customized for (8x8) blocks
|
|
**
|
|
** Note: This scheme uses the direct transposition of the forward
|
|
** DCT. This may not be the preferred way in Hardware
|
|
** Implementations
|
|
**
|
|
** Reference: FEIGs
|
|
**
|
|
*/
|
|
void ScIDCT8x8(int *outbuf)
|
|
{
|
|
register int *outptr, itmp, *spptr, *interptr;
|
|
register int t0, t1, t2, t3, t4, t5, t6, t7, tmp, mtmp;
|
|
int i;
|
|
static int tempptr[64];
|
|
|
|
spptr = outbuf;
|
|
interptr = tempptr;
|
|
|
|
/*
|
|
** Row Computations:
|
|
*/
|
|
for (i = 0; i < 8; i++) {
|
|
/*
|
|
** Check for zeros:
|
|
*/
|
|
t0 = spptr[0];
|
|
t1 = spptr[32];
|
|
t2 = spptr[16];
|
|
t3 = spptr[48];
|
|
|
|
t4 = spptr[40];
|
|
t5 = spptr[8];
|
|
t6 = spptr[56];
|
|
t7 = spptr[24];
|
|
|
|
if (!(t1|t2|t3|t4|t5|t6|t7)) {
|
|
interptr[0] = t0;
|
|
interptr[1] = t0;
|
|
interptr[2] = t0;
|
|
interptr[3] = t0;
|
|
interptr[4] = t0;
|
|
interptr[5] = t0;
|
|
interptr[6] = t0;
|
|
interptr[7] = t0;
|
|
interptr += 8;
|
|
}
|
|
else {
|
|
/* Compute B1-t P' */
|
|
tmp = t4;
|
|
t4 -= t7;
|
|
t7 += tmp;
|
|
|
|
tmp = t6;
|
|
t6 = t5 -t6;
|
|
t5 += tmp;
|
|
|
|
/* Compute B2-t */
|
|
tmp = t3;
|
|
t3 += t2;
|
|
t2 -= tmp;
|
|
|
|
tmp = t7;
|
|
t7 += t5;
|
|
t5 -= tmp;
|
|
|
|
/* Compute M */
|
|
tmp = t2 + (t2 >> 2);
|
|
tmp += (tmp >> 3);
|
|
t2 = (tmp + (t2 >> 7)) >> 1;
|
|
|
|
tmp = t5 + (t5 >> 2);
|
|
tmp += (tmp >> 3);
|
|
t5 = (tmp + (t5 >> 7)) >> 1;
|
|
|
|
tmp = t6 - t4;
|
|
mtmp = tmp + (tmp >> 1) + (tmp >> 5) - (tmp >> 11);
|
|
tmp = mtmp >> 2;
|
|
|
|
mtmp = t4 + (t4 >> 2) + (t4 >> 4) - (t4 >> 7) + (t4 >> 9);
|
|
t4 = -mtmp - tmp;
|
|
|
|
mtmp = (t6 + (t6 >> 4) + (t6 >> 6) + (t6 >> 8)) >> 1;
|
|
t6 = mtmp + tmp;
|
|
|
|
/* Compute A1-t */
|
|
tmp = t0;
|
|
t0 += t1;
|
|
t1 = tmp - t1;
|
|
t3 = t2 + t3;
|
|
|
|
/* Compute A2-t */
|
|
tmp = t0;
|
|
t0 += t3;
|
|
t3 = tmp - t3;
|
|
|
|
tmp = t1;
|
|
t1 += t2;
|
|
t2 = tmp - t2;
|
|
|
|
t7 += t6;
|
|
t6 += t5;
|
|
t5 -= t4;
|
|
|
|
/* Compute A3-t */
|
|
interptr[0] = t0 + t7;
|
|
interptr[1] = t1 + t6;
|
|
interptr[2] = t2 + t5;
|
|
interptr[3] = t3 - t4; /* Note in the prev. stage no
|
|
t4 = -t4 */
|
|
interptr[4] = t3 + t4;
|
|
interptr[5] = t2 - t5;
|
|
interptr[6] = t1 - t6;
|
|
interptr[7] = t0 - t7;
|
|
interptr += 8;
|
|
}
|
|
spptr++;
|
|
}
|
|
|
|
spptr = tempptr;
|
|
outptr = outbuf;
|
|
|
|
/*
|
|
** Column Computations
|
|
*/
|
|
for (i = 0; i < 8; i++) {
|
|
/* Check for zeros */
|
|
t0 = spptr[0];
|
|
t1 = spptr[32];
|
|
t2 = spptr[16];
|
|
t3 = spptr[48];
|
|
|
|
t4 = spptr[40];
|
|
t5 = spptr[8];
|
|
t6 = spptr[56];
|
|
t7 = spptr[24];
|
|
|
|
if (!(t1|t2|t3|t4|t5|t6|t7)) {
|
|
itmp = (t0 >> Point) + 128;
|
|
outptr[0] = itmp;
|
|
outptr[1] = itmp;
|
|
outptr[2] = itmp;
|
|
outptr[3] = itmp;
|
|
outptr[4] = itmp;
|
|
outptr[5] = itmp;
|
|
outptr[6] = itmp;
|
|
outptr[7] = itmp;
|
|
outptr += 8;
|
|
}
|
|
else
|
|
{
|
|
/* Compute B1-t P' */
|
|
tmp = t4;
|
|
t4 -= t7;
|
|
t7 += tmp;
|
|
|
|
tmp = t6;
|
|
t6 = t5 -t6;
|
|
t5 += tmp;
|
|
|
|
/* Compute B2-tilde */
|
|
tmp = t3;
|
|
t3 += t2;
|
|
t2 -= tmp;
|
|
|
|
tmp = t7;
|
|
t7 += t5;
|
|
t5 -= tmp;
|
|
|
|
/* Compute M-Tilde */
|
|
tmp = t2 + (t2 >> 2);
|
|
tmp += (tmp >> 3);
|
|
t2 = (tmp + (t2 >> 7)) >> 1;
|
|
|
|
tmp = t5 + (t5 >> 2);
|
|
tmp += (tmp >> 3);
|
|
t5 = (tmp + (t5 >> 7)) >> 1;
|
|
|
|
tmp = t6 - t4;
|
|
mtmp = tmp + (tmp >> 1) + (tmp >> 5) - (tmp >> 11);
|
|
tmp = mtmp >> 2;
|
|
|
|
mtmp = t4 + (t4 >> 2) + (t4 >> 4) - (t4 >> 7) + (t4 >> 9);
|
|
t4 = -mtmp - tmp;
|
|
|
|
mtmp = (t6 + (t6 >> 4) + (t6 >> 6) + (t6 >> 8)) >> 1;
|
|
t6 = mtmp + tmp;
|
|
|
|
/* Compute A1-t */
|
|
tmp = t0;
|
|
t0 += t1;
|
|
t1 = tmp - t1;
|
|
t3 = t2 + t3;
|
|
|
|
/* Compute A2-t */
|
|
tmp = t0;
|
|
t0 += t3;
|
|
t3 = tmp - t3;
|
|
|
|
tmp = t1;
|
|
t1 += t2;
|
|
t2 = tmp - t2;
|
|
|
|
t7 += t6;
|
|
t6 += t5;
|
|
t5 -= t4;
|
|
|
|
/* Compute A3-t */
|
|
|
|
outptr[0] = ((t0 + t7) >> Point) + 128;
|
|
outptr[1] = ((t1 + t6) >> Point) + 128;
|
|
outptr[2] = ((t2 + t5) >> Point) + 128;
|
|
outptr[3] = ((t3 - t4) >> Point) + 128;
|
|
outptr[4] = ((t3 + t4) >> Point) + 128;
|
|
outptr[5] = ((t2 - t5) >> Point) + 128;
|
|
outptr[6] = ((t1 - t6) >> Point) + 128;
|
|
outptr[7] = ((t0 - t7) >> Point) + 128;
|
|
|
|
outptr += 8;
|
|
}
|
|
spptr++;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
** Function: ScScaleIDCT8x8
|
|
** Note: This scheme uses the direct transposition of the forward
|
|
** DCT. This may not be the preferred way in Hardware
|
|
** Implementations
|
|
*/
|
|
void ScScaleIDCT8x8_C(float *ipbuf, int *outbuf)
|
|
{
|
|
int i;
|
|
int *outptr;
|
|
register int itmp;
|
|
register float t0, t1, t2, t3, t4, t5, t6, t7, tmp;
|
|
float *spptr, *interptr;
|
|
float tempptr[64];
|
|
|
|
spptr = ipbuf;
|
|
interptr = tempptr;
|
|
|
|
/* Perform Row Computations */
|
|
for (i=0; i<8; i++)
|
|
{
|
|
/* Check for zeros */
|
|
t0 = spptr[0];
|
|
t1 = spptr[4];
|
|
t2 = spptr[2];
|
|
t3 = spptr[6];
|
|
|
|
t4 = spptr[5];
|
|
t5 = spptr[1];
|
|
t6 = spptr[7];
|
|
t7 = spptr[3];
|
|
|
|
if (!(t1||t2||t3||t4||t5||t6||t7))
|
|
{
|
|
interptr[0] = t0;
|
|
interptr[8] = t0;
|
|
interptr[16] = t0;
|
|
interptr[24] = t0;
|
|
interptr[32] = t0;
|
|
interptr[40] = t0;
|
|
interptr[48] = t0;
|
|
interptr[56] = t0;
|
|
}
|
|
else
|
|
{
|
|
/* Compute B1-t P' */
|
|
tmp = t4;
|
|
t4 -= t7;
|
|
t7 += tmp;
|
|
|
|
tmp = t6;
|
|
t6 = t5 -t6;
|
|
t5 += tmp;
|
|
|
|
/* Compute B2-t */
|
|
tmp = t3;
|
|
t3 += t2;
|
|
t2 -= tmp;
|
|
|
|
tmp = t7;
|
|
t7 += t5;
|
|
t5 -= tmp;
|
|
|
|
/* Compute M */
|
|
t2 = t2*RSQ2;
|
|
t5 = t5*RSQ2;
|
|
tmp = (t6 - t4)*COS3;
|
|
t4 = -t4*COSM1P3 - tmp;
|
|
t6 = COS1M3*t6 + tmp;
|
|
|
|
/* Compute A1-t */
|
|
tmp = t0;
|
|
t0 += t1;
|
|
t1 = tmp - t1;
|
|
t3 = t2 + t3;
|
|
|
|
/* Compute A2-t */
|
|
tmp = t0;
|
|
t0 += t3;
|
|
t3 = tmp - t3;
|
|
|
|
tmp = t1;
|
|
t1 += t2;
|
|
t2 = tmp - t2;
|
|
|
|
t7 += t6;
|
|
t6 += t5;
|
|
t5 -= t4;
|
|
|
|
/* Compute A3-t */
|
|
interptr[0] = t0 + t7;
|
|
interptr[56] = t0 - t7;
|
|
interptr[8] = t1 + t6;
|
|
interptr[48] = t1 - t6;
|
|
interptr[16] = t2 + t5;
|
|
interptr[40] = t2 - t5;
|
|
interptr[24] = t3 - t4; /* Note in the prev. stage no
|
|
t4 = -t4 */
|
|
interptr[32] = t3 + t4;
|
|
}
|
|
spptr += 8;
|
|
interptr++;
|
|
}
|
|
|
|
spptr = tempptr;
|
|
outptr = outbuf;
|
|
|
|
/* Perform Column Computations */
|
|
for (i=0; i<8; i++)
|
|
{
|
|
/* Check for zeros */
|
|
t0 = spptr[0];
|
|
t1 = spptr[4];
|
|
t2 = spptr[2];
|
|
t3 = spptr[6];
|
|
|
|
t4 = spptr[5];
|
|
t5 = spptr[1];
|
|
t6 = spptr[7];
|
|
t7 = spptr[3];
|
|
|
|
if (!(t1||t2||t3||t4||t5||t6||t7))
|
|
{
|
|
itmp = (int) (t0);
|
|
outptr[0] = itmp;
|
|
outptr[8] = itmp;
|
|
outptr[16] = itmp;
|
|
outptr[24] = itmp;
|
|
outptr[32] = itmp;
|
|
outptr[40] = itmp;
|
|
outptr[48] = itmp;
|
|
outptr[56] = itmp;
|
|
}
|
|
else
|
|
{
|
|
/* Compute B1-t P' */
|
|
tmp = t4;
|
|
t4 -= t7;
|
|
t7 += tmp;
|
|
|
|
tmp = t6;
|
|
t6 = t5 -t6;
|
|
t5 += tmp;
|
|
|
|
/* Compute B2-tilde */
|
|
tmp = t3;
|
|
t3 += t2;
|
|
t2 -= tmp;
|
|
|
|
tmp = t7;
|
|
t7 += t5;
|
|
t5 -= tmp;
|
|
|
|
/* Compute M-Tilde */
|
|
t2 = t2*RSQ2 ;
|
|
t5 = t5*RSQ2 ;
|
|
tmp = (t6 - t4)*COS3;
|
|
t4 = -t4*COSM1P3 - tmp;
|
|
t6 = COS1M3*t6 + tmp ;
|
|
|
|
/* Compute A1-t */
|
|
tmp = t0;
|
|
t0 += t1;
|
|
t1 = tmp - t1;
|
|
t3 = t2 + t3;
|
|
|
|
/* Compute A2-t */
|
|
tmp = t0;
|
|
t0 += t3;
|
|
t3 = tmp - t3;
|
|
tmp = t1;
|
|
t1 += t2;
|
|
t2 = tmp - t2;
|
|
|
|
t7 += t6;
|
|
t6 += t5;
|
|
t5 -= t4;
|
|
|
|
/* Compute A3-t */
|
|
outptr[0] = (int)(t0+t7);
|
|
outptr[56] = (int)(t0-t7);
|
|
outptr[8] = (int)(t1+t6);
|
|
outptr[48] = (int)(t1-t6);
|
|
outptr[16] = (int)(t2+t5);
|
|
outptr[40] = (int)(t2-t5);
|
|
outptr[24] = (int)(t3-t4);
|
|
outptr[32] = (int)(t3+t4);
|
|
}
|
|
outptr++;
|
|
spptr += 8;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Function: ScIDCT8x8s
|
|
** Note: This scheme uses the direct transposition of the forward
|
|
** DCT. This may not be the preferred way in Hardware
|
|
** Implementations
|
|
*/
|
|
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
|
|
#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
|
|
#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
|
|
#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */
|
|
#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
|
|
#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */
|
|
|
|
#define IDCTSHIFTR 8
|
|
#define IDCTSHIFTC (14+0)
|
|
#if 1
|
|
#define limit(var, min, max) (var<=min ? min : (var>=max ? max : var))
|
|
#else
|
|
#define limit(var, min, max) var
|
|
#endif
|
|
|
|
void ScIDCT8x8s_C(short *inbuf, short *outbuf)
|
|
{
|
|
int i;
|
|
register tmp0, tmp1, tmp2, tmp3, x0, x1, x2, x3, x4, x5, x6, x7, x8;
|
|
register short *inblk, *outblk;
|
|
register int *tmpblk;
|
|
int tmpbuf[64];
|
|
|
|
inblk = inbuf;
|
|
tmpblk = tmpbuf;
|
|
for (i=0; i<8; i++, inblk+=8, tmpblk+=8)
|
|
{
|
|
x0 = inblk[0];
|
|
x1 = inblk[4];
|
|
x1 = x1<<11;
|
|
x2 = inblk[6];
|
|
x3 = inblk[2];
|
|
x4 = inblk[1];
|
|
x5 = inblk[7];
|
|
x6 = inblk[5];
|
|
x7 = inblk[3];
|
|
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
|
|
{
|
|
tmpblk[0]=tmpblk[1]=tmpblk[2]=tmpblk[3]=tmpblk[4]=tmpblk[5]=tmpblk[6]=
|
|
tmpblk[7]=x0<<3;
|
|
}
|
|
else
|
|
{
|
|
tmp0 = x4 + x5;
|
|
tmp0 = W7*tmp0;
|
|
x0 = x0<<11;
|
|
x0 = x0 + 128;
|
|
x8 = x0 + x1;
|
|
tmp1 = x6 + x7;
|
|
x0 = x0 - x1;
|
|
tmp1 = W3*tmp1;
|
|
tmp2 = (W2+W6)*x2;
|
|
tmp3 = (W2-W6)*x3;
|
|
x4 = (W1-W7)*x4;
|
|
x5 = (W1+W7)*x5;
|
|
x4 = tmp0 + x4;
|
|
x1 = x3 + x2;
|
|
x5 = tmp0 - x5;
|
|
x1 = W6*x1;
|
|
tmp0 = (W3-W5)*x6;
|
|
x7 = (W3+W5)*x7;
|
|
x2 = x1 - tmp2;
|
|
x3 = x1 + tmp3;
|
|
tmp0 = tmp1 - tmp0;
|
|
x7 = tmp1 - x7;
|
|
x1 = x4 + tmp0;
|
|
x4 = x4 - tmp0;
|
|
x6 = x5 + x7; /* F */
|
|
x5 = x5 - x7; /* F */
|
|
tmp0 = x4 + x5;
|
|
tmp0 = 181*tmp0;
|
|
x7 = x8 + x3; /* F */
|
|
tmp1 = x4 - x5;
|
|
x8 = x8 - x3; /* F */
|
|
tmp1 = 181*tmp1;
|
|
x3 = x0 + x2; /* F */
|
|
x0 = x0 - x2; /* F */
|
|
x2 = tmp0 + 128;
|
|
x4 = tmp1 + 128;
|
|
x2 = x2>>8; /* F */
|
|
x4 = x4>>8; /* F */
|
|
tmp0 = x7+x1;
|
|
tmp0 = tmp0>>IDCTSHIFTR;
|
|
tmp1 = x3+x2;
|
|
tmp1 = tmp1>>IDCTSHIFTR;
|
|
tmp2 = x0+x4;
|
|
tmp2 = tmp2>>IDCTSHIFTR;
|
|
tmp3 = x8+x6;
|
|
tmp3 = tmp3>>IDCTSHIFTR;
|
|
tmpblk[0] = tmp0;
|
|
tmpblk[1] = tmp1;
|
|
tmpblk[2] = tmp2;
|
|
tmpblk[3] = tmp3;
|
|
tmp0 = x8-x6;
|
|
tmp0 = tmp0>>IDCTSHIFTR;
|
|
tmp1 = x0-x4;
|
|
tmp1 = tmp1>>IDCTSHIFTR;
|
|
tmp2 = x3-x2;
|
|
tmp2 = tmp2>>IDCTSHIFTR;
|
|
tmp3 = x7-x1;
|
|
tmp3 = tmp3>>IDCTSHIFTR;
|
|
tmpblk[4] = tmp0;
|
|
tmpblk[5] = tmp1;
|
|
tmpblk[6] = tmp2;
|
|
tmpblk[7] = tmp3;
|
|
}
|
|
}
|
|
|
|
tmpblk = tmpbuf;
|
|
outblk = outbuf;
|
|
for (i=0; i<8; i++, tmpblk++, outblk++)
|
|
{
|
|
/* shortcut */
|
|
x0 = tmpblk[8*0];
|
|
x1 = tmpblk[4*8]<<8;
|
|
x2 = tmpblk[6*8];
|
|
x3 = tmpblk[2*8];
|
|
x4 = tmpblk[1*8];
|
|
x5 = tmpblk[7*8];
|
|
x6 = tmpblk[5*8];
|
|
x7 = tmpblk[3*8];
|
|
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
|
|
{
|
|
tmp0=(x0+32)>>6;
|
|
outblk[8*0]=outblk[8*1]=outblk[8*2]=outblk[8*3]=outblk[8*4]=outblk[8*5]=
|
|
outblk[8*6]=outblk[8*7]=limit(tmp0, -256, 255);
|
|
}
|
|
else
|
|
{
|
|
x0 = tmpblk[8*0];
|
|
tmp0 = x4+x5;
|
|
x0 = x0<<8;
|
|
tmp0 = W7*tmp0;
|
|
x0 = x0 + 8192;
|
|
tmp1 = x6+x7;
|
|
tmp0 = tmp0 + 4;
|
|
tmp1 = W3*tmp1;
|
|
tmp1 = tmp1 + 4;
|
|
x8 = x0 + x1;
|
|
tmp2 = (W2+W6)*x2;
|
|
x0 = x0 - x1;
|
|
x1 = x3 + x2;
|
|
x1 = W6*x1;
|
|
tmp3 = (W2-W6)*x3;
|
|
x1 = x1 + 4;
|
|
x4 = (W1-W7)*x4;
|
|
x4 = tmp0 + x4;
|
|
x4 = x4>>3;
|
|
x5 = (W1+W7)*x5;
|
|
x2 = x1 - tmp2;
|
|
x3 = x1 + tmp3;
|
|
x6 = (W3-W5)*x6;
|
|
x2 = x2>>3;
|
|
x5 = tmp0 - x5;
|
|
x5 = x5>>3;
|
|
x6 = tmp1 - x6;
|
|
x6 = x6>>3;
|
|
x7 = (W3+W5)*x7;
|
|
x7 = tmp1 - x7;
|
|
x3 = x3>>3;
|
|
x7 = x7>>3;
|
|
x1 = x4 + x6; /* F */
|
|
x4 = x4 - x6;
|
|
x6 = x5 + x7; /* F */
|
|
x5 = x5 - x7; /* F */
|
|
tmp1 = x4 + x5;
|
|
x7 = x8 + x3; /* F */
|
|
tmp1 = 181*tmp1;
|
|
x8 = x8 - x3; /* F */
|
|
x3 = x0 + x2; /* F */
|
|
tmp2 = x4 - x5;
|
|
x0 = x0 - x2; /* F */
|
|
tmp2 = 181*tmp2;
|
|
x2 = tmp1+128;
|
|
x4 = tmp2+128;
|
|
x2 = x2>>8; /* F */
|
|
x4 = x4>>8; /* F */
|
|
|
|
/* fourth stage */
|
|
tmp0=x7+x1;
|
|
tmp1=x3+x2;
|
|
tmp0=tmp0>>IDCTSHIFTC;
|
|
tmp2=x0+x4;
|
|
tmp1=tmp1>>IDCTSHIFTC;
|
|
tmp3=x8+x6;
|
|
tmp2=tmp2>>IDCTSHIFTC;
|
|
tmp3=tmp3>>IDCTSHIFTC;
|
|
outblk[8*0] = limit(tmp0, -256, 255);
|
|
outblk[8*1] = limit(tmp1, -256, 255);
|
|
outblk[8*2] = limit(tmp2, -256, 255);
|
|
outblk[8*3] = limit(tmp3, -256, 255);
|
|
tmp0=x8-x6;
|
|
tmp1=x0-x4;
|
|
tmp0=tmp0>>IDCTSHIFTC;
|
|
tmp2=x3-x2;
|
|
tmp1=tmp1>>IDCTSHIFTC;
|
|
tmp3=x7-x1;
|
|
tmp2=tmp2>>IDCTSHIFTC;
|
|
tmp3=tmp3>>IDCTSHIFTC;
|
|
outblk[8*4] = limit(tmp0, -256, 255);
|
|
outblk[8*5] = limit(tmp1, -256, 255);
|
|
outblk[8*6] = limit(tmp2, -256, 255);
|
|
outblk[8*7] = limit(tmp3, -256, 255);
|
|
}
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
void ScIDCT8x8s_C(short *inbuf, short *outbuf)
|
|
{
|
|
register int i, tmp, x0, x1, x2, x3, x4, x5, x6, x7, x8;
|
|
register short *inblk, *outblk;
|
|
register int *tmpblk;
|
|
int tmpbuf[64];
|
|
|
|
inblk = inbuf;
|
|
tmpblk = tmpbuf;
|
|
for (i=0; i<8; i++, inblk+=8, tmpblk+=8)
|
|
{
|
|
if (!((x1 = inblk[4]<<11) | (x2 = inblk[6]) | (x3 = inblk[2]) |
|
|
(x4 = inblk[1]) | (x5 = inblk[7]) | (x6 = inblk[5]) | (x7 = inblk[3])))
|
|
{
|
|
tmpblk[0]=tmpblk[1]=tmpblk[2]=tmpblk[3]=tmpblk[4]=tmpblk[5]=tmpblk[6]=
|
|
tmpblk[7]=inblk[0]<<3;
|
|
}
|
|
else
|
|
{
|
|
x0 = (inblk[0]<<11) + 128; /* for proper rounding in the fourth stage */
|
|
|
|
/* first stage */
|
|
x8 = W7*(x4+x5);
|
|
x4 = x8 + (W1-W7)*x4;
|
|
x5 = x8 - (W1+W7)*x5;
|
|
x8 = W3*(x6+x7);
|
|
x6 = x8 - (W3-W5)*x6;
|
|
x7 = x8 - (W3+W5)*x7;
|
|
|
|
/* second stage */
|
|
x8 = x0 + x1;
|
|
x0 -= x1;
|
|
x1 = W6*(x3+x2);
|
|
x2 = x1 - (W2+W6)*x2;
|
|
x3 = x1 + (W2-W6)*x3;
|
|
x1 = x4 + x6;
|
|
x4 -= x6;
|
|
x6 = x5 + x7;
|
|
x5 -= x7;
|
|
|
|
/* third stage */
|
|
x7 = x8 + x3;
|
|
x8 -= x3;
|
|
x3 = x0 + x2;
|
|
x0 -= x2;
|
|
x2 = (181*(x4+x5)+128)>>8;
|
|
x4 = (181*(x4-x5)+128)>>8;
|
|
|
|
/* fourth stage */
|
|
tmpblk[0] = (x7+x1)>>8;
|
|
tmpblk[1] = (x3+x2)>>8;
|
|
tmpblk[2] = (x0+x4)>>8;
|
|
tmpblk[3] = (x8+x6)>>8;
|
|
tmpblk[4] = (x8-x6)>>8;
|
|
tmpblk[5] = (x0-x4)>>8;
|
|
tmpblk[6] = (x3-x2)>>8;
|
|
tmpblk[7] = (x7-x1)>>8;
|
|
}
|
|
}
|
|
|
|
tmpblk = tmpbuf;
|
|
outblk = outbuf;
|
|
for (i=0; i<8; i++, tmpblk++, outblk++)
|
|
{
|
|
/* shortcut */
|
|
if (!((x1 = (tmpblk[4*8]<<8)) | (x2 = tmpblk[6*8]) | (x3 = tmpblk[2*8]) |
|
|
(x4 = tmpblk[1*8]) | (x5 = tmpblk[7*8]) | (x6 = tmpblk[5*8]) |
|
|
(x7 = tmpblk[3*8])))
|
|
{
|
|
tmp=(tmpblk[8*0]+32)>>6;
|
|
if (tmp<-256) tmp=-256; else if (tmp>255) tmp=255;
|
|
outblk[8*0]=outblk[8*1]=outblk[8*2]=outblk[8*3]=outblk[8*4]=outblk[8*5]=
|
|
outblk[8*6]=outblk[8*7]=tmp;
|
|
}
|
|
else
|
|
{
|
|
x0 = (tmpblk[8*0]<<8) + 8192;
|
|
|
|
/* first stage */
|
|
x8 = W7*(x4+x5) + 4;
|
|
x4 = (x8+((W1-W7)*x4))>>3;
|
|
x5 = (x8-((W1+W7)*x5))>>3;
|
|
x8 = W3*(x6+x7) + 4;
|
|
x6 = (x8-((W3-W5)*x6))>>3;
|
|
x7 = (x8-((W3+W5)*x7))>>3;
|
|
|
|
/* second stage */
|
|
x8 = x0 + x1;
|
|
x0 -= x1;
|
|
x1 = W6*(x3+x2) + 4;
|
|
x2 = (x1-((W2+W6)*x2))>>3;
|
|
x3 = (x1+((W2-W6)*x3))>>3;
|
|
x1 = x4 + x6;
|
|
x4 -= x6;
|
|
x6 = x5 + x7;
|
|
x5 -= x7;
|
|
|
|
/* third stage */
|
|
x7 = x8 + x3;
|
|
x8 -= x3;
|
|
x3 = x0 + x2;
|
|
x0 -= x2;
|
|
x2 = ((181*(x4+x5))+128)>>8;
|
|
x4 = ((181*(x4-x5))+128)>>8;
|
|
|
|
/* fourth stage */
|
|
outblk[8*0] = ((tmp=(x7+x1)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
outblk[8*1] = ((tmp=(x3+x2)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
outblk[8*2] = ((tmp=(x0+x4)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
outblk[8*3] = ((tmp=(x8+x6)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
outblk[8*4] = ((tmp=(x8-x6)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
outblk[8*5] = ((tmp=(x0-x4)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
outblk[8*6] = ((tmp=(x3-x2)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
outblk[8*7] = ((tmp=(x7-x1)>>14)<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
#if 0
|
|
/* row (horizontal) IDCT
|
|
*
|
|
* 7 pi 1
|
|
* dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l )
|
|
* l=0 8 2
|
|
*
|
|
* where: c[0] = 128
|
|
* c[1..7] = 128*sqrt(2)
|
|
*/
|
|
static void idctrow(short *inblk, short *outblk)
|
|
{
|
|
int x0, x1, x2, x3, x4, x5, x6, x7, x8;
|
|
/* shortcut */
|
|
if (!((x1 = inblk[4]<<11) | (x2 = inblk[6]) | (x3 = inblk[2]) |
|
|
(x4 = inblk[1]) | (x5 = inblk[7]) | (x6 = inblk[5]) | (x7 = inblk[3])))
|
|
{
|
|
outblk[0]=outblk[1]=outblk[2]=outblk[3]=outblk[4]=outblk[5]=outblk[6]=
|
|
outblk[7]=inblk[0]<<3;
|
|
return;
|
|
}
|
|
|
|
x0 = (inblk[0]<<11) + 128; /* for proper rounding in the fourth stage */
|
|
|
|
/* first stage */
|
|
x8 = W7*(x4+x5);
|
|
x4 = x8 + (W1-W7)*x4;
|
|
x5 = x8 - (W1+W7)*x5;
|
|
x8 = W3*(x6+x7);
|
|
x6 = x8 - (W3-W5)*x6;
|
|
x7 = x8 - (W3+W5)*x7;
|
|
|
|
/* second stage */
|
|
x8 = x0 + x1;
|
|
x0 -= x1;
|
|
x1 = W6*(x3+x2);
|
|
x2 = x1 - (W2+W6)*x2;
|
|
x3 = x1 + (W2-W6)*x3;
|
|
x1 = x4 + x6;
|
|
x4 -= x6;
|
|
x6 = x5 + x7;
|
|
x5 -= x7;
|
|
|
|
/* third stage */
|
|
x7 = x8 + x3;
|
|
x8 -= x3;
|
|
x3 = x0 + x2;
|
|
x0 -= x2;
|
|
x2 = (181*(x4+x5)+128)>>8;
|
|
x4 = (181*(x4-x5)+128)>>8;
|
|
|
|
/* fourth stage */
|
|
outblk[0] = (x7+x1)>>8;
|
|
outblk[1] = (x3+x2)>>8;
|
|
outblk[2] = (x0+x4)>>8;
|
|
outblk[3] = (x8+x6)>>8;
|
|
outblk[4] = (x8-x6)>>8;
|
|
outblk[5] = (x0-x4)>>8;
|
|
outblk[6] = (x3-x2)>>8;
|
|
outblk[7] = (x7-x1)>>8;
|
|
}
|
|
|
|
/* column (vertical) IDCT
|
|
*
|
|
* 7 pi 1
|
|
* dst[8*k] = sum c[l] * src[8*l] * cos( -- * ( k + - ) * l )
|
|
* l=0 8 2
|
|
*
|
|
* where: c[0] = 1/1024
|
|
* c[1..7] = (1/1024)*sqrt(2)
|
|
*/
|
|
static void idctcol(short *inblk, short *outblk)
|
|
{
|
|
int tmp, x0, x1, x2, x3, x4, x5, x6, x7, x8;
|
|
|
|
/* shortcut */
|
|
if (!((x1 = (inblk[8*4]<<8)) | (x2 = inblk[8*6]) | (x3 = inblk[8*2]) |
|
|
(x4 = inblk[8*1]) | (x5 = inblk[8*7]) | (x6 = inblk[8*5]) |
|
|
(x7 = inblk[8*3])))
|
|
{
|
|
tmp=(inblk[8*0]+32)>>6;
|
|
if (tmp<-256) tmp=-256; else if (tmp>255) tmp=255;
|
|
outblk[8*0]=outblk[8*1]=outblk[8*2]=outblk[8*3]=outblk[8*4]=outblk[8*5]=
|
|
outblk[8*6]=outblk[8*7]=tmp;
|
|
return;
|
|
}
|
|
|
|
x0 = (inblk[8*0]<<8) + 8192;
|
|
|
|
/* first stage */
|
|
x8 = W7*(x4+x5) + 4;
|
|
x4 = (x8+(W1-W7)*x4)>>3;
|
|
x5 = (x8-(W1+W7)*x5)>>3;
|
|
x8 = W3*(x6+x7) + 4;
|
|
x6 = (x8-(W3-W5)*x6)>>3;
|
|
x7 = (x8-(W3+W5)*x7)>>3;
|
|
|
|
/* second stage */
|
|
x8 = x0 + x1;
|
|
x0 -= x1;
|
|
x1 = W6*(x3+x2) + 4;
|
|
x2 = (x1-(W2+W6)*x2)>>3;
|
|
x3 = (x1+(W2-W6)*x3)>>3;
|
|
x1 = x4 + x6;
|
|
x4 -= x6;
|
|
x6 = x5 + x7;
|
|
x5 -= x7;
|
|
|
|
/* third stage */
|
|
x7 = x8 + x3;
|
|
x8 -= x3;
|
|
x3 = x0 + x2;
|
|
x0 -= x2;
|
|
x2 = (181*(x4+x5)+128)>>8;
|
|
x4 = (181*(x4-x5)+128)>>8;
|
|
|
|
/* fourth stage */
|
|
tmp=(x7+x1)>>14;
|
|
outblk[8*0] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
tmp=(x3+x2)>>14;
|
|
outblk[8*1] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
tmp=(x0+x4)>>14;
|
|
outblk[8*2] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
tmp=(x8+x6)>>14;
|
|
outblk[8*3] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
tmp=(x8-x6)>>14;
|
|
outblk[8*4] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
tmp=(x0-x4)>>14;
|
|
outblk[8*5] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
tmp=(x3-x2)>>14;
|
|
outblk[8*6] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
tmp=(x7-x1)>>14;
|
|
outblk[8*7] = (tmp<=-256 ? -256 : (tmp>=255 ? 255 : tmp));
|
|
}
|
|
#endif
|