Windows2003-3790/enduser/netmeeting/av/codecs/intel/h263/d3bef.cpp
2020-09-30 16:53:55 +02:00

766 lines
18 KiB
C++

/* *************************************************************************
** INTEL Corporation Proprietary Information
**
** This listing is supplied under the terms of a license
** agreement with INTEL Corporation and may not be copied
** nor disclosed except in accordance with the terms of
** that agreement.
**
** Copyright (c) 1995 Intel Corporation.
** Copyright (c) 1996 Intel Corporation.
** All Rights Reserved.
**
** *************************************************************************
*/
#include "precomp.h"
/* map of coded and not-coded blocks */
extern char coded_map[][22+1];
/* QP map */
extern char QP_map[][22];
#if defined(H263P) // { if defined(H263P)
/* table for de-blocking filter */
/* currently requires 2048 bytes */
signed char dxQP[64][32];
#if 0 // { 0
static void HorizEdgeFilter(unsigned char *rec,
int width,
int height,
int pitch,
int shift) {
int i, j, k;
int d, delta;
int mbc;
int mod_div = 1 << shift;
unsigned char *r = rec + (pitch << 3);
unsigned char *r_2 = r - (pitch << 1);
unsigned char *r_1 = r - pitch;
unsigned char *r1 = r + pitch;
char *pcoded_row0 = &coded_map[8>>shift][0];
char *pcoded_row1 = pcoded_row0 + sizeof(coded_map[0]);
char *pQP_map = &QP_map[0][0];
for (j = 8; j < height; ) {
for (i = 0; i < width; i += 8) {
mbc = i >> shift;
if (pcoded_row0[mbc+1] || pcoded_row1[mbc+1]) {
for (k = i; k < i+8; k++) {
d = (r_2[k]+(r_2[k]<<1)-(r_1[k]<<3)+(r[k]<<3)-(r1[k]+(r1[k]<<1)))>>4;
if (d && (d >= -32) && (d < 32)) {
delta = dxQP[d+32][pQP_map[mbc]];
r[k] = ClampTbl[r[k]-delta+CLAMP_BIAS];
r_1[k] = ClampTbl[r_1[k]+delta+CLAMP_BIAS];
}
}
}
}
r_2 += (pitch<<3);
r_1 += (pitch<<3);
r += (pitch<<3);
r1 += (pitch<<3);
if (0 == ((j+=8)%mod_div)) {
pcoded_row0 += sizeof(coded_map[0]);
pcoded_row1 += sizeof(coded_map[0]);
pQP_map += sizeof(QP_map[0]);
}
}
}
static void VertEdgeFilter(unsigned char *rec,
int width,
int height,
int pitch,
int shift) {
unsigned char *r = rec;
int i, j, k;
int mbc;
int d, delta;
int mod_div = 1 << shift;
char *pcoded_row1 = &coded_map[1][0];
char *pQP_map = &QP_map[0][0];
for (j = 0; j < height; ) {
for (i = 8; i < width; i += 8) {
mbc = i >> shift;
if (pcoded_row1[mbc] || pcoded_row1[mbc+1]) {
for (k = 0; k < 8; k++) {
d = (r[i-2]+(r[i-2]<<1)-(r[i-1]<<3)+(r[i]<<3)-(r[i+1]+(r[i+1]<<1)))>>4;
if (d && (d > -32) && (d < 32)) {
delta = dxQP[d+32][pQP_map[mbc]];
r[i] = ClampTbl[r[i]-delta+CLAMP_BIAS];
r[i-1] = ClampTbl[r[i-1]+delta+CLAMP_BIAS];
}
r += pitch;
}
r -= pitch<<3;
}
}
r += pitch<<3;
if (0 == ((j+=8)%mod_div)) {
pcoded_row1 += sizeof(coded_map[0]);
pQP_map += sizeof(QP_map[0]);
}
}
}
#else // }{ 0
__declspec(naked)
static void HorizEdgeFilter(unsigned char *rec,
int width,
int height,
int pitch,
int shift) {
// Permanent (callee-save) registers - ebx, esi, edi, ebp
// Temporary (caller-save) registers - eax, ecx, edx
//
// Stack frame layout
// | shift | + 68
// | pitch | + 64
// | height | + 60
// | width | + 56
// | rec | + 52
// -----------------------------
// | return addr | + 48
// | saved ebp | + 44
// | saved ebx | + 40
// | saved esi | + 36
// | saved edi | + 32
#define LOCALSIZE 32
#define SHIFT 68
#define PITCH_PARM 64
#define HEIGHT 60
#define WIDTH 56
#define REC 52
#define LOOP_I 28
#define LOOP_J 24
#define LOOP_K 20
#define PCODED_ROW0 16
#define PCODED_ROW1 12
#define PQP_MAP 8
#define MBC 4
#define LOOP_K_LIMIT 0
_asm {
push ebp
push ebx
push esi
push edi
sub esp, LOCALSIZE
// r = rec + (pitch << 3)
// r_2 = r - (pitch << 1)
// r_1 = r - pitch
// r1 = r + pitch
// assign(esi, r_2)
// assign(edi, r1)
// assign(ebp, pitch)
mov ebp, [esp + PITCH_PARM]
mov esi, [esp + REC]
lea esi, [esi + ebp*4]
lea esi, [esi + ebp*2]
lea edi, [esi + ebp*2]
lea edi, [edi + ebp]
// pcoded_row0 = &coded_map[8>>shift][0]
// pcoded_row1 = pcoded_row0 + sizeof(coded_map[0])
// pQP_map = &QP_map[0][0]
mov eax, 8
mov ecx, [esp + SHIFT]
shr eax, cl
mov ebx, TYPE coded_map[0]
imul eax, ebx
lea eax, [coded_map + eax]
mov [esp + PCODED_ROW0], eax
add eax, ebx
mov [esp + PCODED_ROW1], eax
lea eax, [QP_map]
mov [esp + PQP_MAP], eax
// for (j = 8; j < height; )
mov DWORD PTR [esp + LOOP_J], 8
L1:
// for (i = 0; i < width; i += 8)
mov DWORD PTR [esp + LOOP_I], 0
L2:
// mbc = i >> shift
// if (pcoded_row0[mbc+1] || pcoded_row1[mbc+1])
mov eax, [esp + LOOP_I]
mov ecx, [esp + SHIFT]
shr eax, cl
mov ebx, [esp + PCODED_ROW0]
mov [esp + MBC], eax
mov cl, [ebx+eax+1]
mov ebx, [esp + PCODED_ROW1]
test ecx, ecx
jnz L3
mov cl, [ebx+eax+1]
test ecx, ecx
jz L4
L3:
// for (k = i; k < i+8; k++)
mov eax, [esp + LOOP_I]
xor ebx, ebx
add eax, 8
// read r_1[k]
mov bl, [esi+ebp]
mov [esp + LOOP_K_LIMIT], eax
xor eax, eax
L5:
// d = (r_2[k]+(r_2[k]<<1)-(r_1[k]<<3)+(r[k]<<3)-(r1[k]+(r1[k]<<1)))>>4
// read r_2[k]
mov al, [esi]
xor ecx, ecx
// read r[k]
mov cl, [esi+ebp*2]
xor edx, edx
// read r1[k] and compute r_2[k]*3
mov dl, [edi]
lea eax,[eax+eax*2]
// compute r_1[k]*8 and r[k]*8
lea ebx, [ebx*8]
lea ecx, [ecx*8]
// compute r1[k]*3 and (r_2[k]*3 - r_1[k]*8)
lea edx, [edx+edx*2]
sub eax, ebx
// compute (r_2[k]*3 - r_1[k]*8 + r[k]*8)
add eax, ecx
xor ecx, ecx
// compute (r_2[k]*3 - r_1[k]*8 + r[k]*8 - r1[k]*3)
sub eax, edx
xor edx, edx
// compute (r_2[k]*3 - r_1[k]*8 + r[k]*8 - r1[k]*3) >> 4
sar eax, 4
mov ebx, [esp + PQP_MAP]
// if (d && (d >= -32) && (d < 32))
add ebx, [esp + MBC]
test eax, eax
jz L6
cmp eax, -32
jl L6
cmp eax, 32
jge L6
// delta = dxQP[d+32][pQP_map[mbc]]
// r[k] = ClampTbl[r[k]-delta+CLAMP_BIAS]
// r_1[k] = ClampTbl[r_1[k]+delta+CLAMP_BIAS]
lea eax, [eax + 32]
mov cl, [ebx]
shl eax, 5
mov dl, [esi+ebp]
mov al, dxQP[eax+ecx]
mov cl, [esi+ebp*2]
movsx eax, al
sub ecx, eax
mov dl, ClampTbl[edx + eax + CLAMP_BIAS]
mov cl, ClampTbl[ecx + CLAMP_BIAS]
mov [esi+ebp], dl
mov [esi+ebp*2], cl
nop
L6:
mov edx, [esp + LOOP_I]
inc esi
inc edx
inc edi
xor eax, eax
xor ebx, ebx
mov [esp + LOOP_I], edx
mov bl, [esi+ebp]
cmp edx, [esp + LOOP_K_LIMIT]
jl L5
jmp L4a
L4:
mov eax, [esp + LOOP_I]
lea esi, [esi+8]
add eax, 8
lea edi, [edi+8]
mov [esp + LOOP_I],eax
nop
L4a:
mov eax, [esp + LOOP_I]
cmp eax, [esp + WIDTH]
jl L2
// r_2 += (pitch<<3)
// r_1 += (pitch<<3)
// r += (pitch<<3)
// r1 += (pitch<<3)
mov eax, ebp
shl eax, 3
sub eax, [esp + WIDTH]
lea esi, [esi + eax]
lea edi, [edi + eax]
// if (0 == ((j+=8)%mod_div))
mov eax, [esp + LOOP_J]
add eax, 8
mov [esp + LOOP_J], eax
mov ebx, eax
mov ecx, [esp + SHIFT]
shr eax, cl
shl eax, cl
sub ebx, eax
jnz L7
// pcoded_row0 += sizeof(coded_map[0])
// pcoded_row1 += sizeof(coded_map[0])
// pQP_map += sizeof(QP_map[0])
mov eax, [esp + PCODED_ROW0]
mov ebx, [esp + PCODED_ROW1]
mov ecx, [esp + PQP_MAP]
add eax, TYPE coded_map[0]
add ebx, TYPE coded_map[0]
add ecx, TYPE QP_map[0]
mov [esp + PCODED_ROW0], eax
mov [esp + PCODED_ROW1], ebx
mov [esp + PQP_MAP], ecx
L7:
mov eax, [esp + LOOP_J]
cmp eax, [esp + HEIGHT]
jl L1
add esp, LOCALSIZE
pop edi
pop esi
pop ebx
pop ebp
ret
}
}
#undef LOCALSIZE
#undef SHIFT
#undef PITCH_PARM
#undef HEIGHT
#undef WIDTH
#undef REC
#undef LOOP_I
#undef LOOP_J
#undef LOOP_K
#undef PCODED_ROW0
#undef PCODED_ROW1
#undef PQP_MAP
#undef MBC
#undef LOOP_K_LIMIT
__declspec(naked)
static void VertEdgeFilter(unsigned char *rec,
int width,
int height,
int pitch,
int shift) {
// Permanent (callee-save) registers - ebx, esi, edi, ebp
// Temporary (caller-save) registers - eax, ecx, edx
//
// Stack frame layout
// | shift | + 56
// | pitch | + 52
// | height | + 48
// | width | + 44
// | rec | + 40
// -----------------------------
// | return addr | + 36
// | saved ebp | + 32
// | saved ebx | + 28
// | saved esi | + 24
// | saved edi | + 20
#define LOCALSIZE 20
#define SHIFT 56
#define PITCH_PARM 52
#define HEIGHT 48
#define WIDTH 44
#define REC 40
#define LOOP_K 16
#define LOOP_J 12
#define PCODED_ROW1 8
#define PQP_MAP 4
#define MBC 0
_asm {
push ebp
push ebx
push esi
push edi
sub esp, LOCALSIZE
// assign(esi, r)
mov esi, [esp + REC]
// assign(edi, pitch)
mov edi, [esp + PITCH_PARM]
// pcoded_row1 = &coded_map[1][0]
mov eax, TYPE coded_map[0]
lea eax, [coded_map + eax]
mov [esp + PCODED_ROW1], eax
// pQP_map = &QP_map[0][0]
lea eax, [QP_map]
mov [esp + PQP_MAP], eax
// for (j = 0; j < height; )
xor eax, eax
mov [esp + LOOP_J], eax
L1:
// for (i = 8; i < width; i += 8)
// assign(ebp,i)
mov ebp, 8
// mbc = i >> shift
L2:
mov eax, ebp
mov ecx, [esp + SHIFT]
shr eax, cl
mov [esp + MBC], eax
// if (pcoded_row1[mbc] || pcoded_row1[mbc+1])
xor ecx, ecx
mov ebx, [esp + PCODED_ROW1]
mov cl, [ebx+eax]
test ecx, ecx
jnz L3
mov cl, [ebx+eax+1]
test ecx, ecx
jz L4
L3:
// for (k = 0; k < 8; k++)
mov DWORD PTR [esp + LOOP_K], 8
xor eax, eax
xor ebx, ebx
xor ecx, ecx
xor edx, edx
L5:
// d = (r[i-2]+(r[i-2]<<1)-(r[i-1]<<3)+(r[i]<<3)-(r[i+1]+(r[i+1]<<1)))>>4
// read r[i-2] and r[i]
mov al, [esi+ebp-2]
mov bl, [esi+ebp]
// read r[i-1] and r[i+1]
mov cl, [esi+ebp-1]
mov dl, [esi+ebp+1]
// compute r[i-2]*3 and r[i]*8
lea eax, [eax+eax*2]
lea ebx, [ebx*8]
// compute r[i-1]*8 and r[i+1]*3
lea ecx, [ecx*8]
lea edx, [edx+edx*2]
// compute (r[i-2]*3 + r[i]*8) and (r[i-1]*8 + r[i+1]*3)
add eax, ebx
add ecx, edx
// compute (r[i-2]*3 - r[i-1]*8 + r[i]*8 - r[i+1]*3)
sub eax, ecx
xor ecx, ecx
// compute ((r[i-2]*3 - r[i-1]*8 + r[i]*8 - r[i+1]*3) >> 4)
sar eax, 4
xor edx, edx
// if (d && (d >= -32) && (d < 32))
test eax, eax
jz L6
cmp eax, -32
jl L6
cmp eax, 32
jge L6
// delta = dxQP[d+32][pQP_map[mbc]]
// r[i] = ClampTbl[r[i]-delta+CLAMP_BIAS]
// r[i-1] = ClampTbl[r[i-1]+delta+CLAMP_BIAS]
lea eax, [eax + 32]
mov ebx, [esp + PQP_MAP]
shl eax, 5
add ebx, [esp + MBC]
mov cl, [ebx]
xor ebx, ebx
mov al, dxQP[eax+ecx]
mov bl, [esi+ebp]
movsx eax, al
sub ebx, eax
mov cl, [esi+ebp-1]
mov bl, ClampTbl[ebx + CLAMP_BIAS]
mov cl, ClampTbl[ecx + eax + CLAMP_BIAS]
mov [esi+ebp], bl
mov [esi+ebp-1], cl
L6:
add esi, edi
mov eax, [esp + LOOP_K]
xor ebx, ebx
dec eax
mov [esp + LOOP_K], eax
jnz L5
// r -= (pitch<<3)
mov eax, edi
shl eax, 3
sub esi, eax
L4:
add ebp, 8
cmp ebp, [esp + WIDTH]
jl L2
// r += (pitch<<3)
mov eax, edi
shl eax, 3
lea esi, [esi + eax]
// if (0 == ((j+=8)%mod_div))
mov eax, [esp + LOOP_J]
add eax, 8
mov [esp + LOOP_J], eax
mov ebx, eax
mov ecx, [esp + SHIFT]
shr eax, cl
shl eax, cl
sub ebx, eax
jnz L7
// pcoded_row1 += sizeof(coded_map[0])
// pQP_map += sizeof(QP_map[0])
mov eax, [esp + PCODED_ROW1]
mov ebx, [esp + PQP_MAP]
add eax, TYPE coded_map[0]
add ebx, TYPE QP_map[0]
mov [esp + PCODED_ROW1], eax
mov [esp + PQP_MAP], ebx
L7:
mov eax, [esp + LOOP_J]
cmp eax, [esp + HEIGHT]
jl L1
add esp, LOCALSIZE
pop edi
pop esi
pop ebx
pop ebp
ret
}
}
#undef LOCALSIZE
#undef SHIFT
#undef PITCH_PARM
#undef HEIGHT
#undef WIDTH
#undef REC
#undef LOOP_K
#undef LOOP_J
#undef PCODED_ROW1
#undef PQP_MAP
#undef MBC
#endif // } 0
#define abs(x) (((x)>0)?(x):(-(x)))
#define sign(x) (((x)<0)?(-1):(1))
void InitEdgeFilterTab()
{
int d,QP;
for (d = 0; d < 64; d++) { // -32 <= d < 32
for (QP = 0; QP < 32; QP++) { // 0 <= QP < 32
dxQP[d][QP] = sign(d-32)*(max(0,(abs(d-32)-max(0,((2*abs(d-32))-QP)))));
}
}
}
/**********************************************************************
*
* Name: EdgeFilter
* Description: performs deblocking filtering on
* reconstructed frames
*
* Input: pointers to reconstructed frame and difference
* image
* Returns:
* Side effects:
*
* Date: 951129 Author: Gisle.Bjontegaard@fou.telenor.no
* Karl.Lillevold@nta.no
* Modified for annex J in H.263+: 961120 Karl O. Lillevold
*
***********************************************************************/
// C version of block edge filter functions
// takes about 3 ms for QCIF and 12 ms for CIF on a Pentium 120.
void EdgeFilter(unsigned char *lum,
unsigned char *Cb,
unsigned char *Cr,
int width, int height, int pitch) {
/* Luma */
HorizEdgeFilter(lum, width, height, pitch, 4);
VertEdgeFilter (lum, width, height, pitch, 4);
/* Chroma */
HorizEdgeFilter(Cb, width>>1, height>>1, pitch, 3);
VertEdgeFilter (Cb, width>>1, height>>1, pitch, 3);
HorizEdgeFilter(Cr, width>>1, height>>1, pitch, 3);
VertEdgeFilter (Cr, width>>1, height>>1, pitch, 3);
return;
}
#else // Karl's original version }{
/* currently requires 11232 bytes */
signed char dtab[352*32];
/***********************************************************************/
static void HorizEdgeFilter(unsigned char *rec,
int width, int height, int pitch, int chr)
{
int i,j,k;
int delta;
int mbc, mbr, do_filter;
unsigned char *r_2, *r_1, *r, *r1;
signed char *deltatab;
/* horizontal edges */
r = rec + 8*pitch;
r_2 = r - 2*pitch;
r_1 = r - pitch;
r1 = r + pitch;
for (j = 8; j < height; j += 8) {
for (i = 0; i < width; i += 8) {
if (!chr) {
mbr = (j >> 4);
mbc = (i >> 4);
}
else {
mbr = (j >> 3);
mbc = (i >> 3);
}
deltatab = dtab + 176 + 351 * (QP_map[mbr][mbc] - 1);
do_filter = coded_map[mbr+1][mbc+1] || coded_map[mbr][mbc+1];
if (do_filter) {
for (k = i; k < i+8; k++) {
delta = (int)deltatab[ (( (int)(*(r_2 + k) * 3) -
(int)(*(r_1 + k) * 8) +
(int)(*(r + k) * 8) -
(int)(*(r1 + k) * 3)) >>4)];
*(r + k) = ClampTbl[ (int)(*(r + k)) - delta + CLAMP_BIAS];
*(r_1 + k) = ClampTbl[ (int)(*(r_1 + k)) + delta + CLAMP_BIAS];
}
}
}
r += (pitch<<3);
r1 += (pitch<<3);
r_1 += (pitch<<3);
r_2 += (pitch<<3);
}
return;
}
static void VertEdgeFilter(unsigned char *rec,
int width, int height, int pitch, int chr)
{
int i,j,k;
int delta;
int mbc, mbr;
int do_filter;
signed char *deltatab;
unsigned char *r;
/* vertical edges */
for (i = 8; i < width; i += 8)
{
r = rec;
for (j = 0; j < height; j +=8)
{
if (!chr) {
mbr = (j >> 4);
mbc = (i >> 4);
}
else {
mbr = (j >> 3);
mbc = (i >> 3);
}
deltatab = dtab + 176 + 351 * (QP_map[mbr][mbc] - 1);
do_filter = coded_map[mbr+1][mbc+1] || coded_map[mbr+1][mbc];
if (do_filter) {
for (k = 0; k < 8; k++) {
delta = (int)deltatab[(( (int)(*(r + i-2 ) * 3) -
(int)(*(r + i-1 ) * 8) +
(int)(*(r + i ) * 8) -
(int)(*(r + i+1 ) * 3) ) >>4)];
*(r + i ) = ClampTbl[ (int)(*(r + i )) - delta + CLAMP_BIAS];
*(r + i-1 ) = ClampTbl[ (int)(*(r + i-1)) + delta + CLAMP_BIAS];
r += pitch;
}
}
else {
r += (pitch<<3);
}
}
}
return;
}
/**********************************************************************
*
* Name: EdgeFilter
* Description: performs deblocking filtering on
* reconstructed frames
*
* Input: pointers to reconstructed frame and difference
* image
* Returns:
* Side effects:
*
* Date: 951129 Author: Gisle.Bjontegaard@fou.telenor.no
* Karl.Lillevold@nta.no
* Modified for annex J in H.263+: 961120 Karl O. Lillevold
*
***********************************************************************/
void EdgeFilter(unsigned char *lum,
unsigned char *Cb,
unsigned char *Cr,
int width, int height, int pitch)
{
/* Luma */
HorizEdgeFilter(lum, width, height, pitch, 0);
VertEdgeFilter (lum, width, height, pitch, 0);
/* Chroma */
HorizEdgeFilter(Cb, width>>1, height>>1, pitch, 1);
VertEdgeFilter (Cb, width>>1, height>>1, pitch, 1);
HorizEdgeFilter(Cr, width>>1, height>>1, pitch, 1);
VertEdgeFilter (Cr, width>>1, height>>1, pitch, 1);
return;
}
#define sign(a) ((a) < 0 ? -1 : 1)
void InitEdgeFilterTab()
{
int i,QP;
for (QP = 1; QP <= 31; QP++) {
for (i = -176; i <= 175; i++) {
dtab[i+176 +(QP-1)*351] = sign(i) * (max(0,abs(i)-max(0,2*abs(i) - QP)));
}
}
}
#endif // } if defined(H263P)