1013 lines
22 KiB
C++
1013 lines
22 KiB
C++
|
/* *************************************************************************
|
||
|
** INTEL Corporation Proprietary Information
|
||
|
**
|
||
|
** This listing is supplied under the terms of a license
|
||
|
** agreement with INTEL Corporation and may not be copied
|
||
|
** nor disclosed except in accordance with the terms of
|
||
|
** that agreement.
|
||
|
**
|
||
|
** Copyright (c) 1995 Intel Corporation.
|
||
|
** All Rights Reserved.
|
||
|
**
|
||
|
** *************************************************************************
|
||
|
*/
|
||
|
|
||
|
#include "precomp.h"
|
||
|
|
||
|
#ifdef H263P // {
|
||
|
|
||
|
//
|
||
|
// For the P5 versions, the strategy is to compute the Y value for an odd RGB value
|
||
|
// followed by computing the Y value for the corresponding even RGB value. The registers
|
||
|
// are then set with the proper values to compute U and V values for the even RGB
|
||
|
// value. This avoids repeating the shifting and masking needed to extract the Red,
|
||
|
// Green and Blue components.
|
||
|
//
|
||
|
|
||
|
/*****************************************************************************
 *
 *  H26X_BGR32toYUV12()
 *
 *  Convert from BGR32 to YUV12 (YCrCb 4:2:0) and copy to destination memory
 *  with the pitch given by the pitch parameter. The input data is stored as
 *  32-bit pixels in the byte order B,G,R,x,B,G,R,x... where the fourth byte
 *  of each pixel is not used by the conversion.
 *
 */
|
||
|
|
||
|
#if 0 // { 0
|
||
|
|
||
|
//
// C_H26X_BGR32toYUV12 - reference C implementation of the BGR32 -> YUV12
// colour conversion.
//
//   lpbiInput     bitmap header of the source image (consumed by the
//                 C_RGB_COLOR_CONVERT_INIT macro)
//   OutputWidth   width, in pixels, of the region to convert
//   OutputHeight  height, in lines, of the region to convert
//   lpInput       source pixels, one 32-bit word per pixel
//   YPlane        destination luma plane
//   UPlane        destination U plane (written on even lines only)
//   VPlane        destination V plane (written on even lines only)
//   pitch         byte distance between destination lines
//
// Four source pixels are handled per inner iteration: four Y bytes are
// written as one 32-bit store and, on even lines, two U bytes and two V bytes
// are written as 16-bit stores.  Chroma is taken from the first and third
// pixel of each group of four.
//
// NOTE(review): i, j, k, t, tm, pnext, pyprev, pyspace, pynext, mark,
// LumaIters, stretch, BackTwoLines and the pitch adjustments are not declared
// here; presumably C_RGB_COLOR_CONVERT_INIT declares and initialises them -
// confirm against the macro definition.
//
void C_H26X_BGR32toYUV12(
	LPBITMAPINFOHEADER	lpbiInput,
	WORD OutputWidth,
	WORD OutputHeight,
	U8 *lpInput,
	U8 *YPlane,
	U8 *UPlane,
	U8 *VPlane,
	const int pitch)
{
	int tm1, tm2;
	int t1, t2, t3, t4;

	C_RGB_COLOR_CONVERT_INIT

	// This assignment statement is here simply to avoid a warning message.
	t = t;

	for ( j = 0; j < LumaIters; j++) {

		for (k = 0; k < mark; k++) {

			for (i = OutputWidth; i > 0; i-=4, YPlane+=4) {
				// Each table entry is indexed by the top seven bits of a
				// colour component; the .YU field carries the Y and U
				// contributions packed into one word.
				tm1 = *pnext++;
				t1 = (BYUV[(tm1>>1)&0x7F].YU +
					 GYUV[(tm1>>9)&0x7F].YU +
					 RYUV[(tm1>>17)&0x7F].YU);
				tm = *pnext++;
				t2 = (BYUV[(tm>>1)&0x7F].YU +
					 GYUV[(tm>>9)&0x7F].YU +
					 RYUV[(tm>>17)&0x7F].YU);
				tm2 = *pnext++;
				t3 = (BYUV[(tm2>>1)&0x7F].YU +
					 GYUV[(tm2>>9)&0x7F].YU +
					 RYUV[(tm2>>17)&0x7F].YU);
				tm = *pnext++;
				t4 = (BYUV[(tm>>1)&0x7F].YU +
					 GYUV[(tm>>9)&0x7F].YU +
					 RYUV[(tm>>17)&0x7F].YU);
				// Pack the four luma bytes (bits 8..15 of each sum, after
				// adding the 0x800 offset) into one 32-bit store.
				*(U32 *)YPlane =
					(((t1+0x800)>>8)&0xFF) |
					((t2+0x800)&0xFF00) |
					(((t3+0x800)<<8)&0xFF0000) |
					(((t4+0x800)<<16)&0xFF000000);
				if (0 == (k&1)) {
					// U comes from the top byte of the packed sums of the
					// first and third pixel (t1 and t3), the same two pixels
					// that V is computed from below (tm1 and tm2).
					*(U16 *)UPlane =
						((t1+0x40000000)>>24) |
						(((t3+0x40000000)>>16)&0xFF00);
					t1 = (BYUV[(tm1>>1)&0x7F].V +
						 GYUV[(tm1>>9)&0x7F].V +
						 RYUV[(tm1>>17)&0x7F].V);
					t2 = (BYUV[(tm2>>1)&0x7F].V +
						 GYUV[(tm2>>9)&0x7F].V +
						 RYUV[(tm2>>17)&0x7F].V);
					*(U16 *)VPlane =
						((t1+0x4000)>>8) |
						((t2+0x4000)&0xFF00);
					UPlane += 2; VPlane += 2;
				}
			}
			// The next two cases are mutually exclusive.
			// If there is a width_diff there cannot be a stretch and
			// if there is a stretch, there cannot be a width_diff.
			C_WIDTH_FILL
			if (stretch && (0 == k) && j) {
				// Fill the skipped luma line with the byte-wise average of
				// the line above it and the line just converted.
				for (i = OutputWidth; i > 0; i -= 8) {
					tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
					tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
					*pyspace++ = tm;
					tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
					tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
					*pyspace++ = tm;
				}
			}
			pnext += BackTwoLines;
			YPlane += byte_ypitch_adj;
			// Increment after even lines.
			if(0 == (k&1)) {
				UPlane += byte_uvpitch_adj;
				VPlane += byte_uvpitch_adj;
			}
		} // end of for k
		if (stretch) {
			// Leave one luma line free; it is filled on the next pass (or
			// by the copy after the loop for the final group).
			pyprev = (U32 *)(YPlane - pitch);
			pyspace = (U32 *)YPlane;
			pynext = (U32 *)(YPlane += pitch);
		}
	} // end of for j
	// The next two cases are mutually exclusive.
	// If there is a height_diff there cannot be a stretch and
	// if there is a stretch, there cannot be a height_diff.
	C_HEIGHT_FILL
	if (stretch) {
		// The last free line has no following line to average with, so it
		// is a copy of the line above it.
		for (i = OutputWidth; i > 0; i -= 4) {
			*pyspace++ = *pyprev++;
		}
	}
} // end of C_H26X_BGR32toYUV12()
|
||
|
|
||
|
#endif // } 0
|
||
|
|
||
|
//
// P5_H26X_BGR32toYUV12 - Pentium-scheduled BGR32 -> YUV12 colour conversion.
//
//   lpbiInput     source bitmap header (biWidth, biHeight, biBitCount are read)
//   OutputWidth   width, in pixels, of the region to convert
//   OutputHeight  height, in lines, of the region to convert
//   lpInput       source pixels, one 32-bit word per pixel
//   YPlane        destination luma plane
//   UPlane        destination U plane
//   VPlane        destination V plane
//   pitch         byte distance between destination lines
//
// The source is walked from its last line towards its first (BackTwoLines is
// negative).  Each inner iteration converts four pixels: four Y bytes are
// written and, on even lines (k even), one U and one V byte for the first
// and for the third pixel.  Component values are reduced to seven bits and
// used to index the BYUV / GYUV / RYUV tables.
//
// When the source is taller than OutputHeight the routine runs in "stretch"
// mode: 11 lines are converted, one luma line is skipped and later filled
// with the average of its neighbours (the last skipped line is a copy of the
// line above it).  Otherwise the output is padded to multiples of 16 in
// width and height by replicating the last pixel / last line.
//
// The function is naked: it builds its own frame and addresses everything
// relative to esp, which leaves ebp free as a general-purpose register.
// The loop exit tests on the width and fill counters use jz/jnz, so those
// counts are assumed to reach exactly zero (OutputWidth a multiple of 4, or
// of 8 in stretch mode) - NOTE(review): confirm callers guarantee this.
//
__declspec(naked)
void P5_H26X_BGR32toYUV12(
	LPBITMAPINFOHEADER	lpbiInput,
	WORD OutputWidth,
	WORD OutputHeight,
	U8 *lpInput,
	U8 *YPlane,
	U8 *UPlane,
	U8 *VPlane,
	const int pitch)
{
	// Permanent (callee-save) registers - ebx, esi, edi, ebp
	// Temporary (caller-save) registers - eax, ecx, edx
	//
	// Stack frame layout
	//	| pitch				|  +136
	//	| VPlane			|  +132
	//	| UPlane			|  +128
	//	| YPlane			|  +124
	//	| lpInput			|  +120
	//	| OutputHeight		|  +116
	//	| OutputWidth		|  +112
	//	| lpbiInput			|  +108
	//	----------------------------
	//	| return addr		|  +104
	//	| saved ebp			|  +100
	//	| saved ebx			|  + 96
	//	| saved esi			|  + 92
	//	| saved edi			|  + 88

	//  | output_width		|  + 84
	//	| pyprev			|  + 80
	//	| pyspace			|  + 76
	//	| pynext			|  + 72
	//	| puvprev			|  + 68
	//	| puvspace			|  + 64
	//	| i					|  + 60
	//	| j					|  + 56
	//	| k					|  + 52
	//	| BackTwoLines		|  + 48
	//	| widthx16			|  + 44
	//	| heightx16			|  + 40
	//	| width_diff		|  + 36
	//	| height_diff		|  + 32
	//	| width_adj			|  + 28
	//	| height_adj		|  + 24
	//	| stretch			|  + 20
	//	| aspect			|  + 16
	//	| LumaIters			|  + 12
	//	| mark				|  +  8
	//	| byte_ypitch_adj	|  +  4
	//	| byte_uvpitch_adj	|  +  0

#define LOCALSIZE			 88

#define PITCH_PARM			136
#define VPLANE				132
#define UPLANE				128
#define YPLANE				124
#define LP_INPUT			120
#define OUTPUT_HEIGHT_WORD	116
#define OUTPUT_WIDTH_WORD	112
#define LPBI_INPUT			108

#define OUTPUT_WIDTH		 84
#define PYPREV				 80
#define PYSPACE				 76
#define PYNEXT				 72
#define PUVPREV				 68
#define PUVSPACE			 64
#define LOOP_I				 60
#define LOOP_J				 56
#define LOOP_K				 52
#define BACK_TWO_LINES		 48
#define WIDTHX16			 44
#define HEIGHTX16			 40
#define WIDTH_DIFF			 36
#define HEIGHT_DIFF			 32
#define WIDTH_ADJ			 28
#define HEIGHT_ADJ			 24
#define STRETCH				 20
#define ASPECT				 16
#define LUMA_ITERS			 12
#define MARK				  8
#define BYTE_YPITCH_ADJ		  4
#define BYTE_UVPITCH_ADJ	  0

	_asm {

	push	ebp
	push	ebx
	push	esi
	push	edi
	sub		esp, LOCALSIZE

	// int width_diff = 0
	// int height_diff = 0
	// int width_adj = 0
	// int height_adj = 0
	// int stretch = 0
	// int aspect = 0

	xor		eax, eax
	mov		[esp + WIDTH_DIFF], eax
	mov		[esp + HEIGHT_DIFF], eax
	mov		[esp + WIDTH_ADJ], eax
	mov		[esp + HEIGHT_ADJ], eax
	mov		[esp + STRETCH], eax
	mov		[esp + ASPECT], eax

	// int LumaIters = 1

	inc		eax
	mov		[esp + LUMA_ITERS], eax

	// int mark = OutputHeight
	// int output_width = OutputWidth
	// int byte_ypitch_adj = pitch - OutputWidth
	// int byte_uvpitch_adj = pitch - (OutputWidth >> 1)

	// ebx is cleared first so the 16-bit loads below leave a zero-extended
	// value in the full register.
	xor		ebx, ebx
	mov		bx, [esp + OUTPUT_HEIGHT_WORD]
	mov		[esp + MARK], ebx
	mov		bx, [esp + OUTPUT_WIDTH_WORD]
	mov		[esp + OUTPUT_WIDTH], ebx
	mov		ecx, [esp + PITCH_PARM]
	mov		edx, ecx
	sub		ecx, ebx
	mov		[esp + BYTE_YPITCH_ADJ], ecx
	shr		ebx, 1
	sub		edx, ebx
	mov		[esp + BYTE_UVPITCH_ADJ], edx

	// if (lpbiInput->biHeight > OutputHeight)
	// ebx holds lpbiInput from here until the pnext computation is done.

	mov		ebx, [esp + LPBI_INPUT]
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	cmp		ecx, edx
	jle		Lno_stretch

	// for (LumaIters = 0, i = OutputHeight; i > 0; i -= 48) LumaIters += 4
	// The exit test is a signed "i > 0" so that a height which is not an
	// exact multiple of 48 still terminates the loop.

	xor		ecx, ecx
Lrepeat48:
	lea		ecx, [ecx + 4]
	sub		edx, 48
	jg		Lrepeat48
	mov		[esp + LUMA_ITERS], ecx

	// aspect = LumaIters

	mov		[esp + ASPECT], ecx

	// width_adj = (lpbiInput->biWidth - OutputWidth) >> 1
	// width_adj *= lpbiInput->biBitCount
	// width_adj >>= 3

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	mov		edx, [esp + OUTPUT_WIDTH]
	sub		ecx, edx
	shr		ecx, 1
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	shr		ecx, 3
	mov		[esp + WIDTH_ADJ], ecx

	// height_adj = (lpbiInput->biHeight - (OutputHeight - aspect)) >> 1

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		ecx, edx
	add		ecx, [esp + ASPECT]
	shr		ecx, 1
	mov		[esp + HEIGHT_ADJ], ecx

	// stretch = 1
	// mark = 11

	mov		ecx, 1
	mov		edx, 11
	mov		[esp + STRETCH], ecx
	mov		[esp + MARK], edx
	jmp		Lif_done

Lno_stretch:

	// widthx16 = (lpbiInput->biWidth + 0xF) & ~0xF
	// width_diff = widthx16 - OutputWidth

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	add		ecx, 00FH
	and		ecx, 0FFFFFFF0H
	mov		[esp + WIDTHX16], ecx
	mov		edx, [esp + OUTPUT_WIDTH]
	sub		ecx, edx
	mov		[esp + WIDTH_DIFF], ecx

	// byte_ypitch_adj -= width_diff

	mov		edx, [esp + BYTE_YPITCH_ADJ]
	sub		edx, ecx
	mov		[esp + BYTE_YPITCH_ADJ], edx

	// byte_uvpitch_adj -= (width_diff >> 1)

	mov		edx, [esp + BYTE_UVPITCH_ADJ]
	shr		ecx, 1
	sub		edx, ecx
	mov		[esp + BYTE_UVPITCH_ADJ], edx

	// heightx16 = (lpbiInput->biHeight + 0xF) & ~0xF
	// height_diff = heightx16 - OutputHeight

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	add		ecx, 00FH
	and		ecx, 0FFFFFFF0H
	mov		[esp + HEIGHTX16], ecx
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		ecx, edx
	mov		[esp + HEIGHT_DIFF], ecx

Lif_done:

	// BackTwoLines = -(lpbiInput->biWidth + OutputWidth);
	// BackTwoLines *= lpbiInput->biBitCount
	// BackTwoLines >>= 3
	// (arithmetic shift: the value is negative)

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	mov		edx, [esp + OUTPUT_WIDTH]
	add		ecx, edx
	neg		ecx
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	sar		ecx, 3
	mov		[esp + BACK_TWO_LINES], ecx

	// pnext =	(U32 *)(lpInput +
	//				(((lpbiInput->biWidth * lpbiInput->biBitCount) >> 3)) *
	//					((OutputHeight - aspect - 1) + height_adj)) +
	//				width_adj)
	// assign (esi, pnext)

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	shr		ecx, 3
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		edx, [esp + ASPECT]
	dec		edx
	add		edx, [esp + HEIGHT_ADJ]
	imul	ecx, edx
	add		ecx, [esp + WIDTH_ADJ]
	add		ecx, [esp + LP_INPUT]
	mov		esi, ecx

	// assign (edi, YPlane)
	mov		edi, [esp + YPLANE]
	// for (j = 0; j < LumaIters; j++)
	xor		eax, eax
	mov		[esp + LOOP_J], eax
	// for (k = 0; k < mark; k++)
L4:
	xor		eax, eax
	mov		[esp + LOOP_K], eax
	// for (i = OutputWidth; i > 0; i -= 4, pnext += 16)
L5:
	mov		eax, [esp + OUTPUT_WIDTH]
	mov		[esp + LOOP_I], eax
	// This jump is here to make sure the following loop starts in the U pipe
	jmp		L6
L6:
	//  ---------------------
	//  |    | R1 | G1 | B1 | pnext[0]
	//  ---------------------
	//  |    | R2 | G2 | B2 | pnext[1]
	//  ---------------------
	//  |    | R3 | G3 | B3 | pnext[2]
	//  ---------------------
	//  |    | R4 | G4 | B4 | pnext[3]
	//  ---------------------

	// t0 = pnext[0]
	// t1 = pnext[1]
	// t = ( BYUV[(t1>> 1)&0x7F].YU +
	//       GYUV[(t1>> 9)&0x7F].YU +
	//       RYUV[(t1>>17)&0x7F].YU )
	// *(YPlane+1) = ((t>>8)+8)
	// t = ( BYUV[(t0>> 1)&0x7F].YU +
	//       GYUV[(t0>> 9)&0x7F].YU +
	//       RYUV[(t0>>17)&0x7F].YU )
	// *YPlane = ((t>>8)+8)
	// assign(eax: B2,Y1,Y2,U)
	// assign(ebx: B1,V)
	// assign(ecx: G2,G1)
	// assign(edx: R2,R1)
	// assign(ebp: B1)
	//
	// The numbered comments group the instructions in pairs.

	// 1
	mov		ebx, [esi]
	mov		ecx, [esi + 4]
	// 2
	mov		eax, ecx
	mov		edx, ecx
	// 3
	shr		eax, 1
	and		ecx, 0xFE00
	// 4
	shr		ecx, 9
	and		eax, 0x7F
	// 5
	shr		edx, 17
	nop
	// 6
	mov		eax, [BYUV+eax*8].YU
	and		edx, 0x7F
	// 7
	add		eax, [GYUV+ecx*8].YU
	mov		ecx, ebx
	// 8
	add		eax, [RYUV+edx*8].YU
	mov		edx, ebx
	// 9
	shr		ebx, 1
	add		eax, 0x800
	// 10
	sar		eax, 8
	and		ecx, 0xFE00
	// 11
	shr		ecx, 9
	and		ebx, 0x7F
	// 12
	shr		edx, 17
	mov		[edi + 1], al
	// 13
	mov		eax, [BYUV+ebx*8].YU
	and		edx, 0x7F
	// 14
	add		eax, [GYUV+ecx*8].YU
	mov		ebp, ebx
	// 15
	add		eax, [RYUV+edx*8].YU
	nop
	// 16
	sar		eax, 8
	mov		ebx, [esp + LOOP_K]
	// 17
	add		eax, 8
	and		ebx, 1
	// 18
	// The store does not change the flags, so jnz still tests (k & 1).
	mov		[edi], al
	jnz		L9

	// At this point, ebp: B1, ecx: G1, edx: R1 (7-bit table indices)
	// and eax holds (t>>8)+8 for pixel 1.
	// t0 = pnext[0]
	// *UPlane++   = ((t>>24)+64)
	// t   = ( RYUV[(t0>>17)&0x7F].V +
	//         GYUV[(t0>> 9)&0x7F].V +
	//         BYUV[(t0>> 1)&0x7F].V )
	// *VPlane++ = ((t>>8)+64)

	// 19
	mov		ebx, [RYUV+edx*8].V
	mov		edx, [esp + UPLANE]
	// 20
	sar		eax, 16
	add		ebx, [GYUV+ecx*8].V
	// 21
	add		eax, 64
	add		ebx, [BYUV+ebp*8].V
	// 22
	mov		[edx], al
	inc		edx
	// 23
	mov		[esp + UPLANE], edx
	mov		edx, [esp + VPLANE]
	// 24
	sar		ebx, 8
	inc		edx
	// 25
	add		ebx, 64
	mov		[esp + VPLANE], edx
	// 26
	mov		[edx - 1], bl
	nop

L9:
	//  ---------------------
	//  |    | R1 | G1 | B1 | pnext[0]
	//  ---------------------
	//  |    | R2 | G2 | B2 | pnext[1]
	//  ---------------------
	//  |    | R3 | G3 | B3 | pnext[2]
	//  ---------------------
	//  |    | R4 | G4 | B4 | pnext[3]
	//  ---------------------

	// t2 = pnext[2]
	// t3 = pnext[3]
	// t = ( BYUV[(t3>> 1)&0x7F].YU +
	//       GYUV[(t3>> 9)&0x7F].YU +
	//       RYUV[(t3>>17)&0x7F].YU )
	// *(YPlane+3) = ((t>>8)+8)
	// t = ( BYUV[(t2>> 1)&0x7F].YU +
	//       GYUV[(t2>> 9)&0x7F].YU +
	//       RYUV[(t2>>17)&0x7F].YU )
	// *(YPlane+2) = ((t>>8)+8)
	// YPlane += 4
	// assign(eax: B4,Y3,Y4,U)
	// assign(ebx: B3,V)
	// assign(ecx: G4,G3)
	// assign(edx: R4,R3)
	// assign(ebp: B3)

	// 27
	mov		ebx, [esi + 8]
	mov		ecx, [esi + 12]
	// 28
	mov		eax, ecx
	mov		edx, ecx
	// 29
	shr		eax, 1
	and		ecx, 0xFE00
	// 30
	shr		ecx, 9
	and		eax, 0x7F
	// 31
	shr		edx, 17
	nop
	// 32
	mov		eax, [BYUV+eax*8].YU
	and		edx, 0x7F
	// 33
	add		eax, [GYUV+ecx*8].YU
	mov		ecx, ebx
	// 34
	add		eax, [RYUV+edx*8].YU
	mov		edx, ebx
	// 35
	shr		ebx, 1
	add		eax, 0x800
	// 36
	sar		eax, 8
	and		ebx, 0x7F
	// 37
	shr		ecx, 9
	mov		[edi + 3], al
	// 38
	shr		edx, 17
	and		ecx, 0x7F
	// 39
	mov		eax, [BYUV+ebx*8].YU
	and		edx, 0x7F
	// 40
	add		eax, [GYUV+ecx*8].YU
	mov		ebp, ebx
	// 41
	add		eax, [RYUV+edx*8].YU
	nop
	// 42
	sar		eax, 8
	mov		ebx, [esp + LOOP_K]
	// 43
	add		eax, 8
	and		ebx, 1
	// 44
	// The store does not change the flags, so jnz still tests (k & 1).
	mov		[edi + 2], al
	jnz		L16

	// At this point, ebp: B3, ecx: G3, edx: R3 (7-bit table indices)
	// and eax holds (t>>8)+8 for pixel 3.
	// t2 = pnext[2]
	// *UPlane++   = ((t>>24)+64)
	// t   = ( RYUV[(t2>>17)&0x7F].V +
	//         GYUV[(t2>> 9)&0x7F].V +
	//         BYUV[(t2>> 1)&0x7F].V )
	// *VPlane++ = ((t>>8)+64)

	// 45
	mov		ebx, [RYUV+edx*8].V
	mov		edx, [esp + UPLANE]
	// 46
	sar		eax, 16
	add		ebx, [GYUV+ecx*8].V
	// 47
	add		eax, 64
	add		ebx, [BYUV+ebp*8].V
	// 48
	mov		[edx], al
	inc		edx
	// 49
	mov		[esp + UPLANE], edx
	mov		edx, [esp + VPLANE]
	// 50
	sar		ebx, 8
	inc		edx
	// 51
	add		ebx, 64
	mov		[esp + VPLANE], edx
	// 52
	mov		[edx - 1], bl
	nop
L16:
	// 53
	mov		eax, [esp + LOOP_I]
	lea		esi, [esi + 16]
	// 54
	sub		eax, 4
	lea		edi, [edi + 4]
	// 55
	// lea and mov leave the flags from "sub eax, 4" intact for the jnz.
	mov		[esp + LOOP_I], eax
	jnz		L6

	// Assembler version of C_WIDTH_DIFF
	// if (width_diff)
	mov		eax, [esp + WIDTH_DIFF]
	mov		edx, eax
	test	eax, eax
	jz		Lno_width_diff
	// tm = (*(YPlane-1)) << 24
	// tm |= (tm>>8) | (tm>>16) | (tm>>24)
	// (replicates the last luma byte of the line into all four bytes of ecx)
	mov		bl, [edi - 1]
	shl		ebx, 24
	mov		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx
	// *(U32 *)YPlane = tm
	mov		[edi], ecx
	// if ((width_diff-4) > 0)
	sub		eax, 4
	jz		Lupdate_YPlane
	// *(U32 *)(YPlane + 4) = tm
	mov		[edi + 4], ecx
	sub		eax, 4
	// if ((width_diff-8) > 0)
	jz		Lupdate_YPlane
	// *(U32 *)(YPlane + 8) = tm
	mov		[edi + 8], ecx
Lupdate_YPlane:
	// YPlane += width_diff
	lea		edi, [edi + edx]
	// if (0 == (k&1))
	mov		eax, [esp + LOOP_K]
	test	eax, 1
	jnz		Lno_width_diff
	// t8u = *(UPlane-1)
	// t8v = *(VPlane-1)
	// *UPlane++ = t8u
	// *UPlane++ = t8u
	// *VPlane++ = t8v
	// *VPlane++ = t8v
	mov		ebp, edx
	mov		eax, [esp + UPLANE]
	mov		ebx, [esp + VPLANE]
	mov		cl, [eax - 1]
	mov		ch, [ebx - 1]
	mov		[eax], cl
	mov		[eax + 1], cl
	mov		[ebx], ch
	mov		[ebx + 1], ch
	// if ((width_diff-4) > 0)
	sub		ebp, 4
	jz		Lupdate_UVPlane
	// *UPlane++ = t8u
	// *UPlane++ = t8u
	// *VPlane++ = t8v
	// *VPlane++ = t8v
	mov		[eax + 2], cl
	mov		[eax + 3], cl
	mov		[ebx + 2], ch
	mov		[ebx + 3], ch
	// if ((width_diff-8) > 0)
	sub		ebp, 4
	jz		Lupdate_UVPlane
	// *UPlane++ = t8u
	// *UPlane++ = t8u
	// *VPlane++ = t8v
	// *VPlane++ = t8v
	mov		[eax + 4], cl
	mov		[eax + 5], cl
	mov		[ebx + 4], ch
	mov		[ebx + 5], ch
Lupdate_UVPlane:
	// UPlane += width_diff >> 1
	// VPlane += width_diff >> 1
	shr		edx, 1
	lea		eax, [eax + edx]
	mov		[esp + UPLANE], eax
	lea		ebx, [ebx + edx]
	mov		[esp + VPLANE], ebx
Lno_width_diff:

	// if (stretch && (0 == k) && j)
	mov		eax, [esp + STRETCH]
	test	eax, eax
	jz		L21
	mov		eax, [esp + LOOP_K]
	test	eax, eax
	jnz		L21
	mov		eax, [esp + LOOP_J]
	test	eax, eax
	jz		L21

	// spill YPlane ptr
	// (the YPlane argument slot is reused as the spill location)
	mov		[esp + YPLANE], edi
	nop

	// for (i = OutputWidth; i > 0; i -= 8)
	// assign (ebx, pyprev)
	// assign (ecx, t)
	// assign (edx, pynext)
	// assign (edi, pyspace)
	// assign (ebp, i)

	// make sure offsets are such that there are no bank conflicts here
	mov		ebx, [esp + PYPREV]
	mov		edi, [esp + PYSPACE]

	mov		edx, [esp + PYNEXT]
	mov		ebp, [esp + OUTPUT_WIDTH]

	// t = (*pyprev++ & 0xFEFEFEFE) >> 1
	// t += (*pynext++ & 0xFEFEFEFE) >> 1
	// *pyspace++ = t
	// t = (*pyprev++ & 0xFEFEFEFE) >> 1
	// t += (*pynext++ & 0xFEFEFEFE) >> 1
	// *pyspace++ = t
L22:
	// 1
	mov		eax, [ebx]
	lea		ebx, [ebx + 4]
	// 2
	mov		ecx, [edx]
	lea		edx, [edx + 4]
	// 3
	shr		ecx, 1
	and		eax, 0xFEFEFEFE
	// 4
	shr		eax, 1
	and		ecx, 0x7F7F7F7F
	// 5
	add		eax, ecx
	mov		ecx, [ebx]
	// 6
	shr		ecx, 1
	mov		[edi], eax
	// 7
	mov		eax, [edx]
	and		ecx, 0x7F7F7F7F
	// 8
	shr		eax, 1
	lea		edi, [edi + 4]
	// 9
	and		eax, 0x7F7F7F7F
	lea		ebx, [ebx + 4]
	// 10
	lea		edx, [edx + 4]
	add		eax, ecx
	// 11
	mov		[edi], eax
	lea		edi, [edi + 4]
	// 12
	sub		ebp, 8
	jnz		L22
	// kill (ebx, pyprev)
	// kill (ecx, t)
	// kill (edx, pynext)
	// kill (edi, pyspace)
	// kill (ebp, i)

	// restore YPlane
	mov		edi, [esp + YPLANE]

	// pnext += BackTwoLines
L21:
	add		esi, [esp + BACK_TWO_LINES]
	// YPlane += byte_ypitch_adj;
	add		edi, [esp + BYTE_YPITCH_ADJ]
	// if(0 == (k&1))
	mov		eax, [esp + LOOP_K]
	and		eax, 1
	jnz		L23
	// UPlane += byte_uvpitch_adj;
	// VPlane += byte_uvpitch_adj;
	mov		eax, [esp + BYTE_UVPITCH_ADJ]
	add		[esp + UPLANE], eax
	add		[esp + VPLANE], eax

L23:
	// k++; continue while (k < mark)
	inc		DWORD PTR [esp + LOOP_K]
	mov		eax, [esp + LOOP_K]
	cmp		eax, [esp + MARK]
	jl		L5

	// if (stretch)
	cmp		DWORD PTR [esp + STRETCH], 0
	je		L24
	// pyprev = YPlane - pitch
	mov		eax, edi
	sub		eax, [esp + PITCH_PARM]
	mov		[esp + PYPREV], eax
	// pyspace = YPlane
	mov		[esp + PYSPACE], edi
	// pynext = (YPlane += pitch)
	add		edi, [esp + PITCH_PARM]
	mov		[esp + PYNEXT], edi

L24:
	// j++; continue while (j < LumaIters)
	inc		DWORD PTR [esp + LOOP_J]
	mov		eax, [esp + LOOP_J]
	cmp		eax, [esp + LUMA_ITERS]
	jl		L4

	// kill (esi, pnext)
	// kill (edi, YPlane)

	// ASM version of C_HEIGHT_FILL
	// if (height_diff)
	mov		eax, [esp + HEIGHT_DIFF]
	test	eax, eax
	jz		Lno_height_diff

	// pyspace = (U32 *)YPlane
	mov		esi, edi
	// pyprev =  (U32 *)(YPlane - pitch)
	sub		esi, [esp + PITCH_PARM]
	// for (j = height_diff; j > 0; j--)
Lheight_yfill_loop:
	mov		ebx, [esp + WIDTHX16]
	// for (i = widthx16; i>0; i -=4)
Lheight_yfill_row:
	// *pyspace++ = *pyprev++
	mov		ecx, [esi]
	lea		esi, [esi + 4]
	mov		[edi], ecx
	lea		edi, [edi + 4]
	sub		ebx, 4
	jnz		Lheight_yfill_row
	// pyspace += word_ypitch_adj
	// pyprev  += word_ypitch_adj
	add		esi, [esp + BYTE_YPITCH_ADJ]
	add		edi, [esp + BYTE_YPITCH_ADJ]
	dec		eax
	jnz		Lheight_yfill_loop

	mov		eax, [esp + HEIGHT_DIFF]
	mov		edi, [esp + UPLANE]
	// puvspace = (U32 *)UPlane
	mov		esi, edi
	// puvprev =  (U32 *)(UPlane - pitch)
	sub		esi, [esp + PITCH_PARM]
	// for (j = height_diff; j > 0; j -= 2)
Lheight_ufill_loop:
	mov		ebx, [esp + WIDTHX16]
	// for (i = widthx16; i>0; i -= 8)
Lheight_ufill_row:
	// *puvspace++ = *puvprev++
	mov		ecx, [esi]
	mov		[edi], ecx
	lea		esi, [esi + 4]
	lea		edi, [edi + 4]
	sub		ebx, 8
	jnz		Lheight_ufill_row
	// puvspace += word_uvpitch_adj
	// puvprev  += word_uvpitch_adj
	add		esi, [esp + BYTE_UVPITCH_ADJ]
	add		edi, [esp + BYTE_UVPITCH_ADJ]
	sub		eax, 2
	jnz		Lheight_ufill_loop

	mov		eax, [esp + HEIGHT_DIFF]
	mov		edi, [esp + VPLANE]
	// puvspace = (U32 *)VPlane
	mov		esi, edi
	// puvprev =  (U32 *)(VPlane - pitch)
	sub		esi, [esp + PITCH_PARM]
	// for (j = height_diff; j > 0; j -= 2)
Lheight_vfill_loop:
	mov		ebx, [esp + WIDTHX16]
	// for (i = widthx16; i>0; i -= 8)
Lheight_vfill_row:
	// *puvspace++ = *puvprev++
	mov		ecx, [esi]
	mov		[edi], ecx
	lea		esi, [esi + 4]
	lea		edi, [edi + 4]
	sub		ebx, 8
	jnz		Lheight_vfill_row
	// puvspace += word_uvpitch_adj
	// puvprev  += word_uvpitch_adj
	add		esi, [esp + BYTE_UVPITCH_ADJ]
	add		edi, [esp + BYTE_UVPITCH_ADJ]
	sub		eax, 2
	jnz		Lheight_vfill_loop
Lno_height_diff:

	// if (stretch)
	mov		esi, [esp + PYPREV]
	cmp		DWORD PTR [esp + STRETCH], 0
	je		L26

	// The last skipped luma line has no line below it to average with,
	// so it is filled with a copy of the line above it.
	// for (i = OutputWidth; i > 0; i -= 4)
	// assign (esi, pyprev)
	// assign (edi, pyspace)
	// assign (ebp, i)
	mov		ebp, [esp + OUTPUT_WIDTH]
	mov		edi, [esp + PYSPACE]
L25:
	mov		ecx, [esi]
	lea		esi, [esi + 4]
	mov		[edi], ecx
	lea		edi, [edi + 4]
	sub		ebp, 4
	jnz		L25
	// kill (esi, pyprev)
	// kill (edi, pyspace)
	// kill (ebp, i)

L26:
	add		esp, LOCALSIZE
	pop		edi
	pop		esi
	pop		ebx
	pop		ebp
	ret

	}
}

#undef LOCALSIZE

#undef PITCH_PARM
#undef VPLANE
#undef UPLANE
#undef YPLANE
#undef LP_INPUT
#undef OUTPUT_HEIGHT_WORD
#undef OUTPUT_WIDTH_WORD
#undef LPBI_INPUT

#undef OUTPUT_WIDTH
#undef PYPREV
#undef PYSPACE
#undef PYNEXT
#undef PUVPREV
#undef PUVSPACE
#undef LOOP_I
#undef LOOP_J
#undef LOOP_K
#undef BACK_TWO_LINES
#undef WIDTHX16
#undef HEIGHTX16
#undef WIDTH_DIFF
#undef HEIGHT_DIFF
#undef WIDTH_ADJ
#undef HEIGHT_ADJ
#undef STRETCH
#undef ASPECT
#undef LUMA_ITERS
#undef MARK
#undef BYTE_YPITCH_ADJ
#undef BYTE_UVPITCH_ADJ
|
||
|
|
||
|
#endif // } H263P
|