2020-09-30 16:53:55 +02:00

316 lines
13 KiB
PHP

;////////////////////////////////////////////////////////////////////////////
;//
;// INTEL CORPORATION PROPRIETARY INFORMATION
;//
;// This software is supplied under the terms of a license
;// agreement or nondisclosure agreement with Intel Corporation
;// and may not be copied or disclosed except in accordance
;// with the terms of that agreement.
;//
;////////////////////////////////////////////////////////////////////////////
;//
;// $Header: S:\h26x\src\enc\exedtq.inv 1.15 06 Nov 1996 16:18:34 BNICKERS $
;//
;// $Log: S:\h26x\src\enc\exedtq.inv $
;//
;// Rev 1.15 06 Nov 1996 16:18:34 BNICKERS
;// Improve performance.
;//
;// Rev 1.14 18 Oct 1996 16:57:14 BNICKERS
;// Fixes for EMV
;//
;// Rev 1.13 10 Oct 1996 16:42:54 BNICKERS
;// Initial debugging of Extended Motion Vectors.
;//
;// Rev 1.12 04 Oct 1996 08:48:00 BNICKERS
;// Add EMV.
;//
;// Rev 1.11 12 Sep 1996 10:56:18 BNICKERS
;// Add arguments for thresholds and differentials.
;//
;// Rev 1.10 22 Jul 1996 15:23:32 BNICKERS
;// Reduce code size. Implement H261 spatial filter.
;//
;// Rev 1.9 25 Jun 1996 14:24:54 BNICKERS
;// Implement heuristic motion estimation for MMX, AP mode.
;//
;// Rev 1.8 14 May 1996 12:18:54 BNICKERS
;// Initial debugging of MMx B-Frame ME.
;//
;// Rev 1.7 03 May 1996 14:03:46 BNICKERS
;//
;// Minor bug fixes and integration refinements.
;//
;// Rev 1.6 02 May 1996 12:00:58 BNICKERS
;// Initial integration of B Frame ME, MMX version.
;//
;// Rev 1.5 16 Apr 1996 16:41:02 BNICKERS
;// Start adding storage for B frame ME.
;//
;// Rev 1.4 10 Apr 1996 13:14:12 BNICKERS
;// Recoding of Motion Estimation, Advanced Prediction.
;//
;// Rev 1.3 05 Apr 1996 12:27:54 BNICKERS
;// Improvements to baseline half pel ME.
;//
;// Rev 1.2 26 Mar 1996 12:00:20 BNICKERS
;// Did some tuning for MMx encode.
;//
;// Rev 1.1 20 Mar 1996 15:26:56 KLILLEVO
;// changed quantization to match IA quantization
;//
;// Rev 1.0 15 Mar 1996 15:54:14 BECHOLS
;// Initial revision.
;//
;// Rev 1.0 16 Feb 1996 17:12:12 BNICKERS
;// Initial revision.
;//
;////////////////////////////////////////////////////////////////////////////
;
; exEDTQ.inc -- Include file for MMx versions of Motion Estimation and Frame
; Differencing, Forward Discrete Cosine Transform (FDCT), and Quant/RLE.
;
; Storage on local stack frame for variables that survive only for the duration
; of one of the four phases (Motion Est, Frame Diff, FDCT, Quant RLE). This
; storage is prime in that it is accessed by the 3-byte addressing form,
; esp+8_bit_Disp. This is particularly important for MMx instructions, which
; would be 8 bytes long if a 32-bit Displacement was used. There's a penalty
; for such a long instruction.
; (128 bytes; 32:159)
; ********************************************
; Motion Estimation Locals * THAT DO NOT SURVIVE DURING OTHER PASSES. *
; ********************************************
HalfPelMBMESWDAccum TEXTEQU <[esp+ 0+StackOffset]>; 4 QWORDs + bit bucket
BestOfFourStartingPoints TEXTEQU HalfPelMBMESWDAccum+4
BitBucket1 TEXTEQU <[esp+ 32+StackOffset]>; 8 bytes (QWORD)
StashMM6 TEXTEQU <[esp+ 32+StackOffset]>; QWORD
PartSWDForLLBlk TEXTEQU <[esp+ 32+StackOffset]>; QWORD
SWDULandLR TEXTEQU <[esp+ 40+StackOffset]>; QWORD
BitBucket2 TEXTEQU <[esp+ 48+StackOffset]>; QWORD
PartSWDForLRBlk TEXTEQU <[esp+ 48+StackOffset]>; QWORD
Addr0MVRefBlk TEXTEQU <[esp+ 48+StackOffset]>; DWORD
LimitForSWDForBlkMV TEXTEQU <[esp+ 52+StackOffset]>; DWORD
SWDURandLL TEXTEQU <[esp+ 56+StackOffset]>; QWORD
PartSWDForURBlk TEXTEQU <[esp+ 64+StackOffset]>; QWORD
SWD0MVURandLL TEXTEQU <[esp+ 72+StackOffset]>; QWORD
SWD0MVULandLR TEXTEQU <[esp+ 80+StackOffset]>; QWORD
SWDForNon0MVToBeat TEXTEQU <[esp+ 88+StackOffset]>; DWORD
BestMBFullPelSWD TEXTEQU <[esp+ 92+StackOffset]>; DWORD
BestMBHalfPelSWD TEXTEQU <[esp+ 96+StackOffset]>; DWORD
BestMBHalfPelRefAddr TEXTEQU <[esp+ 100+StackOffset]>; DWORD
BestHalfPelHorzSWD TEXTEQU <[esp+ 104+StackOffset]>; DWORD
BestHalfPelVertSWD TEXTEQU <[esp+ 108+StackOffset]>; DWORD
Addr0MVRef TEXTEQU <[esp+ 112+StackOffset]>; DWORD
BestBlockRefAddrVP1 TEXTEQU <[esp+ 116+StackOffset]>; DWORD
BestBlkFullPelSWD TEXTEQU <[esp+ 120+StackOffset]>; DWORD
SWDForBlock2Or4 TEXTEQU <[esp+ 124+StackOffset]>; DWORD
; Frame Differencing Locals, passed to FDCT.
;
; The output of frame differencing is the input to the forward DCT.
; The intermediate coefficients are also stored here. This keeps the
; addressing forms as small as possible. This is particularly important
; for MMx instructions, to keep them 7 bytes or shorter.
; (32:167)
PelDiffs TEXTEQU <[esp+StackOffset]> ; Must stay here!
PelDiffsLine0 TEXTEQU <PelDiffs>
PelDiffsLine1 TEXTEQU <PelDiffs+16>
PelDiffsLine2 TEXTEQU <PelDiffs+32>
PelDiffsLine3 TEXTEQU <PelDiffs+48>
PelDiffsLine4 TEXTEQU <PelDiffs+64>
PelDiffsLine5 TEXTEQU <PelDiffs+80>
PelDiffsLine6 TEXTEQU <PelDiffs+96>
PelDiffsLine7 TEXTEQU <PelDiffs+112>
Coeffs TEXTEQU <[esp+StackOffset+8]> ; 16 QWORDs
; *****************************************
; Local variables * THAT SURVIVE FROM ONE PASS TO ANOTHER *
; *****************************************
;
; QWORD aligned:
; (184:191)
BlockAbove TEXTEQU <[esp+StackOffset+152]> ; 2 DWORDs
; ************************************************
; Frame Differencing Locals * THAT NEED NOT SURVIVE OTHER PASSES (but do). *
; ************************************************
; These three blocks of 8*8 storage are needed for the left, right, and
; central remote prediction contributions.
; (192:383)
LeftPred TEXTEQU <[esp+StackOffset+160]>
RightPred TEXTEQU <[esp+StackOffset+224]>
CentralPred TEXTEQU <[esp+CONST_384*1+StackOffset-96]>
; Temp space used by Heuristic ME.
TargetSigContribForRowPairs TEXTEQU CentralPred
; *****************************************
; Local variables * THAT SURVIVE FROM ONE PASS TO ANOTHER *
; *****************************************
; (384:511)
DoHalfPelME TEXTEQU <[esp+CONST_384*1+StackOffset- 32]>
DoBlockLevelVectors TEXTEQU <[esp+CONST_384*1+StackOffset- 28]>
DoAdvancedPrediction TEXTEQU <[esp+CONST_384*1+StackOffset- 27]>
DoSpatialFiltering TEXTEQU <[esp+CONST_384*1+StackOffset- 26]>
IsPlainPFrame TEXTEQU <[esp+CONST_384*1+StackOffset- 25]>
TargetFrameBaseAddress TEXTEQU <[esp+CONST_384*1+StackOffset- 24]>
PreviousFrameBaseAddress TEXTEQU <[esp+CONST_384*1+StackOffset- 20]>
TargToRef TEXTEQU <[esp+CONST_384*1+StackOffset- 16]>
BFrameBaseAddress TEXTEQU <[esp+CONST_384*1+StackOffset- 12]>
SpatiallyFilteredMB TEXTEQU <BFrameBaseAddress>
BFrameToFuture TEXTEQU <[esp+CONST_384*1+StackOffset- 8]>
SpatialFiltThreshold TEXTEQU <BFrameToFuture>
PendingOBMC TEXTEQU <[esp+CONST_384*1+StackOffset- 4]>
SpatialFiltDifferential TEXTEQU <PendingOBMC>
DistToBADforBlockAbove TEXTEQU <[esp+CONST_384*1+StackOffset+ 0]>
DistToBADforBlockBelow TEXTEQU <[esp+CONST_384*1+StackOffset+ 4]>
AddrOfLeftPred TEXTEQU <[esp+CONST_384*1+StackOffset+ 8]>
AddrOfRightPred TEXTEQU <[esp+CONST_384*1+StackOffset+ 12]>
Recip2QPToUse TEXTEQU <[esp+CONST_384*1+StackOffset+ 16]>
QPDiv2 TEXTEQU <[esp+CONST_384*1+StackOffset+ 20]>
BRecip2QPToUse TEXTEQU <[esp+CONST_384*1+StackOffset+ 24]>
BQPDiv2 TEXTEQU <[esp+CONST_384*1+StackOffset+ 28]>
CodeStreamCursor TEXTEQU <[esp+CONST_384*1+StackOffset+ 32]>
BCodeStreamCursor TEXTEQU <[esp+CONST_384*1+StackOffset+ 36]>
C00Copy TEXTEQU <[esp+CONST_384*1+StackOffset+ 40]>
StashBlockType TEXTEQU <[esp+CONST_384*1+StackOffset+ 44]>
TargetMacroBlockBaseAddr TEXTEQU <[esp+CONST_384*1+StackOffset+ 48]>
BestMV TEXTEQU <[esp+CONST_384*1+StackOffset+ 52]>
BestMBHalfPelMV TEXTEQU <[esp+CONST_384*1+StackOffset+ 56]>
CandidateMV TEXTEQU <BestMBHalfPelMV>
SWDTotal TEXTEQU <[esp+CONST_384*1+StackOffset+ 60]>
BSWDTotal TEXTEQU <[esp+CONST_384*1+StackOffset+ 64]>
BlockActionDescrCursor TEXTEQU <[esp+CONST_384*1+StackOffset+ 68]>
MBlockActionStream TEXTEQU BlockActionDescrCursor
BFrmCBP TEXTEQU <[esp+CONST_384*1+StackOffset+ 72]>
PastRefPitchDiv4 TEXTEQU <[esp+CONST_384*1+StackOffset+ 76]>
CurrSWDState TEXTEQU <[esp+CONST_384*1+StackOffset+ 80]>
StashPartialRefBlkAddr TEXTEQU <[esp+CONST_384*1+StackOffset+ 84]>
StashESP TEXTEQU <[esp+CONST_384*1+StackOffset+ 92]>
; These two arrays use esp+384+96:esp+384+223, and esp+384*2-96:esp+384*2+31.
; (512:639, 704:831)
WeightForwardMotion TEXTEQU <[esp+384+StackOffset+96]>
WeightBackwardMotion TEXTEQU <[esp+384+StackOffset+160]>
; 32 more bytes of local variables here:
; (832:863)
DoHeuristicME TEXTEQU <[esp+CONST_384*2+StackOffset+ 32]>
TargetToSig_Debiased TEXTEQU <[esp+CONST_384*2+StackOffset+ 36]>
SigToTarget TEXTEQU <[esp+CONST_384*2+StackOffset+ 40]>
BFrmZeroVectorThreshold TEXTEQU <[esp+CONST_384*2+StackOffset+ 44]>
EMVLimitsForThisMB TEXTEQU <[esp+CONST_384*2+StackOffset+ 48]> ; 8
DoExtendedMotionVectors TEXTEQU <[esp+CONST_384*2+StackOffset+ 56]>
StackSpaceAvailable TEXTEQU <[esp+CONST_384*2+StackOffset+ 60]>
EXTERNDEF C0100010001000100:DWORD
EXTERNDEF C1:DWORD
EXTERNDEF C2:DWORD
EXTERNDEF C3:DWORD
EXTERNDEF C4:DWORD
EXTERNDEF C5:DWORD
EXTERNDEF C6:DWORD
EXTERNDEF C7:DWORD
EXTERNDEF Diff_IdxRefWts:BYTE
EXTERNDEF FutureWt_FF_or_00:DWORD
EXTERNDEF BFrmSWDState:BYTE
EXTERNDEF Pel_Rnd:DWORD
EXTERNDEF LeftRightBlkPosition:DWORD
EXTERNDEF UpDownBlkPosition:DWORD
EXTERNDEF BlkEmptyFlag:BYTE
EXTERNDEF NextZigZagCoeff:BYTE
C00 = 0
C04 = 8
C10 = 16
C14 = 24
C20 = 32
C24 = 40
C30 = 48
C34 = 56
C40 = 64
C44 = 72
C50 = 80
C54 = 88
C60 = 96
C64 = 104
C70 = 112
C74 = 120
Q00 = C00 ; C00
Q01 = C10 ; C00+1
Q02 = C20 ; C00+2
Q03 = C30 ; C00+3
Q04 = C40 ; C00+4
Q05 = C50 ; C00+5
Q06 = C60 ; C00+6
Q07 = C70 ; C00+7
Q10 = C00+1 ; C10
Q11 = C10+1 ; C10+1
Q12 = C20+1 ; C10+2
Q13 = C30+1 ; C10+3
Q14 = C40+1 ; C10+4
Q15 = C50+1 ; C10+5
Q16 = C60+1 ; C10+6
Q17 = C70+1 ; C10+7
Q20 = C00+2 ; C20
Q21 = C10+2 ; C20+1
Q22 = C20+2 ; C20+2
Q23 = C30+2 ; C20+3
Q24 = C40+2 ; C20+4
Q25 = C50+2 ; C20+5
Q26 = C60+2 ; C20+6
Q27 = C70+2 ; C20+7
Q30 = C00+3 ; C30
Q31 = C10+3 ; C30+1
Q32 = C20+3 ; C30+2
Q33 = C30+3 ; C30+3
Q34 = C40+3 ; C30+4
Q35 = C50+3 ; C30+5
Q36 = C60+3 ; C30+6
Q37 = C70+3 ; C30+7
Q40 = C00+4 ; C40
Q41 = C10+4 ; C40+1
Q42 = C20+4 ; C40+2
Q43 = C30+4 ; C40+3
Q44 = C40+4 ; C40+4
Q45 = C50+4 ; C40+5
Q46 = C60+4 ; C40+6
Q47 = C70+4 ; C40+7
Q50 = C00+5 ; C50
Q51 = C10+5 ; C50+1
Q52 = C20+5 ; C50+2
Q53 = C30+5 ; C50+3
Q54 = C40+5 ; C50+4
Q55 = C50+5 ; C50+5
Q56 = C60+5 ; C50+6
Q57 = C70+5 ; C50+7
Q60 = C00+6 ; C60
Q61 = C10+6 ; C60+1
Q62 = C20+6 ; C60+2
Q63 = C30+6 ; C60+3
Q64 = C40+6 ; C60+4
Q65 = C50+6 ; C60+5
Q66 = C60+6 ; C60+6
Q67 = C70+6 ; C60+7
Q70 = C00+7 ; C70
Q71 = C10+7 ; C70+1
Q72 = C20+7 ; C70+2
Q73 = C30+7 ; C70+3
Q74 = C40+7 ; C70+4
Q75 = C50+7 ; C70+5
Q76 = C60+7 ; C70+6
Q77 = C70+7 ; C70+7