145 lines
6.1 KiB
C++
Raw Permalink Normal View History

2001-01-01 00:00:00 +01:00
/* *************************************************************************
** INTEL Corporation Proprietary Information
**
** This listing is supplied under the terms of a license
** agreement with INTEL Corporation and may not be copied
** nor disclosed except in accordance with the terms of
** that agreement.
**
** Copyright (c) 1995, 1996 Intel Corporation.
** All Rights Reserved.
**
** *************************************************************************
*/
//////////////////////////////////////////////////////////////////////////
// $Author: AKASAI $
// $Date: 18 Mar 1996 10:47:48 $
// $Archive: S:\h26x\src\dec\d1addsp.cpv $
// $Header: S:\h26x\src\dec\d1addsp.cpv 1.1 18 Mar 1996 10:47:48 AKASAI $
// $Log: S:\h26x\src\dec\d1addsp.cpv $
//
// Rev 1.1 18 Mar 1996 10:47:48 AKASAI
// Deleted ClampTblSpecial so now uses common table ClipPixIntra.
// Added pragma code_seg("IACODE2").
//
// Rev 1.0 01 Nov 1995 13:37:58 AKASAI
// Initial revision.
//
// -------------------------------------------------------------------------
// ROUTINE NAME: BlockAddSpecial
// FILE NAME: d1addsp.cpp
//
// This routine performs a block(8 8) addition.
// output = clamp[reference + current]
//
// Input I32 *current (output of FMIDCT)
// U8 *reference (Motion Compensated address of reference)
// U8 *output (Output buffer)
//
// Assumption: reference uses 8 as pitch, output use PITCH,
// current has some other pitch, TEMPPITCH4
//
// Registers used: eax, ebx, ecx, edx, esi, edi, ebp
//
// -------------------------------------------------------------------------
#include "precomp.h"
#define TEMPPITCH4 32
extern U8 ClipPixIntra[];
#define FRAMEPOINTER esp
#define L_LOOPCOUNTER FRAMEPOINTER + 0 // 4 byte
#define LOCALSIZE 4 // keep aligned
#pragma code_seg("IACODE2")
__declspec(naked)
void BlockAddSpecial (U32 uResidual, U32 uRefBlock,U32 uDstBlock)
{
__asm {
push ebp ;// save callers frame pointer
mov ebp,esp ;// make parameters accessible
push esi ;// assumed preserved
push edi
push ebx
sub esp,LOCALSIZE ;// reserve local storage
mov esi, uRefBlock; ;// esi gets Base addr of Current
mov edi, uDstBlock ;// edi gets Base addr of OutputBuffer
mov ebp, uResidual ;// ebp gets Base addr of Reference
mov ecx, 8
xor eax, eax
// Cylces counts: 26 x 8=208 without cache miss
// czhu, 9/25/95
ALIGN 4
loop_for_i:
mov [L_LOOPCOUNTER], ecx ; save loop counter in temporary
mov ebx, [ebp+8] ; 1) fetch current[i+2]
mov al, BYTE PTR[esi+2] ; 1) fetch ref[i+2]
xor ecx, ecx ; 2)
mov cl, BYTE PTR[esi+3] ; 2) fetch ref[i+3]
mov edx, [ebp+12] ; 2) fetch current[i+3]
add eax, ebx ; 1) result2 = ref[i+2] + current[i+2]
xor ebx, ebx ; 3)
add ecx, edx ; 2) result3= ref[i+3] + current[i+3]
mov bl, BYTE PTR[esi] ; 3) fetch ref[i]
mov dl, ClipPixIntra[1024+eax] ; 1) fetch clamp[result2]
mov eax, [ebp] ; 3) fetch current[i]
add ebx, eax ; 3) result0 = ref[i] + current[i]
xor eax, eax ; 4)
mov dh, ClipPixIntra[1024+ecx] ; 2) fetch clamp[result3]
mov al, [esi+1] ; 4) fetch ref[i+1]
shl edx, 16 ; move 1st 2 results to high word
mov ecx, [ebp+4] ; 4) fetch current[i+1]
mov dl, ClipPixIntra[1024+ebx] ; 3) fetch clamp[result0]
add eax, ecx ; 4) result1 = ref[i+1] + current[i+1]
xor ecx, ecx ; 4+1)
mov ebx, [ebp+24] ; 4+1) fetch current[i+6]
mov dh, ClipPixIntra[1024+eax] ; 4) fetch clamp[result1]
mov cl, BYTE PTR[esi+6] ; 4+1) fetch ref[i+6]
mov [edi], edx ; store 4 output pixels
xor eax, eax ; 4+2)
mov al, BYTE PTR[esi+7] ; 4+2) fetch ref[i+7]
mov edx, [ebp+28] ; 4+2) fetch current[i+7]
add ecx, ebx ; 4+1) result6 = ref[i+6] + current[i+6]
xor ebx, ebx ; 4+3)
add eax, edx ; 4+2) result7= ref[i+7] + current[i+7]
mov bl, BYTE PTR[esi+4] ; 4+3) fetch ref[i+4]
mov dl, ClipPixIntra[1024+ecx] ; 4+1) fetch clamp[result6]
mov ecx, [ebp+16] ; 4+3) fetch current[i+4]
add ebx, ecx ; 4+3) result4 = ref[i+4] + current[i+4]
xor ecx, ecx ; 4+4)
mov dh, ClipPixIntra[1024+eax] ; 4+2) fetch clamp[result7]
mov cl, [esi+5] ; 4+4) fetch ref[i+5]
shl edx, 16 ; move 3rd 2 results to high word
mov eax, [ebp+20] ; 4+4) fetch current[i+5]
add ecx, eax ; 4+4) result5 = ref[i+5] + current[i+5]
add esi, 8 ; Update address of next line
mov dl, ClipPixIntra[1024+ebx] ; 4+3) fetch clamp[result4]
add ebp, TEMPPITCH4 ; Update address of current to next line
mov dh, ClipPixIntra[1024+ecx] ; 4+4) fetch clamp[result5]
mov ecx, [L_LOOPCOUNTER] ; get loop counter
mov [edi+4], edx ; store 4 output pixels
add edi, PITCH ; Update address of output to next line
xor eax, eax ; 1)
dec ecx
jnz loop_for_i
add esp,LOCALSIZE // free locals
pop ebx
pop edi
pop esi
pop ebp
ret
} //end of asm, BlockAddSpecial
} // End of BlockAddSpecial
#pragma code_seg()