2020-09-30 16:53:55 +02:00

717 lines
16 KiB
C

//
// ITU-T G.723 Floating Point Speech Coder ANSI C Source Code. Version 1.00
// copyright (c) 1995, AudioCodes, DSP Group, France Telecom,
// Universite de Sherbrooke, Intel Corporation. All rights reserved.
//
#include <stdio.h>
#include <math.h>
#include "opt.h"
#include "typedef.h"
#include "cst_lbc.h"
#include "tab_lbc.h"
#include "util_lbc.h"
#include "lsp.h"
#include "timer.h"
#include "mmxutil.h"
#if COMPILE_MMX
// This file includes all the Lsp related functions
//--------------------------------------------------------------
// Fractional 16x16 multiply: returns the upper 16 bits of the signed
// 32-bit product of x and y (the product shifted right by 16).
int mult(short x, short y)
{
    int product = (int)x * (int)y;   // widen first so the product is formed in int

    return product >> 16;
}
//--------------------------------------------------------------
// LspSearchInt: scan the 16-bit codebook LspTab and return the index of the
// entry with the largest error measure Err (ties go to the later entry).
//
//   Lspw   - 8 input shorts: Lspw[0..3] are subtracted from the table values,
//            Lspw[4..7] multiply the table values.  In the ASM_SVQ build the
//            buffer is expanded IN PLACE to 32 shorts, so the caller must
//            provide room for 32 shorts and must refill it before reuse.
//   LspTab - codebook, stored in groups of four entries with the components
//            interleaved (see the layout comments below).
//
// Two implementations are provided: an MMX inline-assembly version (ASM_SVQ)
// that handles four entries per pass, and a C version with the same four-lane
// structure.
int LspSearchInt(short *Lspw, short *LspTab)
{
#if ASM_SVQ
// mem8000 holds 0x8000 in each of its four 16-bit words; it is the bias that
// turns signed sums into unsigned ones and the initial value of 'max'.
// NOTE(review): 'zero' is not referenced anywhere in the visible code.
int mem8000[2] = {0x80008000,0x80008000}, zero[2] = {0,0};
// maxes receives the four per-lane maxima once the asm block has finished.
short maxes[4],mx;
// retu receives the four per-lane winning indices, one per byte.
int retu;
int *ptr,tmp,t,i,k;
// Register roles inside the asm block.
#define lsp esi
#define tab edi
#define idx edx
#define pidx eax // packed index: n+3 n+2 n+1 n
#define maxi ecx
#define r0 mm0
#define r1 mm1
#define r2 mm2
#define r3 mm3
#define r4 mm4
#define max mm5
// Expand Lspw table so it's 0000 1111 2222 3333 4444 5555 6666 7777
// Each of the 8 input shorts is replicated into 4 consecutive shorts (two
// ints holding the value in both halves).  The loop runs from the last
// element down so that no input short is overwritten before it is read.
// NOTE(review): Lspw is accessed through an int pointer here; presumably the
// caller guarantees suitable alignment - confirm.
ptr = (int *)Lspw;
k = 14;
for (i=7; i>=0; i--)
{
t = Lspw[i]; t = t & 0xffff;
tmp = t;
tmp = (tmp << 16) | t;
ptr[k] = ptr[k+1] = tmp;
k -= 2;
}
// Need LspTab to be ordered 0 4 8 12 1 5 9 13 etc.
// so that lo word of each qword sees 0123, next sees 4567, etc.
//
// 'idx' counts iterations, so it goes 0 to 256 in steps of 4.
// The 4 LspTab sets at any given time are 8*idx, 8*idx+8, +16, +24
// Lspw[n] is lsp+8*n, as defined below
// a(n): n-th qword of the table relative to the current group of entries.
// b(n): n-th qword of the expanded Lspw buffer (original Lspw[n], four copies).
#define a(n) [tab+8*idx+8*n]
#define b(n) [lsp+8*n]
ASM
{
// Explicitly save the registers named by the macros above.
// NOTE(review): ebx is also written inside the loop but is not saved here;
// presumably the compiler preserves it around the inline block - confirm for
// the toolchain in use.
push lsp;
push tab;
push idx;
push pidx;
push maxi;
mov lsp,Lspw;
mov tab,LspTab;
xor idx,idx;
xor maxi,maxi;
// Byte k of pidx is the index of the entry handled in lane k: starts at 0,1,2,3.
mov pidx,003020100h;
// Running per-lane maximum, kept in biased (unsigned) form.
movq max,mem8000;
/*
The code below is interleaved with k=1.
The structure is: four levels of indentation, one for each of the
4 terms of the sum. The instructions completely left-justified
are the part of the loop that's wrapped around on itself.
Note that the code reads 8 bytes past the end of the LspTab that's
passed to it. This is made o.k. by making the table that's passed
to it have 8 dummy bytes at the end.
*/
// NOTE(review): with the addressing used below, the final pass (idx = 252)
// loads a(4)..a(7), i.e. table bytes 2048..2079, which looks like 32 bytes
// beyond a 256-entry table rather than 8 - confirm how much padding the
// table really has.
//
// Each term of the sum is built as
//   pmulhw(table, b(4+n))  -> high half of table * multiplier
//   psllw  ...,2           -> scale up by 4
//   pmulhw(..., table - b(n)), the difference being formed by psubsw
// and the four terms are accumulated with paddw.
// Start up the pipeline
movq r0,a(0);
movq r1,r0;
pmulhw r0,b(4);
psubsw r1,b(0);
movq r2,a(1);
movq r3,r2;
psllw r0,2;
pmulhw r0,r1;
pmulhw r2,b(5);
psubsw r3,b(1);
movq r1,a(2);
psllw r2,2;
pmulhw r2,r3;
movq r3,r1;
pmulhw r1,b(6);
psubsw r3,b(2);
paddw r0,r2;
movq r2,a(3);
psllw r1,2;
pmulhw r1,r3;
movq r3,r2;
pmulhw r2,b(7);
psubsw r3,b(3);
paddw r1,r0;
// Steady state: each pass finishes the sum for the current group of four
// entries (a(0)..a(3), started earlier), folds it into max/maxi, and at the
// same time starts the sum for the next group (a(4)..a(7)).
loop1:
movq r0,a(4);
movq r4,r1; // save accum so not wiped out by first half of loop
psllw r2,2;
movq r1,r0;
pmulhw r2,r3;
pmulhw r0,b(4);
psubsw r1,b(0);
paddw r4,r2; // now final answer is in r4
movq r2,a(5);
paddw r4,mem8000 // make final sum unsigned
movq r3,r2; // 0123
psllw r0,2;
// psubusw followed by paddw yields the unsigned maximum of max and r4.
psubusw max,r4 // start to compute max
pmulhw r0,r1; // 0.23
pmulhw r2,b(5);
paddw max,r4 // max now done
psubsw r3,b(1);
// A lane compares equal when its new sum is >= the previous maximum.
pcmpeqw r4,max // now 1111's means a new max was found
movq r1,a(6); // 0123
packsswb r4,r4; // put all fields in low 32 bits
psllw r2,2;
// ebx = one mask byte per lane; it selects between the old index bytes in
// maxi and the current index bytes in pidx.
movd ebx,r4;
pmulhw r2,r3; // 012.
xor ebx,0ffffffffh; // invert mask
and maxi,ebx; // get old index to keep
movq r3,r1; // 0123
pmulhw r1,b(6);
psubsw r3,b(2);
xor ebx,0ffffffffh; // invert mask
paddw r0,r2; // 01.3
and ebx,pidx; // get new index
psllw r1,2;
movq r2,a(7);// 0123
pmulhw r1,r3; // 012.
movq r3,r2; // 0123
or maxi,ebx; // now maxi is done
pmulhw r2,b(7);
psubsw r3,b(3);
paddw r1,r0; // .123
// Advance to the next group of four entries and bump every index byte by 4.
add idx,4;
add pidx,004040404h;
cmp idx,256;
jl loop1;
// Remove the 0x8000 bias so the four maxima are signed again, then hand the
// maxima and the packed indices back to the C code.
psubw max,mem8000;
mov retu,maxi;
movq maxes,max;
pop maxi;
pop pidx;
pop idx;
pop tab;
pop lsp;
}
// find which of the 4 maxes is the max, and return the appropriate
// one of the 4 maxindices.
// t is the bit offset of the winning lane's index byte inside retu; '>='
// means that on a tie the higher-numbered lane wins.
mx = maxes[0]; t = 0;
if (maxes[1] >= mx) { mx = maxes[1]; t = 8; }
if (maxes[2] >= mx) { mx = maxes[2]; t = 16; }
if (maxes[3] >= mx) { mx = maxes[3]; t = 24; }
// Leave MMX state before returning to code that may use floating point.
ASM emms;
retu = (retu >> t) & 0xff;
return(retu);
// NOTE(review): pidx and r0..r4 are defined above but are not #undef'd here,
// so they remain defined for the rest of the translation unit.
#undef a
#undef b
#undef idx
#undef lsp
#undef tab
#undef max
#undef maxi
#else // if assembly code not selected, use C code
// C version: keeps four independent running maxima (one per lane, lane =
// i & 3) exactly like the four 16-bit lanes of the MMX version.
int Indx[4],i,s,ret;
short Max[4],Err,mx;
short m0,m1,m2,m3,m4,m5,m6,m7,t;
for (i=0; i<4; i++)
{
Max[i] = Indx[i] = 0;
}
for (i=0; i < LspCbSize; i++)
{
// Within a group of 16 shorts, LspTab[s], [s+4], [s+8], [s+12] are the four
// components of the entry in lane s.
s = (i&3);
// For each component: (table * multiplier) >> 16, scaled by 4, then
// multiplied by (table - Lspw[n]) and shifted right by 16 again.  The
// intermediate values are narrowed to short on assignment/argument passing,
// presumably to mirror the 16-bit MMX lanes - confirm.
m0 = mult(Lspw[4],LspTab[s+0]);
t = LspTab[s+0]-Lspw[0]; m1 = mult(t,m0<<2);
m2 = mult(Lspw[5],LspTab[s+4]);
t = LspTab[s+4]-Lspw[1]; m3 = mult(t,m2<<2);
m4 = mult(Lspw[6],LspTab[s+8]);
t = LspTab[s+8]-Lspw[2]; m5 = mult(t,m4<<2);
m6 = mult(Lspw[7],LspTab[s+12]);
t = LspTab[s+12]-Lspw[3]; m7 = mult(t,m6<<2);
Err = m1+m3+m5+m7;
// '>=' lets a later entry replace an earlier one with the same Err.
if (Err >= Max[s])
{
Max[s] = Err;
Indx[s] = i;
}
// After the fourth lane, step to the next group of four entries.
if (s==3)
LspTab += 16;
}
// Pick the best of the four lanes; on a tie the higher-numbered lane wins.
mx = Max[0]; ret = Indx[0];
if (Max[1] >= mx) { mx = Max[1]; ret = Indx[1]; }
if (Max[2] >= mx) { mx = Max[2]; ret = Indx[2]; }
if (Max[3] >= mx) { mx = Max[3]; ret = Indx[3]; }
return(ret);
#endif
}
//--------------------------------------------------------------
Word32 Svq_Int(float *Lsp, float *Wvect)
{
#define LSP_SCALE 256
int i;
Word32 Rez;
int z;
short Wint[LpcOrder],LspTemp[LpcOrder];
DECLARE_SHORT(Lspw,32);
ALIGN_ARRAY(Lspw);
for (i=0; i<LpcOrder; i++)
{
z = (int)(LSP_SCALE*2*Lsp[i]);
if (z > 32767) LspTemp[i] = 32767;
else if (z < -32768) LspTemp[i] = -32768;
else LspTemp[i] = z;
}
FloatToShortScaled(Wvect,Wint,10,0);
Rez = (Word32) 0;
// For each of the 3 bands
Lspw[0] = LspTemp[0]; Lspw[1] = LspTemp[1]; Lspw[2] = LspTemp[2];
Lspw[3] = 0;
Lspw[4] = -Wint[0]; Lspw[5] = -Wint[1]; Lspw[6] = -Wint[2];
Lspw[7] = 0;
Rez = LspSearchInt(Lspw,LspTableInt);
Lspw[0] = LspTemp[3]; Lspw[1] = LspTemp[4]; Lspw[2] = LspTemp[5];
Lspw[3] = 0;
Lspw[4] = -Wint[3]; Lspw[5] = -Wint[4]; Lspw[6] = -Wint[5];
Lspw[7] = 0;
Rez = (Rez<<8) + LspSearchInt(Lspw,&LspTableInt[1024]);
Lspw[0] = LspTemp[6]; Lspw[1] = LspTemp[7]; Lspw[2] = LspTemp[8];
Lspw[3] = LspTemp[9];
Lspw[4] = -Wint[6]; Lspw[5] = -Wint[7]; Lspw[6] = -Wint[8];
Lspw[7] = -Wint[9];
Rez = (Rez<<8) + LspSearchInt(Lspw,&LspTableInt[2048]);
return Rez;
}
#endif
//---------------------------------------------------------------
// Polynomial: weighted sum of six cosine-table values.  The coefficients are
// Lpq[LpcOrder], Lpq[LpcOrder-2], ... Lpq[LpcOrder-10] (every second entry),
// paired with CosineTable at 0, CosPtr, 2*CosPtr, ... 5*CosPtr.
//   Lpq    - coefficient array (the caller offsets it by one to select the
//            other interleaved sequence)
//   CosPtr - table step; it is used unmasked for the second term
float Polynomial(float *Lpq, int CosPtr)
{
    // Multiples 2..5 of CosPtr are wrapped with a bit mask (this acts as a
    // modulo only if CosineTableSize is a power of two - presumably it is).
    const int at2 = (CosPtr*2)&(CosineTableSize-1);
    const int at3 = (CosPtr*3)&(CosineTableSize-1);
    const int at4 = (CosPtr*4)&(CosineTableSize-1);
    const int at5 = (CosPtr*5)&(CosineTableSize-1);

    return(Lpq[LpcOrder]*CosineTable[0] +
           Lpq[LpcOrder-2]*CosineTable[CosPtr] +
           Lpq[LpcOrder-4]*CosineTable[at2] +
           Lpq[LpcOrder-6]*CosineTable[at3] +
           Lpq[LpcOrder-8]*CosineTable[at4] +
           Lpq[LpcOrder-10]*CosineTable[at5]);
}
//--------------------------------------------------------------
// AtoLsp: derive LpcOrder LSP values from the LPC coefficients by locating
// sign changes of two interleaved polynomials along the cosine table.
//   LspVect - output; also used as scratch for the bandwidth-expanded Lpc
//   Lpc     - input coefficients (LpcOrder values)
//   PrevLsp - copied to LspVect when fewer than LpcOrder roots are found
void AtoLsp(float *LspVect, float *Lpc, float *PrevLsp)
{
    int n, step;
    int which = 0;      // 0 or 1: which interleaved polynomial is evaluated
    int found = 0;      // number of roots stored so far
    float Lpq[LpcOrder+2];
    float before, now, magBefore, magNow;

    // Small additional bandwidth expansion
    for (n = 0; n < LpcOrder; n++)
        LspVect[n] = Lpc[n]*BandExpTable[n];

    // Build Lp (even slots) and Lq (odd slots)
    Lpq[0] = 1.0f;
    Lpq[1] = 1.0f;
    for (n = 0; n < LpcOrder/2; n++)
    {
        Lpq[2*n+2] = -Lpq[2*n+0] - LspVect[n] - LspVect[LpcOrder-1-n];
        Lpq[2*n+3] = Lpq[2*n+1] - LspVect[n] + LspVect[LpcOrder-1-n];
    }
    Lpq[LpcOrder+0] *= 0.5f;
    Lpq[LpcOrder+1] *= 0.5f;

    // Walk the first half of the cosine table looking for sign changes
    before = Polynomial(Lpq,0);
    for (step = 1; step < CosineTableSize/2; step++)
    {
        now = Polynomial(&Lpq[which],step);

        // The sign bits differ exactly when the XOR of the bit patterns is negative
        if ((asint(now) ^ asint(before)) < 0)
        {
            magBefore = (float)fabs(before);
            magNow = (float)fabs(now);
            // Linear interpolation between table steps step-1 and step
            LspVect[found++] = (step-1 + magBefore/(magBefore+magNow));

            if (found == LpcOrder)
                break;

            // Switch to the other polynomial and re-evaluate at this step
            which ^= 1;
            now = Polynomial(&Lpq[which],step);
        }
        before = now;
    }

    // All roots found: LspVect already holds the result
    if (found == LpcOrder)
        return;

    // Otherwise fall back to the previous vector
    for (n = 0; n < LpcOrder; n++)
        LspVect[n] = PrevLsp[n];
}
//--------------------------------------------------------------
// Lsp_Qnt: build the weighting vector for CurrLsp, replace CurrLsp in place
// by its prediction residual, and quantize that residual.
//   CurrLsp - current vector (LpcOrder floats); overwritten with the residual
//   PrevLsp - previous vector used by the predictor
//   UseMMX  - non-zero selects Svq_Int when COMPILE_MMX is enabled
// Returns the index word produced by the selected search routine.
Word32 Lsp_Qnt(float *CurrLsp, float *PrevLsp, int UseMMX)
{
    int n;
    float Wvect[LpcOrder];
    float gapUp, gapDown, gap;

    // End elements: weight is the reciprocal of the single neighbouring gap
    Wvect[0] = 1.0f/(CurrLsp[1] - CurrLsp[0]);
    Wvect[LpcOrder-1] = 1.0f/(CurrLsp[LpcOrder-1] - CurrLsp[LpcOrder-2]);

    // Interior elements: reciprocal of the smaller neighbouring gap, or 1
    // when that gap is not positive
    for (n = 1; n < LpcOrder-1; n++)
    {
        gapUp = CurrLsp[n+1] - CurrLsp[n];
        gapDown = CurrLsp[n] - CurrLsp[n-1];
        gap = (gapDown < gapUp) ? gapDown : gapUp;
        Wvect[n] = (gap > 0.0f) ? 1.0f/gap : 1.0f;
    }

    // Residual = (DC-removed-Curr) - b*(DC-removed-Prev), element by element
    // (covers elements 0..9, i.e. LpcOrder is taken to be 10)
    for (n = 0; n < LpcOrder; n++)
    {
        CurrLsp[n] = (CurrLsp[n] - LspDcTable[n]) -
                     LspPred0*(PrevLsp[n] - LspDcTable[n]);
    }

    // Do the SVQ
#if COMPILE_MMX
    if (UseMMX)
        return Svq_Int(CurrLsp, Wvect);
#endif
    return Lsp_Svq(CurrLsp, Wvect);
}
//--------------------------------------------------------------
Word32 Lsp_Svq(float *Lsp, float *Wvect)
{
int i,k;
Word32 Rez;
int Indx,Start,Dim;
float *LspQntPnt;
float Max,Err,lsp0,lsp1,lsp2,lsp3,w0,w1,w2,w3;
float LspTemp[LpcOrder];
for (i=0; i<LpcOrder; i++)
LspTemp[i] = 2.0f*Lsp[i];
Rez = (Word32) 0;
// For each of the 3 bands
for (k=0; k < LspQntBands; k++)
{
// Initialize the search
Max = 0.0f; //-1.0f;
Indx = 0;
LspQntPnt = BandQntTable[k];
Start = BandInfoTable[k][0];
Dim = BandInfoTable[k][1];
lsp0 = LspTemp[Start+0];
lsp1 = LspTemp[Start+1];
lsp2 = LspTemp[Start+2];
w0 = Wvect[Start+0];
w1 = Wvect[Start+1];
w2 = Wvect[Start+2];
if (k < 2)
{
for (i=0; i < LspCbSize; i++)
{
Err = (lsp0 - LspQntPnt[0])*w0*LspQntPnt[0] +
(lsp1 - LspQntPnt[1])*w1*LspQntPnt[1] +
(lsp2 - LspQntPnt[2])*w2*LspQntPnt[2];
LspQntPnt += 3;
if (asint(Err) > asint(Max))
{
Max = Err;
Indx = i;
}
}
}
else
{
lsp3 = LspTemp[Start+3];
w3 = Wvect[Start+3];
for (i=0; i < LspCbSize; i++)
{
Err = (lsp0 - LspQntPnt[0])*w0*LspQntPnt[0] +
(lsp1 - LspQntPnt[1])*w1*LspQntPnt[1] +
(lsp2 - LspQntPnt[2])*w2*LspQntPnt[2] +
(lsp3 - LspQntPnt[3])*w3*LspQntPnt[3];
LspQntPnt += 4;
if (asint(Err) > asint(Max))
{
Max = Err;
Indx = i;
}
}
}
Rez = (Rez << 8) | Indx;
}
return Rez;
}
//--------------------------------------------------------------
// Lsp_Inq: rebuild a vector from its packed codebook indices, add back the
// prediction and DC terms, then force a minimum spacing between neighbours.
//   Lsp     - output vector (LpcOrder floats)
//   PrevLsp - previous vector, used by the predictor and as the fallback
//   LspId   - packed indices, eight bits per band, last band in the low byte
//   Crc     - non-zero: ignore LspId (index 0 for every band) and use the
//             wider spacing with the LspPred1 predictor
// Returns True when the spacing could not be achieved, in which case Lsp is a
// copy of PrevLsp; False otherwise.
Flag Lsp_Inq(float *Lsp, float *PrevLsp, Word32 LspId, int Crc)
{
    int pass, n, band;
    int entry;
    float *table;
    float predGain, minGap, gapTol, overlap;
    Flag unstable;

    if (Crc != 0)
    {
        LspId = (Word32) 0;
        minGap = 4.0f;
        predGain = LspPred1;
    }
    else
    {
        minGap = 2.0f;
        predGain = LspPred0;
    }
    gapTol = minGap - 0.03125f;

    // Pull the band indices out of LspId, last band first, and copy the
    // selected codebook rows into place
    for (band = LspQntBands-1; band >= 0; band--)
    {
        entry = LspId & (Word32) 0x000000ff;
        LspId >>= 8;
        table = BandQntTable[band];
        for (n = 0; n < BandInfoTable[band][1]; n++)
            Lsp[BandInfoTable[band][0] + n] = table[entry*BandInfoTable[band][1] + n];
    }

    // Add predicted vector and DC to decoded vector
    for (n = 0; n < LpcOrder; n++)
        Lsp[n] = Lsp[n] + (PrevLsp[n] - LspDcTable[n])*predGain + LspDcTable[n];

    // Stability check: at most LpcOrder correction passes
    for (pass = 0; pass < LpcOrder; pass++)
    {
        // Clamp the two end values
        if (Lsp[0] < 3.0)
            Lsp[0] = 3.0f;
        if (Lsp[LpcOrder-1] > 252.0f)
            Lsp[LpcOrder-1] = 252.0f;

        // Push apart any neighbours closer than minGap, half the shortfall each
        for (n = 1; n < LpcOrder; n++)
        {
            overlap = minGap + Lsp[n-1] - Lsp[n];
            if (overlap > 0)
            {
                overlap *= 0.5f;
                Lsp[n-1] -= overlap;
                Lsp[n] += overlap;
            }
        }

        // Any gap still below the tolerance marks the vector unstable
        unstable = False;
        for (n = 1; n < LpcOrder; n++)
        {
            if ((Lsp[n] - Lsp[n-1]) < gapTol)
            {
                unstable = True;
                break;
            }
        }
        if (unstable == False)
            break;
    }

    // Still unstable after all passes: fall back to the previous vector
    if (unstable == True)
    {
        for (n = 0; n < LpcOrder; n++)
            Lsp[n] = PrevLsp[n];
    }
    return unstable;
}
//--------------------------------------------------------------
// Lsp_Int: for each subframe, blend PrevLsp and CurrLsp with weights
// 0.25/0.5/0.75/1.0, convert the blend in place with LsptoA, and store the
// result in consecutive LpcOrder-sized slices of QntLpc.  Finally CurrLsp is
// copied into PrevLsp.
//   QntLpc  - output, SubFrames*LpcOrder floats
//   CurrLsp - current vector
//   PrevLsp - previous vector; updated to CurrLsp on return
void Lsp_Int(float *QntLpc, float *CurrLsp, float *PrevLsp)
{
    int sub, n;
    float w;
    float *out;
    float Fac[4] = {0.25f, 0.5f, 0.75f, 1.0f};

    for (sub = 0, out = QntLpc; sub < SubFrames; sub++, out += LpcOrder)
    {
        w = Fac[sub];

        // Interpolate
        for (n = 0; n < LpcOrder; n++)
            out[n] = (1.0f - w)*PrevLsp[n] + w*CurrLsp[n];

        // Convert to Lpc
        LsptoA(out);
    }

    // Remember the current vector for the next call
    for (n = 0; n < LpcOrder; n++)
        PrevLsp[n] = CurrLsp[n];
}
//--------------------------------------------------------------
// LsptoA: convert a vector of LpcOrder LSP values to LPC coefficients,
// in place.
//   Lsp - on entry positions into CosineTable (fractional), on return the
//         LPC coefficients
void LsptoA(float *Lsp)
{
    int n, m;
    int cell;
    float cp, cq;
    float P[LpcOrder/2+1];
    float Q[LpcOrder/2+1];
    float Fac[(LpcOrder/2)-2] = {1.0f,0.5f,0.25f};

    // Convert Lsp's to (negated) cosines by linear interpolation in the table
    for (n = 0; n < LpcOrder; n++)
    {
        cell = MyFloor(Lsp[n]);
        Lsp[n] = -(CosineTable[cell] +
                  (CosineTable[cell+1]-CosineTable[cell])*(Lsp[n]-cell));
    }

    // Init P and Q. Note that P,Q * 2^26 correspond to fixed-point code.
    // P is built from the even-numbered values, Q from the odd-numbered ones.
    P[0] = 0.5f;
    P[1] = Lsp[0] + Lsp[2];
    P[2] = 1.0f + 2.0f*Lsp[0]*Lsp[2];
    Q[0] = 0.5f;
    Q[1] = Lsp[1] + Lsp[3];
    Q[2] = 1.0f + 2.0f*Lsp[1]*Lsp[3];

    // Fold in the remaining value pairs one at a time
    for (n = 2; n < LpcOrder/2; n++)
    {
        cp = Lsp[2*n+0];
        cq = Lsp[2*n+1];

        // New highest-order terms
        P[n+1] = P[n-1] + P[n]*cp;
        Q[n+1] = Q[n-1] + Q[n]*cq;

        // Update the middle terms from the top down, so each step still sees
        // the not-yet-updated lower entries
        for (m = n; m >= 2; m--)
        {
            P[m] = P[m-1]*cp + 0.5f*(P[m]+P[m-2]);
            Q[m] = Q[m-1]*cq + 0.5f*(Q[m]+Q[m-2]);
        }

        // Update PQ[01]
        P[0] = P[0]*0.5f;
        Q[0] = Q[0]*0.5f;
        P[1] = (P[1] + cp*Fac[n-2])*0.5f;
        Q[1] = (Q[1] + cq*Fac[n-2])*0.5f;
    }

    // Convert to Lpc: combine P and Q into the two mirrored halves
    for (n = 0; n < LpcOrder/2; n++)
    {
        Lsp[n] = (-P[n] - P[n+1] + Q[n] - Q[n+1])*8.0f;
        Lsp[LpcOrder-1-n] = (-P[n] - P[n+1] - Q[n] + Q[n+1])*8.0f;
    }
}