Windows2000/private/shell/ext/mlang/utf7obj.cpp
2020-09-30 17:12:32 +02:00

413 lines
11 KiB
C++

// ============================================================================
// Internet Character Set Conversion: Input from UTF-7
// ============================================================================
#include "private.h"
#include "fechrcnv.h"
#include "utf7obj.h"
// Table: g_aBase64 (consulted by IsBase64 and by the UTF-7 decoder)
// Synopsis: Maps every possible byte value to its 6-bit base64 value (0-63).
// Bytes that are not part of the base64 alphabet ('A'-'Z', 'a'-'z',
// '0'-'9', '+', '/') map to 255. Note that '=' (0x3d) is treated as
// "not base64" by this table.
static UCHAR g_aBase64[256] =
{
/* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, */
/* 00-0f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* 10-1f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* 20-2f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
/* 30-3f */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255,
/* 40-4f */ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/* 50-5f */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
/* 60-6f */ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
/* 70-7f */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
/* 80-8f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* 90-9f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* a0-af */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* b0-bf */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* c0-cf */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* d0-df */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* e0-ef */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* f0-ff */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
};
// Direct encoded ASCII table
// Indexed by a 7-bit ASCII code. An entry of 255 means the character cannot
// be written to the UTF-7 stream as-is and must go through the shifted
// (base64) form; any other entry marks a character that is emitted directly.
// The directly encodable set in this table is: 'A'-'Z', 'a'-'z', '0'-'9',
// ' ( ) , - . / : ? plus space, tab, LF and CR.
// Only the "255 or not 255" distinction is consulted in this file.
static UCHAR g_aDirectChar[128] =
{
/* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, */
/* 00-0f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 72, 73, 255, 255, 74, 255, 255,
/* 10-1f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
/* 20-2f */ 71, 255, 255, 255, 255, 255, 255, 62, 63, 64, 255, 255, 65, 66, 67, 68,
/* 30-3f */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 69, 255, 255, 255, 255, 70,
/* 40-4f */ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/* 50-5f */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
/* 60-6f */ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
/* 70-7f */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
};
// Inverse base64 table: maps a 6-bit value (0-63) to its base64 character.
// The string also carries a trailing '=' and the terminating NUL, so the
// array holds 66 bytes; the encoder only ever indexes entries 0-63.
static UCHAR g_aInvBase64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/=";
// Returns non-zero when byte t belongs to the base64 alphabet, i.e. when the
// g_aBase64 lookup yields a real 6-bit value rather than the 255 marker.
static inline BOOL
IsBase64(UCHAR t)
{
    const UCHAR sextet = g_aBase64[t];
    return sextet < 64;
}
/***
** C O N S T R U C T O R **
***/
// Builds a UTF-7 decoder: forwards the code page and code set to the
// CINetCodeConverter base and then puts the decoder in its initial state.
CInccUTF7In::CInccUTF7In(UINT uCodePage, int nCodeSet)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    Reset();    // start outside any shifted sequence
}
/***
** R E S E T **
***/
// Returns the decoder to its initial state: the per-byte and clean-up
// handlers point at ConvMain / CleanUpMain, the decoder is outside a shifted
// (base64) sequence, and the bit buffer and output counter are empty.
void CInccUTF7In::Reset()
{
    // Use the standard '&Class::Member' form to create the pointers to
    // member; naming the member function bare is only accepted as a
    // compiler extension.
    m_pfnConv    = &CInccUTF7In::ConvMain ;
    m_pfnCleanUp = &CInccUTF7In::CleanUpMain ;

    m_fUTF7Mode = FALSE ;   // not inside "+...-"
    m_nBitCount = 0 ;       // no pending base64 bits
    m_tcUnicode = 0 ;       // empty bit buffer
    m_nOutCount = 0 ;       // nothing decoded in the current shifted run
}
/***
**** C O N V E R T C H A R ****
***/
// Feeds one input byte to the current conversion handler (m_pfnConv).
//   tc     - the input byte
//   cchSrc - not used by this implementation
// Returns S_OK when the handler reports success, E_FAIL otherwise.
HRESULT CInccUTF7In::ConvertChar(UCHAR tc, int cchSrc)
{
    return (this->*m_pfnConv)(tc) ? S_OK : E_FAIL;
}
/***
***** C L E A N U P *****
***/
// Runs the current clean-up handler (m_pfnCleanUp) and hands back its result.
BOOL CInccUTF7In::CleanUp()
{
    const BOOL fResult = (this->*m_pfnCleanUp)();
    return fResult;
}
/***
**** C O N V M A I N ****
***/
// Decodes one byte of UTF-7 input.
// Every decoded character is written through Output() as two bytes, low
// byte first and high byte second. The return value is the result of the
// last Output() call made for this byte (the result of the first call of a
// pair is not examined), or TRUE when nothing was written.
BOOL CInccUTF7In::ConvMain(UCHAR tc)
{
    // ---- outside a shifted sequence --------------------------------------
    if ( !m_fUTF7Mode )
    {
        if ( tc == '+' )
        {
            // shift-in character: start collecting base64 bits
            m_fUTF7Mode = TRUE ;
            m_nBitCount = 0 ;
            m_tcUnicode = 0 ;
            m_nOutCount = 0 ;
            return TRUE ;
        }

        // plain byte: widen it to a 16-bit character
        Output(tc);
        return Output(0);
    }

    // ---- inside a shifted sequence, byte ends the sequence -----------------
    if ( !IsBase64(tc) )
    {
        BOOL fEnded = TRUE ;

        if ( tc != '-' )
        {
            // any other terminator is itself part of the text
            Output(tc);
            fEnded = Output(0);
        }
        else if ( m_nOutCount == 0 && m_nBitCount == 0 )
        {
            // "+-" with nothing in between decodes to a literal '+'
            Output('+');
            fEnded = Output(0);
        }
        // otherwise the shift-out character '-' is simply absorbed

        // leave UTF-7 mode and drop any leftover bits
        m_fUTF7Mode = FALSE ;
        m_nBitCount = 0 ;
        m_tcUnicode = 0 ;
        m_nOutCount = 0 ;
        return fEnded ;
    }

    // ---- inside a shifted sequence, base64 byte ----------------------------
    const UCHAR sextet = g_aBase64[tc] ;
    m_tcUnicode = m_tcUnicode << 6 | sextet ;
    m_nBitCount += 6 ;

    // fewer than 16 bits collected: nothing to emit yet
    if ( m_nBitCount < 16 )
        return TRUE ;

    // the oldest 16 pending bits form the next character
    const LONG wch = m_tcUnicode >> ( m_nBitCount - 16 ) ;
    Output( (UCHAR) wch );
    const BOOL fWritten = Output( (UCHAR) ( wch >> 8 ) );

    m_nOutCount ++ ;        // remember that this run produced output
    m_nBitCount -= 16 ;
    return fWritten ;
}
/***
*** C L E A N U P M A I N ***
***/
// Clean-up handler installed by Reset(). It writes nothing and always
// reports success; bits still sitting in the bit buffer are not emitted.
BOOL CInccUTF7In::CleanUpMain()
{
    return TRUE;
}
// Number of consumed-but-unconverted input bytes; this decoder always
// reports 0.
int CInccUTF7In::GetUnconvertBytes()
{
    return 0;
}
// Layout of the DWORD exchanged between GetConvertMode and SetConvertMode:
//   value 0     : the decoder is not inside a shifted (base64) sequence
//   bits  0-15  : low 16 bits of the pending bit buffer (m_tcUnicode)
//   bits 16-30  : number of pending bits (m_nBitCount)
//   bit  31     : the current shifted sequence has already produced output
// A shifted state whose encoding would otherwise be 0 is reported as 1.
// Values that never set bit 31 decode exactly as they did before the flag
// was introduced.
static const DWORD UTF7IN_MODE_HAS_OUTPUT = 0x80000000 ;

// Packs the decoder state into a DWORD so that decoding can be resumed later
// with SetConvertMode. Returns 0 when not inside a shifted sequence.
DWORD CInccUTF7In::GetConvertMode()
{
    if ( !m_fUTF7Mode )
        return 0 ;

    DWORD dwMode = ( m_tcUnicode & 0xffff ) | ( ( m_nBitCount & 0x7fff ) << 16 ) ;

    // ConvMain decodes "+-" as '+' only while m_nOutCount and m_nBitCount are
    // both 0. Without this flag a sequence resumed on a character boundary
    // would look freshly opened, and its closing '-' would yield a stray '+'.
    if ( m_nOutCount != 0 )
        dwMode |= UTF7IN_MODE_HAS_OUTPUT ;

    // 0 means "not shifted", so a freshly opened sequence is reported as 1;
    // the bit count is 0 in that case, so the extra bit is never decoded.
    if ( dwMode == 0 )
        dwMode = 1L ;

    return dwMode ;
}

// Restores the decoder state from a value produced by GetConvertMode.
//   mode - 0 for the initial state, otherwise a packed shifted state
void CInccUTF7In::SetConvertMode(DWORD mode)
{
    Reset();    // initial state; this also leaves m_fUTF7Mode FALSE

    if ( mode )
    {
        m_fUTF7Mode = TRUE ;
        // same 16-bit mask that GetConvertMode used when saving the buffer
        m_tcUnicode = ( mode & 0xffff ) ;
        m_nBitCount = ( mode >> 16 ) & 0x7fff ;
        // only "zero or not" matters to ConvMain, so 1 stands for "some"
        m_nOutCount = ( mode & UTF7IN_MODE_HAS_OUTPUT ) ? 1 : 0 ;
    }
}
// ============================================================================
// Internet Character Set Conversion: Output to UTF-7
// ============================================================================
/***
** C O N S T R U C T O R **
***/
// Builds a UTF-7 encoder: forwards the code page and code set to the
// CINetCodeConverter base and then puts the encoder in its initial state.
CInccUTF7Out::CInccUTF7Out(UINT uCodePage, int nCodeSet)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    Reset();    // start with no pending byte and outside shifted mode
}
/***
** R E S E T **
***/
// Returns the encoder to its initial state: no half-received character,
// not inside a shifted (base64) sequence, and an empty bit buffer.
void CInccUTF7Out::Reset()
{
    m_tcUnicode   = 0 ;         // empty bit buffer
    m_nBitCount   = 0 ;         // no pending bits
    m_fUTF7Mode   = FALSE ;     // not inside "+...-"
    m_fDoubleByte = FALSE ;     // next byte is the first byte of a character
}
// Encodes one input byte to UTF-7.
// Input characters arrive as two bytes, low byte first. The first byte is
// only remembered; the character is encoded when the second byte arrives.
//   tc     - the input byte
//   cchSrc - not used by this implementation
// Returns S_OK when the last Output() call made for this byte succeeded (or
// when nothing was written), E_FAIL otherwise. The result of a later
// Output() call replaces the result of an earlier one.
HRESULT CInccUTF7Out::ConvertChar(UCHAR tc, int cchSrc)
{
    // 1st byte of the character: keep it and wait for the 2nd one
    if ( !m_fDoubleByte )
    {
        m_tcFirstByte = tc ;
        m_fDoubleByte = TRUE ;
        return S_OK ;
    }

    // 2nd byte: assemble the 16-bit character
    BOOL fOk = TRUE ;
    const WORD wch = ( (WORD) tc << 8 | m_tcFirstByte ) ;

    // does the character have to go through base64 ?
    BOOL fEncode ;
    if ( wch > 0x7f )
        fEncode = TRUE ;
    else
        fEncode = ( g_aDirectChar[(UCHAR) wch] == 255 ) ;

    if ( fEncode && m_fUTF7Mode == FALSE )
    {
        // shift-in character
        fOk = Output('+');
        if ( wch == '+' )
        {
            // a lone '+' is written as "+-" and never enters shifted mode
            fOk = Output('-');
        }
        else
        {
            m_fUTF7Mode = TRUE ;
        }
    }

    if ( m_fUTF7Mode )
    {
        if ( fEncode )
        {
            // append the character to the bit buffer
            m_tcUnicode = m_tcUnicode << 16 | wch ;
            m_nBitCount += 16 ;
        }
        else
        {
            // the shifted run ends here: zero-pad to a whole base64 character
            const int nPartial = m_nBitCount % 6 ;
            if ( nPartial )
            {
                const int nPad = 6 - nPartial ;
                m_tcUnicode = m_tcUnicode << nPad ;
                m_nBitCount += nPad ;
            }
        }

        // emit every complete group of 6 bits, stopping at the first failure
        while ( fOk && m_nBitCount >= 6 )
        {
            const LONG lBits = m_tcUnicode >> ( m_nBitCount - 6 ) ;
            const UCHAR sextet = (UCHAR) ( lBits & 0x3f ) ;
            fOk = Output(g_aInvBase64[sextet]);
            m_nBitCount -= 6 ;
        }

        if ( !fEncode )
        {
            // shift-out character, then back to direct mode
            fOk = Output('-');
            m_fUTF7Mode = FALSE ;
            m_nBitCount = 0 ;
            m_tcUnicode = 0 ;
        }
    }

    // a directly encodable character is written as its single ASCII byte
    if ( !fEncode )
        fOk = Output(m_tcFirstByte);

    m_fDoubleByte = FALSE ;
    return fOk ? S_OK : E_FAIL ;
}
/***
***** C L E A N U P *****
***/
// Finishes the output stream. When a shifted (base64) sequence is still
// open, the pending bits are zero-padded and written, the shift-out '-' is
// emitted and the encoder returns to direct mode. Returns TRUE when there
// was nothing to do, otherwise the result of writing the final '-'.
BOOL CInccUTF7Out::CleanUp()
{
    if ( !m_fUTF7Mode )
        return TRUE ;

    BOOL fOk = TRUE ;

    // zero-pad the bit buffer up to the next multiple of 6 bits
    const int nPartial = m_nBitCount % 6 ;
    if ( nPartial )
    {
        const int nPad = 6 - nPartial ;
        m_tcUnicode = m_tcUnicode << nPad ;
        m_nBitCount += nPad ;
    }

    // emit every complete group of 6 bits, stopping at the first failure
    while ( fOk && m_nBitCount >= 6 )
    {
        const LONG lBits = m_tcUnicode >> ( m_nBitCount - 6 ) ;
        const UCHAR sextet = (UCHAR) ( lBits & 0x3f ) ;
        fOk = Output(g_aInvBase64[sextet]);
        m_nBitCount -= 6 ;
    }

    // shift-out character, then back to direct mode
    fOk = Output('-');
    m_fUTF7Mode = FALSE ;
    m_nBitCount = 0 ;
    m_tcUnicode = 0 ;

    return fOk ;
}
// Number of input bytes received but not yet converted: 1 while the first
// byte of a two-byte character is being held, 0 otherwise.
int CInccUTF7Out::GetUnconvertBytes()
{
    if ( m_fDoubleByte )
        return 1;
    return 0;
}
// The encoder does not export any resumable state; the mode is always 0.
DWORD CInccUTF7Out::GetConvertMode()
{
    return 0;
}
// Ignores the supplied mode value and simply puts the encoder back into its
// initial state.
void CInccUTF7Out::SetConvertMode(DWORD mode)
{
    Reset();
}