382 lines
11 KiB
C
Raw Normal View History

2001-01-01 00:00:00 +01:00
#include "basedll.h"
//
// Forward declaration of the Unicode -> UTF-8 converter defined later in
// this file. WideCharToMultiByte calls it when CodePage is CP_UTF8.
//
//   lpWideCharStr - source wide-character string.
//   cchWideChar   - number of source characters, or -1 if the source is
//                   null-terminated.
//   lpUTF8Str     - destination buffer for the UTF-8 bytes.
//   cbUTF8        - size of the destination in bytes; 0 asks only for the
//                   required size.
//
// Returns the number of UTF-8 bytes, or 0 with the last error set to
// ERROR_INSUFFICIENT_BUFFER when the destination is too small.
//
int
UnicodeToUTF8(
IN LPCWSTR lpWideCharStr,
IN int cchWideChar,
OUT LPSTR lpUTF8Str,
IN int cbUTF8
);
//
// Forward declaration of the UTF-8 -> Unicode converter defined later in
// this file. MultiByteToWideChar calls it when CodePage is CP_UTF8.
//
//   lpUTF8Str     - source UTF-8 bytes.
//   cbUTF8        - number of source bytes, or -1 if the source is
//                   null-terminated.
//   lpWideCharStr - destination buffer for the wide characters.
//   cchWideChar   - size of the destination in wide characters; 0 asks only
//                   for the required size.
//
// Returns the number of wide characters, or 0 with the last error set to
// ERROR_INSUFFICIENT_BUFFER when the destination is too small.
//
int
UTF8ToUnicode(
IN LPCSTR lpUTF8Str,
IN int cbUTF8,
OUT LPWSTR lpWideCharStr,
IN int cchWideChar
);
//
// MultiByteToWideChar
//
// Reduced implementation of the Win32 API of the same name.
//
//   CodePage       - CP_UTF8 is handled by UTF8ToUnicode; 65000 (UTF-7) is
//                    rejected; every other value goes through
//                    RtlAnsiStringToUnicodeString.
//   dwFlags        - ignored.
//   lpMultiByteStr - source string; must not be NULL.
//   cbMultiByte    - number of source bytes, or -1 if the source is
//                    null-terminated. Values below -1 are rejected.
//   lpWideCharStr  - destination buffer.
//   cchWideChar    - destination size in wide characters; 0 asks only for
//                    the required size.
//
// Returns the number of wide characters required. On the non-UTF-8 path the
// count always includes one slot for a terminator, even when cbMultiByte is
// an explicit length. Returns 0 on failure with the last error set
// (ERROR_INVALID_PARAMETER, ERROR_INSUFFICIENT_BUFFER, or the translated
// NTSTATUS of the conversion).
//
int WINAPI MultiByteToWideChar(
    IN UINT CodePage,
    IN DWORD dwFlags,
    IN LPCSTR lpMultiByteStr,
    IN int cbMultiByte,
    OUT LPWSTR lpWideCharStr,
    IN int cchWideChar)
{
    size_t SourceLength;
    size_t LengthRequires;
    size_t DestinationBytes;
    UNICODE_STRING Unicode;
    ANSI_STRING Ansi;
    NTSTATUS Status;

    if ( CodePage == 65000 ) {
        ASSERT( 0 && "CP_UTF7 is not supported" );
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    // Both conversion paths dereference the source, and a length below -1
    // has no meaning; fail cleanly instead of faulting or wrapping.
    if ( lpMultiByteStr == NULL || cbMultiByte < -1 ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( CodePage == CP_UTF8 ) {
        return UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar);
    }

    if ( cbMultiByte == -1 ) {
        SourceLength = strlen(lpMultiByteStr);
    } else {
        SourceLength = (size_t)cbMultiByte;
    }
    LengthRequires = SourceLength + sizeof(ANSI_NULL);

    // The counted-string structures below carry their lengths in USHORT
    // fields. Reject anything that cannot be represented rather than
    // letting the casts wrap to a smaller, wrong length.
    if ( LengthRequires > 0xFFFF ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( cchWideChar == 0 ) {
        return (int)LengthRequires;
    }
    if ( cchWideChar < (int)LengthRequires ) {
        SetLastError( ERROR_INSUFFICIENT_BUFFER );
        return 0;
    }
    if ( lpWideCharStr == NULL ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    // A caller may legitimately pass a buffer larger than a USHORT byte
    // count can describe. Clamp instead of truncating, so that e.g. a
    // 32768-character buffer is not reported as having zero capacity.
    DestinationBytes = (size_t)cchWideChar * sizeof(WCHAR);
    if ( DestinationBytes > 0xFFFE ) {
        DestinationBytes = 0xFFFE;
    }

    Ansi.Buffer = (PSTR)lpMultiByteStr;
    Ansi.Length = (USHORT)SourceLength;
    Ansi.MaximumLength = (USHORT)LengthRequires;

    Unicode.Buffer = lpWideCharStr;
    // NOTE(review): presumably overwritten by the conversion; kept as the
    // original computed it.
    Unicode.Length = (USHORT)(SourceLength * sizeof(WCHAR));
    Unicode.MaximumLength = (USHORT)DestinationBytes;

    Status = RtlAnsiStringToUnicodeString( &Unicode, &Ansi, FALSE );
    if ( !NT_SUCCESS(Status) ) {
        SetLastError( RtlNtStatusToDosError(Status) );
        return 0;
    }

    return (int)LengthRequires;
}
//
// WideCharToMultiByte
//
// Reduced implementation of the Win32 API of the same name.
//
//   CodePage          - CP_UTF8 is handled by UnicodeToUTF8; 65000 (UTF-7)
//                       is rejected; every other value goes through
//                       RtlUnicodeStringToAnsiString.
//   dwFlags           - ignored.
//   lpWideCharStr     - source string; must not be NULL.
//   cchWideChar       - number of source characters, or -1 if the source is
//                       null-terminated. Values below -1 are rejected.
//   lpMultiByteStr    - destination buffer.
//   cbMultiByte       - destination size in bytes; 0 asks only for the
//                       required size.
//   lpDefaultChar     - not supported on the non-UTF-8 path; must be NULL
//                       there.
//   lpUsedDefaultChar - if non-NULL, set to FALSE on the non-UTF-8 path.
//
// Returns the number of bytes required. On the non-UTF-8 path this is the
// source character count plus one slot for a terminator. Returns 0 on
// failure with the last error set (ERROR_INVALID_PARAMETER,
// ERROR_INSUFFICIENT_BUFFER, or the translated NTSTATUS of the conversion).
//
int WINAPI WideCharToMultiByte(
    IN UINT CodePage,
    IN DWORD dwFlags,
    IN LPCWSTR lpWideCharStr,
    IN int cchWideChar,
    OUT LPSTR lpMultiByteStr,
    IN int cbMultiByte,
    IN LPCSTR lpDefaultChar,
    OUT LPBOOL lpUsedDefaultChar)
{
    size_t SourceLength;
    size_t LengthRequires;
    UNICODE_STRING Unicode;
    ANSI_STRING Ansi;
    NTSTATUS Status;

    if ( CodePage == 65000 ) {
        ASSERT( 0 && "CP_UTF7 is not supported" );
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    // Both conversion paths dereference the source, and a length below -1
    // has no meaning; fail cleanly instead of faulting or wrapping.
    if ( lpWideCharStr == NULL || cchWideChar < -1 ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( CodePage == CP_UTF8 ) {
        return UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte);
    }

    if ( lpDefaultChar ) {
        ASSERT( 0 && "lpDefaultChar is not supported" );
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }
    if ( lpUsedDefaultChar ) {
        *lpUsedDefaultChar = FALSE;
    }

    if ( cchWideChar == -1 ) {
        SourceLength = wcslen(lpWideCharStr);
    } else {
        SourceLength = (size_t)cchWideChar;
    }
    LengthRequires = SourceLength + sizeof(ANSI_NULL);

    // The source is described to the conversion routine by USHORT byte
    // counts. A longer string used to wrap those counts, so only part of it
    // was described while the full length was still returned. Reject it
    // up front instead.
    if ( LengthRequires * sizeof(WCHAR) > 0xFFFF ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( cbMultiByte == 0 ) {
        return (int)LengthRequires;
    }
    if ( cbMultiByte < (int)LengthRequires ) {
        SetLastError( ERROR_INSUFFICIENT_BUFFER );
        return 0;
    }
    if ( lpMultiByteStr == NULL ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    Ansi.Buffer = lpMultiByteStr;
    Ansi.Length = 0;
    // Clamp rather than truncate: a 64 KiB (or larger) destination must not
    // be reported as a tiny or empty one.
    Ansi.MaximumLength = (USHORT)( cbMultiByte > 0xFFFF ? 0xFFFF : cbMultiByte );

    Unicode.Buffer = (PWSTR)lpWideCharStr;
    Unicode.Length = (USHORT)(LengthRequires * sizeof(WCHAR) - sizeof(UNICODE_NULL));
    Unicode.MaximumLength = (USHORT)(LengthRequires * sizeof(WCHAR));

    Status = RtlUnicodeStringToAnsiString( &Ansi, &Unicode, FALSE );
    if ( !NT_SUCCESS(Status) ) {
        SetLastError( RtlNtStatusToDosError(Status) );
        return 0;
    }

    return (int)LengthRequires;
}
//*****************************************************************************
// UTF8 <-> Unicode conversion functions
//
// Marker bits OR-ed into the UTF-8 bytes produced by UnicodeToUTF8.
#define HIGH_BITS1  0x80    // 10xxxxxx - trailing byte
#define HIGH_BITS2  0xC0    // 110xxxxx - first byte of a 2-byte sequence
#define HIGH_BITS3  0xE0    // 1110xxxx - first byte of a 3-byte sequence
#define HIGH_BITS4  0xF0    // 11110xxx

// Payload masks: the low 4, 5 or 6 bits of a value.
#define LOW_BITS4   0x0F
#define LOW_BITS5   0x1F
#define LOW_BITS6   0x3F

// Indexed by the number of trailing bytes in a sequence. The decoder sums
// the raw bytes (shifting by 6 between them) and then subtracts this value,
// which removes the marker bits that were added in along with the payload.
unsigned long offsetsFromUTF8[6] = {
    0x00000000,     // 0 trailing bytes
    0x00003080,     // 1
    0x000E2080,     // 2
    0x03C82080,     // 3
    0xFA082080,     // 4
    0x82082080      // 5
};

// Indexed by the first byte of a sequence: how many bytes follow it.
char bytesFromUTF8[256] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xD0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xF0 */ 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

// Substituted for decoded values above kMaximumUniChar.
unsigned long kReplacementCharacter = 0x0000FFFD;

// Largest value stored as a single wide character.
unsigned long kMaximumSimpleUniChar = 0x0000FFFF;

// Largest value the decoder accepts before substituting the replacement.
unsigned long kMaximumUniChar       = 0x0010FFFF;

// Surrogate-pair arithmetic: a value above kMaximumSimpleUniChar has
// halfBase subtracted; the bits above halfShift are added to
// kSurrogateHighStart and the bits under halfMask to kSurrogateLowStart.
int           halfShift             = 10;
unsigned long halfBase              = 0x0010000;
unsigned long halfMask              = 0x3FF;
unsigned long kSurrogateHighStart   = 0xD800;
unsigned long kSurrogateLowStart    = 0xDC00;
/*******************************************************************************
UTF8ToUnicode
Converts a UTF-8 string to a Unicode (16-bit unit) string.

Parameters:
    lpUTF8Str     - source bytes.
    cbUTF8        - number of source bytes, or -1 if the source is
                    null-terminated (the terminator is then converted too).
    lpWideCharStr - destination buffer. May be NULL, in which case nothing
                    is stored and only the count is computed.
    cchWideChar   - destination capacity in 16-bit units; 0 asks only for
                    the required size.

Returns:
    Number of 16-bit units produced (or required, when cchWideChar is 0).
    0 with the last error set to ERROR_INSUFFICIENT_BUFFER when the
    destination is too small; units that fit have been stored by then.

Notes:
    - Values above kMaximumSimpleUniChar become a surrogate pair; values
      above kMaximumUniChar become kReplacementCharacter.
    - A sequence cut short by the end of the input ends the conversion
      without an error.
    - Exactly the returned number of units is stored. The output is
      null-terminated only if the input contained the terminator.
*******************************************************************************/
int
UTF8ToUnicode(
    IN LPCSTR lpUTF8Str,
    IN int cbUTF8,
    OUT LPWSTR lpWideCharStr,
    IN int cchWideChar
    )
{
    const unsigned char *src = (const unsigned char *)lpUTF8Str;
    unsigned short *dst = (unsigned short *)lpWideCharStr;
    int LengthRequires = 0;
    int pos = 0;

    if ( cbUTF8 == -1 )
    {
        cbUTF8 = (int)(strlen(lpUTF8Str) + sizeof(ANSI_NULL));
    }

    while (pos < cbUTF8)
    {
        unsigned long ch = 0;
        unsigned short units[2];
        int unitCount;
        int trailing = bytesFromUTF8[src[pos]];
        int k;

        // The sequence occupies trailing + 1 bytes starting at pos. Stop if
        // it would run past the input, so that the last trailing byte is
        // never read from beyond the source.
        if (trailing >= cbUTF8 - pos)
        {
            break;
        }

        // Sum the raw bytes, shifting six bits between them; the marker
        // bits are removed in one step by the offsets table.
        for (k = trailing; k > 0; k--)
        {
            ch += src[pos++];
            ch <<= 6;
        }
        ch += src[pos++];
        ch -= offsetsFromUTF8[trailing];

        if (ch <= kMaximumSimpleUniChar)
        {
            units[0] = (unsigned short)ch;
            unitCount = 1;
        }
        else if (ch > kMaximumUniChar)
        {
            units[0] = (unsigned short)kReplacementCharacter;
            unitCount = 1;
        }
        else
        {
            ch -= halfBase;
            units[0] = (unsigned short)((ch >> halfShift) + kSurrogateHighStart);
            units[1] = (unsigned short)((ch & halfMask) + kSurrogateLowStart);
            unitCount = 2;
        }

        // Count every unit, but store only while it fits. Each store is
        // exactly one 16-bit unit, so the last slot of the buffer is not
        // overrun.
        for (k = 0; k < unitCount; k++)
        {
            if (++LengthRequires <= cchWideChar && dst != NULL)
            {
                *dst++ = units[k];
            }
        }
    }

    if ( cchWideChar == 0 )
    {
        return LengthRequires;
    }
    else if ( cchWideChar < LengthRequires )
    {
        SetLastError( ERROR_INSUFFICIENT_BUFFER );
        return 0;
    }
    return LengthRequires;
}
/*******************************************************************************
UnicodeToUTF8
Converts a Unicode (16-bit unit) string to a UTF-8 string.

Parameters:
    lpWideCharStr - source string.
    cchWideChar   - number of source units, or -1 if the source is
                    null-terminated (the terminator is then converted too).
    lpUTF8Str     - destination buffer. May be NULL, in which case nothing
                    is stored and only the count is computed.
    cbUTF8        - destination capacity in bytes; 0 asks only for the
                    required size.

Returns:
    Number of UTF-8 bytes produced (or required, when cbUTF8 is 0).
    0 with the last error set to ERROR_INSUFFICIENT_BUFFER when the
    destination is too small; bytes that fit have been stored by then.

Notes:
    - A high surrogate immediately followed by a low surrogate is combined
      and written as one 4-byte sequence.
    - An unpaired surrogate is written as a 3-byte sequence, like any other
      value above 0x7FF.
*******************************************************************************/
int
UnicodeToUTF8(
    IN LPCWSTR lpWideCharStr,
    IN int cchWideChar,
    OUT LPSTR lpUTF8Str,
    IN int cbUTF8
    )
{
    BYTE seq[4];
    int seqLen;
    int LengthRequires = 0;
    int i;
    int k;
    unsigned long cp;
    unsigned long low;

    if ( cchWideChar == -1 ) {
        cchWideChar = (int)(wcslen(lpWideCharStr) + sizeof(ANSI_NULL));
    }

    for ( i = 0; i < cchWideChar; i += 1 )
    {
        // NOTE(review): assumes the source elements are unsigned 16-bit
        // units - confirm against the WCHAR definition.
        cp = (unsigned long)lpWideCharStr[i];

        // Combine a surrogate pair into the value it represents.
        if (cp >= 0xD800 && cp <= 0xDBFF && i + 1 < cchWideChar)
        {
            low = (unsigned long)lpWideCharStr[i + 1];
            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                i += 1;     // the low surrogate has been consumed
            }
        }

        if (cp <= 0x7F)
        {
            // Single byte encoding
            seq[0] = (BYTE) cp;
            seqLen = 1;
        }
        else if (cp <= 0x7FF)
        {
            // Double byte encoding
            seq[0] = (BYTE) (HIGH_BITS2 | ((cp >> 6) & LOW_BITS5));
            seq[1] = (BYTE) (HIGH_BITS1 | (cp & LOW_BITS6));
            seqLen = 2;
        }
        else if (cp <= 0xFFFF)
        {
            // Triple byte encoding
            seq[0] = (BYTE) (HIGH_BITS3 | ((cp >> 12) & LOW_BITS4));
            seq[1] = (BYTE) (HIGH_BITS1 | ((cp >> 6) & LOW_BITS6));
            seq[2] = (BYTE) (HIGH_BITS1 | (cp & LOW_BITS6));
            seqLen = 3;
        }
        else
        {
            // Quadruple byte encoding: 11110xxx lead byte, three payload bits
            seq[0] = (BYTE) (0xF0 | ((cp >> 18) & 0x07));
            seq[1] = (BYTE) (HIGH_BITS1 | ((cp >> 12) & LOW_BITS6));
            seq[2] = (BYTE) (HIGH_BITS1 | ((cp >> 6) & LOW_BITS6));
            seq[3] = (BYTE) (HIGH_BITS1 | (cp & LOW_BITS6));
            seqLen = 4;
        }

        // Count every byte, but store only while it fits.
        for (k = 0; k < seqLen; k++)
        {
            if (++LengthRequires <= cbUTF8 && lpUTF8Str != NULL)
            {
                *lpUTF8Str++ = (char) seq[k];
            }
        }
    }

    if ( cbUTF8 == 0 )
    {
        return LengthRequires;
    }
    else if ( cbUTF8 < LengthRequires )
    {
        SetLastError( ERROR_INSUFFICIENT_BUFFER );
        return 0;
    }
    return LengthRequires;
}