// xbox-kernel/private/ntos/xapi/k32/widechar.c

#include "basedll.h"

// Forward declaration of the wide-character -> UTF-8 encoder defined later in
// this file; WideCharToMultiByte hands CP_UTF8 requests to it.
int
UnicodeToUTF8(
    IN LPCWSTR lpWideCharStr,
    IN int cchWideChar,
    OUT LPSTR lpUTF8Str,
    IN int cbUTF8
    );

// Forward declaration of the UTF-8 -> wide-character decoder defined later in
// this file; MultiByteToWideChar hands CP_UTF8 requests to it.
int
UTF8ToUnicode(
    IN LPCSTR lpUTF8Str,
    IN int cbUTF8,
    OUT LPWSTR lpWideCharStr,
    IN int cchWideChar
    );

/*******************************************************************************
MultiByteToWideChar

Converts a multibyte string to a wide-character string.

Arguments:
    CodePage       - CP_UTF8 is routed to UTF8ToUnicode, 65000 (UTF-7) is
                     rejected, and every other value is converted with
                     RtlAnsiStringToUnicodeString.
    dwFlags        - Not used by this implementation.
    lpMultiByteStr - Source string.
    cbMultiByte    - Source length in bytes, or -1 when the source is
                     null-terminated.
    lpWideCharStr  - Destination buffer; not touched when cchWideChar is 0.
    cchWideChar    - Destination size in WCHARs, or 0 to query the size needed.

Returns:
    The number of WCHARs required. On the non-UTF-8 path this is the source
    length plus one slot for a terminator, whether or not cbMultiByte was
    explicit. Returns 0 on failure after calling SetLastError.
*******************************************************************************/
int WINAPI MultiByteToWideChar(
    IN UINT     CodePage,
    IN DWORD    dwFlags,
    IN LPCSTR   lpMultiByteStr,
    IN int      cbMultiByte,
    OUT LPWSTR  lpWideCharStr,
    IN int      cchWideChar)
{
    USHORT LengthRequires;
    size_t SourceLength;
    size_t DestinationBytes;
    UNICODE_STRING Unicode;
    ANSI_STRING Ansi;
    NTSTATUS Status;

    if ( CodePage == 65000 ) {
        ASSERT( 0 && "CP_UTF7 is not supported" );
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( CodePage == CP_UTF8 ) {
        return UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar);
    }

    // Reject arguments that would otherwise be dereferenced or turned into a
    // bogus length below.
    if ( cbMultiByte < -1 ||
         ( lpMultiByteStr == NULL && cbMultiByte != 0 ) ||
         ( lpWideCharStr == NULL && cchWideChar > 0 ) ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( cbMultiByte == -1 ) {
        SourceLength = strlen(lpMultiByteStr);
    } else {
        SourceLength = (size_t)cbMultiByte;
    }

    // The counted-string length fields filled in below are 16-bit byte counts.
    // The wide result (source characters plus terminator, sizeof(WCHAR) bytes
    // each) has to fit in one, so fail instead of silently truncating.
    if ( SourceLength + sizeof(ANSI_NULL) > 0xFFFF / sizeof(WCHAR) ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    LengthRequires = (USHORT)(SourceLength + sizeof(ANSI_NULL));

    if ( cchWideChar == 0 ) {
        return LengthRequires;
    }
    else if ( cchWideChar < (int)LengthRequires ) {
        SetLastError( ERROR_INSUFFICIENT_BUFFER );
        return 0;
    }

    Ansi.Buffer = (PSTR)lpMultiByteStr;
    Ansi.Length = (USHORT)SourceLength;
    Ansi.MaximumLength = LengthRequires;

    // cchWideChar is at least LengthRequires here, hence positive. Clamp the
    // byte size so that a large caller buffer does not wrap around to a small
    // (or zero) 16-bit capacity.
    DestinationBytes = (size_t)cchWideChar * sizeof(WCHAR);
    if ( DestinationBytes > 0xFFFE ) {
        DestinationBytes = 0xFFFE;
    }

    Unicode.Buffer = lpWideCharStr;
    Unicode.Length = (USHORT)(SourceLength * sizeof(WCHAR));
    Unicode.MaximumLength = (USHORT)DestinationBytes;

    Status = RtlAnsiStringToUnicodeString( &Unicode, &Ansi, FALSE );

    if ( NT_SUCCESS(Status) ) {
        return LengthRequires;
    } else {
        SetLastError( RtlNtStatusToDosError(Status) );
        return 0;
    }
}

/*******************************************************************************
WideCharToMultiByte

Converts a wide-character string to a multibyte string.

Arguments:
    CodePage          - CP_UTF8 is routed to UnicodeToUTF8, 65000 (UTF-7) is
                        rejected, and every other value is converted with
                        RtlUnicodeStringToAnsiString.
    dwFlags           - Not used by this implementation.
    lpWideCharStr     - Source string.
    cchWideChar       - Source length in WCHARs, or -1 when the source is
                        null-terminated.
    lpMultiByteStr    - Destination buffer; not touched when cbMultiByte is 0.
    cbMultiByte       - Destination size in bytes, or 0 to query the size
                        needed.
    lpDefaultChar     - Must be NULL on the non-UTF-8 path; a non-NULL value
                        fails with ERROR_INVALID_PARAMETER.
    lpUsedDefaultChar - Optional; set to FALSE on the non-UTF-8 path.

Returns:
    The number of bytes required. On the non-UTF-8 path this is the source
    length plus one byte for a terminator, whether or not cchWideChar was
    explicit. Returns 0 on failure after calling SetLastError.
*******************************************************************************/
int WINAPI WideCharToMultiByte(
    IN UINT     CodePage,
    IN DWORD    dwFlags,
    IN LPCWSTR  lpWideCharStr,
    IN int      cchWideChar,
    OUT LPSTR   lpMultiByteStr,
    IN int      cbMultiByte,
    IN LPCSTR   lpDefaultChar,
    OUT LPBOOL  lpUsedDefaultChar)
{
    USHORT LengthRequires;
    size_t SourceLength;
    UNICODE_STRING Unicode;
    ANSI_STRING Ansi;
    NTSTATUS Status;

    if ( CodePage == 65000 ) {
        ASSERT( 0 && "CP_UTF7 is not supported" );
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( CodePage == CP_UTF8 ) {
        return UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte);
    }

    if ( lpDefaultChar ) {
        ASSERT( 0 && "lpDefaultChar is not supported" );
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( lpUsedDefaultChar ) {
        *lpUsedDefaultChar = FALSE;
    }

    // Reject arguments that would otherwise be dereferenced or turned into a
    // bogus length below.
    if ( cchWideChar < -1 ||
         ( lpWideCharStr == NULL && cchWideChar != 0 ) ||
         ( lpMultiByteStr == NULL && cbMultiByte > 0 ) ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( cchWideChar == -1 ) {
        SourceLength = wcslen(lpWideCharStr);
    } else {
        SourceLength = (size_t)cchWideChar;
    }

    // The source is described below by 16-bit byte counts. Its size in bytes,
    // terminator included, has to fit in one, so fail instead of converting
    // only part of the string while reporting the full length.
    if ( SourceLength + sizeof(ANSI_NULL) > 0xFFFF / sizeof(WCHAR) ) {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    LengthRequires = (USHORT)(SourceLength + sizeof(ANSI_NULL));

    if ( cbMultiByte == 0 ) {
        return LengthRequires;
    }
    else if ( cbMultiByte < (int)LengthRequires ) {
        SetLastError( ERROR_INSUFFICIENT_BUFFER );
        return 0;
    }

    // cbMultiByte is at least LengthRequires here, hence positive. Clamp it so
    // that a buffer of 64 KiB or more does not wrap around to a small (or
    // zero) 16-bit capacity.
    Ansi.Buffer = lpMultiByteStr;
    Ansi.Length = 0;
    Ansi.MaximumLength = (USHORT)( cbMultiByte > 0xFFFF ? 0xFFFF : cbMultiByte );

    Unicode.Buffer = (PWSTR)lpWideCharStr;
    Unicode.Length = (USHORT)(SourceLength * sizeof(WCHAR));
    Unicode.MaximumLength = (USHORT)(LengthRequires * sizeof(WCHAR));

    Status = RtlUnicodeStringToAnsiString( &Ansi, &Unicode, FALSE );

    if ( NT_SUCCESS(Status) ) {
        return LengthRequires;
    } else {
        SetLastError( RtlNtStatusToDosError(Status) );
        return 0;
    }
}


//*****************************************************************************
// UTF8 <-> Unicode conversion functions
//

// Marker bits OR-ed into the top of an encoded UTF-8 byte.
#define HIGH_BITS1  0x80    // 1000 0000 - trailing bytes of a sequence
#define HIGH_BITS2  0xC0    // 1100 0000 - first byte of a 2-byte sequence
#define HIGH_BITS3  0xE0    // 1110 0000 - first byte of a 3-byte sequence
#define HIGH_BITS4  0xF0    // 1111 0000

// Masks selecting the low payload bits of a character.
#define LOW_BITS4   0x0F    // 0000 1111
#define LOW_BITS5   0x1F    // 0001 1111
#define LOW_BITS6   0x3F    // 0011 1111

// Indexed by the number of extra bytes in a UTF-8 sequence (0..5).
// UTF8ToUnicode subtracts the entry from the raw sum of the sequence's bytes.
unsigned long offsetsFromUTF8[6] = {
    0x00000000UL,   // 0 extra bytes
    0x00003080UL,   // 1 extra byte
    0x000E2080UL,   // 2 extra bytes
    0x03C82080UL,   // 3 extra bytes
    0xFA082080UL,   // 4 extra bytes
    0x82082080UL    // 5 extra bytes
};

// Indexed by the first byte of a UTF-8 sequence; gives the number of extra
// bytes UTF8ToUnicode consumes after that byte.
char bytesFromUTF8[256] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xD0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xF0 */ 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

// Value UTF8ToUnicode stores for a decoded character above kMaximumUniChar.
unsigned long kReplacementCharacter = 0xFFFDUL;

// Largest character stored as a single wide character, and largest character
// stored at all (as a pair of wide characters).
unsigned long kMaximumSimpleUniChar = 0xFFFFUL;
unsigned long kMaximumUniChar       = 0x10FFFFUL;

// Arithmetic used to split a character above kMaximumSimpleUniChar into a
// high half and a low half.
int           halfShift             = 10;
unsigned long halfBase              = 0x10000UL;
unsigned long halfMask              = 0x3FFUL;
unsigned long kSurrogateHighStart   = 0xD800UL;
unsigned long kSurrogateLowStart    = 0xDC00UL;

/*******************************************************************************
UTF8ToUnicode

Converts a UTF-8 string to a wide-character (Unicode) string.

Arguments:
    lpUTF8Str     - Source bytes.
    cbUTF8        - Source length in bytes, or -1 when the source is
                    null-terminated (the terminator is then converted too).
    lpWideCharStr - Destination buffer; not written when cchWideChar is 0.
    cchWideChar   - Destination size in wide characters, or 0 to query the
                    size needed.

Returns:
    The number of wide characters the converted string occupies. Returns 0
    with ERROR_INSUFFICIENT_BUFFER when a non-zero cchWideChar is too small
    (the characters that fit have been written), and 0 with
    ERROR_INVALID_PARAMETER for a NULL buffer paired with a non-zero size.

Notes:
    Trailing bytes are not validated. A sequence cut short by cbUTF8 ends the
    conversion without an error.
*******************************************************************************/
int
UTF8ToUnicode(
    IN LPCSTR lpUTF8Str,
    IN int cbUTF8,
    OUT LPWSTR lpWideCharStr,
    IN int cchWideChar
    )
{
    const unsigned char *pSrcStart = (const unsigned char *)lpUTF8Str;
    const unsigned char *pSrc = pSrcStart;
    unsigned short *pchWideChar = (unsigned short *) lpWideCharStr;
    int LengthRequires = 0;
    unsigned long ch = 0;
    unsigned short extraBytes = 0;

    if ( ( lpUTF8Str == NULL && cbUTF8 != 0 ) ||
         ( lpWideCharStr == NULL && cchWideChar > 0 ) )
    {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( cbUTF8 == -1 )
    {
        cbUTF8 = (int)(strlen(lpUTF8Str) + sizeof(ANSI_NULL));
    }

    while ((pSrc - pSrcStart) < cbUTF8)
    {
        ch = 0;
        extraBytes = bytesFromUTF8[*pSrc];

        // The sequence occupies the current byte plus extraBytes more; stop
        // if its last byte would lie at or beyond cbUTF8.
        if (((pSrc - pSrcStart) + extraBytes) >= cbUTF8)
        {
            break;
        }

        // Accumulate the raw bytes six bits apart; every case falls through
        // to the next on purpose.
        switch(extraBytes)
        {
            case 5:ch += *pSrc++; ch <<=6;  /* fallthrough */
            case 4:ch += *pSrc++; ch <<=6;  /* fallthrough */
            case 3:ch += *pSrc++; ch <<=6;  /* fallthrough */
            case 2:ch += *pSrc++; ch <<=6;  /* fallthrough */
            case 1:ch += *pSrc++; ch <<=6;  /* fallthrough */
            case 0:ch += *pSrc++;
        }

        // Remove the marker bits that were summed in along with the payload.
        ch -= offsetsFromUTF8[extraBytes];

        // Every store below writes exactly one 16-bit slot, so the last
        // character never spills past the end of the caller's buffer.
        if (ch <= kMaximumSimpleUniChar)
        {
            if (++LengthRequires <= cchWideChar)
            {
                *pchWideChar++ = (unsigned short)ch;
            }
        }
        else if (ch > kMaximumUniChar)
        {
            if (++LengthRequires <= cchWideChar)
            {
                *pchWideChar++ = (unsigned short)kReplacementCharacter;
            }
        }
        else
        {
            // Split into a high half followed by a low half.
            ch -= halfBase;

            if (++LengthRequires <= cchWideChar)
            {
                *pchWideChar++ = (unsigned short)((ch >> halfShift) + kSurrogateHighStart);
            }

            if (++LengthRequires <= cchWideChar)
            {
                *pchWideChar++ = (unsigned short)((ch & halfMask) + kSurrogateLowStart);
            }
        }
    }

    if ( cchWideChar != 0 && cchWideChar < LengthRequires )
    {
        SetLastError( ERROR_INSUFFICIENT_BUFFER );
        return 0;
    }

    return LengthRequires;
}

/*******************************************************************************
UnicodeToUTF8

Converts a wide-character (Unicode) string to a UTF-8 string.

Arguments:
    lpWideCharStr - Source wide characters.
    cchWideChar   - Source length in wide characters, or -1 when the source is
                    null-terminated (the terminator is then converted too).
    lpUTF8Str     - Destination buffer; may be NULL, in which case nothing is
                    written and only the size is computed.
    cbUTF8        - Destination size in bytes, or 0 to query the size needed.

Returns:
    The number of bytes the UTF-8 string occupies. Returns 0 with
    ERROR_INSUFFICIENT_BUFFER when a non-zero cbUTF8 is too small (the bytes
    that fit have been written), and 0 with ERROR_INVALID_PARAMETER for a
    NULL source paired with a non-zero length.

Notes:
    A high half immediately followed by a low half is combined into one
    character and written as a single 4-byte sequence. A half without its
    partner is written as a 3-byte sequence.
*******************************************************************************/
int
UnicodeToUTF8(
    IN LPCWSTR lpWideCharStr,
    IN int cchWideChar,
    OUT LPSTR lpUTF8Str,
    IN int cbUTF8
    )
{
    BYTE Sequence[4];
    int SequenceLength;
    int LengthRequires = 0;
    unsigned long ch;
    unsigned long low;
    int i;
    int j;

    if ( lpWideCharStr == NULL && cchWideChar != 0 )
    {
        SetLastError( ERROR_INVALID_PARAMETER );
        return 0;
    }

    if ( cchWideChar == -1 ) {
        cchWideChar = (int)(wcslen(lpWideCharStr) + sizeof(ANSI_NULL));
    }

    for ( i = 0; i < cchWideChar; i += 1 )
    {
        ch = lpWideCharStr[i];

        // Combine a high/low pair into the character it stands for, and
        // consume the second wide character as well.
        if (ch >= kSurrogateHighStart && ch < kSurrogateLowStart && (i + 1) < cchWideChar)
        {
            low = lpWideCharStr[i + 1];

            if (low >= kSurrogateLowStart && low <= 0xDFFF)
            {
                ch = halfBase + ((ch - kSurrogateHighStart) << halfShift)
                              + (low - kSurrogateLowStart);
                i += 1;
            }
        }

        if (ch <= 0x7F)
        {
            // Single byte encoding
            Sequence[0] = (BYTE) ch;
            SequenceLength = 1;
        }
        else if (ch <= 0x7FF)
        {
            // Double byte encoding
            Sequence[0] = (BYTE) (HIGH_BITS2 | (ch >> 6));
            Sequence[1] = (BYTE) (HIGH_BITS1 | (ch & LOW_BITS6));
            SequenceLength = 2;
        }
        else if (ch <= kMaximumSimpleUniChar)
        {
            // Triple byte encoding
            Sequence[0] = (BYTE) (HIGH_BITS3 | (ch >> 12));
            Sequence[1] = (BYTE) (HIGH_BITS1 | ((ch >> 6) & LOW_BITS6));
            Sequence[2] = (BYTE) (HIGH_BITS1 | (ch & LOW_BITS6));
            SequenceLength = 3;
        }
        else
        {
            // Quadruple byte encoding (only reached for a combined pair)
            Sequence[0] = (BYTE) (HIGH_BITS4 | (ch >> 18));
            Sequence[1] = (BYTE) (HIGH_BITS1 | ((ch >> 12) & LOW_BITS6));
            Sequence[2] = (BYTE) (HIGH_BITS1 | ((ch >> 6) & LOW_BITS6));
            Sequence[3] = (BYTE) (HIGH_BITS1 | (ch & LOW_BITS6));
            SequenceLength = 4;
        }

        // Count every byte, but store only the ones that fit in the buffer.
        for ( j = 0; j < SequenceLength; j += 1 )
        {
            if (++LengthRequires <= cbUTF8 && lpUTF8Str != NULL)
            {
                *lpUTF8Str++ = (char) Sequence[j];
            }
        }
    }

    if ( cbUTF8 != 0 && cbUTF8 < LengthRequires )
    {
        SetLastError( ERROR_INSUFFICIENT_BUFFER );
        return 0;
    }

    return LengthRequires;
}