Windows2000/private/ntos/rtl/i386/nlstrans.asm
2020-09-30 17:12:32 +02:00

669 lines
20 KiB
NASM

title "NLS Translation"
; Copyright (c) 1989 Microsoft Corporation
; Module Name:
; nlstrans.asm
; Abstract:
; This module implements the function to translate from Unicode
; characters to ANSI and OEM characters. The translation is based on
; the installed ACP and OEMCP.
; Author:
; Gregory Wilson 15 may 92
; Environment:
; Any mode.
; Revision History:
.386p
.xlist
include ks386.inc
.list
_DATA SEGMENT DWORD PUBLIC 'DATA'
extrn _NlsUnicodeToAnsiData:DWORD
extrn _NlsUnicodeToMbAnsiData:DWORD
extrn _NlsMbCodePageTag:BYTE
extrn _NlsUnicodeToOemData:DWORD
extrn _NlsUnicodeToMbOemData:DWORD
extrn _NlsMbOemCodePageTag:BYTE
_DATA ENDS
_TEXT SEGMENT DWORD PUBLIC 'CODE'
ASSUME DS:FLAT, ES:FLAT, SS:FLAT, FS:NOTHING, GS:NOTHING
; Stack-frame offsets (relative to ebp) for _RtlUnicodeToMultiByteN.
; Positive offsets address the caller-pushed arguments, in the order of
; the C prototype below; negative offsets address the three dword locals
; reserved by "sub esp, 12" in the prologue.
_MultiByteString$ equ 8 ; arg: destination buffer pointer
_MaxBytesInMultiByteString$ equ 12 ; arg: capacity of destination, in bytes
_BytesInMultiByteString$ equ 16 ; arg: optional out pointer (may be 0)
_UnicodeString$ equ 20 ; arg: source string pointer
_BytesInUnicodeString$ equ 24 ; arg: source length, in bytes
_LoopCount$ equ -4 ; local: characters left in the single-byte loop
_MultiByteStringAnchor$ equ -8 ; local: original destination pointer
_CharsInUnicodeString$ equ -12 ; local: source character counter (multibyte loop)
align 4
public _RtlUnicodeToMultiByteN
; NTSTATUS
; RtlUnicodeToMultiByteN(
; OUT PCH MultiByteString,
; IN ULONG MaxBytesInMultiByteString,
; OUT PULONG BytesInMultiByteString OPTIONAL,
; IN PWCH UnicodeString,
; IN ULONG BytesInUnicodeString)
; /*++
; Routine Description:
; This function converts the specified unicode source string into an
; ansi string. The translation is done with respect to the
; ANSI Code Page (ACP) loaded at boot time.
; Arguments:
; MultiByteString - Returns an ansi string that is equivalent to the
; unicode source string. If the translation can not be done
; because a character in the unicode string does not map to an
; ansi character in the ACP, an error is returned.
; MaxBytesInMultiByteString - Supplies the maximum number of bytes to be
; written to MultiByteString. If this causes MultiByteString to be a
; truncated equivalent of UnicodeString, no error condition results.
; BytesInMultiByteString - Returns the number of bytes in the returned
; ansi string pointed to by MultiByteString.
; UnicodeString - Supplies the unicode source string that is to be
; converted to ansi.
; BytesInUnicodeString - The number of bytes in the string pointed to by
; UnicodeString.
; Return Value:
; SUCCESS - The conversion was successful
; --*/
_RtlUnicodeToMultiByteN proc
; Translate a Unicode string to the ANSI code page.
;
; In:    five dword arguments on the stack (see the _xxx$ frame offsets).
; Out:   eax = 0 (STATUS_SUCCESS) in all cases.
;        *BytesInMultiByteString = number of bytes written, when the
;        pointer is non-zero.
; Saves: ebx (pushed/popped).  ecx, edx and flags are clobbered.
; The routine returns with "ret 0", i.e. it does not pop its arguments.
;
; Two paths exist, selected by _NlsMbCodePageTag:
;   - single-byte code page: byte table lookup, unrolled 16 chars/iteration
;   - multibyte code page:   word table lookup, one character per iteration
        push    ebp
        mov     ebp, esp
        sub     esp, 12                         ; three dword locals
        push    ebx

; Remember where the destination starts so the number of bytes written
; can be computed at the end.
        mov     eax, DWORD PTR _MultiByteString$[ebp]
        mov     DWORD PTR _MultiByteStringAnchor$[ebp], eax

; LoopCount = min(BytesInUnicodeString / 2, MaxBytesInMultiByteString),
; computed without a branch: if chars < max the subtraction borrows,
; sbb yields an all-ones mask and the difference is kept, so adding max
; back gives chars; otherwise the mask is zero and the result is max.
        mov     eax, DWORD PTR _BytesInUnicodeString$[ebp]
        shr     eax, 1
        mov     ecx, DWORD PTR _MaxBytesInMultiByteString$[ebp]
        sub     eax, ecx
        sbb     edx, edx
        and     eax, edx
        add     eax, ecx
        mov     DWORD PTR _LoopCount$[ebp], eax

; Register roles for the single-byte path:
;   ebx: UnicodeString (source cursor)
;   ecx: NlsUnicodeToAnsiData (byte translation table)
;   edx: MultiByteString (destination cursor)
        mov     edx, DWORD PTR _MultiByteString$[ebp]
        mov     ebx, DWORD PTR _UnicodeString$[ebp]
        mov     ecx, DWORD PTR _NlsUnicodeToAnsiData

; Determine if we're dealing with SBCS or MBCS.
        cmp     BYTE PTR _NlsMbCodePageTag, 0   ; 0 -> sbcs, 1 -> mbcs
        jne     $ACP_MBCS

; If the character count is not a multiple of 16, translate the
; (count mod 16) leftover characters first by entering the unrolled
; loop part-way through.  The cursors are advanced past the leftovers
; up front because the loop body addresses backwards from the cursors.
        and     eax, 15
        je      SHORT $ACP_TopOfSBLoop          ; already a multiple of 16 chars.
        push    eax                             ; save for indexing into jump table
        sub     DWORD PTR _LoopCount$[ebp], eax ; LoopCount is now a multiple of 16
        add     edx, eax                        ; advance destination by n bytes
        lea     eax, DWORD PTR [eax*2]
        add     ebx, eax                        ; advance source by n characters

; Jump table entry (n-1) is the label that translates the last n
; characters of a 16-character group.
        pop     eax
        dec     eax
        jmp     DWORD PTR cs:$ACP_JumpTable[eax*4]

; Main translation loop. Translates 16 characters on each iteration.
; LoopCount is always a multiple of 16 here.
$ACP_TopOfSBLoop:
        cmp     DWORD PTR _LoopCount$[ebp], 0
        jbe     $ACP_FinishedTranslation

; Advance the cursors first; the body indexes backwards from them.
        add     edx, 16                         ; increment MultiByteString ptr
        add     ebx, 32                         ; increment UnicodeString ptr
        sub     DWORD PTR _LoopCount$[ebp], 16  ; decrement LoopCount

; Each stanza: load a Unicode character, look up its byte, store it.
        movzx   eax, WORD PTR [ebx-32]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-16], al
$ACP_SBAdjust15:
        movzx   eax, WORD PTR [ebx-30]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-15], al
$ACP_SBAdjust14:
        movzx   eax, WORD PTR [ebx-28]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-14], al
$ACP_SBAdjust13:
        movzx   eax, WORD PTR [ebx-26]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-13], al
$ACP_SBAdjust12:
        movzx   eax, WORD PTR [ebx-24]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-12], al
$ACP_SBAdjust11:
        movzx   eax, WORD PTR [ebx-22]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-11], al
$ACP_SBAdjust10:
        movzx   eax, WORD PTR [ebx-20]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-10], al
$ACP_SBAdjust9:
        movzx   eax, WORD PTR [ebx-18]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-9], al
$ACP_SBAdjust8:
        movzx   eax, WORD PTR [ebx-16]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-8], al
$ACP_SBAdjust7:
        movzx   eax, WORD PTR [ebx-14]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-7], al
$ACP_SBAdjust6:
        movzx   eax, WORD PTR [ebx-12]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-6], al
$ACP_SBAdjust5:
        movzx   eax, WORD PTR [ebx-10]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-5], al
$ACP_SBAdjust4:
        movzx   eax, WORD PTR [ebx-8]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-4], al
$ACP_SBAdjust3:
        movzx   eax, WORD PTR [ebx-6]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-3], al
$ACP_SBAdjust2:
        movzx   eax, WORD PTR [ebx-4]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-2], al
$ACP_SBAdjust1:
        movzx   eax, WORD PTR [ebx-2]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-1], al
        jmp     $ACP_TopOfSBLoop

; The ACP is a multibyte code page. Translation is done here.
; WARNING!! WARNING!! No optimization has been done on this loop.
;
; In this path the cursors live in the argument slots on the stack:
;   _UnicodeString$[ebp]             source cursor
;   _MultiByteString$[ebp]           destination cursor
;   _MaxBytesInMultiByteString$[ebp] destination bytes still free
; dx holds the translated character (dh = lead byte or 0, dl = single
; or trail byte), so edx is NOT a valid destination pointer inside the
; loop.  Every exit therefore goes through $ACP_MBDone, which reloads
; edx before the byte count is computed.
$ACP_MBCS:
        mov     eax, DWORD PTR _BytesInUnicodeString$[ebp]
        shr     eax, 1
        mov     DWORD PTR _CharsInUnicodeString$[ebp], eax
        dec     DWORD PTR _CharsInUnicodeString$[ebp]   ; counter runs one behind
        or      eax, eax                        ; empty source string?
        mov     ecx, DWORD PTR _NlsUnicodeToMbAnsiData  ; mov leaves flags intact
        je      SHORT $ACP_MBDone

$ACP_TopOfMBLoop:
; Claim one destination byte for this character; stop if none is left.
        mov     eax, DWORD PTR _MaxBytesInMultiByteString$[ebp]
        dec     DWORD PTR _MaxBytesInMultiByteString$[ebp]
        or      eax, eax
        je      SHORT $ACP_MBDone

; Grab the multibyte character(s) from the translation table
; and increment the source string pointer.
        mov     eax, DWORD PTR _UnicodeString$[ebp]
        movzx   eax, WORD PTR [eax]
        mov     dx, WORD PTR [ecx+eax*2]
        add     DWORD PTR _UnicodeString$[ebp], 2
        mov     bl, dh

; Check for a lead byte.
        or      bl, bl
        je      SHORT $ACP_NoLeadByte

; There is a lead byte, so a second destination byte is needed.  One
; byte has already been claimed above; the remaining count must be
; non-zero for the pair to fit.  The count is only tested here - it is
; decremented once, below, when the lead byte is actually stored - so
; a double-byte character consumes exactly two bytes of capacity.
        cmp     DWORD PTR _MaxBytesInMultiByteString$[ebp], 0
        je      SHORT $ACP_MBDone

; Store the lead byte in the destination buffer, increment
; the destination pointer and decrement the count of remaining
; space.
        mov     eax, DWORD PTR _MultiByteString$[ebp]
        mov     BYTE PTR [eax], bl
        inc     DWORD PTR _MultiByteString$[ebp]
        dec     DWORD PTR _MaxBytesInMultiByteString$[ebp]

; Store the single byte or trail byte.
$ACP_NoLeadByte:
        mov     eax, DWORD PTR _MultiByteString$[ebp]
        mov     BYTE PTR [eax], dl
        inc     DWORD PTR _MultiByteString$[ebp]

; Check to see if there are any more characters to translate.
        mov     eax, DWORD PTR _CharsInUnicodeString$[ebp]
        dec     DWORD PTR _CharsInUnicodeString$[ebp]
        or      eax, eax
        jne     SHORT $ACP_TopOfMBLoop

; Common exit for the multibyte path: reload edx with the destination
; cursor so the number of bytes written can be calculated below.
$ACP_MBDone:
        mov     edx, DWORD PTR _MultiByteString$[ebp]

; Here edx = one past the last destination byte written.
$ACP_FinishedTranslation:
        mov     eax, DWORD PTR _BytesInMultiByteString$[ebp]
        or      eax, eax                        ; optional out pointer supplied?
        je      SHORT $ACP_NoOptParam
        sub     edx, DWORD PTR _MultiByteStringAnchor$[ebp]
        mov     DWORD PTR [eax], edx            ; *BytesInMultiByteString = bytes written
$ACP_NoOptParam:
        sub     eax, eax
        pop     ebx
        leave
        ret     0                               ; return STATUS_SUCCESS

; Entry points into the unrolled loop: entry (n-1) handles the last n
; characters of a 16-character group.
$ACP_JumpTable:
        DD      OFFSET FLAT:$ACP_SBAdjust1
        DD      OFFSET FLAT:$ACP_SBAdjust2
        DD      OFFSET FLAT:$ACP_SBAdjust3
        DD      OFFSET FLAT:$ACP_SBAdjust4
        DD      OFFSET FLAT:$ACP_SBAdjust5
        DD      OFFSET FLAT:$ACP_SBAdjust6
        DD      OFFSET FLAT:$ACP_SBAdjust7
        DD      OFFSET FLAT:$ACP_SBAdjust8
        DD      OFFSET FLAT:$ACP_SBAdjust9
        DD      OFFSET FLAT:$ACP_SBAdjust10
        DD      OFFSET FLAT:$ACP_SBAdjust11
        DD      OFFSET FLAT:$ACP_SBAdjust12
        DD      OFFSET FLAT:$ACP_SBAdjust13
        DD      OFFSET FLAT:$ACP_SBAdjust14
        DD      OFFSET FLAT:$ACP_SBAdjust15
_RtlUnicodeToMultiByteN ENDP
; Stack-frame offsets (relative to ebp) for _RtlUnicodeToOemN.
; Positive offsets address the caller-pushed arguments, in the order of
; the C prototype below; negative offsets address the three dword locals
; reserved by "sub esp, 12" in the prologue.  Several names repeat
; earlier equates in this file with the same values.
_OemString$ equ 8 ; arg: destination buffer pointer
_MaxBytesInOemString$ equ 12 ; arg: capacity of destination, in bytes
_BytesInOemString$ equ 16 ; arg: optional out pointer (may be 0)
_UnicodeString$ equ 20 ; arg: source string pointer
_BytesInUnicodeString$ equ 24 ; arg: source length, in bytes
_LoopCount$ equ -4 ; local: characters left in the single-byte loop
_OemStringAnchor$ equ -8 ; local: original destination pointer
_CharsInUnicodeString$ equ -12 ; local: source character counter (multibyte loop)
public _RtlUnicodeToOemN
; NTSTATUS
; RtlUnicodeToOemN(
; OUT PCH OemString,
; IN ULONG MaxBytesInOemString,
; OUT PULONG BytesInOemString OPTIONAL,
; IN PWCH UnicodeString,
; IN ULONG BytesInUnicodeString)
; /*++
; Routine Description:
; This function converts the specified unicode source string into an
; oem string. The translation is done with respect to the OEM Code
; Page (OCP) loaded at boot time.
; Arguments:
; OemString - Returns an oem string that is equivalent to the
; unicode source string. If the translation can not be done
; because a character in the unicode string does not map to an
; oem character in the OCP, an error is returned.
; MaxBytesInOemString - Supplies the maximum number of bytes to be
; written to OemString. If this causes OemString to be a
; truncated equivalent of UnicodeString, no error condition results.
; BytesInOemString - Returns the number of bytes in the returned
; oem string pointed to by OemString.
; UnicodeString - Supplies the unicode source string that is to be
; converted to oem.
; BytesInUnicodeString - The number of bytes in the string pointed to by
; UnicodeString.
; Return Value:
; SUCCESS - The conversion was successful
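; STATUS_BUFFER_OVERFLOW - MaxBytesInOemString was not enough to hold
; the whole Oem string. The string was converted correctly up to that point.
; NOTE: the overflow check is currently commented out in the epilogue, so
; this routine always returns STATUS_SUCCESS.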
; --*/
_RtlUnicodeToOemN proc
; Translate a Unicode string to the OEM code page.
;
; In:    five dword arguments on the stack (see the _xxx$ frame offsets).
; Out:   eax = 0 (STATUS_SUCCESS) in all cases; the buffer-overflow
;        status is not reported (see the note in the epilogue).
;        *BytesInOemString = number of bytes written, when the pointer
;        is non-zero.
; Saves: ebx (pushed/popped).  ecx, edx and flags are clobbered.
; The routine returns with "ret 0", i.e. it does not pop its arguments.
;
; Two paths exist, selected by _NlsMbOemCodePageTag:
;   - single-byte code page: byte table lookup, unrolled 16 chars/iteration
;   - multibyte code page:   word table lookup, one character per iteration
        push    ebp
        mov     ebp, esp
        sub     esp, 12                         ; three dword locals
        push    ebx

; Remember where the destination starts so the number of bytes written
; can be computed at the end.
        mov     eax, DWORD PTR _OemString$[ebp]
        mov     DWORD PTR _OemStringAnchor$[ebp], eax

; LoopCount = min(BytesInUnicodeString / 2, MaxBytesInOemString),
; computed without a branch: if chars < max the subtraction borrows,
; sbb yields an all-ones mask and the difference is kept, so adding max
; back gives chars; otherwise the mask is zero and the result is max.
        mov     eax, DWORD PTR _BytesInUnicodeString$[ebp]
        shr     eax, 1
        mov     ecx, DWORD PTR _MaxBytesInOemString$[ebp]
        sub     eax, ecx
        sbb     edx, edx
        and     eax, edx
        add     eax, ecx
        mov     DWORD PTR _LoopCount$[ebp], eax

; Register roles for the single-byte path:
;   ebx: UnicodeString (source cursor)
;   ecx: NlsUnicodeToOemData (byte translation table)
;   edx: OemString (destination cursor)
        mov     edx, DWORD PTR _OemString$[ebp]
        mov     ebx, DWORD PTR _UnicodeString$[ebp]
        mov     ecx, DWORD PTR _NlsUnicodeToOemData

; Determine if we're dealing with SBCS or MBCS.
        cmp     BYTE PTR _NlsMbOemCodePageTag, 0 ; 0 -> sbcs, 1 -> mbcs
        jne     $OEMCP_MBCS

; If the character count is not a multiple of 16, translate the
; (count mod 16) leftover characters first by entering the unrolled
; loop part-way through.  The cursors are advanced past the leftovers
; up front because the loop body addresses backwards from the cursors.
        and     eax, 15
        je      SHORT $OEMCP_TopOfSBLoop        ; already a multiple of 16 chars.
        push    eax                             ; save for indexing into jump table
        sub     DWORD PTR _LoopCount$[ebp], eax ; LoopCount is now a multiple of 16
        add     edx, eax                        ; advance destination by n bytes
        lea     eax, DWORD PTR [eax*2]
        add     ebx, eax                        ; advance source by n characters

; Jump table entry (n-1) is the label that translates the last n
; characters of a 16-character group.
        pop     eax
        dec     eax
        jmp     DWORD PTR cs:$OEMCP_JumpTable[eax*4]

; Main translation loop. Translates 16 characters on each iteration.
; LoopCount is always a multiple of 16 here.
$OEMCP_TopOfSBLoop:
        cmp     DWORD PTR _LoopCount$[ebp], 0
        jbe     $OEMCP_FinishedTranslation

; Advance the cursors first; the body indexes backwards from them.
        add     edx, 16                         ; increment OemString ptr
        add     ebx, 32                         ; increment UnicodeString ptr
        sub     DWORD PTR _LoopCount$[ebp], 16  ; decrement LoopCount

; Each stanza: load a Unicode character, look up its byte, store it.
        movzx   eax, WORD PTR [ebx-32]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-16], al
$OEMCP_SBAdjust15:
        movzx   eax, WORD PTR [ebx-30]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-15], al
$OEMCP_SBAdjust14:
        movzx   eax, WORD PTR [ebx-28]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-14], al
$OEMCP_SBAdjust13:
        movzx   eax, WORD PTR [ebx-26]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-13], al
$OEMCP_SBAdjust12:
        movzx   eax, WORD PTR [ebx-24]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-12], al
$OEMCP_SBAdjust11:
        movzx   eax, WORD PTR [ebx-22]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-11], al
$OEMCP_SBAdjust10:
        movzx   eax, WORD PTR [ebx-20]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-10], al
$OEMCP_SBAdjust9:
        movzx   eax, WORD PTR [ebx-18]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-9], al
$OEMCP_SBAdjust8:
        movzx   eax, WORD PTR [ebx-16]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-8], al
$OEMCP_SBAdjust7:
        movzx   eax, WORD PTR [ebx-14]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-7], al
$OEMCP_SBAdjust6:
        movzx   eax, WORD PTR [ebx-12]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-6], al
$OEMCP_SBAdjust5:
        movzx   eax, WORD PTR [ebx-10]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-5], al
$OEMCP_SBAdjust4:
        movzx   eax, WORD PTR [ebx-8]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-4], al
$OEMCP_SBAdjust3:
        movzx   eax, WORD PTR [ebx-6]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-3], al
$OEMCP_SBAdjust2:
        movzx   eax, WORD PTR [ebx-4]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-2], al
$OEMCP_SBAdjust1:
        movzx   eax, WORD PTR [ebx-2]
        mov     al, BYTE PTR [eax+ecx]
        mov     BYTE PTR [edx-1], al
        jmp     $OEMCP_TopOfSBLoop

; The OEMCP is a multibyte code page. Translation is done here.
; WARNING!! WARNING!! No optimization has been done on this loop.
;
; In this path the cursors live in the argument slots on the stack:
;   _UnicodeString$[ebp]        source cursor
;   _OemString$[ebp]            destination cursor
;   _MaxBytesInOemString$[ebp]  destination bytes still free
; dx holds the translated character (dh = lead byte or 0, dl = single
; or trail byte), so edx is NOT a valid destination pointer inside the
; loop.  Every exit therefore goes through $OEMCP_MBDone, which reloads
; edx before the byte count is computed.
$OEMCP_MBCS:
        mov     eax, DWORD PTR _BytesInUnicodeString$[ebp]
        shr     eax, 1
        mov     DWORD PTR _CharsInUnicodeString$[ebp], eax
        dec     DWORD PTR _CharsInUnicodeString$[ebp]   ; counter runs one behind
        or      eax, eax                        ; empty source string?
        mov     ecx, DWORD PTR _NlsUnicodeToMbOemData   ; mov leaves flags intact
        je      SHORT $OEMCP_MBDone

$OEMCP_TopOfMBLoop:
; Claim one destination byte for this character; stop if none is left.
        mov     eax, DWORD PTR _MaxBytesInOemString$[ebp]
        dec     DWORD PTR _MaxBytesInOemString$[ebp]
        or      eax, eax
        je      SHORT $OEMCP_MBDone

; Grab the multibyte character(s) from the translation table
; and increment the source string pointer.
        mov     eax, DWORD PTR _UnicodeString$[ebp]
        movzx   eax, WORD PTR [eax]
        mov     dx, WORD PTR [ecx+eax*2]
        add     DWORD PTR _UnicodeString$[ebp], 2
        mov     bl, dh

; Check for a lead byte.
        or      bl, bl
        je      SHORT $OEMCP_NoLeadByte

; There is a lead byte, so a second destination byte is needed.  One
; byte has already been claimed above; the remaining count must be
; non-zero for the pair to fit.  The count is only tested here - it is
; decremented once, below, when the lead byte is actually stored - so
; a double-byte character consumes exactly two bytes of capacity.
        cmp     DWORD PTR _MaxBytesInOemString$[ebp], 0
        je      SHORT $OEMCP_MBDone

; Store the lead byte in the destination buffer, increment
; the destination pointer and decrement the count of remaining
; space.
        mov     eax, DWORD PTR _OemString$[ebp]
        mov     BYTE PTR [eax], bl
        inc     DWORD PTR _OemString$[ebp]
        dec     DWORD PTR _MaxBytesInOemString$[ebp]

; Store the single byte or trail byte.
$OEMCP_NoLeadByte:
        mov     eax, DWORD PTR _OemString$[ebp]
        mov     BYTE PTR [eax], dl
        inc     DWORD PTR _OemString$[ebp]

; Check to see if there are any more characters to translate.
        mov     eax, DWORD PTR _CharsInUnicodeString$[ebp]
        dec     DWORD PTR _CharsInUnicodeString$[ebp]
        or      eax, eax
        jne     SHORT $OEMCP_TopOfMBLoop

; Common exit for the multibyte path: reload edx with the destination
; cursor so the number of bytes written can be calculated below.
$OEMCP_MBDone:
        mov     edx, DWORD PTR _OemString$[ebp]

; Here edx = one past the last destination byte written.
$OEMCP_FinishedTranslation:
        mov     eax, DWORD PTR _BytesInOemString$[ebp]
        or      eax, eax                        ; optional out pointer supplied?
        je      SHORT $OEMCP_NoOptParam
        sub     edx, DWORD PTR _OemStringAnchor$[ebp]
        mov     DWORD PTR [eax], edx            ; *BytesInOemString = bytes written
$OEMCP_NoOptParam:
; If we ran out of space in the destination buffer before
; translating all of the Unicode characters then return
; STATUS_BUFFER_OVERFLOW. Check is done by looking at
; # of chars in Unicode string left to translate.
; WARNING!
; we can't check CharsInUnicodeString since we determined the loop
; count above and don't modify CharsInUnicodeString anymore...
; cmp DWORD PTR _CharsInUnicodeString$[ebp], 1
; cmc
; sbb eax, eax
; and eax, -2147483643 ; STATUS_BUFFER_OVERFLOW (80000005H)
        sub     eax, eax                        ; return STATUS_SUCCESS
        pop     ebx
        leave
        ret     0

; Entry points into the unrolled loop: entry (n-1) handles the last n
; characters of a 16-character group.
$OEMCP_JumpTable:
        DD      OFFSET FLAT:$OEMCP_SBAdjust1
        DD      OFFSET FLAT:$OEMCP_SBAdjust2
        DD      OFFSET FLAT:$OEMCP_SBAdjust3
        DD      OFFSET FLAT:$OEMCP_SBAdjust4
        DD      OFFSET FLAT:$OEMCP_SBAdjust5
        DD      OFFSET FLAT:$OEMCP_SBAdjust6
        DD      OFFSET FLAT:$OEMCP_SBAdjust7
        DD      OFFSET FLAT:$OEMCP_SBAdjust8
        DD      OFFSET FLAT:$OEMCP_SBAdjust9
        DD      OFFSET FLAT:$OEMCP_SBAdjust10
        DD      OFFSET FLAT:$OEMCP_SBAdjust11
        DD      OFFSET FLAT:$OEMCP_SBAdjust12
        DD      OFFSET FLAT:$OEMCP_SBAdjust13
        DD      OFFSET FLAT:$OEMCP_SBAdjust14
        DD      OFFSET FLAT:$OEMCP_SBAdjust15
_RtlUnicodeToOemN ENDP
_TEXT ENDS
end