NT4/private/sdktools/memt/i386/memcpy.asm
2020-09-30 17:12:29 +02:00

351 lines
11 KiB
NASM
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

page ,132
title memcpy - Copy source memory bytes to destination
;***
;memcpy.asm - contains memcpy and memmove routines
;
; Copyright (c) 1986-1991, Microsoft Corporation. All right reserved.
;
;Purpose:
; memcpy() copies a source memory buffer to a destination buffer.
; Overlapping buffers are not treated specially, so propagation may occur.
; memmove() copies a source memory buffer to a destination buffer.
; Overlapping buffers are treated specially, to avoid propagation.
;
;Revision History:
; 02-06-87 JCR Added memmove entry
; 04-08-87 JCR Conditionalized memmove/memcpy entries
; 06-30-87 SKS Rewritten for speed and size
; 08-21-87 SKS Fix return value for overlapping copies
; 05-17-88 SJM Add model-independent (large model) ifdef
; 08-04-88 SJM convert to cruntime/ add 32-bit support
; 08-19-88 JCR Minor 386 corrections/enhancements
; 10-25-88 JCR General cleanup for 386-only code
; 03-23-90 GJF Changed to _stdcall. Also, fixed the copyright.
; 05-10-91 GJF Back to _cdecl, sigh...
; 11-13-92 SRW Make it fast with unaligned arguments
;
;*******************************************************************************
.xlist
include cruntime.inc
.list
;
; M_EXIT - common function exit sequence.
;
; Loads the return value (the original destination pointer, re-read from
; the dst argument) into EAX and returns to the caller.  When _STDCALL_
; is defined the RET also pops the three arguments (two pointers and one
; integer-sized count); otherwise a plain _cdecl RET is emitted.
;
M_EXIT  macro
        mov     eax,[dst]               ; eax = dst argument (return value)
    ifndef _STDCALL_
        ret                             ; _cdecl: caller removes the arguments
    else
        ret     2*DPSIZE + ISIZE        ; _stdcall: callee removes the arguments
    endif
        endm                            ; M_EXIT
CODESEG
page
;***
;memcpy - Copy source buffer to destination buffer
;
;Purpose:
; memcpy() copies a source memory buffer to a destination memory buffer.
; This routine does NOT recognize overlapping buffers, and thus can lead
; to propagation.
; For cases where propagation must be avoided, memmove() must be used.
;
; Algorithm:
;
; void * memcpy(void * dst, void * src, size_t count)
; {
; void * ret = dst;
;
; /*
; * copy from lower addresses to higher addresses
; */
; while (count--)
; *dst++ = *src++;
;
; return(ret);
; }
;
;memmove - Copy source buffer to destination buffer
;
;Purpose:
; memmove() copies a source memory buffer to a destination memory buffer.
; This routine recognizes overlapping buffers to avoid propagation.
; For cases where propagation is not a problem, memcpy() can be used.
;
; Algorithm:
;
; void * memmove(void * dst, void * src, size_t count)
; {
; void * ret = dst;
;
; if (dst <= src || dst >= (src + count)) {
; /*
; * Non-Overlapping Buffers
; * copy from lower addresses to higher addresses
; */
; while (count--)
; *dst++ = *src++;
; }
; else {
; /*
; * Overlapping Buffers
; * copy from higher addresses to lower addresses
; */
; dst += count - 1;
; src += count - 1;
;
; while (count--)
; *dst-- = *src--;
; }
;
; return(ret);
; }
;
;
;Entry:
; void *dst = pointer to destination buffer
; const void *src = pointer to source buffer
; size_t count = number of bytes to copy
;
;Exit:
; Returns a pointer to the destination buffer in EAX
;
;Uses:
; EAX, ECX, EDX (EDI and ESI are saved and restored by the procedure frame)
;
;Exceptions:
;*******************************************************************************
_MEM_   equ     <memmove>

%       public  _MEM_
_MEM_   proc \
        uses edi esi, \
        dst:ptr byte, \
        src:ptr byte, \
        count:IWORD

        ; dst   = destination pointer
        ; src   = source pointer
        ; count = number of bytes to copy
        ;
        ; Register roles inside this procedure:
        ;   esi = running source pointer       (saved/restored via USES)
        ;   edi = running destination pointer  (saved/restored via USES)
        ;   ecx = byte count, later REP count
        ;   eax, edx = scratch (leading/trailing byte counts, src+count)
        ;
        ; Every exit goes through M_EXIT, which reloads EAX from [dst] so
        ; the original destination pointer is returned.
        ;
        ; The upward-copy paths never execute CLD, so they rely on the
        ; direction flag being clear on entry.  The downward-copy paths
        ; execute STD and always execute CLD again before returning.

        mov     esi,[src]       ; esi = source
        mov     edi,[dst]       ; edi = dest
        mov     ecx,[count]     ; ecx = number of bytes to move

;
; Check for overlapping buffers (all compares are unsigned):
;       If (dst <= src) Or (dst >= src + Count) Then
;               Do normal (Upwards) Copy
;       Else
;               Do Downwards Copy to avoid propagation
;
        cmp     edi,esi         ; dst <= src ?
        jbe     short CopyUp    ; yes, copy toward higher addresses

        mov     eax,esi
        add     eax,ecx         ; eax = src + count
        cmp     edi,eax         ; dst >= (src + count) ?
        jnae    CopyDown        ; no, copy toward lower addresses

;
; Copy toward higher addresses.
;
CopyUp:
;
; The algorithm for forward moves is to align the destination to a dword
; boundary so that dwords are moved with an aligned destination.  This
; occurs in 3 steps (L = total length).
;
;   - move x = ((4 - Dest & 3) & 3) bytes
;   - move y = ((L-x) >> 2) dwords
;   - move (L - x - y*4) bytes
;
        test    edi,11b         ; destination dword aligned?
        jnz     short byterampup ; if we are not dword aligned already, align

        mov     edx,ecx         ; byte count
        and     edx,11b         ; trailing byte count (0..3)
        shr     ecx,2           ; shift down to dword count
        rep     movsd           ; move all of our dwords

        jmp     dword ptr TrailingVecs[edx*4] ; dispatch on trailing count

        align   @WordSize
TrailingVecs    dd      Trail0, Trail1, Trail2, Trail3

;
; Trailing-byte handlers for the upward copy.  On entry esi/edi point just
; past the last dword copied, i.e. at the first trailing byte.
;
        align   @WordSize
Trail3:
        mov     ax,[esi]        ; copy trailing bytes 0..1 as a word
        mov     [edi],ax
        mov     al,[esi+2]      ; copy trailing byte 2
        mov     [edi+2],al

        M_EXIT

        align   @WordSize
Trail2:
        mov     ax,[esi]        ; copy both trailing bytes as a word
        mov     [edi],ax

        M_EXIT

        align   @WordSize
Trail1:
        mov     al,[esi]        ; copy the single trailing byte
        mov     [edi],al

Trail0:                         ; Trail1 falls through to the common exit
        M_EXIT

;
; Code to do optimal memory copies for non-dword-aligned destinations.
;
        align   @WordSize
byterampup:

; The following length check is done for two reasons:
;
;    1. to ensure that the actual move length is greater than any possible
;       alignment move, and
;
;    2. to skip the multiple move logic for small moves where it would
;       be faster to move the bytes with one instruction.
;
; Leading bytes could be handled faster via split-out optimizations and
; a jump table (as trailing bytes are), at the cost of size.
;
; At this point, ECX is the # of bytes to copy.  The # of leading bytes
; (EDX) is computed below, once the length check has passed.
;
        cmp     ecx,12          ; check for reasonable length
        jbe     short ShortMove ; do short move if appropriate

        mov     edx,edi
        neg     edx
        and     edx,11b         ; # of leading bytes = (-dest) & 3
        sub     ecx,edx         ; subtract out leading bytes
        mov     eax,ecx         ; # of bytes remaining after leading
        mov     ecx,edx         ; # of leading bytes
        rep     movsb           ; copy leading bytes; edi is now dword aligned
        mov     ecx,eax         ; compute number of dwords to move
        and     eax,11b         ; # of trailing bytes
        shr     ecx,2           ; # of whole dwords
        rep     movsd           ; move whole dwords
        jmp     dword ptr TrailingVecs[eax*4] ; copy trailing bytes

;
; Simple copy, byte at a time.  This could be faster with a jump table and
; split-out optimizations, copying as much as possible a dword/word at a
; time and using MOV with displacements, but such short cases are unlikely
; to be called often (it seems silly to call a function to copy less than
; three dwords).
;
        align   @WordSize
ShortMove:
        rep     movsb           ; ecx (<= 12) bytes, upward

        M_EXIT

;
; Copy down to avoid propagation in overlapping buffers.
;
        align   @WordSize
CopyDown:
        std                     ; Set Direction Flag = Down
        add     esi,ecx         ; point to byte after end of source buffer
        add     edi,ecx         ; point to byte after end of dest buffer
;
; See if the destination end (the address just past the last destination
; byte) is dword aligned.
;
        test    edi,11b
        jnz     short byterampup_copydown       ; not dword aligned
;
; Destination end is dword aligned.
;
        mov     edx,ecx         ; set aside count of bytes to copy
        and     edx,11b         ; # of trailing bytes to copy
        sub     esi,4           ; point to start of first dword to copy
        sub     edi,4           ; point to start of first dword to copy to
        shr     ecx,2           ; dwords to copy
        rep     movsd           ; copy as many dwords as possible
        jmp     dword ptr TrailingVecs_copydown[edx*4] ; do any trailing bytes

        align   @WordSize
TrailingVecs_copydown   label   dword
        dd      Trail0_copydown
        dd      Trail1_copydown
        dd      Trail2_copydown
        dd      Trail3_copydown

;
; Trailing-byte handlers for the downward copy.  On entry esi/edi point at
; the dword position just below the last dword copied, so the remaining
; (lowest-addressed) bytes of the buffers are the top bytes of that
; position: offset +3 for one byte, +2..+3 for two, +1..+3 for three.
; Each handler restores the direction flag before returning.
;
        align   @WordSize
Trail3_copydown:
        mov     ax,[esi+2]      ; copy the upper two of the three bytes
        mov     [edi+2],ax
        mov     al,[esi+1]      ; copy the lowest byte
        mov     [edi+1],al
        cld                     ; Set Direction Flag = Up

        M_EXIT

        align   @WordSize
Trail2_copydown:
        mov     ax,[esi+2]      ; copy both remaining bytes as a word
        mov     [edi+2],ax
        cld                     ; Set Direction Flag = Up

        M_EXIT

        align   @WordSize
Trail1_copydown:
        mov     al,[esi+3]      ; copy the single remaining byte
        mov     [edi+3],al

Trail0_copydown:                ; Trail1_copydown falls through to here
        cld                     ; Set Direction Flag = Up

        M_EXIT

;
; Destination end is not dword aligned.
;
; Leading bytes could be handled faster via split-out optimizations and
; a jump table (as trailing bytes are), at the cost of size.
;
; At this point, ECX is the # of bytes to copy, and ESI/EDI point one byte
; past the end of the source/destination buffers.  The # of leading bytes
; (EDX) is computed below, once the length check has passed.
;
        align   @WordSize
byterampup_copydown:
        dec     esi             ; point to first leading src byte
        dec     edi             ; point to first leading dest byte
        cmp     ecx,12          ; check for reasonable length
        jbe     short ShortMove_copydown ; do short move if appropriate

;
; The number of leading bytes is (address one past the end of dest) & 3:
; copying that many bytes downward leaves the next destination dword
; starting on a dword boundary.  EDX must be derived from EDI here; it
; holds no defined value on entry to this path.  (LEA is used because EDI
; has already been decremented to the last destination byte.)
;
        lea     edx,[edi+1]     ; edx = address one past the end of dest
        and     edx,11b         ; # of leading bytes (1..3 on this path)
        sub     ecx,edx         ; # of bytes after leading bytes
        mov     eax,ecx         ; set aside # of bytes remaining
        mov     ecx,edx         ; # of leading bytes
        rep     movsb           ; copy leading odd bytes
        mov     ecx,eax         ; # of remaining bytes
        and     eax,11b         ; # of trailing bytes
        sub     esi,3           ; point to start of first whole src dword
        sub     edi,3           ; point to start of first whole dest dword
        shr     ecx,2           ; # of whole dwords
        rep     movsd           ; copy whole dwords
        jmp     dword ptr TrailingVecs_copydown[eax*4] ; copy trailing bytes

        align   @WordSize
ShortMove_copydown:
        rep     movsb           ; ecx (<= 12) bytes, downward
        cld                     ; Set Direction Flag = Up

        M_EXIT

_MEM_   endp
end