NT4/private/sdktools/memt/i386/memcpy.asm

        page    ,132
        title   memcpy - Copy source memory bytes to destination
;***
;memcpy.asm - contains memcpy and memmove routines
;
;       Copyright (c) 1986-1991, Microsoft Corporation. All right reserved.
;
;Purpose:
;       memcpy() copies a source memory buffer to a destination buffer.
;       Overlapping buffers are not treated specially, so propagation may occur.
;       memmove() copies a source memory buffer to a destination buffer.
;       Overlapping buffers are treated specially, to avoid propagation.
;
;Revision History:
;       02-06-87  JCR   Added memmove entry
;       04-08-87  JCR   Conditionalized memmove/memcpy entries
;       06-30-87  SKS   Rewritten for speed and size
;       08-21-87  SKS   Fix return value for overlapping copies
;       05-17-88  SJM   Add model-independent (large model) ifdef
;       08-04-88  SJM   convert to cruntime/ add 32-bit support
;       08-19-88  JCR   Minor 386 corrections/enhancements
;       10-25-88  JCR   General cleanup for 386-only code
;       03-23-90  GJF   Changed to _stdcall. Also, fixed the copyright.
;       05-10-91  GJF   Back to _cdecl, sigh...
;       11-13-92  SRW   Make it fast with unaligned arguments
;
;*******************************************************************************

        .xlist
        include cruntime.inc
        .list

M_EXIT  macro
;
; Common function exit: load the return value and return to the caller.
; Expands inside a proc that declares the "dst" argument.
;
        mov     eax,[dst]       ; EAX = destination pointer passed by caller
ifndef  _STDCALL_
        ret                     ; _cdecl return (no argument bytes popped)
else
        ret     2*DPSIZE + ISIZE ; _stdcall return (pops 2*DPSIZE + ISIZE bytes)
endif
        endm    ; M_EXIT

        CODESEG

page
;***
;memcpy - Copy source buffer to destination buffer
;
;Purpose:
;       memcpy() copies a source memory buffer to a destination memory buffer.
;       This routine does NOT recognize overlapping buffers, and thus can lead
;       to propagation.
;       For cases where propagation must be avoided, memmove() must be used.
;
;       Algorithm:
;
;       void * memcpy(void * dst, void * src, size_t count)
;       {
;               void * ret = dst;
;
;               /*
;                * copy from lower addresses to higher addresses
;                */
;               while (count--)
;                       *dst++ = *src++;
;
;               return(ret);
;       }
;
;memmove - Copy source buffer to destination buffer
;
;Purpose:
;       memmove() copies a source memory buffer to a destination memory buffer.
;       This routine recognizes overlapping buffers to avoid propagation.
;       For cases where propagation is not a problem, memcpy() can be used.
;
;   Algorithm:
;
;       void * memmove(void * dst, void * src, size_t count)
;       {
;               void * ret = dst;
;
;               if (dst <= src || dst >= (src + count)) {
;                       /*
;                        * Non-Overlapping Buffers
;                        * copy from lower addresses to higher addresses
;                        */
;                       while (count--)
;                               *dst++ = *src++;
;                       }
;               else {
;                       /*
;                        * Overlapping Buffers
;                        * copy from higher addresses to lower addresses
;                        */
;                       dst += count - 1;
;                       src += count - 1;
;
;                       while (count--)
;                               *dst-- = *src--;
;                       }
;
;               return(ret);
;       }
;
;
;Entry:
;       void *dst = pointer to destination buffer
;       const void *src = pointer to source buffer
;       size_t count = number of bytes to copy
;
;Exit:
;       Returns a pointer to the destination buffer in EAX
;
;Uses:
;       EAX, ECX, EDX (ESI and EDI are saved and restored by the proc)
;
;Exceptions:
;*******************************************************************************

        _MEM_     equ <memmove>

%       public  _MEM_
_MEM_   proc \
        uses edi esi, \
        dst:ptr byte, \
        src:ptr byte, \
        count:IWORD

;
; Arguments:
;       dst   - destination pointer
;       src   - source pointer
;       count - number of bytes to copy
;
; Register roles in the body:
;       ESI = running source pointer      (saved/restored via "uses")
;       EDI = running destination pointer (saved/restored via "uses")
;       ECX = byte / dword count for the REP string instructions
;       EAX, EDX = scratch (alignment and trailing-byte counts)
;
; The upward paths do not execute CLD; they rely on the direction flag
; already being clear on entry.  The downward paths set it with STD and
; clear it again before returning.
;

        mov     esi,[src]       ; esi = source
        mov     edi,[dst]       ; edi = dest
        mov     ecx,[count]     ; ecx = number of bytes to move

;
; Check for overlapping buffers:
;       If (dst <= src) Or (dst >= src + Count) Then
;               Do normal (Upwards) Copy
;       Else
;               Do Downwards Copy to avoid propagation
;

        cmp     edi,esi         ; dst <= src ?
        jbe     short CopyUp    ; yes, copy toward higher addresses

        mov     eax,esi
        add     eax,ecx         ; eax = src + count (one past end of source)
        cmp     edi,eax         ; dst >= (src + count) ?
        jnae    CopyDown        ; no, copy toward lower addresses

;
; Copy toward higher addresses.
;
CopyUp:

;
; The algorithm for forward moves is to align the destination to a dword
; boundary and so we can move dwords with an aligned destination.  This
; occurs in 3 steps.
;
;   - move x = ((4 - Dest & 3) & 3) bytes
;   - move y = ((L-x) >> 2) dwords
;   - move (L - x - y*4) bytes
;
        test    edi,11b          ; destination dword aligned?
        jnz     short byterampup ; if we are not dword aligned already, align

        mov     edx,ecx         ; byte count
        and     edx,11b         ; trailing byte count
        shr     ecx,2           ; shift down to dword count
        rep     movsd           ; move all of our dwords

        jmp     dword ptr TrailingVecs[edx*4]   ; dispatch on trailing count

;
; Jump table indexed by the number of trailing bytes (0..3) left after
; the upward dword copy.
;
        align   @WordSize
TrailingVecs    dd      Trail0, Trail1, Trail2, Trail3

;
; Upward trailing-byte handlers.  ESI/EDI point just past the last dword
; copied, i.e. at the first trailing byte.
;
        align   @WordSize
Trail3:
        mov     ax,[esi]        ; trailing bytes 0 and 1
        mov     [edi],ax
        mov     al,[esi+2]      ; trailing byte 2
        mov     [edi+2],al

        M_EXIT

        align   @WordSize
Trail2:
        mov     ax,[esi]        ; trailing bytes 0 and 1
        mov     [edi],ax

        M_EXIT

        align   @WordSize
Trail1:
        mov     al,[esi]        ; single trailing byte
        mov     [edi],al

Trail0:
        M_EXIT

;
; Code to do optimal memory copies for non-dword-aligned destinations.
;
        align   @WordSize
byterampup:

; The following length check is done for two reasons:
;
;    1. to ensure that the actual move length is greater than any possible
;       alignment move, and
;
;    2. to skip the multiple move logic for small moves where it would
;       be faster to move the bytes with one instruction.
;
; Leading bytes could be handled faster via split-out optimizations and
; a jump table (as trailing bytes are), at the cost of size.
;
; At this point, ECX is the # of bytes to copy.  EDX (the # of leading
; bytes needed to reach a dword-aligned destination) is computed below.
;
        cmp     ecx,12                  ; check for reasonable length
        jbe     short ShortMove         ; do short move if appropriate
        mov     edx,edi
        neg     edx
        and     edx,11b                 ; # of leading bytes = (-dst) & 3
        sub     ecx,edx                 ; subtract out leading bytes
        mov     eax,ecx                 ; # of bytes remaining after leading
        mov     ecx,edx                 ; # of leading bytes
        rep     movsb                   ; copy leading bytes
        mov     ecx,eax                 ; compute number of dwords to move
        and     eax,11b                 ; # of trailing bytes
        shr     ecx,2                   ; # of whole dwords
        rep     movsd                   ; move whole dwords
        jmp     dword ptr TrailingVecs[eax*4] ; copy trailing bytes

;
; Simple copy, byte at a time. This could be faster with a jump table and
; split-out optimizations, copying as much as possible a dword/word at a
; time and using MOV with displacements, but such short cases are unlikely
; to be called often (it seems silly to call a function to copy less than
; three dwords).
;
        align   @WordSize
ShortMove:
        rep movsb

        M_EXIT

;
; Copy down to avoid propagation in overlapping buffers.
;
        align   @WordSize
CopyDown:
        std                     ; Set Direction Flag = Down
        add     esi,ecx         ; point to byte after end of source buffer
        add     edi,ecx         ; point to byte after end of dest buffer
;
; See if the destination end (where the downward copy starts) is dword
; aligned
;

        test    edi,11b
        jnz     short byterampup_copydown       ; not dword aligned
;
; Destination end is dword aligned
;
        mov     edx,ecx         ; set aside count of bytes to copy
        and     edx,11b         ; # of trailing bytes to copy
        sub     esi,4           ; point to start of first dword to copy
        sub     edi,4           ; point to start of first dword to copy to
        shr     ecx,2           ; dwords to copy
        rep     movsd           ; copy as many dwords as possible
        jmp     dword ptr TrailingVecs_copydown[edx*4] ;do any trailing bytes

;
; Jump table indexed by the number of trailing bytes (0..3) left after
; the downward dword copy.
;
        align   @WordSize
TrailingVecs_copydown   label   dword
        dd      Trail0_copydown
        dd      Trail1_copydown
        dd      Trail2_copydown
        dd      Trail3_copydown

;
; Downward trailing-byte handlers.  ESI/EDI point 4 bytes below the lowest
; dword copied, so the bytes still to copy sit at offsets +3, +2, +1
; (highest address first).  Each handler restores the direction flag.
;
        align   @WordSize
Trail3_copydown:
        mov     ax,[esi+2]      ; bytes at +2 and +3
        mov     [edi+2],ax
        mov     al,[esi+1]      ; byte at +1
        mov     [edi+1],al
        cld                     ; Set Direction Flag = Up

        M_EXIT

        align   @WordSize
Trail2_copydown:
        mov     ax,[esi+2]      ; bytes at +2 and +3
        mov     [edi+2],ax
        cld                     ; Set Direction Flag = Up

        M_EXIT

        align   @WordSize
Trail1_copydown:
        mov     al,[esi+3]      ; byte at +3
        mov     [edi+3],al
Trail0_copydown:
        cld                     ; Set Direction Flag = Up

        M_EXIT

;
; Destination end is not dword aligned.
;
; Leading bytes could be handled faster via split-out optimizations and
; a jump table (as trailing bytes are), at the cost of size.
;
; At this point, ECX is the # of bytes to copy and ESI/EDI point one byte
; past the end of the source/destination buffers.  EDX is NOT valid on
; entry; the # of leading bytes is computed below.
;
        align   @WordSize
byterampup_copydown:
        dec     esi             ; point to first leading src byte
        dec     edi             ; point to first leading dest byte
        cmp     ecx,12          ; check for reasonable length
        jbe     short ShortMove_copydown ; do short move if appropriate
;
; # of leading bytes = (one-past-end of dest) & 3.  EDI was just
; decremented, so one-past-end is EDI + 1.  Copying that many bytes
; downward leaves the remaining destination region ending on a dword
; boundary, so the dword loop below runs with an aligned destination.
;
        lea     edx,[edi+1]     ; edx = one past end of dest
        and     edx,11b         ; # of leading bytes (1..3 on this path)
        sub     ecx,edx         ; # of bytes after leading bytes
        mov     eax,ecx         ; set aside # of bytes remaining
        mov     ecx,edx         ; # of leading bytes
        rep     movsb           ; copy leading odd bytes
        mov     ecx,eax         ; # of remaining bytes
        and     eax,11b         ; # of trailing bytes
        sub     esi,3           ; point to start of first whole src dword
        sub     edi,3           ; point to start of first whole dest dword
        shr     ecx,2           ; # of whole dwords
        rep     movsd           ; copy whole dwords
        jmp     dword ptr TrailingVecs_copydown[eax*4]

;
; Short downward copy, byte at a time.  ESI/EDI already point at the last
; byte of each buffer.
;
        align   @WordSize
ShortMove_copydown:
        rep     movsb
        cld                     ; Set Direction Flag = Up

        M_EXIT

_MEM_   endp
        end