644 lines
21 KiB
NASM
644 lines
21 KiB
NASM
title "Memory functions"
|
|
;++
|
|
;
|
|
; Copyright (c) 2000 Microsoft Corporation
|
|
;
|
|
; Module Name:
|
|
;
|
|
; movemem.asm
|
|
;
|
|
; Abstract:
|
|
;
|
|
; This module implements functions to fill, copy, and compare blocks of
|
|
; memory.
|
|
;
|
|
; Author:
|
|
;
|
|
; David N. Cutler (davec) 6-Jul-2000
|
|
;
|
|
; Environment:
|
|
;
|
|
; Any mode.
|
|
;
|
|
;--
|
|
|
|
include ksamd64.inc
|
|
|
|
altentry RtlCopyMemoryAlternate
|
|
|
|
subttl "Compare Memory"
|
|
;++
|
|
;
|
|
; SIZE_T
|
|
; RtlCompareMemory (
|
|
; IN PVOID Source1,
|
|
; IN PVOID Source2,
|
|
; IN SIZE_T Length
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; This function compares two unaligned blocks of memory and returns the
|
|
; number of bytes that compared equal.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Source1 (rcx) - Supplies a pointer to the first block of memory to
|
|
; compare.
|
|
;
|
|
; Source2 (rdx) - Supplies a pointer to the second block of memory to
|
|
; compare.
|
|
;
|
|
; Length (r8) - Supplies the Length, in bytes, of the memory to be
|
|
; compared.
|
|
;
|
|
; Return Value:
|
|
;
|
|
; The number of bytes that compared equal is returned as the function
|
|
; value. If all bytes compared equal, then the length of the original
|
|
; block of memory is returned.
|
|
;
|
|
;--
|
|
|
|
NESTED_ENTRY RtlCompareMemory, _TEXT$00

;
; rsi and rdi drive the string compare instructions below. Both are
; nonvolatile, so they are pushed here and popped on each exit path.
;

        push_reg rdi
        push_reg rsi

        END_PROLOGUE

        mov     rdi, rdx                ; rdi = Source2
        mov     rsi, rcx                ; rsi = Source1
        xor     edx, ecx                ; bits in which the two addresses differ
        and     edx, 07h                ; only the offset within a quadword matters
        jnz     short RlCMByteOnly      ; offsets differ, cannot align both
        cmp     r8, 8                   ; fewer than 8 bytes in total?
        jb      short RlCMByteOnly      ; yes, compare byte by byte

;
; Both buffers share the same offset within a quadword and at least 8 bytes
; are to be compared. The match count on this path is derived from how far
; rdi advanced, so its starting value is kept in r9.
;

        mov     r9, rdi                 ; r9 = starting Source2 address
        neg     ecx                     ; rcx = bytes up to the next
        and     ecx, 07h                ;   8-byte boundary (0..7)
        jz      short RlCMQuads         ; already on a boundary
        sub     r8, rcx                 ; those bytes leave the running length
        repe cmpsb                      ; compare the leading bytes
        jnz     short RlCMMismatch      ; a leading byte differed

RlCMQuads:
        mov     rcx, r8                 ; rcx = length rounded down to a
        and     rcx, -8                 ;   multiple of 8
        jz      short RlCMTail          ; no whole quadwords, rcx is zero
        sub     r8, rcx                 ; r8 = bytes left over after quadwords
        shr     rcx, 3                  ; rcx = quadword count
        repe cmpsq                      ; compare quadwords
        jz      short RlCMTail          ; all equal, rcx is zero

;
; A quadword differed. Step both pointers back onto it and turn the
; remaining quadword count (including the one that differed) into bytes so
; the byte compare below can locate the first differing byte.
;

        inc     rcx                     ; count the differing quadword again
        sub     rdi, 8                  ; rewind Source2 pointer
        sub     rsi, 8                  ; rewind Source1 pointer
        shl     rcx, 3                  ; quadwords -> bytes

RlCMTail:
        add     r8, rcx                 ; r8 = total bytes still to compare
        jz      short RlCMCount         ; nothing left, everything matched
        mov     rcx, r8                 ; rcx = byte count
        repe cmpsb                      ; compare trailing bytes
        jz      short RlCMCount         ; every byte matched

RlCMMismatch:
        dec     rdi                     ; cmpsb stepped past the differing byte

RlCMCount:
        mov     rax, rdi                ; matched bytes = current Source2
        sub     rax, r9                 ;   pointer minus its starting value
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

;
; The buffers cannot be aligned together, or fewer than 8 bytes were
; requested. Compare one byte at a time and derive the match count from the
; count left in rcx.
;

RlCMByteOnly:
        test    r8, r8                  ; zero length?
        jz      short RlCMDone          ; yes, return zero
        mov     rcx, r8                 ; rcx = byte count
        repe cmpsb                      ; compare bytes
        jz      short RlCMDone          ; all matched, return full length
        inc     rcx                     ; add back the byte that differed
        sub     r8, rcx                 ; r8 = bytes that matched before it

RlCMDone:
        mov     rax, r8                 ; return match count
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

NESTED_END RtlCompareMemory, _TEXT$00
|
|
|
|
subttl "Compare Memory 32-bits"
|
|
;++
|
|
;
|
|
; SIZE_T
|
|
; RtlCompareMemoryUlong (
|
|
; IN PVOID Source,
|
|
; IN SIZE_T Length,
|
|
; IN ULONG Pattern
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; This function compares a block of dword aligned memory with a specified
|
|
; pattern 32-bits at a time.
|
|
;
|
|
; N.B. The low two bits of the length are assumed to be zero and are
|
|
; ignored.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Source (rcx) - Supplies a pointer to the block of memory to compare.
|
|
;
|
|
; Length (rdx) - Supplies the length, in bytes, of the memory to compare.
|
|
;
|
|
; Pattern (r8d) - Supplies the pattern to be compared against.
|
|
;
|
|
; Return Value:
|
|
;
|
|
; The number of bytes that compared equal is returned as the function
|
|
; value. If all bytes compared equal, then the length of the original
|
|
; block of memory is returned.
|
|
;
|
|
;--
|
|
|
|
NESTED_ENTRY RtlCompareMemoryUlong, _TEXT$00

;
; rdi is the scan pointer for scasd and is nonvolatile, so it is saved.
;

        push_reg rdi

        END_PROLOGUE

        mov     rdi, rcx                ; rdi = address to scan
        shr     rdx, 2                  ; rdx = length in dwords (low 2 bits dropped)
        jz      short RlCUResult        ; nothing to compare, result is zero
        mov     eax, r8d                ; eax = pattern scasd compares against
        mov     rcx, rdx                ; rcx = dword count
        repe scasd                      ; scan while memory equals the pattern
        jz      short RlCUResult        ; every dword matched, rdx is the full count
        inc     rcx                     ; add back the dword that differed
        sub     rdx, rcx                ; rdx = dwords that matched before it

RlCUResult:
        lea     rax, [rdx*4]            ; convert matched dwords to bytes
        pop     rdi                     ; restore nonvolatile register
        ret                             ; return

NESTED_END RtlCompareMemoryUlong, _TEXT$00
|
|
|
|
subttl "Copy Memory"
|
|
;++
|
|
;
|
|
; VOID
|
|
; RtlCopyMemory (
|
|
; OUT VOID UNALIGNED *Destination,
|
|
; IN CONST VOID UNALIGNED * Sources,
|
|
; IN SIZE_T Length
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; This function copies nonoverlapping memory from one unaligned buffer to another.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Destination (rcx) - Supplies a pointer to the destination buffer.
|
|
;
|
|
; Sources (rdx) - Supplies a pointer to the source buffer.
|
|
;
|
|
; Length (r8) - Supplies the length, in bytes, of the copy operation.
|
|
;
|
|
; Return Value:
|
|
;
|
|
; None.
|
|
;
|
|
;--
|
|
|
|
NESTED_ENTRY RtlCopyMemory, _TEXT$00

;
; rsi and rdi drive the string move instructions below. Both are
; nonvolatile, so they are pushed here and popped at the single exit.
;

        push_reg rdi
        push_reg rsi

        END_PROLOGUE

;
; Secondary entry point placed after the prologue. RtlMoveMemory jumps here
; once it has executed the same two pushes and decided a forward copy is
; safe.
;

        ALTERNATE_ENTRY RtlCopyMemoryAlternate

        mov     rsi, rdx                ; rsi = source
        mov     rdi, rcx                ; rdi = destination

;
; Pick the widest element size for which source and destination share the
; same offset: quadword first, then doubleword, then word. edx carries the
; differing low address bits from one check to the next.
;

        xor     edx, ecx                ; bits in which the addresses differ
        and     edx, 07h                ; offset within a quadword
        jnz     short RlCPTryDwords     ; not quadword compatible
        cmp     r8, 8                   ; at least one quadword of data?
        jb      short RlCPTail          ; no, move it as bytes

;
; Quadword compatible: move bytes up to the next 8-byte boundary.
;

        neg     ecx                     ; rcx = bytes up to the next
        and     ecx, 07h                ;   8-byte boundary (0..7)
        jz      short RlCPQuads         ; already on a boundary
        sub     r8, rcx                 ; those bytes leave the running length
        rep movsb                       ; move leading bytes

;
; Move whole quadwords.
;

RlCPQuads:
        mov     rcx, r8                 ; rcx = length rounded down to a
        and     rcx, -8                 ;   multiple of 8
        jz      short RlCPTail          ; no whole quadwords
        sub     r8, rcx                 ; r8 = bytes left over
        shr     rcx, 3                  ; rcx = quadword count
        rep movsq                       ; move quadwords

;
; Common tail: move whatever byte count is left in r8, then return. Every
; path through the routine ends here.
;

RlCPTail:
        test    r8, r8                  ; anything left?
        jz      short RlCPExit          ; no
        mov     rcx, r8                 ; rcx = byte count
        rep movsb                       ; move trailing bytes

RlCPExit:
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

;
; Not quadword compatible. edx still holds the differing bits 0..2; test
; whether the buffers at least share the same offset within a doubleword.
;

RlCPTryDwords:
        and     edx, 03h                ; offset within a doubleword
        jnz     short RlCPTryWords      ; not doubleword compatible
        cmp     r8, 4                   ; at least one doubleword of data?
        jb      short RlCPTail          ; no, move it as bytes

;
; Doubleword compatible: move bytes up to the next 4-byte boundary.
;

        neg     ecx                     ; rcx = bytes up to the next
        and     ecx, 03h                ;   4-byte boundary (0..3)
        jz      short RlCPDwords        ; already on a boundary
        sub     r8, rcx                 ; those bytes leave the running length
        rep movsb                       ; move leading bytes

;
; Move whole doublewords.
;

RlCPDwords:
        mov     rcx, r8                 ; rcx = length rounded down to a
        and     rcx, -4                 ;   multiple of 4
        jz      short RlCPTail          ; no whole doublewords
        sub     r8, rcx                 ; r8 = bytes left over
        shr     rcx, 2                  ; rcx = doubleword count
        rep movsd                       ; move doublewords
        jmp     short RlCPTail          ; finish with the common tail

;
; Not doubleword compatible. edx now holds the differing bits 0..1; test
; whether the buffers share the same offset within a word.
;

RlCPTryWords:
        and     edx, 01h                ; offset within a word
        jnz     short RlCPTail          ; no common alignment, move all as bytes
        cmp     r8, 2                   ; at least one word of data?
        jb      short RlCPTail          ; no, move it as bytes

;
; Word compatible: move at most one byte to reach a 2-byte boundary.
;

        neg     ecx                     ; rcx = 1 if the destination is odd,
        and     ecx, 01h                ;   otherwise 0
        jz      short RlCPWords         ; already on a boundary
        sub     r8, rcx                 ; that byte leaves the running length
        rep movsb                       ; move the leading byte

;
; Move whole words.
;

RlCPWords:
        mov     rcx, r8                 ; rcx = length rounded down to a
        and     rcx, -2                 ;   multiple of 2
        jz      short RlCPTail          ; no whole words
        sub     r8, rcx                 ; r8 = bytes left over
        shr     rcx, 1                  ; rcx = word count
        rep movsw                       ; move words
        jmp     short RlCPTail          ; finish with the common tail

NESTED_END RtlCopyMemory, _TEXT$00
|
|
|
|
subttl "Copy Memory NonTemporal"
|
|
;++
|
|
;
|
|
; VOID
|
|
; RtlCopyMemoryNonTemporal (
|
|
; OUT VOID UNALIGNED *Destination,
|
|
; IN CONST VOID UNALIGNED * Sources,
|
|
; IN SIZE_T Length
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; This function copies nonoverlapping memory from one buffer to another using
|
|
; nontemporal moves that do not pollute the cache.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Destination (rcx) - Supplies a pointer to the destination buffer.
|
|
;
|
|
; Sources (rdx) - Supplies a pointer to the source buffer.
|
|
;
|
|
; Length (r8) - Supplies the length, in bytes, of the copy operation.
|
|
;
|
|
; Return Value:
|
|
;
|
|
; None.
|
|
;
|
|
;--
|
|
|
|
NESTED_ENTRY RtlCopyMemoryNonTemporal, _TEXT$00

;
; rsi and rdi are used as the source and destination pointers. Both are
; nonvolatile, so they are pushed here and popped at the single exit.
;

        push_reg rdi
        push_reg rsi

        END_PROLOGUE

        mov     rsi, rdx                ; rsi = source
        mov     rdi, rcx                ; rdi = destination
        cmp     r8, 16                  ; at least 16 bytes of data?
        jb      RlNTTail                ; no, move it all as bytes

;
; Move bytes until the destination sits on a 16-byte boundary. Only the
; destination is aligned; the source is always read with unaligned loads.
;

        neg     ecx                     ; rcx = bytes up to the next
        and     ecx, 0fh                ;   16-byte boundary (0..15)
        jz      short RlNTBlocks64      ; already on a boundary
        sub     r8, rcx                 ; those bytes leave the running length
        rep movsb                       ; move leading bytes

;
; Move 64-byte blocks: four unaligned loads followed by four nontemporal
; stores. rax counts the bytes remaining in this phase.
;

RlNTBlocks64:
        mov     rax, r8                 ; rax = length rounded down to a
        and     rax, -64                ;   multiple of 64
        jz      short RlNTBlocks16      ; no 64-byte blocks
        sub     r8, rax                 ; r8 = bytes left over

RlNTLoop64:
        prefetchnta [rsi]               ; prefetch first byte of the block
        prefetchnta [rsi + 63]          ; prefetch last byte of the block
        movdqu  xmm0, [rsi]             ; load 64 bytes from the source
        movdqu  xmm1, [rsi + 16]        ;
        movdqu  xmm2, [rsi + 32]        ;
        movdqu  xmm3, [rsi + 48]        ;
        movntdq [rdi], xmm0             ; store them nontemporally
        movntdq [rdi + 16], xmm1        ;
        movntdq [rdi + 32], xmm2        ;
        movntdq [rdi + 48], xmm3        ;
        add     rsi, 64                 ; advance source
        add     rdi, 64                 ; advance destination
        sub     rax, 64                 ; one block fewer
        jnz     short RlNTLoop64        ; more blocks to move

;
; Move the remaining 16-byte blocks one at a time.
;

RlNTBlocks16:
        mov     rax, r8                 ; rax = length rounded down to a
        and     rax, -16                ;   multiple of 16
        jz      short RlNTTail          ; no 16-byte blocks
        sub     r8, rax                 ; r8 = bytes left over

RlNTLoop16:
        movdqu  xmm0, [rsi]             ; load 16 bytes from the source
        movntdq [rdi], xmm0             ; store them nontemporally
        add     rsi, 16                 ; advance source
        add     rdi, 16                 ; advance destination
        sub     rax, 16                 ; one block fewer
        jnz     short RlNTLoop16        ; more blocks to move

;
; Move whatever byte count is left in r8 with ordinary byte moves.
;

RlNTTail:
        test    r8, r8                  ; anything left?
        jz      short RlNTExit          ; no
        mov     rcx, r8                 ; rcx = byte count
        rep movsb                       ; move trailing bytes

;
; The store fence runs on every path, including copies shorter than 16
; bytes that issued no nontemporal store.
;

RlNTExit:
        sfence                          ; make sure all stores complete
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

NESTED_END RtlCopyMemoryNonTemporal, _TEXT$00
|
|
|
|
subttl "Fill Memory"
|
|
;++
|
|
;
|
|
; VOID
|
|
; RtlFillMemory (
|
|
; IN VOID UNALIGNED *Destination,
|
|
; IN SIZE_T Length,
|
|
; IN UCHAR Fill
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; This function fills a block of unaligned memory with a specified pattern.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Destination (rcx) - Supplies a pointer to the memory to fill.
|
|
;
|
|
; Length (rdx) - Supplies the length, in bytes, of the memory to fill.
|
|
;
|
|
; Fill (r8d) - Supplies the value to fill memory with.
|
|
;
|
|
; Return Value:
|
|
;
|
|
; None.
|
|
;
|
|
;--
|
|
|
|
NESTED_ENTRY RtlFillMemory, _TEXT$00

;
; rdi is the store pointer for stosb/stosq and is nonvolatile, so it is
; saved.
;

        push_reg rdi

        END_PROLOGUE

        mov     eax, r8d                ; al = fill byte
        mov     rdi, rcx                ; rdi = destination
        cmp     rdx, 8                  ; at least 8 bytes to fill?
        jb      short RlFMTail          ; no, fill them as bytes

;
; Store bytes until the destination sits on an 8-byte boundary.
;

        neg     ecx                     ; rcx = bytes up to the next
        and     ecx, 07h                ;   8-byte boundary (0..7)
        jz      short RlFMQuads         ; already on a boundary
        sub     rdx, rcx                ; those bytes leave the running length
        rep stosb                       ; fill leading bytes

;
; Store whole quadwords. The fill byte is first replicated into all eight
; bytes of rax. Only the low byte of r8 is used, so any upper bits passed
; in by the caller do not reach memory.
;

RlFMQuads:
        mov     rcx, rdx                ; rcx = length rounded down to a
        and     rcx, -8                 ;   multiple of 8
        jz      short RlFMTail          ; no whole quadwords
        sub     rdx, rcx                ; rdx = bytes left over
        shr     rcx, 3                  ; rcx = quadword count
        mov     ah, al                  ; ax = fill byte twice
        shl     eax, 16                 ; move that pair into bits 16..31
        mov     al, r8b                 ; rebuild the low pair from the
        mov     ah, al                  ;   original fill byte
        mov     r9, rax                 ; r9 = fill byte in the low four bytes
        shl     rax, 32                 ; copy those four bytes into the
        or      rax, r9                 ;   upper half of rax
        rep stosq                       ; fill quadwords

;
; Store whatever byte count is left in rdx. al still holds the fill byte on
; every path that reaches here.
;

RlFMTail:
        test    rdx, rdx                ; anything left?
        jz      short RlFMExit          ; no
        mov     rcx, rdx                ; rcx = byte count
        rep stosb                       ; fill trailing bytes

RlFMExit:
        pop     rdi                     ; restore nonvolatile register
        ret                             ; return

NESTED_END RtlFillMemory, _TEXT$00
|
|
|
|
subttl "Move Memory"
|
|
;++
|
|
;
|
|
; VOID
|
|
; RtlMoveMemory (
|
|
; OUT VOID UNALIGNED *Destination,
|
|
; IN CONST VOID UNALIGNED * Sources,
|
|
; IN SIZE_T Length
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; This function copies from one unaligned buffer to another.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Destination (rcx) - Supplies a pointer to the destination buffer.
|
|
;
|
|
; Sources (rdx) - Supplies a pointer to the source buffer.
|
|
;
|
|
; Length (r8) - Supplies the length, in bytes, of the copy operation.
|
|
;
|
|
; Return Value:
|
|
;
|
|
; None.
|
|
;
|
|
;--
|
|
|
|
NESTED_ENTRY RtlMoveMemory, _TEXT$00

;
; This prologue pushes the same registers in the same order as
; RtlCopyMemory, because the no-overlap cases below jump into that routine
; just past its prologue and return through its epilogue.
;

        push_reg rdi
        push_reg rsi

        END_PROLOGUE

;
; A forward copy is safe when the destination starts at or below the
; source, or starts above the last source byte. Both comparisons are
; unsigned.
;

        cmp     rcx, rdx                ; destination at or below source?
        jbe     RtlCopyMemoryAlternate  ; yes, forward copy is safe
        lea     rsi, [rdx + r8 - 1]     ; rsi = address of last source byte
        cmp     rcx, rsi                ; destination above the last source byte?
        ja      RtlCopyMemoryAlternate  ; yes, buffers do not overlap

;
; The destination starts inside the source range. Copy from the last byte
; down to the first so each source byte is read before it is overwritten.
; rsi already addresses the last source byte.
;

        lea     rdi, [rcx + r8 - 1]     ; rdi = address of last destination byte
        mov     rcx, r8                 ; rcx = byte count
        std                             ; string operations run downward
        rep movsb                       ; move bytes, highest address first
        cld                             ; put the direction flag back to forward
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

NESTED_END RtlMoveMemory, _TEXT$00
|
|
|
|
subttl "Prefetch Memory NonTemporal"
|
|
;++
|
|
;
|
|
; VOID
|
|
; RtlPrefetchMemoryNonTemporal (
|
|
; IN CONST PVOID Source,
|
|
; IN SIZE_T Length
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; This function prefetches memory at Source, for Length bytes into the
|
|
; closest cache to the processor.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Source (rcx) - Supplies a pointer to the memory to be prefetched.
|
|
;
|
|
; Length (rdx) - Supplies the length, in bytes, of the operation.
|
|
;
|
|
; Return Value:
|
|
;
|
|
; None.
|
|
;
|
|
;--
|
|
|
|
LEAF_ENTRY RtlPrefetchMemoryNonTemporal, _TEXT$00

;
; Issue one nontemporal prefetch per 64-byte step. The prefetch comes before
; the length test, so one is issued even when Length is zero. The loop
; continues only while the subtraction leaves a nonzero result without a
; borrow (unsigned "above").
;

RlPFNext:
        prefetchnta [rcx]               ; prefetch at the current address
        add     rcx, 64                 ; step to the next 64-byte chunk
        sub     rdx, 64                 ; 64 fewer bytes left to cover
        ja      RlPFNext                ; more bytes left to prefetch
        ret                             ; return

LEAF_END RtlPrefetchMemoryNonTemporal, _TEXT$00
|
|
|
|
subttl "Zero Memory"
|
|
;++
|
|
;
|
|
; VOID
|
|
; RtlZeroMemory (
|
|
; IN VOID UNALIGNED *Destination,
|
|
; IN SIZE_T Length
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; This function fills a block of unaligned memory with zero.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Destination (rcx) - Supplies a pointer to the memory to fill.
|
|
;
|
|
; Length (rdx) - Supplies the length, in bytes, of the memory to fill.
|
|
;
|
|
; Return Value:
|
|
;
|
|
; None.
|
|
;
|
|
;--
|
|
|
|
LEAF_ENTRY RtlZeroMemory, _TEXT$00

;
; Destination (rcx) and Length (rdx) are already in the registers where
; RtlFillMemory expects them. Supply zero as the fill value in r8 and
; transfer control there; RtlFillMemory returns directly to the caller.
;

        xor     r8d, r8d                ; fill value = 0 (clears all of r8)
        jmp     RtlFillMemory           ; finish in common code

LEAF_END RtlZeroMemory, _TEXT$00
|
|
|
|
end
|