2020-09-30 16:53:55 +02:00

644 lines
21 KiB
NASM

title "Memory functions"
;++
;
; Copyright (c) 2000 Microsoft Corporation
;
; Module Name:
;
; movemem.asm
;
; Abstract:
;
; This module implements functions to fill, copy, and compare blocks of
; memory.
;
; Author:
;
; David N. Cutler (davec) 6-Jul-2000
;
; Environment:
;
; Any mode.
;
;--
include ksamd64.inc
altentry RtlCopyMemoryAlternate
subttl "Compare Memory"
;++
;
; SIZE_T
; RtlCompareMemory (
;     IN PVOID Source1,
;     IN PVOID Source2,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;    Compares two blocks of memory, neither of which has to be aligned, and
;    reports how many leading bytes are identical in both.
;
; Arguments:
;
;    Source1 (rcx) - Address of the first block to compare.
;
;    Source2 (rdx) - Address of the second block to compare.
;
;    Length (r8) - Number of bytes to compare.
;
; Return Value:
;
;    The count of bytes, measured from the start of the blocks, that compared
;    equal. When the blocks are identical this is the original Length.
;
; Implementation Notes:
;
;    When both addresses share the same offset within a quadword and at
;    least 8 bytes are supplied, the bulk of the comparison is done a
;    quadword at a time; a quadword that differs is rescanned bytewise to
;    locate the exact byte. In every other case the blocks are compared
;    bytewise from the start.
;
;    The result is always derived from how far the Source2 pointer (rdi)
;    advanced from its starting value (kept in r9).
;
;    No cld is issued; the string instructions rely on the direction flag
;    being clear on entry.
;
;--

        NESTED_ENTRY RtlCompareMemory, _TEXT$00

        push_reg rdi                    ; preserve nonvolatile registers used
        push_reg rsi                    ;   by the string instructions

        END_PROLOGUE

        mov     rsi, rcx                ; rsi -> Source1
        mov     rdi, rdx                ; rdi -> Source2
        mov     r9, rdx                 ; remember where Source2 starts
        xor     edx, ecx                ; isolate the differing address bits
        test    dl, 07h                 ; same offset within a quadword?
        jnz     short RlCmpTail         ; if nz, no - compare bytewise
        cmp     r8, 8                   ; at least one quadword supplied?
        jb      short RlCmpTail         ; if b, no - compare bytewise

;
; Both blocks can be brought to a quadword boundary together. Compare the
; lead-in bytes (if any) that precede that boundary.
;

        neg     ecx                     ; lead-in = (-Source1) & 7
        and     ecx, 07h                ;
        jz      short RlCmpQuad         ; if z, already on a boundary
        sub     r8, rcx                 ; charge the lead-in against Length
  repe  cmpsb                           ; compare lead-in bytes
        jnz     short RlCmpMiss         ; if nz, a lead-in byte differed

;
; Compare whole quadwords.
;

RlCmpQuad:
        mov     rcx, r8                 ; rcx = number of whole quadwords
        shr     rcx, 3                  ;
        jz      short RlCmpTail         ; if z, fewer than 8 bytes remain
        and     r8, 07h                 ; r8 = bytes after the last quadword
  repe  cmpsq                           ; compare quadwords
        jz      short RlCmpTail         ; if z, every quadword matched
        sub     rsi, 8                  ; step both pointers back over the
        sub     rdi, 8                  ;   quadword that differed
        lea     r8, [r8 + rcx * 8 + 8]  ; bytes left = tail + (rcx + 1) * 8

;
; Compare whatever remains one byte at a time. This is also the entire
; comparison for incompatible alignment or a Length below 8.
;

RlCmpTail:
        test    r8, r8                  ; anything left to compare?
        jz      short RlCmpDone         ; if z, nothing left
        mov     rcx, r8                 ; rcx = bytes left to compare
  repe  cmpsb                           ; compare bytes
        jz      short RlCmpDone         ; if z, all remaining bytes matched

;
; cmpsb has stepped past the byte that differed; point back at it.
;

RlCmpMiss:
        dec     rdi                     ; rdi -> first differing byte

RlCmpDone:
        mov     rax, rdi                ; matched bytes = distance Source2
        sub     rax, r9                 ;   pointer advanced from its start
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

        NESTED_END RtlCompareMemory, _TEXT$00
subttl "Compare Memory 32-bits"
;++
;
; SIZE_T
; RtlCompareMemoryUlong (
;     IN PVOID Source,
;     IN SIZE_T Length,
;     IN ULONG Pattern
;     )
;
; Routine Description:
;
;    Scans a block of memory one dword at a time and reports how many
;    leading bytes are equal to the supplied 32-bit pattern.
;
;    N.B. Length is converted to a dword count by shifting right two bits,
;         so its low two bits are ignored.
;
; Arguments:
;
;    Source (rcx) - Address of the block of memory to scan.
;
;    Length (rdx) - Size, in bytes, of the block of memory to scan.
;
;    Pattern (r8d) - The 32-bit value each dword is compared against.
;
; Return Value:
;
;    The number of bytes, counted from Source, that matched the pattern.
;    This is always a multiple of four. If every dword matched, the value
;    is Length with its low two bits cleared.
;
;--

        NESTED_ENTRY RtlCompareMemoryUlong, _TEXT$00

        push_reg rdi                    ; preserve nonvolatile register

        END_PROLOGUE

        mov     rdi, rcx                ; rdi -> memory to scan
        mov     eax, r8d                ; eax = pattern used by scasd
        shr     rdx, 2                  ; rdx = number of dwords to scan
        jz      short RlCmuDone         ; if z, nothing to scan
        mov     rcx, rdx                ; rcx = scan count
  repe  scasd                           ; scan while dwords equal pattern
        jz      short RlCmuDone         ; if z, every dword matched

;
; rcx holds the dwords not yet examined; the dword that stopped the scan
; has already been counted off, so matched = total - (rcx + 1).
;

        sub     rdx, rcx                ; remove the unexamined dwords
        dec     rdx                     ; remove the dword that differed

RlCmuDone:
        mov     rax, rdx                ; convert matched dwords to bytes
        shl     rax, 2                  ;
        pop     rdi                     ; restore nonvolatile register
        ret                             ; return

        NESTED_END RtlCompareMemoryUlong, _TEXT$00
subttl "Copy Memory"
;++
;
; VOID
; RtlCopyMemory (
;     OUT VOID UNALIGNED *Destination,
;     IN CONST VOID UNALIGNED * Sources,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;    Copies Length bytes from one buffer to another. Neither buffer needs
;    to be aligned. The copy always runs from low addresses to high
;    addresses; the buffers are expected not to overlap.
;
; Arguments:
;
;    Destination (rcx) - Address of the destination buffer.
;
;    Sources (rdx) - Address of the source buffer.
;
;    Length (r8) - Number of bytes to copy.
;
; Return Value:
;
;    None.
;
; Implementation Notes:
;
;    The widest element (8, 4 or 2 bytes) for which source and destination
;    share the same offset is chosen. Bytes are copied until both pointers
;    reach a boundary of that width, whole elements are then copied, and
;    any leftover bytes are copied singly. If the addresses differ in their
;    lowest bit, the whole buffer is copied bytewise.
;
;    RtlCopyMemoryAlternate is a second entry point placed after the
;    prologue. Code that jumps to it must already have pushed rdi and then
;    rsi, because the epilogue here pops both.
;
;    No cld is issued; the string instructions rely on the direction flag
;    being clear on entry.
;
;--

        NESTED_ENTRY RtlCopyMemory, _TEXT$00

        push_reg rdi                    ; preserve nonvolatile registers used
        push_reg rsi                    ;   by the string instructions

        END_PROLOGUE

        ALTERNATE_ENTRY RtlCopyMemoryAlternate

        mov     rsi, rdx                ; rsi -> source
        mov     rdi, rcx                ; rdi -> destination

;
; Try quadword moves first.
;

        xor     edx, ecx                ; edx = differing address bits
        test    dl, 07h                 ; same offset within a quadword?
        jnz     short RlCpyLong         ; if nz, no - try doublewords
        cmp     r8, 8                   ; at least 8 bytes to move?
        jb      short RlCpyTail         ; if b, no - move bytes only

        neg     ecx                     ; lead-in = (-Destination) & 7
        and     ecx, 07h                ;
        jz      short RlCpyQuads        ; if z, already on a boundary
        sub     r8, rcx                 ; charge the lead-in against Length
  rep   movsb                           ; move lead-in bytes

RlCpyQuads:
        mov     rcx, r8                 ; rcx = number of whole quadwords
        shr     rcx, 3                  ;
        jz      short RlCpyTail         ; if z, none to move
        and     r8, 07h                 ; r8 = bytes after the last quadword
  rep   movsq                           ; move quadwords

;
; Move the bytes that remain. Every path finishes here.
;

RlCpyTail:
        test    r8, r8                  ; anything left to move?
        jz      short RlCpyExit         ; if z, nothing left
        mov     rcx, r8                 ; rcx = bytes left to move
  rep   movsb                           ; move bytes

RlCpyExit:
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

;
; The addresses do not share a quadword offset. Try doubleword moves.
;

RlCpyLong:
        test    dl, 03h                 ; same offset within a doubleword?
        jnz     short RlCpyWord         ; if nz, no - try words
        cmp     r8, 4                   ; at least 4 bytes to move?
        jb      short RlCpyTail         ; if b, no - move bytes only

        neg     ecx                     ; lead-in = (-Destination) & 3
        and     ecx, 03h                ;
        jz      short RlCpyLongs        ; if z, already on a boundary
        sub     r8, rcx                 ; charge the lead-in against Length
  rep   movsb                           ; move lead-in bytes

RlCpyLongs:
        mov     rcx, r8                 ; rcx = number of whole doublewords
        shr     rcx, 2                  ;
        jz      short RlCpyTail         ; if z, none to move
        and     r8, 03h                 ; r8 = bytes after last doubleword
  rep   movsd                           ; move doublewords
        jmp     short RlCpyTail         ; finish in common code

;
; The addresses do not share a doubleword offset. Try word moves.
;

RlCpyWord:
        test    dl, 01h                 ; same offset within a word?
        jnz     short RlCpyTail         ; if nz, no - move bytes only
        cmp     r8, 2                   ; at least 2 bytes to move?
        jb      short RlCpyTail         ; if b, no - move bytes only

        neg     ecx                     ; lead-in = (-Destination) & 1
        and     ecx, 01h                ;
        jz      short RlCpyWords        ; if z, already on a boundary
        sub     r8, rcx                 ; charge the lead-in against Length
  rep   movsb                           ; move lead-in byte

RlCpyWords:
        mov     rcx, r8                 ; rcx = number of whole words
        shr     rcx, 1                  ;
        jz      short RlCpyTail         ; if z, none to move
        and     r8, 01h                 ; r8 = bytes after the last word
  rep   movsw                           ; move words
        jmp     short RlCpyTail         ; finish in common code

        NESTED_END RtlCopyMemory, _TEXT$00
subttl "Copy Memory NonTemporal"
;++
;
; VOID
; RtlCopyMemoryNonTemporal (
;     OUT VOID UNALIGNED *Destination,
;     IN CONST VOID UNALIGNED * Sources,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;    Copies Length bytes from one buffer to another, writing the bulk of
;    the destination with nontemporal stores so that the copied data does
;    not pollute the cache. The buffers are expected not to overlap.
;
; Arguments:
;
;    Destination (rcx) - Address of the destination buffer.
;
;    Sources (rdx) - Address of the source buffer.
;
;    Length (r8) - Number of bytes to copy.
;
; Return Value:
;
;    None.
;
; Implementation Notes:
;
;    For copies of 16 bytes or more the destination is first advanced to a
;    16-byte boundary with byte moves, because the nontemporal store used
;    here (movntdq) needs an aligned memory operand. The source is read
;    with unaligned loads. Data then moves in 64-byte blocks, then 16-byte
;    blocks, then single bytes. Shorter copies are done with byte moves
;    only.
;
;    An sfence is executed on every path before returning.
;
;    No cld is issued; the string instructions rely on the direction flag
;    being clear on entry.
;
;--

        NESTED_ENTRY RtlCopyMemoryNonTemporal, _TEXT$00

        push_reg rdi                    ; preserve nonvolatile registers used
        push_reg rsi                    ;   by the string instructions

        END_PROLOGUE

        mov     rsi, rdx                ; rsi -> source
        mov     rdi, rcx                ; rdi -> destination
        cmp     r8, 16                  ; at least 16 bytes to move?
        jb      RlNtTail                ; if b, no - move bytes only

;
; Bring the destination to a 16-byte boundary.
;

        neg     ecx                     ; lead-in = (-Destination) & 15
        and     ecx, 0fh                ;
        jz      short RlNtBlocks        ; if z, already on a boundary
        sub     r8, rcx                 ; charge the lead-in against Length
  rep   movsb                           ; move lead-in bytes

;
; Move 64-byte blocks.
;

RlNtBlocks:
        mov     rax, r8                 ; rax = number of 64-byte blocks
        shr     rax, 6                  ;
        jz      short RlNtParas         ; if z, none to move
        and     r8, 63                  ; r8 = bytes after the last block

RlNtBlock:
        prefetchnta [rsi]               ; prefetch first byte of the block
        prefetchnta [rsi + 63]          ; prefetch last byte of the block
        movdqu  xmm0, [rsi]             ; load the block (source may be
        movdqu  xmm1, [rsi + 16]        ;   unaligned)
        movdqu  xmm2, [rsi + 32]        ;
        movdqu  xmm3, [rsi + 48]        ;
        movntdq [rdi], xmm0             ; store the block with nontemporal
        movntdq [rdi + 16], xmm1        ;   writes
        movntdq [rdi + 32], xmm2        ;
        movntdq [rdi + 48], xmm3        ;
        add     rdi, 64                 ; advance destination
        add     rsi, 64                 ; advance source
        dec     rax                     ; one block fewer to move
        jnz     short RlNtBlock         ; if nz, more blocks to move

;
; Move 16-byte blocks.
;

RlNtParas:
        mov     rax, r8                 ; rax = number of 16-byte blocks
        shr     rax, 4                  ;
        jz      short RlNtTail          ; if z, none to move
        and     r8, 15                  ; r8 = bytes after the last block

RlNtPara:
        movdqu  xmm0, [rsi]             ; load 16 bytes
        movntdq [rdi], xmm0             ; store them with a nontemporal write
        add     rdi, 16                 ; advance destination
        add     rsi, 16                 ; advance source
        dec     rax                     ; one block fewer to move
        jnz     short RlNtPara          ; if nz, more blocks to move

;
; Move the bytes that remain.
;

RlNtTail:
        test    r8, r8                  ; anything left to move?
        jz      short RlNtExit          ; if z, nothing left
        mov     rcx, r8                 ; rcx = bytes left to move
  rep   movsb                           ; move bytes

RlNtExit:
        sfence                          ; order the stores before returning
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

        NESTED_END RtlCopyMemoryNonTemporal, _TEXT$00
subttl "Fill Memory"
;++
;
; VOID
; RtlFillMemory (
;     IN VOID UNALIGNED *Destination,
;     IN SIZE_T Length,
;     IN UCHAR Fill
;     )
;
; Routine Description:
;
;    Sets every byte of a block of memory, which need not be aligned, to a
;    given byte value.
;
; Arguments:
;
;    Destination (rcx) - Address of the memory to fill.
;
;    Length (rdx) - Number of bytes to fill.
;
;    Fill (r8d) - The fill value. Only the low byte is used.
;
; Return Value:
;
;    None.
;
; Implementation Notes:
;
;    Fills of 8 bytes or more are brought to a quadword boundary with byte
;    stores, continued with quadword stores of the replicated fill byte,
;    and finished with byte stores. Shorter fills use byte stores only.
;
;    No cld is issued; the string instructions rely on the direction flag
;    being clear on entry.
;
;--

        NESTED_ENTRY RtlFillMemory, _TEXT$00

        push_reg rdi                    ; preserve nonvolatile register

        END_PROLOGUE

        mov     eax, r8d                ; al = fill byte stored by stosb
        mov     rdi, rcx                ; rdi -> memory to fill
        cmp     rdx, 8                  ; at least 8 bytes to fill?
        jb      short RlFillTail        ; if b, no - fill bytes only

;
; Bring the destination to a quadword boundary.
;

        neg     ecx                     ; lead-in = (-Destination) & 7
        and     ecx, 07h                ;
        jz      short RlFillQuads       ; if z, already on a boundary
        sub     rdx, rcx                ; charge the lead-in against Length
  rep   stosb                           ; fill lead-in bytes

;
; Fill whole quadwords.
;

RlFillQuads:
        mov     rcx, rdx                ; rcx = number of whole quadwords
        shr     rcx, 3                  ;
        jz      short RlFillTail        ; if z, none to fill
        and     rdx, 07h                ; rdx = bytes after the last quadword
        movzx   eax, al                 ; rax = fill byte, upper bits zero
        mov     r9, 0101010101010101h   ; multiplying by this constant copies
        imul    rax, r9                 ;   the byte into all eight lanes
  rep   stosq                           ; fill quadwords

;
; Fill the bytes that remain. al still holds the fill byte.
;

RlFillTail:
        test    rdx, rdx                ; anything left to fill?
        jz      short RlFillExit        ; if z, nothing left
        mov     rcx, rdx                ; rcx = bytes left to fill
  rep   stosb                           ; fill bytes

RlFillExit:
        pop     rdi                     ; restore nonvolatile register
        ret                             ; return

        NESTED_END RtlFillMemory, _TEXT$00
subttl "Move Memory"
;++
;
; VOID
; RtlMoveMemory (
;     OUT VOID UNALIGNED *Destination,
;     IN CONST VOID UNALIGNED * Sources,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;    Copies Length bytes from one buffer to another. Neither buffer needs
;    to be aligned, and the buffers are allowed to overlap.
;
; Arguments:
;
;    Destination (rcx) - Address of the destination buffer.
;
;    Sources (rdx) - Address of the source buffer.
;
;    Length (r8) - Number of bytes to copy.
;
; Return Value:
;
;    None.
;
; Implementation Notes:
;
;    A forward copy is safe whenever the destination does not start inside
;    the source buffer above its first byte. Those cases are handed to
;    RtlCopyMemoryAlternate, which is entered with rdi and rsi already
;    pushed and with rcx, rdx and r8 unmodified.
;
;    Otherwise the copy is performed bytewise from the last byte to the
;    first with the direction flag set; the flag is cleared again before
;    returning.
;
;--

        NESTED_ENTRY RtlMoveMemory, _TEXT$00

        push_reg rdi                    ; preserve nonvolatile registers used
        push_reg rsi                    ;   by the string instructions

        END_PROLOGUE

        cmp     rcx, rdx                ; destination at or below source?
        jbe     RtlCopyMemoryAlternate  ; if be, forward copy is safe
        lea     rsi, [rdx + r8 - 1]     ; rsi -> last byte of source
        cmp     rcx, rsi                ; destination beyond end of source?
        ja      RtlCopyMemoryAlternate  ; if a, forward copy is safe

;
; The destination starts inside the source buffer. Copy from the end.
;

        lea     rdi, [rcx + r8 - 1]     ; rdi -> last byte of destination
        mov     rcx, r8                 ; rcx = bytes to move
        std                             ; string moves run downward
  rep   movsb                           ; move bytes, last to first
        cld                             ; put the direction flag back
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

        NESTED_END RtlMoveMemory, _TEXT$00
subttl "Prefetch Memory NonTemporal"
;++
;
; VOID
; RtlPrefetchMemoryNonTemporal (
;     IN CONST PVOID Source,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;    Issues a nontemporal prefetch hint (prefetchnta) for every 64-byte
;    line touched by the range [Source, Source + Length). Where the
;    processor places the prefetched data is implementation dependent.
;
;    Source does not have to be line aligned: the first line prefetched is
;    the one containing Source and the last is the one containing the final
;    byte of the range. A zero Length issues no prefetch.
;
;    N.B. A line size of 64 bytes is assumed.
;
; Arguments:
;
;    Source (rcx) - Address of the memory to be prefetched.
;
;    Length (rdx) - Number of bytes to be prefetched.
;
; Return Value:
;
;    None.
;
;--

        LEAF_ENTRY RtlPrefetchMemoryNonTemporal, _TEXT$00

        test    rdx, rdx                ; anything to prefetch?
        jz      short RlPF20            ; if z, nothing to prefetch

;
; Compute the number of lines spanned by the range. Working from the byte
; distance between the first line and the last byte, rather than comparing
; against an end address, keeps the loop bounded even when the range ends
; at the very top of the address space.
;

        lea     rdx, [rcx + rdx - 1]    ; rdx -> last byte of the range
        and     rcx, -64                ; rcx -> line containing Source
        sub     rdx, rcx                ; distance from first line to last byte
        shr     rdx, 6                  ; lines beyond the first
        inc     rdx                     ; total lines, at least one

RlPF10: prefetchnta [rcx]               ; prefetch line
        add     rcx, 64                 ; advance to the next line
        dec     rdx                     ; one line fewer to prefetch
        jnz     short RlPF10            ; if nz, more lines to prefetch

RlPF20: ret                             ; return

        LEAF_END RtlPrefetchMemoryNonTemporal, _TEXT$00
subttl "Zero Memory"
;++
;
; VOID
; RtlZeroMemory (
;     IN VOID UNALIGNED *Destination,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;    Sets every byte of a block of memory, which need not be aligned, to
;    zero. The work is done by RtlFillMemory with a fill value of zero.
;
; Arguments:
;
;    Destination (rcx) - Address of the memory to zero.
;
;    Length (rdx) - Number of bytes to zero.
;
; Return Value:
;
;    None.
;
;--

        LEAF_ENTRY RtlZeroMemory, _TEXT$00

        xor     r8d, r8d                ; Fill = 0; the 32-bit write also
                                        ;   clears the upper half of r8
        jmp     RtlFillMemory           ; rcx and rdx pass through unchanged;
                                        ;   RtlFillMemory returns to our caller

        LEAF_END RtlZeroMemory, _TEXT$00
end