Windows2000/private/ntos/rtl/ppc/movemem.s
2020-09-30 17:12:32 +02:00

2195 lines
108 KiB
ArmAsm
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// TITLE("Compare, Move, Zero, and Fill Memory Support")
//++
//
// Copyright (c) 1993 IBM Corporation
//
// Module Name:
//
// mvmem.s
//
// Abstract:
//
// This module implements functions to compare, move, zero, and fill
// blocks of memory. If the memory is aligned, then these functions
// are very efficient.
//
// N.B. These routines MUST preserve all floating state since they are
// frequently called from interrupt service routines that normally
// do not save or restore floating state.
//
// Author:
//
// Curt Fawcett (crf) 10-Aug-1993
//
// Environment:
//
// User or Kernel mode.
//
// Revision History:
//
// Curt Fawcett 11-Jan-1994 Removed register definitions
// and fixed for new assembler
//
//--
#include <ksppc.h>
//
// Define local constants
//
.set BLKLN,32
//
//--
//++
//
// ULONG
// RtlCompareMemory (
// IN PVOID Source1,
// IN PVOID Source2,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function compares two blocks of memory and returns the number
// of bytes that compared equal.
//
// Arguments:
//
// SRC1 (r.3) - Supplies a pointer to the first block of memory to
// compare.
//
// SRC2 (r.4) - Supplies a pointer to the second block of memory to
// compare.
//
// LNGTH (r.5) - Supplies the length, in bytes, of the memory to be
// compared.
//
// Return Value:
//
// The number of bytes that compared equal is returned as the function
// value. If all bytes compared equal, then the length of the original
// block of memory is returned.
//
//--
//
// Define the routine entry point
LEAF_ENTRY(RtlCompareMemory)
//
// Register usage within this routine:
//
// r.3 - running SRC1 pointer; receives the return value on exit
// r.4 - running SRC2 pointer
// r.5 - bytes still to be handled by the current phase
// r.6-r.9 - scratch (loaded data, alignment bits, byte counts)
// r.10 - SRC1 address at which the current block/word loop stops
// r.11 - SRC1 ending address (original SRC1 + Length)
// r.12 - original Length
//
// The fast loops only test for equality. On any mismatch they branch
// to WrdNne, which points r.3/r.4 at the earliest word touched by the
// failing compare; a byte-by-byte scan then locates the exact byte.
//
// Check length and relative alignment
//
        or. r.6,r.5,r.5 // r.6 = Length; set CR0 for the zero test
        mr r.12,r.5 // Save original length
        beq GetResults2 // Zero length: result is r.12 - r.6 = 0
        cmplwi r.5,4 // Length is a ULONG: compare it unsigned
        add r.11,r.3,r.5 // Get ending SRC1 address
        xor r.9,r.3,r.4 // Differing address bits
        blt- CompareByByte // Jump if fewer than 4 bytes
        andi. r.9,r.9,3 // Isolate alignment bits
        bne- CompUnaligned // Jump if different alignments
//
// Compare Memory - Same SRC1 and SRC2 alignment
//
// Compare extra bytes until a word boundary is reached
//
CompAligned:
        andi. r.6,r.4,3 // r.6 = offset of SRC2 within its word
        beq+ CompBlkDiv // Already word aligned
        cmpwi r.6,3 // Offset 3: one byte to the boundary
        lbz r.7,0(r.3) // Get leading SRC1 byte
        lbz r.8,0(r.4) // Get leading SRC2 byte
        bne+ Comp2 // If not offset 3, check next case
        li r.6,1 // One byte consumed
        b UpdateCompAddrs // Jump to compare and update addresses
Comp2:
        cmpwi r.6,2 // Offset 2: one halfword to the boundary
        li r.6,2 // Two bytes consumed
        bne+ Comp3 // If not offset 2, it is offset 1
        lhz r.7,0(r.3) // Get leading SRC1 halfword
        lhz r.8,0(r.4) // Get leading SRC2 halfword
        b UpdateCompAddrs // Jump to compare and update addresses
Comp3:
        cmpw r.7,r.8 // Offset 1: compare the leading bytes
        lhz r.7,1(r.3) // Get SRC1 halfword that follows
        lhz r.8,1(r.4) // Get SRC2 halfword that follows
        li r.6,3 // Three bytes consumed
        bne Wrd1ne // Jump if leading bytes differ
UpdateCompAddrs:
        cmpw r.7,r.8 // Compare the last byte/halfword loaded
        sub r.5,r.5,r.6 // Decrement LNGTH by bytes consumed
        bne Wrd1ne // Jump if not equal (pointers not advanced)
        add r.3,r.3,r.6 // Update the SRC1 address
        add r.4,r.4,r.6 // Update the SRC2 address
//
// Divide the block to process into 32-byte blocks
//
CompBlkDiv:
        andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
        sub. r.7,r.5,r.6 // Bytes contained in full blocks
        add r.10,r.3,r.7 // SRC1 address just past the full blocks
        beq- CompareBy4Bytes // Jump if no full blocks
        mr r.5,r.6 // Set Length = remainder
//
// Compare 32-byte blocks, a word at a time. Loads for the next word
// are issued before the branch on the previous compare.
//
CompFullBlks:
        lwz r.6,0(r.3) // Get 1st SRC1 word
        lwz r.7,0(r.4) // Get 1st SRC2 word
        lwz r.8,4(r.3) // Get 2nd SRC1 word
        cmpw r.6,r.7 // Check for 1st word equal
        lwz r.9,4(r.4) // Get 2nd SRC2 word
        bne- Wrd1ne // Jump if 1st word not equal
        cmpw r.8,r.9 // Check for 2nd word equal
        lwz r.6,8(r.3) // Get 3rd SRC1 word
        lwz r.7,8(r.4) // Get 3rd SRC2 word
        bne- Wrd2ne // Jump if 2nd word not equal
        cmpw r.6,r.7 // Check for 3rd word equal
        lwz r.8,12(r.3) // Get 4th SRC1 word
        lwz r.9,12(r.4) // Get 4th SRC2 word
        bne- Wrd3ne // Jump if 3rd word not equal
        cmpw r.8,r.9 // Check for 4th word equal
        lwz r.6,16(r.3) // Get 5th SRC1 word
        lwz r.7,16(r.4) // Get 5th SRC2 word
        bne- Wrd4ne // Jump if 4th word not equal
        cmpw r.6,r.7 // Check for 5th word equal
        lwz r.8,20(r.3) // Get 6th SRC1 word
        lwz r.9,20(r.4) // Get 6th SRC2 word
        bne- Wrd5ne // Jump if 5th word not equal
        cmpw r.8,r.9 // Check for 6th word equal
        lwz r.6,24(r.3) // Get 7th SRC1 word
        lwz r.7,24(r.4) // Get 7th SRC2 word
        bne- Wrd6ne // Jump if 6th word not equal
        cmpw r.6,r.7 // Check for 7th word equal
        lwz r.8,28(r.3) // Get 8th SRC1 word
        lwz r.9,28(r.4) // Get 8th SRC2 word
        bne- Wrd7ne // Jump if 7th word not equal
        cmpw r.8,r.9 // Check for 8th word equal
        bne- Wrd8ne // Jump if 8th word not equal
        addi r.3,r.3,32 // Update SRC1 pointer
        cmpw r.3,r.10 // Check for all blocks done
        addi r.4,r.4,32 // Update SRC2 pointer
        bne+ CompFullBlks // Jump if more blocks
//
// Compare 4-byte blocks
//
CompareBy4Bytes:
        andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
        sub. r.7,r.5,r.6 // Bytes contained in full words
        add r.10,r.3,r.7 // SRC1 address just past the full words
        beq- CompareByByte // Jump if no 4-byte blocks
        mr r.5,r.6 // Set Length = remainder
CompLpOn4Bytes:
        lwz r.6,0(r.3) // Get SRC1 word
        lwz r.7,0(r.4) // Get SRC2 word
        cmpw r.6,r.7 // Check for words equal
        bne- Wrd1ne // Jump if not equal
        addi r.3,r.3,4 // Get pointer to next SRC1 word
        cmpw r.3,r.10 // Check for last word
        addi r.4,r.4,4 // Get pointer to next SRC2 word
        bne+ CompLpOn4Bytes // Jump if more words
//
// Compare the trailing bytes. Every path that reaches this label has
// r.5 in the range 0..3, so at most three bytes are examined here.
//
CompareByByte:
        cmpwi r.5,0 // Check for no bytes left
        beq+ GetResults // Jump to return if done
        lbz r.6,0(r.3) // Get 1st SRC1 byte
        lbz r.7,0(r.4) // Get 1st SRC2 byte
        cmpw r.6,r.7 // Check for 1st byte equal
        bne- Wrd1ne // Jump if 1st byte not equal
        addi r.3,r.3,1 // Update SRC1 address
        cmpwi r.5,1 // Check for only one byte
        addi r.4,r.4,1 // Update SRC2 address
        beq+ GetResults // Jump to return if done
        lbz r.6,0(r.3) // Get 2nd SRC1 byte
        lbz r.7,0(r.4) // Get 2nd SRC2 byte
        cmpw r.6,r.7 // Check for 2nd byte equal
        bne- Wrd1ne // Jump if 2nd byte not equal
        cmpwi r.5,2 // Check for only two bytes
        addi r.4,r.4,1 // Update SRC2 address
        addi r.3,r.3,1 // Update SRC1 address
        beq+ GetResults // Jump to return if done
        lbz r.6,0(r.3) // Get 3rd SRC1 byte
        lbz r.7,0(r.4) // Get 3rd SRC2 byte
        cmpw r.6,r.7 // Check for 3rd byte equal
        bne- Wrd1ne // Jump if 3rd byte not equal
        addi r.4,r.4,1 // Update SRC2 address
        addi r.3,r.3,1 // Update SRC1 address
        b GetResults // Jump to return
//
// Compare - SRC1 and SRC2 have different alignments
//
// The low two bits of the addresses differ here, so (SRC1 | SRC2) & 3
// equals 2 only when one address is word aligned and the other is
// halfword aligned.
//
CompUnaligned:
        or r.9,r.3,r.4 // Merge the address bits
        andi. r.9,r.9,3 // Isolate alignment
        cmpwi r.9,2 // Both at least halfword aligned?
        bne+ CompByteUnaligned // Jump if either address is odd
//
// Divide the blocks to process into 32-byte blocks
//
CompBlkDivUnaligned:
        andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
        sub. r.7,r.5,r.6 // Bytes contained in full blocks
        add r.10,r.3,r.7 // SRC1 address just past the full blocks
        beq- CompHWrdBy4Bytes // Jump if no full blocks
        mr r.5,r.6 // Set Length = remainder
//
// Compare 32-byte blocks a halfword at a time - one source is word
// aligned, the other halfword aligned
//
CompByHWord:
        lhz r.6,0(r.3) // Get 1st hword of 1st SRC1 wrd
        lhz r.7,0(r.4) // Get 1st hword of 1st SRC2 wrd
        lhz r.8,2(r.3) // Get 2nd hword of 1st SRC1 wrd
        cmpw r.6,r.7 // Check 1st hword of 1st word
        lhz r.9,2(r.4) // Get 2nd hword of 1st SRC2 wrd
        bne- Wrd1ne // Jump if 1st word not equal
        cmpw r.8,r.9 // Check 2nd hword of 1st word
        lhz r.6,4(r.3) // Get 1st hword of 2nd SRC1 wrd
        lhz r.7,4(r.4) // Get 1st hword of 2nd SRC2 wrd
        bne- Wrd1ne // Jump if 1st word not equal
        cmpw r.6,r.7 // Check 1st hword of 2nd word
        lhz r.8,6(r.3) // Get 2nd hword of 2nd SRC1 wrd
        lhz r.9,6(r.4) // Get 2nd hword of 2nd SRC2 wrd
        bne- Wrd2ne // Jump if 2nd word not equal
        cmpw r.8,r.9 // Check 2nd hword of 2nd word
        lhz r.6,8(r.3) // Get 1st hword of 3rd SRC1 wrd
        lhz r.7,8(r.4) // Get 1st hword of 3rd SRC2 wrd
        bne- Wrd2ne // Jump if 2nd word not equal
        cmpw r.6,r.7 // Check 1st hword of 3rd word
        lhz r.8,10(r.3) // Get 2nd hword of 3rd SRC1 wrd
        lhz r.9,10(r.4) // Get 2nd hword of 3rd SRC2 wrd
        bne- Wrd3ne // Jump if 3rd word not equal
        cmpw r.8,r.9 // Check 2nd hword of 3rd word
        lhz r.6,12(r.3) // Get 1st hword of 4th SRC1 wrd
        lhz r.7,12(r.4) // Get 1st hword of 4th SRC2 wrd
        bne- Wrd3ne // Jump if 3rd word not equal
        cmpw r.6,r.7 // Check 1st hword of 4th word
        lhz r.8,14(r.3) // Get 2nd hword of 4th SRC1 wrd
        lhz r.9,14(r.4) // Get 2nd hword of 4th SRC2 wrd
        bne- Wrd4ne // Jump if 4th word not equal
        cmpw r.8,r.9 // Check 2nd hword of 4th word
        lhz r.6,16(r.3) // Get 1st hword of 5th SRC1 wrd
        lhz r.7,16(r.4) // Get 1st hword of 5th SRC2 wrd
        bne- Wrd4ne // Jump if 4th word not equal
        cmpw r.6,r.7 // Check 1st hword of 5th word
        lhz r.8,18(r.3) // Get 2nd hword of 5th SRC1 wrd
        lhz r.9,18(r.4) // Get 2nd hword of 5th SRC2 wrd
        bne- Wrd5ne // Jump if 5th word not equal
        cmpw r.8,r.9 // Check 2nd hword of 5th word
        lhz r.6,20(r.3) // Get 1st hword of 6th SRC1 wrd
        lhz r.7,20(r.4) // Get 1st hword of 6th SRC2 wrd
        bne- Wrd5ne // Jump if 5th word not equal
        cmpw r.6,r.7 // Check 1st hword of 6th word
        lhz r.8,22(r.3) // Get 2nd hword of 6th SRC1 wrd
        lhz r.9,22(r.4) // Get 2nd hword of 6th SRC2 wrd
        bne- Wrd6ne // Jump if 6th word not equal
        cmpw r.8,r.9 // Check 2nd hword of 6th word
        lhz r.6,24(r.3) // Get 1st hword of 7th SRC1 wrd
        lhz r.7,24(r.4) // Get 1st hword of 7th SRC2 wrd
        bne- Wrd6ne // Jump if 6th word not equal
        cmpw r.6,r.7 // Check 1st hword of 7th word
        lhz r.8,26(r.3) // Get 2nd hword of 7th SRC1 wrd
        lhz r.9,26(r.4) // Get 2nd hword of 7th SRC2 wrd
        bne- Wrd7ne // Jump if 7th word not equal
        cmpw r.8,r.9 // Check 2nd hword of 7th word
        lhz r.6,28(r.3) // Get 1st hword of 8th SRC1 wrd
        lhz r.7,28(r.4) // Get 1st hword of 8th SRC2 wrd
        bne- Wrd7ne // Jump if 7th word not equal
        cmpw r.6,r.7 // Check 1st hword of 8th word
        lhz r.8,30(r.3) // Get 2nd hword of 8th SRC1 wrd
        lhz r.9,30(r.4) // Get 2nd hword of 8th SRC2 wrd
        bne- Wrd8ne // Jump if 8th word not equal
        cmpw r.8,r.9 // Check 2nd hword of 8th word
        bne- Wrd8ne // Jump if 8th word not equal
        addi r.3,r.3,32 // Update SRC1 pointer
        cmpw r.3,r.10 // Check for all blocks done
        addi r.4,r.4,32 // Update SRC2 pointer
        bne+ CompByHWord // Jump if more blocks
//
// Compare 4-byte blocks a halfword at a time
//
CompHWrdBy4Bytes:
        andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
        sub. r.7,r.5,r.6 // Bytes contained in full words
        add r.10,r.3,r.7 // SRC1 address just past the full words
        beq- CompareByByte // Jump if no 4-byte blocks
        mr r.5,r.6 // Set Length = remainder
CompHWrdLpOn4Bytes:
        lhz r.6,0(r.3) // Get 1st hword of SRC1 wrd
        lhz r.7,0(r.4) // Get 1st hword of SRC2 wrd
        lhz r.8,2(r.3) // Get 2nd hword of SRC1 wrd
        cmpw r.6,r.7 // Check 1st hword
        lhz r.9,2(r.4) // Get 2nd hword of SRC2 wrd
        bne- Wrd1ne // Jump if not equal
        cmpw r.8,r.9 // Check 2nd hword
        bne- Wrd1ne // Jump if not equal
        addi r.3,r.3,4 // Update SRC1 pointer
        cmpw r.3,r.10 // Check for last word
        addi r.4,r.4,4 // Update SRC2 pointer
        bne+ CompHWrdLpOn4Bytes // Jump if more words
        b CompareByByte // Jump to complete last bytes
//
// Compare - at least one address is odd
//
CompByteUnaligned:
        and r.9,r.3,r.4 // Low bit set only if both are odd
        andi. r.9,r.9,1 // Isolate the low bit
        beq- CmpBlksByByte // Jump if only one address is odd
//
// Both addresses are odd. Divide into 32-byte blocks.
//
        andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
        sub. r.7,r.5,r.6 // Bytes contained in full blocks
        add r.10,r.3,r.7 // SRC1 address just past the full blocks
        beq- CompByteBy4Bytes // Jump if no full blocks
        mr r.5,r.6 // Set Length = remainder
//
// Compare 32-byte blocks as one byte, fifteen halfwords at odd offsets
// (which are halfword aligned since both addresses are odd), and one
// final byte. A halfword that spans two words reports the earlier word.
//
CompByByte:
        lbz r.6,0(r.3) // Get SRC1 byte 0
        lbz r.7,0(r.4) // Get SRC2 byte 0
        lhz r.8,1(r.3) // Get SRC1 bytes 1-2
        cmpw r.6,r.7 // Check byte 0
        lhz r.9,1(r.4) // Get SRC2 bytes 1-2
        bne Wrd1ne // Jump if 1st word not equal
        cmpw r.8,r.9 // Check bytes 1-2
        lhz r.6,3(r.3) // Get SRC1 bytes 3-4 (1st/2nd wrd)
        lhz r.7,3(r.4) // Get SRC2 bytes 3-4 (1st/2nd wrd)
        bne Wrd1ne // Jump if 1st word not equal
        cmpw r.6,r.7 // Check bytes 3-4
        lhz r.8,5(r.3) // Get SRC1 bytes 5-6
        lhz r.9,5(r.4) // Get SRC2 bytes 5-6
        bne Wrd1ne // Mismatch starts no earlier than 1st word
        cmpw r.8,r.9 // Check bytes 5-6
        lhz r.6,7(r.3) // Get SRC1 bytes 7-8 (2nd/3rd wrd)
        lhz r.7,7(r.4) // Get SRC2 bytes 7-8 (2nd/3rd wrd)
        bne Wrd2ne // Jump if 2nd word not equal
        cmpw r.6,r.7 // Check bytes 7-8
        lhz r.8,9(r.3) // Get SRC1 bytes 9-10
        lhz r.9,9(r.4) // Get SRC2 bytes 9-10
        bne Wrd2ne // Mismatch starts no earlier than 2nd word
        cmpw r.8,r.9 // Check bytes 9-10
        lhz r.6,11(r.3) // Get SRC1 bytes 11-12 (3rd/4th wrd)
        lhz r.7,11(r.4) // Get SRC2 bytes 11-12 (3rd/4th wrd)
        bne Wrd3ne // Jump if 3rd word not equal
        cmpw r.6,r.7 // Check bytes 11-12
        lhz r.8,13(r.3) // Get SRC1 bytes 13-14
        lhz r.9,13(r.4) // Get SRC2 bytes 13-14
        bne Wrd3ne // Mismatch starts no earlier than 3rd word
        cmpw r.8,r.9 // Check bytes 13-14
        lhz r.6,15(r.3) // Get SRC1 bytes 15-16 (4th/5th wrd)
        lhz r.7,15(r.4) // Get SRC2 bytes 15-16 (4th/5th wrd)
        bne Wrd4ne // Jump if 4th word not equal
        cmpw r.6,r.7 // Check bytes 15-16
        lhz r.8,17(r.3) // Get SRC1 bytes 17-18
        lhz r.9,17(r.4) // Get SRC2 bytes 17-18
        bne Wrd4ne // Mismatch starts no earlier than 4th word
        cmpw r.8,r.9 // Check bytes 17-18
        lhz r.6,19(r.3) // Get SRC1 bytes 19-20 (5th/6th wrd)
        lhz r.7,19(r.4) // Get SRC2 bytes 19-20 (5th/6th wrd)
        bne Wrd5ne // Jump if 5th word not equal
        cmpw r.6,r.7 // Check bytes 19-20
        lhz r.8,21(r.3) // Get SRC1 bytes 21-22
        lhz r.9,21(r.4) // Get SRC2 bytes 21-22
        bne Wrd5ne // Mismatch starts no earlier than 5th word
        cmpw r.8,r.9 // Check bytes 21-22
        lhz r.6,23(r.3) // Get SRC1 bytes 23-24 (6th/7th wrd)
        lhz r.7,23(r.4) // Get SRC2 bytes 23-24 (6th/7th wrd)
        bne Wrd6ne // Jump if 6th word not equal
        cmpw r.6,r.7 // Check bytes 23-24
        lhz r.8,25(r.3) // Get SRC1 bytes 25-26
        lhz r.9,25(r.4) // Get SRC2 bytes 25-26
        bne Wrd6ne // Mismatch starts no earlier than 6th word
        cmpw r.8,r.9 // Check bytes 25-26
        lhz r.6,27(r.3) // Get SRC1 bytes 27-28 (7th/8th wrd)
        lhz r.7,27(r.4) // Get SRC2 bytes 27-28 (7th/8th wrd)
        bne Wrd7ne // Jump if 7th word not equal
        cmpw r.6,r.7 // Check bytes 27-28
        lhz r.8,29(r.3) // Get SRC1 bytes 29-30
        lhz r.9,29(r.4) // Get SRC2 bytes 29-30
        bne Wrd7ne // Mismatch starts no earlier than 7th word
        cmpw r.8,r.9 // Check bytes 29-30
        lbz r.6,31(r.3) // Get SRC1 byte 31
        lbz r.7,31(r.4) // Get SRC2 byte 31
        bne Wrd8ne // Jump if 8th word not equal
        cmpw r.6,r.7 // Check byte 31
        bne Wrd8ne // Jump if 8th word not equal
        addi r.3,r.3,32 // Update SRC1 pointer
        cmpw r.3,r.10 // Check for all blocks done
        addi r.4,r.4,32 // Update SRC2 pointer
        bne+ CompByByte // Jump if more blocks
//
// Compare 4-byte blocks with both addresses odd: byte, halfword, byte
//
CompByteBy4Bytes:
        andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
        sub. r.7,r.5,r.6 // Bytes contained in full words
        add r.10,r.3,r.7 // SRC1 address just past the full words
        beq- CompareByByte // Jump if no 4-byte blocks
        mr r.5,r.6 // Set Length = remainder
CompByteLpOn4Bytes:
        lbz r.6,0(r.3) // Get SRC1 byte 0
        lbz r.7,0(r.4) // Get SRC2 byte 0
        lhz r.8,1(r.3) // Get SRC1 bytes 1-2
        cmpw r.6,r.7 // Check byte 0
        lhz r.9,1(r.4) // Get SRC2 bytes 1-2
        bne Wrd1ne // Jump if not equal
        cmpw r.8,r.9 // Check bytes 1-2
        lbz r.6,3(r.3) // Get SRC1 byte 3
        lbz r.7,3(r.4) // Get SRC2 byte 3
        bne Wrd1ne // Jump if not equal
        cmpw r.6,r.7 // Check byte 3
        bne Wrd1ne // Jump if not equal
        addi r.3,r.3,4 // Update SRC1 pointer
        cmpw r.3,r.10 // Check for last word
        addi r.4,r.4,4 // Update SRC2 pointer
        bne+ CompByteLpOn4Bytes // Jump if more words
        b CompareByByte // Jump to complete last bytes
//
// Compare - exactly one of SRC1 and SRC2 is at an odd address, so
// only byte loads are used.
//
// Divide the blocks to process into 32-byte blocks
//
CmpBlksByByte:
        andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
        sub. r.7,r.5,r.6 // Bytes contained in full blocks
        add r.10,r.3,r.7 // SRC1 address just past the full blocks
        beq- CompBlksOf4Bytes // Jump if no full blocks
        mr r.5,r.6 // Set Length = remainder
//
// Compare 32-byte blocks one byte at a time. Even byte offsets use
// r.6/r.7, odd offsets use r.8/r.9; each branch tests the compare of
// the byte loaded in the previous step.
//
CompBlksByByte:
        lbz r.6,0(r.3) // Get SRC1 byte 0
        lbz r.7,0(r.4) // Get SRC2 byte 0
        lbz r.8,1(r.3) // Get SRC1 byte 1
        cmpw r.6,r.7 // Check byte 0
        lbz r.9,1(r.4) // Get SRC2 byte 1
        bne Wrd1ne // Jump if 1st word not equal
        cmpw r.8,r.9 // Check byte 1
        lbz r.6,2(r.3) // Get SRC1 byte 2
        lbz r.7,2(r.4) // Get SRC2 byte 2
        bne Wrd1ne // Jump if 1st word not equal
        cmpw r.6,r.7 // Check byte 2
        lbz r.8,3(r.3) // Get SRC1 byte 3
        lbz r.9,3(r.4) // Get SRC2 byte 3
        bne Wrd1ne // Jump if 1st word not equal
        cmpw r.8,r.9 // Check byte 3
        lbz r.6,4(r.3) // Get SRC1 byte 4
        lbz r.7,4(r.4) // Get SRC2 byte 4
        bne Wrd1ne // Jump if 1st word not equal
        cmpw r.6,r.7 // Check byte 4
        lbz r.8,5(r.3) // Get SRC1 byte 5
        lbz r.9,5(r.4) // Get SRC2 byte 5
        bne Wrd2ne // Jump if 2nd word not equal
        cmpw r.8,r.9 // Check byte 5
        lbz r.6,6(r.3) // Get SRC1 byte 6
        lbz r.7,6(r.4) // Get SRC2 byte 6
        bne Wrd2ne // Jump if 2nd word not equal
        cmpw r.6,r.7 // Check byte 6
        lbz r.8,7(r.3) // Get SRC1 byte 7
        lbz r.9,7(r.4) // Get SRC2 byte 7
        bne Wrd2ne // Jump if 2nd word not equal
        cmpw r.8,r.9 // Check byte 7
        lbz r.6,8(r.3) // Get SRC1 byte 8
        lbz r.7,8(r.4) // Get SRC2 byte 8
        bne Wrd2ne // Jump if 2nd word not equal
        cmpw r.6,r.7 // Check byte 8
        lbz r.8,9(r.3) // Get SRC1 byte 9
        lbz r.9,9(r.4) // Get SRC2 byte 9
        bne Wrd3ne // Jump if 3rd word not equal
        cmpw r.8,r.9 // Check byte 9
        lbz r.6,10(r.3) // Get SRC1 byte 10
        lbz r.7,10(r.4) // Get SRC2 byte 10
        bne Wrd3ne // Jump if 3rd word not equal
        cmpw r.6,r.7 // Check byte 10
        lbz r.8,11(r.3) // Get SRC1 byte 11
        lbz r.9,11(r.4) // Get SRC2 byte 11
        bne Wrd3ne // Jump if 3rd word not equal
        cmpw r.8,r.9 // Check byte 11
        lbz r.6,12(r.3) // Get SRC1 byte 12
        lbz r.7,12(r.4) // Get SRC2 byte 12
        bne Wrd3ne // Jump if 3rd word not equal
        cmpw r.6,r.7 // Check byte 12
        lbz r.8,13(r.3) // Get SRC1 byte 13
        lbz r.9,13(r.4) // Get SRC2 byte 13
        bne Wrd4ne // Jump if 4th word not equal
        cmpw r.8,r.9 // Check byte 13
        lbz r.6,14(r.3) // Get SRC1 byte 14
        lbz r.7,14(r.4) // Get SRC2 byte 14
        bne Wrd4ne // Jump if 4th word not equal
        cmpw r.6,r.7 // Check byte 14
        lbz r.8,15(r.3) // Get SRC1 byte 15
        lbz r.9,15(r.4) // Get SRC2 byte 15
        bne Wrd4ne // Jump if 4th word not equal
        cmpw r.8,r.9 // Check byte 15
        lbz r.6,16(r.3) // Get SRC1 byte 16
        lbz r.7,16(r.4) // Get SRC2 byte 16
        bne Wrd4ne // Jump if 4th word not equal
        cmpw r.6,r.7 // Check byte 16
        lbz r.8,17(r.3) // Get SRC1 byte 17
        lbz r.9,17(r.4) // Get SRC2 byte 17
        bne Wrd5ne // Jump if 5th word not equal
        cmpw r.8,r.9 // Check byte 17
        lbz r.6,18(r.3) // Get SRC1 byte 18
        lbz r.7,18(r.4) // Get SRC2 byte 18
        bne Wrd5ne // Jump if 5th word not equal
        cmpw r.6,r.7 // Check byte 18
        lbz r.8,19(r.3) // Get SRC1 byte 19
        lbz r.9,19(r.4) // Get SRC2 byte 19
        bne Wrd5ne // Jump if 5th word not equal
        cmpw r.8,r.9 // Check byte 19
        lbz r.6,20(r.3) // Get SRC1 byte 20
        lbz r.7,20(r.4) // Get SRC2 byte 20
        bne Wrd5ne // Jump if 5th word not equal
        cmpw r.6,r.7 // Check byte 20
        lbz r.8,21(r.3) // Get SRC1 byte 21
        lbz r.9,21(r.4) // Get SRC2 byte 21
        bne Wrd6ne // Jump if 6th word not equal
        cmpw r.8,r.9 // Check byte 21
        lbz r.6,22(r.3) // Get SRC1 byte 22
        lbz r.7,22(r.4) // Get SRC2 byte 22
        bne Wrd6ne // Jump if 6th word not equal
        cmpw r.6,r.7 // Check byte 22
        lbz r.8,23(r.3) // Get SRC1 byte 23
        lbz r.9,23(r.4) // Get SRC2 byte 23
        bne Wrd6ne // Jump if 6th word not equal
        cmpw r.8,r.9 // Check byte 23
        lbz r.6,24(r.3) // Get SRC1 byte 24
        lbz r.7,24(r.4) // Get SRC2 byte 24
        bne Wrd6ne // Jump if 6th word not equal
        cmpw r.6,r.7 // Check byte 24
        lbz r.8,25(r.3) // Get SRC1 byte 25
        lbz r.9,25(r.4) // Get SRC2 byte 25
        bne Wrd7ne // Jump if 7th word not equal
        cmpw r.8,r.9 // Check byte 25
        lbz r.6,26(r.3) // Get SRC1 byte 26
        lbz r.7,26(r.4) // Get SRC2 byte 26
        bne Wrd7ne // Jump if 7th word not equal
        cmpw r.6,r.7 // Check byte 26
        lbz r.8,27(r.3) // Get SRC1 byte 27
        lbz r.9,27(r.4) // Get SRC2 byte 27
        bne Wrd7ne // Jump if 7th word not equal
        cmpw r.8,r.9 // Check byte 27
        lbz r.6,28(r.3) // Get SRC1 byte 28
        lbz r.7,28(r.4) // Get SRC2 byte 28
        bne Wrd7ne // Jump if 7th word not equal
        cmpw r.6,r.7 // Check byte 28
        lbz r.8,29(r.3) // Get SRC1 byte 29
        lbz r.9,29(r.4) // Get SRC2 byte 29
        bne Wrd8ne // Jump if 8th word not equal
        cmpw r.8,r.9 // Check byte 29
        lbz r.6,30(r.3) // Get SRC1 byte 30
        lbz r.7,30(r.4) // Get SRC2 byte 30
        bne Wrd8ne // Jump if 8th word not equal
        cmpw r.6,r.7 // Check byte 30
        lbz r.8,31(r.3) // Get SRC1 byte 31
        lbz r.9,31(r.4) // Get SRC2 byte 31
        bne Wrd8ne // Jump if 8th word not equal
        cmpw r.8,r.9 // Check byte 31
        bne Wrd8ne // Jump if 8th word not equal
        addi r.3,r.3,32 // Update SRC1 pointer
        cmpw r.3,r.10 // Check for all blocks done
        addi r.4,r.4,32 // Update SRC2 pointer
        bne+ CompBlksByByte // Jump if more blocks
//
// Divide the remaining bytes into 4-byte blocks
//
CompBlksOf4Bytes:
        andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
        sub. r.7,r.5,r.6 // Bytes contained in full words
        add r.10,r.3,r.7 // SRC1 address just past the full words
        beq- CompareByByte // Jump if no 4-byte blocks
        mr r.5,r.6 // Set Length = remainder
CompBlksLpOn4Bytes:
        lbz r.6,0(r.3) // Get SRC1 byte 0
        lbz r.7,0(r.4) // Get SRC2 byte 0
        lbz r.8,1(r.3) // Get SRC1 byte 1
        cmpw r.6,r.7 // Check byte 0
        lbz r.9,1(r.4) // Get SRC2 byte 1
        bne Wrd1ne // Jump if not equal
        cmpw r.8,r.9 // Check byte 1
        lbz r.6,2(r.3) // Get SRC1 byte 2
        lbz r.7,2(r.4) // Get SRC2 byte 2
        bne Wrd1ne // Jump if not equal
        cmpw r.6,r.7 // Check byte 2
        lbz r.8,3(r.3) // Get SRC1 byte 3
        lbz r.9,3(r.4) // Get SRC2 byte 3
        bne Wrd1ne // Jump if not equal
        cmpw r.8,r.9 // Check byte 3
        bne Wrd1ne // Jump if not equal
        addi r.3,r.3,4 // Update SRC1 pointer
        cmpw r.3,r.10 // Check for last word
        addi r.4,r.4,4 // Update SRC2 pointer
        bne+ CompBlksLpOn4Bytes // Jump if more words
        b CompareByByte // Jump to complete last bytes
//
// Adjust pointers to SRC1 and SRC2 to isolate offending byte compare
//
Wrd2ne:
        addi r.3,r.3,4 // Adjust to point to 2nd word
        addi r.4,r.4,4 // Adjust to point to 2nd word
        b Compare1byte // Jump to isolate the bad byte
Wrd3ne:
        addi r.3,r.3,8 // Adjust to point to 3rd word
        addi r.4,r.4,8 // Adjust to point to 3rd word
        b Compare1byte // Jump to isolate the bad byte
Wrd4ne:
        addi r.3,r.3,12 // Adjust to point to 4th word
        addi r.4,r.4,12 // Adjust to point to 4th word
        b Compare1byte // Jump to isolate the bad byte
Wrd5ne:
        addi r.3,r.3,16 // Adjust to point to 5th word
        addi r.4,r.4,16 // Adjust to point to 5th word
        b Compare1byte // Jump to isolate the bad byte
Wrd6ne:
        addi r.3,r.3,20 // Adjust to point to 6th word
        addi r.4,r.4,20 // Adjust to point to 6th word
        b Compare1byte // Jump to isolate the bad byte
Wrd7ne:
        addi r.3,r.3,24 // Adjust to point to 7th word
        addi r.4,r.4,24 // Adjust to point to 7th word
        b Compare1byte // Jump to isolate the bad byte
Wrd8ne:
        addi r.3,r.3,28 // Adjust to point to 8th word
        addi r.4,r.4,28 // Adjust to point to 8th word
//
// Scan byte by byte from r.3/r.4 to the end of SRC1. r.4 is advanced
// before the compare and r.3 only after a match, so on a mismatch r.3
// is left pointing at the first differing byte.
//
Wrd1ne:
Compare1byte:
        sub r.5,r.11,r.3 // Calculate remaining byte count
        add r.8,r.3,r.5 // Scan limit (equals r.11)
        cmpwi r.5,0 // Check for no bytes to compare
        beq- GetResults // Jump if processing completed
SingleByte:
        lbz r.6,0(r.3) // Get next SRC1 byte
        lbz r.7,0(r.4) // Get next SRC2 byte
        addi r.4,r.4,1 // Update SRC2 to next byte
        cmpw r.6,r.7 // Check for unequal bytes
        bne- GetResults // Jump if bytes aren't equal
        addi r.3,r.3,1 // Update SRC1 to next byte
        cmpw r.3,r.8 // Check for being done
        bne+ SingleByte // Jump if more bytes
//
// Compute the results
//
GetResults:
        sub r.6,r.11,r.3 // Bytes from r.3 to the end of SRC1
GetResults2:
        sub r.3,r.12,r.6 // Original length minus unmatched bytes
//
// Exit the routine
//
LEAF_EXIT(RtlCompareMemory)
//++
//
// ULONG
// RtlEqualMemory (
// IN PVOID Source1,
// IN PVOID Source2,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function compares two blocks of memory for equality.
//
// Arguments:
//
// Source1 (r.3) - Supplies a pointer to the first block of memory to
// compare.
//
// Source2 (r.4) - Supplies a pointer to the second block of memory to
// compare.
//
// Length (r.5) - Supplies the length, in bytes, of the memory to be
// compared.
//
// Return Value:
//
// If all bytes in the source strings match, then a value of TRUE is
// returned. Otherwise, FALSE is returned.
//
//--
LEAF_ENTRY(RtlEqualMemory)
//
// Check alignment
//
clrlwi r.12,r.5,28 // isolate residual bytes (Length & 15)
or r.9,r.3,r.4 // merge alignment bits
sub. r.11,r.5,r.12 // subtract out residual bytes
add r.10,r.3,r.5 // get ending Source1 address
beq+ EqualByByte // if eq, no 16-byte block to compare
andi. r.9,r.9,3 // isolate alignment bits
add r.5,r.3,r.11 // compute ending block address
bne- EqualUnaligned // if ne, different alignments
EqualAligned:
//
// Both blocks are word-aligned, and there are at least 16 bytes to compare.
//
lwz r.6,0(r.3) // Get 1st Source1 word
lwz r.7,0(r.4) // Get 1st Source2 word
lwz r.8,4(r.3) // Get 2nd Source1 word
cmpw r.6,r.7 // Check for 1st word equal
lwz r.9,4(r.4) // Get 2nd Source2 word
bne- EqualNotEqual // Jump if 1st word not equal
cmpw r.8,r.9 // Check for 2nd word equal
lwz r.6,8(r.3) // Get 3rd Source1 word
lwz r.7,8(r.4) // Get 3rd Source2 word
bne- EqualNotEqual // Jump if 2nd word not equal
cmpw r.6,r.7 // Check for 3rd word equal
lwz r.8,12(r.3) // Get 4th Source1 word
lwz r.9,12(r.4) // Get 4th Source2 word
bne- EqualNotEqual // Jump if 3rd word not equal
cmpw r.8,r.9 // Check for 4th word equal
addi r.3,r.3,16 // Update Source1 pointer
bne- EqualNotEqual // Jump if 4th word not equal
cmpw r.3,r.5 // Check for all blocks done
addi r.4,r.4,16 // Update Source2 pointer
bne- EqualAligned // Jump if more blocks
sub r.5,r.10,r.3 // compute remaining bytes
EqualByByte:
//
// Compare 1-byte blocks until done.
//
cmpwi r.5,0 // Check for no bytes left
beq+ EqualEqual // Jump to return if done
EqualByByteLoop:
lbz r.6,0(r.3) // Get Source1 byte
lbz r.7,0(r.4) // Get Source2 byte
addi r.3,r.3,1 // Update Source1 address
cmpw r.6,r.7 // Check for equality
addi r.4,r.4,1 // Update Source2 address
bne- EqualNotEqual // Jump if not equal
cmpw r.10,r.3 // Check for end of block
bne- EqualByByteLoop // Loop if not done
EqualEqual:
//
// The blocks are equal.
//
li r.3,TRUE // indicate blocks are equal
blr // return to caller
EqualUnaligned:
//
// There are at least 16 bytes to compare, but at least one of the blocks
// is not word-aligned.
//
andi. r.9,r.9,1 // isolate alignment bits
bne- EqualByteUnaligned // jump if at least one not halfword aligned
EqualUnalignedLoop:
//
// Both blocks are halfword-aligned, and there are at least 16 bytes to compare.
//
lhz r.6,0(r.3) // Get 1st hword of 1st Source1 wrd
lhz r.7,0(r.4) // Get 1st hword of 1st Source2 wrd
lhz r.8,2(r.3) // Get 2nd hword of 1st Source1 wrd
cmpw r.6,r.7 // Check for 1st word equal
lhz r.9,2(r.4) // Get 2nd hword of 1st Source2 wrd
bne- EqualNotEqual // Check for 1st word equal
cmpw r.8,r.9 // Check for 1st word equal
lhz r.6,4(r.3) // Get 1st hword of 2nd Source1 wrd
lhz r.7,4(r.4) // Get 1st hword of 2nd Source2 wrd
bne- EqualNotEqual // Check for 1st word equal
cmpw r.6,r.7 // Check for 2nd word equal
lhz r.8,6(r.3) // Get 2nd hword of 2nd Source1 wrd
lhz r.9,6(r.4) // Get 2nd hword of 2nd Source2 wrd
bne- EqualNotEqual // Check for 2nd word equal
cmpw r.8,r.9 // Check for 2nd word equal
lhz r.6,8(r.3) // Get 1st hword of 3rd Source1 wrd
lhz r.7,8(r.4) // Get 1st hword of 3rd Source2 wrd
bne- EqualNotEqual // Check for 2nd word equal
cmpw r.6,r.7 // Check for 3rd word equal
lhz r.8,10(r.3) // Get 2nd hword of 3rd Source1 wrd
lhz r.9,10(r.4) // Get 2nd hword of 3rd Source2 wrd
bne- EqualNotEqual // Check for 3rd word equal
cmpw r.8,r.9 // Check for 3rd word equal
lhz r.6,12(r.3) // Get 1st hword of 4th Source1 wrd
lhz r.7,12(r.4) // Get 1st hword of 4th Source2 wrd
bne- EqualNotEqual // Check for 3rd word equal
cmpw r.6,r.7 // Check for 4th word equal
lhz r.8,14(r.3) // Get 2nd hword of 4th Source1 wrd
lhz r.9,14(r.4) // Get 2nd hword of 4th Source2 wrd
bne- EqualNotEqual // Check for 4th word equal
cmpw r.8,r.9 // Check for 4th word equal
addi r.3,r.3,16 // Update Source1 pointer
bne- EqualNotEqual // Check for 4th word equal
cmpw r.3,r.5 // Check for all blocks done
addi r.4,r.4,16 // Update Source2 pointer
bne- EqualUnalignedLoop // Jump if more blocks
sub r.5,r.10,r.3 // compute remaining bytes
b EqualByByte // compare rest byte-by-byte
EqualByteUnaligned:
//
// There are at least 16 bytes to compare, but at least one of the blocks
// is not halfword-aligned.
//
// Because we don't expect very high byte counts in RtlEqualMemory, and
// we also don't expect unaligned buffers very often, we don't bother
// with the byte/halfword fetches that RtlCompareMemory does.
//
lbz r.6,0(r.3) // Get first byte of 1st Source1 wrd
lbz r.7,0(r.4) // Get first byte of 1st Source2 wrd
lbz r.8,1(r.3) // Get 2nd byte of 1st Source1 wrd
cmpw r.6,r.7 // Check for 1st word equal
lbz r.9,1(r.4) // Get 2nd byte of 1st Source2 wrd
bne EqualNotEqual // Jump if 1st word not equal
cmpw r.8,r.9 // Check for 1st word equal
lbz r.6,2(r.3) // Get first byte of 1st Source1 wrd
lbz r.7,2(r.4) // Get first byte of 1st Source2 wrd
bne EqualNotEqual // Jump if 1st word not equal
cmpw r.6,r.7 // Check for 1st word equal
lbz r.8,3(r.3) // Get 2nd byte of 1st Source1 wrd
lbz r.9,3(r.4) // Get 2nd byte of 1st Source2 wrd
bne EqualNotEqual // Jump if 1st word not equal
cmpw r.8,r.9 // Check for 1st word equal
lbz r.6,4(r.3) // Get first byte of 2nd Source1 wrd
lbz r.7,4(r.4) // Get first byte of 2nd Source2 wrd
bne EqualNotEqual // Jump if 1st word not equal
cmpw r.6,r.7 // Check for 2nd word equal
lbz r.8,5(r.3) // Get 2nd byte of 2nd Source1 wrd
lbz r.9,5(r.4) // Get 2nd byte of 2nd Source2 wrd
bne EqualNotEqual // Jump if 2nd word not equal
cmpw r.8,r.9 // Check for 2nd word equal
lbz r.6,6(r.3) // Get first byte of 2nd Source1 wrd
lbz r.7,6(r.4) // Get first byte of 2nd Source2 wrd
bne EqualNotEqual // Jump if 2nd word not equal
cmpw r.6,r.7 // Check for 2nd word equal
lbz r.8,7(r.3) // Get 2nd byte of 2nd Source1 wrd
lbz r.9,7(r.4) // Get 2nd byte of 2nd Source2 wrd
bne EqualNotEqual // Jump if 2nd word not equal
cmpw r.8,r.9 // Check for 2nd word equal
lbz r.6,8(r.3) // Get first byte of 3rd Source1 wrd
lbz r.7,8(r.4) // Get first byte of 3rd Source2 wrd
bne EqualNotEqual // Jump if 2nd word not equal
cmpw r.6,r.7 // Check for 3rd word equal
lbz r.8,9(r.3) // Get 2nd byte of 3rd Source1 wrd
lbz r.9,9(r.4) // Get 2nd byte of 3rd Source2 wrd
bne EqualNotEqual // Jump if 3rd word not equal
cmpw r.8,r.9 // Check for 3rd word equal
lbz r.6,10(r.3) // Get first byte of 3rd Source1 wrd
lbz r.7,10(r.4) // Get first byte of 3rd Source2 wrd
bne EqualNotEqual // Jump if 3rd word not equal
cmpw r.6,r.7 // Check for 3rd word equal
lbz r.8,11(r.3) // Get 2nd byte of 3rd Source1 wrd
lbz r.9,11(r.4) // Get 2nd byte of 3rd Source2 wrd
bne EqualNotEqual // Jump if 3rd word not equal
cmpw r.8,r.9 // Check for 3rd word equal
lbz r.6,12(r.3) // Get first byte of 4th Source1 wrd
lbz r.7,12(r.4) // Get first byte of 4th Source2 wrd
bne EqualNotEqual // Jump if 3rd word not equal
cmpw r.6,r.7 // Check for 4th word equal
lbz r.8,13(r.3) // Get 2nd byte of 4th Source1 wrd
lbz r.9,13(r.4) // Get 2nd byte of 4th Source2 wrd
bne EqualNotEqual // Jump if 4th word not equal
cmpw r.8,r.9 // Check for 4th word equal
lbz r.6,14(r.3) // Get first byte of 4th Source1 wrd
lbz r.7,14(r.4) // Get first byte of 4th Source2 wrd
bne EqualNotEqual // Jump if 4th word not equal
cmpw r.6,r.7 // Check for 4th word equal
lbz r.8,15(r.3) // Get 2nd byte of 4th Source1 wrd
lbz r.9,15(r.4) // Get 2nd byte of 4th Source2 wrd
bne EqualNotEqual // Jump if 4th word not equal
cmpw r.8,r.9 // Check for 4th word equal
addi r.3,r.3,16 // Update Source1 pointer
bne EqualNotEqual // Jump if 4th word not equal
cmpw r.3,r.5 // Check for all blocks done
addi r.4,r.4,16 // Update Source2 pointer
bne- EqualByteUnaligned // Jump if more blocks
sub r.5,r.10,r.3 // compute remaining bytes
b EqualByByte // compare rest byte-by-byte
EqualNotEqual:
//
// The blocks are not equal.
//
li r.3, FALSE // indicate blocks are not equal
LEAF_EXIT(RtlEqualMemory) // return to caller
//++
//
// VOID
// RtlMoveMemory (
// IN PVOID Destination,
// IN PVOID Source,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function moves memory either forward or backward, aligned or
// unaligned, in 32-byte blocks, followed by 4-byte blocks, followed
// by any remaining bytes.
//
// The alternate entry point, RtlCopyMemory, moves non-overlapping
// blocks only, in the forward direction.
//
// RtlCopyMemory32 is the same as RtlCopyMemory but is guaranteed
// never to copy more than 32 bits at a time. RtlCopyMemory may
// (probably will) be modified in the future to copy 64 bits at
// a time.
//
// Arguments:
//
// DEST (r.3) - Supplies a pointer to the destination address of
// the move operation.
//
// SRC (r.4) - Supplies a pointer to the source address of the move
// operation.
//
// LNGTH (r.5) - Supplies the length, in bytes, of the memory to be
// moved.
//
// Return Value:
//
// None.
//
//--
//
// Define the routine entry point
//
LEAF_ENTRY(RtlMoveMemory)
//
// Decide the direction of the move.
//
// A forward move is used when SRC >= DEST, or when the source block ends
// at or before the start of the destination block. Otherwise the blocks
// overlap with DEST above SRC and the move must run backward so that
// source bytes are read before they are overwritten.
//
// N.B. SRC, DEST and the ending SRC address are addresses, so they are
// compared as unsigned values (cmplw). A signed compare would order
// an address at or above 0x80000000 below one under 0x80000000.
//
cmplw r.4,r.3 // Compare SRC with DEST (unsigned)
bge+ MoveForward // Jump if SRC >= DEST
add r.10,r.4,r.5 // Get ending SRC address
cmplw r.10,r.3 // Check for overlap (unsigned)
bgt- MoveBackward // Jump for overlap
//
// Move Memory Forward
//
// RtlMoveMemory falls through to here (or branches here) when a forward
// move is safe. RtlCopyMemory and RtlCopyMemory32 enter here directly,
// so they perform no overlap check.
//
// On entry: r.3 = DEST, r.4 = SRC, r.5 = LNGTH.
//
// Check alignment
//
ALTERNATE_ENTRY(RtlCopyMemory)
ALTERNATE_ENTRY(RtlCopyMemory32)
MoveForward:
cmpwi r.5,4 // Check for less than 4 bytes (signed compare)
blt- FwdMoveByByte // Jump if single byte moves
xor r.9,r.4,r.3 // Differing address bits of SRC and DEST
andi. r.9,r.9,3 // Isolate alignment bits
bne- MvFwdUnaligned // Jump if different alignments
//
// Move Memory Forward - Same SRC and DEST alignment
//
// Load and store extra bytes until a word boundary is reached.
// r.6 first holds DEST mod 4 and is then reloaded with the number of
// bytes moved (4 - DEST mod 4) for use at UpdateAddrs.
//
MvFwdAligned:
andi. r.6,r.3,3 // Get DEST mod 4
beq+ FwdBlkDiv // Jump to process 32-Byte blocks
cmpwi r.6,3 // Check for 1 byte short of a word boundary
bne+ FwdChkFor2 // If not, check next case
lbz r.7,0(r.4) // Get unaligned byte
li r.6,1 // Set byte move count
stb r.7,0(r.3) // Store unaligned byte
b UpdateAddrs // Jump to update addresses
FwdChkFor2:
cmpwi r.6,2 // Check for halfword aligned
bne+ FwdChkFor1 // If not, check next case
lhz r.7,0(r.4) // Get unaligned halfword
li r.6,2 // Set byte move count
sth r.7,0(r.3) // Store unaligned halfword
b UpdateAddrs // Jump to update addresses
FwdChkFor1:
lbz r.8,0(r.4) // Get unaligned byte
lhz r.7,1(r.4) // Get halfword that follows it
stb r.8,0(r.3) // Store unaligned byte
sth r.7,1(r.3) // Store halfword that follows it
li r.6,3 // Set byte move count
UpdateAddrs:
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
add r.4,r.4,r.6 // Update the SRC address
add r.3,r.3,r.6 // Update the DEST address
//
// Divide the block to process into 32-byte blocks
//
// r.6 = LNGTH mod 32, r.7 = number of bytes held in full 32-byte blocks,
// r.10 = SRC address at which the 32-byte loop stops. The beq- tests
// cr0 as set by the sub. (the add does not alter cr0).
//
FwdBlkDiv:
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
sub. r.7,r.5,r.6 // Get byte count of full blocks
add r.10,r.4,r.7 // Get SRC address past last full block
beq- FwdMoveBy4Bytes // Jump if no full blocks
mr r.5,r.6 // Set Length = remainder
//
// Move 32-byte blocks
//
// The SRC pointer is advanced and compared with r.10 before the last
// two stores; those stores still use the not yet updated DEST pointer.
//
FwdMvFullBlks:
lwz r.6,0(r.4) // Get 1st SRC word
lwz r.7,4(r.4) // Get 2nd SRC word
stw r.6,0(r.3) // Store 1st DEST word
stw r.7,4(r.3) // Store 2nd DEST word
lwz r.6,8(r.4) // Get 3rd SRC word
lwz r.7,12(r.4) // Get 4th SRC word
stw r.6,8(r.3) // Store 3rd DEST word
stw r.7,12(r.3) // Store 4th DEST word
lwz r.6,16(r.4) // Get 5th SRC word
lwz r.7,20(r.4) // Get 6th SRC word
stw r.6,16(r.3) // Store 5th DEST word
stw r.7,20(r.3) // Store 6th DEST word
lwz r.6,24(r.4) // Get 7th SRC word
lwz r.7,28(r.4) // Get 8th SRC word
addi r.4,r.4,32 // Update SRC pointer
cmpw r.4,r.10 // Check for all blocks done
stw r.6,24(r.3) // Store 7th DEST word
stw r.7,28(r.3) // Store 8th DEST word
addi r.3,r.3,32 // Update DEST pointer
bne+ FwdMvFullBlks // Jump if more blocks
//
// Move 4-byte blocks
//
// r.6 = LNGTH mod 4, r.7 = number of bytes held in 4-byte blocks,
// r.10 = SRC address at which the 4-byte loop stops.
//
FwdMoveBy4Bytes:
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
sub. r.7,r.5,r.6 // Get byte count of 4-byte blocks
add r.10,r.4,r.7 // Get SRC address past last 4-byte block
beq- FwdMoveByByte // Jump if no 4-byte blocks
mr r.5,r.6 // Set Length = remainder
FwdLpOn4Bytes:
lwz r.6,0(r.4) // Load next set of 4 bytes
addi r.4,r.4,4 // Get pointer to next SRC block
cmpw r.4,r.10 // Check for last block
stw r.6,0(r.3) // Store next DEST block
addi r.3,r.3,4 // Get pointer to next DEST block
bne+ FwdLpOn4Bytes // Jump if more blocks
//
// Move 1-byte blocks
//
// Moves the final bytes (at most three are handled here) and returns to
// the caller. All forward paths finish through this code.
//
FwdMoveByByte:
cmpwi r.5,0 // Check for no bytes left
beqlr+ // Return if done
cmpwi r.5,1 // Check for exactly 1 byte left
lbz r.6,0(r.4) // Get 1st SRC byte
stb r.6,0(r.3) // Store 1st DEST byte
beqlr+ // Return if done
cmpwi r.5,2 // Check for exactly 2 bytes left
lbz r.6,1(r.4) // Get 2nd SRC byte
stb r.6,1(r.3) // Store 2nd DEST byte
beqlr+ // Return if done
lbz r.6,2(r.4) // Get 3rd SRC byte
stb r.6,2(r.3) // Store 3rd DEST byte
blr // Return
//
// Forward Move - SRC and DEST have different alignments
//
// Reached only when (SRC xor DEST) mod 4 is nonzero. If (SRC or DEST)
// mod 4 is exactly 2, neither address is odd (one is word aligned and
// the other halfword aligned) and the move is done with halfwords.
// Any other result means at least one address is odd.
//
MvFwdUnaligned:
or r.9,r.4,r.3 // Combine SRC and DEST address bits
andi. r.9,r.9,3 // Isolate alignment
cmpwi r.9,2 // Check for even result
bne+ FwdMvByteUnaligned // Jump for byte unaligned
//
// Divide the blocks to process into 32-byte blocks
//
FwdBlkDivUnaligned:
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
sub. r.7,r.5,r.6 // Get byte count of full blocks
add r.10,r.4,r.7 // Get SRC address past last full block
beq- FwdMvHWrdBy4Bytes // Jump if no full blocks
mr r.5,r.6 // Set Length = remainder
//
// Forward Move - SRC or DEST is halfword aligned, the other is by word
//
// Each pass moves 32 bytes as sixteen halfwords.
//
FwdMvByHWord:
lhz r.6,0(r.4) // Get 1st 2 bytes of 1st SRC wrd
lhz r.7,2(r.4) // Get 2nd 2 bytes of 1st SRC wrd
sth r.6,0(r.3) // Put 1st 2 bytes of 1st DST wrd
sth r.7,2(r.3) // Put 2nd 2 bytes of 1st DST wrd
lhz r.6,4(r.4) // Get 1st 2 bytes of 2nd SRC wrd
lhz r.7,6(r.4) // Get 2nd 2 bytes of 2nd SRC wrd
sth r.6,4(r.3) // Put 1st 2 bytes of 2nd DST wrd
sth r.7,6(r.3) // Put 2nd 2 bytes of 2nd DST wrd
lhz r.6,8(r.4) // Get 1st 2 bytes of 3rd SRC wrd
lhz r.7,10(r.4) // Get 2nd 2 bytes of 3rd SRC wrd
sth r.6,8(r.3) // Put 1st 2 bytes of 3rd DST wrd
sth r.7,10(r.3) // Put 2nd 2 bytes of 3rd DST wrd
lhz r.6,12(r.4) // Get 1st 2 bytes of 4th SRC wrd
lhz r.7,14(r.4) // Get 2nd 2 bytes of 4th SRC wrd
sth r.6,12(r.3) // Put 1st 2 bytes of 4th DST wrd
sth r.7,14(r.3) // Put 2nd 2 bytes of 4th DST wrd
lhz r.6,16(r.4) // Get 1st 2 bytes of 5th SRC wrd
lhz r.7,18(r.4) // Get 2nd 2 bytes of 5th SRC wrd
sth r.6,16(r.3) // Put 1st 2 bytes of 5th DST wrd
sth r.7,18(r.3) // Put 2nd 2 bytes of 5th DST wrd
lhz r.6,20(r.4) // Get 1st 2 bytes of 6th SRC wrd
lhz r.7,22(r.4) // Get 2nd 2 bytes of 6th SRC wrd
sth r.6,20(r.3) // Put 1st 2 bytes of 6th DST wrd
sth r.7,22(r.3) // Put 2nd 2 bytes of 6th DST wrd
lhz r.6,24(r.4) // Get 1st 2 bytes of 7th SRC wrd
lhz r.7,26(r.4) // Get 2nd 2 bytes of 7th SRC wrd
sth r.6,24(r.3) // Put 1st 2 bytes of 7th DST wrd
sth r.7,26(r.3) // Put 2nd 2 bytes of 7th DST wrd
lhz r.6,28(r.4) // Get 1st 2 bytes of 8th SRC wrd
lhz r.7,30(r.4) // Get 2nd 2 bytes of 8th SRC wrd
addi r.4,r.4,32 // Update SRC pointer
cmpw r.4,r.10 // Check for all blocks done
sth r.6,28(r.3) // Put 1st 2 bytes of 8th DST wrd
sth r.7,30(r.3) // Put 2nd 2 bytes of 8th DST wrd
addi r.3,r.3,32 // Update DEST pointer
bne+ FwdMvByHWord // Jump if more blocks
//
// Move 4-byte blocks, two halfwords at a time
//
FwdMvHWrdBy4Bytes:
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
sub. r.7,r.5,r.6 // Get byte count of 4-byte blocks
add r.10,r.4,r.7 // Get SRC address past last 4-byte block
beq- FwdMoveByByte // Jump if no 4-byte blocks
mr r.5,r.6 // Set Length = remainder
FwdHWrdLpOn4Bytes:
lhz r.6,0(r.4) // Get 1st 2 bytes of 1st SRC wrd
lhz r.7,2(r.4) // Get 2nd 2 bytes of 1st SRC wrd
addi r.4,r.4,4 // Update SRC pointer
cmpw r.4,r.10 // Check for last block
sth r.6,0(r.3) // Put 1st 2 bytes of 1st DST wrd
sth r.7,2(r.3) // Put 2nd 2 bytes of 1st DST wrd
addi r.3,r.3,4 // Update DEST pointer
bne+ FwdHWrdLpOn4Bytes // Jump if more blocks
b FwdMoveByByte // Jump to complete last bytes
//
// Forward Move - at least one of SRC and DEST is odd
//
// If both addresses are odd, every halfword access at an odd offset
// (1, 3, 5, ...) falls on an even address, so the byte/halfword loop
// below is used. Otherwise the pure byte loop at FwdBlksByByte is used.
//
FwdMvByteUnaligned:
and r.9,r.3,r.4 // Bit 0 set only if both are odd
andi. r.9,r.9,1 // Isolate bit 0 and set cr0
beq- FwdBlksByByte // Jump unless both are odd
//
// Divide the blocks to process into 32-byte blocks
//
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
sub. r.7,r.5,r.6 // Get byte count of full blocks
add r.10,r.4,r.7 // Get SRC address past last full block
beq- FwdMvByteBy4Bytes // Jump if no full blocks
mr r.5,r.6 // Set Length = remainder
//
// Forward Move - Both DEST and SRC are byte unaligned, but differently
//
// Each pass moves 32 bytes: one byte, fifteen halfwords, one byte.
//
FwdMvByByte:
lbz r.6,0(r.4) // Get first byte of 1st SRC word
lhz r.7,1(r.4) // Get mid-h-word of 1st SRC word
lhz r.8,3(r.4) // Get h-word crossing 1st/2nd SRC word
stb r.6,0(r.3) // Put first byte of 1st DEST word
sth r.7,1(r.3) // Put mid-h-word of 1st DEST word
sth r.8,3(r.3) // Put h-word crossing 1st/2nd DEST word
lhz r.6,5(r.4) // Get mid-h-word of 2nd SRC word
lhz r.7,7(r.4) // Get h-word crossing 2nd/3rd SRC word
lhz r.8,9(r.4) // Get mid-h-word of 3rd SRC word
sth r.6,5(r.3) // Put mid-h-word of 2nd DEST word
sth r.7,7(r.3) // Put h-word crossing 2nd/3rd DEST word
sth r.8,9(r.3) // Put mid-h-word of 3rd DEST word
lhz r.6,11(r.4) // Get h-word crossing 3rd/4th SRC word
lhz r.7,13(r.4) // Get mid-h-word of 4th SRC word
lhz r.8,15(r.4) // Get h-word crossing 4th/5th SRC word
sth r.6,11(r.3) // Put h-word crossing 3rd/4th DEST word
sth r.7,13(r.3) // Put mid-h-word of 4th DEST word
sth r.8,15(r.3) // Put h-word crossing 4th/5th DEST word
lhz r.6,17(r.4) // Get mid-h-word of 5th SRC word
lhz r.7,19(r.4) // Get h-word crossing 5th/6th SRC word
lhz r.8,21(r.4) // Get mid-h-word of 6th SRC word
sth r.6,17(r.3) // Put mid-h-word of 5th DEST word
sth r.7,19(r.3) // Put h-word crossing 5th/6th DEST word
sth r.8,21(r.3) // Put mid-h-word of 6th DEST word
lhz r.6,23(r.4) // Get h-word crossing 6th/7th SRC word
lhz r.7,25(r.4) // Get mid-h-word of 7th SRC word
lhz r.8,27(r.4) // Get h-word crossing 7th/8th SRC word
sth r.6,23(r.3) // Put h-word crossing 6th/7th DEST word
sth r.7,25(r.3) // Put mid-h-word of 7th DEST word
sth r.8,27(r.3) // Put h-word crossing 7th/8th DEST word
lhz r.6,29(r.4) // Get mid-h-word of 8th SRC word
lbz r.7,31(r.4) // Get last byte of 8th SRC word
addi r.4,r.4,32 // Update SRC pointer
cmpw r.4,r.10 // Check for all blocks done
sth r.6,29(r.3) // Put mid-h-word of 8th DEST word
stb r.7,31(r.3) // Put last byte of 8th DEST word
addi r.3,r.3,32 // Update DEST pointer
bne+ FwdMvByByte // Jump if more blocks
//
// Move 4-byte blocks with both DEST and SRC odd: byte, halfword, byte
//
FwdMvByteBy4Bytes:
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
sub. r.7,r.5,r.6 // Get byte count of 4-byte blocks
add r.10,r.4,r.7 // Get SRC address past last 4-byte block
beq- FwdMoveByByte // Jump if no 4-byte blocks
mr r.5,r.6 // Set Length = remainder
FwdByteLpOn4Bytes:
lbz r.6,0(r.4) // Get first byte of 1st SRC word
lhz r.7,1(r.4) // Get mid-h-word of 1st SRC word
lbz r.8,3(r.4) // Get last byte of 1st SRC word
stb r.6,0(r.3) // Put first byte of 1st DEST wd
addi r.4,r.4,4 // Update SRC pointer
cmpw r.4,r.10 // Check for last block
sth r.7,1(r.3) // Put mid-h-word of 1st DEST wd
stb r.8,3(r.3) // Put last byte of 1st DEST wrd
addi r.3,r.3,4 // Update DEST pointer
bne+ FwdByteLpOn4Bytes // Jump if more blocks
b FwdMoveByByte // Jump to complete last bytes
//
// Forward Move - Either SRC or DEST are byte unaligned but not both
//
// Exactly one of the two addresses is odd, so only byte loads and
// stores are used on this path.
//
// Divide the blocks to process into 32-byte blocks
//
FwdBlksByByte:
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
sub. r.7,r.5,r.6 // Get byte count of full blocks
add r.10,r.4,r.7 // Get SRC address past last full block
beq- FwdMvBlksOf4Bytes // Jump if no full blocks
mr r.5,r.6 // Set Length = remainder
//
// Each pass moves 32 bytes, two bytes at a time.
//
FwdMvBlksByByte:
lbz r.6,0(r.4) // Get first byte of 1st SRC wrd
lbz r.7,1(r.4) // Get second byte of 1st SRC wrd
stb r.6,0(r.3) // Put first byte of 1st DEST wrd
stb r.7,1(r.3) // Put second byte of 1st DST wrd
lbz r.6,2(r.4) // Get third byte of 1st SRC wrd
lbz r.7,3(r.4) // Get fourth byte of 1st SRC wrd
stb r.6,2(r.3) // Put third byte of 1st DEST wrd
stb r.7,3(r.3) // Put fourth byte of 1st DST wrd
lbz r.6,4(r.4) // Get first byte of 2nd SRC wrd
lbz r.7,5(r.4) // Get 2nd byte of 2nd SRC wrd
stb r.6,4(r.3) // Put first byte of 2nd DEST wrd
stb r.7,5(r.3) // Put second byte of 2nd DST wrd
lbz r.6,6(r.4) // Get third byte of 2nd SRC wrd
lbz r.7,7(r.4) // Get fourth byte of 2nd SRC wrd
stb r.6,6(r.3) // Put third byte of 2nd DEST wrd
stb r.7,7(r.3) // Put fourth byte of 2nd DST wrd
lbz r.6,8(r.4) // Get first byte of 3rd SRC wrd
lbz r.7,9(r.4) // Get second byte of 3rd SRC wrd
stb r.6,8(r.3) // Put first byte of 3rd DEST wrd
stb r.7,9(r.3) // Put second byte of 3rd DST wrd
lbz r.6,10(r.4) // Get third byte of 3rd SRC wrd
lbz r.7,11(r.4) // Get fourth byte of 3rd SRC wrd
stb r.6,10(r.3) // Put third byte of 3rd DEST wrd
stb r.7,11(r.3) // Put fourth byte of 3rd DST wrd
lbz r.6,12(r.4) // Get first byte of 4th SRC wrd
lbz r.7,13(r.4) // Get second byte of 4th SRC wrd
stb r.6,12(r.3) // Put first byte of 4th DEST wrd
stb r.7,13(r.3) // Put second byte of 4th DST wrd
lbz r.6,14(r.4) // Get third byte of 4th SRC wrd
lbz r.7,15(r.4) // Get fourth byte of 4th SRC wrd
stb r.6,14(r.3) // Put third byte of 4th DEST wrd
stb r.7,15(r.3) // Put fourth byte of 4th DST wrd
lbz r.6,16(r.4) // Get first byte of 5th SRC wrd
lbz r.7,17(r.4) // Get second byte of 5th SRC wrd
stb r.6,16(r.3) // Put first byte of 5th DEST wrd
stb r.7,17(r.3) // Put second byte of 5th DST wrd
lbz r.6,18(r.4) // Get third byte of 5th SRC wrd
lbz r.7,19(r.4) // Get fourth byte of 5th SRC wrd
stb r.6,18(r.3) // Put third byte of 5th DEST wrd
stb r.7,19(r.3) // Put fourth byte of 5th DST wrd
lbz r.6,20(r.4) // Get first byte of 6th SRC wrd
lbz r.7,21(r.4) // Get second byte of 6th SRC wrd
stb r.6,20(r.3) // Put first byte of 6th DEST wrd
stb r.7,21(r.3) // Put second byte of 6th DST wrd
lbz r.6,22(r.4) // Get third byte of 6th SRC wrd
lbz r.7,23(r.4) // Get fourth byte of 6th SRC wrd
stb r.6,22(r.3) // Put third byte of 6th DEST wrd
stb r.7,23(r.3) // Put fourth byte of 6th DST wrd
lbz r.6,24(r.4) // Get first byte of 7th SRC wrd
lbz r.7,25(r.4) // Get second byte of 7th SRC wrd
stb r.6,24(r.3) // Put first byte of 7th DEST wrd
stb r.7,25(r.3) // Put second byte of 7th DST wrd
lbz r.6,26(r.4) // Get third byte of 7th SRC wrd
lbz r.7,27(r.4) // Get fourth byte of 7th SRC wrd
stb r.6,26(r.3) // Put third byte of 7th DEST wrd
stb r.7,27(r.3) // Put fourth byte of 7th DST wrd
lbz r.6,28(r.4) // Get first byte of 8th SRC wrd
lbz r.7,29(r.4) // Get second byte of 8th SRC wrd
stb r.6,28(r.3) // Put first byte of 8th DEST wrd
stb r.7,29(r.3) // Put second byte of 8th DST wrd
lbz r.6,30(r.4) // Get third byte of 8th SRC wrd
lbz r.7,31(r.4) // Get fourth byte of 8th SRC wrd
addi r.4,r.4,32 // Update SRC pointer
cmpw r.4,r.10 // Check for all blocks done
stb r.6,30(r.3) // Put third byte of 8th DEST wrd
stb r.7,31(r.3) // Put fourth byte of 8th DST wrd
addi r.3,r.3,32 // Update DEST pointer
bne+ FwdMvBlksByByte // Jump if more blocks
//
// Move 4-byte blocks one byte at a time (exactly one of DEST/SRC is odd)
//
FwdMvBlksOf4Bytes:
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
sub. r.7,r.5,r.6 // Get byte count of 4-byte blocks
add r.10,r.4,r.7 // Get SRC address past last 4-byte block
beq- FwdMoveByByte // Jump if no 4-byte blocks
mr r.5,r.6 // Set Length = remainder
FwdBlksLpOn4Bytes:
lbz r.6,0(r.4) // Get first byte of 1st SRC wrd
lbz r.7,1(r.4) // Get second byte of 1st SRC wrd
stb r.6,0(r.3) // Put first byte of 1st DEST wrd
stb r.7,1(r.3) // Put second byte of 1st DST wrd
lbz r.6,2(r.4) // Get third byte of 1st SRC wrd
lbz r.7,3(r.4) // Get fourth byte of 1st SRC wrd
addi r.4,r.4,4 // Update SRC pointer
cmpw r.4,r.10 // Check for last block
stb r.6,2(r.3) // Put third byte of 1st DEST wrd
stb r.7,3(r.3) // Put fourth byte of 1st DST wrd
addi r.3,r.3,4 // Update DEST pointer
bne+ FwdBlksLpOn4Bytes // Jump if more blocks
b FwdMoveByByte // Jump to complete last bytes
//
// Move Memory Backward
//
// Entered from RtlMoveMemory when the blocks overlap with DEST above
// SRC. r.3 and r.4 are first advanced to the ending DEST and SRC
// addresses (one past the last byte); every backward path then moves
// data at negative offsets from these pointers, working downward.
//
// Check alignment
//
MoveBackward:
add r.4,r.4,r.5 // Compute ending SRC address
add r.3,r.3,r.5 // Compute ending DEST address
cmpwi r.5,4 // Check for less than 4 bytes (signed compare)
blt- BckMoveByByte // Jump if single byte moves
xor r.9,r.4,r.3 // Differing address bits of SRC and DEST
andi. r.9,r.9,3 // Isolate alignment bits
bne- MvBckUnaligned // Jump if different alignments
//
// Move Memory Backward - Same SRC and DEST alignment
//
// Load and store extra bytes until a word boundary is reached.
// r.6 = ending DEST address mod 4, which is also the number of bytes
// moved here (1, 2 or 3).
//
MvBckAligned:
andi. r.6,r.3,3 // Get ending DEST address mod 4
beq+ BckBlkDiv // Jump to process 32-Byte blocks
cmpwi r.6,1 // Check for 1 byte past a word boundary
bne+ BckChkFor2 // If not, check next case
lbz r.7,-1(r.4) // Get unaligned byte
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
stb r.7,-1(r.3) // Store unaligned byte
b BckUpdateAddrs // Jump to update addresses
BckChkFor2:
cmpwi r.6,2 // Check for halfword aligned
bne+ BckChkFor3 // If not, check next case
lhz r.7,-2(r.4) // Get unaligned halfword
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
sth r.7,-2(r.3) // Store unaligned halfword
b BckUpdateAddrs // Jump to update addresses
BckChkFor3:
lbz r.8,-1(r.4) // Get unaligned byte
lhz r.7,-3(r.4) // Get halfword below it
stb r.8,-1(r.3) // Store unaligned byte
sth r.7,-3(r.3) // Store halfword below it
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
BckUpdateAddrs:
sub r.4,r.4,r.6 // Update the SRC address
sub r.3,r.3,r.6 // Update the DEST address
//
// Divide the block to process into 32-byte blocks
//
// r.6 = LNGTH mod 32, r.7 = number of bytes held in full 32-byte blocks,
// r.10 = SRC address at which the 32-byte loop stops. The beq- tests
// cr0 as set by the sub. (the plain sub does not alter cr0).
//
BckBlkDiv:
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
sub. r.7,r.5,r.6 // Get byte count of full blocks
sub r.10,r.4,r.7 // Get SRC address where full blocks end
beq- BckMoveBy4Bytes // Jump if no full blocks
mr r.5,r.6 // Set Length = remainder
//
// Move 32-byte blocks
//
// Words are moved from the highest address downward. The SRC pointer
// is decremented and compared with r.10 before the last two stores;
// those stores still use the not yet updated DEST pointer.
//
BckMvFullBlks:
lwz r.6,-4(r.4) // Get 1st SRC word
lwz r.7,-8(r.4) // Get 2nd SRC word
stw r.6,-4(r.3) // Store 1st DEST word
stw r.7,-8(r.3) // Store 2nd DEST word
lwz r.6,-12(r.4) // Get 3rd SRC word
lwz r.7,-16(r.4) // Get 4th SRC word
stw r.6,-12(r.3) // Store 3rd DEST word
stw r.7,-16(r.3) // Store 4th DEST word
lwz r.6,-20(r.4) // Get 5th SRC word
lwz r.7,-24(r.4) // Get 6th SRC word
stw r.6,-20(r.3) // Store 5th DEST word
stw r.7,-24(r.3) // Store 6th DEST word
lwz r.6,-28(r.4) // Get 7th SRC word
lwz r.7,-32(r.4) // Get 8th SRC word
subi r.4,r.4,32 // Update SRC pointer
cmpw r.4,r.10 // Check for all blocks done
stw r.6,-28(r.3) // Store 7th DEST word
stw r.7,-32(r.3) // Store 8th DEST word
subi r.3,r.3,32 // Update DEST pointer
bne+ BckMvFullBlks // Jump if more blocks
//
// Move 4-byte blocks
//
// r.6 = LNGTH mod 4, r.7 = number of bytes held in 4-byte blocks,
// r.10 = SRC address at which the 4-byte loop stops.
//
BckMoveBy4Bytes:
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
sub. r.7,r.5,r.6 // Get byte count of 4-byte blocks
sub r.10,r.4,r.7 // Get SRC address where 4-byte blocks end
beq- BckMoveByByte // Jump if no 4-byte blocks
mr r.5,r.6 // Set Length = remainder
BckLpOn4Bytes:
lwz r.6,-4(r.4) // Load next set of 4 bytes
subi r.4,r.4,4 // Get pointer to next SRC block
cmpw r.4,r.10 // Check for last block
stw r.6,-4(r.3) // Store next DEST block
subi r.3,r.3,4 // Get pointer to next DEST block
bne+ BckLpOn4Bytes // Jump if more blocks
//
// Move 1-byte blocks
//
// Moves the final bytes (at most three are handled here) and returns to
// the caller. All backward paths finish through this code.
//
BckMoveByByte:
cmpwi r.5,0 // Check for no bytes left
beqlr+ // Return if done
lbz r.6,-1(r.4) // Get 1st SRC byte
cmpwi r.5,1 // Check for exactly 1 byte left
stb r.6,-1(r.3) // Store 1st DEST byte
beqlr+ // Return if done
lbz r.6,-2(r.4) // Get 2nd SRC byte
cmpwi r.5,2 // Check for exactly 2 bytes left
stb r.6,-2(r.3) // Store 2nd DEST byte
beqlr+ // Return if done
lbz r.6,-3(r.4) // Get 3rd SRC byte
stb r.6,-3(r.3) // Store 3rd DEST byte
blr // Return
//
// Backward Move - SRC and DEST have different alignments
//
// Reached only when the ending SRC and DEST addresses differ mod 4. If
// (SRC or DEST) mod 4 is exactly 2, neither ending address is odd and
// the move is done with halfwords. Any other result means at least one
// ending address is odd.
//
MvBckUnaligned:
or r.9,r.4,r.3 // Combine SRC and DEST address bits
andi. r.9,r.9,3 // Isolate alignment
cmpwi r.9,2 // Check for even result
bne+ BckMvByteUnaligned // Jump for byte unaligned
//
// Divide the blocks to process into 32-byte blocks
//
BckBlkDivUnaligned:
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
sub. r.7,r.5,r.6 // Get byte count of full blocks
sub r.10,r.4,r.7 // Get SRC address where full blocks end
beq- BckMvHWrdBy4Bytes // Jump if no full blocks
mr r.5,r.6 // Set Length = remainder
//
// Backward Move - SRC or DEST is halfword aligned, the other is by word
//
// Each pass moves 32 bytes as sixteen halfwords, highest address first.
//
BckMvByHWord:
lhz r.6,-2(r.4) // Get 1st 2 bytes of 1st SRC wrd
lhz r.7,-4(r.4) // Get 2nd 2 bytes of 1st SRC wrd
sth r.6,-2(r.3) // Put 1st 2 bytes of 1st DST wrd
sth r.7,-4(r.3) // Put 2nd 2 bytes of 1st DST wrd
lhz r.6,-6(r.4) // Get 1st 2 bytes of 2nd SRC wrd
lhz r.7,-8(r.4) // Get 2nd 2 bytes of 2nd SRC wrd
sth r.6,-6(r.3) // Put 1st 2 bytes of 2nd DST wrd
sth r.7,-8(r.3) // Put 2nd 2 bytes of 2nd DST wrd
lhz r.6,-10(r.4) // Get 1st 2 bytes of 3rd SRC wrd
lhz r.7,-12(r.4) // Get 2nd 2 bytes of 3rd SRC wrd
sth r.6,-10(r.3) // Put 1st 2 bytes of 3rd DST wrd
sth r.7,-12(r.3) // Put 2nd 2 bytes of 3rd DST wrd
lhz r.6,-14(r.4) // Get 1st 2 bytes of 4th SRC wrd
lhz r.7,-16(r.4) // Get 2nd 2 bytes of 4th SRC wrd
sth r.6,-14(r.3) // Put 1st 2 bytes of 4th DST wrd
sth r.7,-16(r.3) // Put 2nd 2 bytes of 4th DST wrd
lhz r.6,-18(r.4) // Get 1st 2 bytes of 5th SRC wrd
lhz r.7,-20(r.4) // Get 2nd 2 bytes of 5th SRC wrd
sth r.6,-18(r.3) // Put 1st 2 bytes of 5th DST wrd
sth r.7,-20(r.3) // Put 2nd 2 bytes of 5th DST wrd
lhz r.6,-22(r.4) // Get 1st 2 bytes of 6th SRC wrd
lhz r.7,-24(r.4) // Get 2nd 2 bytes of 6th SRC wrd
sth r.6,-22(r.3) // Put 1st 2 bytes of 6th DST wrd
sth r.7,-24(r.3) // Put 2nd 2 bytes of 6th DST wrd
lhz r.6,-26(r.4) // Get 1st 2 bytes of 7th SRC wrd
lhz r.7,-28(r.4) // Get 2nd 2 bytes of 7th SRC wrd
sth r.6,-26(r.3) // Put 1st 2 bytes of 7th DST wrd
sth r.7,-28(r.3) // Put 2nd 2 bytes of 7th DST wrd
lhz r.6,-30(r.4) // Get 1st 2 bytes of 8th SRC wrd
lhz r.7,-32(r.4) // Get 2nd 2 bytes of 8th SRC wrd
subi r.4,r.4,32 // Update SRC pointer
cmpw r.4,r.10 // Check for all blocks done
sth r.6,-30(r.3) // Put 1st 2 bytes of 8th DST wrd
sth r.7,-32(r.3) // Put 2nd 2 bytes of 8th DST wrd
subi r.3,r.3,32 // Update DEST pointer
bne+ BckMvByHWord // Jump if more blocks
//
// Move 4-byte blocks, two halfwords at a time
//
BckMvHWrdBy4Bytes:
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
sub. r.7,r.5,r.6 // Get byte count of 4-byte blocks
sub r.10,r.4,r.7 // Get SRC address where 4-byte blocks end
beq- BckMoveByByte // Jump if no 4-byte blocks
mr r.5,r.6 // Set Length = remainder
BckHWrdLpOn4Bytes:
lhz r.6,-2(r.4) // Get 1st 2 bytes of 1st SRC wrd
lhz r.7,-4(r.4) // Get 2nd 2 bytes of 1st SRC wrd
subi r.4,r.4,4 // Update SRC pointer
cmpw r.4,r.10 // Check for last block
sth r.6,-2(r.3) // Put 1st 2 bytes of 1st DST wrd
sth r.7,-4(r.3) // Put 2nd 2 bytes of 1st DST wrd
subi r.3,r.3,4 // Update DEST pointer
bne+ BckHWrdLpOn4Bytes // Jump if more blocks
b BckMoveByByte // Jump to complete last bytes
//
// Backward Move - at least one of the ending SRC and DEST addresses is
// odd. Check for both byte unaligned.
//
// If both ending addresses are odd, every halfword access at an odd
// negative offset (-3, -5, ...) falls on an even address, so the
// byte/halfword loop below is used. Otherwise the pure byte loop at
// BckBlksByByte is used. This mirrors FwdMvByteUnaligned.
//
// N.B. The bit test must be the record-form immediate AND (andi.) so
// that cr0 reflects bit 0. The register form "and rA,rS,1" would
// AND with GPR 1 and leave cr0 unchanged.
//
BckMvByteUnaligned:
and r.9,r.3,r.4 // Bit 0 set only if both are odd
andi. r.9,r.9,1 // Isolate bit 0 and set cr0
beq- BckBlksByByte // Jump unless both are odd
//
// Divide the blocks to process into 32-byte blocks
//
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
sub. r.7,r.5,r.6 // Get byte count of full blocks
sub r.10,r.4,r.7 // Get SRC address where full blocks end
beq- BckMvByteBy4Bytes // Jump if no full blocks
mr r.5,r.6 // Set Length = remainder
//
// Backward Move - Both SRC and DEST are byte unaligned, but differently
//
// Each pass moves 32 bytes, highest address first: one byte, fifteen
// halfwords, one byte. Every value is stored from the same register it
// was loaded into.
//
BckMvByByte:
lbz r.6,-1(r.4) // Get last byte of 1st SRC word
lhz r.7,-3(r.4) // Get mid-h-word of 1st SRC word
lhz r.8,-5(r.4) // Get h-word crossing 1st/2nd SRC word
stb r.6,-1(r.3) // Put last byte of 1st DEST word
sth r.7,-3(r.3) // Put mid-h-word of 1st DEST word
sth r.8,-5(r.3) // Put h-word crossing 1st/2nd DEST word
lhz r.6,-7(r.4) // Get mid-h-word of 2nd SRC word
lhz r.7,-9(r.4) // Get h-word crossing 2nd/3rd SRC word
lhz r.8,-11(r.4) // Get mid-h-word of 3rd SRC word
sth r.6,-7(r.3) // Put mid-h-word of 2nd DEST word
sth r.7,-9(r.3) // Put h-word crossing 2nd/3rd DEST word
sth r.8,-11(r.3) // Put mid-h-word of 3rd DEST word
lhz r.6,-13(r.4) // Get h-word crossing 3rd/4th SRC word
lhz r.7,-15(r.4) // Get mid-h-word of 4th SRC word
lhz r.8,-17(r.4) // Get h-word crossing 4th/5th SRC word
sth r.6,-13(r.3) // Put h-word crossing 3rd/4th DEST word
sth r.7,-15(r.3) // Put mid-h-word of 4th DEST word
sth r.8,-17(r.3) // Put h-word crossing 4th/5th DEST word
lhz r.6,-19(r.4) // Get mid-h-word of 5th SRC word
lhz r.7,-21(r.4) // Get h-word crossing 5th/6th SRC word
lhz r.8,-23(r.4) // Get mid-h-word of 6th SRC word
sth r.6,-19(r.3) // Put mid-h-word of 5th DEST word
sth r.7,-21(r.3) // Put h-word crossing 5th/6th DEST word
sth r.8,-23(r.3) // Put mid-h-word of 6th DEST word
lhz r.6,-25(r.4) // Get h-word crossing 6th/7th SRC word
lhz r.7,-27(r.4) // Get mid-h-word of 7th SRC word
lhz r.8,-29(r.4) // Get h-word crossing 7th/8th SRC word
sth r.6,-25(r.3) // Put h-word crossing 6th/7th DEST word
sth r.7,-27(r.3) // Put mid-h-word of 7th DEST word
sth r.8,-29(r.3) // Put h-word crossing 7th/8th DEST word
lhz r.6,-31(r.4) // Get mid-h-word of 8th SRC word
lbz r.7,-32(r.4) // Get first byte of 8th SRC word
subi r.4,r.4,32 // Update SRC pointer
cmpw r.4,r.10 // Check for all blocks done
sth r.6,-31(r.3) // Put mid-h-word of 8th DEST wd
stb r.7,-32(r.3) // Put first byte of 8th DEST wrd
subi r.3,r.3,32 // Update DEST pointer
bne+ BckMvByByte // Jump if more blocks
//
// Move 4-byte blocks with DEST and SRC Byte aligned
//
// Each pass moves 4 bytes, highest address first: byte, halfword, byte.
// r.6 = LNGTH mod 4, r.7 = number of bytes held in 4-byte blocks,
// r.10 = SRC address at which the loop stops.
//
BckMvByteBy4Bytes:
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
sub. r.7,r.5,r.6 // Get byte count of 4-byte blocks
sub r.10,r.4,r.7 // Get SRC address where 4-byte blocks end
beq- BckMoveByByte // Jump if no 4-byte blocks
mr r.5,r.6 // Set Length = remainder
BckByteLpOn4Bytes:
lbz r.6,-1(r.4) // Get last byte of 1st SRC word
lhz r.7,-3(r.4) // Get mid-h-word of 1st SRC word
lbz r.8,-4(r.4) // Get first byte of 1st SRC word
stb r.6,-1(r.3) // Put last byte of 1st DEST wd
subi r.4,r.4,4 // Update SRC pointer
cmpw r.4,r.10 // Check for last block
sth r.7,-3(r.3) // Put mid-h-word of 1st DEST wd
stb r.8,-4(r.3) // Put first byte of 1st DEST wrd
subi r.3,r.3,4 // Update DEST pointer
bne+ BckByteLpOn4Bytes // Jump if more blocks
b BckMoveByByte // Jump to complete last bytes
//
// Backward Move - Either DEST or SRC byte unaligned but not both
//
// Only byte loads and stores are used on this path, working from the
// highest address downward.
//
// Divide the blocks to process into 32-byte blocks
//
BckBlksByByte:
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
sub. r.7,r.5,r.6 // Get byte count of full blocks
sub r.10,r.4,r.7 // Get SRC address where full blocks end
beq- BckMvBlksOf4Bytes // Jump if no full blocks
mr r.5,r.6 // Set Length = remainder
//
// Each pass moves 32 bytes, two bytes at a time.
//
BckMvBlksByByte:
lbz r.6,-1(r.4) // Get first byte of 1st SRC wrd
lbz r.7,-2(r.4) // Get second byte of 1st SRC wrd
stb r.6,-1(r.3) // Put first byte of 1st DEST wrd
stb r.7,-2(r.3) // Put second byte of 1st DST wrd
lbz r.6,-3(r.4) // Get third byte of 1st SRC wrd
lbz r.7,-4(r.4) // Get fourth byte of 1st SRC wrd
stb r.6,-3(r.3) // Put third byte of 1st DEST wrd
stb r.7,-4(r.3) // Put fourth byte of 1st DST wrd
lbz r.6,-5(r.4) // Get first byte of 2nd SRC wrd
lbz r.7,-6(r.4) // Get second byte of 2nd SRC wrd
stb r.6,-5(r.3) // Put first byte of 2nd DEST wrd
stb r.7,-6(r.3) // Put second byte of 2nd DST wrd
lbz r.6,-7(r.4) // Get third byte of 2nd SRC wrd
lbz r.7,-8(r.4) // Get fourth byte of 2nd SRC wrd
stb r.6,-7(r.3) // Put third byte of 2nd DEST wrd
stb r.7,-8(r.3) // Put fourth byte of 2nd DST wrd
lbz r.6,-9(r.4) // Get first byte of 3rd SRC wrd
lbz r.7,-10(r.4) // Get second byte of 3rd SRC wrd
stb r.6,-9(r.3) // Put first byte of 3rd DST wrd
stb r.7,-10(r.3) // Put second byte of 3rd DST wrd
lbz r.6,-11(r.4) // Get third byte of 3rd SRC wrd
lbz r.7,-12(r.4) // Get fourth byte of 3rd SRC wrd
stb r.6,-11(r.3) // Put third byte of 3rd DEST wrd
stb r.7,-12(r.3) // Put fourth byte of 3rd DST wrd
lbz r.6,-13(r.4) // Get first byte of 4th SRC wrd
lbz r.7,-14(r.4) // Get second byte of 4th SRC wrd
stb r.6,-13(r.3) // Put first byte of 4th DEST wrd
stb r.7,-14(r.3) // Put second byte of 4th DST wrd
lbz r.6,-15(r.4) // Get third byte of 4th SRC wrd
lbz r.7,-16(r.4) // Get fourth byte of 4th SRC wrd
stb r.6,-15(r.3) // Put third byte of 4th DEST wrd
stb r.7,-16(r.3) // Put fourth byte of 4th DST wrd
lbz r.6,-17(r.4) // Get first byte of 5th SRC wrd
lbz r.7,-18(r.4) // Get second byte of 5th SRC wrd
stb r.6,-17(r.3) // Put first byte of 5th DEST wrd
stb r.7,-18(r.3) // Put second byte of 5th DST wrd
lbz r.6,-19(r.4) // Get third byte of 5th SRC wrd
lbz r.7,-20(r.4) // Get fourth byte of 5th SRC wrd
stb r.6,-19(r.3) // Put third byte of 5th DEST wrd
stb r.7,-20(r.3) // Put fourth byte of 5th DST wrd
lbz r.6,-21(r.4) // Get first byte of 6th SRC wrd
lbz r.7,-22(r.4) // Get second byte of 6th SRC wrd
stb r.6,-21(r.3) // Put first byte of 6th DEST wrd
stb r.7,-22(r.3) // Put second byte of 6th DST wrd
lbz r.6,-23(r.4) // Get third byte of 6th SRC wrd
lbz r.7,-24(r.4) // Get fourth byte of 6th SRC wrd
stb r.6,-23(r.3) // Put third byte of 6th DEST wrd
stb r.7,-24(r.3) // Put fourth byte of 6th DST wrd
lbz r.6,-25(r.4) // Get first byte of 7th SRC wrd
lbz r.7,-26(r.4) // Get second byte of 7th SRC wrd
stb r.6,-25(r.3) // Put first byte of 7th DEST wrd
stb r.7,-26(r.3) // Put second byte of 7th DST wrd
lbz r.6,-27(r.4) // Get third byte of 7th SRC wrd
lbz r.7,-28(r.4) // Get fourth byte of 7th SRC wrd
stb r.6,-27(r.3) // Put third byte of 7th DEST wrd
stb r.7,-28(r.3) // Put fourth byte of 7th DST wrd
lbz r.6,-29(r.4) // Get first byte of 8th SRC wrd
lbz r.7,-30(r.4) // Get second byte of 8th SRC wrd
stb r.6,-29(r.3) // Put first byte of 8th DEST wrd
stb r.7,-30(r.3) // Put second byte of 8th DST wrd
lbz r.6,-31(r.4) // Get third byte of 8th SRC wrd
lbz r.7,-32(r.4) // Get fourth byte of 8th SRC wrd
subi r.4,r.4,32 // Update SRC pointer
cmpw r.4,r.10 // Check for all blocks done
stb r.6,-31(r.3) // Put third byte of 8th DEST wrd
stb r.7,-32(r.3) // Put fourth byte of 8th DST wrd
subi r.3,r.3,32 // Update DEST pointer
bne+ BckMvBlksByByte // Jump if more blocks
//
// Move 4-byte blocks one byte at a time (DEST or SRC Byte aligned, but
// not the other)
//
BckMvBlksOf4Bytes:
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
sub. r.7,r.5,r.6 // Get byte count of 4-byte blocks
sub r.10,r.4,r.7 // Get SRC address where 4-byte blocks end
beq- BckMoveByByte // Jump if no 4-byte blocks
mr r.5,r.6 // Set Length = remainder
BckBlksLpOn4Bytes:
lbz r.6,-1(r.4) // Get first byte of 1st SRC wrd
lbz r.7,-2(r.4) // Get second byte of 1st SRC wrd
stb r.6,-1(r.3) // Put first byte of 1st DEST wrd
stb r.7,-2(r.3) // Put second byte of 1st DST wrd
lbz r.6,-3(r.4) // Get third byte of 1st SRC wrd
lbz r.7,-4(r.4) // Get fourth byte of 1st SRC wrd
subi r.4,r.4,4 // Update SRC pointer
cmpw r.4,r.10 // Check for last block
stb r.6,-3(r.3) // Put third byte of 1st DEST wrd
stb r.7,-4(r.3) // Put fourth byte of 1st DST wrd
subi r.3,r.3,4 // Update DEST pointer
bne+ BckBlksLpOn4Bytes // Jump if more blocks
b BckMoveByByte // Jump to complete last bytes
//
// Exit the routine
//
MvExit:
LEAF_EXIT(RtlMoveMemory)
//++
//
// VOID
// RtlZeroMemory (
// IN PVOID Destination,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function zeros memory by first aligning the destination
// address to a longword boundary, and then zeroing 32-byte blocks,
// followed by 4-byte blocks, followed by any remaining bytes.
//
// Arguments:
//
// DEST (r.3) - Supplies a pointer to the memory to zero.
//
// LENGTH (r.4) - Supplies the length, in bytes, of the memory to be
// zeroed.
//
// Return Value:
//
// None.
//
//--
//
// Define the entry point
//
LEAF_ENTRY(RtlZeroMemory)
//
// Zero LENGTH bytes starting at DEST.
//
// Register usage:
//
//      r.3  - DEST, advanced as memory is zeroed
//      r.4  - LENGTH, reduced to the number of bytes still to be zeroed
//      r.5  - Store pattern (always zero in this routine)
//      r.6  - Scratch (alignment byte count / remainder)
//      r.7  - Scratch (byte count covered by full blocks)
//      r.8, r.9, r.11, r.12 - Scratch used while aligning to 32 bytes
//      r.10 - Address at which the current block loop stops
//      cr.7 - Low four bits of DEST (bit 30 = DEST & 2, bit 31 = DEST & 1)
//
// N.B. RtlFillMemory branches to ZeroMem when its fill byte is zero.
//      On arrival at ZeroMem, r.5 must be 0, r.6 must be 1, LENGTH
//      must be at least 4, and cr.7 must hold the low bits of DEST.
//
// N.B. LENGTH is a ULONG, so the small case test is an unsigned compare.
//      A signed compare would treat lengths of 0x80000000 and above as
//      negative and send them down the small case path, which zeroes
//      at most 3 bytes.
//
// Zero extra bytes until a word boundary is reached
//
        cmplwi  cr.1,r.4,4              // Check for less than 4 bytes
        mtcrf   0x01,r.3                // Copy low bits of DEST to cr.7
        li      r.5,0                   // Set pattern as 0
        blt-    cr.1,ZeroByByte         // Jump to handle small cases
        li      r.6,1                   // Default unaligned count = 1
ZeroMem:
        bt      31,ZeroOdd              // Branch if align 1 or 3
        bf      30,ZBlkDiv              // Branch if already word aligned
        sth     r.5,0(r.3)              // Align 2 - zero one halfword
        li      r.6,2                   // Set unaligned count = 2
        b       ZUpdteAddr              // Jump to update addresses
ZeroOdd:
        bt      30,Zero1                // Branch if align 3
        sth     r.5,1(r.3)              // Align 1 - zero bytes 1 and 2
        li      r.6,3                   // Set unaligned count = 3
Zero1:
        stb     r.5,0(r.3)              // Zero the first (odd) byte
ZUpdteAddr:
        sub     r.4,r.4,r.6             // Decrement LENGTH by unaligned
        add     r.3,r.3,r.6             // Update the DEST address
//
// Divide the block to process into 32-byte blocks.  DEST is word
// aligned from here on.
//
ZBlkDiv:
        andi.   r.6,r.4,BLKLN-1         // Isolate remainder of LENGTH/32
        sub.    r.7,r.4,r.6             // Get byte count of full blocks
        add     r.10,r.3,r.7            // Get address past last full block
        beq-    ZeroBy4Bytes            // Jump if no full blocks
        mr      r.4,r.6                 // Set Length = Remainder
//
// Zero 32-Byte Blocks
//
// Check for 32-Byte Boundary, if so use the cache zero
//
        andi.   r.9,r.3,31              // Check for cache boundary
        li      r.6,0                   // Set offset=0 (used by dcbz only)
        beq+    BlkZeroC                // Jump if on cache boundary
//
// If not 32-byte boundary, fill to 32-byte boundary then use cache zero
//
        srwi    r.8,r.7,5               // Get block count
        cmpwi   r.8,1                   // Check for single block
        mr      r.12,r.9                // Save offset within 32 bytes
        li      r.11,32                 // Get block size
        sub     r.9,r.11,r.9            // Get distance to cache boundary
        beq-    BlkZero                 // Jump if single block (cr.0 is
                                        // still from the cmpwi above)
//
// Adjust pointers and loop counts
//
// Moving DEST up to the next 32-byte boundary consumes "distance" bytes.
// If the remainder covers that distance, the block count is unchanged
// and the end pointer moves up by the distance.  Otherwise one block is
// given up: the end pointer moves back by the saved offset and the
// remainder becomes 32 + (Remainder - distance).
//
        sub.    r.8,r.4,r.9             // TMP=Remainder-Unaligned Count
        add     r.10,r.10,r.9           // Get new end pointer
        mr      r.4,r.8                 // Set new remainder count (TMP)
        bge+    AlignToCache            // Jump if TMP >= 0
        sub     r.10,r.10,r.9           // Subtract previous increment
        add     r.4,r.11,r.8            // Get new rem cnt (32-abs(TMP))
        sub     r.10,r.10,r.12          // Get new end pointer
//
// Fill to 32-byte boundary - Using 4-byte blocks
//
AlignToCache:
        andi.   r.8,r.9,3               // Isolate remainder of distance/4
        sub.    r.9,r.9,r.8             // Get full word byte count
        li      r.7,4                   // Initialize loop decrement
        beq-    ByteAlignToCache        // Jump if no full words
//
Align4Bytes:
        stw     r.5,0(r.3)              // Zero one word
        sub.    r.9,r.9,r.7             // Decrement the bytes remaining
        addi    r.3,r.3,4               // Increment the DEST address
        bne+    Align4Bytes             // Jump if more 4-Byte Blk fills
//
// Align to cache boundary using 1-Byte Blocks
//
// N.B. DEST is word aligned when ZBlkDiv is reached, so the distance to
//      the 32-byte boundary is a multiple of 4 and r.8 is always zero
//      here.  The byte stores below are therefore never executed.
//
ByteAlignToCache:
        cmpwi   r.8,0                   // Check for completion
        add     r.3,r.3,r.8             // Update DEST address
        beq+    BlkZeroC                // Jump if cache aligned
//
        cmpwi   r.8,1                   // Check for done
        stb     r.5,0(r.3)              // Zero 1 byte
        beq+    BlkZeroC                // Jump if done
        cmpwi   r.8,2                   // Check cache aligned
        stb     r.5,1(r.3)              // Zero 1 byte
        beq+    BlkZeroC                // Jump cache aligned
        stb     r.5,2(r.3)              // Zero 1 Byte
//
// Zero using the cache
//
// N.B. The dcbz loop is compiled out, so BlkZeroC falls through into
//      the normal store loop at BlkZero.
//
BlkZeroC:
#if 0 // BLDR_KERNEL_RUNTIME != 1
//
// In order to allow us to boot in write-through or cache-inhibited
// mode, the boot loader does not use dcbz.
//
        dcbz    r.6,r.3                 // Zero 32-byte cache block
        addi    r.3,r.3,32              // Increment the DEST address
        cmpw    r.3,r.10                // Check for completion
        bne+    BlkZeroC                // Jump if more 32-Byte Blk fills
        b       ZeroBy4Bytes            // Jump to finish
#endif
//
// Zero using normal stores
//
BlkZero:
        stw     r.5,0(r.3)              // Store the 1st DEST word
        stw     r.5,4(r.3)              // Store the 2nd DEST word
        stw     r.5,8(r.3)              // Store the 3rd DEST word
        stw     r.5,12(r.3)             // Store the 4th DEST word
        stw     r.5,16(r.3)             // Store the 5th DEST word
        stw     r.5,20(r.3)             // Store the 6th DEST word
        stw     r.5,24(r.3)             // Store the 7th DEST word
        stw     r.5,28(r.3)             // Store the 8th DEST word
        addi    r.3,r.3,32              // Increment the DEST address
        cmpw    r.3,r.10                // Check for completion
        bne+    BlkZero                 // Jump if more 32-Byte Blk fills
//
// Zero 4-Byte Blocks
//
ZeroBy4Bytes:
        andi.   r.6,r.4,3               // Isolate remainder of LENGTH/4
        sub.    r.7,r.4,r.6             // Get byte count of full words
        add     r.10,r.3,r.7            // Get address past last full word
        beq-    ZeroByByte              // Jump if no full words
        mr      r.4,r.6                 // Set Length = Remainder
//
Zero4Bytes:
        stw     r.5,0(r.3)              // Zero one word
        addi    r.3,r.3,4               // Increment the DEST address
        cmpw    r.3,r.10                // Check for completion
        bne+    Zero4Bytes              // Jump if more 4-Byte Blk fills
//
// Zero 1-Byte Blocks - LENGTH is 0, 1, 2 or 3 here
//
ZeroByByte:
        cmpwi   r.4,0                   // Check for completion
        beqlr+                          // Return if done
//
Zero1Byte:
        cmpwi   r.4,1                   // Check for done
        stb     r.5,0(r.3)              // Zero 1 byte
        beqlr+                          // Return if done
        cmpwi   r.4,2                   // Check for done
        stb     r.5,1(r.3)              // Zero 1 byte
        beqlr+                          // Return if done
        stb     r.5,2(r.3)              // Zero 1 Byte
//
// Exit
//
ZeroExit:
LEAF_EXIT(RtlZeroMemory)
//
//++
//
// VOID
// RtlFillMemory (
// IN PVOID Destination,
// IN ULONG Length,
// IN UCHAR Fill
// )
//
// Routine Description:
//
// This function fills memory by first aligning the destination
// address to a longword boundary, and then filling 32-byte blocks,
// followed by 4-byte blocks, followed by any remaining bytes.
//
// Arguments:
//
// DEST (r.3) - Supplies a pointer to the memory to fill.
//
// LENGTH (r.4) - Supplies the length, in bytes, of the memory to be
// filled.
//
// PTTRN (r.5) - Supplies the fill byte.
//
// Return Value:
//
// None.
//
//--
//
// Define the entry point
//
LEAF_ENTRY(RtlFillMemory)
//
// Fill LENGTH bytes starting at DEST with the byte in PTTRN.
//
// Register usage:
//
//      r.3  - DEST, advanced as memory is filled
//      r.4  - LENGTH, reduced to the number of bytes still to be filled
//      r.5  - Fill byte replicated into all four bytes of the word
//      r.6  - Scratch (alignment byte count / remainder)
//      r.7  - Scratch (byte count covered by full blocks)
//      r.10 - Address at which the current block loop stops
//      cr.0 - EQ set if the replicated pattern is zero
//      cr.1 - Result of comparing LENGTH with 4
//      cr.7 - Low four bits of DEST (bit 30 = DEST & 2, bit 31 = DEST & 1)
//
// N.B. LENGTH is a ULONG, so the small case test is an unsigned compare.
//      A signed compare would treat lengths of 0x80000000 and above as
//      negative and send them down the small case path, which fills
//      at most 3 bytes.
//
        cmplwi  cr.1,r.4,4              // Check for less than 4 bytes
//
// Initialize a register with the fill byte duplicated
//
        rlwimi  r.5,r.5,8,0x0000ff00    // Propagate rightmost byte
        rlwimi. r.5,r.5,16,0xffff0000   // thru upper 3 bytes and record
                                        // in cr.0 whether pattern is 0
//
// Fill Memory with the pattern
//
//
// Fill extra bytes until a word boundary is reached
//
        mtcrf   0x01,r.3                // Copy low bits of DEST to cr.7
        blt-    cr.1,FillByByte         // Jump to handle small cases
        li      r.6,1                   // Default unaligned count to 1 byte
        beq-    ZeroMem                 // Use RtlZeroMemory if fill 0
                                        // (r.5 = 0, r.6 = 1, cr.7 set)
        bt      31,FillOdd              // Branch if align 1 or 3
        bf      30,BlkDiv               // Branch if already word aligned
        sth     r.5,0(r.3)              // Align 2 - fill one halfword
        li      r.6,2                   // Set count to 2 bytes
        b       UpdteAddr               // Jump to update addresses
FillOdd:
        bt      30,Fill1                // Branch if align 3
        sth     r.5,1(r.3)              // Align 1 - fill bytes 1 and 2
        li      r.6,3                   // Set count to 3 bytes
Fill1:
        stb     r.5,0(r.3)              // Fill the first (odd) byte
UpdteAddr:
        sub     r.4,r.4,r.6             // Decrement LENGTH by unaligned
        add     r.3,r.3,r.6             // Update the DEST address
//
// Divide the block to process into 32-byte blocks.  DEST is word
// aligned from here on.
//
BlkDiv:
        andi.   r.6,r.4,BLKLN-1         // Isolate remainder of LENGTH/32
        sub.    r.7,r.4,r.6             // Get byte count of full blocks
        add     r.10,r.3,r.7            // Get address past last full block
        beq-    FillBy4Bytes            // Jump if no full blocks
        mr      r.4,r.6                 // Set Length = Remainder
//
// Fill 32-Byte Blocks
//
BlkFill:
        stw     r.5,0(r.3)              // Store the 1st DEST word
        stw     r.5,4(r.3)              // Store the 2nd DEST word
        stw     r.5,8(r.3)              // Store the 3rd DEST word
        stw     r.5,12(r.3)             // Store the 4th DEST word
        stw     r.5,16(r.3)             // Store the 5th DEST word
        stw     r.5,20(r.3)             // Store the 6th DEST word
        stw     r.5,24(r.3)             // Store the 7th DEST word
        stw     r.5,28(r.3)             // Store the 8th DEST word
        addi    r.3,r.3,32              // Increment the DEST address
        cmpw    r.3,r.10                // Check for completion
        bne+    BlkFill                 // Jump if more 32-Byte Blk fills
//
// Fill 4-Byte Blocks
//
FillBy4Bytes:
        andi.   r.6,r.4,3               // Isolate remainder of LENGTH/4
        sub.    r.7,r.4,r.6             // Get byte count of full words
        add     r.10,r.3,r.7            // Get address past last full word
        beq-    FillByByte              // Jump if no full words
        mr      r.4,r.6                 // Set Length = Remainder
//
Fill4Bytes:
        stw     r.5,0(r.3)              // Fill one word
        addi    r.3,r.3,4               // Increment the DEST address
        cmpw    r.3,r.10                // Check for completion
        bne+    Fill4Bytes              // Jump if more 4-Byte Blk fills
//
// Fill 1-Byte Blocks - LENGTH is 0, 1, 2 or 3 here
//
FillByByte:
        cmpwi   r.4,0                   // Check for completion
        beqlr+                          // Return if done
//
Fill1Byte:
        cmpwi   r.4,1                   // Check for done
        stb     r.5,0(r.3)              // Fill 1 byte
        beqlr+                          // Return if done
        cmpwi   r.4,2                   // Check for done
        stb     r.5,1(r.3)              // Fill 1 byte
        beqlr+                          // Return if done
        stb     r.5,2(r.3)              // Fill 1 Byte
//
// Exit
//
FillExit:
LEAF_EXIT(RtlFillMemory)
//++
//
// VOID
// RtlFillMemoryUlong (
// IN PVOID Destination,
// IN ULONG Length,
// IN ULONG Pattern
// )
//
// Routine Description:
//
// This function fills memory with the specified longword pattern by
// filling 32-byte blocks followed by 4-byte blocks.
//
// N.B. This routine assumes that the destination address is aligned
// on a longword boundary and that the length is an even multiple
// of longwords.
//
// Arguments:
//
// DEST (r.3) - Supplies a pointer to the memory to fill.
//
// LENGTH (r.4) - Supplies the length, in bytes, of the memory to be
// filled.
//
// PTTRN (r.5) - Supplies the fill pattern.
//
// Return Value:
//
// None.
//
//--
//
// Define the entry point
//
LEAF_ENTRY(RtlFillMemoryUlong)
//
// Store the 32-bit PTTRN into every longword of the destination.
//
//      r.3  - DEST, advanced as memory is filled
//      r.4  - LENGTH, reduced to the number of bytes still to be filled
//      r.5  - PTTRN
//      r.6  - Scratch (remainder)
//      r.7  - Scratch (byte count covered by full blocks)
//      r.10 - Address at which the current loop stops
//
// Round LENGTH down to a whole number of longwords
//
        andi.   r.6,r.4,3               // r.6 = LENGTH modulo 4
        sub     r.4,r.4,r.6             // Discard the odd bytes
//
// Split LENGTH into whole 32-byte blocks plus a longword tail
//
        andi.   r.6,r.4,BLKLN-1         // r.6 = tail byte count
        sub.    r.7,r.4,r.6             // r.7 = bytes in whole blocks
        mr      r.4,r.6                 // LENGTH = tail (no-op if r.7 = 0)
        add     r.10,r.3,r.7            // r.10 = address past last block
        beq-    FillUlBy4Bytes          // No whole blocks - do tail only
//
// Block loop - DEST is advanced first, then the eight longwords of the
// block just stepped over are stored at negative displacements
//
BlkFillUl:
        addi    r.3,r.3,BLKLN           // Step DEST past this block
        cmpw    r.3,r.10                // Was this the last block?
        stw     r.5,-32(r.3)            // Longword 1 of the block
        stw     r.5,-28(r.3)            // Longword 2 of the block
        stw     r.5,-24(r.3)            // Longword 3 of the block
        stw     r.5,-20(r.3)            // Longword 4 of the block
        stw     r.5,-16(r.3)            // Longword 5 of the block
        stw     r.5,-12(r.3)            // Longword 6 of the block
        stw     r.5,-8(r.3)             // Longword 7 of the block
        stw     r.5,-4(r.3)             // Longword 8 of the block
        bne+    BlkFillUl               // Loop until r.10 is reached
//
// Tail - fill the remaining longwords one at a time
//
FillUlBy4Bytes:
        andi.   r.6,r.4,3               // r.6 = LENGTH modulo 4
        sub.    r.7,r.4,r.6             // r.7 = bytes in whole longwords
        mr      r.4,r.6                 // LENGTH = what is left over
        add     r.10,r.3,r.7            // r.10 = address past the tail
        beqlr-                          // Nothing left - return
//
FillUl4Bytes:
        addi    r.3,r.3,4               // Step DEST past this longword
        cmpw    r.3,r.10                // Was this the last longword?
        stw     r.5,-4(r.3)             // Store the longword stepped over
        bne+    FillUl4Bytes            // Loop until r.10 is reached
//
// Return to the caller
//
FillUlExit:
LEAF_EXIT(RtlFillMemoryUlong)