2195 lines
108 KiB
ArmAsm
2195 lines
108 KiB
ArmAsm
// TITLE("Compare, Move, Zero, and Fill Memory Support")
|
||
//++
|
||
//
|
||
// Copyright (c) 1993 IBM Corporation
|
||
//
|
||
// Module Name:
|
||
//
|
||
// mvmem.s
|
||
//
|
||
// Abstract:
|
||
//
|
||
// This module implements functions to compare, move, zero, and fill
|
||
// blocks of memory. If the memory is aligned, then these functions
|
||
// are very efficient.
|
||
//
|
||
// N.B. These routines MUST preserve all floating state since they are
|
||
// frequently called from interrupt service routines that normally
|
||
// do not save or restore floating state.
|
||
//
|
||
// Author:
|
||
//
|
||
// Curt Fawcett (crf) 10-Aug-1993
|
||
//
|
||
// Environment:
|
||
//
|
||
// User or Kernel mode.
|
||
//
|
||
// Revision History:
|
||
//
|
||
// Curt Fawcett 11-Jan-1994 Removed register definitions
|
||
// and fixed for new assembler
|
||
//
|
||
//--
|
||
|
||
#include <ksppc.h>
|
||
|
||
//
|
||
// Define local constants
|
||
//
|
||
.set BLKLN,32
|
||
//
|
||
//--
|
||
//++
|
||
//
|
||
// ULONG
|
||
// RtlCompareMemory (
|
||
// IN PVOID Source1,
|
||
// IN PVOID Source2,
|
||
// IN ULONG Length
|
||
// )
|
||
//
|
||
// Routine Description:
|
||
//
|
||
// This function compares two blocks of memory and returns the number
|
||
// of bytes that compared equal.
|
||
//
|
||
// Arguments:
|
||
//
|
||
// SRC1 (r.3) - Supplies a pointer to the first block of memory to
|
||
// compare.
|
||
//
|
||
// SRC2 (r.4) - Supplies a pointer to the second block of memory to
|
||
// compare.
|
||
//
|
||
// LNGTH (r.5) - Supplies the length, in bytes, of the memory to be
|
||
// compared.
|
||
//
|
||
// Return Value:
|
||
//
|
||
// The number of bytes that compared equal is returned as the function
|
||
// value. If all bytes compared equal, then the length of the orginal
|
||
// block of memory is returned.
|
||
//
|
||
//--
|
||
//
|
||
// Define the routine entry point
|
||
|
||
LEAF_ENTRY(RtlCompareMemory)
|
||
//
|
||
// Compare Memory
|
||
//
|
||
// Check alignment
|
||
//
|
||
or. r.6,r.5,r.5 // Check for zero length
|
||
mr r.12,r.5 // Save original length
|
||
beq GetResults2 // Jump if zero length
|
||
cmpwi r.5,4 // Check for less than 4 bytes
|
||
add r.11,r.3,r.5 // Get ending SRC1 address
|
||
xor r.9,r.3,r.4 // Check for same alignment
|
||
blt- CompareByByte // Jump if single byte compares
|
||
andi. r.9,r.9,3 // Isolate alignment bits
|
||
bne- CompUnaligned // Jump if different alignments
|
||
//
|
||
// Compare Memory - Same SRC1 and SRC2 alignment
|
||
//
|
||
// Compare extra bytes until a word boundary is reached
|
||
//
|
||
|
||
CompAligned:
|
||
andi. r.6,r.4,3 // Check alignment type
|
||
beq+ CompBlkDiv // Jump to process 32-Byte blocks
|
||
cmpwi r.6,3 // Check for 1 byte unaligned
|
||
lbz r.7,0(r.3) // Get unaligned byte
|
||
lbz r.8,0(r.4) // Get unaligned byte
|
||
bne+ Comp2 // If not, check next case
|
||
li r.6,1 // Set byte move count
|
||
b UpdateCompAddrs // Jump to update addresses
|
||
Comp2:
|
||
cmpwi r.6,2 // Check for halfword aligned
|
||
li r.6,2 // Set byte move count
|
||
bne+ Comp3 // If not, check next case
|
||
lhz r.7,0(r.3) // Get unaligned halfword
|
||
lhz r.8,0(r.4) // Get unaligned halfword
|
||
b UpdateCompAddrs // Jump to update addresses
|
||
Comp3:
|
||
cmpw r.7,r.8 // Check for 1st word equal
|
||
lhz r.7,1(r.3) // Get unaligned halfword
|
||
lhz r.8,1(r.4) // Get unaligned halfword
|
||
li r.6,3 // Set byte move count
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
UpdateCompAddrs:
|
||
cmpw r.7,r.8 // Check for 1st word equal
|
||
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
add r.3,r.3,r.6 // Update the SRC1 address
|
||
add r.4,r.4,r.6 // Update the SRC2 address
|
||
//
|
||
// Divide the block to process into 32-byte blocks
|
||
//
|
||
|
||
CompBlkDiv:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- CompareBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Compare 32-byte blocks
|
||
//
|
||
CompFullBlks:
|
||
lwz r.6,0(r.3) // Get 1st SRC1 word
|
||
lwz r.7,0(r.4) // Get 1st SRC2 word
|
||
lwz r.8,4(r.3) // Get 2nd SRC1 word
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lwz r.9,4(r.4) // Get 2nd SRC2 word
|
||
bne- Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lwz r.6,8(r.3) // Get 3rd SRC1 word
|
||
lwz r.7,8(r.4) // Get 3rd SRC2 word
|
||
bne- Wrd2ne // Jump if 2nd word not equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lwz r.8,12(r.3) // Get 4th SRC1 word
|
||
lwz r.9,12(r.4) // Get 4th SRC2 word
|
||
bne- Wrd3ne // Jump if 3rd word not equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
lwz r.6,16(r.3) // Get 5th SRC1 word
|
||
lwz r.7,16(r.4) // Get 5th SRC2 word
|
||
bne- Wrd4ne // Jump if 4th word not equal
|
||
cmpw r.6,r.7 // Check for 5th word equal
|
||
lwz r.8,20(r.3) // Get 6th SRC1 word
|
||
lwz r.9,20(r.4) // Get 6th SRC2 word
|
||
bne- Wrd5ne // Jump if 5th word not equal
|
||
cmpw r.8,r.9 // Check for 6th word equal
|
||
lwz r.6,24(r.3) // Get 7th SRC1 word
|
||
lwz r.7,24(r.4) // Get 7th SRC2 word
|
||
bne- Wrd6ne // Jump if 6th word not equal
|
||
cmpw r.6,r.7 // Check for 7th word equal
|
||
lwz r.8,28(r.3) // Get 8th SRC1 word
|
||
lwz r.9,28(r.4) // Get 8th SRC2 word
|
||
bne- Wrd7ne // Jump if 7th word not equal
|
||
cmpw r.8,r.9 // Check for 8th word equal
|
||
bne- Wrd8ne // Jump if 8th word not equal
|
||
addi r.3,r.3,32 // Update SRC1 pointer
|
||
cmpw r.3,r.10 // Check for all blocks done
|
||
addi r.4,r.4,32 // Update SRC2 pointer
|
||
bne+ CompFullBlks // Jump if more blocks
|
||
//
|
||
// Compare 4-byte blocks
|
||
//
|
||
|
||
CompareBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- CompareByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
CompLpOn4Bytes:
|
||
lwz r.6,0(r.3) // Get 1st SRC1 word
|
||
lwz r.7,0(r.4) // Get 1st SRC2 word
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
bne- Wrd1ne // Jump if 1st word not equal
|
||
addi r.3,r.3,4 // Get pointer to next SRC1 block
|
||
cmpw r.3,r.10 // Check for last block
|
||
addi r.4,r.4,4 // Get pointer to next SRC2 block
|
||
bne+ CompLpOn4Bytes // Jump if more blocks
|
||
//
|
||
// Compare 1-byte blocks
|
||
//
|
||
|
||
CompareByByte:
|
||
cmpwi r.5,0 // Check for no bytes left
|
||
beq+ GetResults // Jump to return if done
|
||
lbz r.6,0(r.3) // Get 1st SRC1 byte
|
||
lbz r.7,0(r.4) // Load 1st SRC2 byte
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
bne- Wrd1ne // Jump if 1st word not equal
|
||
addi r.3,r.3,1 // Update SRC1 address
|
||
cmpwi r.5,1 // Check for no bytes left
|
||
addi r.4,r.4,1 // Update SRC2 address
|
||
beq+ GetResults // Jump to return if done
|
||
lbz r.6,0(r.3) // Get 2nd SRC1 byte
|
||
lbz r.7,0(r.4) // Load 2nd SRC2 byte
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
bne- Wrd1ne // Jump if 1st word not equal
|
||
cmpwi r.5,2 // Check for no bytes left
|
||
addi r.4,r.4,1 // Update SRC2 address
|
||
addi r.3,r.3,1 // Update SRC1 address
|
||
beq+ GetResults // Jump to return if done
|
||
lbz r.6,0(r.3) // Get 3rd SRC1 byte
|
||
lbz r.7,0(r.4) // Load 3rd SRC2 byte
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
bne- Wrd1ne // Jump if 1st word not equal
|
||
|
||
addi r.4,r.4,1 // Update SRC2 address
|
||
addi r.3,r.3,1 // Update SRC1 address
|
||
b GetResults // Jump to return
|
||
//
|
||
// Compare - SRC1 and SRC2 have different alignments
|
||
//
|
||
|
||
CompUnaligned:
|
||
or r.9,r.3,r.4 // Check if either byte unaligned
|
||
andi. r.9,r.9,3 // Isolate alignment
|
||
cmpwi r.9,2 // Check for even result
|
||
bne+ CompByteUnaligned // Jump for byte unaligned
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
|
||
CompBlkDivUnaligned:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- CompHWrdBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Compare - SRC1 or SRC2 is halfword aligned, the other is by word
|
||
//
|
||
|
||
CompByHWord:
|
||
lhz r.6,0(r.3) // Get 1st hword of 1st SRC1 wrd
|
||
lhz r.7,0(r.4) // Get 1st hword of 1st SRC2 wrd
|
||
lhz r.8,2(r.3) // Get 2nd hword of 1st SRC1 wrd
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lhz r.9,2(r.4) // Get 2nd hword of 1st SRC2 wrd
|
||
bne- Wrd1ne // Check for 1st word equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lhz r.6,4(r.3) // Get 1st hword of 2nd SRC1 wrd
|
||
lhz r.7,4(r.4) // Get 1st hword of 2nd SRC2 wrd
|
||
bne- Wrd1ne // Check for 1st word equal
|
||
cmpw r.6,r.7 // Check for 2nd word equal
|
||
lhz r.8,6(r.3) // Get 2nd hword of 2nd SRC1 wrd
|
||
lhz r.9,6(r.4) // Get 2nd hword of 2nd SRC2 wrd
|
||
bne- Wrd2ne // Check for 2nd word equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lhz r.6,8(r.3) // Get 1st hword of 3rd SRC1 wrd
|
||
lhz r.7,8(r.4) // Get 1st hword of 3rd SRC2 wrd
|
||
bne- Wrd2ne // Check for 2nd word equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lhz r.8,10(r.3) // Get 2nd hword of 3rd SRC1 wrd
|
||
lhz r.9,10(r.4) // Get 2nd hword of 3rd SRC2 wrd
|
||
bne- Wrd3ne // Check for 3rd word equal
|
||
cmpw r.8,r.9 // Check for 3rd word equal
|
||
lhz r.6,12(r.3) // Get 1st hword of 4th SRC1 wrd
|
||
lhz r.7,12(r.4) // Get 1st hword of 4th SRC2 wrd
|
||
bne- Wrd3ne // Check for 3rd word equal
|
||
cmpw r.6,r.7 // Check for 4th word equal
|
||
lhz r.8,14(r.3) // Get 2nd hword of 4th SRC1 wrd
|
||
lhz r.9,14(r.4) // Get 2nd hword of 4th SRC2 wrd
|
||
bne- Wrd4ne // Check for 4th word equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
lhz r.6,16(r.3) // Get 1st hword of 5th SRC1 wrd
|
||
lhz r.7,16(r.4) // Get 1st hword of 5th SRC2 wrd
|
||
bne- Wrd4ne // Check for 4th word equal
|
||
cmpw r.6,r.7 // Check for 5th word equal
|
||
lhz r.8,18(r.3) // Get 2nd hword of 5th SRC1 wrd
|
||
lhz r.9,18(r.4) // Get 2nd hword of 5th SRC2 wrd
|
||
bne- Wrd5ne // Check for 5th word equal
|
||
cmpw r.8,r.9 // Check for 5th word equal
|
||
lhz r.6,20(r.3) // Get 1st hword of 6th SRC1 wrd
|
||
lhz r.7,20(r.4) // Get 1st hword of 6th SRC2 wrd
|
||
bne- Wrd5ne // Check for 5th word equal
|
||
cmpw r.6,r.7 // Check for 6th word equal
|
||
lhz r.8,22(r.3) // Get 2nd hword of 6th SRC1 wrd
|
||
lhz r.9,22(r.4) // Get 2nd hword of 6th SRC2 wrd
|
||
bne- Wrd6ne // Check for 6th word equal
|
||
cmpw r.8,r.9 // Check for 6th word equal
|
||
lhz r.6,24(r.3) // Get 1st hword of 7th SRC1 wrd
|
||
lhz r.7,24(r.4) // Get 1st hword of 7th SRC2 wrd
|
||
bne- Wrd6ne // Check for 6th word equal
|
||
cmpw r.6,r.7 // Check for 7th word equal
|
||
lhz r.8,26(r.3) // Get 2nd hword of 7th SRC1 wrd
|
||
lhz r.9,26(r.4) // Get 2nd hword of 7th SRC2 wrd
|
||
bne- Wrd7ne // Check for 7th word equal
|
||
cmpw r.8,r.9 // Check for 7th word equal
|
||
lhz r.6,28(r.3) // Get 1st hword of 8th SRC1 wrd
|
||
lhz r.7,28(r.4) // Get 1st hword of 8th SRC2 wrd
|
||
bne- Wrd7ne // Check for 7th word equal
|
||
cmpw r.6,r.7 // Check for 8th word equal
|
||
lhz r.8,30(r.3) // Get 2nd hword of 8th SRC1 wrd
|
||
lhz r.9,30(r.4) // Get 2nd hword of 8th SRC2 wrd
|
||
bne- Wrd8ne // Check for 8th word equal
|
||
cmpw r.8,r.9 // Check for 8th word equal
|
||
bne- Wrd8ne // Check for 8th word equal
|
||
addi r.3,r.3,32 // Update SRC1 pointer
|
||
cmpw r.3,r.10 // Check for all blocks done
|
||
addi r.4,r.4,32 // Update SRC2 pointer
|
||
bne+ CompByHWord // Jump if more blocks
|
||
//
|
||
// Compare 4-byte blocks with SRC2 Halfword unaligned
|
||
//
|
||
|
||
CompHWrdBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- CompareByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
CompHWrdLpOn4Bytes:
|
||
lhz r.6,0(r.3) // Get 1st hword of 1st SRC1 wrd
|
||
lhz r.7,0(r.4) // Get 1st hword of 1st SRC2 wrd
|
||
lhz r.8,2(r.3) // Get 2nd hword of 1st SRC1 wrd
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lhz r.9,2(r.4) // Get 2nd hword of 1st SRC2 wrd
|
||
bne- Wrd1ne // Check for 1st word equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
bne- Wrd1ne // Check for 1st word equal
|
||
addi r.3,r.3,4 // Update SRC1 pointer
|
||
cmpw r.3,r.10 // Check for last block
|
||
addi r.4,r.4,4 // Update SRC2 pointer
|
||
bne+ CompHWrdLpOn4Bytes // Jump if more blocks
|
||
|
||
b CompareByByte // Jump to complete last bytes
|
||
//
|
||
// Compare - Byte unaligned
|
||
//
|
||
|
||
CompByteUnaligned:
|
||
and r.9,r.3,r.4 // Check for both byte aligned
|
||
andi. r.9,r.9,1 // Isolate alignment bits
|
||
beq- CmpBlksByByte // Jump if both not byte aligned
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- CompByteBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Compare - SRC1 and SRC2 are byte unaligned differently
|
||
//
|
||
|
||
CompByByte:
|
||
lbz r.6,0(r.3) // Get first byte of 1st SRC1 wrd
|
||
lbz r.7,0(r.4) // Get first byte of 1st SRC2 wrd
|
||
lhz r.8,1(r.3) // Get mid-h-word of 1st SRC1 wrd
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lhz r.9,1(r.4) // Get mid-h-word of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lhz r.6,3(r.3) // Get h-word crossing 1st/2nd SRC1 wrd
|
||
lhz r.7,3(r.4) // Get h-word crossing 1st/2nd SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lhz r.8,5(r.3) // Get mid-h-word of 2nd SRC1 wrd
|
||
lhz r.9,5(r.4) // Get mid-h-word of 2nd SRC2 wrd
|
||
bne Wrd1ne // Jump if 2nd word not equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lhz r.6,7(r.3) // Get h-word crossing 2nd/3rd SRC1 wrd
|
||
lhz r.7,7(r.4) // Get h-word crossing 2nd/3rd SRC2 wrd
|
||
bne Wrd2ne // Jump if 2nd word not equal
|
||
cmpw r.6,r.7 // Check for 2nd word equal
|
||
lhz r.8,9(r.3) // Get mid-h-word of 3rd SRC1 wrd
|
||
lhz r.9,9(r.4) // Get mid-h-word of 3rd SRC2 wrd
|
||
bne Wrd2ne // Jump if 3rd word not equal
|
||
cmpw r.8,r.9 // Check for 3rd word equal
|
||
lhz r.6,11(r.3) // Get h-word crossing 3rd/4th SRC1 wrd
|
||
lhz r.7,11(r.4) // Get h-word crossing 3rd/4th SRC2 wrd
|
||
bne Wrd3ne // Jump if 3rd word not equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lhz r.8,13(r.3) // Get mid-h-word of 4th SRC1 wrd
|
||
lhz r.9,13(r.4) // Get mid-h-word of 4th SRC2 wrd
|
||
bne Wrd3ne // Jump if 4th word not equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
lhz r.6,15(r.3) // Get h-word crossing 4th/5th SRC1 wrd
|
||
lhz r.7,15(r.4) // Get h-word crossing 4th/5th SRC2 wrd
|
||
bne Wrd4ne // Jump if 4th word not equal
|
||
cmpw r.6,r.7 // Check for 4th word equal
|
||
lhz r.8,17(r.3) // Get mid-h-word of 5th SRC1 wrd
|
||
lhz r.9,17(r.4) // Get mid-h-word of 5th SRC2 wrd
|
||
bne Wrd4ne // Jump if 5th word not equal
|
||
cmpw r.8,r.9 // Check for 5th word equal
|
||
lhz r.6,19(r.3) // Get h-word crossing 5th/6th SRC1 wrd
|
||
lhz r.7,19(r.4) // Get h-word crossing 5th/6th SRC2 wrd
|
||
bne Wrd5ne // Jump if 5th word not equal
|
||
cmpw r.6,r.7 // Check for 5th word equal
|
||
lhz r.8,21(r.3) // Get mid-h-word of 6th SRC1 wrd
|
||
lhz r.9,21(r.4) // Get mid-h-word of 6th SRC2 wrd
|
||
bne Wrd5ne // Jump if 6th word not equal
|
||
cmpw r.8,r.9 // Check for 6th word equal
|
||
lhz r.6,23(r.3) // Get h-word crossing 6th/7th SRC1 wrd
|
||
lhz r.7,23(r.4) // Get h-word crossing 6th/7th SRC2 wrd
|
||
bne Wrd6ne // Jump if 6th word not equal
|
||
cmpw r.6,r.7 // Check for 6th word equal
|
||
lhz r.8,25(r.3) // Get mid-h-word of 7th SRC1 wrd
|
||
lhz r.9,25(r.4) // Get mid-h-word of 7th SRC2 wrd
|
||
bne Wrd6ne // Jump if 7th word not equal
|
||
cmpw r.8,r.9 // Check for 7th word equal
|
||
lhz r.6,27(r.3) // Get h-word crossing 7th/8th SRC1 wrd
|
||
lhz r.7,27(r.4) // Get h-word crossing 7th/8th SRC2 wrd
|
||
bne Wrd7ne // Jump if 7th word not equal
|
||
cmpw r.6,r.7 // Check for 7th word equal
|
||
lhz r.8,29(r.3) // Get mid-h-word of 8th SRC1 wrd
|
||
lhz r.9,29(r.4) // Get mid-h-word of 8th SRC2 wrd
|
||
bne Wrd7ne // Jump if 8th word not equal
|
||
cmpw r.8,r.9 // Check for 8th word equal
|
||
lbz r.6,31(r.3) // Get last byte of 8th SRC1 wrd
|
||
lbz r.7,31(r.4) // Get last byte of 8th SRC2 wrd
|
||
bne Wrd8ne // Jump if 8th word not equal
|
||
cmpw r.6,r.7 // Check for 8th word equal
|
||
bne Wrd8ne // Jump if 8th word not equal
|
||
addi r.3,r.3,32 // Update SRC1 pointer
|
||
cmpw r.3,r.10 // Check for all blocks done
|
||
addi r.4,r.4,32 // Update SRC2 pointer
|
||
bne+ CompByByte // Jump if more blocks
|
||
//
|
||
// Compare 4-byte blocks with SRC2 or SRC1 Byte aligned
|
||
//
|
||
|
||
CompByteBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- CompareByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
CompByteLpOn4Bytes:
|
||
lbz r.6,0(r.3) // Get first byte of 1st SRC1 wrd
|
||
lbz r.7,0(r.4) // Get first byte of 1st SRC2 wrd
|
||
lhz r.8,1(r.3) // Get mid-h-word of 1st SRC1 wrd
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lhz r.9,1(r.4) // Get mid-h-word of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lbz r.6,3(r.3) // Get last byte of 1st SRC1 wrd
|
||
lbz r.7,3(r.4) // Get last byte of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
addi r.3,r.3,4 // Update SRC1 pointer
|
||
cmpw r.3,r.10 // Check for last block
|
||
addi r.4,r.4,4 // Update SRC2 pointer
|
||
bne+ CompByteLpOn4Bytes // Jump if more blocks
|
||
|
||
b CompareByByte // Jump to complete last bytes
|
||
//
|
||
// Compare - Either SRC1 or SRC2 is byte unaligned but not both
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
|
||
CmpBlksByByte:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- CompBlksOf4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
CompBlksByByte:
|
||
lbz r.6,0(r.3) // Get first byte of 1st SRC1 wrd
|
||
lbz r.7,0(r.4) // Get first byte of 1st SRC2 wrd
|
||
lbz r.8,1(r.3) // Get 2nd byte of 1st SRC1 wrd
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lbz r.9,1(r.4) // Get 2nd byte of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lbz r.6,2(r.3) // Get first byte of 1st SRC1 wrd
|
||
lbz r.7,2(r.4) // Get first byte of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lbz r.8,3(r.3) // Get 2nd byte of 1st SRC1 wrd
|
||
lbz r.9,3(r.4) // Get 2nd byte of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lbz r.6,4(r.3) // Get first byte of 2nd SRC1 wrd
|
||
lbz r.7,4(r.4) // Get first byte of 2nd SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.6,r.7 // Check for 2nd word equal
|
||
lbz r.8,5(r.3) // Get 2nd byte of 2nd SRC1 wrd
|
||
lbz r.9,5(r.4) // Get 2nd byte of 2nd SRC2 wrd
|
||
bne Wrd2ne // Jump if 2nd word not equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lbz r.6,6(r.3) // Get first byte of 2nd SRC1 wrd
|
||
lbz r.7,6(r.4) // Get first byte of 2nd SRC2 wrd
|
||
bne Wrd2ne // Jump if 2nd word not equal
|
||
cmpw r.6,r.7 // Check for 2nd word equal
|
||
lbz r.8,7(r.3) // Get 2nd byte of 2nd SRC1 wrd
|
||
lbz r.9,7(r.4) // Get 2nd byte of 2nd SRC2 wrd
|
||
bne Wrd2ne // Jump if 2nd word not equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lbz r.6,8(r.3) // Get first byte of 3rd SRC1 wrd
|
||
lbz r.7,8(r.4) // Get first byte of 3rd SRC2 wrd
|
||
bne Wrd2ne // Jump if 2nd word not equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lbz r.8,9(r.3) // Get 2nd byte of 3rd SRC1 wrd
|
||
lbz r.9,9(r.4) // Get 2nd byte of 3rd SRC2 wrd
|
||
bne Wrd3ne // Jump if 3rd word not equal
|
||
cmpw r.8,r.9 // Check for 3rd word equal
|
||
lbz r.6,10(r.3) // Get first byte of 3rd SRC1 wrd
|
||
lbz r.7,10(r.4) // Get first byte of 3rd SRC2 wrd
|
||
bne Wrd3ne // Jump if 3rd word not equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lbz r.8,11(r.3) // Get 2nd byte of 3rd SRC1 wrd
|
||
lbz r.9,11(r.4) // Get 2nd byte of 3rd SRC2 wrd
|
||
bne Wrd3ne // Jump if 3rd word not equal
|
||
cmpw r.8,r.9 // Check for 3rd word equal
|
||
lbz r.6,12(r.3) // Get first byte of 4th SRC1 wrd
|
||
lbz r.7,12(r.4) // Get first byte of 4th SRC2 wrd
|
||
bne Wrd3ne // Jump if 3rd word not equal
|
||
cmpw r.6,r.7 // Check for 4th word equal
|
||
lbz r.8,13(r.3) // Get 2nd byte of 4th SRC1 wrd
|
||
lbz r.9,13(r.4) // Get 2nd byte of 4th SRC2 wrd
|
||
bne Wrd4ne // Jump if 4th word not equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
lbz r.6,14(r.3) // Get first byte of 4th SRC1 wrd
|
||
lbz r.7,14(r.4) // Get first byte of 4th SRC2 wrd
|
||
bne Wrd4ne // Jump if 4th word not equal
|
||
cmpw r.6,r.7 // Check for 4th word equal
|
||
lbz r.8,15(r.3) // Get 2nd byte of 4th SRC1 wrd
|
||
lbz r.9,15(r.4) // Get 2nd byte of 4th SRC2 wrd
|
||
bne Wrd4ne // Jump if 4th word not equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
lbz r.6,16(r.3) // Get first byte of 5th SRC1 wrd
|
||
lbz r.7,16(r.4) // Get first byte of 5th SRC2 wrd
|
||
bne Wrd4ne // Jump if 4th word not equal
|
||
cmpw r.6,r.7 // Check for 5th word equal
|
||
lbz r.8,17(r.3) // Get 2nd byte of 5th SRC1 wrd
|
||
lbz r.9,17(r.4) // Get 2nd byte of 5th SRC2 wrd
|
||
bne Wrd5ne // Jump if 5th word not equal
|
||
cmpw r.8,r.9 // Check for 5th word equal
|
||
lbz r.6,18(r.3) // Get first byte of 5th SRC1 wrd
|
||
lbz r.7,18(r.4) // Get first byte of 5th SRC2 wrd
|
||
bne Wrd5ne // Jump if 5th word not equal
|
||
cmpw r.6,r.7 // Check for 5th word equal
|
||
lbz r.8,19(r.3) // Get 2nd byte of 5th SRC1 wrd
|
||
lbz r.9,19(r.4) // Get 2nd byte of 5th SRC2 wrd
|
||
bne Wrd5ne // Jump if 5th word not equal
|
||
cmpw r.8,r.9 // Check for 5th word equal
|
||
lbz r.6,20(r.3) // Get first byte of 6th SRC1 wrd
|
||
lbz r.7,20(r.4) // Get first byte of 6th SRC2 wrd
|
||
bne Wrd5ne // Jump if 5th word not equal
|
||
cmpw r.6,r.7 // Check for 6th word equal
|
||
lbz r.8,21(r.3) // Get 2nd byte of 6th SRC1 wrd
|
||
lbz r.9,21(r.4) // Get 2nd byte of 6th SRC2 wrd
|
||
bne Wrd6ne // Jump if 6th word not equal
|
||
cmpw r.8,r.9 // Check for 6th word equal
|
||
lbz r.6,22(r.3) // Get first byte of 6th SRC1 wrd
|
||
lbz r.7,22(r.4) // Get first byte of 6th SRC2 wrd
|
||
bne Wrd6ne // Jump if 6th word not equal
|
||
cmpw r.6,r.7 // Check for 6th word equal
|
||
lbz r.8,23(r.3) // Get 2nd byte of 6th SRC1 wrd
|
||
lbz r.9,23(r.4) // Get 2nd byte of 6th SRC2 wrd
|
||
bne Wrd6ne // Jump if 6th word not equal
|
||
cmpw r.8,r.9 // Check for 6th word equal
|
||
lbz r.6,24(r.3) // Get first byte of 7th SRC1 wrd
|
||
lbz r.7,24(r.4) // Get first byte of 7th SRC2 wrd
|
||
bne Wrd6ne // Jump if 6th word not equal
|
||
cmpw r.6,r.7 // Check for 7th word equal
|
||
lbz r.8,25(r.3) // Get 2nd byte of 7th SRC1 wrd
|
||
lbz r.9,25(r.4) // Get 2nd byte of 7th SRC2 wrd
|
||
bne Wrd7ne // Jump if 7th word not equal
|
||
cmpw r.8,r.9 // Check for 7th word equal
|
||
lbz r.6,26(r.3) // Get first byte of 7th SRC1 wrd
|
||
lbz r.7,26(r.4) // Get first byte of 7th SRC2 wrd
|
||
bne Wrd7ne // Jump if 7th word not equal
|
||
cmpw r.6,r.7 // Check for 7th word equal
|
||
lbz r.8,27(r.3) // Get 2nd byte of 7th SRC1 wrd
|
||
lbz r.9,27(r.4) // Get 2nd byte of 7th SRC2 wrd
|
||
bne Wrd7ne // Jump if 7th word not equal
|
||
cmpw r.8,r.9 // Check for 7th word equal
|
||
lbz r.6,28(r.3) // Get first byte of 8th SRC1 wrd
|
||
lbz r.7,28(r.4) // Get first byte of 8th SRC2 wrd
|
||
bne Wrd7ne // Jump if 7th word not equal
|
||
cmpw r.6,r.7 // Check for 8th word equal
|
||
lbz r.8,29(r.3) // Get 2nd byte of 8th SRC1 wrd
|
||
lbz r.9,29(r.4) // Get ond byte of 8th SRC2 wrd
|
||
bne Wrd8ne // Jump if 8th word not equal
|
||
cmpw r.8,r.9 // Check for 8th word equal
|
||
lbz r.6,30(r.3) // Get first byte of 8th SRC1 wrd
|
||
lbz r.7,30(r.4) // Get first byte of 8th SRC2 wrd
|
||
bne Wrd8ne // Jump if 8th word not equal
|
||
cmpw r.6,r.7 // Check for 8th word equal
|
||
lbz r.8,31(r.3) // Get 2nd byte of 8th SRC1 wrd
|
||
lbz r.9,31(r.4) // Get 2nd byte of 8th SRC2 wrd
|
||
bne Wrd8ne // Jump if 8th word not equal
|
||
cmpw r.8,r.9 // Check for 8th word equal
|
||
bne Wrd8ne // Jump if 8th word not equal
|
||
addi r.3,r.3,32 // Update SRC1 pointer
|
||
cmpw r.3,r.10 // Check for all blocks done
|
||
addi r.4,r.4,32 // Update SRC2 pointer
|
||
bne+ CompBlksByByte // Jump if more blocks
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
CompBlksOf4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- CompareByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
CompBlksLpOn4Bytes:
|
||
lbz r.6,0(r.3) // Get first byte of 1st SRC1 wrd
|
||
lbz r.7,0(r.4) // Get first byte of 1st SRC2 wrd
|
||
lbz r.8,1(r.3) // Get 2nd byte of 1st SRC1 wrd
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lbz r.9,1(r.4) // Get 2nd byte of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lbz r.6,2(r.3) // Get first byte of 1st SRC1 wrd
|
||
lbz r.7,2(r.4) // Get first byte of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lbz r.8,3(r.3) // Get 2nd byte of 1st SRC1 wrd
|
||
lbz r.9,3(r.4) // Get 2nd byte of 1st SRC2 wrd
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
bne Wrd1ne // Jump if 1st word not equal
|
||
addi r.3,r.3,4 // Update SRC1 pointer
|
||
cmpw r.3,r.10 // Check for last block
|
||
addi r.4,r.4,4 // Update SRC2 pointer
|
||
bne+ CompBlksLpOn4Bytes // Jump if more blocks
|
||
|
||
b CompareByByte // Jump to complete last bytes
|
||
//
|
||
// Adjust pointers to SRC1 and SRC2 to isolate offending byte compare
|
||
//
|
||
Wrd2ne:
|
||
addi r.3,r.3,4 // Adjust to point to 2nd word
|
||
addi r.4,r.4,4 // Adjust to point to 2nd word
|
||
b Compare1byte // Jump to isolate the bad byte
|
||
Wrd3ne:
|
||
addi r.3,r.3,8 // Adjust to point to 3rd word
|
||
addi r.4,r.4,8 // Adjust to point to 3rd word
|
||
b Compare1byte // Jump to isolate the bad byte
|
||
Wrd4ne:
|
||
addi r.3,r.3,12 // Adjust to point to 4th word
|
||
addi r.4,r.4,12 // Adjust to point to 4th word
|
||
b Compare1byte // Jump to isolate the bad byte
|
||
Wrd5ne:
|
||
addi r.3,r.3,16 // Adjust to point to 5th word
|
||
addi r.4,r.4,16 // Adjust to point to 5th word
|
||
b Compare1byte // Jump to isolate the bad byte
|
||
Wrd6ne:
|
||
addi r.3,r.3,20 // Adjust to point to 6th word
|
||
addi r.4,r.4,20 // Adjust to point to 6th word
|
||
b Compare1byte // Jump to isolate the bad byte
|
||
Wrd7ne:
|
||
addi r.3,r.3,24 // Adjust to point to 7th word
|
||
addi r.4,r.4,24 // Adjust to point to 7th word
|
||
b Compare1byte // Jump to isolate the bad byte
|
||
Wrd8ne:
|
||
addi r.3,r.3,28 // Adjust to point to 8th word
|
||
addi r.4,r.4,28 // Adjust to point to 8th word
|
||
Wrd1ne:
|
||
Compare1byte:
|
||
sub r.5,r.11,r.3 // Calculate remaining byte count
|
||
add r.8,r.3,r.5 // Get new ending address
|
||
cmpwi r.5,0 // Check for no block to compare
|
||
beq- GetResults // Jump if processing completed
|
||
SingleByte:
|
||
lbz r.6,0(r.3) // Get next SRC1 byte
|
||
lbz r.7,0(r.4) // Get next SRC2 byte
|
||
addi r.4,r.4,1 // Update SRC2 to next byte
|
||
cmpw r.6,r.7 // Check for unequal bytes
|
||
bne- GetResults // Jump if bytes aren't equal
|
||
addi r.3,r.3,1 // Update SRC1 to next byte
|
||
cmpw r.3,r.8 // Check for being done
|
||
bne+ SingleByte // Jump if more bytes
|
||
//
|
||
// Compute the results
|
||
//
|
||
GetResults:
|
||
sub r.6,r.11,r.3 // Get no. of bytes not compared
|
||
GetResults2:
|
||
sub r.3,r.12,r.6 // Get no. of bytes that match
|
||
//
|
||
// Exit the routine
|
||
//
|
||
LEAF_EXIT(RtlCompareMemory)
|
||
|
||
//++
|
||
//
|
||
// ULONG
|
||
// RtlEqualMemory (
|
||
// IN PVOID Source1,
|
||
// IN PVOID Source2,
|
||
// IN ULONG Length
|
||
// )
|
||
//
|
||
// Routine Description:
|
||
//
|
||
// This function compares two blocks of memory for equality.
|
||
//
|
||
// Arguments:
|
||
//
|
||
// Source1 (r.3) - Supplies a pointer to the first block of memory to
|
||
// compare.
|
||
//
|
||
// Source2 (r.4) - Supplies a pointer to the second block of memory to
|
||
// compare.
|
||
//
|
||
// Length (r.5) - Supplies the length, in bytes, of the memory to be
|
||
// compared.
|
||
//
|
||
// Return Value:
|
||
//
|
||
// If all bytes in the source strings match, then a value of TRUE is
|
||
// returned. Otherwise, FALSE is returned.
|
||
//
|
||
//--
|
||
|
||
LEAF_ENTRY(RtlEqualMemory)
|
||
|
||
//
|
||
// Check alignment
|
||
//
|
||
|
||
clrlwi r.12,r.5,28 // isolate residual bytes (Length & 15)
|
||
or r.9,r.3,r.4 // merge alignment bits
|
||
sub. r.11,r.5,r.12 // subtract out residual bytes
|
||
add r.10,r.3,r.5 // get ending Source1 address
|
||
beq+ EqualByByte // if eq, no 16-byte block to compare
|
||
andi. r.9,r.9,3 // isolate alignment bits
|
||
add r.5,r.3,r.11 // compute ending block address
|
||
bne- EqualUnaligned // if ne, different alignments
|
||
|
||
EqualAligned:
|
||
|
||
//
|
||
// Both blocks are word-aligned, and there are at least 16 bytes to compare.
|
||
//
|
||
|
||
lwz r.6,0(r.3) // Get 1st Source1 word
|
||
lwz r.7,0(r.4) // Get 1st Source2 word
|
||
lwz r.8,4(r.3) // Get 2nd Source1 word
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lwz r.9,4(r.4) // Get 2nd Source2 word
|
||
bne- EqualNotEqual // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lwz r.6,8(r.3) // Get 3rd Source1 word
|
||
lwz r.7,8(r.4) // Get 3rd Source2 word
|
||
bne- EqualNotEqual // Jump if 2nd word not equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lwz r.8,12(r.3) // Get 4th Source1 word
|
||
lwz r.9,12(r.4) // Get 4th Source2 word
|
||
bne- EqualNotEqual // Jump if 3rd word not equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
addi r.3,r.3,16 // Update Source1 pointer
|
||
bne- EqualNotEqual // Jump if 4th word not equal
|
||
cmpw r.3,r.5 // Check for all blocks done
|
||
addi r.4,r.4,16 // Update Source2 pointer
|
||
bne- EqualAligned // Jump if more blocks
|
||
|
||
sub r.5,r.10,r.3 // compute remaining bytes
|
||
|
||
EqualByByte:
|
||
|
||
//
|
||
// Compare 1-byte blocks until done.
|
||
//
|
||
|
||
cmpwi r.5,0 // Check for no bytes left
|
||
beq+ EqualEqual // Jump to return if done
|
||
|
||
EqualByByteLoop:
|
||
|
||
lbz r.6,0(r.3) // Get Source1 byte
|
||
lbz r.7,0(r.4) // Get Source2 byte
|
||
addi r.3,r.3,1 // Update Source1 address
|
||
cmpw r.6,r.7 // Check for equality
|
||
addi r.4,r.4,1 // Update Source2 address
|
||
bne- EqualNotEqual // Jump if not equal
|
||
cmpw r.10,r.3 // Check for end of block
|
||
bne- EqualByByteLoop // Loop if not done
|
||
|
||
EqualEqual:
|
||
|
||
//
|
||
// The blocks are not equal.
|
||
//
|
||
|
||
li r.3,TRUE // indicate blocks are equal
|
||
|
||
blr // return to caller
|
||
|
||
EqualUnaligned:
|
||
|
||
//
|
||
// There are at least 16 bytes to compare, but at least one of the blocks
|
||
// is not word-aligned.
|
||
//
|
||
|
||
andi. r.9,r.9,1 // isolate alignment bits
|
||
bne- EqualByteUnaligned // jump if at least one not halfword aligned
|
||
|
||
EqualUnalignedLoop:
|
||
|
||
//
|
||
// Both blocks are halfword-aligned, and there are at least 16 bytes to compare.
|
||
//
|
||
|
||
lhz r.6,0(r.3) // Get 1st hword of 1st Source1 wrd
|
||
lhz r.7,0(r.4) // Get 1st hword of 1st Source2 wrd
|
||
lhz r.8,2(r.3) // Get 2nd hword of 1st Source1 wrd
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lhz r.9,2(r.4) // Get 2nd hword of 1st Source2 wrd
|
||
bne- EqualNotEqual // Check for 1st word equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lhz r.6,4(r.3) // Get 1st hword of 2nd Source1 wrd
|
||
lhz r.7,4(r.4) // Get 1st hword of 2nd Source2 wrd
|
||
bne- EqualNotEqual // Check for 1st word equal
|
||
cmpw r.6,r.7 // Check for 2nd word equal
|
||
lhz r.8,6(r.3) // Get 2nd hword of 2nd Source1 wrd
|
||
lhz r.9,6(r.4) // Get 2nd hword of 2nd Source2 wrd
|
||
bne- EqualNotEqual // Check for 2nd word equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lhz r.6,8(r.3) // Get 1st hword of 3rd Source1 wrd
|
||
lhz r.7,8(r.4) // Get 1st hword of 3rd Source2 wrd
|
||
bne- EqualNotEqual // Check for 2nd word equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lhz r.8,10(r.3) // Get 2nd hword of 3rd Source1 wrd
|
||
lhz r.9,10(r.4) // Get 2nd hword of 3rd Source2 wrd
|
||
bne- EqualNotEqual // Check for 3rd word equal
|
||
cmpw r.8,r.9 // Check for 3rd word equal
|
||
lhz r.6,12(r.3) // Get 1st hword of 4th Source1 wrd
|
||
lhz r.7,12(r.4) // Get 1st hword of 4th Source2 wrd
|
||
bne- EqualNotEqual // Check for 3rd word equal
|
||
cmpw r.6,r.7 // Check for 4th word equal
|
||
lhz r.8,14(r.3) // Get 2nd hword of 4th Source1 wrd
|
||
lhz r.9,14(r.4) // Get 2nd hword of 4th Source2 wrd
|
||
bne- EqualNotEqual // Check for 4th word equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
addi r.3,r.3,16 // Update Source1 pointer
|
||
bne- EqualNotEqual // Check for 4th word equal
|
||
cmpw r.3,r.5 // Check for all blocks done
|
||
addi r.4,r.4,16 // Update Source2 pointer
|
||
bne- EqualUnalignedLoop // Jump if more blocks
|
||
|
||
sub r.5,r.10,r.3 // compute remaining bytes
|
||
b EqualByByte // compare rest byte-by-byte
|
||
|
||
EqualByteUnaligned:
|
||
|
||
//
|
||
// There are at least 16 bytes to compare, but at least one of the blocks
|
||
// is not halfword-aligned.
|
||
//
|
||
// Because we don't expect very high byte counts in RtlEqualMemory, and
|
||
// we also don't expect unaligned buffers very often, we don't bother
|
||
// with the byte/halfword fetches that RtlCompareMemory does.
|
||
//
|
||
|
||
lbz r.6,0(r.3) // Get first byte of 1st Source1 wrd
|
||
lbz r.7,0(r.4) // Get first byte of 1st Source2 wrd
|
||
lbz r.8,1(r.3) // Get 2nd byte of 1st Source1 wrd
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lbz r.9,1(r.4) // Get 2nd byte of 1st Source2 wrd
|
||
bne EqualNotEqual // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lbz r.6,2(r.3) // Get first byte of 1st Source1 wrd
|
||
lbz r.7,2(r.4) // Get first byte of 1st Source2 wrd
|
||
bne EqualNotEqual // Jump if 1st word not equal
|
||
cmpw r.6,r.7 // Check for 1st word equal
|
||
lbz r.8,3(r.3) // Get 2nd byte of 1st Source1 wrd
|
||
lbz r.9,3(r.4) // Get 2nd byte of 1st Source2 wrd
|
||
bne EqualNotEqual // Jump if 1st word not equal
|
||
cmpw r.8,r.9 // Check for 1st word equal
|
||
lbz r.6,4(r.3) // Get first byte of 2nd Source1 wrd
|
||
lbz r.7,4(r.4) // Get first byte of 2nd Source2 wrd
|
||
bne EqualNotEqual // Jump if 1st word not equal
|
||
cmpw r.6,r.7 // Check for 2nd word equal
|
||
lbz r.8,5(r.3) // Get 2nd byte of 2nd Source1 wrd
|
||
lbz r.9,5(r.4) // Get 2nd byte of 2nd Source2 wrd
|
||
bne EqualNotEqual // Jump if 2nd word not equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lbz r.6,6(r.3) // Get first byte of 2nd Source1 wrd
|
||
lbz r.7,6(r.4) // Get first byte of 2nd Source2 wrd
|
||
bne EqualNotEqual // Jump if 2nd word not equal
|
||
cmpw r.6,r.7 // Check for 2nd word equal
|
||
lbz r.8,7(r.3) // Get 2nd byte of 2nd Source1 wrd
|
||
lbz r.9,7(r.4) // Get 2nd byte of 2nd Source2 wrd
|
||
bne EqualNotEqual // Jump if 2nd word not equal
|
||
cmpw r.8,r.9 // Check for 2nd word equal
|
||
lbz r.6,8(r.3) // Get first byte of 3rd Source1 wrd
|
||
lbz r.7,8(r.4) // Get first byte of 3rd Source2 wrd
|
||
bne EqualNotEqual // Jump if 2nd word not equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lbz r.8,9(r.3) // Get 2nd byte of 3rd Source1 wrd
|
||
lbz r.9,9(r.4) // Get 2nd byte of 3rd Source2 wrd
|
||
bne EqualNotEqual // Jump if 3rd word not equal
|
||
cmpw r.8,r.9 // Check for 3rd word equal
|
||
lbz r.6,10(r.3) // Get first byte of 3rd Source1 wrd
|
||
lbz r.7,10(r.4) // Get first byte of 3rd Source2 wrd
|
||
bne EqualNotEqual // Jump if 3rd word not equal
|
||
cmpw r.6,r.7 // Check for 3rd word equal
|
||
lbz r.8,11(r.3) // Get 2nd byte of 3rd Source1 wrd
|
||
lbz r.9,11(r.4) // Get 2nd byte of 3rd Source2 wrd
|
||
bne EqualNotEqual // Jump if 3rd word not equal
|
||
cmpw r.8,r.9 // Check for 3rd word equal
|
||
lbz r.6,12(r.3) // Get first byte of 4th Source1 wrd
|
||
lbz r.7,12(r.4) // Get first byte of 4th Source2 wrd
|
||
bne EqualNotEqual // Jump if 3rd word not equal
|
||
cmpw r.6,r.7 // Check for 4th word equal
|
||
lbz r.8,13(r.3) // Get 2nd byte of 4th Source1 wrd
|
||
lbz r.9,13(r.4) // Get 2nd byte of 4th Source2 wrd
|
||
bne EqualNotEqual // Jump if 4th word not equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
lbz r.6,14(r.3) // Get first byte of 4th Source1 wrd
|
||
lbz r.7,14(r.4) // Get first byte of 4th Source2 wrd
|
||
bne EqualNotEqual // Jump if 4th word not equal
|
||
cmpw r.6,r.7 // Check for 4th word equal
|
||
lbz r.8,15(r.3) // Get 2nd byte of 4th Source1 wrd
|
||
lbz r.9,15(r.4) // Get 2nd byte of 4th Source2 wrd
|
||
bne EqualNotEqual // Jump if 4th word not equal
|
||
cmpw r.8,r.9 // Check for 4th word equal
|
||
addi r.3,r.3,16 // Update Source1 pointer
|
||
bne EqualNotEqual // Jump if 4th word not equal
|
||
cmpw r.3,r.5 // Check for all blocks done
|
||
addi r.4,r.4,16 // Update Source2 pointer
|
||
bne- EqualByteUnaligned // Jump if more blocks
|
||
|
||
sub r.5,r.10,r.3 // compute remaining bytes
|
||
b EqualByByte // compare rest byte-by-byte
|
||
|
||
EqualNotEqual:
|
||
|
||
//
|
||
// The blocks are not equal.
|
||
//
|
||
|
||
li r.3, FALSE // indicate blocks are not equal
|
||
|
||
LEAF_EXIT(RtlEqualMemory) // return to caller
|
||
|
||
//++
|
||
//
|
||
// VOID
|
||
// RtlMoveMemory (
|
||
// IN PVOID Destination,
|
||
// IN PVOID Source,
|
||
// IN ULONG Length
|
||
// )
|
||
//
|
||
// Routine Description:
|
||
//
|
||
// This function moves memory either forward or backward, aligned or
|
||
// unaligned, in 32-byte blocks, followed by 4-byte blocks, followed
|
||
// by any remaining bytes.
|
||
//
|
||
// The alternate entry point, RtlCopyMemory, moves non-overlapping
|
||
// blocks only, in the forward direction.
|
||
//
|
||
// RtlCopyMemory32 is the same as RtlCopyMemory but is guaranteed
|
||
// never to copy more than 32 bits at a time. RtlCopyMemory may
|
||
// (probably will) be modified in the future to copy 64 bits at
|
||
// a time.
|
||
//
|
||
// Arguments:
|
||
//
|
||
// DEST (r.3) - Supplies a pointer to the destination address of
|
||
// the move operation.
|
||
//
|
||
// SRC (r.4) - Supplies a pointer to the source address of the move
|
||
// operation.
|
||
//
|
||
// LNGTH (r.5) - Supplies the length, in bytes, of the memory to be
|
||
// moved.
|
||
//
|
||
// Return Value:
|
||
//
|
||
// None.
|
||
//
|
||
//--
|
||
//
|
||
// Define the routine entry point
|
||
//
|
||
LEAF_ENTRY(RtlMoveMemory)
|
||
//
|
||
// Check to see if destination block overlaps the source block
|
||
// If so, jump to a backward move to preserve source block from
|
||
// being corrupted.
|
||
//
|
||
cmpw r.4,r.3 // Check to see if DEST > SRC
|
||
bge+ MoveForward // Jump if no overlap possible
|
||
add r.10,r.4,r.5 // Get ending SRC address
|
||
cmpw r.10,r.3 // Check for overlap
|
||
bgt- MoveBackward // Jump for overlap
|
||
//
|
||
// Move Memory Forward
|
||
//
|
||
// Check alignment
|
||
//
|
||
|
||
ALTERNATE_ENTRY(RtlCopyMemory)
|
||
ALTERNATE_ENTRY(RtlCopyMemory32)
|
||
|
||
MoveForward:
|
||
cmpwi r.5,4 // Check for less than 4 bytes
|
||
blt- FwdMoveByByte // Jump if single byte moves
|
||
xor r.9,r.4,r.3 // Check for same alignment
|
||
andi. r.9,r.9,3 // Isolate alignment bits
|
||
bne- MvFwdUnaligned // Jump if different alignments
|
||
//
|
||
// Move Memory Forward - Same SRC and DEST alignment
|
||
//
|
||
// Load and store extra bytes until a word boundary is reached
|
||
//
|
||
|
||
MvFwdAligned:
|
||
andi. r.6,r.3,3 // Check alignment type
|
||
beq+ FwdBlkDiv // Jump to process 32-Byte blocks
|
||
cmpwi r.6,3 // Check for 1 byte unaligned
|
||
bne+ FwdChkFor2 // If not, check next case
|
||
lbz r.7,0(r.4) // Get unaligned byte
|
||
li r.6,1 // Set byte move count
|
||
stb r.7,0(r.3) // Store unaligned byte
|
||
b UpdateAddrs // Jump to update addresses
|
||
FwdChkFor2:
|
||
cmpwi r.6,2 // Check for halfword aligned
|
||
bne+ FwdChkFor1 // If not, check next case
|
||
lhz r.7,0(r.4) // Get unaligned halfword
|
||
li r.6,2 // Set byte move count
|
||
sth r.7,0(r.3) // Store unaligned halfword
|
||
b UpdateAddrs // Jump to update addresses
|
||
FwdChkFor1:
|
||
lbz r.8,0(r.4) // Get unaligned byte
|
||
lhz r.7,1(r.4) // Get unaligned halfword
|
||
stb r.8,0(r.3) // Store unaligned byte
|
||
sth r.7,1(r.3) // Store unaligned halfword
|
||
li r.6,3 // Set byte move count
|
||
UpdateAddrs:
|
||
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
||
add r.4,r.4,r.6 // Update the SRC address
|
||
add r.3,r.3,r.6 // Update the DEST address
|
||
//
|
||
// Divide the block to process into 32-byte blocks
|
||
//
|
||
|
||
FwdBlkDiv:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
add r.10,r.4,r.7 // Get address of last full block
|
||
beq- FwdMoveBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Move 32-byte blocks
|
||
//
|
||
FwdMvFullBlks:
|
||
lwz r.6,0(r.4) // Get 1st SRC word
|
||
lwz r.7,4(r.4) // Get 2nd SRC word
|
||
stw r.6,0(r.3) // Store 1st DEST word
|
||
stw r.7,4(r.3) // Store 2nd DEST word
|
||
lwz r.6,8(r.4) // Get 3rd SRC word
|
||
lwz r.7,12(r.4) // Get 4th SRC word
|
||
stw r.6,8(r.3) // Store 3rd DEST word
|
||
stw r.7,12(r.3) // Store 4th DEST word
|
||
lwz r.6,16(r.4) // Get 5th SRC word
|
||
lwz r.7,20(r.4) // Get 6th SRC word
|
||
stw r.6,16(r.3) // Store 5th DEST word
|
||
stw r.7,20(r.3) // Store 6th DEST word
|
||
lwz r.6,24(r.4) // Get 7th SRC word
|
||
lwz r.7,28(r.4) // Get 8th SRC word
|
||
addi r.4,r.4,32 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for all blocks done
|
||
stw r.6,24(r.3) // Store 7th DEST word
|
||
stw r.7,28(r.3) // Store 8th DEST word
|
||
addi r.3,r.3,32 // Update DEST pointer
|
||
bne+ FwdMvFullBlks // Jump if more blocks
|
||
//
|
||
// Move 4-byte blocks
|
||
//
|
||
|
||
FwdMoveBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
add r.10,r.4,r.7 // Get address of last full block
|
||
beq- FwdMoveByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
FwdLpOn4Bytes:
|
||
lwz r.6,0(r.4) // Load next set of 4 bytes
|
||
addi r.4,r.4,4 // Get pointer to next SRC block
|
||
cmpw r.4,r.10 // Check for last block
|
||
stw r.6,0(r.3) // Store next DEST block
|
||
addi r.3,r.3,4 // Get pointer to next DEST block
|
||
bne+ FwdLpOn4Bytes // Jump if more blocks
|
||
//
|
||
// Move 1-byte blocks
|
||
//
|
||
|
||
FwdMoveByByte:
|
||
cmpwi r.5,0 // Check for no bytes left
|
||
beqlr+ // Return if done
|
||
cmpwi r.5,1 // Check for no bytes left
|
||
lbz r.6,0(r.4) // Get 1st SRC byte
|
||
stb r.6,0(r.3) // Store 1st DEST byte
|
||
beqlr+ // Return if done
|
||
cmpwi r.5,2 // Check for no bytes left
|
||
lbz r.6,1(r.4) // Get 2nd SRC byte
|
||
stb r.6,1(r.3) // Store 2nd DEST byte
|
||
beqlr+ // Return if done
|
||
lbz r.6,2(r.4) // Get 3rd SRC byte
|
||
stb r.6,2(r.3) // Store 3rd byte word
|
||
blr // Return
|
||
//
|
||
// Forward Move - SRC and DEST have different alignments
|
||
//
|
||
|
||
MvFwdUnaligned:
|
||
or r.9,r.4,r.3 // Check if either byte unaligned
|
||
andi. r.9,r.9,3 // Isolate alignment
|
||
cmpwi r.9,2 // Check for even result
|
||
bne+ FwdMvByteUnaligned // Jump for byte unaligned
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
|
||
FwdBlkDivUnaligned:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
add r.10,r.4,r.7 // Get address of last full block
|
||
beq- FwdMvHWrdBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Forward Move - SRC or DEST is halfword aligned, the other is by word
|
||
//
|
||
FwdMvByHWord:
|
||
lhz r.6,0(r.4) // Get 1st 2 bytes of 1st SRC wrd
|
||
lhz r.7,2(r.4) // Get 2nd 2 bytes of 1st SRC wrd
|
||
sth r.6,0(r.3) // Put 1st 2 bytes of 1st DST wrd
|
||
sth r.7,2(r.3) // Put 2nd 2 bytes of 1st DST wrd
|
||
lhz r.6,4(r.4) // Get 1st 2 bytes of 2nd SRC wrd
|
||
lhz r.7,6(r.4) // Get 2nd 2 bytes of 2nd SRC wrd
|
||
sth r.6,4(r.3) // Put 1st 2 bytes of 2nd DST wrd
|
||
sth r.7,6(r.3) // Put 2nd 2 bytes of 2nd DST wrd
|
||
lhz r.6,8(r.4) // Get 1st 2 bytes of 3rd SRC wrd
|
||
lhz r.7,10(r.4) // Get 2nd 2 bytes of 3rd SRC wrd
|
||
sth r.6,8(r.3) // Put 1st 2 bytes of 3rd DST wrd
|
||
sth r.7,10(r.3) // Put 2nd 2 bytes of 3rd DST wrd
|
||
lhz r.6,12(r.4) // Get 1st 2 bytes of 4th SRC wrd
|
||
lhz r.7,14(r.4) // Get 2nd 2 bytes of 4th SRC wrd
|
||
sth r.6,12(r.3) // Put 1st 2 bytes of 4th DST wrd
|
||
sth r.7,14(r.3) // Put 2nd 2 bytes of 4th DST wrd
|
||
lhz r.6,16(r.4) // Get 1st 2 bytes of 5th SRC wrd
|
||
lhz r.7,18(r.4) // Get 2nd 2 bytes of 5th SRC wrd
|
||
sth r.6,16(r.3) // Put 1st 2 bytes of 5th DST wrd
|
||
sth r.7,18(r.3) // Put 2nd 2 bytes of 5th DST wrd
|
||
lhz r.6,20(r.4) // Get 1st 2 bytes of 6th SRC wrd
|
||
lhz r.7,22(r.4) // Get 2nd 2 bytes of 6th SRC wrd
|
||
sth r.6,20(r.3) // Put 1st 2 bytes of 6th DST wrd
|
||
sth r.7,22(r.3) // Put 2nd 2 bytes of 6th DST wrd
|
||
lhz r.6,24(r.4) // Get 1st 2 bytes of 7th SRC wrd
|
||
lhz r.7,26(r.4) // Get 2nd 2 bytes of 7th SRC wrd
|
||
sth r.6,24(r.3) // Put 1st 2 bytes of 7th DST wrd
|
||
sth r.7,26(r.3) // Put 2nd 2 bytes of 7th DST wrd
|
||
lhz r.6,28(r.4) // Get 1st 2 bytes of 8th SRC wrd
|
||
lhz r.7,30(r.4) // Get 2nd 2 bytes of 8th SRC wrd
|
||
addi r.4,r.4,32 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for all blocks done
|
||
sth r.6,28(r.3) // Put 1st 2 bytes of 8th DST wrd
|
||
sth r.7,30(r.3) // Put 2nd 2 bytes of 8th DST wrd
|
||
addi r.3,r.3,32 // Update DEST pointer
|
||
bne+ FwdMvByHWord // Jump if more blocks
|
||
//
|
||
// Move 4-byte blocks with DEST Halfword unaligned
|
||
//
|
||
|
||
FwdMvHWrdBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
add r.10,r.4,r.7 // Get address of last full block
|
||
beq- FwdMoveByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
FwdHWrdLpOn4Bytes:
|
||
lhz r.6,0(r.4) // Get 1st 2 bytes of 1st SRC wrd
|
||
lhz r.7,2(r.4) // Get 2nd 2 bytes of 1st SRC wrd
|
||
addi r.4,r.4,4 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for last block
|
||
sth r.6,0(r.3) // Put 1st 2 bytes of 1st DST wrd
|
||
sth r.7,2(r.3) // Put 2nd 2 bytes of 1st DST wrd
|
||
addi r.3,r.3,4 // Update DEST pointer
|
||
bne+ FwdHWrdLpOn4Bytes // Jump if more blocks
|
||
|
||
b FwdMoveByByte // Jump to complete last bytes
|
||
//
|
||
// Forward Move - DEST is byte unaligned - Check SRC
|
||
//
|
||
|
||
FwdMvByteUnaligned:
|
||
and r.9,r.3,r.4 // Check for both byte aligned
|
||
andi. r.9,r.9,1 // Isolate alignment bits
|
||
beq- FwdBlksByByte // Jump if both not byte aligned
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
add r.10,r.4,r.7 // Get address of last full block
|
||
beq- FwdMvByteBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Forward Move - Both DEST and SRC are byte unaligned, but differently
|
||
//
|
||
|
||
FwdMvByByte:
|
||
lbz r.6,0(r.4) // Get first byte of 1st SRC word
|
||
lhz r.7,1(r.4) // Get mid-h-word of 1st SRC word
|
||
lhz r.8,3(r.4) // Get h-word crossing 1st/2nd SRC word
|
||
stb r.6,0(r.3) // Put first byte of 1st DEST word
|
||
sth r.7,1(r.3) // Put mid-h-word of 1st DEST word
|
||
sth r.8,3(r.3) // Put h-word crossing 1st/2nd DEST word
|
||
lhz r.6,5(r.4) // Get mid-h-word of 2nd SRC word
|
||
lhz r.7,7(r.4) // Get h-word crossing 2nd/3rd SRC word
|
||
lhz r.8,9(r.4) // Get mid-h-word of 3rd SRC word
|
||
sth r.6,5(r.3) // Put mid-h-word of 2nd DEST word
|
||
sth r.7,7(r.3) // Put h-word crossing 2nd/3rd DEST word
|
||
sth r.8,9(r.3) // Put mid-h-word of 3rd DEST word
|
||
lhz r.6,11(r.4) // Get h-word crossing 3rd/4th SRC word
|
||
lhz r.7,13(r.4) // Get mid-h-word of 4th SRC word
|
||
lhz r.8,15(r.4) // Get h-word crossing 4th/5th SRC word
|
||
sth r.6,11(r.3) // Put h-word crossing 3rd/4th DEST word
|
||
sth r.7,13(r.3) // Put mid-h-word of 4th DEST word
|
||
sth r.8,15(r.3) // Put h-word crossing 4th/5th DEST word
|
||
lhz r.6,17(r.4) // Get mid-h-word of 5th SRC word
|
||
lhz r.7,19(r.4) // Get h-word crossing 5th/6th SRC word
|
||
lhz r.8,21(r.4) // Get mid-h-word of 6th SRC word
|
||
sth r.6,17(r.3) // Put mid-h-word of 5th DEST word
|
||
sth r.7,19(r.3) // Put h-word crossing 5th/6th DEST word
|
||
sth r.8,21(r.3) // Put mid-h-word of 6th DEST word
|
||
lhz r.6,23(r.4) // Get h-word crossing 6th/7th SRC word
|
||
lhz r.7,25(r.4) // Get mid-h-word of 7th SRC word
|
||
lhz r.8,27(r.4) // Get h-word crossing 7th/8th SRC word
|
||
sth r.6,23(r.3) // Put h-word crossing 6th/7th DEST word
|
||
sth r.7,25(r.3) // Put mid-h-word of 7th DEST word
|
||
sth r.8,27(r.3) // Put h-word crossing 7th/8th DEST word
|
||
lhz r.6,29(r.4) // Get mid-h-word of 8th SRC word
|
||
lbz r.7,31(r.4) // Get last byte of 8th SRC word
|
||
addi r.4,r.4,32 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for all blocks done
|
||
sth r.6,29(r.3) // Put mid-h-word of 8th DEST word
|
||
stb r.7,31(r.3) // Put last byte of 8th DEST word
|
||
addi r.3,r.3,32 // Update DEST pointer
|
||
bne+ FwdMvByByte // Jump if more blocks
|
||
//
|
||
// Move 4-byte blocks with DEST or SRC byte aligned
|
||
//
|
||
|
||
FwdMvByteBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
add r.10,r.4,r.7 // Get address of last full block
|
||
beq- FwdMoveByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
FwdByteLpOn4Bytes:
|
||
lbz r.6,0(r.4) // Get first byte of 1st SRC word
|
||
lhz r.7,1(r.4) // Get mid-h-word of 1st SRC word
|
||
lbz r.8,3(r.4) // Get last byte of 1st SRC word
|
||
stb r.6,0(r.3) // Put first byte of 1st DEST wd
|
||
addi r.4,r.4,4 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for last block
|
||
sth r.7,1(r.3) // Put mid-h-word of 1st DEST wd
|
||
stb r.8,3(r.3) // Put last byte of 1st DEST wrd
|
||
addi r.3,r.3,4 // Update DEST pointer
|
||
bne+ FwdByteLpOn4Bytes // Jump if more blocks
|
||
|
||
b FwdMoveByByte // Jump to complete last bytes
|
||
//
|
||
// Forward Move - Either SRC or DEST are byte unaligned but not both
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
|
||
FwdBlksByByte:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
add r.10,r.4,r.7 // Get address of last full block
|
||
beq- FwdMvBlksOf4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
FwdMvBlksByByte:
|
||
lbz r.6,0(r.4) // Get first byte of 1st SRC wrd
|
||
lbz r.7,1(r.4) // Get second byte of 1st SRC wrd
|
||
stb r.6,0(r.3) // Put first byte of 1st DEST wrd
|
||
stb r.7,1(r.3) // Put second byte of 1st DST wrd
|
||
lbz r.6,2(r.4) // Get third byte of 1st SRC wrd
|
||
lbz r.7,3(r.4) // Get fourth byte of 1st SRC wrd
|
||
stb r.6,2(r.3) // Put third byte of 1st DEST wrd
|
||
stb r.7,3(r.3) // Put fourth byte of 1st DST wrd
|
||
lbz r.6,4(r.4) // Get first byte of 2nd SRC wrd
|
||
lbz r.7,5(r.4) // Get 2nd byte of 2nd SRC wrd
|
||
stb r.6,4(r.3) // Put first byte of 2nd DEST wrd
|
||
stb r.7,5(r.3) // Put second byte of 2nd DST wrd
|
||
lbz r.6,6(r.4) // Get third byte of 2nd SRC wrd
|
||
lbz r.7,7(r.4) // Get fourth byte of 2nd SRC wrd
|
||
stb r.6,6(r.3) // Put third byte of 2nd DEST wrd
|
||
stb r.7,7(r.3) // Put fourth byte of 2nd DST wrd
|
||
lbz r.6,8(r.4) // Get first byte of 3rd SRC wrd
|
||
lbz r.7,9(r.4) // Get second byte of 3rd SRC wrd
|
||
stb r.6,8(r.3) // Put first byte of 3rd DEST wrd
|
||
stb r.7,9(r.3) // Put second byte of 3rd DST wrd
|
||
lbz r.6,10(r.4) // Get third byte of 3rd SRC wrd
|
||
lbz r.7,11(r.4) // Get fourth byte of 3rd SRC wrd
|
||
stb r.6,10(r.3) // Put third byte of 3rd DEST wrd
|
||
stb r.7,11(r.3) // Put fourth byte of 3rd DST wrd
|
||
lbz r.6,12(r.4) // Get first byte of 4th SRC wrd
|
||
lbz r.7,13(r.4) // Get second byte of 4th SRC wrd
|
||
stb r.6,12(r.3) // Put first byte of 4th DEST wrd
|
||
stb r.7,13(r.3) // Put second byte of 4th DST wrd
|
||
lbz r.6,14(r.4) // Get third byte of 4th SRC wrd
|
||
lbz r.7,15(r.4) // Get fourth byte of 4th SRC wrd
|
||
stb r.6,14(r.3) // Put third byte of 4th DEST wrd
|
||
stb r.7,15(r.3) // Put fourth byte of 4th DST wrd
|
||
lbz r.6,16(r.4) // Get first byte of 5th SRC wrd
|
||
lbz r.7,17(r.4) // Get second byte of 5th SRC wrd
|
||
stb r.6,16(r.3) // Put first byte of 5th DEST wrd
|
||
stb r.7,17(r.3) // Put second byte of 5th DST wrd
|
||
lbz r.6,18(r.4) // Get third byte of 5th SRC wrd
|
||
lbz r.7,19(r.4) // Get fourth byte of 5th SRC wrd
|
||
stb r.6,18(r.3) // Put third byte of 5th DEST wrd
|
||
stb r.7,19(r.3) // Put fourth byte of 5th DST wrd
|
||
lbz r.6,20(r.4) // Get first byte of 6th SRC wrd
|
||
lbz r.7,21(r.4) // Get second byte of 6th SRC wrd
|
||
stb r.6,20(r.3) // Put first byte of 6th DEST wrd
|
||
stb r.7,21(r.3) // Put second byte of 6th DST wrd
|
||
lbz r.6,22(r.4) // Get third byte of 6th SRC wrd
|
||
lbz r.7,23(r.4) // Get fourth byte of 6th SRC wrd
|
||
stb r.6,22(r.3) // Put third byte of 6th DEST wrd
|
||
stb r.7,23(r.3) // Put fourth byte of 6th DST wrd
|
||
lbz r.6,24(r.4) // Get first byte of 7th SRC wrd
|
||
lbz r.7,25(r.4) // Get second byte of 7th SRC wrd
|
||
stb r.6,24(r.3) // Put first byte of 7th DEST wrd
|
||
stb r.7,25(r.3) // Put second byte of 7th DST wrd
|
||
lbz r.6,26(r.4) // Get third byte of 7th SRC wrd
|
||
lbz r.7,27(r.4) // Get fourth byte of 7th SRC wrd
|
||
stb r.6,26(r.3) // Put third byte of 7th DEST wrd
|
||
stb r.7,27(r.3) // Put fourth byte of 7th DST wrd
|
||
lbz r.6,28(r.4) // Get first byte of 8th SRC wrd
|
||
lbz r.7,29(r.4) // Get second byte of 8th SRC wrd
|
||
stb r.6,28(r.3) // Put first byte of 8th DEST wrd
|
||
stb r.7,29(r.3) // Put second byte of 8th DST wrd
|
||
lbz r.6,30(r.4) // Get third byte of 8th SRC wrd
|
||
lbz r.7,31(r.4) // Get fourth byte of 8th SRC wrd
|
||
addi r.4,r.4,32 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for all blocks done
|
||
stb r.6,30(r.3) // Put third byte of 8th DEST wrd
|
||
stb r.7,31(r.3) // Put fourth byte of 8th DST wrd
|
||
addi r.3,r.3,32 // Update DEST pointer
|
||
bne+ FwdMvBlksByByte // Jump if more blocks
|
||
//
|
||
// Move 4-byte blocks with DEST or SRC Byte aligned
|
||
//
|
||
|
||
FwdMvBlksOf4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
add r.10,r.4,r.7 // Get address of last full block
|
||
beq- FwdMoveByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
FwdBlksLpOn4Bytes:
|
||
lbz r.6,0(r.4) // Get first byte of 1st SRC wrd
|
||
lbz r.7,1(r.4) // Get second byte of 1st SRC wrd
|
||
stb r.6,0(r.3) // Put first byte of 1st DEST wrd
|
||
stb r.7,1(r.3) // Put second byte of 1st DST wrd
|
||
lbz r.6,2(r.4) // Get third byte of 1st SRC wrd
|
||
lbz r.7,3(r.4) // Get fourth byte of 1st SRC wrd
|
||
addi r.4,r.4,4 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for last block
|
||
stb r.6,2(r.3) // Put third byte of 1st DEST wrd
|
||
stb r.7,3(r.3) // Put fourth byte of 1st DST wrd
|
||
addi r.3,r.3,4 // Update DEST pointer
|
||
bne+ FwdBlksLpOn4Bytes // Jump if more blocks
|
||
|
||
b FwdMoveByByte // Jump to complete last bytes
|
||
//
|
||
// Move Memory Backward
|
||
//
|
||
// Check alignment
|
||
//
|
||
|
||
MoveBackward:
|
||
add r.4,r.4,r.5 // Compute ending SRC address
|
||
add r.3,r.3,r.5 // Compute ending DEST address
|
||
cmpwi r.5,4 // Check for less than 4 bytes
|
||
blt- BckMoveByByte // Jump if single byte moves
|
||
xor r.9,r.4,r.3 // Check for same alignment
|
||
andi. r.9,r.9,3 // Isolate alignment bits
|
||
bne- MvBckUnaligned // Jump if different alignments
|
||
//
|
||
// Move Memory Backword - Same SRC and DEST alignment
|
||
//
|
||
// Load and store extra bytes until a word boundary is reached
|
||
//
|
||
|
||
MvBckAligned:
|
||
andi. r.6,r.3,3 // Check alignment type
|
||
beq+ BckBlkDiv // Jump to process 32-Byte blocks
|
||
cmpwi r.6,1 // Check for 1 byte unaligned
|
||
bne+ BckChkFor2 // If not, check next case
|
||
lbz r.7,-1(r.4) // Get unaligned byte
|
||
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
||
stb r.7,-1(r.3) // Store unaligned byte
|
||
b BckUpdateAddrs // Jump to update addresses
|
||
BckChkFor2:
|
||
cmpwi r.6,2 // Check for halfword aligned
|
||
bne+ BckChkFor3 // If not, check next case
|
||
lhz r.7,-2(r.4) // Get unaligned halfword
|
||
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
||
sth r.7,-2(r.3) // Store unaligned halfword
|
||
b BckUpdateAddrs // Jump to update addresses
|
||
BckChkFor3:
|
||
lbz r.8,-1(r.4) // Get unaligned byte
|
||
lhz r.7,-3(r.4) // Get unaligned halfword
|
||
stb r.8,-1(r.3) // Store unaligned byte
|
||
sth r.7,-3(r.3) // Store unaligned halfword
|
||
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
||
BckUpdateAddrs:
|
||
sub r.4,r.4,r.6 // Update the SRC address
|
||
sub r.3,r.3,r.6 // Update the DEST address
|
||
//
|
||
// Divide the block to process into 32-byte blocks
|
||
//
|
||
|
||
BckBlkDiv:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
sub r.10,r.4,r.7 // Get address of last full block
|
||
beq- BckMoveBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Move 32-byte blocks
|
||
//
|
||
BckMvFullBlks:
|
||
lwz r.6,-4(r.4) // Get 1st SRC word
|
||
lwz r.7,-8(r.4) // Get 2nd SRC word
|
||
stw r.6,-4(r.3) // Store 1st DEST word
|
||
stw r.7,-8(r.3) // Store 2nd DEST word
|
||
lwz r.6,-12(r.4) // Get 3rd SRC word
|
||
lwz r.7,-16(r.4) // Get 4th SRC word
|
||
stw r.6,-12(r.3) // Store 3rd DEST word
|
||
stw r.7,-16(r.3) // Store 4th DEST word
|
||
lwz r.6,-20(r.4) // Get 5th SRC word
|
||
lwz r.7,-24(r.4) // Get 6th SRC word
|
||
stw r.6,-20(r.3) // Store 5th DEST word
|
||
stw r.7,-24(r.3) // Store 6th DEST word
|
||
lwz r.6,-28(r.4) // Get 7th SRC word
|
||
lwz r.7,-32(r.4) // Get 8th SRC word
|
||
subi r.4,r.4,32 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for all blocks done
|
||
stw r.6,-28(r.3) // Store 7th DEST word
|
||
stw r.7,-32(r.3) // Store 8th DEST word
|
||
subi r.3,r.3,32 // Update DEST pointer
|
||
bne+ BckMvFullBlks // Jump if more blocks
|
||
//
|
||
// Move 4-byte blocks
|
||
//
|
||
|
||
BckMoveBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
sub r.10,r.4,r.7 // Get address of last full block
|
||
beq- BckMoveByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
BckLpOn4Bytes:
|
||
lwz r.6,-4(r.4) // Load next set of 4 bytes
|
||
subi r.4,r.4,4 // Get pointer to next SRC block
|
||
cmpw r.4,r.10 // Check for last block
|
||
stw r.6,-4(r.3) // Store next DEST block
|
||
subi r.3,r.3,4 // Get pointer to next DEST block
|
||
bne+ BckLpOn4Bytes // Jump if more blocks
|
||
//
|
||
// Move 1-byte blocks
|
||
//
|
||
|
||
BckMoveByByte:
|
||
cmpwi r.5,0 // Check for no bytes left
|
||
beqlr+ // Return if done
|
||
lbz r.6,-1(r.4) // Get 1st SRC byte
|
||
cmpwi r.5,1 // Check for no bytes left
|
||
stb r.6,-1(r.3) // Store 1st DEST byte
|
||
beqlr+ // Return if done
|
||
lbz r.6,-2(r.4) // Get 2nd SRC byte
|
||
cmpwi r.5,2 // Check for no bytes left
|
||
stb r.6,-2(r.3) // Store 2nd DEST byte
|
||
beqlr+ // Return if done
|
||
lbz r.6,-3(r.4) // Get 3rd SRC byte
|
||
stb r.6,-3(r.3) // Store 3rd byte word
|
||
blr // Return
|
||
//
|
||
// Backward Move - SRC and DEST have different alignments
|
||
//
|
||
|
||
MvBckUnaligned:
|
||
or r.9,r.4,r.3 // Check for either byte unaligned
|
||
andi. r.9,r.9,3 // Isolate alignment
|
||
cmpwi r.9,2 // Check for even result
|
||
bne+ BckMvByteUnaligned // Jump for byte unaligned
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
|
||
BckBlkDivUnaligned:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
sub r.10,r.4,r.7 // Get address of last full block
|
||
beq- BckMvHWrdBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Backward Move - SRC or DEST is halfword aligned, the other is by word
|
||
//
|
||
BckMvByHWord:
|
||
lhz r.6,-2(r.4) // Get 1st 2 bytes of 1st SRC wrd
|
||
lhz r.7,-4(r.4) // Get 2nd 2 bytes of 1st SRC wrd
|
||
sth r.6,-2(r.3) // Put 1st 2 bytes of 1st DST wrd
|
||
sth r.7,-4(r.3) // Put 2nd 2 bytes of 1st DST wrd
|
||
lhz r.6,-6(r.4) // Get 1st 2 bytes of 2nd SRC wrd
|
||
lhz r.7,-8(r.4) // Get 2nd 2 bytes of 2nd SRC wrd
|
||
sth r.6,-6(r.3) // Put 1st 2 bytes of 2nd DST wrd
|
||
sth r.7,-8(r.3) // Put 2nd 2 bytes of 2nd DST wrd
|
||
lhz r.6,-10(r.4) // Get 1st 2 bytes of 3rd SRC wrd
|
||
lhz r.7,-12(r.4) // Get 2nd 2 bytes of 3rd SRC wrd
|
||
sth r.6,-10(r.3) // Put 1st 2 bytes of 3rd DST wrd
|
||
sth r.7,-12(r.3) // Put 2nd 2 bytes of 3rd DST wrd
|
||
lhz r.6,-14(r.4) // Get 1st 2 bytes of 4th SRC wrd
|
||
lhz r.7,-16(r.4) // Get 2nd 2 bytes of 4th SRC wrd
|
||
sth r.6,-14(r.3) // Put 1st 2 bytes of 4th DST wrd
|
||
sth r.7,-16(r.3) // Put 2nd 2 bytes of 4th DST wrd
|
||
lhz r.6,-18(r.4) // Get 1st 2 bytes of 5th SRC wrd
|
||
lhz r.7,-20(r.4) // Get 2nd 2 bytes of 5th SRC wrd
|
||
sth r.6,-18(r.3) // Put 1st 2 bytes of 5th DST wrd
|
||
sth r.7,-20(r.3) // Put 2nd 2 bytes of 5th DST wrd
|
||
lhz r.6,-22(r.4) // Get 1st 2 bytes of 6th SRC wrd
|
||
lhz r.7,-24(r.4) // Get 2nd 2 bytes of 6th SRC wrd
|
||
sth r.6,-22(r.3) // Put 1st 2 bytes of 6th DST wrd
|
||
sth r.7,-24(r.3) // Put 2nd 2 bytes of 6th DST wrd
|
||
lhz r.6,-26(r.4) // Get 1st 2 bytes of 7th SRC wrd
|
||
lhz r.7,-28(r.4) // Get 2nd 2 bytes of 7th SRC wrd
|
||
sth r.6,-26(r.3) // Put 1st 2 bytes of 7th DST wrd
|
||
sth r.7,-28(r.3) // Put 2nd 2 bytes of 7th DST wrd
|
||
lhz r.6,-30(r.4) // Get 1st 2 bytes of 8th SRC wrd
|
||
lhz r.7,-32(r.4) // Get 2nd 2 bytes of 8th SRC wrd
|
||
subi r.4,r.4,32 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for all blocks done
|
||
sth r.6,-30(r.3) // Put 1st 2 bytes of 8th DST wrd
|
||
sth r.7,-32(r.3) // Put 2nd 2 bytes of 8th DST wrd
|
||
subi r.3,r.3,32 // Update DEST pointer
|
||
bne+ BckMvByHWord // Jump if more blocks
|
||
//
|
||
// Move 4-byte blocks with DEST Halfword unaligned
|
||
//
|
||
|
||
BckMvHWrdBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
sub r.10,r.4,r.7 // Get address of last full block
|
||
beq- BckMoveByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
BckHWrdLpOn4Bytes:
|
||
lhz r.6,-2(r.4) // Get 1st 2 bytes of 1st SRC wrd
|
||
lhz r.7,-4(r.4) // Get 2nd 2 bytes of 1st SRC wrd
|
||
subi r.4,r.4,4 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for last block
|
||
sth r.6,-2(r.3) // Put 1st 2 bytes of 1st DST wrd
|
||
sth r.7,-4(r.3) // Put 2nd 2 bytes of 1st DST wrd
|
||
subi r.3,r.3,4 // Update DEST pointer
|
||
bne+ BckHWrdLpOn4Bytes // Jump if more blocks
|
||
|
||
b BckMoveByByte // Jump to complete last bytes
|
||
//
|
||
// Check for both byte unaligned
|
||
//
|
||
|
||
BckMvByteUnaligned:
|
||
and r.9,r.3,r.4 // Check for both byte aligned
|
||
and r.9,r.9,1 // Isolate alignment bits
|
||
bne- BckBlksByByte // Jump if both not byte aligned
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
sub r.10,r.4,r.7 // Get address of last full block
|
||
beq- BckMvByteBy4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
//
|
||
// Backward Move - Both SRC and DEST are byte unaligned, but differently
|
||
//
|
||
|
||
BckMvByByte:
|
||
lbz r.6,-1(r.4) // Get first byte of 1st SRC word
|
||
lhz r.7,-3(r.4) // Get mid-h-word of 1st SRC word
|
||
lhz r.8,-5(r.4) // Get h-word crossing 1st/2nd SRC word
|
||
stb r.6,-1(r.3) // Put first byte of 1st DEST word
|
||
sth r.7,-3(r.3) // Put mid-h-word of 1st DEST word
|
||
sth r.8,-5(r.3) // Put h-word crossing 1st/2nd DEST word
|
||
lhz r.6,-7(r.4) // Get mid-h-word of 2nd SRC word
|
||
lhz r.7,-9(r.4) // Get h-word crossing 2nd/3rd SRC word
|
||
lhz r.8,-11(r.4) // Get mid-h-word of 3rd SRC word
|
||
sth r.6,-7(r.3) // Put mid-h-word of 2nd DEST word
|
||
sth r.7,-9(r.3) // Put h-word crossing 2nd/3rd DEST word
|
||
sth r.8,-11(r.3) // Put mid-h-word of 3rd DEST word
|
||
lhz r.6,-13(r.4) // Get h-word crossing 3rd/4th SRC word
|
||
lhz r.7,-15(r.4) // Get mid-h-word of 4th SRC word
|
||
lhz r.8,-17(r.4) // Get h-word crossing 4th/5th SRC word
|
||
sth r.6,-13(r.3) // Put h-word crossing 3rd/4th DEST word
|
||
sth r.7,-15(r.3) // Put mid-h-word of 4th DEST word
|
||
sth r.8,-17(r.3) // Put h-word crossing 4th/5th DEST word
|
||
lhz r.6,-19(r.4) // Get mid-h-word of 5th SRC word
|
||
lhz r.7,-21(r.4) // Get h-word crossing 5th/6th SRC word
|
||
lhz r.8,-23(r.4) // Get mid-h-word of 6th SRC word
|
||
sth r.6,-19(r.3) // Put mid-h-word of 5th DEST word
|
||
sth r.7,-21(r.3) // Put h-word crossing 5th/6th DEST word
|
||
sth r.8,-23(r.3) // Put mid-h-word of 6th DEST word
|
||
lhz r.6,-25(r.4) // Get h-word crossing 6th/7th SRC word
|
||
lhz r.7,-27(r.4) // Get mid-h-word of 7th SRC word
|
||
lhz r.8,-29(r.4) // Get h-word crossing 7th/8th SRC word
|
||
sth r.6,-25(r.3) // Put h-word crossing 6th/7th DEST word
|
||
sth r.7,-27(r.3) // Put mid-h-word of 7th DEST word
|
||
sth r.8,-29(r.3) // Put h-word crossing 7th/8th DEST word
|
||
lhz r.6,-31(r.4) // Get mid-h-word of 8th SRC word
|
||
lbz r.7,-32(r.4) // Get last byte of 8th SRC word
|
||
subi r.4,r.4,32 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for all blocks done
|
||
sth r.7,-31(r.3) // Put mid-h-word of 8th DEST wd
|
||
stb r.8,-32(r.3) // Put last byte of 8th DEST wrd
|
||
subi r.3,r.3,32 // Update DEST pointer
|
||
bne+ BckMvByByte // Jump if more blocks
|
||
//
|
||
// Move 4-byte blocks with DEST and SRC Byte aligned
|
||
//
|
||
|
||
BckMvByteBy4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
sub r.10,r.4,r.7 // Get address of last full block
|
||
beq- BckMoveByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
BckByteLpOn4Bytes:
|
||
lbz r.6,-1(r.4) // Get first byte of 1st SRC word
|
||
lhz r.7,-3(r.4) // Get mid-h-word of 1st SRC word
|
||
lbz r.8,-4(r.4) // Get last byte of 1st SRC word
|
||
stb r.6,-1(r.3) // Put first byte of 1st DEST wd
|
||
subi r.4,r.4,4 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for last block
|
||
sth r.7,-3(r.3) // Put mid-h-word of 1st DEST wd
|
||
stb r.8,-4(r.3) // Put last byte of 1st DEST wrd
|
||
subi r.3,r.3,4 // Update DEST pointer
|
||
bne+ BckByteLpOn4Bytes // Jump if more blocks
|
||
|
||
b BckMoveByByte // Jump to complete last bytes
|
||
//
|
||
// Backward Move - Either DEST or SRC byte unaligned but not both
|
||
//
|
||
// Divide the blocks to process into 32-byte blocks
|
||
//
|
||
|
||
BckBlksByByte:
|
||
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.5,r.6 // Get full block count
|
||
sub r.10,r.4,r.7 // Get address of last full block
|
||
beq- BckMvBlksOf4Bytes // Jump if no full blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
BckMvBlksByByte:
|
||
lbz r.6,-1(r.4) // Get first byte of 1st SRC wrd
|
||
lbz r.7,-2(r.4) // Get second byte of 1st SRC wrd
|
||
stb r.6,-1(r.3) // Put first byte of 1st DEST wrd
|
||
stb r.7,-2(r.3) // Put second byte of 1st DST wrd
|
||
lbz r.6,-3(r.4) // Get third byte of 1st SRC wrd
|
||
lbz r.7,-4(r.4) // Get fourth byte of 1st SRC wrd
|
||
stb r.6,-3(r.3) // Put third byte of 1st DEST wrd
|
||
stb r.7,-4(r.3) // Put fourth byte of 1st DST wrd
|
||
lbz r.6,-5(r.4) // Get first byte of 2nd SRC wrd
|
||
lbz r.7,-6(r.4) // Get second byte of 2nd SRC wrd
|
||
stb r.6,-5(r.3) // Put first byte of 2nd DEST wrd
|
||
stb r.7,-6(r.3) // Put second byte of 2nd DST wrd
|
||
lbz r.6,-7(r.4) // Get third byte of 2nd SRC wrd
|
||
lbz r.7,-8(r.4) // Get fourth byte of 2nd SRC wrd
|
||
stb r.6,-7(r.3) // Put third byte of 2nd DEST wrd
|
||
stb r.7,-8(r.3) // Put fourth byte of 2nd DST wrd
|
||
lbz r.6,-9(r.4) // Get first byte of 3rd SRC wrd
|
||
lbz r.7,-10(r.4) // Get second byte of 3rd SRC wrd
|
||
stb r.6,-9(r.3) // Put first byte of 3rd DST wrd
|
||
stb r.7,-10(r.3) // Put second byte of 3rd DST wrd
|
||
lbz r.6,-11(r.4) // Get third byte of 3rd SRC wrd
|
||
lbz r.7,-12(r.4) // Get fourth byte of 3rd SRC wrd
|
||
stb r.6,-11(r.3) // Put third byte of 3rd DEST wrd
|
||
stb r.7,-12(r.3) // Put fourth byte of 3rd DST wrd
|
||
lbz r.6,-13(r.4) // Get first byte of 4th SRC wrd
|
||
lbz r.7,-14(r.4) // Get second byte of 4th SRC wrd
|
||
stb r.6,-13(r.3) // Put first byte of 4th DEST wrd
|
||
stb r.7,-14(r.3) // Put second byte of 4th DST wrd
|
||
lbz r.6,-15(r.4) // Get third byte of 4th SRC wrd
|
||
lbz r.7,-16(r.4) // Get fourth byte of 4th SRC wrd
|
||
stb r.6,-15(r.3) // Put third byte of 4th DEST wrd
|
||
stb r.7,-16(r.3) // Put fourth byte of 4th DST wrd
|
||
lbz r.6,-17(r.4) // Get first byte of 5th SRC wrd
|
||
lbz r.7,-18(r.4) // Get second byte of 5th SRC wrd
|
||
stb r.6,-17(r.3) // Put first byte of 5th DEST wrd
|
||
stb r.7,-18(r.3) // Put second byte of 5th DST wrd
|
||
lbz r.6,-19(r.4) // Get third byte of 5th SRC wrd
|
||
lbz r.7,-20(r.4) // Get fourth byte of 5th SRC wrd
|
||
stb r.6,-19(r.3) // Put third byte of 5th DEST wrd
|
||
stb r.7,-20(r.3) // Put fourth byte of 5th DST wrd
|
||
lbz r.6,-21(r.4) // Get first byte of 6th SRC wrd
|
||
lbz r.7,-22(r.4) // Get second byte of 6th SRC wrd
|
||
stb r.6,-21(r.3) // Put first byte of 6th DEST wrd
|
||
stb r.7,-22(r.3) // Put second byte of 6th DST wrd
|
||
lbz r.6,-23(r.4) // Get third byte of 6th SRC wrd
|
||
lbz r.7,-24(r.4) // Get fourth byte of 6th SRC wrd
|
||
stb r.6,-23(r.3) // Put third byte of 6th DEST wrd
|
||
stb r.7,-24(r.3) // Put fourth byte of 6th DST wrd
|
||
lbz r.6,-25(r.4) // Get first byte of 7th SRC wrd
|
||
lbz r.7,-26(r.4) // Get second byte of 7th SRC wrd
|
||
stb r.6,-25(r.3) // Put first byte of 7th DEST wrd
|
||
stb r.7,-26(r.3) // Put second byte of 7th DST wrd
|
||
lbz r.6,-27(r.4) // Get third byte of 7th SRC wrd
|
||
lbz r.7,-28(r.4) // Get fourth byte of 7th SRC wrd
|
||
stb r.6,-27(r.3) // Put third byte of 7th DEST wrd
|
||
stb r.7,-28(r.3) // Put fourth byte of 7th DST wrd
|
||
lbz r.6,-29(r.4) // Get first byte of 8th SRC wrd
|
||
lbz r.7,-30(r.4) // Get second byte of 8th SRC wrd
|
||
stb r.6,-29(r.3) // Put first byte of 8th DEST wrd
|
||
stb r.7,-30(r.3) // Put second byte of 8th DST wrd
|
||
lbz r.6,-31(r.4) // Get third byte of 8th SRC wrd
|
||
lbz r.7,-32(r.4) // Get fourth byte of 8th SRC wrd
|
||
subi r.4,r.4,32 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for all blocks done
|
||
stb r.6,-31(r.3) // Put third byte of 8th DEST wrd
|
||
stb r.7,-32(r.3) // Put fourth byte of 8th DST wrd
|
||
subi r.3,r.3,32 // Update DEST pointer
|
||
bne+ BckMvBlksByByte // Jump if more blocks
|
||
//
|
||
// Move 4-byte blocks with DEST or SRC Byte aligned, but not the other
|
||
//
|
||
|
||
BckMvBlksOf4Bytes:
|
||
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
||
sub. r.7,r.5,r.6 // Get 4-byte block count
|
||
sub r.10,r.4,r.7 // Get address of last full block
|
||
beq- BckMoveByByte // Jump if no 4-byte blocks
|
||
mr r.5,r.6 // Set Length = remainder
|
||
|
||
BckBlksLpOn4Bytes:
|
||
lbz r.6,-1(r.4) // Get first byte of 1st SRC wrd
|
||
lbz r.7,-2(r.4) // Get second byte of 1st SRC wrd
|
||
stb r.6,-1(r.3) // Put first byte of 1st DEST wrd
|
||
stb r.7,-2(r.3) // Put second byte of 1st DST wrd
|
||
lbz r.6,-3(r.4) // Get third byte of 1st SRC wrd
|
||
lbz r.7,-4(r.4) // Get fourth byte of 1st SRC wrd
|
||
subi r.4,r.4,4 // Update SRC pointer
|
||
cmpw r.4,r.10 // Check for last block
|
||
stb r.6,-3(r.3) // Put third byte of 1st DEST wrd
|
||
stb r.7,-4(r.3) // Put fourth byte of 1st DST wrd
|
||
subi r.3,r.3,4 // Update DEST pointer
|
||
bne+ BckBlksLpOn4Bytes // Jump if more blocks
|
||
|
||
b BckMoveByByte // Jump to complete last bytes
|
||
//
|
||
// Exit the routine
|
||
//
|
||
MvExit:
|
||
LEAF_EXIT(RtlMoveMemory)
|
||
|
||
//++
|
||
//
|
||
// VOID
|
||
// RtlZeroMemory (
|
||
// IN PVOID Destination,
|
||
// IN ULONG Length
|
||
// )
|
||
//
|
||
// Routine Description:
|
||
//
|
||
// This function zeros memory by first aligning the destination
|
||
// address to a longword boundary, and then zeroing 32-byte blocks,
|
||
// followed by 4-byte blocks, followed by any remaining bytes.
|
||
//
|
||
// Arguments:
|
||
//
|
||
// DEST (r.3) - Supplies a pointer to the memory to zero.
|
||
//
|
||
// LENGTH (r.4) - Supplies the length, in bytes, of the memory to be
|
||
// zeroed.
|
||
//
|
||
// Return Value:
|
||
//
|
||
// None.
|
||
//
|
||
//--
|
||
//
|
||
// Define the entry point
|
||
//
|
||
LEAF_ENTRY(RtlZeroMemory)
|
||
//
|
||
// Fill Memory with the zeros
|
||
//
|
||
// Zero extra bytes until a word boundary is reached
|
||
//
|
||
cmpwi cr.1,r.4,4 // Check for less than 3 bytes
|
||
mtcrf 0x01,r.3 // Check alignment type
|
||
li r.5,0 // Set pattern as 0
|
||
blt- cr.1,ZeroByByte // Jump to handle small cases
|
||
li r.6,1
|
||
ZeroMem:
|
||
bt 31,ZeroOdd // Branch if 1 or 3
|
||
bf 30,ZBlkDiv // Branch if not 2
|
||
sth r.5,0(r.3) // Store unaligned halfword
|
||
li r.6,2
|
||
b ZUpdteAddr // Jump to update addresses
|
||
ZeroOdd:
|
||
bt 30,Zero1 // Branch if align 3
|
||
sth r.5,1(r.3) // Store unaligned halfword
|
||
li r.6,3
|
||
Zero1:
|
||
stb r.5,0(r.3) // Store unaligned byte
|
||
ZUpdteAddr:
|
||
sub r.4,r.4,r.6 // Decrement LENGTH by unaligned
|
||
add r.3,r.3,r.6 // Update the DEST address
|
||
//
|
||
// Divide the block to process into 32-byte blocks
|
||
//
|
||
ZBlkDiv:
|
||
andi. r.6,r.4,BLKLN-1 // Isolate remainder of LENGTH/32
|
||
sub. r.7,r.4,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- ZeroBy4Bytes // Jump if no full blocks
|
||
mr r.4,r.6 // Set Length = Remainder
|
||
//
|
||
// Zero 32-Byte Blocks
|
||
//
|
||
// Check for 32-Byte Boundary, if so use the cache zero
|
||
//
|
||
andi. r.9,r.3,31 // Check for cache boundary
|
||
li r.6,0 // Set offset=0
|
||
beq+ BlkZeroC // Jump if on cache boundary
|
||
//
|
||
// If not 32-byte boundary, fill to 32-bit boundary then use cache zero
|
||
//
|
||
srwi r.8,r.7,5 // Get block count
|
||
cmpwi r.8,1 // Check for single block
|
||
mr r.12,r.9 // Save offset value
|
||
li r11,32 // Get full block count
|
||
sub r9,r.11,r.9 // Get distance to cache boundary
|
||
beq- BlkZero // Jump if single block
|
||
//
|
||
// Adjust pointers and loop counts
|
||
//
|
||
sub. r.8,r.4,r.9 // TMP=Remainder-Unaligned Count
|
||
add r.10,r.10,r.9 // Get new end pointer
|
||
mr r.4,r.8 // Set new remainder count (TMP)
|
||
bge+ AlignToCache // Jump if TMP >= 0
|
||
sub r.10,r.10,r.9 // Subtract previous increment
|
||
add r4,r.11,r8 // Get new rem cnt (32-abs(TMP))
|
||
sub r.10,r.10,r.12 // Get new end pointer
|
||
//
|
||
// Fill to 32-byte boundary - Using 4-byte blocks
|
||
//
|
||
AlignToCache:
|
||
andi. r.8,r.9,3 // Isolate remainder of LENGTH/4
|
||
sub. r.9,r.9,r.8 // Get full word byte count
|
||
li r.7,4 // Initialize loop decrement
|
||
beq- ByteAlignToCache // Jump if no full blocks
|
||
//
|
||
Align4Bytes:
|
||
stw r.5,0(r.3)
|
||
sub. r.9,r.9,r.7 // Increment the loop counter
|
||
addi r.3,r.3,4 // Increment the DEST address
|
||
bne+ Align4Bytes // Jump if more 4-Byte Blk fills
|
||
//
|
||
// Align to cache boundary using 1-Byte Blocks
|
||
//
|
||
ByteAlignToCache:
|
||
cmpwi r.8,0 // Check for completion
|
||
add r.3,r.3,r.8 // Update DEST address
|
||
beq+ BlkZeroC // Jump if cache aligned
|
||
//
|
||
cmpwi r.8,1 // Check for done
|
||
stb r.5,0(r.3) // Zero 1 byte
|
||
beq+ BlkZeroC // Jump if done
|
||
cmpwi r.8,2 // Check cache aligned
|
||
stb r.5,1(r.3) // Zero 1 byte
|
||
beq+ BlkZeroC // Jump cache aligned
|
||
stb r.5,2(r.3) // Zero 1 Byte
|
||
//
|
||
// Zero using the cache
|
||
//
|
||
BlkZeroC:
|
||
#if 0 // BLDR_KERNEL_RUNTIME != 1
|
||
//
|
||
// In order to allow us to boot in write-through or cache-inhibited
|
||
// mode, the boot loader does not use dcbz.
|
||
//
|
||
|
||
dcbz r.6,r.3 // Zero 32-byte cache block
|
||
addi r.3,r.3,32 // Increment the DEST address
|
||
cmpw r.3,r.10 // Check for completion
|
||
bne+ BlkZeroC // Jump if more 32-Byte Blk fills
|
||
b ZeroBy4Bytes // Jump to finish
|
||
#endif
|
||
//
|
||
// Zero using normal stores
|
||
//
|
||
BlkZero:
|
||
stw r.5,0(r.3) // Store the 1st DEST word
|
||
stw r.5,4(r.3) // Store the 2nd DEST word
|
||
stw r.5,8(r.3) // Store the 3rd DEST word
|
||
stw r.5,12(r.3) // Store the 4th DEST word
|
||
stw r.5,16(r.3) // Store the 5th DEST word
|
||
stw r.5,20(r.3) // Store the 6th DEST word
|
||
stw r.5,24(r.3) // Store the 7th DEST word
|
||
stw r.5,28(r.3) // Store the 8th DEST word
|
||
addi r.3,r.3,32 // Increment the DEST address
|
||
cmpw r.3,r.10 // Check for completion
|
||
bne+ BlkZero // Jump if more 32-Byte Blk fills
|
||
//
|
||
// Zero 4-Byte Blocks
|
||
//
|
||
ZeroBy4Bytes:
|
||
andi. r.6,r.4,3 // Isolate remainder of LENGTH/4
|
||
sub. r.7,r.4,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- ZeroByByte // Jump if no full blocks
|
||
mr r.4,r.6 // Set Length = Remainder
|
||
//
|
||
Zero4Bytes:
|
||
stw r.5,0(r.3)
|
||
addi r.3,r.3,4 // Increment the DEST address
|
||
cmpw r.3,r.10 // Check for completion
|
||
bne+ Zero4Bytes // Jump if more 4-Byte Blk fills
|
||
//
|
||
// Zero 1-Byte Blocks
|
||
//
|
||
ZeroByByte:
|
||
cmpwi r.4,0 // Check for completion
|
||
beqlr+ // Return if done
|
||
//
|
||
Zero1Byte:
|
||
cmpwi r.4,1 // Check for done
|
||
stb r.5,0(r.3) // Zero 1 byte
|
||
beqlr+ // Return if done
|
||
cmpwi r.4,2 // Check for done
|
||
stb r.5,1(r.3) // Zero 1 byte
|
||
beqlr+ // Return if done
|
||
stb r.5,2(r.3) // Zero 1 Byte
|
||
//
|
||
// Exit
|
||
//
|
||
ZeroExit:
|
||
LEAF_EXIT(RtlZeroMemory)
|
||
//
|
||
//++
|
||
//
|
||
// VOID
|
||
// RtlFillMemory (
|
||
// IN PVOID Destination,
|
||
// IN ULONG Length,
|
||
// IN UCHAR Fill
|
||
// )
|
||
//
|
||
// Routine Description:
|
||
//
|
||
// This function fills memory by first aligning the destination
|
||
// address to a longword boundary, and then filling 32-byte blocks,
|
||
// followed by 4-byte blocks, followed by any remaining bytes.
|
||
//
|
||
// Arguments:
|
||
//
|
||
// DEST (r.3) - Supplies a pointer to the memory to fill.
|
||
//
|
||
// LENGTH (r.4) - Supplies the length, in bytes, of the memory to be
|
||
// filled.
|
||
//
|
||
// PTTRN (r.5) - Supplies the fill byte.
|
||
//
|
||
// Return Value:
|
||
//
|
||
// None.
|
||
//
|
||
//--
|
||
//
|
||
// Define the entry point
|
||
//
|
||
LEAF_ENTRY(RtlFillMemory)
|
||
|
||
cmpwi cr.1,r.4,4 // Check for less than 4 bytes
|
||
|
||
//
|
||
// Initialize a register with the fill byte duplicated
|
||
//
|
||
|
||
rlwimi r.5,r.5,8,0x0000ff00 // propogate rightmost byte
|
||
rlwimi. r.5,r.5,16,0xffff0000 // thru upper 3 bytes
|
||
|
||
//
|
||
// Fill Memory with the pattern
|
||
//
|
||
//
|
||
// Fill extra bytes until a word boundary is reached
|
||
//
|
||
|
||
mtcrf 0x01,r.3 // Check alignment type
|
||
blt- cr.1,FillByByte // Jump to handle small cases
|
||
li r.6,1 // Default unaligned count to 1 byte
|
||
beq- ZeroMem // Use RtlZeroMemory if fill 0
|
||
bt 31,FillOdd // Branch if align 1 or 3
|
||
bf 30,BlkDiv // Branch if not 2
|
||
sth r.5,0(r.3) // Store unaligned halfword
|
||
li r.6,2 // Set count to 2 bytes
|
||
b UpdteAddr // Jump to update addresses
|
||
FillOdd:
|
||
bt 30,Fill1 // Branch if align 3
|
||
sth r.5,1(r.3) // Store unaligned halfword
|
||
li r.6,3 // Set count to 3 bytes
|
||
Fill1:
|
||
stb r.5,0(r.3) // Store unaligned byte
|
||
UpdteAddr:
|
||
sub r.4,r.4,r.6 // Decrement LENGTH by unaligned
|
||
add r.3,r.3,r.6 // Update the DEST address
|
||
//
|
||
// Divide the block to process into 32-byte blocks
|
||
//
|
||
|
||
BlkDiv:
|
||
andi. r.6,r.4,BLKLN-1 // Isolate remainder of LENGTH/32
|
||
sub. r.7,r.4,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- FillBy4Bytes // Jump if no full blocks
|
||
mr r.4,r.6 // Set Length = Remainder
|
||
//
|
||
// Fill 32-Byte Blocks
|
||
//
|
||
BlkFill:
|
||
stw r.5,0(r.3) // Store the 1st DEST word
|
||
stw r.5,4(r.3) // Store the 2nd DEST word
|
||
stw r.5,8(r.3) // Store the 3rd DEST word
|
||
stw r.5,12(r.3) // Store the 4th DEST word
|
||
stw r.5,16(r.3) // Store the 5th DEST word
|
||
stw r.5,20(r.3) // Store the 6th DEST word
|
||
stw r.5,24(r.3) // Store the 7th DEST word
|
||
stw r.5,28(r.3) // Store the 8th DEST word
|
||
addi r.3,r.3,32 // Increment the DEST address
|
||
cmpw r.3,r.10 // Check for completion
|
||
bne+ BlkFill // Jump if more 32-Byte Blk fills
|
||
//
|
||
// Fill 4-Byte Blocks
|
||
//
|
||
FillBy4Bytes:
|
||
andi. r.6,r.4,3 // Isolate remainder of LENGTH/4
|
||
sub. r.7,r.4,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- FillByByte // Jump if no full blocks
|
||
mr r.4,r.6 // Set Length = Remainder
|
||
//
|
||
Fill4Bytes:
|
||
stw r.5,0(r.3)
|
||
addi r.3,r.3,4 // Increment the DEST address
|
||
cmpw r.3,r.10 // Check for completion
|
||
bne+ Fill4Bytes // Jump if more 4-Byte Blk fills
|
||
//
|
||
// Fill 1-Byte Blocks
|
||
//
|
||
FillByByte:
|
||
cmpwi r.4,0 // Check for completion
|
||
beqlr+ // Return if done
|
||
//
|
||
Fill1Byte:
|
||
cmpwi r.4,1 // Check for done
|
||
stb r.5,0(r.3) // Fill 1 byte
|
||
beqlr+ // Return if done
|
||
cmpwi r.4,2 // Check for done
|
||
stb r.5,1(r.3) // Fill 1 byte
|
||
beqlr+ // Return if done
|
||
stb r.5,2(r.3) // Fill 1 Byte
|
||
//
|
||
// Exit
|
||
//
|
||
FillExit:
|
||
LEAF_EXIT(RtlFillMemory)
|
||
|
||
//++
|
||
//
|
||
// VOID
|
||
// RtlFillMemoryUlong (
|
||
// IN PVOID Destination,
|
||
// IN ULONG Length,
|
||
// IN ULONG Pattern
|
||
// )
|
||
//
|
||
// Routine Description:
|
||
//
|
||
// This function fills memory with the specified longowrd pattern by
|
||
// filling 32-byte blocks followed by 4-byte blocks.
|
||
//
|
||
// N.B. This routine assumes that the destination address is aligned
|
||
// on a longword boundary and that the length is an even multiple
|
||
// of longwords.
|
||
//
|
||
// Arguments:
|
||
//
|
||
// DEST (r.3) - Supplies a pointer to the memory to fill.
|
||
//
|
||
// LENGTH (r.4) - Supplies the length, in bytes, of the memory to be
|
||
// filled.
|
||
//
|
||
// PTTRN (r.5) - Supplies the fill pattern.
|
||
//
|
||
// Return Value:
|
||
//
|
||
// None.
|
||
//
|
||
//--
|
||
//
|
||
// Define the entry point
|
||
//
|
||
LEAF_ENTRY(RtlFillMemoryUlong)
|
||
//
|
||
// Make sure length is even number of longwords
|
||
//
|
||
srwi r.4,r.4,2 // Shift length to divide by 4
|
||
slwi r.4,r.4,2 // Make sure LENGTH is even
|
||
//
|
||
// Divide the block to process into 32-byte blocks
|
||
//
|
||
andi. r.6,r.4,BLKLN-1 // Isolate remainder of LNGTH/32
|
||
sub. r.7,r.4,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beq- FillUlBy4Bytes // Jump if no full blocks
|
||
mr r.4,r.6 // Set Length = Remainder
|
||
//
|
||
// Fill 32-Byte Blocks
|
||
//
|
||
BlkFillUl:
|
||
stw r.5,0(r.3) // Store the 1st DEST word
|
||
stw r.5,4(r.3) // Store the 2nd DEST word
|
||
stw r.5,8(r.3) // Store the 3rd DEST word
|
||
stw r.5,12(r.3) // Store the 4th DEST word
|
||
stw r.5,16(r.3) // Store the 5th DEST word
|
||
stw r.5,20(r.3) // Store the 6th DEST word
|
||
stw r.5,24(r.3) // Store the 7th DEST word
|
||
stw r.5,28(r.3) // Store the 8th DEST word
|
||
addi r.3,r.3,32 // Increment the DEST address
|
||
cmpw r.3,r.10 // Check for completion
|
||
bne+ BlkFillUl // Jump if more 32-Byte Blk fills
|
||
//
|
||
// Fill 4-Byte Blocks
|
||
//
|
||
FillUlBy4Bytes:
|
||
andi. r.6,r.4,3 // Isolate remainder of LENGTH/4
|
||
sub. r.7,r.4,r.6 // Get full block count
|
||
add r.10,r.3,r.7 // Get address of last full block
|
||
beqlr- // Return if done
|
||
mr r.4,r.6 // Set Length = Remainder
|
||
//
|
||
FillUl4Bytes:
|
||
stw r.5,0(r.3)
|
||
addi r.3,r.3,4 // Increment the DEST address
|
||
cmpw r.3,r.10 // Check for completion
|
||
bne+ FillUl4Bytes // Jump if more 4-Byte Blk fills
|
||
//
|
||
// Exit
|
||
//
|
||
FillUlExit:
|
||
LEAF_EXIT(RtlFillMemoryUlong)
|
||
|