126 lines
2.5 KiB
ArmAsm
126 lines
2.5 KiB
ArmAsm
|
/* ------------------------------------------------------------------ */
|
||
|
/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
|
||
|
/* | Reserved. This software contains proprietary and confidential | */
|
||
|
/* | information of MIPS and its suppliers. Use, disclosure or | */
|
||
|
/* | reproduction is prohibited without the prior express written | */
|
||
|
/* | consent of MIPS. | */
|
||
|
/* ------------------------------------------------------------------ */
|
||
|
#ident "$Header"
|
||
|
|
||
|
/*
|
||
|
* Copyright 1985 by MIPS Computer Systems, Inc.
|
||
|
*/
|
||
|
|
||
|
/* memcmp(s1, s2, n) */
|
||
|
|
||
|
#include "kxmips.h"
|
||
|
|
||
|
/*
|
||
|
* bcmp(src, dst, bcount)
|
||
|
*
|
||
|
* MINCMP is the minimum number of bytes for which it is worthwhile to try to
|
||
|
* align cmp into word transactions
|
||
|
*
|
||
|
* Calculating MINCMP
|
||
|
* Overhead =~ 15 instructions => 90 cycles
|
||
|
* Byte cmp =~ 38 cycles/word
|
||
|
* Word cmp =~ 17 cycles/word
|
||
|
* Breakeven =~ 16 bytes
|
||
|
*/
|
||
|
/* Byte-count threshold: compares shorter than this go straight to the byte loop. */
#define MINCMP 16
|
||
|
/* Bytes per word: the stride of the lw loops; (NBPW-1) is the alignment mask. */
#define NBPW 4
|
||
|
|
||
|
/*
 * memcmp(src1, src2, n)
 *
 * In:	a0 = src1, a1 = src2, a2 = byte count
 * Out:	v0 =  0 if the n bytes are identical
 *	v0 =  1 if the first differing byte of src1 is greater (unsigned)
 *	v0 = -1 if the first differing byte of src1 is smaller (unsigned)
 * Uses: v0, v1, a0-a3, t8 (no stack, leaf routine)
 *
 * Strategy:
 *   - fewer than MINCMP bytes: plain byte loop;
 *   - src1/src2 share the same offset within a word: compare the unaligned
 *     head with lwr, then run an aligned word loop;
 *   - otherwise: byte-compare until src2 is aligned, then compare an
 *     unaligned (lwr/lwl) load of src1 against an aligned load of src2;
 *   - any remaining tail is handled by the byte loop.
 *
 * Branch delay slots are left to the assembler (reorder mode), as in the
 * rest of this routine.
 *
 * Byte order: the "lwr 0 / lwl 3" pairing used below is the little-endian
 * unaligned-load idiom, so a loaded word holds the lowest-addressed byte in
 * its least-significant position.  cmpne relies on that to locate the first
 * differing byte in memory order.
 */
LEAF_ENTRY(memcmp)
	xor	v0,a0,a1		# differing low bits => can't co-align
	blt	a2,MINCMP,bytecmp	# too short, just byte cmp
	and	v0,NBPW-1
	subu	t8,zero,a0		# number of bytes til aligned
	bne	v0,zero,unalgncmp	# src and dst not alignable

/*
 * src and dst can be simultaneously word aligned
 */
	and	t8,NBPW-1
	subu	a2,t8
	beq	t8,zero,wordcmp		# already aligned
	move	v0,v1			# lw[lr] don't clear target reg, so make
					# the bytes lwr leaves untouched equal
	lwr	v0,0(a0)		# head bytes up to the word boundary
	lwr	v1,0(a1)
	addu	a0,t8
	addu	a1,t8
	bne	v0,v1,cmpne

/*
 * word cmp loop
 */
wordcmp:
	and	a3,a2,~(NBPW-1)		# a3 = bytes covered by whole words
	subu	a2,a3			# a2 = leftover tail bytes
	beq	a3,zero,bytecmp
	addu	a3,a0			# src1 endpoint
1:	lw	v0,0(a0)
	lw	v1,0(a1)
	addu	a0,NBPW			# 1st BDSLOT
	addu	a1,NBPW			# 2nd BDSLOT (asm doesn't move)
	bne	v0,v1,cmpne
	bne	a0,a3,1b		# at least one more word
	b	bytecmp

/*
 * deal with simultaneously unalignable cmp by aligning one src
 */
unalgncmp:
	subu	a3,zero,a1		# calc byte cnt to get src2 aligned
	and	a3,NBPW-1
	subu	a2,a3
	beq	a3,zero,partaligncmp	# already aligned
	addu	a3,a0			# src1 endpoint
1:	lbu	v0,0(a0)
	lbu	v1,0(a1)
	addu	a0,1
	addu	a1,1
	bne	v0,v1,cmpne
	bne	a0,a3,1b

/*
 * src1 unaligned, src2 aligned loop
 */
partaligncmp:
	and	a3,a2,~(NBPW-1)		# a3 = bytes covered by whole words
	subu	a2,a3			# a2 = leftover tail bytes
	beq	a3,zero,bytecmp
	addu	a3,a0			# src1 endpoint
1:
	lwr	v0,0(a0)		# unaligned word from src1 ...
	lwl	v0,3(a0)
	lw	v1,0(a1)		# ... against aligned word from src2
	addu	a0,NBPW
	addu	a1,NBPW
	bne	v0,v1,cmpne
	bne	a0,a3,1b

/*
 * brute force byte cmp loop
 */
bytecmp:
	addu	a3,a2,a0		# src1 endpoint; BDSLOT
	ble	a2,zero,cmpdone
1:	lbu	v0,0(a0)
	lbu	v1,0(a1)
	addu	a0,1
	addu	a1,1
	bne	v0,v1,cmpne
	bne	a0,a3,1b
cmpdone:
	move	v0,zero			# all bytes matched
	j	ra

/*
 * Mismatch: v0 (from src1) != v1 (from src2).  They hold either a pair of
 * zero-extended bytes or a pair of words.  Comparing whole words as unsigned
 * integers would let the highest-addressed byte decide the ordering, so walk
 * the bytes from the least-significant end (lowest address first) until the
 * first differing pair is found.  The loop terminates because v0 != v1; for
 * the byte loops it exits on the first pass.  a2/a3 are dead here and are
 * reused as scratch.
 */
cmpne:
1:	and	a2,v0,0xff		# next byte of src1 in memory order
	and	a3,v1,0xff		# next byte of src2 in memory order
	srl	v0,v0,8
	srl	v1,v1,8
	beq	a2,a3,1b		# same byte, keep looking
	sltu	a2,a3,a2		# a2 = (src2 byte < src1 byte)
	bne	a2,zero,9f
	li	v0,-1			# src1 < src2
	j	ra
9:
	li	v0,1			# src1 > src2
	j	ra
	.end	memcmp			# must name the symbol opened by LEAF_ENTRY
|