NT4/private/ntos/nthals/halsni4x/mips/duocache.s
2020-09-30 17:12:29 +02:00

775 lines
24 KiB
ArmAsm
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//#pragma comment(exestr, "$Header: /usr4/winnt/SOURCES/halvlbms/src/hal/halsni4x/mips/RCS/duocache.s,v 1.1 1995/05/19 10:44:26 flo Exp $")
// TITLE("Cache Flush")
//++
//
// Copyright (c) 1991-1993 Microsoft Corporation
//
// Module Name:
//
// duocache.s
//
// Abstract:
//
// This module implements the code necessary for cache operations on
// MIPS R4000 MultiProcerssor Machines. It is very special to SNI machines,
// which use a special MP Agent Asic.
//`
// Environment:
//
// Kernel mode only.
//
//--
#include "halmips.h"
#include "SNIdef.h"
// NON COHERENT algorithm : to use the replace facility of the MP_Agent
#define CONFIG_NONCOH(reg) \
.set noreorder; \
.set noat; \
mfc0 reg,config; \
nop; \
nop; \
and AT,reg,~(7); \
or AT,AT,0x3; \
mtc0 AT,config; \
nop; \
nop; \
nop; \
nop; \
.set at; \
.set reorder
// restauration of CONFIG register
#define CONFIG_RESTORE(reg) \
.set noreorder; \
mtc0 reg,config; \
nop; \
nop; \
nop; \
nop; \
.set reorder
LEAF_ENTRY(HalpGetTaglo)
.set noreorder
.set noat
cache INDEX_LOAD_TAG_SD,0(a0) // get a copy of the SLC TAG
mfc0 v0, taglo
// and v0, v0, 0xffff8000 // mask address bits A18:A17
sll v0, v0, 4 // dismiss A35:A32
.set at
.set reorder
j ra // return
.end HalpGetTaglo
//
// some bitmap defines to display cache activities via the LED's
// in the SNI RM machines
//
#define SWEEP_DCACHE 0xc0 // 1100 0000
#define FLUSH_DCACHE_PAGE 0x80 // 1000 0000
#define PURGE_DCACHE_PAGE 0x40 // 0100 0000
#define ZERO_PAGE 0x0c // 0000 1100
#define SWEEP_ICACHE 0x30 // 0011 0000
#define PURGE_ICACHE_PAGE 0x10 // 0001 0000
//
// Define cache operations constants.
//
#define COLOR_BITS (7 << PAGE_SHIFT) // color bit (R4000 - 8kb cache)
#define COLOR_MASK (0x7fff) // color mask (R4000 - 8kb cache)
#define FLUSH_BASE 0xfffe0000 // flush base address
#define PROTECTION_BITS ((1 << ENTRYLO_V) | (1 << ENTRYLO_D) ) //
SBTTL("MpAgent Identification")
//++
//
// VOID
// HalpMpAgentIdentify()
//
// Routine Description:
//
// This function attempts to access to the MpAgent registers (base = 0x1ffff000).
// It reads 'base' and 'base + 0x40' addresses which correspond to two different registers
// of the MP Agent so two different contents.
// If there is no existing MPAgent, the hardware will access in fact the old Asic
// (base 0x1fff0000). Only 'base' to 'base+0x32' addresses are existing for this ASIC.
// So when we will attempt to access to 'base+0x40', we will read 'base'. So we will
// see that 'base' and 'base+0x40' have the same contents.
// WARNING : cache error must be disabled to access to the single processor ASIC
//
// Arguments:
//
// None
//
// Return Value:
//
// TRUE : a MpAgent is detected
// FALSE : no MpAgent
//
//--
LEAF_ENTRY(HalpMpAgentIdentify)
.set noreorder
mfc0 a0,psr // get current PSR
nop // fill
nop
nop
nop
move a3,a0
or a0,a0,0x00010000 // disable error cache to access single proc ASIC
mtc0 a0,psr
nop // fill
nop
nop
nop
li a0,0xbffff000 // MpAgent address
lw a1,0(a0) // address + 0
lw a2,0x40(a0) // address + 0x40
li v0,0x01
beql a2,a1,10f
li v0,0x00 // v0 = 0 only if a2 == a1 (branch likely)
10:
mtc0 a3,psr // restore PSR
nop // fill
nop
nop
nop
.set reorder
j ra
.end HalpMpAgentIdentify
SBTTL("Flush Data Cache Page")
//++
//
// VOID
// HalpFlushDcachePageMulti (
// IN PVOID Color,
// IN ULONG PageFrame,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function flushes (hit/writeback/invalidate) up to a page of data
// from the data cache.
//
// Arguments:
//
// Color (a0) - Supplies the starting virtual address and color of the
// data that is flushed.
//
// PageFrame (a1) - Supplies the page frame number of the page that
// is flushed.
//
// Length (a2) - Supplies the length of the region in the page that is
// flushed.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(HalpFlushDcachePageMulti)
#if DBG
lw t0,KeDcacheFlushCount // get address of dcache flush count
lw t1,0(t0) // increment the count of flushes
addu t1,t1,1 //
sw t1,0(t0) // store result
la t0, HalpLedAddress // get the address for the LED register
lw t0, 0(t0)
lw t1, KiPcr + PcSetMember(zero) // get a bitmapped value for Processor Number
or t1, t1, FLUSH_DCACHE_PAGE // what are we doing ?
xor t1, t1, 0xff // inverse
sb t1, 0(t0) // display it
#endif
15: DISABLE_INTERRUPTS(t5) // disable interrupts
.set noreorder
.set noat
//
// Flush the primary and secondary data caches.
//
//
// HIT_WRITEBACK_INVALIDATE cache instruction does not update the SC
// TagRam copy in the MP Agent. So we do cache replace.
//
.set noreorder
.set noat
40: and a0,a0,PAGE_SIZE -1 // PageOffset
sll t7,a1,PAGE_SHIFT // physical address
lw t4,KiPcr + PcSecondLevelDcacheFillSize(zero) // get 2nd fill size
or t0,t7,a0 // physical address + offset
subu t6,t4,1 // compute block size minus one
and t7,t0,t6 // compute offset in block
addu a2,a2,t6 // round up to next block
addu a2,a2,t7 //
nor t6,t6,zero // complement block size minus one
and a2,a2,t6 // truncate length to even number
beq zero,a2,60f // if eq, no blocks to flush
and t8,t0,t6 // compute starting virtual address
addu t9,t8,a2 // compute ending virtual address
subu t9,t9,t4 // compute ending loop address
li a3,MPAGENT_RESERVED | KSEG0_BASE // get base flush address
lw t0,KiPcr + PcSecondLevelDcacheSize(zero) // get cache size
add t0,t0,-1 // mask of the cache size
CONFIG_NONCOH(t2) // NON COHERENT algorithm
.set noreorder
.set noat
50: and t7,t8,t0 // offset
addu t7,t7,a3 // physical address + offset
lw zero,0(t7) // load Cache -> Write back old Data
bne t8,t9,50b // if ne, more blocks to invalidate
addu t8,t8,t4 // compute next block address (+Linesize)
CONFIG_RESTORE(t2)
60: ENABLE_INTERRUPTS(t5) // enable interrupts
j ra // return
.end HalpFlushDcachePageMulti
SBTTL("Purge Instruction Cache Page")
//++
//
// VOID
// HalpPurgeIcachePageMulti (
// IN PVOID Color,
// IN ULONG PageFrame,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function purges (hit/invalidate) up to a page of data from the
// instruction cache.
//
// Arguments:
//
// Color (a0) - Supplies the starting virtual address and color of the
// data that is purged.
//
// PageFrame (a1) - Supplies the page frame number of the page that
// is purged.
//
// Length (a2) - Supplies the length of the region in the page that is
// purged.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(HalpPurgeIcachePageMulti)
#if DBG
lw t0,KeIcacheFlushCount // get address of icache flush count
lw t1,0(t0) // increment the count of flushes
addu t1,t1,1 //
sw t1,0(t0) // store result
la t0, HalpLedAddress // get the address for the LED register
lw t0, 0(t0)
lw t1, KiPcr + PcSetMember(zero) // get a bitmapped value for Processor Number
or t1, t1, PURGE_ICACHE_PAGE // what are we doing ?
xor t1, t1, 0xff // inverse
sb t1, 0(t0) // display it
#endif
.set noreorder
.set noat
lw v0,KiPcr + PcAlignedCachePolicy(zero) // get cache policy
and a0,a0,COLOR_MASK // isolate color bits
li t0,FLUSH_BASE // get base flush address
or t0,t0,a0 // compute color virtual address
sll t1,a1,ENTRYLO_PFN // shift page frame into position
or t1,t1,PROTECTION_BITS // merge protection bits
or t1,t1,v0 // merge cache policy
and a0,a0,0x1000 // isolate TB entry index
beql zero,a0,10f // if eq, first entry
move t2,zero // set second page table entry
move t2,t1 // set second page table entry
move t1,zero // set first page table entry
10: mfc0 t3,wired // get TB entry index
lw v0,KiPcr + PcSecondLevelIcacheFillSize(zero) // get 2nd fill size
lw t4,KiPcr + PcFirstLevelIcacheFillSize(zero) // get 1st fill size
bnel zero,v0,15f // if ne, second level cache present
move t4,v0 // set purge block size
.set at
.set reorder
//
// Purge data from the instruction cache.
//
15: DISABLE_INTERRUPTS(t5) // disable interrupts
.set noreorder
.set noat
mfc0 t6,entryhi // get current PID and VPN2
srl t7,t0,ENTRYHI_VPN2 // isolate VPN2 of virtual address
sll t7,t7,ENTRYHI_VPN2 //
and t6,t6,0xff << ENTRYHI_PID // isolate current PID
or t7,t7,t6 // merge PID with VPN2 of virtual address
mtc0 t7,entryhi // set VPN2 and PID for probe
mtc0 t1,entrylo0 // set first PTE value
mtc0 t2,entrylo1 // set second PTE value
mtc0 t3,index // set TB index value
nop // fill
tlbwi // write TB entry - 3 cycle hazzard
subu t6,t4,1 // compute block size minus one
and t7,t0,t6 // compute offset in block
addu a2,a2,t6 // round up to next block
addu a2,a2,t7 //
nor t6,t6,zero // complement block size minus one
and a2,a2,t6 // truncate length to even number
beq zero,a2,30f // if eq, no blocks to purge
and t8,t0,t6 // compute starting virtual address
addu t9,t8,a2 // compute ending virtual address
bne zero,v0,40f // if ne, second level cache present
subu t9,t9,t4 // compute ending loop address
//
// Purge the primary instruction cache only.
//
20: cache HIT_INVALIDATE_I,0(t8) // invalidate cache block
bne t8,t9,20b // if ne, more blocks to invalidate
addu t8,t8,t4 // compute next block address
.set at
.set reorder
30: ENABLE_INTERRUPTS(t5) // enable interrupts
j ra // return
//
// Purge the primary and secondary instruction caches.
//
//
// multi-processor machine
//
40: move t7,t8
//
// HIT_WRITEBACK_INVALIDATE cache instruction does not work. So we do cache replace.
// We use a MP agent facility to do that : a 4Mb area is stolen to the upper EISA space
// and this address is notified to the MP agent. When the cache replace is done, no
// access to the memory is done : the MP agent returns zero as value for these addresses.
// Be careful : to use this mechanism, CONFIG register must be programmed in NON COHERENT
// mode, so we must be protected from interrupts.
//
li t8,PAGE_SIZE
add t8,t8,-1 // page mask
and t8,t7,t8 // offset in the page
sll t7,a1,PAGE_SHIFT // physical address
or t8,t7,t8 // physical address + offset
//
// note: we have a Unified SLC, so SecondLevelIcacheSize is set to 0
//
lw t0,KiPcr + PcSecondLevelIcacheSize(zero) // get cache size
addu t9,t8,a2 // compute ending physical address
subu t9,t9,t4 // compute ending loop address
add t0,t0,-1 // mask of the cache size
and t8,t8,t0 // first cache line to invalidate
and t9,t9,t0 // last cache line to invalidate
li a2, MPAGENT_RESERVED | KSEG0_BASE
or t8,a2,t8 // starting address
or t9,a2,t9 // ending address
CONFIG_NONCOH(t2) // NON COHERENT algorithm
.set noreorder
.set noat
50: lw zero,0(t8) // invalidate sc
bne t8,t9,50b // if ne, more blocks to invalidate
addu t8,t8,t4 // compute next block address
CONFIG_RESTORE(t2)
ENABLE_INTERRUPTS(t5) // enable interrupts
j ra // return
.end HalPurgeIcachePage
SBTTL("Sweep Data Cache")
//++
//
// VOID
// HalpSweepDcacheMulti (
// VOID
// )
//
// Routine Description:
//
// This function sweeps (index/writeback/invalidate) the entire data cache.
//
// Arguments:
//
// None.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(HalpSweepDcacheMulti)
#if DBG
lw t0,KeDcacheFlushCount // get address of dcache flush count
lw t1,0(t0) // increment the count of flushes
addu t1,t1,1 //
sw t1,0(t0) // store result
la t0, HalpLedAddress // get the address for the LED register
lw t0, 0(t0)
lw t1, KiPcr + PcSetMember(zero) // get a bitmapped value for Processor Number
or t1, t1, SWEEP_DCACHE // what are we doing ?
xor t1, t1, 0xff // inverse
sb t1, 0(t0) // display it
#endif
.set at
.set reorder
DISABLE_INTERRUPTS(t3) // disable interrupts
.set noreorder
.set noat
//
// sweep secondary cache in the MP Agent
//
//
// HIT_WRITEBACK_INVALIDATE cache instruction does not update the SC
// TagRam copy in the MP Agent. So we do cache replace.
//
lw t0,KiPcr + PcSecondLevelDcacheSize(zero) // get data cache size
lw t1,KiPcr + PcSecondLevelDcacheFillSize(zero) // get block size
li a0,MPAGENT_RESERVED | KSEG0_BASE // starting address
addu a1,a0,t0 // compute ending cache address
subu a1,a1,t1 // compute ending block address
CONFIG_NONCOH(t2) // NON COHERENT algorithm
.set noreorder
.set noat
25:
lw zero,0(a0)
bne a0,a1,25b // if ne, more to invalidate
addu a0,a0,t1 // compute address of next block
CONFIG_RESTORE(t2)
ENABLE_INTERRUPTS(t3) // enable interrupts
.set at
.set reorder
j ra // return
.end HalpSweepDcacheMulti
SBTTL("Sweep Instruction Cache")
//++
//
// VOID
// HalpSweepIcacheMulti (
// VOID
// )
//
// Routine Description:
//
// This function sweeps (index/invalidate) the entire instruction cache.
//
// Arguments:
//
// None.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(HalpSweepIcacheMulti)
#if DBG
lw t0,KeIcacheFlushCount // get address of icache flush count
lw t1,0(t0) // increment the count of flushes
addu t1,t1,1 //
sw t1,0(t0) // store result
la t0, HalpLedAddress // get the address for the LED register
lw t0, 0(t0)
lw t1, KiPcr + PcSetMember(zero) // get a bitmapped value for Processor Number
or t1, t1, SWEEP_ICACHE // what are we doing ?
xor t1, t1, 0xff // inverse
sb t1, 0(t0) // display it
#endif
//
// Sweep the secondary instruction cache.
//
.set noreorder
.set noat
DISABLE_INTERRUPTS(t3) // disable interrupts
.set noreorder
.set noat
//
// sweep secondary cache
// SNI machines have only an Unified SL cache
// NOTE: PcSecondLevelIcacheSize and PcSecondLevelICacheFillSize is set to 0
// on SNI machines
//
lw t0,KiPcr + PcSecondLevelIcacheSize(zero) // get instruction cache size
lw t1,KiPcr + PcSecondLevelIcacheFillSize(zero) // get fill size
beq zero,t1,20f // if eq, no second level cache
li a0,MPAGENT_RESERVED | KSEG0_BASE // set starting index value
addu a1,a0,t0 // compute ending cache address
subu a1,a1,t1 // compute ending block address
CONFIG_NONCOH(t2) // NON COHERENT algorithm
.set noreorder
.set noat
10: lw zero,0(a0)
bne a0,a1,10b // if ne, more to invalidate
addu a0,a0,t1 // compute address of next block
CONFIG_RESTORE(t2)
.set noreorder
.set noat
20: lw t0,KiPcr + PcFirstLevelIcacheSize(zero) // get instruction cache size
lw t1,KiPcr + PcFirstLevelIcacheFillSize(zero) // get fill size
li a0,KSEG0_BASE // set starting index value
addu a1,a0,t0 // compute ending cache address
subu a1,a1,t1 // compute ending block address
//
// Sweep the primary instruction cache.
//
30: cache INDEX_INVALIDATE_I,0(a0) // invalidate cache line
bne a0,a1,30b // if ne, more to invalidate
addu a0,a0,t1 // compute address of next block
ENABLE_INTERRUPTS(t3) // enable interrupts
.set at
.set reorder
j ra // return
.end HalSweepIcache
SBTTL("Zero Page")
//++
//
// VOID
// HalpZeroPageMulti (
// IN PVOID NewColor,
// IN PVOID OldColor,
// IN ULONG PageFrame
// )
//
// Routine Description:
//
// This function zeros a page of memory.
//
// The algorithm used to zero a page is as follows:
//
// 1. Purge (hit/invalidate) the page from the instruction cache
// using the old color iff the old color is not the same as
// the new color.
//
// 2. Purge (hit/invalidate) the page from the data cache using
// the old color iff the old color is not the same as the new
// color.
//
// 3. Create (create/dirty/exclusive) the page in the data cache
// using the new color.
//
// 4. Write zeros to the page using the new color.
//
// Arguments:
//
// NewColor (a0) - Supplies the page aligned virtual address of the
// new color of the page that is zeroed.
//
// OldColor (a1) - Supplies the page aligned virtual address of the
// old color of the page that is zeroed.
//
// PageFrame (a2) - Supplies the page frame number of the page that
// is zeroed.
//
// Return Value:
//
// None.
//
//--
.struct 0
.space 3 * 4 // fill
ZpRa: .space 4 // saved return address
ZpFrameLength: // length of stack frame
ZpA0: .space 4 // (a0)
ZpA1: .space 4 // (a1)
ZpA2: .space 4 // (a2)
ZpA3: .space 4 // (a3)
NESTED_ENTRY(HalpZeroPageMulti, ZpFrameLength, zero)
subu sp,sp,ZpFrameLength // allocate stack frame
sw ra,ZpRa(sp) // save return address
PROLOGUE_END
#if DBG
la t0, HalpLedAddress // get the address for the LED register
lw t0, 0(t0)
lw t1, KiPcr + PcSetMember(zero) // get a bitmapped value for Processor Number
or t1, t1, ZERO_PAGE // what are we doing ?
xor t1, t1, 0xff // inverse
sb t1, 0(t0) // display it
#endif
and a0,a0,COLOR_BITS // isolate new color bits
and a1,a1,COLOR_BITS // isolate old color bits
sw a0,ZpA0(sp) // save new color bits
sw a1,ZpA1(sp) // save old color bits
sw a2,ZpA2(sp) // save page frame
//
// If the old page color is not equal to the new page color, then change
// the color of the page.
//
beq a0,a1,10f // if eq, colors match
jal KeChangeColorPage // chagne page color
//
// Create dirty exclusive cache blocks and zero the data.
//
10: lw a3,ZpA0(sp) // get new color bits
lw a1,ZpA2(sp) // get page frame number
.set noreorder
.set noat
lw v0,KiPcr + PcAlignedCachePolicy(zero) // get cache polciy
li t0,FLUSH_BASE // get base flush address
or t0,t0,a3 // compute new color virtual address
sll t1,a1,ENTRYLO_PFN // shift page frame into position
or t1,t1,PROTECTION_BITS // merge protection bits
or t1,t1,v0 // merge cache policy
and a3,a3,0x1000 // isolate TB entry index
beql zero,a3,20f // if eq, first entry
move t2,zero // set second page table entry
move t2,t1 // set second page table entry
move t1,zero // set first page table entry
20: mfc0 t3,wired // get TB entry index
lw t4,KiPcr + PcFirstLevelDcacheFillSize(zero) // get 1st fill size
lw v0,KiPcr + PcSecondLevelDcacheFillSize(zero) // get 2nd fill size
.set at
.set reorder
DISABLE_INTERRUPTS(t5) // disable interrupts
.set noreorder
.set noat
mfc0 t6,entryhi // get current PID and VPN2
srl t7,t0,ENTRYHI_VPN2 // isolate VPN2 of virtual address
sll t7,t7,ENTRYHI_VPN2 //
and t6,t6,0xff << ENTRYHI_PID // isolate current PID
or t7,t7,t6 // merge PID with VPN2 of virtual address
mtc0 t7,entryhi // set VPN2 and PID for probe
mtc0 t1,entrylo0 // set first PTE value
mtc0 t2,entrylo1 // set second PTE value
mtc0 t3,index // set TB index value
nop // fill
tlbwi // write TB entry - 3 cycle hazzard
addu t9,t0,PAGE_SIZE // compute ending address of block
dmtc1 zero,f0 // set write pattern
and t8,t4,0x10 // test if 16-byte cache block
//
// Zero page in primary and secondary data caches.
//
50: sdc1 f0,0(t0) // zero 64-byte block
sdc1 f0,8(t0) //
sdc1 f0,16(t0) //
sdc1 f0,24(t0) //
sdc1 f0,32(t0) //
sdc1 f0,40(t0) //
sdc1 f0,48(t0) //
addu t0,t0,64 // advance to next 64-byte block
bne t0,t9,50b // if ne, more to zero
sdc1 f0,-8(t0) //
.set at
.set reorder
ENABLE_INTERRUPTS(t5) // enable interrupts
lw ra,ZpRa(sp) // get return address
addu sp,sp,ZpFrameLength // deallocate stack frame
j ra // return
.end HalpZeroPageMulti