315 lines
8.5 KiB
NASM
315 lines
8.5 KiB
NASM
title "MP primitives for the Corollary Cbus machines"
|
|
;++
|
|
;
|
|
;Copyright (c) 1992, 1993, 1994 Corollary Inc.
|
|
;
|
|
;Module Name:
|
|
;
|
|
; cbus1bt.asm
|
|
;
|
|
;Abstract:
|
|
;
|
|
; Corollary Cbus1 Boot Code
|
|
;
|
|
; This module implements the low-level highly cache
|
|
; architecture dependent code to boot the additional
|
|
; processors in the Corollary Cbus1 based machines.
|
|
|
|
; This consists of two functions which are exactly the
|
|
; same (Cbus1Boot1 & Cbus1Boot2). The calling code
|
|
; determines which one is safe to call (depending on the
|
|
; linker, sometimes both may be ok). The reason for this
|
|
; is that the boot processor fills in the reset vector at
|
|
; 0xFFFFFFF0 for the next processor and that cache line
|
|
; must not be inadvertently flushed before the next processor
|
|
; gets out of reset to see where to go (it's filled in with
|
|
; a real-mode jmp cs:ip). Note that this code is highly
|
|
; dependent on the linker placing all this code contiguous
|
|
; and the hardware architecture of the Corollary L2 caches.
|
|
; unless the system is fully populated, memory will not exist
|
|
; at 0xFFFFFFF0. hence, we must ensure that the cacheline is
|
|
; not evicted until the processor has done the jump!
|
|
|
|
|
|
; the order of Cbus1Boot1, ciboot, and Cbus1Boot2 is critical.
|
|
; Cbus1Boot1 and Cbus1Boot2 must be separated by Cbus1BootCPU;
|
|
; Cbus1Boot1 must be defined before Cbus1Boot2.
|
|
; the size of all three must be less than 4K.
|
|
|
|
; WARNING!!! WARNING!!! WARNING!!!
|
|
|
|
; do not put any routines between Cbus1Boot1 and Cbus1Boot2. there
|
|
; are tricky games being played with the write back caches so
|
|
; that StartVector[] does not get flushed.
|
|
|
|
;
|
|
;Author:
|
|
;
|
|
; Landy Wang (landy@corollary.com) 23-Jun-1993
|
|
;
|
|
;Environment:
|
|
; Kernel mode.
|
|
;
|
|
;--
|
|
|
|
|
|
|
|
.386p
|
|
.xlist
|
|
include hal386.inc
|
|
include callconv.inc ; calling convention macros
|
|
|
|
.list
|
|
|
|
; Open the INIT segment (dword aligned, PUBLIC combine type, class 'CODE');
; both boot routines and the padding between them are assembled into it.
INIT SEGMENT DWORD PUBLIC 'CODE' ; Start 32 bit code
|
|
; Tell the assembler that DS and ES address the flat model and that nothing
; may be assumed about SS, FS or GS.
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
|
|
|
|
;++
|
|
;
|
|
; VOID
|
|
; Cbus1Boot1 (
|
|
; IN ULONG Processor,
|
|
; IN PQUAD Dest,
|
|
; IN PQUAD Source,
|
|
; IN ULONG ResetAddress,
|
|
; IN ULONG ResetValue
|
|
; )
|
|
;
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; Clear reset on the specified logical processor number, setting his
|
|
; reset vector to point at the specified code location. The Dest
|
|
; generally points at a reset vector, and thus, unless the system is
|
|
; fully populated, memory will not exist at that address. hence, we
|
|
; must ensure that the cacheline is not evicted until the processor
|
|
; has done the jump!
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Processor - Supplies a logical processor number
|
|
;
|
|
; Dest - Supplies the address of the reset vector where the code below
|
|
; will go.
|
|
;
|
|
; Source - Supplies startup code for this processor, currently a 5 byte
|
|
; intersegment (far) jump, ie: "jmp cs:ip"
|
|
;
|
|
; Note the reset vector length is hardcoded here to 8 bytes. (ie: Dest
|
|
; and Source must point at arrays of 8 bytes each).
|
|
;
|
|
; ResetAddress - Supplies the address to poke to clear reset
|
|
;
|
|
; ResetValue - Supplies the value to poke to clear reset
|
|
;
|
|
; Return Value:
|
|
;
|
|
; None.
|
|
;--
|
|
|
|
;
; Stack-frame argument equates shared by Cbus1Boot1 and Cbus1Boot2.
; Both routines execute "push ebp / mov ebp, esp" on entry, so [ebp+0]
; holds the caller's ebp, [ebp+4] the return address, and the five
; dword arguments start at [ebp+8].
;
ProcessorNumber equ dword ptr [ebp+8] ; zero based; not referenced by either routine below
|
|
Destination equ dword ptr [ebp+12] ; address that receives the 8-byte startup vector
|
|
Source equ dword ptr [ebp+16] ; address of the 8 bytes copied to Destination
|
|
ResetAddress equ dword ptr [ebp+20] ; address that is written to clear reset
|
|
ResetValue equ dword ptr [ebp+24] ; value to write; only its low byte is stored
|
|
|
|
cPublicProc _Cbus1Boot1 ,5
;
; Copies the two dwords at Source to Destination, stores the low byte
; of ResetValue at ResetAddress, then spins in a register-only loop
; for roughly 40 microseconds before returning.
; The ProcessorNumber argument is not referenced in this body.
; NOTE(review): the ",5" presumably tells the callconv.inc macro that
; the routine takes five dword arguments -- confirm in callconv.inc.
;
|
|
push ebp ; build a frame so the [ebp+n] argument equates resolve
|
|
mov ebp, esp
|
|
push ebx ; ebx, esi and edi are overwritten below and
|
|
push esi ; restored before returning
|
|
push edi
|
|
|
|
;
|
|
; set up all variables to be used after the cache line
|
|
; initialization. this is because we want to load up
|
|
; our register variables with these values and avoid
|
|
; memory references. see the comment below.
|
|
;
|
|
|
|
mov eax, PCR[PcStallScaleFactor] ; get per microsecond
|
|
; loop count for the processor
|
|
|
|
mov ecx, 40 ; 40 microsecond stall
|
|
mul ecx ; (eax) = desired loop count
; mul also overwrites edx with the high half of the product (which is
; ignored), so edx is only loaded with ResetAddress after this point.
|
|
|
|
mov edx, ResetAddress ; (edx) = address to poke to clear reset
|
|
mov ebx, ResetValue ; only the low byte (bl) is stored below
|
|
|
|
mov esi, Source ; point at the source code
|
|
|
|
mov ecx, dword ptr [esi] ; get first dword into a reg
|
|
mov esi, dword ptr [esi+4] ; and 2nd dword into a reg
; esi no longer points at Source from here on; it holds the 2nd dword.
|
|
|
|
mov edi, Destination ; (edi) = where the 8-byte vector goes
|
|
|
|
;
|
|
; now start filling in the cache line for the processor coming out
|
|
; of reset. no memory references which may flush this cache line
|
|
; can be made after the below fill UNTIL the booting processor
|
|
; has read the line. (the only memory references made here in this
|
|
; critical time period is the code fetching, but our caller has
|
|
; already determined that none of the code in this function could
|
|
; cause the cache line to be flushed).
|
|
;
|
|
|
|
mov dword ptr [edi], ecx ; 1st dword now in the cacheline
|
|
mov dword ptr [edi+4], esi ; and 2nd dword now in
|
|
|
|
;
|
|
; cache line is initialized, we must not let it get flushed now, or
|
|
; the additional processor will fly blind.
|
|
;
|
|
|
|
mov byte ptr [edx], bl ; clear reset
|
|
|
|
;
|
|
; wait approximately 40 microseconds, but don't call
|
|
; KeStallExecutionProcessor() as this might flush the
|
|
; cache line prematurely. inline the function instead.
|
|
;
; NOTE(review): the loop below exits only when eax reaches zero, so a
; zero loop count on entry would wrap and spin 2^32 iterations.
; presumably PcStallScaleFactor is always nonzero by the time this
; routine runs -- confirm against the caller.
|
|
|
|
align 4
|
|
@@:
|
|
sub eax, 1 ; (eax) = (eax) - 1
|
|
jnz short @b ; spin until the loop count is exhausted
|
|
|
|
|
|
pop edi ; restore the registers saved on entry,
|
|
pop esi ; in reverse order of the pushes
|
|
pop ebx
|
|
mov esp, ebp ; tear down the frame
|
|
pop ebp
|
|
; NOTE(review): stdRET and stdENDP are callconv.inc macros; presumably
; they emit the return and close the procedure -- confirm there.
stdRET _Cbus1Boot1
|
|
|
|
stdENDP _Cbus1Boot1
|
|
|
|
;
|
|
; force enough spacing between the two boot functions so
|
|
; that at least one of them will always be safe to call.
|
|
; currently that would be 16 bytes (the current cache line
|
|
; size), but make it bigger so any of our OEMs will be safe
|
|
; even if they modify the size of the cache line.
|
|
;
|
|
|
|
; Marks the first byte after Cbus1Boot1 and reserves the gap that keeps
; Cbus1Boot2 away from it. Nothing else may be placed here.
; NOTE(review): the label is presumably consumed by the calling code
; when it decides which boot routine is safe to call -- confirm.
public _Cbus1Boot1End ; make the label visible to other modules
|
|
_Cbus1Boot1End label byte ; byte-typed label; reserves no storage itself
|
|
db 64 dup (?) ; 64 uninitialized bytes of spacing
|
|
|
|
;++
|
|
;
|
|
; VOID
|
|
; Cbus1Boot2 (
|
|
; IN ULONG Processor,
|
|
; IN PQUAD Dest,
|
|
; IN PQUAD Source,
|
|
; IN ULONG ResetAddress,
|
|
; IN ULONG ResetValue
|
|
; )
|
|
;
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; Clear reset on the specified logical processor number, setting his
|
|
; reset vector to point at the specified code location. The Dest
|
|
; generally points at a reset vector, and thus, unless the system is
|
|
; fully populated, memory will not exist at that address. hence, we
|
|
; must ensure that the cacheline is not evicted until the processor
|
|
; has done the jump!
|
|
;
|
|
; Arguments:
|
|
;
|
|
; Processor - Supplies a logical processor number
|
|
;
|
|
; Dest - Supplies the address of the reset vector where the code below
|
|
; will go.
|
|
;
|
|
; Source - Supplies startup code for this processor, currently a 5 byte
|
|
; intersegment (far) jump, ie: "jmp cs:ip"
|
|
;
|
|
; Note the reset vector length is hardcoded here to 8 bytes. (ie: Dest
|
|
; and Source must point at arrays of 8 bytes each).
|
|
;
|
|
; ResetAddress - Supplies the address to poke to clear reset
|
|
;
|
|
; ResetValue - Supplies the value to poke to clear reset
|
|
;
|
|
; Return Value:
|
|
;
|
|
; None.
|
|
;--
|
|
|
|
cPublicProc _Cbus1Boot2 ,5
;
; Instruction-for-instruction the same as Cbus1Boot1, assembled at a
; different address: copies the two dwords at Source to Destination,
; stores the low byte of ResetValue at ResetAddress, then spins in a
; register-only loop for roughly 40 microseconds before returning.
; The ProcessorNumber argument is not referenced in this body.
; NOTE(review): the ",5" presumably tells the callconv.inc macro that
; the routine takes five dword arguments -- confirm in callconv.inc.
;
|
|
push ebp ; build a frame so the [ebp+n] argument equates resolve
|
|
mov ebp, esp
|
|
push ebx ; ebx, esi and edi are overwritten below and
|
|
push esi ; restored before returning
|
|
push edi
|
|
|
|
;
|
|
; set up all variables to be used after the cache line
|
|
; initialization. this is because we want to load up
|
|
; our register variables with these values and avoid
|
|
; memory references. see the comment below.
|
|
;
|
|
|
|
mov eax, PCR[PcStallScaleFactor] ; get per microsecond
|
|
; loop count for the processor
|
|
|
|
mov ecx, 40 ; 40 microsecond stall
|
|
mul ecx ; (eax) = desired loop count
; mul also overwrites edx with the high half of the product (which is
; ignored), so edx is only loaded with ResetAddress after this point.
|
|
|
|
mov edx, ResetAddress ; (edx) = address to poke to clear reset
|
|
mov ebx, ResetValue ; only the low byte (bl) is stored below
|
|
|
|
mov esi, Source ; point at the source code
|
|
|
|
mov ecx, dword ptr [esi] ; get first dword into a reg
|
|
mov esi, dword ptr [esi+4] ; and 2nd dword into a reg
; esi no longer points at Source from here on; it holds the 2nd dword.
|
|
|
|
mov edi, Destination ; (edi) = where the 8-byte vector goes
|
|
|
|
;
|
|
; now start filling in the cache line for the processor coming out
|
|
; of reset. no memory references which may flush this cache line
|
|
; can be made after the below fill UNTIL the booting processor
|
|
; has read the line. (the only memory references made here in this
|
|
; critical time period is the code fetching, but our caller has
|
|
; already determined that none of the code in this function could
|
|
; cause the cache line to be flushed).
|
|
;
|
|
|
|
mov dword ptr [edi], ecx ; 1st dword now in the cacheline
|
|
mov dword ptr [edi+4], esi ; and 2nd dword now in
|
|
|
|
;
|
|
; cache line is initialized, we must not let it get flushed now, or
|
|
; the additional processor will fly blind.
|
|
;
|
|
|
|
mov byte ptr [edx], bl ; clear reset
|
|
|
|
;
|
|
; wait approximately 40 microseconds, but don't call
|
|
; KeStallExecutionProcessor() as this might flush the
|
|
; cache line prematurely. inline the function instead.
|
|
;
; NOTE(review): the loop below exits only when eax reaches zero, so a
; zero loop count on entry would wrap and spin 2^32 iterations.
; presumably PcStallScaleFactor is always nonzero by the time this
; routine runs -- confirm against the caller.
|
|
|
|
align 4
|
|
@@:
|
|
sub eax, 1 ; (eax) = (eax) - 1
|
|
jnz short @b ; spin until the loop count is exhausted
|
|
|
|
|
|
pop edi ; restore the registers saved on entry,
|
|
pop esi ; in reverse order of the pushes
|
|
pop ebx
|
|
mov esp, ebp ; tear down the frame
|
|
pop ebp
|
|
; NOTE(review): stdRET and stdENDP are callconv.inc macros; presumably
; they emit the return and close the procedure -- confirm there.
stdRET _Cbus1Boot2
|
|
|
|
stdENDP _Cbus1Boot2
|
|
|
|
; Close the INIT segment opened above the first boot routine.
INIT ends ; end 32 bit code
|
|
; End of the assembly source; no entry-point label is given.
end
|