2305 lines
61 KiB
C
2305 lines
61 KiB
C
/*++
|
||
|
||
Copyright (c) 1995 Digital Equipment Corporation
|
||
|
||
Module Name:
|
||
|
||
ioderr.c
|
||
|
||
Abstract:
|
||
|
||
This module implements error handling functions for the Rawhide
|
||
IOD (CAP and MDP ASICs).
|
||
|
||
Author:
|
||
|
||
Eric Rehm 13-Apr-1995
|
||
|
||
Environment:
|
||
|
||
Kernel mode
|
||
|
||
Revision History:
|
||
|
||
|
||
--*/
|
||
|
||
#include "halp.h"
|
||
//#include "iod.h"
|
||
#include "rawhide.h"
|
||
#include "stdio.h"
|
||
|
||
//
|
||
// Externals and globals.
|
||
//
|
||
|
||
extern PERROR_FRAME PUncorrectableError;
|
||
extern ULONG HalDisablePCIParityChecking;
|
||
ULONG IodCorrectedErrors = 0;
|
||
|
||
//
|
||
// Define the context structure for use by interrupt service routines.
|
||
//
|
||
|
||
typedef BOOLEAN (*PSECOND_LEVEL_DISPATCH)(
|
||
PKINTERRUPT InterruptObject,
|
||
PVOID ServiceContext
|
||
);
|
||
|
||
//
|
||
// The Soft Error interrupt is always turned on for Rawhide. When a
|
||
// Soft Error interrupt occurs, HalpIodSoftErrorInterrupt() must
|
||
// be called to reset the error condition on the offending IOD to
|
||
// insure system integrity.
|
||
//
|
||
// A Correctable Error Driver might also connect to the Soft Error interrupt
|
||
// via the Internal Bus interface. When a Soft Error interrupt occurs,
|
||
// we determine if it is also necessary to dispatch an ISR for the
|
||
// Correctable Error Driver via a boolean.
|
||
//
|
||
|
||
BOOLEAN HalpLogCorrectableErrors = FALSE;
|
||
|
||
//
|
||
// Keep the first time we read the WhoAmI register
|
||
// since it does not always read the same the second time.
|
||
//
|
||
// Zero value indicates that we haven't read WhoAmI yet and that
|
||
// this global variable is not valid.
|
||
//
|
||
// (On machine checks that we dismiss, we must remember to
|
||
// to reset this to zero.)
|
||
//
|
||
|
||
IOD_WHOAMI HalpIodWhoAmIOnError = { 0 };
|
||
|
||
//
|
||
// Function prototypes.
|
||
//
|
||
|
||
VOID
|
||
HalpSetMachineCheckEnables(
|
||
IN BOOLEAN DisableMachineChecks,
|
||
IN BOOLEAN DisableProcessorCorrectables,
|
||
IN BOOLEAN DisableSystemCorrectables
|
||
);
|
||
|
||
VOID
|
||
HalpUpdateMces(
|
||
IN BOOLEAN ClearMachineCheck,
|
||
IN BOOLEAN ClearCorrectableError
|
||
);
|
||
|
||
//
|
||
// Function prototypes for routines not visible outside this module
|
||
//
|
||
|
||
VOID
|
||
HalpBuildIodErrorFrame(
|
||
MC_DEVICE_ID McDeviceId,
|
||
PIOD_ERROR_FRAME IodErrorFrame
|
||
);
|
||
|
||
BOOLEAN
|
||
bFindIodError(
|
||
PMC_DEVICE_ID pMcDeviceId,
|
||
PIOD_CAP_ERR pIodCapErr
|
||
);
|
||
|
||
BOOLEAN
|
||
bHandleFatalIodError(
|
||
MC_DEVICE_ID McDeviceId,
|
||
BOOLEAN bMachineCheck
|
||
);
|
||
|
||
BOOLEAN
|
||
bHandleIsaError(
|
||
MC_DEVICE_ID pMcDeviceId,
|
||
IOD_CAP_ERR IodCapErr
|
||
);
|
||
|
||
VOID
|
||
HalpErrorFrameString(
|
||
PUNCORRECTABLE_ERROR uncorr,
|
||
PUCHAR OutBuffer
|
||
);
|
||
|
||
ULONG
|
||
BuildActiveCpus (
|
||
VOID
|
||
);
|
||
|
||
//
|
||
// Allocate a flag that indicates when a PCI Master Abort is expected.
|
||
// PCI Master Aborts are signaled on configuration reads to non-existent
|
||
// PCI slots. A cardinal value (0-128) indicates that a Master Abort is expected.
|
||
// A value of 0xffffffff indicates that a Master Abort is *not* expected.
|
||
//
|
||
|
||
IOD_EXPECTED_ERROR HalpMasterAbortExpected = {MASTER_ABORT_NOT_EXPECTED, 0x0};
|
||
|
||
|
||
VOID
|
||
HalpInitializeIodMachineChecks(
|
||
IN BOOLEAN ReportCorrectableErrors,
|
||
IN BOOLEAN PciParityChecking
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine initializes machine check handling for a IOD-based
|
||
system by clearing all pending errors in the IOD registers and
|
||
enabling correctable errors according to the callers specification.
|
||
|
||
Arguments:
|
||
|
||
ReportCorrectableErrors - Supplies a boolean value which specifies
|
||
if correctable error reporting should be
|
||
enabled.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
|
||
{
|
||
IOD_CAP_CONTROL IodCapControl;
|
||
IOD_CAP_ERR IodCapError;
|
||
IOD_MDPA_DIAG IodMdpaDiag;
|
||
IOD_MDPB_DIAG IodMdpbDiag;
|
||
IOD_INT_MASK IodIntMask;
|
||
|
||
MC_DEVICE_ID McDeviceId;
|
||
MC_ENUM_CONTEXT mcCtx;
|
||
ULONG numIods;
|
||
BOOLEAN bfoundIod;
|
||
|
||
//
|
||
// Clear any pending error bits in the IOD_CAP_ERR register:
|
||
//
|
||
|
||
IodCapError.all = 0; // Clear all bits
|
||
|
||
IodCapError.Perr = 1; // PCI bus perr detected
|
||
IodCapError.Serr = 1; // PCI bus serr detected
|
||
IodCapError.Mab = 1; // PCI bus master abort detected
|
||
IodCapError.PteInv = 1; // Invalid Pte
|
||
IodCapError.PioOvfl = 1; // Pio Ovfl
|
||
IodCapError.LostMcErr = 1; // Lost error
|
||
IodCapError.McAddrPerr = 1; // MC bus comd/addr parity error
|
||
IodCapError.Nxm = 1; // Non-existent memory error
|
||
IodCapError.CrdA = 1; // Correctable ECC error on MDPA
|
||
IodCapError.CrdB = 1; // Correctable ECC error on MDPB
|
||
IodCapError.RdsA = 1; // Uncorrectable ECC error on MDPA
|
||
IodCapError.RdsA = 1; // Uncorrectable ECC error on MDPA
|
||
|
||
//
|
||
// Intialize enumerator.
|
||
//
|
||
|
||
numIods = HalpMcBusEnumStart ( HalpIodMask, &mcCtx );
|
||
|
||
//
|
||
// Intialize each Iod
|
||
//
|
||
|
||
while ( bfoundIod = HalpMcBusEnum( &mcCtx ) ) {
|
||
|
||
McDeviceId = mcCtx.McDeviceId;
|
||
|
||
//
|
||
// Initialize IOD_CAP_ERR
|
||
//
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->CapErr,
|
||
IodCapError.all );
|
||
|
||
//
|
||
// Set the Iod error enable bits in the IOD_CAP_CTRL and
|
||
// IOD_MDPA/B_DIAG registers. The configuration bits in the IOD
|
||
// will be left as set by the Extended SROM, with the few
|
||
// exceptions documented below.
|
||
//
|
||
|
||
IodCapControl.all = READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_GENERAL_CSRS)(IOD_GENERAL_CSRS_QVA))->CapCtrl );
|
||
|
||
#if 0 // CAP/MDP Bug
|
||
IodMdpaDiag.all =
|
||
READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpaDiag );
|
||
|
||
IodMdpbDiag.all =
|
||
READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpbDiag );
|
||
#else
|
||
|
||
//
|
||
// Clear Mdp Diagnotic Check Registers....
|
||
//
|
||
|
||
IodMdpaDiag.all = 0;
|
||
IodMdpbDiag.all = 0;
|
||
|
||
//
|
||
// Enable ECC checking on all MC Bus transactions
|
||
//
|
||
|
||
IodMdpaDiag.EccCkEn = 1;
|
||
IodMdpbDiag.EccCkEn = 1;
|
||
|
||
#endif
|
||
|
||
//
|
||
// Disable/enable PCI parity checking as requested
|
||
//
|
||
|
||
if (PciParityChecking == FALSE) {
|
||
|
||
IodCapControl.PciAddrPe= 0; // Do *not* check PCI address parity
|
||
IodMdpaDiag.ParCkEn = 0; // Do *not* check PCI data parity
|
||
IodMdpbDiag.ParCkEn = 0; // Do *not* check PCI data parity
|
||
|
||
} else {
|
||
|
||
IodCapControl.PciAddrPe= PciParityChecking;
|
||
IodMdpaDiag.ParCkEn = PciParityChecking;
|
||
IodMdpbDiag.ParCkEn = PciParityChecking;
|
||
|
||
}
|
||
|
||
|
||
//
|
||
// Disable McBus NXM's
|
||
//
|
||
// (If enabled, accesses to non-existent McBus device will cause an
|
||
// EV5 fill error. Non existant CSRs will return all 0s most of the time
|
||
// and not fill error.)
|
||
//
|
||
|
||
IodCapControl.McNxmEn = 0;
|
||
|
||
//
|
||
// Disable monitoring of McBus bystander errors.
|
||
//
|
||
// That means the IOD will not capture the failing address in the event of
|
||
// an MC bus NXM. It has no effect on what the IOD does in the event of a
|
||
// PCI NXM (which causes a Master Abort).
|
||
//
|
||
// Regardless of how McBusMonEn PCI PERR, SERR, MAB, and PTE_INV
|
||
// will only show up in IOD CAP_ERR of the participant in the transaction.
|
||
//
|
||
// If McBusMonEn is set, there can be a difference between the bystander CAP_ERR
|
||
// state and the participant CAP_ERR state (as per Sam Duncan, 5/3/95)
|
||
// shows up in an unlikely situation:
|
||
// "Cache single bit or double bit error: read is dirty in a cache
|
||
// and the fill has an ecc error, don't want to indite a memory for this
|
||
// (very unlikely) error."
|
||
// Thus, we choose not to be able to correctly detect this situation in
|
||
// order to make machine check and error handling easier, i.e., we
|
||
// always only need to clear only one IOD's CAP_ERROR.
|
||
//
|
||
|
||
IodCapControl.McBusMonEn= 0;
|
||
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_GENERAL_CSRS)(IOD_GENERAL_CSRS_QVA))->CapCtrl,
|
||
IodCapControl.all );
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpaDiag,
|
||
IodMdpaDiag.all );
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpbDiag,
|
||
IodMdpbDiag.all );
|
||
|
||
|
||
//
|
||
// Soft and Hard Error handling
|
||
//
|
||
// ecrfix - IntMask0 on Bus 0 only.
|
||
|
||
IodIntMask.all = READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_INT_CSRS)(IOD_INT_CSRS_QVA))->IntMask0 );
|
||
|
||
IodIntMask.SoftErr = (ReportCorrectableErrors == TRUE);
|
||
IodIntMask.HardErr = 0; // ecrfix - Mask Hard Errors for now
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_INT_CSRS)(IOD_INT_CSRS_QVA))->IntMask0,
|
||
IodIntMask.all );
|
||
|
||
} // while ( HalpMcBusEnum ( &mcCtx ) )
|
||
|
||
//
|
||
// Set the machine check enables within the EV5.
|
||
//
|
||
|
||
if( ReportCorrectableErrors == TRUE ){
|
||
HalpSetMachineCheckEnables( FALSE, FALSE, FALSE );
|
||
} else {
|
||
HalpSetMachineCheckEnables( FALSE, TRUE, TRUE );
|
||
}
|
||
|
||
return;
|
||
|
||
}
|
||
|
||
// Size of the on-stack buffers used in this module to format error messages.
#define MAX_ERROR_STRING 128
|
||
|
||
|
||
BOOLEAN
|
||
HalpIodUncorrectableError(
|
||
PMC_DEVICE_ID pMcDeviceId
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Read the IOD error register and determine if an uncorrectable error
|
||
is latched in the error bits.
|
||
|
||
Arguments:
|
||
|
||
None.
|
||
|
||
Return Value:
|
||
|
||
TRUE is returned if an uncorrectable error has been detected. FALSE
|
||
is returned otherwise.
|
||
|
||
--*/
|
||
{
|
||
UCHAR OutBuffer[ MAX_ERROR_STRING ];
|
||
IOD_WHOAMI IodWhoAmI;
|
||
IOD_CAP_ERR IodCapErr;
|
||
|
||
//
|
||
// Check for a duplicate tag parity error on this (in the Smalltalk
|
||
// sense) processor.
|
||
//
|
||
|
||
IodWhoAmI.all = HalpReadWhoAmI();
|
||
HalpIodWhoAmIOnError.all = IodWhoAmI.all;
|
||
|
||
if ( IodWhoAmI.CpuInfo & CACHED_CPU_DTAG_PARITY_ERROR ) {
|
||
|
||
pMcDeviceId->all = IodWhoAmI.Devid;
|
||
|
||
return TRUE;
|
||
|
||
} else {
|
||
|
||
//
|
||
// None of the uncorrectable error conditions were detected.
|
||
//
|
||
|
||
return FALSE;
|
||
}
|
||
|
||
}
|
||
|
||
VOID
|
||
HalpBuildIodErrorFrame(
|
||
MC_DEVICE_ID McDeviceId,
|
||
PIOD_ERROR_FRAME IodErrorFrame
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This function reports and interprets a fatal hardware error
|
||
detected by the IOD chipset. It is assumed that HalGetDisplayOwnership()
|
||
has been called prior to this function.
|
||
|
||
Arguments:
|
||
|
||
McDevid - Supplies the MC Bus Device ID of the IOD
|
||
IodErrorFrame - Supplies a pointer to an IOD_ERROR_FRAME
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
|
||
{
|
||
//
|
||
// Clear it first, since caller may reuse the IodErrorFrame
|
||
//
|
||
|
||
RtlZeroMemory(IodErrorFrame, sizeof(IOD_ERROR_FRAME));
|
||
|
||
//
|
||
// Everything is valid
|
||
//
|
||
|
||
IodErrorFrame->ValidBits.all = 0xffffffff; // all valid
|
||
|
||
|
||
//
|
||
// Read the General registers
|
||
//
|
||
|
||
IodErrorFrame->BaseAddress = IOD_IO_SPACE_START |
|
||
IOD_SPARSE_CSR_OFFSET |
|
||
MCDEVID_TO_PHYS_ADDR(McDeviceId.all);
|
||
|
||
IodErrorFrame->WhoAmI = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_GENERAL_CSRS)(IOD_GENERAL_CSRS_QVA))->WhoAmI
|
||
);
|
||
|
||
IodErrorFrame->PciRevision = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_GENERAL_CSRS)(IOD_GENERAL_CSRS_QVA))->PciRevision
|
||
);
|
||
|
||
IodErrorFrame->CapCtrl = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_GENERAL_CSRS)(IOD_GENERAL_CSRS_QVA))->CapCtrl
|
||
);
|
||
|
||
IodErrorFrame->HaeMem = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_GENERAL_CSRS)(IOD_GENERAL_CSRS_QVA))->HaeMem
|
||
);
|
||
|
||
IodErrorFrame->HaeIo = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_GENERAL_CSRS)(IOD_GENERAL_CSRS_QVA))->HaeIo
|
||
);
|
||
|
||
//
|
||
// Read Interrupt Control and Status Registers
|
||
//
|
||
|
||
IodErrorFrame->IntCtrl = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_INT_CSRS)(IOD_INT_CSRS_QVA))->IntCtrl
|
||
);
|
||
|
||
IodErrorFrame->IntReq = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_INT_CSRS)(IOD_INT_CSRS_QVA))->IntReq
|
||
);
|
||
|
||
IodErrorFrame->IntMask0 = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_INT_CSRS)(IOD_INT_CSRS_QVA))->IntMask0
|
||
);
|
||
|
||
IodErrorFrame->IntMask1 = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_INT_CSRS)(IOD_INT_CSRS_QVA))->IntMask1
|
||
);
|
||
|
||
//
|
||
// Read the rest of the error registers and then unlock them by
|
||
// writing to CAP_ERR
|
||
//
|
||
|
||
IodErrorFrame->CapErr = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->CapErr
|
||
);
|
||
|
||
IodErrorFrame->PciErr1 = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->PciErr1
|
||
);
|
||
|
||
IodErrorFrame->McErr0 = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->McErr0
|
||
);
|
||
|
||
IodErrorFrame->McErr1 = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->McErr1
|
||
);
|
||
#if 0 // CAP/MDP Bug
|
||
IodErrorFrame->MdpaStat = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpaStat
|
||
);
|
||
|
||
IodErrorFrame->MdpaSyn = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpaSyn
|
||
);
|
||
|
||
IodErrorFrame->MdpbStat = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpbStat
|
||
);
|
||
|
||
IodErrorFrame->MdpbSyn = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpbSyn
|
||
);
|
||
#else
|
||
|
||
//
|
||
// CAP/MDP Bug - these registers are not valid.
|
||
//
|
||
|
||
IodErrorFrame->ValidBits.MdpaStatValid = 0;
|
||
IodErrorFrame->ValidBits.MdpbStatValid = 0;
|
||
IodErrorFrame->ValidBits.MdpaSynValid = 0;
|
||
IodErrorFrame->ValidBits.MdpbSynValid = 0;
|
||
|
||
#endif // CAP/MDP Bug
|
||
|
||
}
|
||
|
||
VOID
|
||
HalpIodReportFatalError(
|
||
MC_DEVICE_ID ErrorMcDeviceId
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This function reports and interprets a fatal hardware error
|
||
detected by the IOD chipset. It is assumed that HalGetDisplayOwnership()
|
||
has been called prior to this function.
|
||
|
||
Arguments:
|
||
|
||
ErrorMcDeviceId - Supplies the MC Bus Device ID of the IOD
|
||
where the error was found
|
||
|
||
- In the case of a Duplicate Tag Parity Error, supplies
|
||
the CPU that took the error. Note, in this case
|
||
the ErrorMcDeviceId will never match a IOD McDeviceId.
|
||
No MC Bus snapshot is present in this case.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
|
||
{
|
||
UCHAR OutBuffer[ MAX_ERROR_STRING ];
|
||
IOD_ERROR_FRAME IodErrorFrame, *pCurrentIodErrorFrame;
|
||
MC_ENUM_CONTEXT mcCtx;
|
||
MC_DEVICE_ID McDeviceId;
|
||
ULONG numIods;
|
||
BOOLEAN bfoundIod;
|
||
|
||
PUNCORRECTABLE_ERROR uncorr = NULL;
|
||
PRAWHIDE_UNCORRECTABLE_FRAME rawerr = NULL;
|
||
PEXTENDED_ERROR PExtErr;
|
||
|
||
//
|
||
// Do we have an uncorrectable error frame?
|
||
//
|
||
|
||
if (PUncorrectableError) {
|
||
uncorr = (PUNCORRECTABLE_ERROR)
|
||
&PUncorrectableError->UncorrectableFrame;
|
||
rawerr = (PRAWHIDE_UNCORRECTABLE_FRAME)
|
||
PUncorrectableError->UncorrectableFrame.RawSystemInformation;
|
||
PExtErr = &PUncorrectableError->UncorrectableFrame.ErrorInformation;
|
||
}
|
||
|
||
//
|
||
// Validate the ProcessorInfo portion of the Error Frame.
|
||
//
|
||
|
||
if (uncorr) {
|
||
uncorr->Flags.ProcessorInformationValid = 1;
|
||
HalpGetProcessorInfo(&uncorr->ReportingProcessor);
|
||
|
||
//
|
||
// Initialize our "error string accumulator"
|
||
//
|
||
|
||
HalpErrorFrameString( uncorr, NULL );
|
||
|
||
}
|
||
|
||
//
|
||
// Validate the Rawhide Uncorrectable Frame
|
||
// (Common RCUD Header was already set up.)
|
||
//
|
||
|
||
if (rawerr) {
|
||
rawerr->Revision = RAWHIDE_UNCORRECTABLE_FRAME_REVISION;
|
||
rawerr->WhoAmI = HalpIodWhoAmIOnError.all;
|
||
rawerr->ErrorSubpacketFlags.all = 0;
|
||
rawerr->CudHeader.ActiveCpus = BuildActiveCpus();
|
||
|
||
}
|
||
|
||
//
|
||
// Handle cached CPU duplicate tag parity error.
|
||
// (Note that a DTAG parity error implies that we don't
|
||
// take an MC Bus Snapshot.
|
||
//
|
||
|
||
if ( HalpIodWhoAmIOnError.CpuInfo & CACHED_CPU_DTAG_PARITY_ERROR ) {
|
||
|
||
sprintf( OutBuffer, "Duplicate Tag Parity Error on CPU %x\n",
|
||
MCDEVID_TO_PHYS_CPU(HalpIodWhoAmIOnError.McDevId.all) );
|
||
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( "Duplicate Tag Parity Error on CPU (%d, %d)\n",
|
||
HalpIodWhoAmIOnError.McDevId.Gid, HalpIodWhoAmIOnError.McDevId.Mid);
|
||
#endif
|
||
HalpErrorFrameString( uncorr, OutBuffer );
|
||
|
||
//
|
||
// OK. This is tedious:
|
||
// * Error is in memory space and is the system (external) cache.
|
||
// * And we know this is the L3 cache.
|
||
// * And we'll subvert the "CacheBoard" to squirrel away the
|
||
// Cached CPU Revision Info and Cache size.
|
||
//
|
||
|
||
uncorr->Flags.AddressSpace = MEMORY_SPACE;
|
||
uncorr->Flags.ExtendedErrorValid = 1;
|
||
uncorr->Flags.MemoryErrorSource = SYSTEM_CACHE;
|
||
PExtErr->CacheError.Flags.CacheLevelValid = 1;
|
||
PExtErr->CacheError.Flags.CacheBoardValid = 1;
|
||
PExtErr->CacheError.Flags.CacheSimmValid = 0;
|
||
PExtErr->CacheError.CacheLevel = 3;
|
||
PExtErr->CacheError.CacheBoardNumber = HalpIodWhoAmIOnError.CpuInfo;
|
||
|
||
return;
|
||
}
|
||
|
||
//
|
||
// Handle cached CPU fill error.
|
||
// Since this could be caused by an MC Bus or PCI error,
|
||
// we continue to create an MC Bus snapshot.
|
||
//
|
||
|
||
if ( HalpIodWhoAmIOnError.CpuInfo & CACHED_CPU_FILL_ERROR ) {
|
||
|
||
sprintf( OutBuffer, "Fill Error on CPU %x\n",
|
||
MCDEVID_TO_PHYS_CPU(HalpIodWhoAmIOnError.McDevId.all) );
|
||
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( "Fill Error on CPU (%d, %d)\n",
|
||
HalpIodWhoAmIOnError.McDevId.Gid, HalpIodWhoAmIOnError.McDevId.Mid);
|
||
#endif
|
||
HalpErrorFrameString( uncorr, OutBuffer );
|
||
|
||
//
|
||
// * WhoAmI tells us Addr<38:33> of reference causing error.
|
||
// * However, PciErr1 and/or McErr0/McErr1 give us more bits,
|
||
// so the data entered here my get overwritten later.
|
||
//
|
||
|
||
uncorr->Flags.PhysicalAddressValid = 1;
|
||
uncorr->PhysicalAddress =
|
||
( ((ULONGLONG)(HalpIodWhoAmIOnError.CpuInfo & 0x3f)) << 33 );
|
||
|
||
}
|
||
|
||
//
|
||
// Validate the MCBusSnapshot header.
|
||
//
|
||
|
||
if (rawerr) {
|
||
rawerr->ErrorSubpacketFlags.McBusPresent = 1;
|
||
rawerr->McBusSnapshot.ReportingCpuBaseAddr =
|
||
IOD_IO_SPACE_START |
|
||
MCDEVID_TO_PHYS_ADDR( HalpIodWhoAmIOnError.Devid );
|
||
pCurrentIodErrorFrame = (PIOD_ERROR_FRAME) (rawerr + 1);
|
||
}
|
||
|
||
//
|
||
// Intialize enumerator.
|
||
//
|
||
|
||
numIods = HalpMcBusEnumStart ( HalpIodMask, &mcCtx );
|
||
ASSERT( numIods == HalpNumberOfIods);
|
||
|
||
//
|
||
// Gather data from each Iod
|
||
//
|
||
|
||
while ( bfoundIod = HalpMcBusEnum( &mcCtx ) ) {
|
||
|
||
McDeviceId.all = mcCtx.McDeviceId.all;
|
||
|
||
HalpBuildIodErrorFrame( McDeviceId, &IodErrorFrame );
|
||
|
||
//
|
||
// Fill in IOD_ERROR_FRAME portion of the RAWHIDE_UNCORRECTABLE_FRAME
|
||
//
|
||
|
||
if (rawerr) {
|
||
|
||
RtlCopyMemory( pCurrentIodErrorFrame,
|
||
&IodErrorFrame,
|
||
sizeof(IOD_ERROR_FRAME));
|
||
|
||
pCurrentIodErrorFrame++;
|
||
}
|
||
|
||
//
|
||
// If this is the IOD where we found the error
|
||
// a. clear the error
|
||
// b. complete the uncorrectable error frame processing
|
||
// c. Display an interpretation of the error to the screen
|
||
//
|
||
|
||
if (ErrorMcDeviceId.all == McDeviceId.all) {
|
||
|
||
// ecrfix Put below into HalpInterpretIodError(McDeviceId, IodErrorFrame) ???
|
||
IOD_WHOAMI IodWhoAmI;
|
||
IOD_CAP_CONTROL IodCapCtrl;
|
||
IOD_CAP_ERR IodCapErr;
|
||
IOD_PCI_ERR1 IodPciErr1;
|
||
IOD_MC_ERR0 IodMcErr0;
|
||
IOD_MC_ERR1 IodMcErr1;
|
||
IOD_MDPA_STAT IodMdpaStat;
|
||
IOD_MDPB_STAT IodMdpbStat;
|
||
ULONG HwBusNumber = ErrorMcDeviceId.Mid & 0x3;
|
||
|
||
//
|
||
// Copy error frame variables in locals for bitfield access
|
||
//
|
||
|
||
IodWhoAmI.all = IodErrorFrame.WhoAmI;
|
||
IodCapCtrl.all = IodErrorFrame.CapCtrl;
|
||
IodCapErr.all = IodErrorFrame.CapErr;
|
||
IodPciErr1.PciAddress = IodErrorFrame.PciErr1;
|
||
IodMcErr0.all = IodErrorFrame.McErr0;
|
||
IodMcErr1.all = IodErrorFrame.McErr1;
|
||
|
||
#if 0 // CAP/MDP Bug
|
||
IodMdpaStat.all = IodErrorFrame.MdpaStat;
|
||
IodMdpbStat.all = IodErrorFrame.MdpbStat;
|
||
IodMdpaSyn.all = IodErrorFrame.MdpaSyn;
|
||
IodMdpbSyn.all = IodErrorFrame.MdpbSyn;
|
||
#else
|
||
IodMdpaStat.all = 0xffffffff;
|
||
IodMdpbStat.all = 0xffffffff;
|
||
#endif // CAP/MDP Bug
|
||
|
||
|
||
|
||
//
|
||
// Clear state in MDPA and MDPB before clearing CAP_ERR
|
||
//
|
||
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpaStat,
|
||
IodErrorFrame.MdpaStat
|
||
);
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpbStat,
|
||
IodErrorFrame.MdpbStat
|
||
);
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->CapErr,
|
||
IodErrorFrame.CapErr
|
||
);
|
||
|
||
sprintf( OutBuffer,
|
||
"IOD MC_DEVICE_ID : (%x, %x) CAP_CTRL : %08x CAP_ERR : %08x\n",
|
||
McDeviceId.Gid, McDeviceId.Mid,
|
||
IodCapCtrl.all,
|
||
IodCapErr.all );
|
||
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( OutBuffer );
|
||
#endif
|
||
|
||
sprintf( OutBuffer,
|
||
"PCI_ERR1 %08x MC_ERR0 : %08x MC_ERR1 : %08x\n",
|
||
IodPciErr1.PciAddress,
|
||
IodMcErr0.all,
|
||
IodMcErr1.all );
|
||
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( OutBuffer );
|
||
#endif
|
||
|
||
#if 0 // CAP/MDP Bug
|
||
sprintf( OutBuffer,
|
||
"MDPA_STAT : %08x MDPA_SYN : %08x MDPB_STAT : %08x MDPB_SYN : %08x\n",
|
||
IodMdpaStat.all,
|
||
IodMdpaSyn.all,
|
||
IodMdpbStat.all,
|
||
IodMdpbSyn.all );
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( OutBuffer );
|
||
#endif
|
||
#endif
|
||
|
||
//
|
||
// If no valid error then no interpretation.
|
||
//
|
||
|
||
if (( IodCapErr.PciErrValid == 0 ) && ( IodCapErr.McErrValid == 0 ) ){
|
||
|
||
return; // No IOD error detected
|
||
|
||
}
|
||
|
||
//
|
||
// Interpret any detected errors:
|
||
//
|
||
|
||
if (IodCapErr.McErrValid == 1) {
|
||
|
||
if ( IodMcErr1.Dirty != 1 ) {
|
||
sprintf( OutBuffer,
|
||
"MC Bus Error, Bus Master=(%x,%x)\n",
|
||
|
||
( ( IodMcErr1.DevId & 0x38) >> 3 ),
|
||
( IodMcErr1.DevId & 0x07)
|
||
);
|
||
} else {
|
||
|
||
sprintf( OutBuffer,
|
||
"MC bus error on a read/dirty transaction\n"
|
||
);
|
||
}
|
||
|
||
|
||
//
|
||
// Output the detected error message:
|
||
//
|
||
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( OutBuffer );
|
||
#endif
|
||
HalpErrorFrameString( uncorr, OutBuffer);
|
||
|
||
|
||
sprintf( OutBuffer,
|
||
"IOD Addr=%x%x, Cmd=%x\n",
|
||
IodMcErr1.Addr39_32, // bits 39:32
|
||
IodMcErr0.Addr, // bits 31:4
|
||
IodMcErr1.McCmd
|
||
);
|
||
|
||
//
|
||
// Output the detected error message:
|
||
//
|
||
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( OutBuffer );
|
||
#endif
|
||
HalpErrorFrameString( uncorr, OutBuffer);
|
||
|
||
//
|
||
// Interpret specific MC bus error
|
||
//
|
||
|
||
uncorr->Flags.PhysicalAddressValid = 1;
|
||
uncorr->PhysicalAddress = (
|
||
(((ULONGLONG)IodMcErr1.Addr39_32) << 32) |
|
||
((ULONGLONG)IodMcErr0.Addr) );
|
||
|
||
//
|
||
// McAddr<39> indicates whether this was a
|
||
// memory or I/O transaction.
|
||
//
|
||
|
||
if ( (IodMcErr1.Addr39_32 & 0x80) == 1) {
|
||
uncorr->Flags.AddressSpace = IO_SPACE;
|
||
} else {
|
||
uncorr->Flags.AddressSpace = MEMORY_SPACE;
|
||
}
|
||
|
||
if ( IodCapErr.PioOvfl == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"IOD PIO Overflow, PendNumb=%x\n",
|
||
IodCapCtrl.PendNum
|
||
);
|
||
|
||
} else if ( IodCapErr.McAddrPerr == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"MC bus parity error\n"
|
||
);
|
||
|
||
} else if ( IodCapErr.Nxm == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"MC bus NXM\n"
|
||
);
|
||
|
||
} else if ( IodCapErr.CrdA == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"IOD Correctable ECC error in MDPA\n"
|
||
);
|
||
|
||
} else if ( IodCapErr.CrdB == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"IOD Correctable ECC error in MDPB\n"
|
||
);
|
||
|
||
} else if ( IodCapErr.RdsA == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"IOD Uncorrectable ECC error in MDPA\n"
|
||
);
|
||
|
||
} else if ( IodCapErr.RdsB == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"IOD Uncorrectable ECC error in MDPB\n"
|
||
);
|
||
|
||
}
|
||
|
||
//
|
||
// Output the detected error message:
|
||
//
|
||
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( OutBuffer );
|
||
#endif
|
||
HalpErrorFrameString( uncorr, OutBuffer);
|
||
|
||
}
|
||
|
||
if ( IodCapErr.PciErrValid == 1 ){
|
||
|
||
//
|
||
// Interpret specific PCI bus error
|
||
//
|
||
|
||
uncorr->Flags.AddressSpace = IO_SPACE;
|
||
uncorr->Flags.PhysicalAddressValid = 1;
|
||
uncorr->PhysicalAddress = IOD_IO_SPACE_START |
|
||
MCDEVID_TO_PHYS_ADDR(IodWhoAmI.McDevId.all) |
|
||
IodPciErr1.PciAddress << IO_BIT_SHIFT;
|
||
|
||
uncorr->Flags.ExtendedErrorValid = 1;
|
||
PExtErr->IoError.Interface = PCIBus;
|
||
PExtErr->IoError.BusNumber = HwBusNumber;
|
||
PExtErr->IoError.BusAddress.LowPart = IodPciErr1.PciAddress;
|
||
|
||
if ( IodCapErr.Perr == 1 ){
|
||
sprintf( OutBuffer,
|
||
"PERR detected on PCI-%d, Addr=%x\n",
|
||
HwBusNumber,
|
||
IodPciErr1.PciAddress
|
||
);
|
||
|
||
} else if ( IodCapErr.Serr == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"SERR detected on PCI-%d, Addr=%x\n",
|
||
HwBusNumber,
|
||
IodPciErr1.PciAddress
|
||
);
|
||
|
||
} else if ( IodCapErr.Mab == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"Master Abort on PCI-%d, Addr=%x\n",
|
||
HwBusNumber,
|
||
IodPciErr1.PciAddress
|
||
);
|
||
|
||
} else if ( IodCapErr.PteInv == 1 ){
|
||
|
||
sprintf( OutBuffer,
|
||
"Invalid Scatter/Gather PTE on PCI-%d, Addr=%x\n",
|
||
HwBusNumber,
|
||
IodPciErr1.PciAddress
|
||
);
|
||
}
|
||
|
||
//
|
||
// Output the detected error message:
|
||
//
|
||
|
||
HalDisplayString( OutBuffer );
|
||
#if HALDBG
|
||
DbgPrint( OutBuffer );
|
||
#endif
|
||
HalpErrorFrameString( uncorr, OutBuffer);
|
||
|
||
}
|
||
|
||
//
|
||
// Check for lost errors and output message if any occurred:
|
||
//
|
||
|
||
if ( IodCapErr.LostMcErr == 1 ){
|
||
HalDisplayString("IOD Lost errors were detected\n");
|
||
#if HALDBG
|
||
DbgPrint("IOD Lost errors were detected\n");
|
||
#endif
|
||
HalpErrorFrameString(uncorr, "IOD Lost errors were detected\n");
|
||
}
|
||
|
||
} // if (ErrorMcDeviceID == McDeviceId)
|
||
|
||
} // while (bfoundIod = HalpMcBusEnum)
|
||
|
||
return; // Fatal error detected
|
||
}
|
||
|
||
|
||
BOOLEAN
|
||
HalpIodMachineCheck(
|
||
IN PEXCEPTION_RECORD ExceptionRecord,
|
||
IN PKEXCEPTION_FRAME ExceptionFrame,
|
||
IN PKTRAP_FRAME TrapFrame
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This routine is given control when an hard error is acknowledged
|
||
by the IOD chipset. The routine is given the chance to
|
||
correct and dismiss the error.
|
||
|
||
Arguments:
|
||
|
||
ExceptionRecord - Supplies a pointer to the exception record generated
|
||
at the point of the exception.
|
||
|
||
ExceptionFrame - Supplies a pointer to the exception frame generated
|
||
at the point of the exception.
|
||
|
||
TrapFrame - Supplies a pointer to the trap frame generated
|
||
at the point of the exception.
|
||
|
||
Return Value:
|
||
|
||
TRUE is returned if the machine check has been handled and dismissed -
|
||
indicating that execution can continue. FALSE is return otherwise.
|
||
|
||
--*/
|
||
{
|
||
IOD_CAP_ERR IodCapErr;
|
||
IOD_CAP_ERR IodCapErrMask;
|
||
IOD_MC_ERR1 IodMcErr1;
|
||
IOD_WHOAMI IodWhoAmI;
|
||
MC_DEVICE_ID McDeviceId;
|
||
BOOLEAN ExpectedMchk;
|
||
BOOLEAN ExpectedMcAddrPerr;
|
||
BOOLEAN PciMemReadMchk;
|
||
BOOLEAN bfoundIod;
|
||
|
||
//
|
||
// We don't expect a machine check yet...
|
||
//
|
||
|
||
ExpectedMchk = FALSE;
|
||
ExpectedMcAddrPerr = FALSE;
|
||
|
||
//
|
||
// Make sure any error due to 2Mb/4Mb Cached CUD bug is latched.
|
||
//
|
||
// At this point, WhoAmI may indicate the symptoms of a fill_error
|
||
// and CUD cache size is not available. We'll read it again when
|
||
// we need to know the Cache size. However, we save he here so we
|
||
// can figure out if this was a fill error or not.
|
||
//
|
||
|
||
HalpIodWhoAmIOnError.all = HalpReadWhoAmI();
|
||
|
||
//
|
||
// Where do we look for the error symptoms?
|
||
//
|
||
// 1. If we expected this machine check, then we know which
|
||
// IOD to check.
|
||
// 2. If we didn't expect this machine check, find the IOD that
|
||
// generated the error.
|
||
//
|
||
|
||
//
|
||
// For an expected machine check, HalpMasterAbortExpected will
|
||
// contain the processor number and address of a PCI config
|
||
// space read. CAP_ERR will indicate a MasterAbort.
|
||
//
|
||
|
||
if( HalpMasterAbortExpected.Number == (ULONG)KeGetCurrentProcessorNumber() ) {
|
||
|
||
//
|
||
// Determine expected IOD from the address of the PCI config read
|
||
//
|
||
|
||
McDeviceId.all = MCDEVID_FROM_PHYS_ADDR(HalpMasterAbortExpected.Addr);
|
||
|
||
//
|
||
// Now get the Bcache size information.
|
||
//
|
||
|
||
IodWhoAmI.all = READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_GENERAL_CSRS)(IOD_GENERAL_CSRS_QVA))->WhoAmI);
|
||
|
||
//
|
||
//
|
||
// Make sure there is a Master abort on this IOD
|
||
//
|
||
|
||
IodCapErr.all = READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->CapErr );
|
||
|
||
|
||
if( IodCapErr.Mab == 1 ) {
|
||
ExpectedMchk = TRUE;
|
||
|
||
//
|
||
// If 2Mb or 4 Mb cached CUD, and we may get an MCbus address parity
|
||
// error with MC command signature in MC_ERR1 equal to zero (cached
|
||
// CPU idle transaction). Also dismiss this error that's the result
|
||
// of the cached 2Mb/4Mb cached CPU VCTY bug.
|
||
//
|
||
|
||
IodMcErr1.all = READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->McErr1 );
|
||
|
||
if ((IodWhoAmI.CpuInfo & 0x7) && // Cached CPU?
|
||
IodCapErr.McAddrPerr && // McAddrPerr?
|
||
(IodMcErr1.McCmd == 0) ) { // McCmd is zero?
|
||
|
||
ExpectedMcAddrPerr = TRUE; // All yes, then dismiss it!
|
||
}
|
||
}
|
||
|
||
#if HALDBG
|
||
DbgPrint( "Expected Mchk (Mab) on IOD (%x, %x), Processor number %x\n",
|
||
McDeviceId.Gid,
|
||
McDeviceId.Mid,
|
||
HalpMasterAbortExpected.Number);
|
||
#endif //HALDBG
|
||
}
|
||
|
||
//
|
||
// If this isn't the machine check we expected, then
|
||
// we must find the IOD that took the error.
|
||
//
|
||
|
||
if (!ExpectedMchk) {
|
||
|
||
bfoundIod = bFindIodError( &McDeviceId, &IodCapErr );
|
||
|
||
//
|
||
// Check that we found an IOD that has a valid PCI or MC error.
|
||
// If it is not this is a pretty weird (fatal???) condition.
|
||
// For now, we'll just go return TRUE.
|
||
//
|
||
// ecrfix - should we check the error interrupts? probably not...
|
||
//
|
||
|
||
if( !bfoundIod ) {
|
||
|
||
#if HALDBG
|
||
DbgPrint( "HalpIodMachineCheck called but no PCI or MC error found\n");
|
||
#endif
|
||
return (TRUE);
|
||
}
|
||
|
||
|
||
#if 0 // HALDBG
|
||
DbgPrint( "Unexpected Mchk on IOD (%x, %x)\n",
|
||
McDeviceId.Gid,
|
||
McDeviceId.Mid );
|
||
#endif //HALDBG
|
||
|
||
//
|
||
// Case: Unexpected Master Abort, e.g. a PCI memory or I/O space read to
|
||
// legacy ISA space (0 - 1 Mb) on PCI-1,2,3.
|
||
//
|
||
|
||
if ( bHandleIsaError( McDeviceId, IodCapErr) ) {
|
||
return TRUE;
|
||
}
|
||
|
||
}
|
||
|
||
//
|
||
// Case: PCI or MC Bus error other than master abort
|
||
//
|
||
// At this point we have either:
|
||
// (a) an expected PCI Master Abort (ExpectedMchk == TRUE), or
|
||
// (b) an unexpected PCI or MC Bus error.
|
||
//
|
||
// However, it's possible that we have *both* (a) AND (b).
|
||
// So, even if ExpectedMch == TRUE, check for other PCI or MC Bus
|
||
// errors. Any of these other errors indicate a
|
||
// fatal condition.
|
||
//
|
||
|
||
if( (IodCapErr.Perr == 1) || // PCI bus perr detected
|
||
(IodCapErr.Serr == 1) || // PCI bus serr detected
|
||
(IodCapErr.PteInv == 1) || // Invalid Pte
|
||
(IodCapErr.PioOvfl == 1) || // Pio Ovfl
|
||
|
||
//
|
||
// Cached CUD with 2 Mb and 4 Mb Cache may also assert an MCAddrPerr
|
||
// or Nxm upon a config space read. Lost Error bit will also be set.
|
||
//
|
||
//
|
||
|
||
( (IodCapErr.LostMcErr == 1) && !ExpectedMcAddrPerr) ||
|
||
// Lost error
|
||
( (IodCapErr.McAddrPerr == 1) && !ExpectedMcAddrPerr ) ||
|
||
// MC bus comd/addr parity error
|
||
|
||
|
||
( (IodCapErr.Nxm == 1) && !ExpectedMcAddrPerr ) ||
|
||
// Non-existent memory error
|
||
(IodCapErr.CrdA == 1) || // Correctable ECC error on MDPA
|
||
(IodCapErr.CrdB == 1) || // Correctable ECC error on MDPB
|
||
(IodCapErr.RdsA == 1) || // Uncorrectable ECC error on MDPA
|
||
(IodCapErr.RdsA == 1) // Uncorrectable ECC error on MDPA
|
||
|
||
){
|
||
return ( bHandleFatalIodError(McDeviceId, TRUE) );
|
||
}
|
||
|
||
//
|
||
// At this point, we have either an expected or unexpected Master
|
||
// abort. There are three cases:
|
||
// 1. Expected MAB from a PCI config space read that must be handled
|
||
// 2. Unexpected MAB from a PCI memory or I/O space read in ISA legacy
|
||
// space that can be handled.
|
||
// 3. Unexpected MAB. Don't handle or fix up this error condition.
|
||
// (Really take the machine check.)
|
||
//
|
||
|
||
//
|
||
// Case 1: Expected Master Abort, e.g. a PCI configuration read error.
|
||
//
|
||
|
||
if ( (IodCapErr.Mab == 1) && ExpectedMchk ){
|
||
|
||
//
|
||
// Here's how a PCI config space read to an empty slot will transpire:
|
||
//
|
||
// READ_CONFIG_Usize indicates the issuing CPU and address in
|
||
// HalpMasterAbortExpected.Number and HalpMasterAbortExpected.Addr.
|
||
//
|
||
// PCI config space read will cause a MC Bus FILL_ERROR on the issuing CPU
|
||
// FILL_ERROR causes a machine check.
|
||
//
|
||
// The targeted MC-PCI bus bridge will set CAP_ERR<MasterAbort> bit.
|
||
//
|
||
// So far, the error looks like a PCI configuration space read
|
||
// that accessed a device that does not exist. In order to fix
|
||
// this up we expect that the original faulting instruction must
|
||
// be a load with v0 as the destination register. Unfortunately,
|
||
// machine checks are not precise exceptions so we may have executed
|
||
// many instructions since the faulting load. For EV5 a pair of
|
||
// memory barrier instructions following the load will stall the pipe
|
||
// waiting for load completion before the second memory barrier can
|
||
// be issued. Therefore, we expect the exception PC to point to either
|
||
// the load instruction or one of the two memory barriers. We will
|
||
// assume that if the exception pc is not an mb that instead it
|
||
// points to the load that machine checked. We must be careful to
|
||
// not reexecute the load.
|
||
//
|
||
|
||
ALPHA_INSTRUCTION FaultingInstruction;
|
||
|
||
|
||
FaultingInstruction.Long = *(PULONG)((ULONG)TrapFrame->Fir);
|
||
if( FaultingInstruction.Memory.Opcode != MEMSPC_OP ){
|
||
|
||
//
|
||
// Exception pc does not point to a memory barrier, return
|
||
// to the instruction after the exception pc.
|
||
//
|
||
|
||
TrapFrame->Fir += 4;
|
||
|
||
}
|
||
|
||
//
|
||
// The error has matched all of our conditions. Fix it up by
|
||
// writing the value 0xffffffff into the destination of the load.
|
||
//
|
||
|
||
TrapFrame->IntV0 = (ULONGLONG)0xffffffffffffffff;
|
||
|
||
//
|
||
// Clear all error conditions in CAP_ERR.
|
||
// (McAddrPerr, LostMcErr, Mab)
|
||
//
|
||
#if 0
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->CapErr,
|
||
IodCapErr.all );
|
||
#else
|
||
IodCapErrMask.all = ALL_CAP_ERRORS;
|
||
HalpClearAllIods( IodCapErrMask );
|
||
#endif
|
||
|
||
//
|
||
// Clear the hard error interrupt.
|
||
// ecrfix - For now, the Hard error interrupt is masked, so
|
||
// we don't have to clear it.
|
||
//
|
||
|
||
return TRUE;
|
||
|
||
}
|
||
#if 0
|
||
//
|
||
// Case 2: Uexpected Master Abort, e.g. a PCI memory or I/O space read to
|
||
// legacy ISA space (0 - 1 Mb) on PCI-1,2,3.
|
||
//
|
||
|
||
if ( bHandleIsaError( McDeviceId, IodCapErr) ) {
|
||
return TRUE;
|
||
}
|
||
#endif
|
||
//
|
||
// Case 3: Unexpected Master abort.
|
||
// (Or anything I might have missed.... )
|
||
//
|
||
|
||
#if (DBG) || (HALDBG)
|
||
DbgPrint( "Unexpected PCI master abort\n" );
|
||
#endif
|
||
|
||
return ( bHandleFatalIodError(McDeviceId, TRUE) );
|
||
|
||
}
|
||
|
||
|
||
#define ENTIRE_FRAME_SIZE (sizeof(ERROR_FRAME) + sizeof(RAWHIDE_CORRECTABLE_FRAME))
|
||
VOID
|
||
HalpIodSoftErrorInterrupt(
|
||
VOID
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Handle a IOD soft (correctable) error interrupt.
|
||
|
||
Arguments:
|
||
|
||
None.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
|
||
{
|
||
BOOLEAN bfoundIod;
|
||
MC_DEVICE_ID McDeviceId;
|
||
|
||
static UCHAR Frame[ENTIRE_FRAME_SIZE];
|
||
static PERROR_FRAME pFrame;
|
||
static RAWHIDE_CORRECTABLE_FRAME RawhideFrame;
|
||
static BOOLEAN RawhideFrameInitialized = FALSE;
|
||
|
||
UCHAR TempFrame[ENTIRE_FRAME_SIZE];
|
||
PERROR_FRAME pTempFrame;
|
||
PCORRECTABLE_ERROR pCorr;
|
||
PRAWHIDE_CORRECTABLE_FRAME pRawCorr;
|
||
|
||
PBOOLEAN ErrorlogBusy;
|
||
PULONG DispatchCode;
|
||
PKINTERRUPT InterruptObject;
|
||
PKSPIN_LOCK ErrorlogSpinLock;
|
||
PRAWHIDE_UNCORRECTABLE_FRAME rawerr;
|
||
|
||
IOD_CAP_ERR IodCapErr;
|
||
IOD_MDPA_STAT IodMdpaStat;
|
||
IOD_MDPA_STAT IodMdpbStat;
|
||
IOD_MC_ERR0 IodMcErr0;
|
||
IOD_MC_ERR1 IodMcErr1;
|
||
|
||
KIRQL Irql;
|
||
|
||
#if 0 // CAP/MDP Bug
|
||
IOD_MDPA_SYN IodMdpaSyn;
|
||
IOD_MDPB_SYN IodMdpbSyn;
|
||
#endif
|
||
|
||
|
||
//ecrfix - later we should log the error, throttle the logging and turn off
|
||
// correctable error reporting if the frequency is too high
|
||
|
||
//
|
||
// The error is expected to be a corrected ECC error on a DMA or
|
||
// Scatter/Gather TLB read/write. Read the error registers relevant
|
||
// to this error.
|
||
//
|
||
|
||
//
|
||
// Find the IOD that latched the error.
|
||
//
|
||
|
||
bfoundIod = bFindIodError( &McDeviceId, &IodCapErr );
|
||
|
||
#ifdef FORCE_CORRECTABLE_ERROR
|
||
IodCapErr.all = 0x88000000;
|
||
bfoundIod = 1;
|
||
#endif // FORCE_CORRECTABLE_ERROR
|
||
|
||
//
|
||
// Check that we found an IOD that has a valid PCI or MC error.
|
||
// If it is not this is a pretty weird (fatal???) condition.
|
||
// For now, we'll just go return TRUE.
|
||
//
|
||
|
||
if( !bfoundIod ) {
|
||
|
||
#if 0 //HALDBG
|
||
DbgPrint( "HalpIodSoftErrorInterrupt: no PCI or MC error found.\n");
|
||
#endif
|
||
return;
|
||
}
|
||
|
||
//
|
||
// Check if an error is latched into the IOD. If not, goodbye.
|
||
//
|
||
|
||
if( IodCapErr.McErrValid == 0 ){
|
||
|
||
#if HALDBG
|
||
DbgPrint( "Iod soft error interrupt without valid MC error\n" );
|
||
#endif //HALDBG
|
||
|
||
return;
|
||
}
|
||
|
||
//
|
||
// Check for the correctable error bit.
|
||
//
|
||
|
||
if( (IodCapErr.CrdA == 0) && (IodCapErr.CrdB == 0) ){
|
||
|
||
#if HALDBG
|
||
DbgPrint( "Iod soft error interrupt without correctable error indicated in CapErr\n" );
|
||
#endif //HALDBG
|
||
|
||
}
|
||
|
||
|
||
//
|
||
// Increment the number of IOD correctable errors.
|
||
//
|
||
|
||
IodCorrectedErrors += 1;
|
||
|
||
//
|
||
// Read the rest of the error registers
|
||
//
|
||
|
||
IodMcErr0.all = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR0_CSRS_QVA))->McErr0
|
||
);
|
||
|
||
IodMcErr1.all = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR0_CSRS_QVA))->McErr1
|
||
);
|
||
#ifdef FORCE_CORRECTABLE_ERROR
|
||
IodMcErr0.all = 0x00bebad0;
|
||
IodMcErr1.all = 0x800f3f00;
|
||
#endif // FORCE_CORRECTABLE_ERROR
|
||
|
||
#if 0 // CAP/MDP Bug
|
||
IodMdpaStat.all = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR0_CSRS_QVA))->MdpaStat
|
||
);
|
||
|
||
IodMdpaSyn.all = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR0_CSRS_QVA))->MdpaSyn
|
||
);
|
||
|
||
IodMdpbStat.all = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR0_CSRS_QVA))->MdpbStat
|
||
);
|
||
|
||
IodMdpbSyn.all = (ULONG)READ_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR0_CSRS_QVA))->MdpbSyn
|
||
);
|
||
#endif
|
||
|
||
|
||
#if HALDBG
|
||
|
||
//
|
||
// Print a correctable error message to the debugger.
|
||
//
|
||
|
||
DbgPrint( "IOD Correctable Error Number %d, state follows: \n",
|
||
IodCorrectedErrors );
|
||
DbgPrint( "\tIOD_CAP_ERR: 0x%x\n", IodCapErr.all );
|
||
DbgPrint( "\tIOD_MC_ERR0: 0x%x\n", IodMcErr0.all );
|
||
DbgPrint( "\tIOD_MC_ERR1: 0x%x\n", IodMcErr1.all );
|
||
// DbgPrint( "\tIOD_MDPA_STAT: 0x%x\n", IodMdpaStat.all );
|
||
// DbgPrint( "\tIOD_MDPA_SYN: 0x%x\n", IodMdpaSyn.all );
|
||
// DbgPrint( "\tIOD_MDPB_STAT: 0x%x\n", IodMdpbStat.all );
|
||
// DbgPrint( "\tIOD_MDPB_SYN: 0x%x\n", IodMdpbSyn.all );
|
||
|
||
#endif //HALDBG
|
||
|
||
//
|
||
// Fill in the Correctable Error frame only if we've connected
|
||
// to the Correctable Error interrupt.
|
||
//
|
||
|
||
if (HalpLogCorrectableErrors) {
|
||
|
||
//
|
||
// Real error, get the interrupt object.
|
||
//
|
||
|
||
DispatchCode = (PULONG)PCR->InterruptRoutine[RawhideSoftErrVector];
|
||
InterruptObject = CONTAINING_RECORD(
|
||
DispatchCode,
|
||
KINTERRUPT,
|
||
DispatchCode
|
||
);
|
||
|
||
//
|
||
// Set various pointers so we can use them later.
|
||
//
|
||
|
||
pFrame = (PERROR_FRAME) Frame;
|
||
pTempFrame = (PERROR_FRAME) TempFrame;
|
||
pCorr = (PCORRECTABLE_ERROR) &pTempFrame->CorrectableFrame;
|
||
pRawCorr = (PRAWHIDE_CORRECTABLE_FRAME) (TempFrame +
|
||
sizeof(ERROR_FRAME) );
|
||
|
||
ErrorlogBusy = (PBOOLEAN)((PUCHAR)InterruptObject->ServiceContext +
|
||
sizeof(PERROR_FRAME));
|
||
ErrorlogSpinLock = (PKSPIN_LOCK)((PUCHAR)ErrorlogBusy + sizeof(PBOOLEAN));
|
||
|
||
//
|
||
// Clear the data structures that we will use.
|
||
//
|
||
|
||
RtlZeroMemory(&TempFrame, ENTIRE_FRAME_SIZE);
|
||
|
||
//
|
||
// Fill in the error frame information.
|
||
//
|
||
|
||
pTempFrame->Signature = ERROR_FRAME_SIGNATURE;
|
||
pTempFrame->LengthOfEntireErrorFrame = ENTIRE_FRAME_SIZE;
|
||
pTempFrame->FrameType = CorrectableFrame;
|
||
pTempFrame->VersionNumber = ERROR_FRAME_VERSION;
|
||
pTempFrame->SequenceNumber = IodCorrectedErrors;
|
||
pTempFrame->PerformanceCounterValue =
|
||
KeQueryPerformanceCounter(NULL).QuadPart;
|
||
|
||
//
|
||
// Check for lost error.
|
||
//
|
||
|
||
if( IodCapErr.LostMcErr ) {
|
||
|
||
//
|
||
// Since the error registers are locked from a previous error,
|
||
// we do not know where the error came from. Mark everything
|
||
// as UNIDENTIFIED.
|
||
//
|
||
|
||
pCorr->Flags.LostCorrectable = 1;
|
||
pCorr->Flags.LostAddressSpace = UNIDENTIFIED;
|
||
pCorr->Flags.LostMemoryErrorSource = UNIDENTIFIED;
|
||
}
|
||
|
||
pCorr->Flags.ErrorBitMasksValid = 0;
|
||
|
||
//
|
||
// Determine error type.
|
||
//
|
||
|
||
if (IodMcErr1.Addr39_32 & 0x80) {
|
||
|
||
//
|
||
// I/O ECC error occurred.
|
||
//
|
||
|
||
pCorr->Flags.AddressSpace = IO_SPACE;
|
||
pCorr->Flags.ExtendedErrorValid = 1;
|
||
pCorr->ErrorInformation.IoError.Interface = PCIBus;
|
||
pCorr->ErrorInformation.IoError.BusNumber = IodMcErr1.DevId & 0x3;
|
||
|
||
// We never alloc PCI address higher than 1 Gb for any PCI
|
||
// address space (sparse mem, dense mem, sparse I/O), so this
|
||
// trick works.
|
||
|
||
pCorr->ErrorInformation.IoError.BusAddress.LowPart =
|
||
((IodMcErr0.Addr & 0x3FFFFFFF) >> IO_BIT_SHIFT);
|
||
|
||
// The code below is not strictly correct. Based on the MC Bus
|
||
// spec, p.32, we can roughly say that McCmd<3> tells us whether
|
||
// there was a write or read transaction on the bus. If I looked
|
||
// at the spec harder, I might be able to distinguish a PIO op
|
||
// from a DMA operation.
|
||
|
||
pCorr->ErrorInformation.IoError.TransferType
|
||
= ((IodMcErr1.McCmd & 0x8) ? BUS_IO_READ : BUS_IO_WRITE);
|
||
|
||
} else {
|
||
|
||
//
|
||
// Memory ECC error occurred.
|
||
//
|
||
|
||
pCorr->Flags.AddressSpace = MEMORY_SPACE;
|
||
|
||
}
|
||
|
||
//
|
||
// Get the physical address where the error occurred.
|
||
//
|
||
|
||
if (IodMcErr1.Valid) {
|
||
pCorr->Flags.PhysicalAddressValid = 1;
|
||
pCorr->PhysicalAddress =
|
||
((ULONGLONG) (IodMcErr1.Addr39_32)) << 32;
|
||
pCorr->PhysicalAddress |= IodMcErr0.all;
|
||
}
|
||
|
||
//
|
||
// Scrub the error if it's any type of memory error.
|
||
//
|
||
|
||
if ( pCorr->Flags.AddressSpace == MEMORY_SPACE &&
|
||
pCorr->Flags.PhysicalAddressValid ) {
|
||
pCorr->Flags.ScrubError = 1;
|
||
}
|
||
|
||
//
|
||
// Acquire the spinlock.
|
||
//
|
||
|
||
KeAcquireSpinLock(ErrorlogSpinLock, &Irql );
|
||
|
||
//
|
||
// Check to see if an errorlog operation is in progress already.
|
||
//
|
||
|
||
if (!*ErrorlogBusy) {
|
||
|
||
//
|
||
// Set reporting processor information. Disregard at the moment.
|
||
//
|
||
|
||
pCorr->Flags.ProcessorInformationValid = 0;
|
||
|
||
//
|
||
// Copy the SYSTEM_INFORMATION from the uncorrectable frame
|
||
//
|
||
|
||
pCorr->System = PUncorrectableError->UncorrectableFrame.System;
|
||
|
||
//
|
||
//
|
||
// Set raw system information flag.
|
||
//
|
||
|
||
pCorr->Flags.SystemInformationValid = 1;
|
||
|
||
//
|
||
// Do the Rawhide-specific stuff here
|
||
//
|
||
|
||
pRawCorr->Revision = RAWHIDE_CORRECTABLE_FRAME_REVISION;
|
||
|
||
//
|
||
// Copy the CUD header from the uncorrectable frame
|
||
//
|
||
|
||
rawerr = (PRAWHIDE_UNCORRECTABLE_FRAME)
|
||
PUncorrectableError->UncorrectableFrame.RawSystemInformation;
|
||
if (rawerr) {
|
||
pRawCorr->CudHeader = rawerr->CudHeader;
|
||
}
|
||
|
||
//
|
||
// Fill in the rest of the dynamic portion of the
|
||
// correctable frame.
|
||
//
|
||
|
||
pRawCorr->CudHeader.ActiveCpus = BuildActiveCpus();
|
||
pRawCorr->ErrorSubpacketFlags.all = 0;
|
||
pRawCorr->ErrorSubpacketFlags.IodSubpacketPresent = 1;
|
||
pRawCorr->WhoAmI = HalpReadWhoAmI();
|
||
HalpBuildIodErrorFrame( McDeviceId, &(pRawCorr->IodErrorFrame) );
|
||
|
||
//
|
||
// Copy the information that we need to log.
|
||
//
|
||
|
||
RtlCopyMemory(&Frame,
|
||
&TempFrame,
|
||
ENTIRE_FRAME_SIZE);
|
||
|
||
pFrame->CorrectableFrame.RawSystemInformation =
|
||
(PVOID)((PUCHAR)pFrame + sizeof(ERROR_FRAME) );
|
||
|
||
pFrame->CorrectableFrame.RawSystemInformationLength =
|
||
sizeof(RAWHIDE_CORRECTABLE_FRAME);
|
||
|
||
|
||
//
|
||
// Put frame into ISR service context.
|
||
//
|
||
|
||
*(PERROR_FRAME *)InterruptObject->ServiceContext = pFrame;
|
||
|
||
} else {
|
||
|
||
//
|
||
// An errorlog operation is in progress already. We will
|
||
// set various lost bits and then get out without doing
|
||
// an actual errorloging call.
|
||
//
|
||
|
||
pFrame->CorrectableFrame.Flags.LostCorrectable = TRUE;
|
||
pFrame->CorrectableFrame.Flags.LostAddressSpace =
|
||
pTempFrame->CorrectableFrame.Flags.AddressSpace;
|
||
pFrame->CorrectableFrame.Flags.LostMemoryErrorSource =
|
||
pTempFrame->CorrectableFrame.Flags.MemoryErrorSource;
|
||
}
|
||
|
||
//
|
||
// Release the spinlock.
|
||
//
|
||
|
||
KeReleaseSpinLock(ErrorlogSpinLock, Irql );
|
||
|
||
//
|
||
// Dispatch to the secondary correctable interrupt service routine.
|
||
// The assumption here is that if this interrupt ever happens, then
|
||
// some driver enabled it, and the driver should have the ISR connected.
|
||
//
|
||
|
||
((PSECOND_LEVEL_DISPATCH)InterruptObject->DispatchAddress)(
|
||
InterruptObject,
|
||
InterruptObject->ServiceContext
|
||
);
|
||
|
||
}
|
||
|
||
|
||
|
||
//
|
||
// Clear state in MDPA and MDPB before clearing CAP_ERR
|
||
//
|
||
|
||
IodCapErr.all = 0;
|
||
IodCapErr.CrdA = 1;
|
||
IodCapErr.CrdB = 1;
|
||
IodMdpaStat.all = 0xffffffff;
|
||
IodMdpbStat.all = 0xffffffff;
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpaStat,
|
||
IodMdpaStat.all
|
||
);
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->MdpaStat,
|
||
IodMdpbStat.all
|
||
);
|
||
|
||
WRITE_IOD_REGISTER_NEW( McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->CapErr,
|
||
IodCapErr.all
|
||
);
|
||
|
||
return;
|
||
|
||
}
|
||
|
||
VOID
|
||
HalpIodHardErrorInterrupt(
|
||
VOID
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Handle a IOD hard (uncorrectable) error interrupt.
|
||
|
||
Arguments:
|
||
|
||
None.
|
||
|
||
Return Value:
|
||
|
||
None.
|
||
|
||
--*/
|
||
{
|
||
BOOLEAN bfoundIod;
|
||
MC_DEVICE_ID McDeviceId;
|
||
IOD_CAP_ERR IodCapErr;
|
||
IOD_WHOAMI IodWhoAmI;
|
||
KIRQL OldIrql;
|
||
|
||
|
||
//
|
||
// Raise IRQL to the highest level.
|
||
// Prevents us from taking other hard error interrupts
|
||
// during this one.
|
||
//
|
||
// Also, acquire a spin lock to keep entry
|
||
// to this code serialized.
|
||
//
|
||
|
||
KeRaiseIrql(HIGH_LEVEL, &OldIrql);
|
||
KiAcquireSpinLock(&HalpSystemInterruptLock);
|
||
|
||
//
|
||
// Find the IOD that latched the error.
|
||
//
|
||
|
||
bfoundIod = bFindIodError( &McDeviceId, &IodCapErr );
|
||
|
||
//
|
||
// Check that we found an IOD that has a valid PCI or MC error.
|
||
// If it is not this is a pretty weird (fatal???) condition.
|
||
// For now, we'll just return.
|
||
//
|
||
|
||
if( !bfoundIod ) {
|
||
|
||
#if 0 // HALDBG
|
||
DbgPrint( "HalpIodHardErrorInterrupt: no PCI or MC error found.\n");
|
||
#endif
|
||
//
|
||
// Lower IRQL to the previous level.
|
||
//
|
||
|
||
KiReleaseSpinLock(&HalpSystemInterruptLock);
|
||
KeLowerIrql(OldIrql);
|
||
return;
|
||
}
|
||
|
||
#if 1 // ecrfix
|
||
//
|
||
// See if this was an ISA legacy space access
|
||
// on PCI-1,2,3. If so, dismiss this interrupt.
|
||
//
|
||
|
||
if ( bHandleIsaError( McDeviceId, IodCapErr) ) {
|
||
|
||
//
|
||
// Lower IRQL to the previous level.
|
||
//
|
||
|
||
KiReleaseSpinLock(&HalpSystemInterruptLock);
|
||
KeLowerIrql(OldIrql);
|
||
return;
|
||
}
|
||
#endif
|
||
|
||
#if HALDBG
|
||
DbgPrint( "Hard Error Found on IOD (%x, %x)\n",
|
||
McDeviceId.Gid,
|
||
McDeviceId.Mid );
|
||
#endif //HALDBG
|
||
|
||
//
|
||
// Save IodWhoAmI
|
||
//
|
||
|
||
IodWhoAmI.all = HalpReadWhoAmI();
|
||
HalpIodWhoAmIOnError.all = IodWhoAmI.all;
|
||
|
||
//
|
||
// Handle the Fatal Error
|
||
//
|
||
|
||
bHandleFatalIodError( McDeviceId, FALSE );
|
||
|
||
KeBugCheckEx( DATA_BUS_ERROR,
|
||
0xbeadfeed, //ecrfix - quick error interrupt id
|
||
McDeviceId.all,
|
||
0,
|
||
(ULONG) PUncorrectableError );
|
||
|
||
|
||
}
|
||
|
||
BOOLEAN
|
||
bHandleFatalIodError(
|
||
MC_DEVICE_ID McDeviceId,
|
||
BOOLEAN bMachineCheck
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Handles the epilogue of a fatal IOD unccorrectable error
|
||
from either a machine check or IOD hard error interrupt.
|
||
|
||
Arguments:
|
||
|
||
McDeviceId - IOD on which the error was found
|
||
|
||
bMachineCheck - TRUE if we're handling a fatal machine check
|
||
FALSE if we're handling a fatal hard error interrupt
|
||
|
||
Return Value:
|
||
|
||
TRUE is returned if the IOD error has been handled and dismissed -
|
||
indicating that execution can continue. FALSE is return otherwise.
|
||
|
||
--*/
|
||
{
|
||
|
||
#if HALDBG
|
||
if (bMachineCheck ) {
|
||
DbgPrint( "Handling fatal error - machine check\n" );
|
||
} else {
|
||
DbgPrint( "Handling fatal error - hard error interrupt\n" );
|
||
}
|
||
#endif
|
||
|
||
//
|
||
// Clear the error condition in the MCES register.
|
||
//
|
||
// ecrfix - the way this is written, this will be done on hard
|
||
// error interrupts too (where there has been *no* machine check).
|
||
// I hope it will be benign in this case....
|
||
//
|
||
|
||
HalpUpdateMces( TRUE, TRUE );
|
||
|
||
//
|
||
// Proceed to display the error.
|
||
//
|
||
|
||
HalAcquireDisplayOwnership(NULL);
|
||
|
||
//
|
||
// Display the dreaded banner.
|
||
//
|
||
|
||
HalDisplayString( "\nFatal system hardware error.\n" );
|
||
|
||
#ifdef DUMPIODS
|
||
DumpAllIods(AllRegisters);
|
||
#endif
|
||
|
||
|
||
HalpIodReportFatalError( McDeviceId );
|
||
|
||
return( FALSE );
|
||
|
||
}
|
||
|
||
BOOLEAN
|
||
bFindIodError(
|
||
PMC_DEVICE_ID pMcDeviceId,
|
||
PIOD_CAP_ERR pIodCapErr
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Determines which IOD has an error latched in it.
|
||
|
||
Arguments:
|
||
|
||
None.
|
||
|
||
Return Value:
|
||
|
||
TRUE if an IOD was found with an error latched in CAP_ERR.
|
||
FALSE otherwise.
|
||
|
||
--*/
|
||
{
|
||
MC_ENUM_CONTEXT mcCtx;
|
||
ULONG numIods;
|
||
BOOLEAN bfoundIod;
|
||
IOD_CAP_ERR IodCapErr;
|
||
|
||
|
||
//
|
||
// Intialize enumerator.
|
||
//
|
||
|
||
numIods = HalpMcBusEnumStart ( HalpIodMask, &mcCtx );
|
||
|
||
#if 0 // HALDBG
|
||
DbgPrint( "FindIodError: Searching: %d Iods: ", numIods);
|
||
#endif // HALDBG
|
||
|
||
//
|
||
// Search each Iod and look for a PCI or McBus error.
|
||
//
|
||
|
||
while ( bfoundIod = HalpMcBusEnum( &mcCtx ) ) {
|
||
|
||
//
|
||
// Read the IOD error register to determine the source of the
|
||
// error.
|
||
//
|
||
|
||
#if 0 //HALDBG
|
||
DbgPrint( "(%d, %d) ", mcCtx.McDeviceId.Gid, mcCtx.McDeviceId.Mid);
|
||
#endif // HALDBG
|
||
|
||
IodCapErr.all = READ_IOD_REGISTER_NEW( mcCtx.McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->CapErr );
|
||
|
||
if( (IodCapErr.PciErrValid != 0) || (IodCapErr.McErrValid != 0) ){
|
||
break;
|
||
}
|
||
}
|
||
|
||
#if 0 // HALDBG
|
||
if (bfoundIod) {
|
||
DbgPrint( "Found!\n");
|
||
} else {
|
||
DbgPrint( "Error Not Found!\n");
|
||
}
|
||
#endif // HALDBG
|
||
|
||
//
|
||
// Return the McDeviceId and CapErr register contents
|
||
// of the first IOD that has an error.
|
||
//
|
||
|
||
*pMcDeviceId = mcCtx.McDeviceId;
|
||
pIodCapErr->all = IodCapErr.all;
|
||
|
||
return (bfoundIod);
|
||
}
|
||
|
||
BOOLEAN
|
||
bHandleIsaError(
|
||
MC_DEVICE_ID McDeviceId,
|
||
IOD_CAP_ERR IodCapErrIn
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Gives PCI-1,2,3 ISA legacy semantics for I/O and memory accesses.
|
||
|
||
Arguments:
|
||
|
||
None.
|
||
|
||
Return Value:
|
||
|
||
TRUE if the error was handled.
|
||
FALSE otherwise.
|
||
|
||
--*/
|
||
{
|
||
|
||
MC_ENUM_CONTEXT mcCtx;
|
||
MC_DEVICE_ID McDeviceIdWithMab;
|
||
ULONG numIods;
|
||
BOOLEAN bfoundIod;
|
||
IOD_CAP_ERR IodCapErr;
|
||
IOD_CAP_ERR IodCapErrMask;
|
||
|
||
|
||
//
|
||
// Find an IOD that has Mab set. If we do not find one, then
|
||
// we don't have this error.
|
||
//
|
||
|
||
|
||
numIods = HalpMcBusEnumStart ( HalpIodMask, &mcCtx );
|
||
|
||
//
|
||
// Search each Iod and look for a PCI or McBus error.
|
||
//
|
||
|
||
while ( bfoundIod = HalpMcBusEnum( &mcCtx ) ) {
|
||
|
||
//
|
||
// Read the IOD error register to determine who has Mab set
|
||
//
|
||
|
||
IodCapErr.all = READ_IOD_REGISTER_NEW( mcCtx.McDeviceId,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->CapErr );
|
||
|
||
if( (IodCapErr.PciErrValid == 1) &&
|
||
(IodCapErr.Perr == 0) &&
|
||
(IodCapErr.Serr == 0) &&
|
||
(IodCapErr.Mab == 1) &&
|
||
(IodCapErr.PteInv == 0) ) {
|
||
break;
|
||
}
|
||
}
|
||
|
||
//
|
||
// If we didn't find an IOD with Mab set, then do not handle this error.
|
||
//
|
||
|
||
if (!bfoundIod) {
|
||
return FALSE;
|
||
}
|
||
|
||
//
|
||
// Save the McDevice Id of the offending IOD
|
||
//
|
||
|
||
McDeviceIdWithMab.all = mcCtx.McDeviceId.all;
|
||
|
||
//
|
||
// This must be on a bus other than PCI0 for us to handle this error
|
||
// (PCI0 reads to non-existent ISA addresses will be fixed by by the
|
||
// PCI-EISA bridge. Thus we'll never get here on PCI0 unless there
|
||
// really is an error.)
|
||
//
|
||
|
||
if ( McDeviceIdWithMab.Mid != MidPci0 ) {
|
||
|
||
|
||
IOD_PCI_ERR1 IodPciErr1;
|
||
|
||
//
|
||
// Get the PCI address of the transaction that caused the MAB
|
||
//
|
||
|
||
IodPciErr1.PciAddress =
|
||
(ULONG) READ_IOD_REGISTER_NEW( McDeviceIdWithMab,
|
||
&((PIOD_ERROR_CSRS)(IOD_ERROR_CSRS_QVA))->PciErr1 );
|
||
|
||
|
||
//
|
||
// To be handled as an ISA legacy memory or I/O space read, the
|
||
// FaultingPciAddress must be in the range 0-1 Mb
|
||
//
|
||
|
||
if( IodPciErr1.PciAddress < __1MB ) {
|
||
|
||
//
|
||
// The error has matched all of our conditions. Assume that
|
||
// V0 has already been set to 0xffffffff. (This is a contract
|
||
// with the HAL access routines in iodio.s.)
|
||
//
|
||
|
||
IodCapErrMask.all = ALL_CAP_ERRORS;
|
||
HalpClearAllIods( IodCapErrMask );
|
||
|
||
return TRUE;
|
||
|
||
}
|
||
|
||
#if HALDBG
|
||
DbgPrint( "Failed checking for legacy ISA read:\n");
|
||
DbgPrint( "PciErr1 : %08x\n", IodPciErr1.PciAddress );
|
||
#endif //HALDBG
|
||
|
||
}
|
||
|
||
//
|
||
// We have a PCI Mab on PCI0. Do not handle this error.
|
||
//
|
||
|
||
return FALSE;
|
||
|
||
}
|
||
|
||
VOID
|
||
HalpErrorFrameString(
|
||
PUNCORRECTABLE_ERROR uncorr,
|
||
PUCHAR OutBuffer
|
||
)
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Append an Error message to the Uncorrectable Error Frame
|
||
string
|
||
|
||
Arguments:
|
||
|
||
uncorr - Pointer to the UNCORRECTABLE_ERROR frame.
|
||
|
||
OutBuffer - message to be appended.
|
||
(If null, no string is appended, and pCurrentString
|
||
is reset to NULL).
|
||
|
||
|
||
Return Value:
|
||
|
||
none.
|
||
|
||
--*/
|
||
{
|
||
ULONG len;
|
||
static PCHAR pCurrentString = NULL;
|
||
|
||
//
|
||
// If OutBuffer is NULL, reset pointer and flag
|
||
//
|
||
|
||
if (OutBuffer == NULL) {
|
||
pCurrentString = NULL;
|
||
if (uncorr) uncorr->Flags.ErrorStringValid = 0;
|
||
return;
|
||
}
|
||
|
||
//
|
||
// Uncorrectable frame valid?
|
||
//
|
||
|
||
if (uncorr) {
|
||
|
||
//
|
||
// On first error message:
|
||
// * Init pCurrentString to beginning of ErrorString
|
||
// * Set valid flag
|
||
//
|
||
|
||
if (pCurrentString == NULL) {
|
||
pCurrentString = uncorr->ErrorString;
|
||
uncorr->Flags.ErrorStringValid = 1;
|
||
}
|
||
|
||
//
|
||
// Append OutBuffer to ErrorString
|
||
//
|
||
|
||
len = strlen(OutBuffer);
|
||
strncpy(pCurrentString,
|
||
OutBuffer,
|
||
len);
|
||
|
||
//
|
||
// Zero-terminate the error string.
|
||
//
|
||
|
||
pCurrentString += len;
|
||
*pCurrentString = 0;
|
||
|
||
}
|
||
}
|
||
|
||
ULONG
|
||
BuildActiveCpus (
|
||
VOID
|
||
)
|
||
{
|
||
ULONG ActiveLogicalProcessors = HalpActiveProcessors;
|
||
ULONG ActivePhysicalCpus = 0;
|
||
ULONG i;
|
||
|
||
//
|
||
// Make a physical processor mask from the logical processor mask
|
||
//
|
||
|
||
for (i = 0; i < HalpNumberOfCpus; i++, ActiveLogicalProcessors >> 1) {
|
||
if (ActiveLogicalProcessors & 0x1) {
|
||
ActivePhysicalCpus |= (1 << (ULONG) (MCDEVID_TO_PHYS_CPU(
|
||
HalpLogicalToPhysicalProcessor[i].all)));
|
||
}
|
||
}
|
||
|
||
return (ActivePhysicalCpus);
|
||
|
||
}
|