Windows2003-3790/inetsrv/query/h/fa.hxx
2020-09-30 16:53:55 +02:00

457 lines
12 KiB
C++

//+-------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1991-1998.
//
// File: FA.hxx
//
// Contents: Finite automata (non-deterministic and deterministic) and the
// regular expression matcher built on them
//
// Classes: CFA
// CNFA
// CDFA
// CRegXpr
//
// History: 20-Jan-92 KyleP Created
// 19-Jun-92 KyleP Cleanup
//
//--------------------------------------------------------------------------
#pragma once
#include <xpr.hxx>
#include <state.hxx>
#include <xlatstat.hxx>
#include <xlatchar.hxx>
#include <timlimit.hxx>
// Forward declaration; this header only refers to the type through a pointer.
class CInternalPropertyRestriction;

//
// Wildcard-style metacharacters (meaning taken from the constant names).
//
const WCHAR wcAnySingle       = L'?';
const WCHAR wcAnyMultiple     = L'*';
const WCHAR wcDOSDot          = L'.';

//
// Repetition operators.  Note that these share code points with the
// wildcard characters above ('*' and '?').
//
const WCHAR wcRepeatZero      = L'*';
const WCHAR wcRepeatOne       = L'+';
const WCHAR wcRepeatZeroOrOne = L'?';

//
// Character range delimiters.
//
const WCHAR wcBeginRange      = L'[';
const WCHAR wcEndRange        = L']';
const WCHAR wcInvertRange     = L'^';
const WCHAR wcRangeSep        = L'-';

//
// The escape character is '|', not a backslash.
//
const WCHAR wcEscape          = L'|';

//
// Alternation and grouping.
//
const WCHAR wcOr              = L',';
const WCHAR wcBeginParen      = L'(';
const WCHAR wcEndParen        = L')';

//
// Counted repetition.  wcNextRepeat has the same value as wcOr (',').
//
const WCHAR wcBeginRepeat     = L'{';
const WCHAR wcEndRepeat       = L'}';
const WCHAR wcNextRepeat      = L',';

//
// Largest WCHAR value.
//
const WCHAR wcLastValidChar   = 0xFFFF;
//
// Sets of 'top level' special characters.  A character listed here is
// special by itself; characters that come *on or after* one of these may
// take on a special meaning as well.
//
// awcSpecialRegex / acSpecialRegex hold the same four characters
// (wcAnySingle, wcAnyMultiple, wcDOSDot, wcEscape) in wide and narrow form.
// awcSpecialRegexReverse additionally lists '+', ']', ')', ',' and '}'
// -- presumably for scanning a pattern back to front; confirm against the
// callers.
//
const WCHAR awcSpecialRegex[]        = L"?*.|";
const char  acSpecialRegex[]         = "?*.|";
const WCHAR awcSpecialRegexReverse[] = L"?*.|+]),}";
//+-------------------------------------------------------------------------
//
// Class: CFA
//
// Purpose: Base class for finite automata. Keeps a table of pointers to
// CFAState objects together with a state count.
//
// History: 20-Jan-92 KyleP Created
//
// Notes: Every member, including the constructors and the destructor,
// is protected or private, so the class is only usable through
// a derived class. In this header only CDFA derives from it.
// The destructor is not virtual.
//
//--------------------------------------------------------------------------
class CFA
{
protected:
// Creates an empty automaton (zero count, null state table).
inline CFA();
// Copy constructor; defined out of line.
CFA( CFA const & src );
~CFA();
// Adds [pState] to the automaton; defined out of line.
void Add( CFAState * pState );
// Returns the state for index [iState]; defined out of line.
CFAState * Get( unsigned iState );
// Returns _cTotal.
inline unsigned Count();
private:
// Value reported by Count().
unsigned _cTotal;
// Table of state pointers; null until something is stored.
CFAState ** _ppState;
};
//+-------------------------------------------------------------------------
//
// Class: CNFA
//
// Purpose: Non-deterministic finite automata.
//
// History: 20-Jan-92 Kylep Created
//
// Notes: Built from a pattern string. States are kept by value in
// _aState and are addressed with 1-based state numbers (see
// CNFA::Get). This class does not derive from CFA.
//
//--------------------------------------------------------------------------
class CNFA
{
public:
// Builds the automaton for pattern [pwcs]; [fCaseSens] selects
// case-sensitive matching (per its name).
CNFA( WCHAR const * pwcs, BOOLEAN fCaseSens );
CNFA( CNFA const & src );
~CNFA();
// Returns the number of the start state (_iStart).
inline unsigned StartState();
// Epsilon closure of a single state / of a state set, written to [ssOut].
void EpsClosure( unsigned StateNum, CStateSet & ssOut );
void EpsClosure( CStateSet & ssIn, CStateSet & ssOut );
// States reachable from [ssIn] on [symbol]; defaults to the epsilon symbol.
void Move( CStateSet & ssIn, CStateSet & ssOut, unsigned symbol = symEpsilon );
// Presumably TRUE when [ss] contains a final state -- defined out of line.
BOOLEAN IsFinal( CStateSet & ss );
// Returns the character translator (_chars).
inline CXlatChar const & Translate() const;
// Returns _iNextState.
inline unsigned NumStates() const;
private:
// Returns state number [iState], growing _aState when needed.
inline CNFAState * Get( unsigned iState );
// Pattern parsing helpers; defined out of line.
void Parse( WCHAR const * wcs,
unsigned * iStart,
unsigned * iEnd,
WCHAR const * * pwcsEnd = 0,
WCHAR wcHalt = 0 );
void ParseRepeat( WCHAR const * & wcs,
unsigned & cRepeat1,
unsigned & cRepeat2 );
void FindCharClasses( WCHAR const * wcs );
// Copies the sub-automaton [iStart]..[iEnd]; the new start/end state
// numbers come back through [piNewStart]/[piNewEnd] (per the names).
void Replicate( unsigned iStart,
unsigned iEnd,
unsigned * piNewStart,
unsigned * piNewEnd );
unsigned _iStart; // Start state
// Value reported by NumStates().
unsigned _iNextState;
static WCHAR * _wcsNull;
CXlatChar _chars; // Wide character translator
XArray<CNFAState> _aState; // State array.
#if (CIDBG == 1)
public:
//
// Debug methods.
//
void Display();
#endif // (CIDBG == 1)
};
//+-------------------------------------------------------------------------
//
// Class: CDFA
//
// Purpose: Deterministic finite automata.
//
// History: 20-Jan-92 Kylep Created
//
// Notes: The DFA owns the NFA it is derived from (_nfa) and keeps its
//        transition table in _xStateTrans, laid out as one row of
//        ( NumClasses() + 1 ) entries per state (see CDFA::Move).
//        _timeLimit is held by reference, so the CTimeLimit passed
//        to the constructor is expected to outlive this object.
//
//--------------------------------------------------------------------------
class CDFA : public CFA
{
public:

    // Builds the automaton for pattern [pwcs].
    CDFA( WCHAR const * pwcs, CTimeLimit & timeLimit, BOOLEAN fCaseSens );

    // Copy constructor.  The parameter is called [src], matching CFA and
    // CNFA, rather than reusing the class name.
    CDFA( CDFA const & src );

    ~CDFA();

    // Runs the automaton over [wcs]; defined out of line.
    BOOLEAN Recognize( WCHAR const * wcs );

private:

    // Presumably the initialization shared by both constructors.
    void CommonCtor( );

    // Table accessors, defined inline below.
    inline BOOLEAN  IsFinal( unsigned state );
    inline unsigned Move( unsigned state, unsigned sym ) const;
    inline void     AddTransition( unsigned state, unsigned sym, unsigned newstate );
    inline BOOLEAN  IsComputed( unsigned state );

    // Table maintenance; defined out of line.
    void Add( unsigned state, BOOLEAN fFinal );
    void Realloc();

#   if CIDBG == 1
    void ValidateStateTransitions();
#   endif // CIDBG == 1

    CNFA             _nfa;          // This must be the first member variable.
    CXlatState       _xs;           // Translate NFA state set to DFA state.
    unsigned         _stateStart;   // Starting DFA state.
    unsigned         _cState;       // Number of states
    XArray<unsigned> _xStateTrans;  // Array of state transitions.
    XArray<BOOLEAN>  _xStateFinal;  // _xStateFinal[i] TRUE if i is final state.
    CReadWriteAccess _rwa;          // Locking.
    CTimeLimit &     _timeLimit;    // Execution time limit
};
//+-------------------------------------------------------------------------
//
// Class: CRegXpr (regx)
//
// Purpose: Performs regular expression matches on properties
//
// History: 15-Apr-92 KyleP Created
//
// Notes: A CXpr whose matching engine is the CDFA member _dfa.
//
//--------------------------------------------------------------------------
class CRegXpr : public CXpr
{
public:
// Builds the expression from restriction [prst]; [timeLimit] has the
// type CDFA's constructor takes (presumably forwarded to _dfa).
CRegXpr( CInternalPropertyRestriction * prst, CTimeLimit& timeLimit );
CRegXpr( CRegXpr const & regxpr );
virtual ~CRegXpr() {};
// CXpr overrides; defined out of line.
virtual CXpr * Clone();
virtual void SelectIndexing( CIndexStrategy & strategy );
virtual BOOL IsMatch( CRetriever & obj );
private:
CXprPropertyValue _pxpval; // Retrieves value from database
XPtr<CRestriction> _xrstContentHelper; // Use content indexing
CStorageVariant _varPrefix; // Fixed prefix (for value indexing)
CDFA _dfa; // Finite automata engine
ULONG _ulCodePage; // Code page of system
};
//+-------------------------------------------------------------------------
//
// Member: CFA::CFA, protected
//
// Synopsis: Initializes a generic finite automata with no states: the
//           state count is zero and the state table pointer is null.
//
// History: 20-Jan-92 KyleP Created
//
//--------------------------------------------------------------------------
inline CFA::CFA() : _cTotal( 0 ), _ppState( 0 )
{
    // Nothing to do beyond member initialization.
}
//+-------------------------------------------------------------------------
//
// Member: CFA::Count, protected
//
// Returns: The count of states (_cTotal).
//
// History: 20-Jan-92 KyleP Created
//
//--------------------------------------------------------------------------
inline unsigned CFA::Count()
{
    return _cTotal;
}
//+-------------------------------------------------------------------------
//
// Member: CNFA::Get, private
//
// Synopsis: Returns a pointer to a state, growing the state array first
//           if the requested state does not exist yet.
//
// Arguments: [iState] -- Index of state.  State numbers are 1-based:
//                        state n lives in slot n - 1 of _aState, so
//                        [iState] must not be 0.
//
// Returns: The appropriate state.
//
// History: 20-Jan-92 Kylep Created
//
// Notes: When [iState] is beyond the current array, a new array of
//        iState + 10 entries is built.  Existing states are carried
//        over with Init( oldState ) and each new slot i is initialized
//        with Init( i + 1 ), i.e. its own 1-based state number.
//        Pointers returned by earlier calls refer to the old array and
//        must not be used after a call that grows it.
//
//--------------------------------------------------------------------------
inline CNFAState * CNFA::Get( unsigned iState )
{
    if ( iState > _aState.Count() )
    {
        unsigned cNewState = iState + 10;
        XArray<CNFAState> xState( cNewState );

        //
        // The index is shared by both loops, so it is declared outside of
        // them.  (Declaring it in the first for-statement only works with
        // the non-conforming legacy for-scope rule.)
        //

        unsigned i;

        for ( i = 0; i < _aState.Count(); i++ )
            xState[i].Init( _aState[i] );

        for ( ; i < cNewState; i++ )
            xState[i].Init( i + 1 );

        _aState.Free();
        _aState.Set( cNewState, xState.Acquire() );
    }

    return &_aState[ iState - 1 ];
}
//+-------------------------------------------------------------------------
//
// Member: CNFA::StartState, public
//
// Returns: The number of the start state (_iStart).
//
// History: 20-Jan-92 Kylep Created
//
//--------------------------------------------------------------------------
inline unsigned CNFA::StartState()
{
    return _iStart;
}
//+-------------------------------------------------------------------------
//
// Member: CNFA::Translate, public
//
// Returns: A const reference to the character translator (_chars).  The
//          reference stays valid only as long as this CNFA does.
//
// History: 20-Jan-92 Kylep Created
//
//--------------------------------------------------------------------------
inline CXlatChar const & CNFA::Translate() const
{
    return _chars;
}
//+-------------------------------------------------------------------------
//
// Member: CNFA::NumStates, public
//
// Returns: The count of states currently in the automata, as tracked by
//          _iNextState.
//
// History: 20-Jan-92 Kylep Created
//
//--------------------------------------------------------------------------
inline unsigned CNFA::NumStates() const
{
    return _iNextState;
}
//+-------------------------------------------------------------------------
//
// Member: CDFA::IsFinal, private
//
// Arguments: [state] -- Index of state.
//
// Returns: TRUE if state [state] is final, as recorded in _xStateFinal.
//
// History: 20-Jan-92 Kylep Created
//
// Notes: [state] is used to index _xStateFinal directly; no range check
//        is made here.
//
//--------------------------------------------------------------------------
inline BOOLEAN CDFA::IsFinal( unsigned state )
{
    return _xStateFinal[ state ];
}
//+-------------------------------------------------------------------------
//
// Member: CDFA::Move, public
//
// Arguments: [state] -- Index of state.
// [sym] -- Input symbol
//
// Returns: The new state reached from state [state] on an input
// symbol [sym].
//
// History: 20-Jan-92 Kylep Created
//
// Notes: If this function is ever changed to modify data, then
// you need to also investigate the locking in CDFA::Recognize.
//
//--------------------------------------------------------------------------
inline unsigned CDFA::Move( unsigned state, unsigned sym ) const
{
return( _xStateTrans[state * (_nfa.Translate().NumClasses() + 1) + sym] );
}
//+-------------------------------------------------------------------------
//
// Member: CDFA::AddTransition, private
//
// Effects: Adds a transtion from state [state] on input symbol [sym]
// to state [newstate].
//
// Arguments: [state] -- Index of state.
// [sym] -- Input symbol.
// [newstate] -- Index of state
//
// History: 20-Jan-92 Kylep Created
//
//--------------------------------------------------------------------------
inline void CDFA::AddTransition( unsigned state, unsigned sym, unsigned newstate )
{
_xStateTrans[ state * ( _nfa.Translate().NumClasses() + 1 ) + sym ] =
newstate;
}
//+-------------------------------------------------------------------------
//
// Member: CDFA::IsComputed, private
//
// Arguments: [state] -- Index of state.
//
// Returns: TRUE if the DFA contains a transition mapping for state
//          [state].
//
// History: 20-Jan-92 Kylep Created
//
// Notes: An uncomputed state is one for which IsFinal has not been
//        computed. All other transitions are automatically set to
//        stateUncomputed at allocation time.
//
//        The transition on symbol 0 acts as the marker: a state
//        counts as computed when it is within range
//        ( state <= _cState ) and that entry is not stateUndefined.
//        The range test comes first so the table is never read for an
//        out-of-range state.
//
//--------------------------------------------------------------------------
inline BOOLEAN CDFA::IsComputed( unsigned state )
{
    bool const fInRange = ( state <= _cState );

    return ( fInRange && ( Move( state, 0 ) != stateUndefined ) );
}