Windows2003-3790/inetsrv/query/deflang/noise.cxx
2020-09-30 16:53:55 +02:00

664 lines
18 KiB
C++

//+---------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1991 - 2000.
//
// File: NOISE.CXX
//
// Contents: Noise list
//
// Classes: CNoiseList, NoiseListInit, NoiseListEmpty
// CLString, CStringList, CStringTable
//
// History: 11-Jul-91 BartoszM Created
//
//----------------------------------------------------------------------------
#include <pch.cxx>
#pragma hdrstop
#include <noise.hxx>
//+---------------------------------------------------------------------------
//
// Member: CLString::CLString, public
//
// Synopsis: Initializes and links a string list element
//
// Arguments: [cb] -- length
// [buf] -- string
// [next] -- next link in the chain
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
CLString::CLString ( UINT cb, const BYTE* buf, CLString* next )
{
    // The stored length is always the caller's value; only the number of
    // bytes copied differs between checked and retail builds.
    _cb = cb;

#if CIDBG == 1
    // Checked builds copy one byte beyond the stated length.
    const UINT cbCopy = cb + 1;
#else
    const UINT cbCopy = cb;
#endif

    memcpy ( _buf, buf, cbCopy );

    // Chain this element in front of the one supplied by the caller.
    _next = next;
}
//+---------------------------------------------------------------------------
//
// Member: CLString::operator new, public
//
// Synopsis: Allocates a string list element
//
// Arguments: [n] -- size of class instance
// [cb] -- length of string buffer needed
//
// History: 10 Apr 96 AlanW Created.
//
//----------------------------------------------------------------------------
void *
CLString::operator new ( size_t n, UINT cb )
{
#if CIDBG == 1
cb++;
#endif
return new BYTE [n+cb];
}
//+---------------------------------------------------------------------------
//
// Member: CStringList::~CStringList, public
//
// Synopsis: Free linked list
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
CStringList::~CStringList()
{
    // Pop elements off the front one at a time. The successor is read and
    // stored in _head before the current element is deleted, so the list
    // is never walked through freed memory.
    for ( CLString* pCur = _head; pCur != 0; pCur = _head )
    {
        _head = pCur->Next();
        delete pCur;
    }
}
//+---------------------------------------------------------------------------
//
// Member: CStringList::Add, public
//
// Synopsis: Adds a string to list
//
// Arguments: [cb] -- length
// [str] -- string
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
void CStringList::Add ( UINT cb, const BYTE * str )
{
    // Allocate an element with room for cb string bytes, link the current
    // head behind it, and make it the new head of the list.
    CLString* pNew = new (cb) CLString ( cb, str, _head );
    _head = pNew;
}
//+---------------------------------------------------------------------------
//
// Member: CStringList::Find, public
//
// Synopsis: Returns TRUE if string found in the list, FALSE otherwise
//
// Arguments: [cb] -- length
// [str] -- string
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
BOOL CStringList::Find ( UINT cb, const BYTE* str ) const
{
    // Linear scan of the chain; stop at the first element that compares
    // equal to the given length/bytes pair.
    for ( CLString* pCur = _head; pCur != 0; pCur = pCur->Next() )
    {
        if ( pCur->Equal ( cb, str ) )
            return TRUE;
    }

    // Reached the end of the chain without a match.
    return FALSE;
}
#if CIDBG == 1
void CStringList::Dump () const
{
    // Debug-only: dump every element in chain order, then end the line.
    for ( CLString * pCur = _head; pCur; pCur = pCur->Next() )
        pCur->Dump();

    ciDebugOut (( DEB_ITRACE, "\n" ));
}
#endif // CIDBG == 1
//+---------------------------------------------------------------------------
//
// Member: CStringTable::CStringTable, public
//
// Synopsis: Create hash table of given size
//
// Arguments: [size] -- size
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
CStringTable::CStringTable( UINT size )
{
    // Record the bucket count first, then allocate one default-constructed
    // string list per bucket.
    _size = size;

    CStringList* const pBuckets = new CStringList[size];
    _bucket = pBuckets;
}
//+---------------------------------------------------------------------------
//
// Member: CStringTable::~CStringTable, public
//
// Synopsis: Free linked lists
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
CStringTable::~CStringTable()
{
// Array delete pairs with the new CStringList[size] in the constructor.
// It runs ~CStringList on every bucket, and each of those frees its own
// chain of elements.
delete [] _bucket;
}
//+---------------------------------------------------------------------------
//
// Member: CStringTable::Add, public
//
// Synopsis: Add a string to hash table
//
// Arguments: [cb] -- size
// [str] -- string
// [hash] -- precomputed hash value
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
void CStringTable::Add ( UINT cb, const BYTE* str, UINT hash )
{
    // Map the precomputed hash to a bucket, then add the string to that
    // bucket's list.
    CStringList& bucket = _bucket[_index(hash)];
    bucket.Add ( cb, str );
}
#if CIDBG == 1
void CStringTable::Dump () const
{
    // Debug-only: print the index and contents of every non-empty bucket.
    for ( unsigned iBucket = 0; iBucket < _size; iBucket++ )
    {
        if ( _bucket[iBucket].IsEmpty() )
            continue;

        ciDebugOut (( DEB_ITRACE, "%3d: ", iBucket ));
        _bucket[iBucket].Dump();
    }
}
#endif // CIDBG == 1
//+---------------------------------------------------------------------------
//
// Member: CNoiseList::CNoiseList
//
// Synopsis: constructor for noise list
//
// Effects: gets buffers from key repository
//
// Arguments: [table] -- hash table of noise words to check words against
// [krep] -- key repository to give words to.
//
// History: 05-June-91 t-WadeR Created.
//
//----------------------------------------------------------------------------
// Binds the noise list to a noise-word table and a key repository, and
// starts with zeroed counters and no noise word seen.
CNoiseList::CNoiseList( const CStringTable& table, PKeyRepository& krep )
: _krep(krep),
_table(table),
_cNoiseWordsSkipped(0),
_cNonNoiseAltWords(0),
_fFoundNoise( FALSE )
{
// Fetch the repository's buffer pointers: address of the byte count,
// the buffer itself, and the address of the value that PutWord()
// increments (presumably the current occurrence -- confirm in krep).
krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc );
// Remember the count reported at construction; GetBuffers() writes this
// value back into *_pcbOutBuf each time it hands the buffer out.
_cbMaxOutBuf = *_pcbOutBuf;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseList::GetBuffers
//
// Synopsis: Returns address of normalizer's input buffers
//
// Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
// [ppbInBuf] -- pointer to pointer to receive address of buffer
//
// History: 05-June-91 t-WadeR Created.
//
//----------------------------------------------------------------------------
void CNoiseList::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
    // There is no separate input buffer: the caller writes directly into
    // the key repository's output buffer. Restore the byte count to the
    // value captured at construction before handing the buffer out.
    *_pcbOutBuf = _cbMaxOutBuf;

    *ppcbInBuf = _pcbOutBuf;
    *ppbInBuf = _pbOutBuf;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseList::GetFlags
//
// Synopsis: Returns address of ranking and range flags
//
// Arguments: [ppRange] -- range flag
// [ppRank] -- rank flag
//
// History: 11-Feb-92 BartoszM Created.
//
//----------------------------------------------------------------------------
void CNoiseList::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
{
// Pure delegation: the caller's out-pointers are passed unchanged to the
// key repository's GetFlags.
_krep.GetFlags ( ppRange, ppRank );
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseList::PutWord
//
// Synopsis: If word isn't a noise word, passes it to the key repository
//
// Effects: calls _krep.PutKey
//
// Arguments: [hash] -- precomputed hash value
//
// History: 05-June-91 t-WadeR Created stub.
//
//----------------------------------------------------------------------------
void CNoiseList::PutWord ( UINT hash )
{
    // The word under test is the current content of the output buffer.
    if ( !_table.Find ( *_pcbOutBuf, _pbOutBuf, hash ) )
    {
        //
        // Not a noise word: emit it. The skipped count passed along covers
        // noise words at earlier occurrences only, and restarts at zero
        // once reported.
        //
        _krep.PutKey( _cNoiseWordsSkipped );
        _cNoiseWordsSkipped = 0;
    }
    else
    {
        _fFoundNoise = TRUE;

        //
        // When every alternate word at this occurrence was also a noise
        // word, the occurrence as a whole counts as one skipped noise word.
        //
        if ( 0 == _cNonNoiseAltWords )
            ++_cNoiseWordsSkipped;
    }

    // This word closes the current occurrence: clear the per-occurrence
    // alternate-word count and move on to the next occurrence.
    _cNonNoiseAltWords = 0;
    (*_pocc)++;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseList::PutAltWord
//
// Synopsis: If word isn't a noise word, passes it to the key repository
//
// Effects: calls _krep.PutKey
//
// Arguments: [hash] -- precomputed hash value
//
// History: 03-May-95 SitaramR Created
//
//----------------------------------------------------------------------------
void CNoiseList::PutAltWord ( UINT hash )
{
    // A noise word is only noted here; the skip count for the occurrence
    // is settled by the PutWord call that ends it.
    if ( _table.Find ( *_pcbOutBuf, _pbOutBuf, hash ) )
    {
        _fFoundNoise = TRUE;
        return;
    }

    //
    // Non-noise alternate word. It is not the last word at this
    // occurrence, so count it towards the non-noise alternates seen here.
    //
    _cNonNoiseAltWords++;

    //
    // Emit the word. The skipped count refers to noise words at previous
    // occurrences only, and restarts at zero once reported.
    //
    _krep.PutKey( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseList::StartAltPhrase
//
// History: 29-Nov-94 SitaramR Created
//
//----------------------------------------------------------------------------
void CNoiseList::StartAltPhrase()
{
// Pass the pending count of skipped noise words to the key repository
// along with the start-of-alternate-phrase notification...
_krep.StartAltPhrase( _cNoiseWordsSkipped );
// ...then restart the count, since it has now been reported.
_cNoiseWordsSkipped = 0;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseList::EndAltPhrase
//
// History: 29-Nov-94 SitaramR Created
//
//----------------------------------------------------------------------------
void CNoiseList::EndAltPhrase()
{
// Pass the pending count of skipped noise words to the key repository
// along with the end-of-alternate-phrase notification...
_krep.EndAltPhrase( _cNoiseWordsSkipped );
// ...then restart the count, since it has now been reported.
_cNoiseWordsSkipped = 0;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListInit::CNoiseListInit
//
// Synopsis: Creates a hash table to be filled
//
// Arguments: [size] -- size of the hash table (possibly prime #)
//
// History: 15-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
CNoiseListInit::CNoiseListInit ( UINT size )
{
// Allocate the hash table that PutWord()/PutAltWord() fill in.
// NOTE(review): the release of _table is not visible in this file --
// confirm which object ends up owning it.
_table = new CStringTable ( size );
// END_CONSTRUCTION is a project macro defined outside this file.
// NOTE(review): presumably marks the object as fully constructed for the
// project's unwind machinery -- confirm.
END_CONSTRUCTION( CNoiseListInit );
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListInit::GetBuffers
//
// Synopsis: Returns address of repository's input buffers
//
// Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
// [ppbInBuf] -- pointer to pointer to receive address of buffer
//
// History: 15-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
void CNoiseListInit::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
// Set the key's count to MAXKEYSIZE before exposing it.
// NOTE(review): presumably so the writer sees the full capacity as the
// available size -- confirm against the key class.
_key.SetCount(MAXKEYSIZE);
// Hand out the address of the key's count and its writable buffer; the
// caller writes the word directly into _key.
*ppcbInBuf = _key.GetCountAddress();
*ppbInBuf = _key.GetWritableBuf();
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListInit::PutWord
//
// Synopsis: Puts a key into the hash table
//
// Arguments: [hash] -- hash value
//
// History: 15-Jul-91 BartoszM Created
//
//----------------------------------------------------------------------------
void CNoiseListInit::PutWord ( UINT hash )
{
// Add the current contents of _key (count and bytes) to the table;
// the table uses hash to pick the bucket.
_table->Add ( _key.Count(), _key.GetBuf(), hash );
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListInit::PutAltWord
//
// Synopsis: Puts a key into the hash table
//
// Arguments: [hash] -- hash value
//
// History: 03-May-95 SitaramR Created
//
//----------------------------------------------------------------------------
void CNoiseListInit::PutAltWord ( unsigned hash )
{
// While the table is being built, an alternate word is stored exactly
// like a regular word.
PutWord( hash );
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListEmpty::CNoiseListEmpty
//
// Synopsis: constructor for a default empty noise list
//
// Effects: gets buffers from key repository
//
// Arguments: [krep] -- key repository to give words to.
// [ulFuzzy] -- Fuzziness of query
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
// Binds the empty noise list to a key repository, records the generate
// method (ulFuzzy), and starts with zeroed counters and no noise word seen.
CNoiseListEmpty::CNoiseListEmpty( PKeyRepository& krep, ULONG ulFuzzy )
: _krep(krep),
_ulGenerateMethod(ulFuzzy),
_cNoiseWordsSkipped(0),
_cNonNoiseAltWords(0),
_fFoundNoise( FALSE )
{
// Fetch the repository's buffer pointers: address of the byte count,
// the buffer itself, and the address of the value that PutWord()
// increments (presumably the current occurrence -- confirm in krep).
krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc );
// Remember the count reported at construction; GetBuffers() writes this
// value back into *_pcbOutBuf each time it hands the buffer out.
_cbMaxOutBuf = *_pcbOutBuf;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListEmpty::GetBuffers
//
// Synopsis: Returns address of normalizer's input buffers
//
// Arguments: [ppcbInBuf] -- pointer to pointer to size of input buffer
// [ppbInBuf] -- pointer to pointer to receive address of buffer
//
// History: 16-Jul-91 BartoszM Created.
//
//----------------------------------------------------------------------------
void CNoiseListEmpty::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
    // There is no separate input buffer: the caller writes directly into
    // the key repository's output buffer. Restore the byte count to the
    // value captured at construction before handing the buffer out.
    *_pcbOutBuf = _cbMaxOutBuf;

    *ppcbInBuf = _pcbOutBuf;
    *ppbInBuf = _pbOutBuf;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListEmpty::GetFlags
//
// Synopsis: Returns address of ranking and range flags
//
// Arguments: [ppRange] -- range flag
// [ppRank] -- rank flag
//
// History: 11-Feb-92 BartoszM Created.
//
//----------------------------------------------------------------------------
void CNoiseListEmpty::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
{
// Pure delegation: the caller's out-pointers are passed unchanged to the
// key repository's GetFlags.
_krep.GetFlags ( ppRange, ppRank );
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListEmpty::PutWord
//
// Synopsis: If word isn't a noise word, passes it to the key repository
//
// Effects: calls _krep.PutKey
//
// Arguments: [hash] -- hash value (ignored)
//
// History: 16-Jul-91 BartoszM Created
//
// Notes: Filters out one letter words, unless it is a prefix (*) query
//
//----------------------------------------------------------------------------
void CNoiseListEmpty::PutWord ( UINT )
{
    //
    // There is no real noise list here, but PutBreak() is modeled as a skip
    // of the appropriate number of noise words, and 1 letter words count as
    // noise words. A word passes through when this is a prefix (*) query or
    // when its length exceeds NOISE_WORD_LENGTH; the length is in bytes and
    // includes a 1 byte prefix.
    //
    if ( _ulGenerateMethod == GENERATE_METHOD_PREFIX || *_pcbOutBuf > NOISE_WORD_LENGTH )
    {
        //
        // Emit the word. The skipped count refers to noise words at
        // previous occurrences only, and restarts at zero once reported.
        //
        _krep.PutKey( _cNoiseWordsSkipped );
        _cNoiseWordsSkipped = 0;
    }
    else
    {
        _fFoundNoise = TRUE;

        //
        // When every alternate word at this occurrence was also a noise
        // word, the occurrence as a whole counts as one skipped noise word.
        //
        if ( 0 == _cNonNoiseAltWords )
            ++_cNoiseWordsSkipped;
    }

    // This word closes the current occurrence: clear the per-occurrence
    // alternate-word count and move on to the next occurrence.
    _cNonNoiseAltWords = 0;
    (*_pocc)++;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListEmpty::PutAltWord
//
// Synopsis: If word isn't a noise word, passes it to the key repository
//
// Effects: calls _krep.PutKey
//
// Arguments: [hash] -- hash value (ignored)
//
// History: 03-May-95 SitaramR Created
//
// Notes: Filters out one letter words, unless it is a prefix (*) query
//
//----------------------------------------------------------------------------
void CNoiseListEmpty::PutAltWord ( UINT hash )
{
    //
    // There is no real noise list here, but 1 letter words count as noise
    // words unless this is a prefix (*) query. The length is in bytes and
    // includes a 1 byte prefix. A noise word is only noted; the skip count
    // for the occurrence is settled by the PutWord call that ends it.
    //
    if ( _ulGenerateMethod != GENERATE_METHOD_PREFIX && *_pcbOutBuf <= NOISE_WORD_LENGTH )
    {
        _fFoundNoise = TRUE;
        return;
    }

    //
    // Non-noise alternate word. It is not the last word at this
    // occurrence, so count it towards the non-noise alternates seen here.
    //
    _cNonNoiseAltWords++;

    //
    // Emit the word. The skipped count refers to noise words at previous
    // occurrences only, and restarts at zero once reported.
    //
    _krep.PutKey( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListEmpty::StartAltPhrase
//
// Synopsis: Pass on StartAltPhrase to key repository
//
// History: 20-Feb-95 SitaramR Created
//
//----------------------------------------------------------------------------
void CNoiseListEmpty::StartAltPhrase()
{
// Pass the pending count of skipped noise words to the key repository
// along with the start-of-alternate-phrase notification...
_krep.StartAltPhrase( _cNoiseWordsSkipped );
// ...then restart the count, since it has now been reported.
_cNoiseWordsSkipped = 0;
}
//+---------------------------------------------------------------------------
//
// Member: CNoiseListEmpty::EndAltPhrase
//
// Synopsis: Pass on EndAltPhrase to key repository
//
// History: 20-Feb-95 SitaramR Created
//
//----------------------------------------------------------------------------
void CNoiseListEmpty::EndAltPhrase()
{
// Pass the pending count of skipped noise words to the key repository
// along with the end-of-alternate-phrase notification...
_krep.EndAltPhrase( _cNoiseWordsSkipped );
// ...then restart the count, since it has now been reported.
_cNoiseWordsSkipped = 0;
}