Windows2003-3790/inetsrv/iis/svcs/cmp/asp/hashing.cpp
2020-09-30 16:53:55 +02:00

1020 lines
26 KiB
C++

/*===================================================================
Microsoft Denali
Microsoft Confidential.
Copyright 1996 Microsoft Corporation. All Rights Reserved.
Component: Link list and Hash table
File: Hashing.cpp
Owner: PramodD
This is the Link list and Hash table source file.
===================================================================*/
#include "denpre.h"
#pragma hdrstop
#include "memchk.h"
/*===================================================================
::DefaultHash
this is a hash algorithm that is highly recommended by Aho,
Seth, and Ulman from the dragon book. (THE compiler reference)
Parameters:
BYTE * pbKey
int cbKey
Returns:
Hashed DWORD value.
===================================================================*/
DWORD DefaultHash(const BYTE *pbKey, int cbKey)
{
const unsigned WORD_BITS = CHAR_BIT * sizeof(unsigned);
const unsigned SEVENTY_FIVE_PERCENT = WORD_BITS * 3 / 4;
const unsigned ONE_EIGHTH = WORD_BITS / 8;
const unsigned HIGH_BITS = ~(unsigned(~0) >> ONE_EIGHTH);
register unsigned uT, uResult = 0;
register const BYTE *pb = pbKey;
while (cbKey-- > 0)
{
uResult = (uResult << ONE_EIGHTH) + *pb++;
if ((uT = uResult & HIGH_BITS) != 0)
uResult = (uResult ^ (uT >> SEVENTY_FIVE_PERCENT)) & ~HIGH_BITS;
}
return uResult;
}
/*===================================================================
::UnicodeUpcaseHash
This is Aho, Seth, and Ulman's hash algorithm adapted for wide
character strings. Their algorithm was not designed for cases
where every other character is 0 (which is how a unicode string
looks if you pretend it's ascii) Therefore, performance
qualities are unknown for that case.
NOTE: for real Unicode, (not unicode that is merely ANSI converted)
I have no idea how good a distribution this algorithm will
produce. (since we are shifting in values > 8 bits now)
Parameters:
BYTE * pbKey
int cbKey
Returns:
Hashed DWORD value.
===================================================================*/
#define toupper(x) WORD(CharUpper(LPSTR(WORD(x))))
DWORD UnicodeUpcaseHash(const BYTE *pbKey, int cbKey)
{
// PERF hash on last CCH_HASH chars only
const unsigned WORD_BITS = CHAR_BIT * sizeof(unsigned);
const unsigned SEVENTY_FIVE_PERCENT = WORD_BITS * 3 / 4;
const unsigned ONE_EIGHTH = WORD_BITS / 8;
const unsigned HIGH_BITS = ~(unsigned(~0) >> ONE_EIGHTH);
const unsigned CCH_HASH = 8;
register unsigned uT, uResult = 0;
Assert ((cbKey & 1) == 0); // cbKey better be even!
int cwKey = unsigned(cbKey) >> 1;
register const WORD *pw = reinterpret_cast<const WORD *>(pbKey) + cwKey;
cwKey = min(cwKey, CCH_HASH);
WCHAR awcTemp[CCH_HASH];
// copy last cwKey WCHARs of pbKey to last cwKey WCHARs of awcTemp
wcsncpy(awcTemp + CCH_HASH - cwKey, pw - cwKey, cwKey);
CharUpperBuffW(awcTemp + CCH_HASH - cwKey, cwKey);
pw = awcTemp + CCH_HASH;
while (cwKey-- > 0)
{
uResult = (uResult << ONE_EIGHTH) + *--pw;
if ((uT = uResult & HIGH_BITS) != 0)
uResult = (uResult ^ (uT >> SEVENTY_FIVE_PERCENT)) & ~HIGH_BITS;
}
return uResult;
}
DWORD MultiByteUpcaseHash(const BYTE *pbKey, int cbKey)
{
// PERF hash on first CCH_HASH chars only
const unsigned WORD_BITS = CHAR_BIT * sizeof(unsigned);
const unsigned SEVENTY_FIVE_PERCENT = WORD_BITS * 3 / 4;
const unsigned ONE_EIGHTH = WORD_BITS / 8;
const unsigned HIGH_BITS = ~(unsigned(~0) >> ONE_EIGHTH);
const unsigned CCH_HASH = 8;
register unsigned uT, uResult = 0;
unsigned char achTemp[CCH_HASH + 1];
// For performance we only HASH on at most CCH_HASH characters.
cbKey = min(cbKey, CCH_HASH);
// Copy cbKey chacters into temporary buffer
memcpy(achTemp, pbKey, cbKey);
// Add terminating null character
achTemp[cbKey] = 0;
// Convert to upper case
_mbsupr(achTemp);
while (cbKey-- > 0)
{
uResult = (uResult << ONE_EIGHTH) + achTemp[cbKey];
if ((uT = uResult & HIGH_BITS) != 0)
uResult = (uResult ^ (uT >> SEVENTY_FIVE_PERCENT)) & ~HIGH_BITS;
}
return uResult;
}
/*===================================================================
::PtrHash
Hash function that returns the pointer itself as the
DWORD hash value
Parameters:
BYTE * pbKey
int cbKey (not used)
Returns:
Hashed DWORD value.
===================================================================*/
DWORD PtrHash
(
const BYTE *pbKey,
int /* cbKey */
)
{
return *(reinterpret_cast<DWORD *>(&pbKey));
}
/*===================================================================
CLSIDHash
CLSID hash. Uses xor of the first and last DWORD
Parameters:
BYTE * pbKey
int cbKey
Returns:
Hashed DWORD value.
===================================================================*/
DWORD CLSIDHash
(
const BYTE *pbKey,
int cbKey
)
{
Assert(cbKey == 16);
DWORD *pdwKey = (DWORD *)pbKey;
return (pdwKey[0] ^ pdwKey[3]);
}
/*===================================================================
CLinkElem::CLinkElem
The Constructor.
Parameters:
NONE
Returns:
NONE
===================================================================*/
CLinkElem::CLinkElem(void)
: m_pKey(NULL),
m_cbKey(0),
m_Info(0),
m_pPrev(NULL),
m_pNext(NULL)
{
}
/*===================================================================
HRESULT CLinkElem::Init
Initializes class members
Parameters:
void * pKey
int cbKey
Returns:
S_OK Success
E_FAIL Error
===================================================================*/
HRESULT CLinkElem::Init( void *pKey, int cbKey )
{
m_pPrev = NULL;
m_pNext = NULL;
m_Info = 0;
if ( pKey == NULL || cbKey == 0 )
return E_FAIL;
m_pKey = static_cast<BYTE *>(pKey);
m_cbKey = (short)cbKey;
return S_OK;
}
/*===================================================================
CHashTable::CHashTable
Constructor for CHashTable
Parameters:
NONE
Returns:
NONE
===================================================================*/
CHashTable::CHashTable( HashFunction pfnHash )
: m_fInited(FALSE),
m_fBucketsAllocated(FALSE),
m_pHead(NULL),
m_pTail(NULL),
m_rgpBuckets(NULL),
m_pfnHash(pfnHash),
m_cBuckets(0),
m_Count(0)
{
}
/*===================================================================
CHashTable::~CHashTable
Destructor for CHashTable. Frees allocated bucket array.
Parameters:
NONE
Returns:
NONE
===================================================================*/
CHashTable::~CHashTable( void )
{
if (m_fBucketsAllocated)
{
Assert(m_rgpBuckets);
delete [] m_rgpBuckets;
}
}
/*===================================================================
HRESULT CHashTable::UnInit
Frees allocated bucket array.
Parameters:
NONE
Returns:
S_OK Always
===================================================================*/
HRESULT CHashTable::UnInit( void )
{
if (m_fBucketsAllocated)
{
Assert(m_rgpBuckets);
delete [] m_rgpBuckets;
m_fBucketsAllocated = FALSE;
}
m_rgpBuckets = NULL;
m_pHead = NULL;
m_pTail = NULL;
m_cBuckets = 0;
m_Count = 0;
m_fInited = FALSE;
return S_OK;
}
/*===================================================================
void CHashTable::AssertValid
Verify integrity of the data structure.
NOTE: This function does very deep integrity checks and thus is
very slow.
Checks performed:
verify that m_Count is valid
verify that each element is in the right bucket
verify prev, next links and info fields
===================================================================*/
#ifdef DBG
void CHashTable::AssertValid() const
{
CLinkElem *pElem; // pointer to current link element
unsigned i; // index into current bucket
unsigned cItems = 0; // actual number of items in the table
Assert(m_fInited);
if (m_Count == 0)
{
if (m_rgpBuckets)
{
BOOL fAllNulls = TRUE;
// empty hash table - make sure that everything reflects this
Assert(m_pHead == NULL);
Assert (m_pTail == NULL);
for (i = 0; i < m_cBuckets; i++)
{
if (m_rgpBuckets[i] != NULL)
{
fAllNulls = FALSE;
break;
}
}
Assert(fAllNulls);
}
return;
}
// If m_Count > 0
Assert(m_pHead);
Assert(m_pHead->m_pPrev == NULL);
Assert(m_pTail != NULL && m_pTail->m_pNext == NULL);
Assert(m_rgpBuckets);
// Now verify each entry
for (i = 0; i < m_cBuckets; ++i)
{
pElem = m_rgpBuckets[i];
while (pElem != NULL)
{
// Verify hashing
Assert ((m_pfnHash(pElem->m_pKey, pElem->m_cbKey) % m_cBuckets) == i);
// Verify links
if (pElem->m_pPrev)
{
Assert (pElem->m_pPrev->m_pNext == pElem);
}
else
{
Assert (m_pHead == pElem);
}
if (pElem->m_pNext)
{
Assert (pElem->m_pNext->m_pPrev == pElem);
}
else
{
Assert (m_pTail == pElem);
}
// Verify info fields
Assert (pElem->m_Info >= 0);
if (pElem != m_rgpBuckets[i])
{
Assert (pElem->m_Info == pElem->m_pPrev->m_Info - 1);
}
// Prepare for next iteration, stopping when m_Info is zero.
++cItems;
if (pElem->m_Info == 0)
break;
pElem = pElem->m_pNext;
}
}
// Verify count
Assert (m_Count == cItems);
}
#endif
/*===================================================================
HRESULT CHashTable::Init
Initialize CHashTable by allocating the bucket array and
and initializing the bucket link lists.
Parameters:
UINT cBuckets Number of buckets
Returns:
HRESULT S_OK
E_OUTOFMEMORY
===================================================================*/
HRESULT CHashTable::Init( UINT cBuckets )
{
m_cBuckets = cBuckets;
m_Count = 0;
m_rgpBuckets = NULL; // created on demand
m_fInited = TRUE;
return S_OK;
}
/*===================================================================
HRESULT CHashTable::ReInit
Reinitialize CHashTable by deleting everything in it. - client
is responsible for making the hashtable empty first
Parameters:
None
Returns:
None
===================================================================*/
void CHashTable::ReInit()
{
Assert( m_fInited );
if (m_rgpBuckets)
memset(m_rgpBuckets, 0, m_cBuckets * sizeof(CLinkElem *));
m_Count = 0;
m_pHead = NULL;
m_pTail = NULL;
}
/*===================================================================
HRESULT CHashTable::AllocateBuckets()
Allocates hash table buckets on demand
Parameters:
Returns:
HRESULT
===================================================================*/
HRESULT CHashTable::AllocateBuckets()
{
Assert(m_rgpBuckets == NULL);
Assert(m_fInited);
Assert(m_cBuckets > 0);
if (m_cBuckets <= PREALLOCATED_BUCKETS_MAX)
{
m_rgpBuckets = m_rgpBucketsBuffer;
}
else
{
m_rgpBuckets = new CLinkElem * [m_cBuckets];
if (m_rgpBuckets == NULL)
return E_OUTOFMEMORY;
m_fBucketsAllocated = TRUE;
}
memset(m_rgpBuckets, 0, m_cBuckets * sizeof(CLinkElem *));
return S_OK;
}
/*===================================================================
BOOL CHashTable::FIsEqual
compare two keys using their lengths and memcmp()
Parameters:
const void *pKey1 first key
int cbKey1 length of the first key
const void *pKey2 second key
int cbKey2 length of second key
Returns:
Pointer to element added/found.
===================================================================*/
BOOL CHashTable::FIsEqual( const void * pKey1,
int cbKey1,
const void * pKey2,
int cbKey2 )
{
if (cbKey1 != cbKey2)
return FALSE;
return memcmp(pKey1, pKey2, cbKey1) == 0;
}
#pragma optimize("g", off)
/*===================================================================
CHashTable::AddElem
Adds a CLinkElem to Hash table.
User is responsible for allocating the Element to be added.
Parameters:
CLinkElem * pElem Object to be added
BOOL fTestDups Look for duplicates if true
Returns:
Pointer to element added/found.
===================================================================*/
CLinkElem *CHashTable::AddElem( CLinkElem *pElem, BOOL fTestDups )
{
if (m_rgpBuckets == NULL)
{
if (FAILED(AllocateBuckets()))
return NULL;
}
if (pElem == NULL)
return NULL;
BOOL fNew = TRUE;
DWORD iT = m_pfnHash( pElem->m_pKey, pElem->m_cbKey ) % m_cBuckets;
CLinkElem * pT = m_rgpBuckets[iT];
BOOL fDebugTestDups = FALSE;
#ifdef DBG
// In retail, if fTestDups is false, it means that
// there shouldnt be any dups, so dont bother testing. Under debug, however
// we want to be able to assert that there isnt a dup (since there isnt supposed to be one).
fDebugTestDups = !fTestDups;
#endif
if (fTestDups || fDebugTestDups)
{
while ( pT && fNew )
{
if ( FIsEqual( pT->m_pKey, pT->m_cbKey, pElem->m_pKey, pElem->m_cbKey ) )
fNew = FALSE;
else if ( pT->m_Info > 0 )
pT = pT->m_pNext;
else
break;
}
}
#ifdef DBG
// If there arent supposed to be any dups, then make sure this element is seen as "new"
if (fDebugTestDups)
Assert(fNew);
#endif
#ifdef DUMP_HASHING_INFO
static DWORD cAdds = 0;
FILE *logfile = NULL;
if (cAdds++ > 1000000 && m_Count > 100)
{
cAdds = 0;
if (logfile = fopen("C:\\Temp\\hashdump.Log", "a+"))
{
DWORD cZero = 0;
short iMax = 0;
DWORD cGte3 = 0;
DWORD cGte5 = 0;
DWORD cGte10 = 0;
fprintf(logfile, "Hash dump: # elements = %d\n", m_Count);
for (UINT iBucket = 0; iBucket < m_cBuckets; iBucket++)
{
if (m_rgpBuckets[iBucket] == NULL)
cZero++;
else
{
short Info = m_rgpBuckets[iBucket]->m_Info;
if (Info > iMax)
iMax = Info;
if (Info >= 10) cGte10++;
else if (Info >= 5) cGte5++;
else if (Info >= 3) cGte3++;
}
}
fprintf(logfile, "Max chain = %d, # 0 chains = %d, # >= 3 = %d, # >= 5 = %d, # >= 10 = %d\n",
(DWORD)iMax, cZero, cGte3, cGte5, cGte10);
fflush(logfile);
fclose(logfile);
}
}
#endif
if ( fNew )
{
if ( pT )
{
// There are other elements in bucket
pT = m_rgpBuckets[iT];
m_rgpBuckets[iT] = pElem;
pElem->m_Info = pT->m_Info + 1;
pElem->m_pNext = pT;
pElem->m_pPrev = pT->m_pPrev;
pT->m_pPrev = pElem;
if ( pElem->m_pPrev == NULL )
m_pHead = pElem;
else
pElem->m_pPrev->m_pNext = pElem;
}
else
{
// This is the first element in the bucket
m_rgpBuckets[iT] = pElem;
pElem->m_pPrev = NULL;
pElem->m_pNext = m_pHead;
pElem->m_Info = 0;
if ( m_pHead )
m_pHead->m_pPrev = pElem;
else
m_pTail = pElem;
m_pHead = pElem;
}
m_Count++;
AssertValid();
return pElem;
}
AssertValid();
return pT;
}
#pragma optimize("g", on)
#pragma optimize("g", off)
/*===================================================================
CLinkElem * CHashTable::FindElem
Finds an object in the hash table based on the name.
Parameters:
void * pKey
int cbKey
Returns:
Pointer to CLinkElem if found, otherwise NULL.
===================================================================*/
CLinkElem * CHashTable::FindElem( const void *pKey, int cbKey )
{
AssertValid();
if ( m_rgpBuckets == NULL || pKey == NULL )
return NULL;
DWORD iT = m_pfnHash( static_cast<const BYTE *>(pKey), cbKey ) % m_cBuckets;
CLinkElem * pT = m_rgpBuckets[iT];
CLinkElem * pRet = NULL;
while ( pT && pRet == NULL )
{
if ( FIsEqual( pT->m_pKey, pT->m_cbKey, pKey, cbKey ) )
pRet = pT;
else if ( pT->m_Info > 0 )
pT = pT->m_pNext;
else
break;
}
return pRet;
}
#pragma optimize("g", on)
#pragma optimize("g", off)
/*===================================================================
CHashTable::DeleteElem
Removes a CLinkElem from Hash table.
The user should delete the freed link list element.
Parameters:
void * pbKey key
int cbKey length of key
Returns:
Pointer to element removed, NULL if not found
===================================================================*/
CLinkElem * CHashTable::DeleteElem( const void *pKey, int cbKey )
{
if ( m_rgpBuckets == NULL || pKey == NULL )
return NULL;
CLinkElem * pRet = NULL;
DWORD iT = m_pfnHash( static_cast<const BYTE *>(pKey), cbKey ) % m_cBuckets;
CLinkElem * pT = m_rgpBuckets[iT];
while ( pT && pRet == NULL )// Find it !
{
if ( FIsEqual( pT->m_pKey, pT->m_cbKey, pKey, cbKey ) )
pRet = pT;
else if ( pT->m_Info > 0 )
pT = pT->m_pNext;
else
break;
}
if ( pRet )
{
pT = m_rgpBuckets[iT];
if ( pRet == pT )
{
// Update bucket head
if ( pRet->m_Info > 0 )
m_rgpBuckets[iT] = pRet->m_pNext;
else
m_rgpBuckets[iT] = NULL;
}
// Update counts in bucket link list
while ( pT != pRet )
{
pT->m_Info--;
pT = pT->m_pNext;
}
// Update link list
if ( pT = pRet->m_pPrev )
{
// Not the Head of the link list
if ( pT->m_pNext = pRet->m_pNext )
pT->m_pNext->m_pPrev = pT;
else
m_pTail = pT;
}
else
{
// Head of the link list
if ( m_pHead = pRet->m_pNext )
m_pHead->m_pPrev = NULL;
else
m_pTail = NULL;
}
m_Count--;
}
AssertValid();
return pRet;
}
#pragma optimize("g", on)
/*===================================================================
CHashTable::RemoveElem
Removes a given CLinkElem from Hash table.
The user should delete the freed link list element.
Parameters:
CLinkElem * pLE Element to remove
Returns:
Pointer to element removed
===================================================================*/
CLinkElem * CHashTable::RemoveElem( CLinkElem *pLE )
{
CLinkElem *pLET;
if ( m_rgpBuckets == NULL || pLE == NULL )
return NULL;
// Remove this item from the linked list
pLET = pLE->m_pPrev;
if (pLET)
pLET->m_pNext = pLE->m_pNext;
pLET = pLE->m_pNext;
if (pLET)
pLET->m_pPrev = pLE->m_pPrev;
if (m_pHead == pLE)
m_pHead = pLE->m_pNext;
if (m_pTail == pLE)
m_pTail = pLE->m_pPrev;
/*
* If this was the first item in a bucket, then fix up the bucket.
* Otherwise, decrement the count of items in the bucket for each item
* in the bucket prior to this item
*/
if (pLE->m_pPrev == NULL || pLE->m_pPrev->m_Info == 0)
{
UINT iBucket;
// This item is head of a bucket. Need to find out which bucket!
for (iBucket = 0; iBucket < m_cBuckets; iBucket++)
if (m_rgpBuckets[iBucket] == pLE)
break;
Assert(iBucket < m_cBuckets && m_rgpBuckets[iBucket] == pLE);
if (pLE->m_Info == 0)
m_rgpBuckets[iBucket] = NULL;
else
m_rgpBuckets[iBucket] = pLE->m_pNext;
}
else
{
// This item is in the middle of a bucket chain. Update counts in preceeding items
pLET = pLE->m_pPrev;
while (pLET != NULL && pLET->m_Info != 0)
{
pLET->m_Info--;
pLET = pLET->m_pPrev;
}
}
// Decrement count of total number of items
m_Count--;
AssertValid();
return pLE;
}
/*===================================================================
CHashTableStr::CHashTableStr
Constructor for CHashTableStr
Parameters:
NONE
Returns:
NONE
===================================================================*/
CHashTableStr::CHashTableStr( HashFunction pfnHash )
: CHashTable( pfnHash )
{
}
/*===================================================================
BOOL CHashTableStr::FIsEqual
compare two keys using their lengths, treating the keys
as Unicode and doing case insensitive compare.
Parameters:
const void *pKey1 first key
int cbKey1 length of the first key
const void *pKey2 second key
int cbKey2 length of second key
Returns:
Pointer to element added/found.
===================================================================*/
BOOL CHashTableStr::FIsEqual( const void * pKey1,
int cbKey1,
const void * pKey2,
int cbKey2 )
{
if ( cbKey1 != cbKey2 )
return FALSE;
return _wcsnicmp(static_cast<const wchar_t *>(pKey1), static_cast<const wchar_t *>(pKey2), cbKey1) == 0;
}
/*===================================================================
CHashTableMBStr::CHashTableMBStr
Constructor for CHashTableMBStr
Parameters:
NONE
Returns:
NONE
===================================================================*/
CHashTableMBStr::CHashTableMBStr( HashFunction pfnHash )
: CHashTable( pfnHash )
{
}
/*===================================================================
BOOL CHashTableMBStr::FIsEqual
compare two keys using their lengths, treating the keys
as multi-byte strings and doing case insensitive compare.
Parameters:
const void *pKey1 first key
int cbKey1 length of the first key
const void *pKey2 second key
int cbKey2 length of second key
Returns:
Pointer to element added/found.
===================================================================*/
BOOL CHashTableMBStr::FIsEqual( const void * pKey1,
int cbKey1,
const void * pKey2,
int cbKey2 )
{
if ( cbKey1 != cbKey2 )
return FALSE;
return _mbsnicmp(static_cast<const unsigned char *>(pKey1), static_cast<const unsigned char *>(pKey2), cbKey1) == 0;
}
/*===================================================================
CHashTablePtr::CHashTablePtr
Constructor for CHashTableStr
Parameters:
HashFunction pfnHash has function (PtrHash is default)
Returns:
NONE
===================================================================*/
CHashTablePtr::CHashTablePtr
(
HashFunction pfnHash
)
: CHashTable(pfnHash)
{
}
/*===================================================================
BOOL CHashTablePtr::FIsEqual
Compare two pointers.
Used by CHashTable to find elements
Parameters:
const void *pKey1 first key
int cbKey1 length of the first key (unused)
const void *pKey2 second key
int cbKey2 length of second key (unused)
Returns:
BOOL (true when equal)
===================================================================*/
BOOL CHashTablePtr::FIsEqual
(
const void *pKey1,
int /* cbKey1 */,
const void *pKey2,
int /* cbKey2 */
)
{
return (pKey1 == pKey2);
}
/*===================================================================
CHashTableCLSID::CHashTableCLSID
Constructor for CHashTableCLSID
Parameters:
HashFunction pfnHash has function (CLSIDHash is default)
Returns:
NONE
===================================================================*/
CHashTableCLSID::CHashTableCLSID
(
HashFunction pfnHash
)
: CHashTable(pfnHash)
{
}
/*===================================================================
BOOL CHashTableCLSID::FIsEqual
Compare two CLSIDs.
Parameters:
const void *pKey1 first key
int cbKey1 length of the first key
const void *pKey2 second key
int cbKey2 length of second key
Returns:
BOOL (true when equal)
===================================================================*/
BOOL CHashTableCLSID::FIsEqual
(
const void *pKey1,
int cbKey1,
const void *pKey2,
int cbKey2
)
{
Assert(cbKey1 == sizeof(CLSID) && cbKey2 == sizeof(CLSID));
return IsEqualCLSID(*((CLSID *)pKey1), *((CLSID *)pKey2));
}