Windows2003-3790/enduser/speech/common/include/sphash.h
2020-09-30 16:53:55 +02:00

500 lines
18 KiB
C++

/****************************************************************************
* SPHash.h
* This is modified from sr/include/hash_n.h to minimize dependencies on
* application specific headers.
*
* Owner: bohsu
* Copyright ©2000 Microsoft Corporation All Rights Reserved.
*****************************************************************************/
#pragma once
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#endif
//--- Includes --------------------------------------------------------------
#include <windows.h>
#include <math.h>
#include <crtdbg.h>
#ifdef _DEBUG
#include <stdio.h>
#endif _DEBUG
//--- Forward and External Declarations -------------------------------------
//--- TypeDef and Enumeration Declarations ----------------------------------
//--- Constants -------------------------------------------------------------
//--- Class, Struct and Union Definitions -----------------------------------
/***********************************************************************
* CSPHash Class
* This is a templated hash table class. Note that the base CSPHash class
* does not allocate or free the Keys and Values. To define a hash class
* that manages its Keys and Values, derive a subclass an overload Add()
* and ...
*****************************************************************bohsu*/
template<class KEY, class VALUE>
class CSPHash
{
public:
// Constructor
CSPHash(
VALUE ValueNIL = NULL, // Value representing NIL
UINT32 uInitialSize = 0); // Initial hash table size
// Destructor
virtual ~CSPHash();
// Returns number of (Key, Value) entries used in the hash table.
inline UINT32 GetNumEntries(void) const { return m_uNumEntriesUsed; }
// Returns the next entry starting at the given index. Set puIndex = 0 for the first entry.
VALUE GetNextEntry(
UINT32 *puIndex, // Index to start looking for the next entry
KEY *pKey = NULL) const; // [out] Key of the next entry found
// Resets the content hash table.
virtual void Reset(void);
// Adds a (Key, Value) entry to the hash table.
HRESULT Add(
KEY Key, // Key to add
VALUE Val); // Value associated with the Key
// Lookup a Value based on the Key. If not found, ValueNIL is returned.
VALUE Lookup(
KEY Key) const; // Key to lookup
#ifdef _DEBUG
// Dumps the hash table statistics to file handle.
void DumpStat(
FILE *hFile = NULL, // Output file handle. NULL -> DebugWindow
const char *strHeader = NULL) const; // Trace header
#endif _DEBUG
protected:
// Data structure containing (Key, Value) pair
struct ENTRY
{
KEY Key;
VALUE Value;
};
// Find the index corresponding to the given Key.
int FindIndex(
KEY Key) const; // Key to search for
static UINT32 NextPrime(UINT32 Val);
protected:
//---------------------------------------------------------------
//--- The following functions can be overloaded by subclasses ---
//---------------------------------------------------------------
// If Destroy*() is overloaded, you MUST overload the destructor with:
// virtual ~CSPDerivedHash() { Reset(); }
// Calling Reset() in the base class destructor has no effect because
// the derived subclass will have been destroyed already by the time it
// gets to the base class destructor. Thus, the correct DestroyKey() and
// DestroyValue() will never be called.
// Hash function mapping the Key to a UINT32 index.
virtual UINT32 HashKey(KEY Key) const { return (UINT32)Key; }
// Compare if two Keys are equal.
virtual bool AreKeysEqual(KEY Key1, KEY Key2) const { return Key1 == Key2; }
// Hash function used to determine the skip count.
virtual UINT32 HashKey2(KEY Key) const { return 1; }
// Overload if a deep copy of the Key needs to be made in Add().
virtual KEY CopyKey(KEY Key) const { return Key; }
// Overload if a deep copy of the Key needs to be made in Add().
virtual VALUE CopyValue(VALUE Value) const { return Value; }
// Overload if the Key needs to be destroyed.
virtual void DestroyKey(KEY Key) const { }
// Overload if the Value needs to be destroyed.
virtual void DestroyValue(VALUE Value) const { }
//------------------------
//--- Member Variables ---
//------------------------
protected:
ENTRY *m_aTable; // Hash table containing (Key, Value) pairs
VALUE m_ValueNIL; // Value representing NIL
UINT32 m_uNumEntries; // Current size of hash table
UINT32 m_uNumEntriesInit; // Initial size of hash table
UINT32 m_uNumEntriesUsed; // Current number of entries used in hash table
#ifdef _DEBUG
UINT32 m_uAccess; // Number of times a Key is looked up
UINT32 m_uSearch; // Number of times a entry in the table is searched
UINT32 m_uRegrow; // Number of times the hash table regrew
#endif _DEBUG
};
/***********************************************************************
* CSPStringHashW Class
* CSPStringHashW is a hash of UNICODE strings to VALUEs. The UNICODE string
* is treated as a constant. It is neither copied during Add() nor deleted
* during destructor. Likewise, VALUE is treated as a simple data type and
* is neither copied nor destroyed. If the application wants the class to
* manage its own copy of the string key or VALUE, derive a subclass and
* overload Copy*() and/or Destroy().
*****************************************************************bohsu*/
template<class VALUE> class CSPStringHashW : public CSPHash<const WCHAR *, VALUE>
{
protected:
UINT32 StringHashW(const WCHAR *wcsKey, UINT32 uPrime) const
{
UINT32 uHashIndex = 0;
for(const WCHAR *pwch = wcsKey; *pwch != NULL; pwch++)
uHashIndex = uHashIndex * uPrime + *pwch;
return uHashIndex;
}
//--- Overloaded functions ---
protected:
virtual UINT32 HashKey(const WCHAR* wcsKey) const { return StringHashW(wcsKey, 65599); }
virtual UINT32 HashKey2(const WCHAR* wcsKey) const { return StringHashW(wcsKey, 257); }
virtual bool AreKeysEqual(const WCHAR* wcsKey1, const WCHAR* wcsKey2) const
{
return wcscmp(wcsKey1, wcsKey2) == 0;
}
};
/***********************************************************************
* CSPGUIDHash Class
* CSPGUIDHash is a hash of GUIDs to VALUEs. The GUID pointer is treated
* as a constant. It is neither copied during Add() nor deleted
* during destructor. Likewise, VALUE is treated as a simple data type and
* is neither copied nor destroyed. If the application wants the class to
* manage its own copy of the GUID key or VALUE, derive a subclass and
* overload Copy*() and/or Destroy().
*****************************************************************bohsu*/
template<class VALUE> class CSPGUIDHash : public CSPHash<const GUID *, VALUE>
{
//--- Overloaded functions ---
protected:
virtual UINT32 HashKey(const GUID *pguidKey) const { return pguidKey->Data1; }
virtual UINT32 HashKey2(const GUID *pguidKey) const { return pguidKey->Data2; }
virtual bool AreKeysEqual(const GUID *pguidKey1, const GUID *pguidKey2) const
{
// It is annoying that operator== for GUIDs return int (BOOL) instead of bool.
return (*pguidKey1 == *pguidKey2) != 0;
}
};
//--- Function Declarations -------------------------------------------------
//--- Inline Function Definitions -------------------------------------------
/**********************************************************************
* CSPHash::CSPHash *
*------------------*
* Description:
* Constructor.
****************************************************************bohsu*/
template<class KEY, class VALUE>
CSPHash<KEY, VALUE>::CSPHash(
VALUE ValueNIL, // Value representing NIL
UINT32 uInitialSize) // Initial hash table size
{
m_ValueNIL = ValueNIL;
m_aTable = 0;
m_uNumEntries = 0;
m_uNumEntriesInit = uInitialSize; // Estimated final number of entries to be stored.
m_uNumEntriesUsed = 0;
#ifdef _DEBUG
m_uAccess = 0;
m_uSearch = 0;
m_uRegrow = 0;
#endif _DEBUG
}
/**********************************************************************
* CSPHash::~CSPHash *
*-------------------*
* Description:
* Destructor. This does not free KEY and VALUE.
* If Destroy*() is overloaded, call Reset() in the subclass destructor.
****************************************************************bohsu*/
template<class KEY, class VALUE>
CSPHash<KEY, VALUE>::~CSPHash()
{
delete [] m_aTable;
}
/**********************************************************************
* CSPHash::GetNextEntry *
*-----------------------*
* Description:
* Returns the next entry starting at the given index. Set puIndex = 0 for the first entry.
****************************************************************bohsu*/
template<class KEY, class VALUE>
VALUE CSPHash<KEY, VALUE>::GetNextEntry(
UINT32 *puIndex, // Index to start looking for the next entry
KEY *pKey) const // [out] Key of the next entry found
{
while (*puIndex < m_uNumEntries)
{
if (m_aTable[*puIndex].Value != m_ValueNIL)
{
if(pKey) *pKey = m_aTable[*puIndex].Key;
return m_aTable[(*puIndex)++].Value;
}
++*puIndex;
}
return m_ValueNIL;
}
/**********************************************************************
* CSPHash::Reset *
*----------------*
* Description:
* Resets the content hash table.
****************************************************************bohsu*/
template<class KEY, class VALUE>
void CSPHash<KEY, VALUE>::Reset()
{
for (UINT32 i=0; i < m_uNumEntries; i++)
{
if(m_aTable[i].Value != m_ValueNIL)
{
DestroyKey(m_aTable[i].Key);
DestroyValue(m_aTable[i].Value);
m_aTable[i].Value = m_ValueNIL;
}
}
m_uNumEntriesUsed = 0;
#ifdef _DEBUG
m_uAccess = m_uSearch = m_uRegrow = 0;
#endif _DEBUG
}
/**********************************************************************
* CSPHash::Add *
*--------------*
* Description:
* Adds a (Key, Value) entry to the hash table.
****************************************************************bohsu*/
template<class KEY, class VALUE>
HRESULT CSPHash<KEY, VALUE>::Add(
KEY Key, // Key to add
VALUE Val) // Value associated with the Key
{
int ientry;
// Implementation uses Val==m_ValueNIL to detect empty entries.
_ASSERTE(Val != m_ValueNIL);
// Grow if allowed and we're more than half full.
// (Also handles initial alloc)
if (m_uNumEntriesUsed * 2 >= m_uNumEntries)
{
/* half-full, too crowded ==> regrow */
ENTRY * oldtable = m_aTable;
UINT32 oldentry = m_uNumEntries;
UINT32 prime = NextPrime(max(m_uNumEntriesUsed * 3 + 17, m_uNumEntriesInit));
#ifdef _DEBUG
m_uRegrow++;
#endif _DEBUG
// Alloc new table.
m_aTable = new ENTRY[prime];
if (m_aTable == NULL)
{
m_aTable = oldtable;
return E_OUTOFMEMORY;
}
for (UINT32 i=0; i < prime; i++)
{
m_aTable[i].Value = m_ValueNIL;
}
m_uNumEntries = prime;
for (i = 0; i < oldentry; i++)
{
if (oldtable[i].Value != m_ValueNIL)
{
ientry = FindIndex(oldtable[i].Key);
_ASSERTE(ientry >= 0 && m_aTable[ientry].Value == m_ValueNIL);
m_aTable[ientry] = oldtable[i];
}
}
delete [] oldtable;
}
// Find out where this element should end up.
ientry = FindIndex(Key);
if (ientry < 0)
return E_FAIL; // Too full
if (m_aTable[ientry].Value == m_ValueNIL)
{
// Not already there. Add it.
m_aTable[ientry].Key = CopyKey(Key);
m_aTable[ientry].Value = CopyValue(Val);
m_uNumEntriesUsed++;
}
else
{
return S_FALSE; // It was already there.
}
return S_OK;
}
/**********************************************************************
* CSPHash::Lookup *
*-----------------*
* Description:
* Lookup a Value based on the Key. If not found, ValueNIL is returned.
****************************************************************bohsu*/
template<class KEY, class VALUE>
VALUE CSPHash<KEY, VALUE>::Lookup(
KEY Key) const // Key to lookup
{
int ientry = FindIndex(Key);
if (ientry < 0)
return m_ValueNIL;
return m_aTable[ientry].Value;
}
#ifdef _DEBUG
/**********************************************************************
* CSPHash::DumpStat *
*-------------------*
* Description:
* Dumps the hash table statistics to file handle.
****************************************************************bohsu*/
template<class KEY, class VALUE>
void CSPHash<KEY, VALUE>::DumpStat(
FILE *hFile, // Output file handle.
const char *strHeader) const // Trace header
{
if(hFile == NULL)
{
char buf[100];
sprintf(buf, "(%s) hash statistics:\n", strHeader ? strHeader : "");
OutputDebugString(buf);
sprintf(buf, "load=%d/%d = %.3g, regrow = %d\n", m_uNumEntriesUsed, m_uNumEntries,
(m_uNumEntries == 0) ? 0 : (float)m_uNumEntriesUsed/(float)m_uNumEntries, m_uRegrow);
OutputDebugString(buf);
sprintf(buf, "access %d/%d = %g\n\n", m_uSearch, m_uAccess,
(m_uAccess == 0) ? 0 :
(float) m_uSearch / (float) m_uAccess);
OutputDebugString(buf);
}
else
{
fprintf(hFile, "(%s) hash statistics:\n", strHeader ? strHeader : "");
fprintf(hFile, "load=%d/%d = %.3g, regrow = %d\n", m_uNumEntriesUsed, m_uNumEntries,
(m_uNumEntries == 0) ? 0 : (float)m_uNumEntriesUsed/(float)m_uNumEntries, m_uRegrow);
fprintf(hFile, "access %d/%d = %g\n\n", m_uSearch, m_uAccess,
(m_uAccess == 0) ? 0 :
(float) m_uSearch / (float) m_uAccess);
}
}
#endif _DEBUG
/**********************************************************************
* CSPHash::FindIndex *
*--------------------*
* Description:
* Find the index corresponding to the given Key.
****************************************************************bohsu*/
template<class KEY, class VALUE>
int CSPHash<KEY, VALUE>::FindIndex(
KEY Key) const
{
#ifdef _DEBUG
// Hack: Violate const declaration for statistics member variables
const_cast<CSPHash *>(this)->m_uAccess++;
#endif _DEBUG
if (m_uNumEntries == 0)
return -1;
UINT32 start = HashKey(Key) % m_uNumEntries;
UINT32 index = start;
UINT32 skip = 0;
do
{
#ifdef _DEBUG
// Hack: Violate const declaration for statistics member variables
const_cast<CSPHash *>(this)->m_uSearch++;
#endif _DEBUG
// Not in table; return index where it should be placed.
if (m_aTable[index].Value == m_ValueNIL)
return index;
if (AreKeysEqual(m_aTable[index].Key, Key))
return index;
if (skip == 0)
{
skip = HashKey2(Key);
// Limit skip amount to non-zero and less than hash table size.
// Since m_uNumEntries is prime, they are relatively prime and so we're guaranteed
// to visit every bucket.
if (m_uNumEntries > 1)
skip = skip % (m_uNumEntries - 1) + 1;
}
index += skip;
if (index >= m_uNumEntries)
index -= m_uNumEntries;
} while (index != start);
_ASSERTE(m_uNumEntriesUsed == m_uNumEntries);
return -1; /* all full and not found */
}
/**********************************************************************
* CSPHash::NextPrime *
*--------------------*
* Description:
* Return a prime number greater than or equal to Val.
* If overflow occurs, return 0.
*
* To Do: This function can be optimized significantly.
****************************************************************bohsu*/
template<class KEY, class VALUE>
UINT32 CSPHash<KEY, VALUE>::NextPrime(UINT32 Val)
{
UINT32 maxFactor;
UINT32 i;
if (Val < 2) return 2; // the smallest prime number
while(Val < 0xFFFFFFFF)
{
maxFactor = (UINT32) sqrt ((double) Val); // Is Val a prime number?
for (i = 2; i <= maxFactor; i++) // Is i a factor of Val?
if (Val % i == 0) break;
if (i > maxFactor) return (Val);
Val++;
};
return 0;
}