2020-09-30 17:12:29 +02:00

228 lines
5.4 KiB
C++

// Classify.cpp -- Implementation of the glyph stream classifier.
#include "stdafx.h"
#include <Search.h>
#include "Classify.h"
CClassifier::CClassifier()
{
Initial();
}
CClassifier::CClassifier(PWCHAR pbData, UINT cbData) //rmk
{
Initial();
ScanAndRankData(pbData, cbData);
}
void CClassifier::Initial()
{
ASSERT(sizeof(UINT) == 4);
ASSERT(sizeof(BOOL) == 4);
memset(m_cGlyphRefs, 0, sizeof(m_cGlyphRefs));
m_cPartitions= 1;
m_aiGlyphPartitions [0] = 0;
m_aiGlyphPartitions [1] = TOTAL_GLYPHS; //rmk
m_cGlyphPartitionRefs[0] = 0;
UINT i;
for (i= TOTAL_GLYPHS; i--; ) //rmk
{
m_iGlyphRefsDescending[i]= i;
m_afClassifications [i]= UNUSED_GLYPH;
}
}
int __cdecl CompareUINTsDown(const void *pvL, const void *pvR)
{
if ((**(PUINT *) pvL) > (**(PUINT *) pvR)) return -1;
if ((**(PUINT *) pvL) < (**(PUINT *) pvR)) return 1;
return 0;
}
void CClassifier::ScanData(PWCHAR pwData, UINT cwData)
{
for (; cwData--; )
{
WCHAR wChar= 0x00FF & *pwData++;
m_cGlyphRefs[wChar]++;
}
}
BOOL CClassifier::ScanAndRankData(PWCHAR pwData, UINT cwData)
{
ScanData(pwData, cwData);
return RankGlyphSets();
}
BOOL CClassifier::RankGlyphSets() //rmk
{
ASSERT(sizeof(PUINT *) == sizeof(UINT));
PUINT *ppui= (PUINT *) &m_iGlyphRefsDescending;
UINT ui;
for (ui= TOTAL_GLYPHS; ui--; ) *ppui++= &m_cGlyphRefs[ui]; //rmk
ppui= (PUINT *) &m_iGlyphRefsDescending;
qsort(ppui, TOTAL_GLYPHS, sizeof(PUINT), CompareUINTsDown); //rmk
PUINT pui= (PUINT) ppui;
for (ui= TOTAL_GLYPHS; ui--; ) *pui++ = (*ppui++) - &m_cGlyphRefs[0]; //rmk
for (m_aiGlyphPartitions[0]= 0, ui= 1; ui < 32; )
{
m_aiGlyphPartitions[ui]= ui;
if (!m_cGlyphRefs[m_iGlyphRefsDescending[ui++]]) break;
}
m_aiGlyphPartitions[ui]= TOTAL_GLYPHS; //rmk
// Here we adjust the partitions to insure that any unused glyphs go
// into a separate partition.
if (ui == 32 && !m_cGlyphRefs[m_iGlyphRefsDescending[TOTAL_GLYPHS-1]]) //rmk
{
// If we have fewer than 32 partition classes, this is already true.
// Also, no adjustment is necessary if we have no unused glyphs.
UINT iGlyphClass;
for (iGlyphClass= TOTAL_GLYPHS; iGlyphClass--; ) //rmk
if (m_cGlyphRefs[m_iGlyphRefsDescending[iGlyphClass]]) break;
m_aiGlyphPartitions[31]= iGlyphClass+1;
}
BOOL fPartitionsChanged= ui != m_cPartitions;
m_cPartitions= ui;
for (ui= m_cPartitions; ui--; )
{
UINT cRefs, iGlyphClass, iGlyphClassLimit;
for (cRefs= 0, iGlyphClass= m_aiGlyphPartitions[ui], iGlyphClassLimit= m_aiGlyphPartitions[ui+1];
iGlyphClass < iGlyphClassLimit;
iGlyphClass++
) cRefs += m_cGlyphRefs[m_iGlyphRefsDescending[iGlyphClass]];
m_cGlyphPartitionRefs[ui]= cRefs;
}
#if 0 // This kind of partition adjustment gives worse search
// performance in the important singleton case!
if (m_cPartitions == 32)
{
BOOL fChanges;
UINT cRefs;
do
for (fChanges= FALSE, ui= 32; --ui;)
while ( m_cGlyphPartitionRefs[ui ] > m_cGlyphPartitionRefs[ui-1]
&& (m_aiGlyphPartitions [ui+1] - m_aiGlyphPartitions [ui ]) > 1
)
{
cRefs= m_cGlyphRefs[m_iGlyphRefsDescending[m_aiGlyphPartitions[ui]++]];
m_cGlyphPartitionRefs[ui ] -= cRefs;
m_cGlyphPartitionRefs[ui-1] += cRefs;
fChanges= TRUE;
}
while (fChanges);
}
#endif
UINT fClassification= m_cGlyphRefs[m_iGlyphRefsDescending[TOTAL_GLYPHS-1]]? 0x40000000 : 0x80000000; //rmk
UINT iGlyphClass, iGlyphClassLimit;
for (ui= m_cPartitions; ui--; fClassification >>= 1)
for (iGlyphClass= m_aiGlyphPartitions[ui], iGlyphClassLimit= m_aiGlyphPartitions[ui+1];
iGlyphClass < iGlyphClassLimit;
++iGlyphClass
)
{
UINT iGlyph= m_iGlyphRefsDescending[iGlyphClass];
if (m_afClassifications[iGlyph] != fClassification)
{
m_afClassifications[iGlyph] = fClassification;
fPartitionsChanged= TRUE;
}
}
return fPartitionsChanged;
}
UINT CClassifier::ClassifyData(PWCHAR pbData, UINT cbData) //rmk
{
UINT fClass;
WCHAR wChar;
// for (fClass= FALSE; cbData--; ) fClass |= m_afClassifications[map_to_char_class[*pbData++]]; //rmk
#if 1 //rmk
for (fClass= FALSE; cbData--; )
{
wChar= 0x00FF & *pbData++;
fClass |= m_afClassifications[wChar];
}
#endif //rmk
return fClass;
}
UINT CClassifier::GlyphCount(BOOL fClassificationMask)
{
UINT ui, cGlyphs;
UINT fClass= m_cGlyphRefs[m_iGlyphRefsDescending[TOTAL_GLYPHS-1]]? 0x40000000 : 0x80000000; //rmk
for (cGlyphs= 0, ui= m_cPartitions; ui--; fClass >>= 1)
if (fClass & fClassificationMask) cGlyphs += m_aiGlyphPartitions[ui+1] - m_aiGlyphPartitions[ui];
return cGlyphs;
}
#if 0 //rmk
UINT CClassifier::GlyphList(BOOL fClassificationMask, PBYTE pabGlyphCodes, UINT cbCodes)
{
UINT ui, cGlyphs;
UINT fClass= m_cGlyphRefs[m_iGlyphRefsDescending[TOTAL_GLYPHS-1]]? 0x40000000 : 0x80000000; //rmk
for (cGlyphs= 0, ui= m_cPartitions; ui--; fClass >>= 1)
if (fClass & fClassificationMask)
{
UINT iGlyphClass = m_aiGlyphPartitions[ui];
UINT iGlyphClassLimit = m_aiGlyphPartitions[ui+1];
cGlyphs += iGlyphClassLimit - iGlyphClass;
if (pabGlyphCodes)
for (; iGlyphClass < iGlyphClassLimit && cbCodes; ++iGlyphClass, --cbCodes)
*pabGlyphCodes++ = (BYTE) m_iGlyphRefsDescending[iGlyphClass];
}
return cGlyphs;
}
#endif //rmk