2020-09-30 17:12:29 +02:00

879 lines
26 KiB
C++

// FragInfo.cpp -- Implementation for class CFragInfo
#include "stdafx.h"
#include "FragInfo.h"
#include "Memex.h"
extern char acMap[];
/////////////////////////////////////////////////////////////////////////////
// Worker functions
// Scans a sort key held as an array of WCHARs.
//   pwText / cwText -- the sort key words and their count.
// Words are skipped until the first one whose high byte equals
// SORT_KEY_SEPARATOR.  From that word (inclusive) to the end, every word is
// compared against a word made of two SORT_KEY_SEPARATOR bytes.
// Returns FALSE as soon as one of those words is greater than that value,
// TRUE otherwise (including when no separator word is present).
BOOL AllLowerCase(PWCHAR pwText, UINT cwText)
{
    const WORD wAllSeparators= SORT_KEY_SEPARATOR | (SORT_KEY_SEPARATOR << 8);

    // Advance to the first word carrying a separator in its high byte.
    while (cwText && ((*pwText) >> 8) != SORT_KEY_SEPARATOR)
    {
        ++pwText;
        --cwText;
    }

    // Examine everything from that word onward.
    while (cwText)
    {
        --cwText;

        if (*pwText++ > wAllSeparators)
            return FALSE;
    }

    return TRUE;
}
// Converts a sort key into a text-order based representation, for use by the
// IsAPrefix / IsASuffix / IsASubstring style text operations.
//
// The output is built up as individual alpha sort weights immediately
// followed by their diacritic and case weight bytes: [WORD][BYTE-BYTE] per
// char.  Non-existent diacritic and case weights are filled out with
// SORT_KEY_SEPARATOR (lowest sort value).
//
//   pwText / cwText -- the sort key words.  The buffer is byte-swapped in
//                      place while the function runs and swapped back
//                      before it returns.
//   pwOut  / cwOut  -- output buffer and its capacity in WCHARs.
//
// Returns the number of WCHARs written to pwOut (two per alpha weight, not
// counting the terminating zero), or 0 when the key is empty or the output
// buffer is too small.
UINT SortKeyText(PWCHAR pwText, UINT cwText, PWCHAR pwOut, UINT cwOut)
{
    UINT   nChar = 0;
    PCHAR  pWork, pAlpha, pBuild, pSeparator;
    PCHAR  pLimit;                              // one past the last output byte that belongs to a char slot
    PWCHAR pwTemp;
    PWCHAR pwEnd  = pwText + cwText;
    PCHAR  pStart = (PCHAR)pwOut;

    static BOOL fSetup, fCaseLR, fDiacriticLR;  // determine LR nature of case and diacritic weights.

    if (!fSetup)
    {
        BYTE szTest[] = "Au";                   // test case: one capital letter and one diacritic.
        char szSort[25];

        szTest[1] = 250;                        // converts second char to 'u' with diacritic.
        fSetup = fCaseLR = fDiacriticLR = TRUE; // set defaults to L --> R case and diacritic processing.

        int nLen = LCMapStringA(GetUserDefaultLCID(), LCMAP_SORTKEY, (PSTR)szTest, 2, szSort, sizeof(szSort));

        if (nLen > 8 && szSort[4] == SORT_KEY_SEPARATOR && szSort[5] != SORT_KEY_SEPARATOR)
        {
            nLen = 8;

            if (szSort[6] == SORT_KEY_SEPARATOR)
            {
                nLen--;
                fDiacriticLR = FALSE;           // we found R->L diacritic sort key generation.
            }

            if (szSort[nLen-1] == SORT_KEY_SEPARATOR && szSort[nLen] != SORT_KEY_SEPARATOR &&
                szSort[nLen+1] != SORT_KEY_SEPARATOR && szSort[nLen+2] == SORT_KEY_SEPARATOR)
                fCaseLR = FALSE;                // we found R->L case weight sort key generation.
        }
    }

    if (!cwText || !cwOut)
        return 0;

    // Copy alpha weights up to the first weight separator.  The scan is
    // bounded by pwEnd so a key without a separator cannot run off the input,
    // and the capacity test happens before the two words (plus the later
    // terminator) are written so the output can never be overrun.
    while (pwText < pwEnd && HIBYTE(*pwText) != SORT_KEY_SEPARATOR)
    {
        if (nChar + 2 >= cwOut)
            return 0;

        pwOut[nChar++] = *pwText++;
        pwOut[nChar++] = MAKEWORD(SORT_KEY_SEPARATOR, SORT_KEY_SEPARATOR);
    }

    pwOut[nChar] = 0;                           // terminating NULL for "strstr"
    pLimit       = (PCHAR)(pwOut + nChar);

    for (pwTemp = pwText; pwTemp < pwEnd; pwTemp++)
        *pwTemp = (*pwTemp >> 8) | (*pwTemp << 8);  // bring sort key weights in byte order

    if (fDiacriticLR)                           // L->R diacritic weights.
    {
        pAlpha = (PCHAR)pwText + 1;             // start at beginning of diacritics
        pBuild = (PCHAR)pwOut + 3;

        // Bounds are tested before the dereference.
        while (pAlpha < (PCHAR)pwEnd && *pAlpha != SORT_KEY_SEPARATOR)
        {
            if (pBuild < pLimit)                // never write past the slots created above
            {
                *pBuild = *pAlpha;              // fill out diacritic weights
                pBuild += 4;                    // next diacritic, moving from start to end
            }

            pAlpha++;
        }

        pWork = pAlpha;
    }
    else                                        // R->L diacritic weights (old style and French).
    {                                           // skip diacritic separator
        for (pWork = (PCHAR)pwText + 1; pWork < (PCHAR)pwEnd; pWork++)
            if (*pWork == SORT_KEY_SEPARATOR)   // find alpha weights separator
                break;

        pBuild = PCHAR(pwOut + nChar) - 1;

        for (pAlpha = pWork - 1; pAlpha > (PCHAR)pwText && pBuild > pStart; pAlpha--)
        {
            *pBuild = *pAlpha;                  // fill out diacritic weights
            pBuild -= 4;                        // next diacritic, moving from end to start
        }
    }

    if (fCaseLR)                                // L->R case weights.
    {
        pWork++;                                // skip case separator
        pBuild = (PCHAR)pwOut + 2;

        // Bounds are tested before the dereference.
        while (pWork < (PCHAR)pwEnd && *pWork != SORT_KEY_SEPARATOR)
        {
            if (pBuild < pLimit)                // never write past the slots created above
            {
                *pBuild = *pWork;               // fill out case weights
                pBuild += 4;                    // next case weight, moving from start to end
            }

            pWork++;
        }
    }
    else                                        // R->L case weights (old style sort keys).
    {
        pSeparator = pWork++;

        for ( ; pWork < (PCHAR)pwEnd; pWork++)  // skip case separator
            if (*pWork == SORT_KEY_SEPARATOR)   // find special weights separator
                break;

        pBuild = PCHAR(pwOut + nChar) - 2;

        for (pAlpha = pWork - 1; pAlpha > pSeparator && pBuild > pStart; pAlpha--)
        {
            *pBuild = *pAlpha;                  // fill out case weights
            pBuild -= 4;                        // next case, moving from end to start
        }
    }

    for (pwTemp = pwText; pwTemp < pwEnd; pwTemp++)
        *pwTemp = (*pwTemp >> 8) | (*pwTemp << 8);  // byte reverse sort keys weights

    return nChar;
}
// Tests whether sort key L is a prefix of sort key R.
//   pwStringL / cwStringL -- candidate prefix key and its length in WCHARs.
//   pwStringR / cwStringR -- base key and its length in WCHARs.
// The first word of each key (the alpha-num-punc prefix) is skipped.  The
// alpha weights -- the words before the first word whose high byte is
// SORT_KEY_SEPARATOR -- are compared first; then both keys are expanded with
// SortKeyText and the expanded forms are compared over the length of L's
// expansion.
// Returns TRUE only when both comparisons succeed.
BOOL IsAPrefix(PWCHAR pwStringL, UINT cwStringL, PWCHAR pwStringR, UINT cwStringR)
{
    WCHAR awcKeyL[512], awcKeyR[512];
    UINT  cAlphaL, cAlphaR, cwKeyL, cwKeyR, iw;

    if (cwStringL) { ++pwStringL; --cwStringL; }    // skip alpha-num-punc prefix
    if (cwStringR) { ++pwStringR; --cwStringR; }    // skip alpha-num-punc prefix

    // Count the alpha weights on each side.
    cAlphaL = 0;
    while (cAlphaL < cwStringL && HIBYTE(pwStringL[cAlphaL]) != SORT_KEY_SEPARATOR)
        ++cAlphaL;

    cAlphaR = 0;
    while (cAlphaR < cwStringR && HIBYTE(pwStringR[cAlphaR]) != SORT_KEY_SEPARATOR)
        ++cAlphaR;

    if (cAlphaL > cAlphaR)
        return FALSE;                               // prefix is larger than base word

    // The leading alpha weights must agree word for word.
    for (iw = 0; iw < cAlphaL; ++iw)
        if (pwStringL[iw] != pwStringR[iw])
            return FALSE;

    // Then compare the text-order expansions (alpha + diacritic + case).
    cwKeyL = SortKeyText(pwStringL, cwStringL, awcKeyL, sizeof(awcKeyL)/sizeof(WCHAR));
    cwKeyR = SortKeyText(pwStringR, cwStringR, awcKeyR, sizeof(awcKeyR)/sizeof(WCHAR));

    if (!cwKeyL || !cwKeyR)
        return FALSE;

    for (iw = 0; iw < cwKeyL; ++iw)
        if (awcKeyL[iw] != awcKeyR[iw])
            return FALSE;

    return TRUE;
}
// Tests whether sort key L is a suffix of sort key R.
//   pwStringL / cwStringL -- candidate suffix key and its length in WCHARs.
//   pwStringR / cwStringR -- base key and its length in WCHARs.
// The first word of each key (the alpha-num-punc prefix) is skipped.  The
// alpha weights of L are compared against the trailing alpha weights of R;
// then both keys are expanded with SortKeyText and L's expansion is compared
// against the tail of R's expansion.
// Returns TRUE only when both comparisons succeed.
BOOL IsASuffix(PWCHAR pwStringL, UINT cwStringL, PWCHAR pwStringR, UINT cwStringR)
{
    WCHAR  awcKeyL[512], awcKeyR[512];
    UINT   cAlphaL, cAlphaR, cwKeyL, cwKeyR, iw;
    PWCHAR pwTail;

    if (cwStringL) { ++pwStringL; --cwStringL; }    // skip alpha-num-punc prefix
    if (cwStringR) { ++pwStringR; --cwStringR; }    // skip alpha-num-punc prefix

    // Count the alpha weights on each side.
    cAlphaL = 0;
    while (cAlphaL < cwStringL && HIBYTE(pwStringL[cAlphaL]) != SORT_KEY_SEPARATOR)
        ++cAlphaL;

    cAlphaR = 0;
    while (cAlphaR < cwStringR && HIBYTE(pwStringR[cAlphaR]) != SORT_KEY_SEPARATOR)
        ++cAlphaR;

    if (cAlphaL > cAlphaR)
        return FALSE;                               // suffix is larger than base word

    // L's alpha weights must match the last cAlphaL alpha weights of R.
    pwTail = pwStringR + (cAlphaR - cAlphaL);

    for (iw = 0; iw < cAlphaL; ++iw)
        if (pwStringL[iw] != pwTail[iw])
            return FALSE;

    // Then compare the text-order expansions, again aligned at the end.
    cwKeyL = SortKeyText(pwStringL, cwStringL, awcKeyL, sizeof(awcKeyL)/sizeof(WCHAR));
    cwKeyR = SortKeyText(pwStringR, cwStringR, awcKeyR, sizeof(awcKeyR)/sizeof(WCHAR));

    if (!cwKeyL || !cwKeyR)
        return FALSE;

    pwTail = awcKeyR + cwKeyR - cwKeyL;

    for (iw = 0; iw < cwKeyL; ++iw)
        if (awcKeyL[iw] != pwTail[iw])
            return FALSE;

    return TRUE;
}
// Tests whether sort key L occurs anywhere inside sort key R.
//   pwL / cwL -- candidate substring key and its length in WCHARs.
//   pwR / cwR -- base key and its length in WCHARs.
// The first word of each key is skipped, both keys are expanded with
// SortKeyText, and the expansions are searched with wcsstr.
// Returns FALSE when either key is empty, when either expansion fails, or
// when L's expansion is longer than R's.
BOOL IsASubstring(PWCHAR pwL, UINT cwL, PWCHAR pwR, UINT cwR)
{
    WCHAR workL[512], workR[512];

    // An empty key has no leading word to skip; decrementing its length would
    // wrap around to a huge unsigned count.
    if (!cwL || !cwR)
        return FALSE;

    cwL = SortKeyText(pwL + 1, cwL - 1, workL, sizeof(workL)/sizeof(WCHAR));
    cwR = SortKeyText(pwR + 1, cwR - 1, workR, sizeof(workR)/sizeof(WCHAR));

    if (!cwL || !cwR || cwL > cwR)
        return FALSE;

    return (wcsstr(workR, workL) != NULL);
}
// End of Worker functions
/////////////////////////////////////////////////////////////////////////////
// Constructs an empty fragment: no image, no attached collections, no cached
// word/article/location sets, and reference type NoRefs.
// Every member this file reads is given a defined value here, including
// m_pRefNext, m_fFeedback and m_iWordMatchType, which are otherwise only
// assigned later by AttachParameters and the location-set accessors.
CFragInfo::CFragInfo()
{
    m_pwcFrag          = NULL;
    m_cwcFrag          = 0;
    m_pFrag            = NULL;
    m_cFrag            = 0;
    m_fEvaluated       = FALSE;
//  m_cwcAllocated     = 0;

    m_ptkc             = NULL;
    m_ptlc             = NULL;
    m_pcsVisibleWords  = NULL;
    m_pcsSelectedWords = NULL;
    m_pcsArticleSet    = NULL;
    m_fFlags           = 0;

    m_pRefList         = NULL;
    m_pRefNext         = NULL;
    m_rt               = NoRefs;
    m_fFeedback        = FALSE;
    m_iWordMatchType   = 0;
}
// Releases every reference this object holds and frees the per-text-set
// reference list.  The fragment image buffers (m_pwcFrag / m_pFrag) are not
// freed here.
CFragInfo::~CFragInfo()
{
//  if (m_pwcFrag) VFree(m_pwcFrag);

    if (m_ptkc)             { DetachRef(m_ptkc);             }
    if (m_ptlc)             { DetachRef(m_ptlc);             }
    if (m_pcsVisibleWords)  { DetachRef(m_pcsVisibleWords);  }
    if (m_pcsSelectedWords) { DetachRef(m_pcsSelectedWords); }
    if (m_pcsArticleSet)    { DetachRef(m_pcsArticleSet);    }

    // Walk the list, remembering the successor before each node is freed.
    PPerTextSet pptsNode= m_pRefList;

    while (pptsNode)
    {
        PPerTextSet pptsFollowing= pptsNode->pptsNext;

        if (pptsNode->pcsRefs) { DetachRef(pptsNode->pcsRefs); }

        VFree(pptsNode);

        pptsNode= pptsFollowing;
    }
}
// Factory: allocates a CFragInfo and attaches the given parameters to it.
// All arguments are forwarded unchanged to AttachParameters.
// If the __try block terminates abnormally after the object was allocated,
// the object is deleted in the __finally handler.
// Returns the new object.
CFragInfo *CFragInfo::NewFragInfo(CTokenCollection *ptkc, CTitleCollection *ptlc,
                                  RefType rt, BOOL fFeedback, UINT iWordMatchType,
                                  PWCHAR pwcFrag, UINT cwcFrag, PWCHAR pFrag, UINT cFrag)
{
    CFragInfo *pfiCreated= NULL;

    __try
    {
        pfiCreated= New CFragInfo();

        pfiCreated->AttachParameters(ptkc, ptlc, rt, fFeedback, iWordMatchType,
                                     pwcFrag, cwcFrag, pFrag, cFrag);
    }
    __finally
    {
        // Only clean up when leaving abnormally with a half-built object.
        if (pfiCreated && _abnormal_termination())
        {
            delete pfiCreated;
            pfiCreated= NULL;
        }
    }

    return pfiCreated;
}
// Binds this fragment to its token and title collections, records the
// reference type, feedback flag and word match type, and then installs the
// fragment image through SetImage.
// The stored image length is reset to zero first so SetImage compares the
// new image against an empty one.
// Always returns TRUE.
BOOL CFragInfo::AttachParameters(CTokenCollection *ptkc, CTitleCollection *ptlc,
                                 RefType rt, BOOL fFeedback, UINT iWordMatchType,
                                 PWCHAR pwcFrag, UINT cwcFrag, PWCHAR pFrag, UINT cFrag)
{
    AttachRef(m_ptkc, ptkc);
    AttachRef(m_ptlc, ptlc);

    m_iWordMatchType= iWordMatchType;
    m_fFeedback     = fFeedback;
    m_rt            = rt;

//  m_cwcAllocated= CWC_FRAGMENT_GRANULE * ((cwcFrag + CWC_FRAGMENT_GRANULE - 1) / CWC_FRAGMENT_GRANULE);
//  m_pwcFrag= PWCHAR(VAlloc(FALSE, m_cwcAllocated * sizeof(WCHAR));
//  if (!m_pwcFrag) return FALSE;

    m_cwcFrag= 0;

    SetImage(pwcFrag, cwcFrag, pFrag, cFrag);

    return TRUE;
}
// Returns the reference type currently stored in m_rt.
RefType CFragInfo::GetRefType()
{
    const RefType rtCurrent= m_rt;

    return rtCurrent;
}
// Returns the compressed article set, evaluating it first via CoerceToValue
// when the fragment has no value yet.  The result may be NULL.
// Asserts that the reference type is AllWords or AnyWord.
CCompressedSet *CFragInfo::GetCSArticleSet()
{
    ASSERT(m_rt == AllWords || m_rt == AnyWord);

    if (!HasValue())
    {
        CoerceToValue();
    }

    return m_pcsArticleSet;
}
// Returns a new CIndicatorSet built from the compressed article set, or NULL
// when there is no article set.
CIndicatorSet *CFragInfo::GetArticleSet()
{
    CCompressedSet *pcsArticles= GetCSArticleSet();

    return pcsArticles ? CIndicatorSet::NewIndicatorSet(pcsArticles) : NULL;
}
// Ensures the fragment has been evaluated, then rewinds the location-set
// cursor (m_pRefNext) to the head of the reference list.
void CFragInfo::MoveToFirstLocationSet()
{
    if (!HasValue())
    {
        CoerceToValue();
    }

    m_pRefNext= m_pRefList;
}
// Looks up the compressed location set for text set iTS.
// The reference list is walked from its head using m_pRefNext as the cursor;
// the walk stops at the first entry whose text set index exceeds iTS.
// Returns the matching entry's pcsRefs, or NULL when no entry matches.
// m_pRefNext is left wherever the walk stopped.
CCompressedSet *CFragInfo::GetCSLocationSet(UINT iTS)
{
    if (!HasValue())
    {
        CoerceToValue();
    }

    m_pRefNext= m_pRefList;

    while (m_pRefNext && m_pRefNext->its <= iTS)
    {
        if (m_pRefNext->its == iTS)
        {
//          m_pRefNext= m_pRefNext->pptsNext;

            return m_pRefNext->pcsRefs;
        }

        m_pRefNext= m_pRefNext->pptsNext;
    }

    return NULL;
}
// Returns a new CIndicatorSet built from the compressed location set of text
// set iTS, or NULL when that text set has no location set.
CIndicatorSet *CFragInfo::GetLocationSet(UINT iTS)
{
    CCompressedSet *pcsLocations= GetCSLocationSet(iTS);

    return pcsLocations ? CIndicatorSet::NewIndicatorSet(pcsLocations) : NULL;
}
// Hands back the fragment image.
//   ppwc -- receives the pointer stored in m_pwcFrag.
// Returns the image length m_cwcFrag.
UINT CFragInfo::GetImage (const WCHAR **ppwc)
{
    const UINT cwcImage= m_cwcFrag;

    *ppwc= m_pwcFrag;

    return cwcImage;
}
// Replaces the fragment image (both the pwcFrag/cwcFrag and pFrag/cFrag
// forms) and lets EvaluateChange decide what must be invalidated.
// The reference type, feedback flag and match type are passed as their own
// "old" values since they do not change here.
// Returns EvaluateChange's result.
BOOL CFragInfo::SetImage(PWCHAR pwcFrag, UINT cwcFrag, PWCHAR pFrag, UINT cFrag)
{
    // Remember the previous image...
    PWCHAR pwcPrevious= m_pwcFrag;
    UINT   cwcPrevious= m_cwcFrag;
    PWCHAR pPrevious  = m_pFrag;
    UINT   cPrevious  = m_cFrag;

    // ...then install the new one.
    m_pwcFrag= pwcFrag;
    m_cwcFrag= cwcFrag;
    m_pFrag  = pFrag;
    m_cFrag  = cFrag;

    return EvaluateChange(pwcPrevious, cwcPrevious, pPrevious, cPrevious,
                          m_rt, m_fFeedback, m_iWordMatchType);
}
// Changes the reference type and feedback flag.
//   rt        -- new reference type.
//   fFeedback -- new feedback flag.
// The previous values of both are handed to EvaluateChange so that a change
// in either one is detected.
// Returns EvaluateChange's result.
BOOL CFragInfo::SetReferenceType(RefType rt, BOOL fFeedback)
{
    RefType rtOld       = m_rt;
    BOOL    fFeedbackOld= m_fFeedback;

    m_rt       = rt;
    m_fFeedback= fFeedback;

    // Pass the saved flag, not the new one: EvaluateChange compares it with
    // m_fFeedback, so passing the new value would hide a feedback-only change.
    return EvaluateChange(m_pwcFrag, m_cwcFrag, m_pFrag, m_cFrag, rtOld, fFeedbackOld, m_iWordMatchType);
}
// Replaces the fragment image, the reference type and the feedback flag in
// one step.
//   pwcFrag / cwcFrag, pFrag / cFrag -- the new image in both forms.
//   rt        -- new reference type.
//   fFeedback -- new feedback flag.
// All previous values are captured before being overwritten and handed to
// EvaluateChange so each kind of change is detected.
// Returns EvaluateChange's result.
BOOL CFragInfo::SetImageAndType(PWCHAR pwcFrag, UINT cwcFrag, PWCHAR pFrag, UINT cFrag, RefType rt, BOOL fFeedback)
{
    PWCHAR  pwcOld      = m_pwcFrag;
    UINT    cwcOld      = m_cwcFrag;
    PWCHAR  pOld        = m_pFrag;
    UINT    cOld        = m_cFrag;
    RefType rtOld       = m_rt;
    BOOL    fFeedbackOld= m_fFeedback;   // the stored flag, not the incoming parameter

    m_pwcFrag  = pwcFrag;
    m_cwcFrag  = cwcFrag;
    m_pFrag    = pFrag;
    m_cFrag    = cFrag;
    m_rt       = rt;
    m_fFeedback= fFeedback;

    return EvaluateChange(pwcOld, cwcOld, pOld, cOld, rtOld, fFeedbackOld, m_iWordMatchType);
}
// Changes the word match type.
// When the requested type equals the current one nothing is done and TRUE is
// returned; otherwise the new type is stored and EvaluateChange is called
// with the previous type, and its result is returned.
BOOL CFragInfo::SetMatchCriteria(UINT iWordMatchType)
{
    if (m_iWordMatchType != iWordMatchType)
    {
        const UINT iWordMatchPrevious= m_iWordMatchType;

        m_iWordMatchType= iWordMatchType;

        return EvaluateChange(m_pwcFrag, m_cwcFrag, m_pFrag, m_cFrag,
                              m_rt, m_fFeedback, iWordMatchPrevious);
    }

    return TRUE;
}
// Re-runs EvaluateChange with the current image, reference type and feedback
// flag, but with UINT(-1) as the "old" match type.
// Returns EvaluateChange's result.
BOOL CFragInfo::InvalidateMatches()
{
    const UINT iNoPreviousMatchType= UINT(-1);

    return EvaluateChange(m_pwcFrag, m_cwcFrag, m_pFrag, m_cFrag,
                          m_rt, m_fFeedback, iNoPreviousMatchType);
}
// Compares the previous fragment state (passed in) with the current member
// state and invalidates / recomputes cached results accordingly.
//   pwcOld / cwcOld   -- previous image; compared word-by-word with m_pwcFrag.
//   pOld / cOld       -- previous alternate image; not referenced in this body.
//   rtOld             -- previous reference type.
//   fFeedbackOld      -- previous feedback flag.
//   iWordMatchTypeOld -- previous word match type.
// Returns FALSE when neither image, match type, reference type nor feedback
// flag changed; TRUE otherwise.
BOOL CFragInfo::EvaluateChange(PWCHAR pwcOld, UINT cwcOld, PWCHAR pOld, UINT cOld,
RefType rtOld, BOOL fFeedbackOld, UINT iWordMatchTypeOld)
{
// The image changed if the lengths differ or any word differs.
BOOL fImageChanged= FALSE;
if (cwcOld != m_cwcFrag) fImageChanged= TRUE;
else
{
PWCHAR pwc= pwcOld, pwcNew= m_pwcFrag;
UINT cwc= cwcOld;
for (; cwc; cwc--) if (*pwc++ != *pwcNew++) break;
if (cwc) fImageChanged= TRUE;
}
BOOL fMatchTypeChanged= m_iWordMatchType != iWordMatchTypeOld;
// Decide whether the cached visible word set can be kept.
// NOTE: the "else" below pairs with "if (fMatchTypeChanged)", not with the
// outer "if (m_pcsVisibleWords)".
if (m_pcsVisibleWords)
if (fMatchTypeChanged) DetachRef(m_pcsVisibleWords);
else
if (fImageChanged)
{
// The old visible set is retained only when the image grew and the old
// image is a prefix / substring / suffix of the new one (per match type);
// it is then passed to TokensContaining below.
BOOL fSubset= FALSE;
if (cwcOld < m_cwcFrag && !fMatchTypeChanged)
switch(m_iWordMatchType)
{
case BEGIN_WITH: // Begins With...
fSubset= IsAPrefix(pwcOld, cwcOld, m_pwcFrag, m_cwcFrag);
break;
case CONTAIN: // Contains...
fSubset= IsASubstring(pwcOld, cwcOld, m_pwcFrag, m_cwcFrag);
break;
case END_WITH: // Ends With...
fSubset= IsASuffix(pwcOld, cwcOld, m_pwcFrag, m_cwcFrag);
break;
case MATCH: // Matches
case HAVE_SAME_STEM: // From the same root word
fSubset= FALSE;
break;
}
if (!fSubset) DetachRef(m_pcsVisibleWords);
}
// Any image or match type change drops the selected word set.
if (m_pcsSelectedWords && (fImageChanged || fMatchTypeChanged)) DetachRef(m_pcsSelectedWords);
BOOL fRefTypeChanged= m_rt != rtOld || m_fFeedback != fFeedbackOld;
if (!fImageChanged && !fMatchTypeChanged && !fRefTypeChanged) return FALSE;
// Something changed: discard the evaluated value held under the old type.
DiscardValue(rtOld);
CIndicatorSet *pisOldTerms= NULL, *pisTerms= NULL;
// The visible word set is recomputed only for a non-empty image.
if ((fImageChanged || fMatchTypeChanged) && m_cwcFrag)
__try
{
// Need to recompute the visible word set...
// Start from the retained visible set if there is one, else from all
// active tokens.
if (m_pcsVisibleWords) AttachRef(pisOldTerms, CIndicatorSet::NewIndicatorSet(m_pcsVisibleWords ));
else AttachRef(pisOldTerms, CIndicatorSet::NewIndicatorSet(m_ptkc->ActiveTokens()));
if (m_iWordMatchType == HAVE_SAME_STEM)
{
ASSERT(m_ptkc->SimilaritySearch());
// Stem matching works on a zero-terminated stack copy of m_pFrag.
PWCHAR pwc= PWCHAR(_alloca(sizeof(WCHAR) * (m_cFrag + 1)));
CopyMemory(pwc, m_pFrag, m_cFrag * sizeof(WCHAR));
pwc[m_cFrag]= 0;
pisTerms = GetWordsWithTheSameStem(pwc, m_cFrag, m_ptkc->RowCount());
}
else
{
// Other match types search with a zero-terminated stack copy of m_pwcFrag.
PWCHAR pwc= PWCHAR(_alloca(sizeof(WCHAR) * (m_cwcFrag + 1)));
CopyMemory(pwc, m_pwcFrag, m_cwcFrag * sizeof(WCHAR));
pwc[m_cwcFrag]= 0;
// NOTE(review): bits 1 and 2 of acMap[] presumably select start/end
// anchoring for the match type -- confirm against acMap's definition.
pisTerms= m_ptkc->TokensContaining(pwc,
acMap[m_iWordMatchType] & 1,
acMap[m_iWordMatchType] & 2,
pisOldTerms
);
}
if (pisOldTerms) DetachRef(pisOldTerms);
ChangeRef(m_pcsVisibleWords, CCompressedSet::NewCompressedSet(pisTerms));
}
__finally
{
// Runs on both normal and abnormal exit from the __try block.
if (pisOldTerms) DetachRef(pisOldTerms);
if (pisTerms ) { delete pisTerms; pisTerms= NULL; }
}
return TRUE;
}
// Throws away the evaluated result that was produced under reference type
// rtOld and marks the fragment as not evaluated.
//   NoRefs             -- nothing to release.
//   AllWords / AnyWord -- releases the article set.
//   TokenRefs          -- frees the whole per-text-set reference list and
//                         clears the list cursor.
// Nothing is released when HasValue() is false; m_fEvaluated is cleared in
// every case.
void CFragInfo::DiscardValue(RefType rtOld)
{
    if (HasValue())
    {
        if (rtOld == AllWords || rtOld == AnyWord)
        {
            if (m_pcsArticleSet) { DetachRef(m_pcsArticleSet); }
        }
        else if (rtOld == TokenRefs)
        {
            // m_pRefList itself is the cursor; it ends up NULL.
            while (m_pRefList)
            {
                PPerTextSet pptsFollowing= m_pRefList->pptsNext;

                DetachRef(m_pRefList->pcsRefs);
                VFree(m_pRefList);

                m_pRefList= pptsFollowing;
            }

            m_pRefNext= NULL;
        }
    }

    m_fEvaluated= FALSE;
}
// Evaluates the fragment: turns the current word selection into either an
// article set (AllWords / AnyWord) or a per-text-set list of token reference
// sets (TokenRefs), then marks the fragment as evaluated.
// Asserts that the fragment is not already evaluated and that the reference
// type is not NoRefs.
void CFragInfo::CoerceToValue()
{
ASSERT(!m_fEvaluated );
ASSERT(m_rt != NoRefs);
UINT cts= m_ptkc->TextSetCount();
// One token-list head per text set, allocated on the stack.
PTokenInfo *ppti= (PTokenInfo *) _alloca(cts * sizeof(PTokenInfo));
ASSERT(ppti);
CIndicatorSet *pisTemp = NULL;
CIndicatorSet *pisArticles = NULL;
CIndicatorSet *pisTokens = NULL;
PPerTextSet pPTS = NULL;
__try
{
pisTemp= GetSelection();
// No selection at all: there is nothing to evaluate.
if (!pisTemp)
{
m_fEvaluated= TRUE;
__leave;
}
// Fill ppti[] from the selection; each entry is then walked via
// ptiTextSetLink below.
m_ptkc->MapToTokenLists(pisTemp, ppti, cts);
delete pisTemp; pisTemp= NULL;
if (m_rt == AllWords || m_rt == AnyWord)
{
// Accumulate article references from every text set into one indicator
// set sized by the title collection's row count.
AttachRef(pisArticles, CIndicatorSet::NewIndicatorSet(m_ptlc->RowCount()));
UINT iTextSet;
for (iTextSet= cts; iTextSet--; )
{
PTokenInfo pti= ppti[iTextSet];
if (!pti) continue;
CTextSet *pts = m_ptkc->GetTextSet (iTextSet);
const UINT *piMap = m_ptlc->UniversalTitleMap(iTextSet);
for (; pti; pti= pti->ptiTextSetLink)
pts->IndicateArticleRefs(pisArticles, pti->iDescriptor, piMap);
}
AttachRef(m_pcsArticleSet, CCompressedSet::NewCompressedSet(pisArticles));
}
else
if (m_rt == TokenRefs)
{
// The code below pushes PerTextSet items on from high iTextSet to low iTextSet.
// This leaves the resulting m_pRefList chain ordered. The code which reads
// the chain presumes this ordering.
UINT iTextSet= cts;
for (; iTextSet--; )
{
PTokenInfo pti= ppti[iTextSet];
if (!pti) continue;
CTextSet *pts= m_ptkc->GetTextSet(iTextSet);
// A fresh indicator set per text set, sized by that text set's token count.
ChangeRef(pisTokens, CIndicatorSet::NewIndicatorSet(pts->TokenCount()));
for (; pti; pti= pti->ptiTextSetLink)
pts->IndicateTokenRefs(pisTokens, pti->iDescriptor);
pPTS= PPerTextSet(VAlloc(FALSE, sizeof(PerTextSet)));
pPTS->its= iTextSet;
AttachRef(pPTS->pcsRefs, CCompressedSet::NewCompressedSet(pisTokens));
pPTS->pptsNext= m_pRefList;
// Once linked into the list, pPTS is cleared so __finally will not free it.
m_pRefList= pPTS; pPTS= NULL;
}
m_pRefNext= m_pRefList;
}
}
__finally
{
// Release whatever temporaries are still held, on normal or abnormal exit.
if (pisTemp) { delete pisTemp; pisTemp = NULL; }
if (pisArticles) DetachRef(pisArticles);
if (pisTokens ) DetachRef(pisTokens);
if (pPTS) VFree(pPTS);
}
m_fEvaluated= TRUE;
}
// This routine is optimized as follows...
// First we look at each dictionary for words that have the same stem as the given word
// For each match, we enter that word into a new dictionary, pCombinedDict. This will
// ensure that duplicates do not exist.
// After we do this for all the dictionaries pCombinedDict has the unique list of words
// across all the dictionaries that have the common stem with the given word.
// Now, for each word in the dictionary, we will find variants using m_ptkc and OR the
// results into a CIndicatorSet, pTmp.
//
// When we are attempting to find words with the same stem as a designated stop word, there
// will be no matching words. When there are no matching words, we simply "match exact word"
// and get the results from TokensContaining.
// Builds the set of tokens whose words share a stem with the given word.
//   lpsubstring / cblpsubstring -- the word and its length in characters.
//   cTokens                     -- size of the indicator set to create.
// Each active text set's dictionary is asked for the concept of the word;
// every word of a matching concept is entered into a combined dictionary,
// which removes duplicates.  Each unique word is then converted to a sort
// key and looked up with TokensContaining, and the results are OR-ed
// together.  When no dictionary knows the word, the word itself is looked up
// as an exact match instead.
// Returns a new CIndicatorSet owned by the caller.  A zero-length word
// yields the set as created by NewIndicatorSet(cTokens, FALSE).
CIndicatorSet * CFragInfo::GetWordsWithTheSameStem(PWCHAR lpsubstring, WORD cblpsubstring, DWORD cTokens)
{
    CIndicatorSet *pisTmp        = NULL;
    CIndicatorSet *pisTmp2       = NULL;
    PWCHAR         pbDest        = NULL;    // sort key work buffer
    UINT           cbDest        = 0;       // capacity of pbDest in bytes
    UINT           cbNeeded      = 0;
    CDictionary   *pCombinedDict = NULL;    // pCombinedDict is used to record all the words resulting
                                            // from the stem match in all the dicts.

    __try
    {
        DWORD  dwConId,
               cWordCount;
        WORD   cWord;
        PWCHAR pMatchingWord;
        BOOL   fFound = FALSE;

        pisTmp = CIndicatorSet::NewIndicatorSet(cTokens, FALSE);

        cbDest = MaxSortKeyBytes(cblpsubstring);
        pbDest = (PWCHAR) VAlloc(FALSE, cbDest);

        if (cblpsubstring == 0) __leave;

        // Create a combined dictionary
        pCombinedDict= CDictionary::NewDictionary(FALSE);

        // For each collection's dictionary, find the words that have the same stem as the passed in word and
        // enter the matching words into a new dictionary, pCombinedDict
        UINT cts = m_ptkc->TextSetCount();

        for (UINT j = 0; j < cts; j++)
        {
            // Ignore collections that are currently not active
            if (!m_ptkc->IsActive(j)) continue;

            CDictionary *pDict = m_ptkc->GetTextSet(j)->PDict();

            if (!pDict) continue;

            // EnterWord with the last param set to TRUE is only looking up, not entering a word.
            dwConId = pDict->EnterWord(lpsubstring, cblpsubstring, TRUE, TRUE);

            if (dwConId == EOL)     // This word doesn't exist in the current dictionary!
                continue;

            fFound = TRUE;          // We have matching words in at least one of the dictionaries!

            cWordCount = pDict->GetWordCountOfConcept(dwConId);
            ASSERT(cWordCount);

            for (UINT i = 0; i < cWordCount; i++)
            {
                if (i == 0)
                    pMatchingWord = pDict->GetFirstWordOfConcept(dwConId);
                else
                    pMatchingWord = pDict->GetNextWordOfConcept(dwConId);

                // Avoid the overhead of stemming. Enter each word as if it were a stop word. We are only REUSING
                // CDict as a data structure to hold our strings, so the semantics of the dictionary don't matter much
                pCombinedDict->EnterWord(pMatchingWord, wcslen(pMatchingWord), TRUE, FALSE);
            }
        }

        pCombinedDict->EndDictInsertions();

        if (fFound)
        {
            // Now, for each unique word in the pCombinedDict dictionary, find the variants from m_ptkc
            cWordCount = pCombinedDict->GetWordCountOfConcept(EOL);
            ASSERT(cWordCount);

            for (UINT i = 0; i < cWordCount; i++)
            {
                if (i == 0)
                    pMatchingWord = pCombinedDict->GetFirstWordOfConcept(EOL);
                else
                    pMatchingWord = pCombinedDict->GetNextWordOfConcept(EOL);

                cWord    = wcslen(pMatchingWord);
                cbNeeded = MaxSortKeyBytes(cWord);

                // Grow the work buffer only when this word's sort key cannot
                // fit in what is already allocated.
                if (cbNeeded > cbDest)
                {
                    VFree(pbDest);
                    pbDest = NULL;      // keeps __finally from freeing it again if VAlloc raises
                    cbDest = 0;

                    pbDest = (PWCHAR) VAlloc(FALSE, cbNeeded);
                    cbDest = cbNeeded;
                }

                LCSortKeyW(GetUserDefaultLCID(), 0, pMatchingWord, cWord, pbDest, cbNeeded);

                pisTmp2 = m_ptkc->TokensContaining(pbDest, 1, 1);

                if (pisTmp2)
                {
                    pisTmp->ORWith(pisTmp2);

                    delete pisTmp2; pisTmp2= NULL;
                }
            }
        }
        else
        {
            // No dictionary knows the word (e.g. a stop word): fall back to an
            // exact match on the word itself.
            if (pisTmp)
            {
                delete pisTmp;
                pisTmp = NULL;          // keeps __finally from deleting it again if a call below raises
            }

            LCSortKeyW(GetUserDefaultLCID(), 0, lpsubstring, cblpsubstring, pbDest, MaxSortKeyBytes(cblpsubstring));

            pisTmp = m_ptkc->TokensContaining(pbDest, 1, 1);
        }
    }
    __finally
    {
        if (pCombinedDict) { delete pCombinedDict; pCombinedDict = NULL; }
        if (pbDest       ) { VFree(pbDest);        pbDest        = NULL; }
        if (pisTmp2      ) { delete pisTmp2;       pisTmp2       = NULL; }

        // The result is only discarded when leaving abnormally.
        if (_abnormal_termination() && pisTmp) { delete pisTmp; pisTmp = NULL; }
    }

    return pisTmp;
}
// Returns the compressed visible word set (may be NULL).
CCompressedSet *CFragInfo::GetCSWordSet()
{
    CCompressedSet *pcsVisible= m_pcsVisibleWords;

    return pcsVisible;
}
// Returns a new CIndicatorSet built from the compressed visible word set, or
// NULL when there is no visible word set.
CIndicatorSet *CFragInfo::GetWordSet()
{
    CCompressedSet *pcsWords= GetCSWordSet();

    if (!pcsWords)
        return NULL;

    return CIndicatorSet::NewIndicatorSet(pcsWords);
}
// Records which words are selected.
//   pisSelection -- the selection; NULL, or a selection whose count equals
//                   the active token count, clears any stored selection
//                   instead of storing one.
// The evaluated value is discarded afterwards in every case.
void CFragInfo::SetSelection(CIndicatorSet *pisSelection)
{
    const BOOL fNothingToStore=    !pisSelection
                                || pisSelection->SelectionCount() == m_ptkc->ActiveTokens()->SelectionCount();

    if (!fNothingToStore)
    {
        ChangeRef(m_pcsSelectedWords, CCompressedSet::NewCompressedSet(pisSelection));
    }
    else if (m_pcsSelectedWords)
    {
        DetachRef(m_pcsSelectedWords);
    }

    DiscardValue(m_rt);

    m_fEvaluated= FALSE;
}
// Returns the effective selection in compressed form: the selected word set
// when there is one, otherwise the visible word set, otherwise NULL.
CCompressedSet *CFragInfo::GetCSSelection()
{
    return m_pcsSelectedWords ? m_pcsSelectedWords
         : m_pcsVisibleWords  ? m_pcsVisibleWords
         :                      NULL;
}
// Returns a new CIndicatorSet built from the effective selection, or NULL
// when there is neither a selected nor a visible word set.
CIndicatorSet *CFragInfo::GetSelection()
{
    CCompressedSet *pcsEffective= GetCSSelection();

    if (!pcsEffective)
        return NULL;

    return CIndicatorSet::NewIndicatorSet(pcsEffective);
}