// Hilite.cpp -- Implementation of the text Hilite feature
//

#include "stdafx.h"
#include "hilite.h"
#include "ftslex.h"

///////////////////////////// Hiliting Functions ////////////////////////////
// Phase (1) Set up a new Hiliter

CHiliter* CHiliter::NewHiliter(HSEARCHER hSearcher) {
    CHiliter* phil = NULL;
    __try {                             // New is our version of new with __FILE__ and __LINE__ added
        phil = New CHiliter();
        phil->InitHiliter(hSearcher);
    }
    __except(FilterFTExceptions(_exception_code())) {
        if (phil) { delete phil; phil = NULL; }
    }
    return phil;
}

CHiliter::CHiliter() : CGlobals(Hiliter) {
    // start by initializing all our variables
    m_philNext = NULL;              // so we can chain our hiliters together
    m_pSearcher = NULL;             // the searcher passed to us
    m_pbTextBuffer = NULL;
    m_cbCarryOver = 0;              // Hilite text carry-over values
    m_iCharSetCarryOver = 0;
    m_lcidCarryOver = 0;
    m_paTokenInfo = NULL;           // pointer to virtual buffer to hold token info
    m_cTokenInfo = 0;               // count of tokens in text passed
    m_pavr = NULL;                  // pointer to value reference array to hold words as they come in
    m_cbScanned = 0;                // number of bytes added in previous passes
    m_serialFind = -1;              // validity count
    m_baseNext = 0;                 // to reduce binary searches
    m_iTokenNext = 0;
    m_pMask = NULL;                 // mask for words selected
    for (int iFrag=0; iFrag<cFRAG_MAX; iFrag++)
        m_apMasks[iFrag] = NULL;    // masks for words in phrases
    m_cFrags = 0;
    m_base = m_iTokenStart = 0;
    m_limit = m_iLimit = 0;
    m_cLit = m_cMax = 0;
    m_paHilites = NULL;
}

void CHiliter::InitHiliter(HSEARCHER hSearcher) {
    m_idProcess = ::GetCurrentProcessId();
    Link();                             // this links our new global object into a list so that we can be sure that
                                        // .. everything gets deleted when we exit. ~CGlobals() does the UnLink()
    m_Signature = GLOBAL_SIGNATURE;     // .. in good standing
    m_pSearcher = (CGlobals*)hSearcher; // handle is really a pointer
    m_pSearcher->RegisterHiliter(this); // add this hiliter to our list
}

CHiliter::~CHiliter() {
    if (m_pbTextBuffer) FreeVirtualBuffer(&m_vbTextBuffer);
    if (m_paTokenInfo) FreeVirtualBuffer(&m_vbTokenInfo);
    if (m_pavr) delete m_pavr;
    if (CGlobals::ValidObject((CGlobals*)m_pSearcher, Searcher))    // if Searcher is still around
        m_pSearcher->UnRegisterHiliter(this);                       // .. we need to tell it we have one less Hiliter
    if (m_pMask) DetachRef(m_pMask);                                // remove any AND/OR mask
    for (int iFrag=0; iFrag<cFRAG_MAX; iFrag++)                     // remove any phrase search masks
        if (m_apMasks[iFrag]) DetachRef(m_apMasks[iFrag]);
}

BOOL CHiliter::ValidObject(HHILITER hhil) {
    CHiliter* phil = (CHiliter*)hhil;
    return CGlobals::ValidObject(phil, Hiliter);
}
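
/* Illustrative only (not part of the original source): a caller is assumed to
   drive Phase (1) roughly like this -- NewHiliter() returns NULL on failure,
   and deleting the hiliter unregisters it from the searcher automatically:

       HHILITER hhil = (HHILITER) CHiliter::NewHiliter(hSearcher);
       if (!hhil)
           return OUT_OF_MEMORY;        // hypothetical caller-side error handling
       ...                              // Phases (2)-(4) below
       delete (CHiliter*) hhil;         // ~CHiliter() does the UnRegisterHiliter()
*/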

//////////////////////////////// Phase (2) ////////////////////////////////////////
/*
   The text is passed in sections by ScanDisplayText(). Each section is assumed to be
   in multi-byte (MBCS) form and has a locale associated with it. The locale is
   important for tokenizing.
   The first step is to break the text into tokens. We do not store the actual text
   but store the following information about each token:

       typedef struct {
           int base;    // offset of token (in bytes from start of text)
           int limit;   // offset of end of token
           int type;    // type (Punctuation=0, Numbers=1, Words=2)
           int iSorted; // index into m_ppdSorted
       } TOKEN_INFO;

   We don't know how many tokens there are going to be, so we allocate a virtual
   array to hold this information.
   In order to find iSorted, we need to convert the multi-byte string to Unicode and
   look this up in our hash table -- where we previously saved these indexes for
   each token. If no entry in the hash table is found, we store -1.
*/
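
/* Illustrative sketch (not part of the original source): the display code is
   assumed to feed the hiliter one run of same-charset, same-locale text at a
   time; a word split across runs is carried over and rejoined automatically:

       CHiliter* phil = ...;                    // from Phase (1)
       phil->ClearDisplayText();                // start a fresh scan
       for (each text run on display)           // pseudo-loop over the runs
           phil->ScanDisplayText(pbRun, cbRun, iCharsetRun, lcidRun);
       // any trailing carried-over token is flushed later by QueryHilites()
*/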

void CHiliter::InitTokenInfo() {            // do any initialization needed
    ASSERT (!m_pbTextBuffer);
    ASSERT (!m_pavr);
    ClearDisplayText();                     // initialize variables
    // allocate our virtual memory buffers
    BYTE* pbTextBuffer = NULL;
    TOKEN_INFO* paTokenInfo = NULL;
    CAValRef* pavr = NULL;
    __try {
        CreateVirtualBuffer(&m_vbTextBuffer, 0, 4096*4096);     // buffer for incoming Hilite text
        pbTextBuffer = (BYTE*)m_vbTextBuffer.Base;              // (needed for words split on input)
        CreateVirtualBuffer(&m_vbTokenInfo, 0, 4096*4096);      // buffer to store token spreads
        paTokenInfo = (TOKEN_INFO*)m_vbTokenInfo.Base;
        pavr = CAValRef::NewValRef(MAX_TOKENS_HILITE);          // allocate a CAValRef object
    }
    __except(FilterFTExceptions(_exception_code())) {
        if (pbTextBuffer) FreeVirtualBuffer(&m_vbTextBuffer);
        if (paTokenInfo) FreeVirtualBuffer(&m_vbTokenInfo);
        if (pavr) delete pavr;
        return;
    }
    m_pbTextBuffer = pbTextBuffer;
    m_paTokenInfo = paTokenInfo;
    m_pavr = pavr;
}

ERRORCODE CHiliter::ScanDisplayText(BYTE* pbText, int cbText, UINT iCharset, UINT lcid) {
    // this can get called repeatedly
    if (!cbText || !pbText) return 0;       // if there was no text we are done now
    __try {
        // do any initialization needed
        if (!m_paTokenInfo) InitTokenInfo();
        ASSERT (m_pbTextBuffer);
        ASSERT (m_pavr);
        // add the tokens to our custom hash table
        m_pHash = m_pSearcher->GetHiliterHashTable();
        // tokenize the text and convert tokens to value references
        // NOTE: we don't process the last token we are passed in case it is split
        //       we call this routine one more time to process the last token
        //       .. when we know that there is no more input
        // we can't split tokenizing across a change in charset or locale so just process the carryover
        if (m_cbCarryOver && (m_iCharSetCarryOver != iCharset || m_lcidCarryOver != lcid))
            m_cbCarryOver = FlushCarryOverText();
        // if we had a split token last time, we have to join the old and new text
        if (m_cbCarryOver) {                // first check that our buffer is large enough
            if (PBYTE(m_pbTextBuffer + m_cbCarryOver + cbText) >= PBYTE(m_vbTextBuffer.CommitLimit)) {
                PVOID pNewEnd = PVOID(m_pbTextBuffer+m_cbCarryOver+cbText+CB_COMMIT_HILITEBUF);
                if (!ExtendVirtualBuffer(&m_vbTextBuffer, pNewEnd))
                    return OUT_OF_MEMORY;
            }
            MoveMemory(m_pbTextBuffer+m_cbCarryOver, pbText, cbText);  // join buffers
            pbText = m_pbTextBuffer;        // switch the pointer
            cbText += m_cbCarryOver;        // adjust the length
        }
        // now scan the text we were passed
        m_cbCarryOver = AppendText(pbText, cbText, FALSE, iCharset, lcid);
        if (m_cbCarryOver) {                // copy carry text to start of buffer
            if (PBYTE(m_pbTextBuffer+m_cbCarryOver) >= PBYTE(m_vbTextBuffer.CommitLimit)) {
                PVOID pNewEnd = PVOID(m_pbTextBuffer+m_cbCarryOver+CB_COMMIT_HILITEBUF);
                if (!ExtendVirtualBuffer(&m_vbTextBuffer, pNewEnd))
                    return OUT_OF_MEMORY;
            }
            MoveMemory(m_pbTextBuffer, pbText+cbText-m_cbCarryOver, m_cbCarryOver);
            m_iCharSetCarryOver = iCharset;
            m_lcidCarryOver = lcid;
        }
    }
    __except(FilterFTExceptions(_exception_code())) {
        return ErrorCodeForExceptions(_exception_code());
    }
    return 0;
}

int CHiliter::FlushCarryOverText() {        // flush out any carryover text
    return AppendText(m_pbTextBuffer, m_cbCarryOver, TRUE, m_iCharSetCarryOver, m_lcidCarryOver);
}

int CHiliter::AppendText(BYTE* pbText, int cbText, BOOL fArticleEnd, UINT iCharset, UINT lcid) {
    // this code mimics the code in txdbase.cpp so that the handling of split
    // words with punctuation (such as can't) works the same in both pieces of code
    int cbScanned;
    while (cbText) {
        int cbChunk = min(cbText, MAX_HILITE_WORDS);    // don't exceed our unicode buffer
        int cbHeld = cbText - cbChunk;
        cbScanned = AppendSlave(pbText, cbChunk, fArticleEnd, iCharset, lcid);
        if (cbScanned >= 0) {               // continue passing partial buffers
            pbText += cbScanned;
            cbText -= cbScanned;
        }
        else {                              // reached end of passed buffer
            pbText -= cbScanned;
            cbText += cbScanned;
            if (cbHeld==0) break;           // .. so break out of the loop
        }
    }
    return cbText;
}
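
/* Illustrative note (not part of the original source), assuming pWordBreakA()
   leaves in nMore the count of bytes it did not examine: AppendSlave() reports
   progress through the sign of its return value.  A positive value means the
   token table filled before the chunk was exhausted ("this many bytes scanned,
   call again with the rest"); a negative value means the whole chunk was
   scanned and its magnitude is the number of bytes fully tokenized.  Whatever
   remains in cbText when the loop above exits is the split trailing word that
   becomes the carry-over returned to ScanDisplayText().
*/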

int CHiliter::AppendSlave(BYTE* pbText, int cbText, BOOL fArticleEnd, UINT iCharset, UINT lcid) {
    CAbortSearch::CheckContinueState();
    int nChar, nTokens;
    int nMore = cbText;
    PBYTE pbTextStart = pbText;
    nTokens = pWordBreakA(iCharset, (char**)(&pbText), &nMore,      // these variables get changed
                  (char**)m_paStart, (char**)m_paEnd, m_pbType, NULL,   // no hash needed
                  MAX_TOKENS_HILITE, 0);                            // leave spaces in as part of punctuation
    if (nTokens > 1 && (nMore || !fArticleEnd)) {                   // exhausted token space OR more article
        if (nTokens > 2 && !(m_pbType[nTokens-1] & WORD_TYPE))
            nTokens--;                                              // break at word starts (punc not to span)
        nChar = m_paStart[--nTokens] - pbTextStart;                 // reprocess last token
    }
    else nChar = cbText;                                            // processed entire buffer
    // now go and move the token spreads into our token info array
    CopySpreads(nTokens, iCharset);
    // .. and set the counts for the next pass
    m_cbScanned += nChar;
    if (!nMore) nChar = -nChar;     // marks that text buffer is fully processed
    return nChar;                   // amount scanned (negative once the chunk is fully processed)
}

void FindToken(UINT iValue, PVOID pvTag, PVOID pvEnvironment) {
    // Assimilate function to move the index of each token (into ppdSorted) into our TOKEN_INFO array
    TOKEN_INFO* pTokenInfo = (TOKEN_INFO*)pvEnvironment;
    int iToken = *(int*)pvTag;
    pTokenInfo[iValue].iSorted = iToken;
}

void CHiliter::CopySpreads(int nTokens, UINT iCharset) {
    WCHAR* pwBuf = m_wTextBuf;                  // pointer to unicode buffer
    BYTE* pOrigin = m_paStart[0] - m_cbScanned; // where we got to so far
    if (m_pavr) m_pavr->DiscardRefs();          // discard all the value references
    // process through the list of incoming words
    for (int iToken=0; iToken<nTokens; iToken++) {
        // move the token locations into our TOKEN_INFO array
        BYTE* pStart = m_paStart[iToken];
        BYTE* pEnd = m_paEnd[iToken];

        // convert to unicode + move to unicode buffer

        UINT cw;

        for (;;)
        {
            cw = MultiByteToWideChar(
                     iCharset,          // code page
                     0,                 // character-type options
                     (char*)pStart,     // address of string to map
                     pEnd - pStart,     // number of characters in string
                     pwBuf,             // address of wide-character buffer
                     MAX_HILITE_WORDS); // size of buffer in words

            if (cw || pEnd == pStart) break;

            iCharset = ANSI_CHARSET;    // Force to ANSI if the previous charset
                                        // didn't work.
        }

        if (m_pbType[iToken]==0) {      // for punctuation remove blanks ..
            int cBase;                  // number of leading white space chars.
            int cLimit;                 // number of trailing white space chars.
            int cwOld = cw;             // needed to recompute pEnd
            cw = ::RemoveWhiteSpace(pwBuf, cw, cBase, cLimit);
            // now adjust the base and limit if necessary to remove white space
            if (cBase) pStart = (BYTE*)CharNextMult(iCharset, (char*)pStart, cBase);
            if (cLimit) pEnd = (BYTE*)CharNextMult(iCharset, (char*)pStart, cwOld - cBase - cLimit);
        }
        int iTokenInfo = m_cTokenInfo + iToken;     // slot for latest results
        __try {
            m_paTokenInfo[iTokenInfo].base = pStart - pOrigin;
            m_paTokenInfo[iTokenInfo].limit = pEnd - pOrigin;
            m_paTokenInfo[iTokenInfo].type = m_pbType[iToken];
            m_paTokenInfo[iTokenInfo].iSorted = -1; // default -- means not in our token list
        }
        __except(VirtualBufferExceptionFilter(GetExceptionCode(), GetExceptionInformation(), &m_vbTokenInfo)) {
            RaiseException(STATUS_NO_MEMORY, EXCEPTION_NONCONTINUABLE, 0, NULL);
        }
        // add to pavr
        m_pavr->AddValRef(pwBuf, cw * sizeof(WCHAR));   // returns index -- but we already know that
        pwBuf += cw;                                    // slot for next Unicode token
    }
    // get our token info table set up
    m_pHash->Assimilate(m_pavr, &m_paTokenInfo[m_cTokenInfo], FindToken, NULL);
    m_cTokenInfo += nTokens;                            // ready for next batch
}

ERRORCODE CHiliter::ClearDisplayText() {
    m_cTokenInfo = 0;
    m_cbScanned = 0;
    m_iCharSetCarryOver = 0;
    m_lcidCarryOver = 0;            // set up our carry over variables
    m_cbCarryOver = 0;
    m_cbScanned = 0;
    return 0;
}

//////////////////////////////// Phase (3) //////////////////////////////////
/*
   Analysing the search parameters

   When the user asks for a count or for actual hilites, we must go and find the
   current state of the search parameters to find out which of the words in our
   word list are selected by those criteria. This information is condensed into
   an indicator set, m_pMask, which is ordered the same as m_ppdSorted.

   We scan our array of TOKEN_INFO structures and use each index to access our
   indicator set. On the first pass we just count the number of hits. Then we
   allocate space to save the actual information which we put in on the second pass.
   For each hilite we need to save the following:

       typedef struct {
           int base;
           int limit;
       } HILITE;

   Every time there is a change in the search parameters, CFind must increment its
   copy of m_serialFind. This way we can tell if we need to recompute.

   When we get a QueryHilites() call, we first check m_iTokenNext to see if the
   caller is moving sequentially through the list. If this does not match, we do
   a binary search.
*/
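
/* Illustrative sketch (not part of the original source): a caller is assumed to
   use the count/query pair so it can size its result buffer before asking for
   the actual spans (0, -1 means "the whole scanned text"):

       int cHits = phil->CountHilites(0, -1);
       if (cHits > 0) {
           HILITE* paHilites = new HILITE[cHits];   // caller-owned buffer
           int cGot = phil->QueryHilites(0, -1, cHits, paHilites);
           for (int i = 0; i < cGot; i++)
               PaintRange(paHilites[i].base, paHilites[i].limit);   // hypothetical renderer call
           delete [] paHilites;
       }
*/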

void CHiliter::UpdateMask() {
    CFind* pFind = m_pSearcher->m_pFind;
    if (m_serialFind==pFind->GetSerial()) return;
    // we don't have the indicator set -- go compute it
    int cFrag = pFind->GetFragmentCount();      // number of fragments
    if (m_pMask) DetachRef(m_pMask);            // remove previous mask
    m_pMask = NULL;                             // initial empty set
    for (int iFrag=0; iFrag<cFrag; iFrag++) {   // look at all the fragments
        CFragInfo* pFrag = pFind->GetFragment(iFrag);   // get current fragment
        CIndicatorSet* pMask = pFrag->GetSelection();   // get new indicator set
        if (pMask) {                                    // NULL implies "all"
            if (!m_pMask) AttachRef(m_pMask, CIndicatorSet::NewIndicatorSet(pMask));
                                                // first time thru we create a mask
            else m_pMask->ORWith(pMask);        // or the masks!
            delete pMask;
        }
    }
    m_serialFind = pFind->GetSerial();          // avoid unwanted recomputes
}

BOOL CHiliter::PhraseSearch() {
    // returns TRUE if the user is searching for phrases
    CFind* pFind = m_pSearcher->m_pFind;
    CFragInfo* pFrag = pFind->GetFragment(0);   // get any fragment
    return pFrag->GetRefType()==TokenRefs;
}

int CHiliter::CountHilites(int base, int limit) {
    // find out how many hilites there are so the user can allocate space
    return QueryHilites(base, limit, COUNTING, NULL);
}

int CHiliter::QueryHilites(int base, int limit, int cMax, HILITE* paHilites) {
    // base and limit describe the range of text we wish to get hilites for
    // cMax and paHilites describe a buffer to put the results into
    // we return the number of hilites copied
    if (!m_paTokenInfo) return 0;               // no text -- no hilites
    if (m_cbCarryOver) m_cbCarryOver = FlushCarryOverText();
    __try {
        if (base<0) base = 0;
        int iEnd = m_cTokenInfo ? m_paTokenInfo[m_cTokenInfo-1].limit
                                : 0;
        if (limit==-1 || limit>=iEnd) limit = iEnd;
        if (base>=limit) return 0;
        m_base = base;
        m_limit = limit;                        // we need these globals
        m_cMax = cMax;                          // .. inside called functions
        m_paHilites = paHilites;
        m_cLit = 0;
        int iToken = LocateBase(base);          // convert offset to indexes
        m_iLimit = LocateLimit(limit);
        if (PhraseSearch()) {
            UpdateMasks();
            if (!m_apMasks || !m_apMasks[0]) return 0;  // none is none
            // we need to consider phrases that start outside the range
            int iSlop = 2*(m_cFrags-1);         // max we can be off
            iToken = max(0, iToken-iSlop);
            // .. and end outside the range
            m_iLimit = min(m_cTokenInfo, m_iLimit+iSlop);
            // loop thru our hiliter tokens and copy the valid ones
            while (iToken<m_iLimit && m_cLit<m_cMax) {
                // loop can end because we got the count we want
                CheckNextToken(0, m_iTokenStart = iToken);
                iToken++;
            }
        }
        else {
            UpdateMask();
            if (!m_pMask) return 0;             // return "all" == NONE!
            while (iToken<m_iLimit && m_cLit<m_cMax) {
                // loop can end because we got the count we want
                CheckToken(iToken);
                iToken++;
            }
        }
        m_iTokenNext = iToken;
        m_baseNext = m_paTokenInfo[m_iTokenNext].base;
    }
    __except(FilterFTExceptions(_exception_code())) {
        return ErrorCodeForExceptions(_exception_code());
    }
    return m_cLit;
}

void CHiliter::CheckToken(int iToken) {
    int iSorted = m_paTokenInfo[iToken].iSorted;
    if (iSorted==-1) return;                    // not a word in our text sets
    if (m_pMask && m_pMask->IsBitSet(iSorted)) {
        if (m_cMax!=COUNTING) {
            m_paHilites[m_cLit].base = max(m_base, m_paTokenInfo[iToken].base);     // copy the HILITES
            m_paHilites[m_cLit].limit = min(m_limit, m_paTokenInfo[iToken].limit);
        }
        m_cLit++;
    }
}

int CHiliter::LocateBase(int base) {
    // find the index to the span which contains the given base
    int iToken;
    if (base==0) iToken = 0;                            // easy case #1
    else if (base==m_baseNext) iToken = m_iTokenNext;   // #2 -- for serial access
    else iToken = LocateOffset(base);                   // hard case
    return iToken;
}

int CHiliter::LocateLimit(int limit) {
    // find the index to the span which contains the given limit
    int iToken;
    if (limit>=m_paTokenInfo[m_cTokenInfo-1].limit) iToken = m_cTokenInfo;
    else iToken = LocateOffset(limit) + 1;      // hard case
                                                // add one to include a word only partly included
    return iToken;
}

int CHiliter::LocateOffset(int offset) {
    // failing the above, we do a binary search
    // likely to hang if terms are not properly in ascending order
    int iStart = 0;
    int iEnd = m_cTokenInfo - 1;
    int i = (iStart + iEnd) / 2;                // start in the middle
    // binary search
    while (iStart<iEnd) {
        if (offset >= m_paTokenInfo[i].base) {
            if (offset < m_paTokenInfo[i].limit) break;     // we found it
            else {
                iStart = i;
                i = (i + iEnd + 1) / 2;
            }
        }
        else {
            iEnd = i;
            i = (iStart + i) / 2;
        }
    }
    return i;
}
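
/* Worked example (not part of the original source): with five contiguous token
   spreads [0,5) [5,6) [6,11) [11,12) [12,17), LocateOffset(8) probes the middle
   slot (base 6, limit 11), sees 6 <= 8 < 11 and returns index 2.  LocateOffset(12)
   fails at slots 2 and 3, moving iStart right each time, then lands on the slot
   [12,17) and returns index 4.  The search relies on the spreads being contiguous
   and in ascending order, as the comment above warns.
*/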

/////////////////////////////// Phase (4) ///////////////////////////////////////////

/* This is special code to handle phrase searching. Instead of ORing all our fragment
   masks we simply store them in an array. A typical mask should be 5-10K so storing 20 of
   them would mean allocating at most 200K.
   We proceed by walking our display text as before. When we get to a word we ask whether
   this word occurs in fragment[0]. If it does, we move to the next word. The second word
   may be punctuation, which generates two cases: (a) this is our next word (check fragment[1])
   or (b) this is punctuation to be ignored. Since we count space as a valid token, this
   second alternative will be relatively common.
   Suppose that the number of fragments is nFrag. Imagine a binary tree of depth nFrag.
   All the left branches are where we have case (a) -- our next word. The right branches are
   case (b) -- where we skip a punctuation token. We do a tree sweep. If we ever reach a
   terminal node we have a hit. We then add an element to our array and store the base of
   the first word and the limit of the last word.
   Note that we may have several hits in a single tree and that hits may overlap. We
   return all the overlapping hits to the caller. This is a convenient form if the caller
   wants to display them sequentially. If the caller wants to show them all at once, it
   will have to write code to combine ones that overlap.
*/
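
/* Illustrative trace (not part of the original source): searching for the
   two-fragment phrase "new york" over the display tokens  "new" | " " | "york":

       CheckNextToken(0, "new")    -- "new" is set in m_apMasks[0], not yet terminal
         CheckNextToken(1, " ")    -- left branch: the space is not in our word list, dead end
         CheckNextToken(1, "york") -- right branch (skip the punctuation token): set in
                                      m_apMasks[1] at terminal depth, so a HILITE is recorded
                                      from the base of "new" to the limit of "york"
*/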

void CHiliter::UpdateMasks() {
    // make sure we have the current array of indicator sets for each fragment
    CFind* pFind = m_pSearcher->m_pFind;
    if (m_serialFind==pFind->GetSerial()) return;
    // we don't have the indicator sets -- go fetch them
    for (int iFrag=0; iFrag<m_cFrags; iFrag++) {    // remove previous masks
        if (m_apMasks[iFrag]) DetachRef(m_apMasks[iFrag]);
        m_apMasks[iFrag] = NULL;                    // empty sets
    }
    m_cFrags = pFind->GetFragmentCount();           // number of fragments now
    if (m_cFrags>cFRAG_MAX) m_cFrags = cFRAG_MAX;   // upper limit
    for (iFrag=0; iFrag<m_cFrags; iFrag++) {
        CFragInfo* pFrag = pFind->GetFragment(iFrag);   // get current fragment
        CIndicatorSet* pMask = pFrag->GetSelection();
        if (pMask) AttachRef(m_apMasks[iFrag], pMask);  // get new indicator set
    }
    m_serialFind = pFind->GetSerial();              // avoid unwanted recomputes
}

void CHiliter::CheckNextToken(int depth, int iToken) {
    // recursive routine to find how many phrases start with this token
    // records (or just counts) each terminal node reached in this sub-tree
    if (m_cLit>=m_cMax) return;
    int iSorted = m_paTokenInfo[iToken].iSorted;
    if (iSorted==-1) return;                    // not a word in our text sets
    if (m_apMasks[depth] && !m_apMasks[depth]->IsBitSet(iSorted)) return;
                                                // [.. the user selected words in this fragment, but not this one]
    if (depth>=m_cFrags-1) {                    // ==== we got a hit ==========
        int base = m_paTokenInfo[m_iTokenStart].base;
        int limit = m_paTokenInfo[iToken].limit;
        if (base<m_limit && limit>m_base) {     // only hits that overlap the requested range count
            if (m_cMax!=COUNTING) {             // we were called by QueryHilites
                m_paHilites[m_cLit].base = max(m_base, base);
                                                // the span starts at the start of the token at the top of the tree
                m_paHilites[m_cLit].limit = min(m_limit, limit);
                                                // .. and ends at the end of the token at the bottom of the tree
            }
            m_cLit++;                           // count one more for the gipper
        }
    }
    else {
        if (++iToken>=m_iLimit) return;
        // we survived ferocious pruning so far so let us press deeper
        depth++;
        CheckNextToken(depth, iToken);          // any sort of token -- just delve
        // notice we can go two different ways at any node
        if (m_paTokenInfo[iToken].type==0) {    // [0==punctuation]
            if (++iToken>=m_iLimit) return;     // punctuation -- skip before delving
            CheckNextToken(depth, iToken);      // delve on MacDuff
        }
    }
}