//#define _DUMPALL #include #include #include #include #ifdef DOS_ONLY #include #endif // DOS_ONLY #include #include "common.h" #include "search.h" #ifdef _DEBUG static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/ #endif typedef int (PASCAL NEAR * FCMP)(LPV, LPV); #define MAX_HEAP_ENTRIES 0xffff/sizeof(LPV) // Maximum entries for heap sort #define MIN_HEAP_ENTRIES 100 // Minimum entries for heap sort /************************************************************************* * * INTERNAL GLOBAL FUNCTIONS *************************************************************************/ PUBLIC HRESULT PASCAL NEAR OrHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC HRESULT PASCAL NEAR AndHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC HRESULT PASCAL NEAR NotHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC HRESULT PASCAL NEAR NearHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC HRESULT PASCAL NEAR PhraseHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC VOID PASCAL NEAR RemoveUnmarkedTopicList (LPQT, _LPQTNODE, BOOL); PUBLIC VOID PASCAL NEAR RemoveUnmarkedNearTopicList (_LPQT, _LPQTNODE); PUBLIC VOID PASCAL NEAR MergeOccurence(LPQT, LPITOPIC , LPITOPIC); PUBLIC VOID PASCAL NEAR SortResult (LPQT, _LPQTNODE, WORD); PUBLIC VOID PASCAL NEAR NearHandlerCleanUp (LPQT, _LPQTNODE); PUBLIC HRESULT PASCAL NEAR TopicListSort (_LPQTNODE, BOOL); /************************************************************************* * GLOBAL VARIABLES *************************************************************************/ extern FNHANDLER HandlerFuncTable[]; /************************************************************************* * * INTERNAL PRIVATE FUNCTIONS * All of them should be declared near *************************************************************************/ PRIVATE VOID PASCAL NEAR RemoveQuery(LPQT, _LPQTNODE); PUBLIC HRESULT PASCAL NEAR ProximityCheck(LPITOPIC, LPIOCC, WORD); PRIVATE HRESULT PASCAL NEAR HandleNullNode(LPQT, _LPQTNODE , _LPQTNODE, int); PRIVATE VOID PASCAL NEAR RemoveUnmarkedOccList (LPQT, LPITOPIC, LPIOCC, int); PRIVATE VOID PASCAL NEAR OccurenceSort (LPQT, LPITOPIC); PRIVATE int PASCAL NEAR FRange(DWORD, DWORD, WORD); PRIVATE HRESULT PASCAL NEAR InsertMarker (LPQT, LPITOPIC); PRIVATE LPIOCC PASCAL NEAR FindMarker (LPIOCC); PRIVATE HRESULT PASCAL NEAR NearListMatch (LPIOCC, LPIOCC, WORD); PRIVATE VOID PASCAL NEAR HeapUp (LPITOPIC far*, WORD, FCMP); PRIVATE VOID PASCAL NEAR HeapDown (LPITOPIC far*, int, FCMP); PRIVATE int PASCAL NEAR TopicWeightCompare (LPV, LPV); PRIVATE int PASCAL NEAR HitCountCompare (LPV, LPV); /************************************************************************* * @doc INTERNAL * * @func HRESULT PASCAL NEAR | OrHandler | * Handle ORing the strings * * @parm int | fOperationType | * Tell what kinds of operations we are dealing with * * @parm _LPQTNODE | lpResQtNode | * The query node structure that we add the result to * * @parm LPV | lpStruct | * Vanilla pointers to different types of structures we are dealing with. * The contents of those pointers are determined by the value of * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for * EXPRESSION_EXPRESSION, this is a_LPQTNODE * * @rdesc S_OK : if the operation has been carried * E_FAIL : if some errors happened (out-of-memory) * * @comm The implementation is straightforward: *************************************************************************/ PUBLIC HRESULT PASCAL NEAR OrHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode, LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType) { _LPQTNODE lpCurQtNode; LPITOPIC lpCurTopicList; LPITOPIC lpNextTopicList; switch (fOperationType) { case EXPRESSION_TERM: /* We are adding a new occurence into a TopicID list. This can only happens when we are loading the infos for a query's TERM_NODE node */ RET_ASSERT(lpResTopicList); /* Adding the new occurence to the TopicID list */ return OccNodeInsert(lpQueryTree, lpResTopicList, (LPIOCC)lpStruct); break; case EXPRESSION_EXPRESSION: lpCurQtNode = (_LPQTNODE)lpStruct; /* Handle different variations of: (EXPRESSION_NODE | NULL_NODE) or (NULL_NODE | EXPRESSION_NODE) */ if (HandleNullNode(lpQueryTree, lpResQtNode, lpCurQtNode, OR_OP)) return S_OK; /* Make sure that we are pointing to the right place to search */ lpQueryTree->lpTopicStartSearch = lpResQtNode->lpTopicList; /* Thread the TopicID List and add them to lpResQtNode */ for (lpCurTopicList = QTN_TOPICLIST(lpCurQtNode); lpCurTopicList; lpCurTopicList = lpNextTopicList) { lpNextTopicList = lpCurTopicList->pNext; /* Find the location of the TopicID List in the query. */ if ((lpResTopicList = TopicNodeSearch(lpQueryTree, lpResQtNode, lpCurTopicList->dwTopicId)) == NULL){ /* The list doesn't exist yet, so we just transfer the new TopicID list to lpResQtNode */ RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL, (LPSLINK) lpCurTopicList, (TOPICLIST_NODE | DONT_FREE)); TopicNodeInsert (lpQueryTree, lpResQtNode, lpCurTopicList); } else { /* Merging two TopicList together by adding the new occurence list to the old result doc list */ MergeOccurence(lpQueryTree, lpResTopicList, lpCurTopicList); /* Remove the now empty TopicList */ RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL, (LPSLINK) lpCurTopicList, TOPICLIST_NODE); } } /* Assure that all nodes are transferred */ RET_ASSERT (QTN_TOPICLIST(lpCurQtNode) == NULL) ; break; } return S_OK; } /************************************************************************* * @doc INTERNAL * * @func PUBLIC HRESULT PASCAL NEAR | AndHandler | * Handle Anding the strings * * @parm _LPQTNODE | lpResQtNode | * The query structure that we add the result to * * @parm LPITOPIC | lpResTopicList | * The TopicList structure that we add the result to * * @parm LPV | lpStruct | * Vanilla pointers to different types of structures we are dealing with. * The contents of those pointers are determined by the value of * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for * EXPRESSION_EXPRESSION, this is a_LPQTNODE * * @parm int | fOperationType | * Tell what kinds of nodes we are handling, query-occurence or * query-query * * @rdesc S_OK : if the operation has been carried * errors : if some errors happened (out-of-memory) *************************************************************************/ PUBLIC HRESULT PASCAL NEAR AndHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode, LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType) { _LPQTNODE lpCurQtNode; LPITOPIC lpTopicNode1; LPITOPIC lpTopicNode2; LPITOPIC lpNextTopic1; LPITOPIC lpNextTopic2; LPITOPIC lpPrev; long fResult; switch (fOperationType) { case EXPRESSION_TERM: RET_ASSERT (lpResTopicList); ((LPIOCC)lpStruct)->fFlag |= TO_BE_KEPT; lpResTopicList->fFlag |= TO_BE_KEPT; /* Adding the new occurence to the TopicID list */ return OccNodeInsert(lpQueryTree, lpResTopicList, (LPIOCC)lpStruct); case EXPRESSION_EXPRESSION: /* Doing an AND combination is equivalent to merging the * two lists together for same doc ID */ lpCurQtNode = (_LPQTNODE)lpStruct; if (HandleNullNode(lpQueryTree, lpResQtNode, lpCurQtNode, AND_OP)) return S_OK; /* Initialize variables */ lpTopicNode1 = QTN_TOPICLIST(lpResQtNode); lpTopicNode2 = QTN_TOPICLIST(lpCurQtNode); lpPrev = NULL; while (lpTopicNode1 && lpTopicNode2) { /* Get the next nodes */ lpNextTopic1 = lpTopicNode1->pNext; lpNextTopic2 = lpTopicNode2->pNext; if ((fResult = lpTopicNode1->dwTopicId - lpTopicNode2->dwTopicId) == 0) { /* The TopicIds match */ /* Merge the occurrences together */ MergeOccurence (lpQueryTree, lpTopicNode1, lpTopicNode2); lpPrev = lpTopicNode1; lpTopicNode1 = lpNextTopic1; lpTopicNode2 = lpNextTopic2; } else if (fResult < 0) { /* List 1 < List 2 */ /* Remove Topic node 1*/ TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1); lpTopicNode1 = lpNextTopic1; } else { /* List 1 > List 2 */ lpTopicNode2 = lpNextTopic2; } } /* Free remaining doc list */ while (lpTopicNode1) { /* Get the next nodes */ lpNextTopic1 = lpTopicNode1->pNext; /* Remove Topic node 1*/ TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1); lpTopicNode1 = lpNextTopic1; } /* Free doc 2 list */ RemoveQuery(lpQueryTree, lpCurQtNode); if (QTN_TOPICLIST(lpResQtNode) == NULL) QTN_NODETYPE(lpResQtNode) = NULL_NODE; return S_OK; default: /* Weird parameters */ RET_ASSERT(UNREACHED); } } /************************************************************************* * @doc INTERNAL * * @func PUBLIC HRESULT PASCAL NEAR | NotHandler | * Handle NOT the strings * * @parm _LPQTNODE | lpResQtNode | * The query structure that we add the result to * * @parm LPITOPIC | lpResTopicList | * The TopicList structure that we add the result to * * @parm LPV | lpStruct | * Vanilla pointers to different types of structures we are dealing with. * The contents of those pointers are determined by the value of * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for * EXPRESSION_EXPRESSION, this is a_LPQTNODE * * @parm int | fOperationType | * Tell what kinds of nodes we are handling, query-occurence or * query-query * * @rdesc S_OK : if the operation has been carried * errors : if some errors happened (out-of-memory) *************************************************************************/ PUBLIC HRESULT PASCAL NEAR NotHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode, LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType) { _LPQTNODE lpCurQtNode; LPITOPIC lpTopicNode1; LPITOPIC lpTopicNode2; LPITOPIC lpNextTopic1; LPITOPIC lpNextTopic2; LPITOPIC lpPrev; long fResult; switch (fOperationType) { case EXPRESSION_TERM: RET_ASSERT(UNREACHED); break; case EXPRESSION_EXPRESSION: lpCurQtNode = (_LPQTNODE)lpStruct; if (HandleNullNode(lpQueryTree, lpResQtNode, lpCurQtNode, NOT_OP)) return S_OK; /* Initialize variables */ lpTopicNode1 = QTN_TOPICLIST(lpResQtNode); lpTopicNode2 = QTN_TOPICLIST(lpCurQtNode); lpPrev = NULL; while (lpTopicNode1 && lpTopicNode2) { /* Get the next nodes */ lpNextTopic1 = lpTopicNode1->pNext; lpNextTopic2 = lpTopicNode2->pNext; if ((fResult = lpTopicNode1->dwTopicId - lpTopicNode2->dwTopicId) == 0) { /* The TopicIds match */ TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1); lpTopicNode1 = lpNextTopic1; lpTopicNode2 = lpNextTopic2; } else if (fResult < 0) { /* List 1 < List 2 */ lpPrev = lpTopicNode1; lpTopicNode1 = lpNextTopic1; } else { /* List 1 > List 2 */ lpTopicNode2 = lpNextTopic2; } } /* Free doc 2 list */ RemoveQuery(lpQueryTree, lpCurQtNode); if (QTN_TOPICLIST(lpResQtNode) == NULL) QTN_NODETYPE(lpResQtNode) = NULL_NODE; return S_OK; default: /* Weird parameters */ RET_ASSERT(UNREACHED); } return S_OK; } /************************************************************************* * NEARHANDLER Description * * Sematics: * The current chosen sematics is: * A near B near C --> (A near B) and (B near C) * Other possible sematics of NEAR for (A near B near C) are: * - A and B anc C must be near each other * - Any two of A or B or C can be near each other * * Observation: * With the above semantics, we notice that only the last word (ie. B) * has meaning in the comparison with C. * * Implementation: * A special node will be used to differentiate between occurrences * coming from A and from B. Only the ones from B will be used in the * combination with C. Consider the following example (ProxDist = 5): * A B C * 10 15 5 (the numbers are word counts) * 14 18 16 * After combining A and B we will end up with: * 15 * 18 * M <- marker separates occurrences from A and B * 10 * 14 * After that we only combine B's terms with C's terms. The result will * look like: * 16 * M <- marker separates occurrences from B and C * 15 * 18 * M <- marker separates occurrences from A and B * 10 * 14 * Note that C's 5 is dropped since there is no match with B, even * that A's 10 matches it. * After sorting and getting rid of the marker nodes, the final result * will look as followed: * 10 14 15 16 18 *************************************************************************/ PRIVATE HRESULT PASCAL NEAR NearHandlerInsert (_LPQT lpQueryTree, LPITOPIC lpResTopicList, LPIOCC lpStartOcc, LPIOCC lpCurOcc) { HRESULT fRet = FALSE; if (!(lpCurOcc->fFlag & IS_MARKER_NODE) && (fRet = NearListMatch(lpCurOcc, lpStartOcc, lpQueryTree->wProxDist))) { /* Insert the occurrence node */ lpCurOcc->pNext = lpResTopicList->lpOccur; lpResTopicList->lpOccur = lpCurOcc; lpResTopicList->lcOccur ++; } else { /* Remove the occurrence node */ RemoveNode(lpQueryTree, (LPV) NULL, (LPSLINK)NULL, (LPSLINK) lpCurOcc, OCCURENCE_NODE); } return (fRet); } /************************************************************************* * @doc INTERNAL * * @func LPIOCC PASCAL NEAR | FindMarker | * Given a starting occurrence node, traverse it and find the first * marker node * * @parm LPIOCC | lpStartOcc | * Starting node * * @rdesc The marker node *************************************************************************/ PRIVATE LPIOCC PASCAL NEAR FindMarker (LPIOCC lpStartOcc) { LPIOCC lpCurOcc; for (lpCurOcc = lpStartOcc; lpCurOcc; lpCurOcc = lpCurOcc->pNext) { if (lpCurOcc->fFlag & IS_MARKER_NODE) break; } return lpCurOcc; } /************************************************************************* * @doc INTERNAL * * @func HRESULT PASCAL NEAR | InsertMarker | * This function will insert a marker node at the beginning of * lpResTopicList->lpOccur * * @parm LPQT | lpQueryTree | * Pointer to query tree where all globals are * * @parm LPITOPIC | lpResTopicList | * Pointer to TopicId node * * @rdesc S_OK *************************************************************************/ PRIVATE HRESULT PASCAL NEAR InsertMarker (LPQT lpQueryTree, LPITOPIC lpResTopicList) { LPMARKER lpMark; /* Do some preparations by allocating marker nodes */ if ((lpResTopicList->fFlag & HAS_MARKER) == FALSE) { if (!(lpMark = (LPMARKER)OccNodeAllocate(lpQueryTree))) return E_TOOMANYTOPICS; lpMark->fFlag |= (IS_MARKER_NODE | TO_BE_KEPT); /* Link the markers together with the nodes */ lpMark->pNext = lpResTopicList->lpOccur; lpResTopicList->lpOccur = (LPIOCC)lpMark; /* Link with the next marker */ lpMark->pNextMark = (LPMARKER)FindMarker (lpMark->pNext); /* Mark that we already has a marker */ lpResTopicList->fFlag |= HAS_MARKER; /* Increment lcOccur, since this node will be removed * like a regular occurrence node */ lpResTopicList->lcOccur ++; } return S_OK; } /************************************************************************* * @doc INTERNAL * * @func HRESULT PASCAL NEAR | NearListMatch | * Traverse a list and match all nodes against lpCurOcc * * @parm LPIOCC | lpCurOcc | * Occurrence node to be compared with * * @parm LPIOCC | lpStartOcc | * Start of the occurrence list * * @parm WORD | wProxDist | * Proximity distance * * @rdesc * return TRUE if the the node matches *************************************************************************/ PRIVATE HRESULT PASCAL NEAR NearListMatch (LPIOCC lpCurOcc, LPIOCC lpStartOcc, WORD wProxDist) { LPIOCC lpResOcc; BOOL fMatch = FALSE; for (lpResOcc = lpStartOcc; lpResOcc; lpResOcc = lpResOcc->pNext) { if (lpResOcc->fFlag & IS_MARKER_NODE) break; if (!FRange(lpResOcc->dwCount, lpCurOcc->dwCount, wProxDist)) { lpResOcc->fFlag |= TO_BE_KEPT; fMatch = TRUE; } } if (fMatch) lpCurOcc->fFlag |= TO_BE_KEPT; return fMatch; } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL FAR | NearHandlerTopicCleanUp | * Clean up a TopicList by going thru each sequence of occurrence * delimited by marker, do the check again and remove all extra * occurrences * * @parm _LPQT | lpQueryTree | * Pointer to query tree * * @parm _LPQTNODE | lpResQtNode | * Pointer to result query node * * @parm LPITOPIC | lpCurTopic | * Current doc id *************************************************************************/ PUBLIC HRESULT PASCAL FAR NearHandlerTopicCleanUp (_LPQT lpQueryTree, _LPQTNODE lpResQtNode, LPITOPIC lpCurTopic) { LPMARKER lpMarkStart; LPMARKER lpCurMark; LPIOCC lpCurOcc; LPIOCC lpStartOcc; BOOL fDone; /* Find the first marker */ lpMarkStart = (LPMARKER)FindMarker(lpCurTopic->lpOccur); if (lpMarkStart == NULL) { /* This branch has been cleaned up of marker nodes */ return E_FAIL; } /* The first occurrences from the start to lpMarkStart must be * TO_BE_KEPT, so we don't have to check them since they just * freshly came from a near handler. All we have to do * is set the flag */ for (lpCurOcc = lpCurTopic->lpOccur; lpCurOcc && !(lpCurOcc->fFlag & IS_MARKER_NODE); lpCurOcc = lpCurOcc->pNext) lpCurOcc->fFlag |= TO_BE_KEPT; if (lpMarkStart->pNextMark == NULL) { /* Simple A NEAR B, just set the flag */ for (lpCurOcc = lpMarkStart->pNext; lpCurOcc && !(lpCurOcc->fFlag & IS_MARKER_NODE); lpCurOcc = lpCurOcc->pNext) lpCurOcc->fFlag |= TO_BE_KEPT; } else { /* Complex NEAR terms, such as: A NEAR B NEAR C */ lpStartOcc = lpMarkStart->pNext; lpCurMark = lpMarkStart->pNextMark; fDone = FALSE; for (;lpCurMark; lpCurMark = lpCurMark->pNextMark) { for (lpCurOcc = lpCurMark->pNext; lpCurOcc && !(lpCurOcc->fFlag & IS_MARKER_NODE); lpCurOcc = lpCurOcc->pNext) { if (NearListMatch (lpCurOcc, lpStartOcc, lpQueryTree->wProxDist) == FALSE) { fDone = TRUE; } } lpStartOcc = lpCurMark->pNext; if (fDone) break; } } /* Clear up all the marker node flag to ensure that they will be * removed */ while (lpMarkStart) { lpMarkStart->fFlag &= ~(TO_BE_KEPT | IS_MARKER_NODE); lpMarkStart = lpMarkStart->pNextMark; } return S_OK; } PUBLIC VOID PASCAL NEAR NearHandlerCleanUp (_LPQT lpQueryTree, _LPQTNODE lpResQtNode) { LPITOPIC lpCurTopic; for (lpCurTopic = QTN_TOPICLIST(lpResQtNode); lpCurTopic; lpCurTopic = lpCurTopic->pNext) { if (NearHandlerTopicCleanUp (lpQueryTree, lpResQtNode, lpCurTopic) == S_OK) { RemoveUnmarkedOccList(lpQueryTree, lpCurTopic, lpCurTopic->lpOccur, TRUE); } } } /************************************************************************* * @doc INTERNAL * * @func PUBLIC HRESULT PASCAL NEAR | NearHandler | * Handle NEAR operation * * @parm _LPQTNODE | lpResQtNode | * The query structure that we add the result to * * @parm LPITOPIC | lpResTopicList | * The TopicList structure that we add the result to * * @parm LPV | lpStruct | * Vanilla pointers to different types of structures we are dealing with. * The contents of those pointers are determined by the value of * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for * EXPRESSION_EXPRESSION, this is a_LPQTNODE * @parm int | fOperationType | * Tell what kinds of nodes we are handling, query-occurence or * query-query * * @rdesc S_OK : if the operation has been carried * errors : if some errors happened (out-of-memory) *************************************************************************/ PUBLIC HRESULT PASCAL NEAR NearHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode, LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType) { _LPQTNODE lpCurQtNode; /* Current query tree node */ LPITOPIC lpCurTopicList; /* Current TopicId node */ LPITOPIC lpNextTopicList; LPIOCC lpCurOcc; LPIOCC lpStartOcc; LPITOPIC lpPrevRes; LPSLINK lpTmp; //erinfox switch (fOperationType) { case EXPRESSION_TERM: /* Insert a marker node if necessary */ if (InsertMarker (lpQueryTree, lpResTopicList) != S_OK) return E_TOOMANYTOPICS; /* Look for the starting point */ lpStartOcc = FindMarker(lpResTopicList->lpOccur); RET_ASSERT(lpStartOcc); /* Handle the near operation */ if (NearHandlerInsert (lpQueryTree, lpResTopicList, lpStartOcc->pNext, (LPIOCC)lpStruct)) lpResTopicList->fFlag |= TO_BE_KEPT; break; case EXPRESSION_EXPRESSION: lpCurQtNode = (_LPQTNODE)lpStruct; if (HandleNullNode(lpQueryTree, lpResQtNode, lpCurQtNode, NEAR_OP)) return S_OK; /* Now doing the real jobs */ /* Make sure that we are pointing to the right place to search */ lpQueryTree->lpTopicStartSearch = lpResQtNode->lpTopicList; /* First check the coming data from lpCurTopicList. * If there isn't an equivalent TopicId in QTN_TOPICLIST(lpResQtNode), * then remove it */ for (lpCurTopicList = QTN_TOPICLIST(lpCurQtNode); lpCurTopicList; lpCurTopicList = lpNextTopicList) { lpNextTopicList = lpCurTopicList->pNext; /* Find the location of the TopicID List in the query. */ if ((lpResTopicList = TopicNodeSearch(lpQueryTree, lpResQtNode, lpCurTopicList->dwTopicId)) == NULL) { /* Can't find equivalent TopicId in the result list, just remove lpCurTopicList */ TopicNodeFree(lpQueryTree, lpCurQtNode, NULL, lpCurTopicList); continue; } /* An equivalent TopicId is found */ /* Insert a marker node if necessary */ if (InsertMarker (lpQueryTree, lpResTopicList) != S_OK) return E_TOOMANYTOPICS; /* Look for the starting point */ RET_ASSERT(lpResTopicList->lpOccur) lpStartOcc = FindMarker(lpResTopicList->lpOccur); lpStartOcc = lpStartOcc->pNext; // Skip marker node for (lpCurOcc = lpCurTopicList->lpOccur; lpCurOcc; lpCurOcc = lpCurTopicList->lpOccur) { /* "Unlink" lpCurOcc */ lpCurTopicList->lpOccur = lpCurOcc->pNext; /* Handle the near operation */ if (NearHandlerInsert (lpQueryTree, lpResTopicList, lpStartOcc, lpCurOcc)) { lpResTopicList->fFlag |= TO_BE_KEPT; } } RET_ASSERT(lpCurTopicList->lpOccur == NULL); RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL, (LPSLINK) lpCurTopicList, TOPICLIST_NODE); if (lpResTopicList->lpOccur->fFlag & IS_MARKER_NODE) { /* We didn't find any match, remove this TopicList */ // erinfox - I don't know lpPrevRes for lpResTopicList, so I get it here for (lpPrevRes = NULL, lpTmp = (LPSLINK)lpResQtNode->lpTopicList; lpTmp; lpTmp = lpTmp->pNext) { if (lpTmp == (LPSLINK)lpResTopicList) break; lpPrevRes = (LPITOPIC) lpTmp; } TopicNodeFree(lpQueryTree, lpResQtNode, lpPrevRes, lpResTopicList); } else { /* Remove all unmarked occurrences, but don't * reset the TO_BE_KEPT flag */ RemoveUnmarkedOccList(lpQueryTree, lpResTopicList, lpResTopicList->lpOccur, FALSE); } } RemoveQuery(lpQueryTree, lpCurQtNode); if (QTN_TOPICLIST(lpResQtNode) == NULL) QTN_NODETYPE(lpResQtNode) = NULL_NODE; return S_OK; break; } return S_OK; } /************************************************************************* * @doc INTERNAL * * @func PUBLIC HRESULT PASCAL NEAR | PhraseHandler | * Handle PHRASE operation * * @parm _LPQTNODE | lpResQtNode | * The query structure that we add the result to * * @parm LPITOPIC | lpResTopicList | * The TopicList structure that we add the result to * * @parm LPV | lpStruct | * Vanilla pointers to different types of structures we are dealing with. * The contents of those pointers are determined by the value of * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for * EXPRESSION_EXPRESSION, this is a_LPQTNODE * * @parm int | fOperationType | * Tell what kinds of nodes we are handling, query-occurence or * query-query * * @rdesc S_OK : if the operation has been carried * errors : if some errors happened (out-of-memory) *************************************************************************/ PUBLIC HRESULT PASCAL NEAR PhraseHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode, LPITOPIC lpResTopicList, LPIOCC lpCurOcc, int fOperationType) { LPIOCC lpResOcc; BOOL fResult = 0; LPIOCC lpStartOcc = NULL; RET_ASSERT(fOperationType == EXPRESSION_TERM); /* Start at the beginning if necessary */ if ((lpResOcc = lpQueryTree->lpOccStartSearch) == NULL) lpResOcc = lpResTopicList->lpOccur; for (; lpResOcc; lpResOcc = lpResOcc->pNext) { if ((lpResOcc->fFlag & TO_BE_SKIPPED) == 0) { if ((fResult = (int)(lpCurOcc->dwCount - lpResOcc->dwCount)) == 1) { /* The nodes are consecutive, mark them TO_BE_KEPT */ lpResOcc->fFlag |= TO_BE_KEPT | TO_BE_SKIPPED; lpResOcc->fFlag &= ~TO_BE_COMPARED; lpCurOcc->fFlag |= TO_BE_KEPT | TO_BE_SKIPPED | TO_BE_COMPARED; lpResTopicList->fFlag |= TO_BE_KEPT; break; } /* Reset lpStartOcc */ lpStartOcc = NULL; if (fResult <= 0) { /* CurOcc is less than what is in the result list */ break; } } else { /* Get a skipped node. Mark the assumed starting node */ if (lpStartOcc == NULL) { lpStartOcc = lpResOcc; } } } if (fResult == 1) { /* Add this node, and mark the starting point for next search */ lpQueryTree->lpOccStartSearch = lpCurOcc->pNext = lpResOcc->pNext; lpResOcc->pNext = lpCurOcc; lpResTopicList->lcOccur ++; /* Mark all previous nodes TO_BE_KEPT */ if (lpStartOcc) { for (; lpStartOcc != lpCurOcc; lpStartOcc = lpStartOcc->pNext) lpStartOcc->fFlag |= TO_BE_KEPT; } } else { RemoveNode(lpQueryTree, (LPV) NULL, (LPSLINK)NULL, (LPSLINK) lpCurOcc, OCCURENCE_NODE); } return S_OK; } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | MergeOccurence | * Merge two occurences lists together * * @parm _LPQT | lpQueryTree | * Pointer to query tree (where global variables are) * * @parm LPITOPIC | lpResTopicList | * Resulting TopicList that has the merged occurence list * * @parm LPITOPIC | lpCurTopicList | * TopicList that has the occurrence list to be merged to the * resulting list *************************************************************************/ PUBLIC VOID PASCAL NEAR MergeOccurence(_LPQT lpQueryTree, LPITOPIC lpResTopicList, LPITOPIC lpCurTopicList) { register LPIOCC lpTmpOcc; register LPIOCC lpNextOcc; /* Reset lpOccStartSearch */ lpQueryTree->lpOccStartSearch = NULL; for (lpTmpOcc = lpCurTopicList->lpOccur; lpTmpOcc; lpTmpOcc = lpNextOcc){ lpNextOcc = lpTmpOcc->pNext; OccNodeInsert(lpQueryTree, lpResTopicList, lpTmpOcc); } lpCurTopicList->lpOccur = NULL; lpCurTopicList->lcOccur = 0; } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | RemoveUnmarkedTopicList | * Remove all the TopicLists that are not marked TO_BE_KEPT * * @parm _LPQT | lpQueryTree | * Pointer to query tree (for globasl variables) * * @parm _LPQTNODE | lpQtNode | * Query tree node to be checked * * @parm HRESULT | fKeepOccurence | * If 0, then check and remove all occurrences nodes that are not * marked TO_BE_KEPT *************************************************************************/ PUBLIC VOID PASCAL NEAR RemoveUnmarkedTopicList (_LPQT lpQueryTree, _LPQTNODE lpQtNode, BOOL fKeepOccurence) { register LPITOPIC lpTopicList; register LPITOPIC lpNextTopicList; // erinfox: add to keep track of previous node register LPITOPIC lpPrev; /* Traverse the doclist */ for (lpPrev = NULL, lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList; lpTopicList = lpNextTopicList) { lpNextTopicList = lpTopicList->pNext; if ((lpTopicList->fFlag & TO_BE_KEPT) == 0) { /* Free the doc node and its occurences list */ TopicNodeFree(lpQueryTree, lpQtNode, lpPrev, lpTopicList); } else { lpTopicList->fFlag &= ~(TO_BE_KEPT | HAS_MARKER); // Reset the flag if (!fKeepOccurence) { /* Check the occurences list, and free all nodes that * are not marked TO_BE_KEPT */ RemoveUnmarkedOccList(lpQueryTree, lpTopicList, lpTopicList->lpOccur, TRUE); if (lpTopicList->lpOccur == NULL) { RemoveNode(lpQueryTree, (LPV) lpQtNode, NULL, (LPSLINK) lpTopicList, TOPICLIST_NODE); } } lpPrev = lpTopicList; } } } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | RemoveUnmarkedNearTopicList | * Remove all the TopicLists that are not marked TO_BE_KEPT * * @parm _LPQT | lpQueryTree | * Pointer to query tree (for globasl variables) * * @parm _LPQTNODE | lpQtNode | * Query tree node to be checked * * @parm BOOL | fKeepOccurence | * If 0, then check and remove all occurrences nodes that are not * marked TO_BE_KEPT *************************************************************************/ PUBLIC VOID PASCAL NEAR RemoveUnmarkedNearTopicList (_LPQT lpQueryTree, _LPQTNODE lpQtNode) { register LPITOPIC lpTopicList; register LPITOPIC lpNextTopicList; LPIOCC lpMark; // erinfox - add to keep track of previous node register LPITOPIC lpPrev; /* Traverse the doclist */ for (lpPrev = NULL, lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList; lpTopicList = lpNextTopicList) { lpNextTopicList = lpTopicList->pNext; if ((lpTopicList->fFlag & TO_BE_KEPT) == 0) { /* Free the doc node and its occurences list */ TopicNodeFree(lpQueryTree, lpQtNode, lpPrev, lpTopicList); } else { // Reset the flag lpTopicList->fFlag &= ~(TO_BE_KEPT | HAS_MARKER); /* Find the marker */ lpMark = FindMarker(lpTopicList->lpOccur); RemoveUnmarkedOccList(lpQueryTree, lpTopicList, lpTopicList->lpOccur, TRUE); /* Remove all unmarked occurrences between this marker and * the next one */ if (lpMark) RemoveUnmarkedOccList(lpQueryTree, lpTopicList, lpMark->pNext, TRUE); lpPrev = lpTopicList; } } } /************************************************************************* * @doc INTERNAL * * @func PASCAL NEAR | MarkTopicList | * Mark all TopicId nodes in a doc list TO_BE_KEPT * * @parm _LPQTNODE | lpQtNode | * Pointer to the query tree node that contains the doc list *************************************************************************/ PUBLIC VOID PASCAL NEAR MarkTopicList (_LPQTNODE lpQtNode) { register LPITOPIC lpTopicList; for (lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList; lpTopicList = lpTopicList->pNext) { lpTopicList->fFlag |= TO_BE_KEPT; } } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | RemoveUnmarkedOccList | * Remove all the occurrence nodes that are not marked TO_BE_KEPT * * @parm LPQT | lpQueryTree | * Pointer to query tree (for global variables) * * @parm int | fResetFlag | * Do we reset the TO_BE_KEPT flag ? * * @parm LPIOCC | lpTopicList | * Pointer to Topic list to be checked * * @parm LPIOCC | lpOccList | * Pointer to occurrence list to be checked *************************************************************************/ PRIVATE VOID PASCAL NEAR RemoveUnmarkedOccList (LPQT lpQueryTree, LPITOPIC lpTopicList, LPIOCC lpOccList, int fResetFlag) { register LPIOCC lpNextOccList; register LPIOCC lpPrevOccList; lpPrevOccList = NULL; for (;lpOccList && !(lpOccList->fFlag & IS_MARKER_NODE); lpOccList = lpNextOccList) { lpNextOccList = lpOccList->pNext; if ((lpOccList->fFlag & TO_BE_KEPT) == 0) { RemoveNode(lpQueryTree, (LPV) lpTopicList, (LPSLINK)lpPrevOccList, (LPSLINK) lpOccList, OCCURENCE_NODE); } else if (fResetFlag) { lpOccList->fFlag &= ~TO_BE_KEPT; // Reset the flag if (lpOccList->fFlag & TO_BE_COMPARED) lpOccList->fFlag &= ~TO_BE_SKIPPED; lpPrevOccList = lpOccList; } } /* Reset the flag of the marker node */ if (lpOccList) lpOccList->fFlag &= ~TO_BE_KEPT; } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | CleanMarkedOccList | * Clean all the occurrence nodes from TO_BE_KEPT * * @parm LPIOCC | lpTopicList | * Pointer to Topic list to be checked *************************************************************************/ VOID PASCAL FAR CleanMarkedOccList (LPITOPIC lpTopicList) { register LPIOCC lpCurOcc; for (;lpTopicList; lpTopicList = lpTopicList->pNext) { for (lpCurOcc = lpTopicList->lpOccur; lpCurOcc; lpCurOcc = lpCurOcc->pNext) { lpCurOcc->fFlag &= ~TO_BE_KEPT; } } } /************************************************************************* * @doc INTERNAL * * @func HRESULT PASCAL NEAR | HandleNullNode | * Handle NULL query node. This is an optimization which will * save processing time. * * @parm LPQT | lpQueryTree | * Pointer to query tree (for global variables) * * @parm _LPQTNODE | lpResQtNode | * Pointer to result query tree node * * @parm _LPQTNODE | lpCurQtNode | * Pointer to query tree node * * @parm int | Operator | * What operator are we dealing with * * @rdesc FALSE, if no optimization can be done, TRUE otherwise *************************************************************************/ PRIVATE HRESULT PASCAL NEAR HandleNullNode(LPQT lpQueryTree, _LPQTNODE lpResQtNode, _LPQTNODE lpCurQtNode, int Operator) { _LPQTNODE lpChild; if (QTN_NODETYPE(lpResQtNode) != NULL_NODE && QTN_NODETYPE(lpCurQtNode) != NULL_NODE) return FALSE; if (Operator == NOT_OP) { if (QTN_NODETYPE(lpResQtNode) == NULL_NODE) { /* NULL ! a = NULL */ RemoveQuery(lpQueryTree, lpCurQtNode); QTN_NODETYPE(lpCurQtNode) = NULL_NODE; return TRUE; } else if (QTN_NODETYPE(lpCurQtNode) == NULL_NODE) { /* a ! NULL = a */ return TRUE; } return FALSE; } lpChild = QTN_NODETYPE(lpResQtNode) == NULL_NODE ? lpCurQtNode : lpResQtNode; switch (Operator) { case AND_OP: // a & NULL = NULL case NEAR_OP: // a # NULL = NULL case PHRASE_OP: // a + NULL = NULL ?? RemoveQuery(lpQueryTree, lpChild); QTN_NODETYPE(lpChild) = NULL_NODE; return TRUE; case OR_OP: // a | NULL = a if (QTN_NODETYPE(lpResQtNode) == NULL_NODE) { *lpResQtNode = *lpChild; QTN_NODETYPE(lpChild) = NULL_NODE; QTN_LEFT(lpChild) = QTN_RIGHT(lpChild) = NULL; QTN_TOPICLIST(lpChild) = NULL; } return TRUE; } return FALSE; } PRIVATE int PASCAL NEAR FRange(DWORD dwCount1, DWORD dwCount2, WORD cProxDist) { long fResult; int fRet = 1; fResult = dwCount1 - dwCount2; if (fResult < 0) { fRet = -1; fResult = -fResult; } if (fResult != 0 && fResult <= (long)cProxDist) return 0; else return fRet; } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | RemoveQuery | * Remove all doc nodes for a query node * * @parm LPQT | lpQueryTree | * Pointer to query tree (for global variables) * * @parm _LPQTNODE | lpCurQtNode | * Pointer to query tree node to be cleared *************************************************************************/ PRIVATE VOID PASCAL NEAR RemoveQuery(LPQT lpQueryTree, _LPQTNODE lpCurQtNode) { register LPITOPIC lpCurTopicList; register LPITOPIC lpNextTopicList; /* Remove all occurences of all doclist */ if ((lpCurTopicList = QTN_TOPICLIST(lpCurQtNode)) == NULL) return; for (; lpCurTopicList; lpCurTopicList = lpNextTopicList) { lpNextTopicList = lpCurTopicList->pNext; TopicNodeFree(lpQueryTree, lpCurQtNode, NULL, lpCurTopicList); } QTN_TOPICLIST(lpCurQtNode) = NULL; } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | SortResult | * Sort the results according to flag * * @parm _LPQT | lpQueryTree | * Pointer to query tree (containing globals) * * @parm _LPQTNODE | lpQtNode | * Pointer to query node * * @parm WORD | fFlag | * Tell how to sort the result: * @flag ORDERED_BASED | * Everything is ordered TopicId, hit offsets * @flag HIT_COUNT_BASED | * The doc id with most hit will be returned first * @flag WEIGHT_BASED | * The topicId with most weight will be returned first *************************************************************************/ PUBLIC VOID PASCAL NEAR SortResult (_LPQT lpQueryTree, _LPQTNODE lpQtNode, WORD fFlag) { register LPITOPIC lpTopic; switch (fFlag) { case ORDERED_BASED: for (lpTopic = lpQtNode->lpTopicList; lpTopic; lpTopic = lpTopic->pNext) OccurenceSort (lpQueryTree, lpTopic); break; case HIT_COUNT_BASED: case WEIGHT_BASED: TopicListSort (lpQtNode, fFlag); break; } #if defined(_DEBUG) && defined(SIMILARITY) && defined(_DUMPALL) { int i; _DPF1("Sort total: %lu\n", lpQtNode->cTopic); for (i = 0, lpTopic = lpQtNode->lpTopicList; lpTopic && i < 10; lpTopic = lpTopic->pNext, i++) { _DPF2("Topic %lu (%u)\n", lpTopic->dwTopicId, lpTopic->wWeight); } } #endif } PRIVATE HRESULT PASCAL NEAR TopicListInsertionSort (_LPQTNODE lpQtNode, BOOL fFlag) { LPITOPIC lpPrevTopic; LPITOPIC lpCurTopic; LPITOPIC lpNextTopic; LPITOPIC lpTmpTopic; FCMP fCompare; if (fFlag == HIT_COUNT_BASED) fCompare = HitCountCompare; else fCompare = TopicWeightCompare; for (lpCurTopic = lpQtNode->lpTopicList; lpCurTopic; lpCurTopic = lpNextTopic) { if (lpNextTopic = lpCurTopic->pNext) { if ((*fCompare) (lpCurTopic, lpNextTopic) < 0) { /* Out of order sequence */ /* Unlink the out of order node */ lpCurTopic->pNext = lpNextTopic->pNext; /* Do an insertion sort */ for (lpPrevTopic = NULL, lpTmpTopic = lpQtNode->lpTopicList;; lpTmpTopic = lpTmpTopic->pNext) { if ((*fCompare) (lpTmpTopic, lpNextTopic) < 0) { /* We just pass the insertion point */ if (lpPrevTopic == NULL) { lpNextTopic->pNext = lpQtNode->lpTopicList; lpQtNode->lpTopicList = lpNextTopic; } else { lpNextTopic->pNext = lpPrevTopic->pNext; lpPrevTopic->pNext = lpNextTopic; } break; } lpPrevTopic = lpTmpTopic; } /* Reset lpNextTopic */ lpNextTopic = lpCurTopic; } } } return S_OK; } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | TopicListSort | * Sort the results according to flag * * @parm _LPQT | lpQueryTree | * Pointer to query tree (containing globals) * * @parm _LPQTNODE | lpQtNode | * Pointer to query node * * @parm WORD | fFlag | * Tell how to sort the result: * @flag HIT_COUNT_BASED | * The doc id with most hit will be returned first * @flag WEIGHT_BASED | * The topicId with most weight will be returned first *************************************************************************/ HRESULT PASCAL NEAR TopicListSort (_LPQTNODE lpQtNode, BOOL fFlag) { HANDLE hHeap; /* Handle to heap block */ LPITOPIC far *lrgHeap; /* Pointer to heap block */ TOPIC_LIST Dummy; /* Dummy node to speed up search, compare */ LPITOPIC lpCurTopic; /* Current Topic node */ LPITOPIC lpNextTopic; /* Next Topic node */ LPITOPIC lpInsertPt; /* Current insertion point */ WORD cLastItem; WORD MaxItem; LPITOPIC far * lpPQNode; LPITOPIC lpTopNode; LPITOPIC lpNextNode; WORD wCurWeight; FCMP fCompare; /* Allocate the heap */ if (lpQtNode->cTopic > MAX_HEAP_ENTRIES) MaxItem = MAX_HEAP_ENTRIES; else MaxItem = (WORD)lpQtNode->cTopic + 1; /* If the list is short, we can use insertion sort since it is faster * then preparing and use heap sort */ if (MaxItem <= 20) return TopicListInsertionSort (lpQtNode, fFlag); if ((hHeap = _GLOBALALLOC(DLLGMEM, MaxItem * sizeof(LPV))) == NULL) { /* We run out of memory for the heap. Try a smaller size */ if ((hHeap = _GLOBALALLOC(DLLGMEM, (MaxItem = MIN_HEAP_ENTRIES)* sizeof(LPV))) == NULL) { /* We really run out of memory, so just do a regular * insertion sort. It is slow but at least something * works */ return TopicListInsertionSort (lpQtNode, fFlag); } } MaxItem --; /* Since node 0 is used for sentinel */ lrgHeap = (LPITOPIC far *)_GLOBALLOCK (hHeap); /* Initialize of Dummy */ Dummy.wWeight = 0xffff; // Maximum weigth for sentinel Dummy.pNext = NULL; /* Set the sentinel */ lrgHeap[0] = &Dummy; /* Initialize the variables */ lpInsertPt = &Dummy; lpCurTopic = lpQtNode->lpTopicList; if (fFlag == HIT_COUNT_BASED) fCompare = HitCountCompare; else fCompare = TopicWeightCompare; while (lpCurTopic) { lpPQNode = &lrgHeap[1]; for (cLastItem = 1; lpCurTopic && cLastItem <= MaxItem; cLastItem++, lpPQNode++) { lpNextTopic = lpCurTopic->pNext; *lpPQNode = lpCurTopic; lpCurTopic->pNext = NULL; lpCurTopic = lpNextTopic; HeapUp (lrgHeap, cLastItem, fCompare); } cLastItem--; /* Set up the last pointer */ for (; cLastItem > 0;) { lpTopNode = lrgHeap[1]; /* Get the new node's weight */ wCurWeight = lpTopNode->wWeight; /* Insert into the resulting list in decreasing order */ if (wCurWeight > lpInsertPt->wWeight) { /* Start from the beginning of the list */ lpInsertPt = &Dummy; } while (lpNextNode = lpInsertPt->pNext) { if (lpNextNode->wWeight < wCurWeight) break; lpInsertPt = lpNextNode; } lpTopNode->pNext = lpInsertPt->pNext; lpInsertPt->pNext = lpTopNode; lpInsertPt = lpTopNode; lrgHeap[1] = lrgHeap[cLastItem--]; HeapDown (lrgHeap, cLastItem, fCompare); } } /* Update the pointer to the sorted list */ lpQtNode->lpTopicList = Dummy.pNext; /* Release the memory */ _GLOBALUNLOCK(hHeap); _GLOBALFREE(hHeap); return S_OK; } /************************************************************************* * @doc INTERNAL * * @func VOID PASCAL NEAR | OccurenceSort | * Sort all the occurrences depending on their offsets. If two * occurrences have the same offset, ie. they must be identical * then one will be removed. Simple insertion sort is used since * it it expected that most of the time we will have less than * 15 occurences per TopicId * * @func _LPQT | lpQueryTree | * Pointer to query tree structure where all globals are * * @func LPITOPIC | lpTopic | * Pointer to doclist with the occurrence list to be sorted *************************************************************************/ PRIVATE VOID PASCAL NEAR OccurenceSort (_LPQT lpQueryTree, LPITOPIC lpTopic) { LPIOCC lpPrevOcc; LPIOCC lpCurOcc; LPIOCC lpNextOcc; LPIOCC lpTmpOcc; int fResult; for (lpCurOcc = lpTopic->lpOccur; lpCurOcc; lpCurOcc = lpNextOcc) { if (lpNextOcc = lpCurOcc->pNext) { if ((fResult = OccCompare(lpCurOcc, lpNextOcc)) <= 0) { /* Out of order sequence */ /* Unlink the out of order node */ lpCurOcc->pNext = lpNextOcc->pNext; if (fResult == 0) { /* Duplicate data, just free the node */ lpNextOcc->pNext = (LPIOCC)lpQueryTree->lpOccFreeList; lpQueryTree->lpOccFreeList = (LPSLINK)lpNextOcc; lpTopic->lcOccur--; /* Reset lpNextOcc */ lpNextOcc = lpCurOcc; continue; } /* Do an insertion sort */ for (lpPrevOcc = NULL, lpTmpOcc = lpTopic->lpOccur;; lpTmpOcc = lpTmpOcc->pNext) { if (lpTmpOcc != NULL && (fResult = OccCompare(lpNextOcc, lpTmpOcc)) == 0) { /* Duplicate data, just free the node */ lpNextOcc->pNext = (LPIOCC)lpQueryTree->lpOccFreeList; lpQueryTree->lpOccFreeList = (LPSLINK)lpNextOcc; lpTopic->lcOccur--; break; } if (lpTmpOcc == NULL || fResult > 0) { /* We just pass the insertion point */ if (lpPrevOcc == NULL) { lpNextOcc->pNext = lpTopic->lpOccur; lpTopic->lpOccur = lpNextOcc; } else { lpNextOcc->pNext = lpPrevOcc->pNext; lpPrevOcc->pNext = lpNextOcc; } break; } lpPrevOcc = lpTmpOcc; } /* Reset lpNextOcc */ lpNextOcc = lpCurOcc; } } } } PRIVATE int PASCAL NEAR TopicWeightCompare (LPITOPIC lpTopic1, LPITOPIC lpTopic2) { if (lpTopic1->wWeight > lpTopic2->wWeight) { return 1; } else if (lpTopic1->wWeight < lpTopic2->wWeight) { return -1; } else // must be equal { if (lpTopic1->lcOccur >= lpTopic2->lcOccur) return 1; return -1; } } PRIVATE int PASCAL NEAR HitCountCompare (LPITOPIC lpTopic1, LPITOPIC lpTopic2) { if (lpTopic1->lcOccur >= lpTopic2->lcOccur) return 1; return -1; } PRIVATE VOID PASCAL NEAR HeapUp (LPITOPIC far * lrgHeap, WORD ChildIndex, FCMP fCompare) { WORD ParentIndex; LPITOPIC far * lplpvParent; LPITOPIC far * lplpvChild; LPITOPIC lpSaved; LPITOPIC lpvParent; lplpvChild = &lrgHeap [ChildIndex]; ParentIndex = ChildIndex/2; lpSaved = *lplpvChild; while (ParentIndex) { lplpvParent = &lrgHeap[ParentIndex]; lpvParent = *lplpvParent; if ((*fCompare)((LPV)lpvParent, (LPV)lpSaved) > 0) break; *lplpvChild = lpvParent; lplpvChild = lplpvParent; ParentIndex /= 2; }; *lplpvChild = lpSaved; } PRIVATE VOID PASCAL NEAR HeapDown (LPITOPIC far * lrgHeap, int MaxChildIndex, FCMP fCompare) { int ChildIndex; LPITOPIC far * lplpvParent; LPITOPIC far * lplpvChild; LPITOPIC lpTopicChild; LPITOPIC lpTopicChild2; LPITOPIC lpSaved; lpSaved = *(lplpvParent = &lrgHeap[1]); ChildIndex = 2; for (; ChildIndex <= MaxChildIndex; ) { lplpvChild = &lrgHeap[ChildIndex]; lpTopicChild = *lplpvChild; /* Find the minimum of the two children */ if (ChildIndex < MaxChildIndex && (lpTopicChild2 = *(lplpvChild + 1))) { if ((*fCompare)((LPV)lpTopicChild, (LPV)lpTopicChild2) < 0) { lplpvChild++; ChildIndex ++; } } if ((*fCompare)((LPV)lpSaved, (LPV)*lplpvChild) > 0) break; /* Replace the node */ *lplpvParent = *lplpvChild; lplpvParent = lplpvChild; ChildIndex *= 2; } *lplpvParent = lpSaved; }