NT4/private/windows/win4help/hcrtf/cphrase.cpp

/*****************************************************************************
*																			 *
*  CPHRASE.CPP																 *
*																			 *
*  Copyright (C) Microsoft Corporation 1993-1994							 *
*  All Rights reserved. 													 *
*																			 *
*****************************************************************************/

#include "stdafx.h"

#include "cphrase.h"
#include "ftsrch.h"

// These must be identical to ..\common\ctable.cpp

const int TABLE_ALLOC_SIZE = 4096;	   // allocate in page increments
const int MAX_POINTERS = (2048 * 1024); // 2 meg, 524,288 strings
const int MAX_STRINGS  = (10 * 1024 * 1024) - 4096L; // 10 megs

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

static HASH FASTCALL ConvertToHash(PCSTR szKey);

CPhrase* pphrase;

CPhrase::CPhrase()
		: CTable()
{
	fWordPhrasing = FALSE;
}

/***************************************************************************

	FUNCTION:	CPhrase::AddPhrase

	PURPOSE:	Either adds the string, or updates its count if already added

	PARAMETERS:
		pszString

	RETURNS:

	COMMENTS:
		We use a hash algorithm that is fast, but will generate collisions.
		We ignore any collisions -- the effect on the phrase generation file
		will be negligible.

	MODIFICATION DATES:
		24-Feb-1994 [ralphw]

***************************************************************************/

BOOL STDCALL CPhrase::AddPhrase(PSTR pszString)
{
	static BOOL fWarned = FALSE;
#ifdef _DEBUG
//	ptblCheck->AddString(pszString);
#endif

#ifdef CHECK_HALL
	poutPhrase->outstring_eol(pszString);
#endif

	if (options.fsCompress & COMPRESS_TEXT_HALL) {
		return (pScanText(hCompressor, (PBYTE) pszString, strlen(pszString),
			defCharSet) >= 0);
	}
	if (*pszString == ' ')
		pszString = FirstNonSpace(pszString, options.fDBCS);
	else if (!*pszString)
		return TRUE;

	// REVIEW: because old compiler thought it would see these

	ASSERT(!strpbrk(pszString, "\001\021\013\n\r\032"));

	while (strlen(pszString) >= CCH_MAX_PHRASE) {
		int cb = CCH_MAX_PHRASE - 1;

		// REVIEW: needs work for DBCS enabling

		while (pszString[cb] == ' ' && cb)
			cb--;
		if (cb == 0)
			return TRUE; // too long to add without any spaces

		pszString[cb] = '\0';
		RemoveTrailingSpaces(pszString);
		AddPhrase(pszString);
		pszString += cb + 1;
	}

	HASH hash = ConvertToHash(pszString);
	int pos;
	for (pos = 1; pos < endpos; pos++) {
		if (((HASH_PHRASE*) ppszTable[pos])->hash == hash)
			break;
	}
	if (pos < endpos)
		((HASH_PHRASE *) ppszTable[pos])->count++;

	else if (!fWarned) {
		if (endpos >= maxpos) {
			if (cbStrings + TABLE_ALLOC_SIZE >= MAX_STRINGS ||
					cbPointers + TABLE_ALLOC_SIZE >= MAX_POINTERS) {
#ifdef _DEBUG
				SendStringToParent("Phrase limit reached\r\n");
#endif
				fWarned = TRUE;
			}
			else
				IncreaseTableBuffer();
		}

		if ((ppszTable[endpos] = TableMalloc(strlen(pszString) + 1 +
				sizeof(HASH) + sizeof(DWORD))) == NULL) {
#ifdef _DEBUG
				SendStringToParent("Phrase limit reached in TableMalloc\r\n");
#endif
			fWarned = TRUE;
			return FALSE;
		}

		HASH_PHRASE* pphr = (HASH_PHRASE*) ppszTable[endpos];
		pphr->hash = hash;
		pphr->count = 1;
		strcpy(pphr->sz, pszString);
		endpos++;
	}

	if (fWordPhrasing)
		return TRUE;
	else {
		fWordPhrasing = TRUE;

		// Now add each word in the phrase

		StrToken(pszString, ' ');	// Ignore first word
		PSTR pszWord;

		// Write out words:

		while ((pszWord = StrToken(NULL, ' ')) != NULL)
			AddPhrase(pszWord);

		fWordPhrasing = FALSE;
	}

	return TRUE;
}

/***************************************************************************

	FUNCTION:	CPhrase::GetPhrase

	PURPOSE:	Return string, once for each time it occurred.

	PARAMETERS:
		pszDst

	RETURNS:

	COMMENTS:
		WARNING! This is a destructive read. You can only read this
		table once!

	MODIFICATION DATES:
		25-Feb-1994 [ralphw]

***************************************************************************/

DWORD STDCALL CPhrase::GetPhrase(PSTR pszDst)
{
	*pszDst = 0;	  // clear the line no matter what happens

	if (curpos >= endpos)
		return (DWORD) -1;
	HASH_PHRASE* pphr = (HASH_PHRASE*) ppszTable[curpos++];
	strcpy(pszDst, pphr->sz);
	return pphr->count;
}

static const HASH MAX_CHARS = 43L;

static HASH FASTCALL ConvertToHash(PCSTR szKey)
{
	int ich;
	HASH hash = 0L;

	// REVIEW: 14-Oct-1993 [ralphw] -- Note lack of check for a hash collision.

	/*
	 * REVIEW: This is the hash algorithm used for context strings. That
	 * may not make much sense here where we must also deal with
	 * international character sets including DBCS.
	 */

	for (ich = 0; szKey[ich]; ++ich) {
		if (szKey[ich] == '!')
			hash = (hash * MAX_CHARS) + 11;
		else if (szKey[ich] == '.')
			hash = (hash * MAX_CHARS) + 12;
		else if (szKey[ich] == '_')
			hash = (hash * MAX_CHARS) + 13;
		else if (szKey[ich] == '0')
			hash = (hash * MAX_CHARS) + 10;
		else if (szKey[ich] <= 'Z')
			hash = (hash * MAX_CHARS) + (szKey[ich] - '0');
		else
			hash = (hash * MAX_CHARS) + (szKey[ich] - '0' - ('a' - 'A'));
	}

	/*
	 * Since the value 0 is reserved as a nil value, if any string
	 * actually hashes to this value, we just move it.
	 */

	return (hash == 0 ? 0 + 1 : hash);
}

void CPhrase::SortTable(void)
{
	if (endpos < 3) // don't sort one entry
		return;
	SetTableSortColumn(sizeof(HASH) + sizeof(DWORD));
	doSort(1, (int) endpos - 1);
}