Windows2003-3790/inetcore/outlookexpress/oejunk/junkeng.cpp
2020-09-30 16:53:55 +02:00

1927 lines
53 KiB
C++

/*
SVMHANDLER.CPP
(c) copyright 1998 Microsoft Corp
Contains the class encapsulating the Support Vector Machine used to do on the fly spam detection
Robert Rounthwaite (RobertRo@microsoft.com)
*/
#include <pch.hxx>
#include "junkeng.h"
#include "junkutil.h"
#include "parsestm.h"
#include <iert.h>
#include <math.h>
#include <limits.h>
// CBodyBuff
//
// Fixed-size window over a message body stream. The window is refilled
// from the stream on demand (_HrFillBuffer), upper-cased as it is read,
// and walked one character at a time with HrGetCurrChar / HrMoveNext.
// FDoMatch tests a feature string against the window at the current
// position.
class CBodyBuff
{
private:
    enum
    {
        // Size of m_rgbBuff; _HrFillBuffer keeps one byte free for a terminator
        CB_BODYBUFF_MAX = 4096
    };

    // State bits cached per character in m_dwFlagsCurr / m_dwFlagsPrev.
    // FDoMatch asserts that BBF_SET and BBF_ALPHA have the same values as
    // CT_START_SET and CT_START_ALPHA.
    enum
    {
        BBF_CLEAR   = 0x00000000,
        BBF_SET     = 0x00000001,
        BBF_ALPHA   = 0x00000002,
        BBF_NUM     = 0x00000004,
        BBF_SPACE   = 0x00000008,
        BBF_MASK    = 0x0000000F
    };

private:
    IStream *   m_pIStream;                     // source stream (AddRef'd in HrInit)
    ULONG       m_cbStream;                     // total stream size
    ULONG       m_ibStream;                     // bytes read from the stream so far
    BYTE        m_rgbBuff[CB_BODYBUFF_MAX];     // the window itself
    ULONG       m_cbBuffTotal;                  // bytes currently held in the window
    BYTE *      m_pbBuffCurr;                   // current character
    DWORD       m_dwFlagsCurr;                  // cached state of the current character
    BYTE *      m_pbBuffGood;                   // one past the last valid byte in the window
    BYTE *      m_pbBuffPrev;                   // previous character (NULL before the first move)
    DWORD       m_dwFlagsPrev;                  // cached state of the previous character

public:
    CBodyBuff() : m_pIStream(NULL), m_cbStream(0), m_ibStream(0),
                m_cbBuffTotal(0), m_pbBuffCurr(m_rgbBuff), m_dwFlagsCurr(BBF_CLEAR),
                m_pbBuffGood(m_rgbBuff), m_pbBuffPrev(NULL), m_dwFlagsPrev(BBF_CLEAR) {}
    ~CBodyBuff() {SafeRelease(m_pIStream);}

    HRESULT HrInit(DWORD dwFlags, IStream * pIStream);
    HRESULT HrGetCurrChar(CHAR * pchNext);
    BOOL FDoMatch(FEATURECOMP * pfcomp);

    // Advance to the next character, remembering the current position and
    // its cached flags as the "previous" character. Always returns S_OK.
    HRESULT HrMoveNext(VOID)
    {
        BYTE *  pbNext = NULL;

        m_pbBuffPrev = m_pbBuffCurr;
        m_dwFlagsPrev = m_dwFlagsCurr;

        pbNext = (BYTE *) CharNext((LPSTR) m_pbBuffCurr);

        // CharNext does not move when it is pointing at a NUL character.
        // A body can contain embedded NUL bytes inside the valid range of
        // the window, so step over them by hand; otherwise the caller's
        // scan loop would never make progress.
        if ((pbNext == m_pbBuffCurr) && (m_pbBuffCurr < m_pbBuffGood))
        {
            pbNext = m_pbBuffCurr + 1;
        }

        m_pbBuffCurr = pbNext;
        m_dwFlagsCurr = BBF_CLEAR;

        return S_OK;
    }

private:
    HRESULT _HrFillBuffer(VOID);
};
// Tokens searched for in the header lines of the SVM data file
// (see _HrReadSVMOutput and HrReadDefaultSpamCutoff). Each token is
// immediately followed by its value on the same line.
static const LPSTR szCountFeatureComp = "FeatureComponentCount = ";
static const LPSTR szDefaultThresh = "dThresh = ";
static const LPSTR szMostThresh = "mThresh = ";
static const LPSTR szLeastThresh = "lThresh = ";
static const LPSTR szThresh = "Threshold = ";
static const LPSTR szNumberofDim = "NumDim = ";
#ifdef DEBUG
// Debug-only strings. LOG_TAGLINE, LOG_FIRSTNAME, LOG_LASTNAME,
// LOG_COMPANYNAME and LOG_FINAL are used by FCalculateSpamProb; the others
// are presumably used by the log-file helpers defined elsewhere in this
// file -- TODO confirm.
static const LPSTR STR_REG_PATH_FLAT = "Software\\Microsoft\\Outlook Express";
static const LPSTR szJunkMailPrefix = "JUNKMAIL";
static const LPSTR szJunkMailLog = "JUNKMAIL.LOG";
static const LPSTR LOG_TAGLINE = "Calculating Junk Mail for message: %s";
static const LPSTR LOG_FIRSTNAME = "User's First Name: %s";
static const LPSTR LOG_LASTNAME = "User's Last Name: %s";
static const LPSTR LOG_COMPANYNAME = "User's Company Name: %s";
static const LPSTR LOG_BODY = "Body contains: %s";
static const LPSTR LOG_SUBJECT = "Subject contains: %s";
static const LPSTR LOG_TO = "To line contains: %s";
static const LPSTR LOG_FROM = "From line contains: %s";
// Formatted with two integers: the whole part and the six-digit fraction
static const LPSTR LOG_FINAL = "Junk Mail percentage: %0.1d.%0.6d\r\n";
#endif // DEBUG
// Finds pszToken in pszLine and parses the (optionally negative) number
// that follows it into *pdblVal. Returns FALSE if the token is not found.
BOOL FReadDouble(LPSTR pszLine, LPSTR pszToken, DOUBLE * pdblVal);
#ifdef DEBUG
// Debug-only logging helper; defined elsewhere in this file.
VOID PrintToLogFile(ILogFile * pILogFile, LPSTR pszTmpl, LPSTR pszArg);
#endif // DEBUG
// Attach the buffer to a stream: takes a reference on pIStream, records
// its size, rewinds it and resets the read offset.
// Returns E_INVALIDARG for a NULL stream, otherwise the result of the
// size query / rewind. dwFlags is not used.
HRESULT CBodyBuff::HrInit(DWORD dwFlags, IStream * pIStream)
{
    HRESULT hrResult = E_INVALIDARG;

    // A stream is mandatory
    if (NULL != pIStream)
    {
        // Hold our own reference for the lifetime of the buffer
        m_pIStream = pIStream;
        m_pIStream->AddRef();

        // How much is there to read?
        hrResult = HrGetStreamSize(m_pIStream, &m_cbStream);

        // Go back to the start of the stream
        if (SUCCEEDED(hrResult))
        {
            hrResult = HrRewindStream(m_pIStream);
        }

        // Nothing has been consumed yet
        if (SUCCEEDED(hrResult))
        {
            m_ibStream = 0;
        }
    }

    return hrResult;
}
// Fetch the character at the current position into *pchNext, refilling
// the window from the stream first if the position has reached the end
// of the valid data. Returns E_FAIL when no more data can be read.
HRESULT CBodyBuff::HrGetCurrChar(CHAR * pchNext)
{
    Assert(NULL != pchNext);

    // Only touch the stream when the window has been used up
    if ((m_pbBuffCurr >= m_pbBuffGood) && (S_OK != _HrFillBuffer()))
    {
        return E_FAIL;
    }

    *pchNext = *m_pbBuffCurr;
    return S_OK;
}
// Test whether the feature string in pfcomp occurs in the window starting
// at the current position.
//
// The feature bytes are first compared one-for-one against the buffer
// (which _HrFillBuffer has already upper-cased). If they all agree,
// FMatchToken is asked to validate the match using the previous character,
// the character following the match and the cached flag words.
//
// Side effects on a byte-wise match: pfcomp->dwFlags is overwritten with
// the flags FMatchToken leaves behind, and the low BBF_MASK bits of those
// flags are cached in m_dwFlagsCurr. May also refill the window.
//
// Returns the result of FMatchToken, or FALSE if there are not enough
// bytes left or the bytes differ.
BOOL CBodyBuff::FDoMatch(FEATURECOMP * pfcomp)
{
BOOL fRet = FALSE;
BYTE * pbSearch = NULL;
ULONG cchSearch = 0;
LPSTR pszMatch = NULL;
DWORD dwFlags = 0;
// Check incoming params
Assert(NULL != pfcomp);
Assert(NULL != pfcomp->pszFeature);
Assert(0 != pfcomp->cchFeature);
// Set up some locals
cchSearch = pfcomp->cchFeature;
// Do we need more characters for the match?
// Include the character after the string, just in case
// we have a match and need to check the character after
// the string for a word break
if ((cchSearch + 1) > (ULONG) (m_pbBuffGood - m_pbBuffCurr))
{
// Get more characters
// If this fails, we still might be good, since
// we might just have enough characters to do the
// full match at the end of the stream.
(VOID) _HrFillBuffer();
// Could we get enough?
if (cchSearch > (ULONG) (m_pbBuffGood - m_pbBuffCurr))
{
// No Match
fRet = FALSE;
goto exit;
}
}
// Do match
pbSearch = m_pbBuffCurr;
pszMatch = pfcomp->pszFeature;
// Plain byte comparison over the full feature length
while (0 != cchSearch--)
{
if (*(pszMatch++) != *(pbSearch++))
{
// No Match
fRet = FALSE;
goto exit;
}
}
// Validate the match
// Do we need to figure out if it starts with a word break?
if (0 != (pfcomp->dwFlags & CT_START_SET))
{
// The feature already carries start flags: reuse them
dwFlags = pfcomp->dwFlags;
}
else
{
// NOTE(review): dwFlags is still 0 at this point, so this assert can
// never fire; it presumably was meant to test pfcomp->dwFlags -- confirm.
Assert(CT_END_SET != (dwFlags & CT_END_SET));
// Start from whatever has been cached for the current character
dwFlags = m_dwFlagsCurr;
}
// The cached BBF_* bits are passed where CT_START_* bits are expected,
// so the two sets of values must line up
Assert(CT_START_SET == BBF_SET);
Assert(CT_START_ALPHA == BBF_ALPHA);
// First argument: TRUE when there is no previous character.
// Second argument: TRUE when the stream is fully consumed and the match
// reaches the end of the valid data in the window.
fRet = FMatchToken((NULL == m_pbBuffPrev),
((m_ibStream >= m_cbStream) && ((m_pbBuffCurr + pfcomp->cchFeature) >= m_pbBuffGood)),
(LPCSTR) m_pbBuffPrev, &m_dwFlagsPrev, pfcomp->pszFeature,
pfcomp->cchFeature, &dwFlags, (LPCSTR) (m_pbBuffCurr + pfcomp->cchFeature));
// Save the changed flags
pfcomp->dwFlags = dwFlags;
// Cache the current character's state
m_dwFlagsCurr = (dwFlags & BBF_MASK);
exit:
return fRet;
}
// Refill the window from the stream.
//
// Everything from the previous character onwards is kept by moving it to
// the front of the buffer; the current and previous pointers are re-based
// accordingly. New data is then read in behind it, the buffer is
// NUL-terminated and upper-cased, and m_pbBuffGood is set to the end of
// the valid data.
//
// Returns S_OK if new bytes were read, S_FALSE if the stream is already
// exhausted or the read failed / returned nothing.
HRESULT CBodyBuff::_HrFillBuffer(VOID)
{
HRESULT hr = S_OK;
LONG cbExtra = 0;
ULONG cbRead = 0;
ULONG cbToRead = 0;
// If there isn't any more of the stream to grab
if (m_ibStream >= m_cbStream)
{
hr = S_FALSE;
goto exit;
}
// If this is the first time through, save nothing
if (NULL == m_pbBuffPrev)
{
cbExtra = 0;
}
else
{
// How much space should I save?
// (everything from the previous character to the end of the held data)
cbExtra = (ULONG) (m_cbBuffTotal - (m_pbBuffPrev - m_rgbBuff));
Assert(cbExtra > 0);
// Save the unused data
MoveMemory(m_rgbBuff, m_pbBuffPrev, (int)min(cbExtra, sizeof(m_rgbBuff)));
// Reset the current pointer
// (keeps its distance from the previous character)
m_pbBuffCurr = m_rgbBuff + (m_pbBuffCurr - m_pbBuffPrev);
// Reset the previous pointer
m_pbBuffPrev = m_rgbBuff;
}
// Read in more data
// One byte is held back for the terminator written below.
// NOTE(review): the remaining byte count is narrowed to LONG here;
// confirm body streams cannot exceed LONG_MAX bytes.
cbToRead = (int)min(CB_BODYBUFF_MAX - cbExtra - 1, (LONG) (m_cbStream - m_ibStream));
hr = m_pIStream->Read(m_rgbBuff + cbExtra, cbToRead, &cbRead);
if ((FAILED(hr)) || (0 == cbRead))
{
// End of stream
hr = S_FALSE;
}
else
{
hr = S_OK;
}
// Track the number of bytes read
m_ibStream += cbRead;
// Set the total buffer size
m_cbBuffTotal = cbExtra + cbRead;
// Terminate the buffer, just in case
m_rgbBuff[m_cbBuffTotal] = '\0';
// Uppercase the buffer
// (the count returned by CharUpperBuff marks the end of the valid data)
m_pbBuffGood = m_rgbBuff + CharUpperBuff((CHAR *) m_rgbBuff, m_cbBuffTotal);
exit:
return hr;
}
// Build the lookup structure used to scan the message body.
//
// m_pblistBodyList gets one node per body feature component (slot 0 is
// left unused so that 0 can terminate a chain), and m_rgiBodyList maps
// the first byte of a feature string to the head of the chain of nodes
// whose feature starts with that byte.
//
// cBodyItems - number of feature components whose location is locBody
//
// Returns E_INVALIDARG if cBodyItems is 0, the allocation error if the
// node array can't be allocated, S_OK otherwise.
HRESULT CJunkFilter::_HrBuildBodyList(USHORT cBodyItems)
{
    HRESULT hrResult = S_OK;
    USHORT  iComp = 0;
    USHORT  iSlot = 1;
    UCHAR   bFirst = 0;

    // Nothing to build a list from
    if (0 == cBodyItems)
    {
        return E_INVALIDARG;
    }

    Assert(USHRT_MAX > cBodyItems);

    // Throw away whatever was built before
    SafeMemFree(m_pblistBodyList);
    m_cblistBodyList = 0;
    ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList));

    // One node per body item, plus the unused slot 0
    hrResult = HrAlloc((VOID **) &m_pblistBodyList, sizeof(*m_pblistBodyList) * (cBodyItems + 1));
    if (FAILED(hrResult))
    {
        return hrResult;
    }

    ZeroMemory(m_pblistBodyList, sizeof(*m_pblistBodyList) * (cBodyItems + 1));

    // Push every body feature onto the chain for its first byte
    for (iComp = 0; iComp < m_cFeatureComps; iComp++)
    {
        FEATURECOMP * pfcompCurr = &m_rgfeaturecomps[iComp];

        if (locBody != pfcompCurr->loc)
        {
            continue;
        }

        bFirst = (UCHAR) (pfcompCurr->pszFeature[0]);

        m_pblistBodyList[iSlot].usItem = iComp;
        m_pblistBodyList[iSlot].iNext = m_rgiBodyList[bFirst];
        m_rgiBodyList[bFirst] = iSlot;

        iSlot++;
    }

    // Remember how many nodes there are (including slot 0)
    m_cblistBodyList = cBodyItems + 1;

    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// _HrReadSVMOutput
//
// Read the SVM output from a file (".LKO file") and load it as the
// current model: sigmoid parameters, thresholds, per-feature weights and
// the feature components that make up each feature.
//
// pszFileName - path of the data file
//
// Returns E_INVALIDARG for a NULL/empty path, E_OUTOFMEMORY when a count
// in the file is larger than this code supports, E_FAIL when the file is
// malformed, or the error from the parse stream / allocator.
//
// If loading fails after the previous model has started to be replaced,
// the filter is left with no feature components and no body list.
/////////////////////////////////////////////////////////////////////////////
HRESULT CJunkFilter::_HrReadSVMOutput(LPCSTR pszFileName)
{
    HRESULT         hr = S_OK;
    CParseStream    parsestm;
    ULONG           ulIndex = 0;
    LPSTR           pszBuff = NULL;
    ULONG           cchBuff = 0;
    LPSTR           pszDummy = NULL;
    ULONG           cFeatureComponents = 0;
    LPSTR           pszFeature = NULL;
    ULONG           ulFeatureComp = 0;
    USHORT          cBodyItems = 0;
    FEATURECOMP *   pfeaturecomp = NULL;
    int             cchFeature = 0;
    BOOL            fModelReset = FALSE;

    if ((NULL == pszFileName) || ('\0' == *pszFileName))
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Get the parse stream
    hr = parsestm.HrSetFile(0, pszFileName);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Read the first three lines; only the third one is kept
    for (ulIndex = 0; ulIndex < 3; ulIndex++)
    {
        SafeMemFree(pszBuff);
        hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
        if (FAILED(hr))
        {
            goto exit;
        }
    }

    // parse 3rd line: only care about CC and DD
    if (FALSE == FReadDouble(pszBuff, "cc = ", &m_dblCC))
    {
        hr = E_FAIL;
        goto exit;
    }

    if (FALSE == FReadDouble(pszBuff, "dd = ", &m_dblDD))
    {
        hr = E_FAIL;
        goto exit;
    }

    // 4th line: default threshold (optional) and SVM threshold (required)
    SafeMemFree(pszBuff);
    hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
    if (FAILED(hr))
    {
        goto exit;
    }

    if (FALSE == FReadDouble(pszBuff, szDefaultThresh, &m_dblDefaultThresh))
    {
        m_dblDefaultThresh = THRESH_DEFAULT;
    }

    // If nobody picked a cutoff yet, use the default one
    if (0 == m_dblSpamCutoff)
    {
        m_dblSpamCutoff = m_dblDefaultThresh;
    }

    if (FALSE == FReadDouble(pszBuff, szThresh, &m_dblThresh))
    {
        hr = E_FAIL;
        goto exit;
    }

    // 5th line: "most" and "least" thresholds (both optional)
    SafeMemFree(pszBuff);
    hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
    if (FAILED(hr))
    {
        goto exit;
    }

    if (FALSE == FReadDouble(pszBuff, szMostThresh, &m_dblMostThresh))
    {
        m_dblMostThresh = THRESH_MOST;
    }

    if (FALSE == FReadDouble(pszBuff, szLeastThresh, &m_dblLeastThresh))
    {
        m_dblLeastThresh = THRESH_LEAST;
    }

    // 6th line: number of features
    SafeMemFree(pszBuff);
    hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
    if (FAILED(hr))
    {
        goto exit;
    }

    // The count is read at a fixed offset, so the line must be at least
    // as long as the token in front of it
    if (lstrlen(pszBuff) < lstrlen(szNumberofDim))
    {
        hr = E_FAIL;
        goto exit;
    }

    m_cFeatures = StrToInt(pszBuff + lstrlen(szNumberofDim));
    if (0 == m_cFeatures)
    {
        hr = E_FAIL;
        goto exit;
    }

    // We only support up to USHRT_MAX features
    if (m_cFeatures >= USHRT_MAX)
    {
        hr = E_OUTOFMEMORY;
        goto exit;
    }

    // 7th line: number of feature components (optional)
    SafeMemFree(pszBuff);
    hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
    if (FAILED(hr))
    {
        goto exit;
    }

    pszDummy = StrStr(pszBuff, szCountFeatureComp);
    if (NULL != pszDummy)
    {
        pszDummy += lstrlen(szCountFeatureComp);
        cFeatureComponents = StrToInt(pszDummy);
    }

    // There is at least one component per feature; guess if the file
    // didn't give a usable count
    if (cFeatureComponents < m_cFeatures)
    {
        cFeatureComponents = m_cFeatures * 2;
    }

    // Components are indexed with USHORTs when the body list is built, so
    // there is no point in making room for more than that. This also
    // keeps a bogus count from overflowing the allocation size.
    if (cFeatureComponents > (USHRT_MAX - 1))
    {
        cFeatureComponents = USHRT_MAX - 1;
    }

    // Skip forward to the weights section
    while (0 != lstrcmp(pszBuff, "Weights"))
    {
        SafeMemFree(pszBuff);
        hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
        if (FAILED(hr))
        {
            goto exit;
        }
    }

    SafeMemFree(m_rgdblSVMWeights);
    hr = HrAlloc((void **) &m_rgdblSVMWeights, sizeof(*m_rgdblSVMWeights) * m_cFeatures);
    if (FAILED(hr))
    {
        goto exit;
    }

    SafeMemFree(m_rgulFeatureStatus);
    hr = HrAlloc((void **) &m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures);
    if (FAILED(hr))
    {
        goto exit;
    }

    FillMemory(m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures, -1);

    // Release the feature strings owned by the previous model before its
    // component array goes away (same ownership rule as the destructor)
    if (NULL != m_rgfeaturecomps)
    {
        for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++)
        {
            if ((locNil != m_rgfeaturecomps[ulIndex].loc) && (locSpecial != m_rgfeaturecomps[ulIndex].loc))
            {
                SafeMemFree(m_rgfeaturecomps[ulIndex].pszFeature);
            }
        }
    }
    m_cFeatureComps = 0;
    fModelReset = TRUE;

    SafeMemFree(m_rgfeaturecomps);
    hr = HrAlloc((void **) &m_rgfeaturecomps, sizeof(*m_rgfeaturecomps) * cFeatureComponents);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Initialize the features
    ZeroMemory(m_rgfeaturecomps, sizeof(*m_rgfeaturecomps) * cFeatureComponents);

    // One line per feature: weight, separator, then one or more
    // components joined by '|' or '&'
    for (ulIndex = 0; ulIndex < m_cFeatures; ulIndex++)
    {
        UINT    uiLoc;
        USHORT  cbStr;
        boolop  bop;
        BOOL    fContinue;
        BOOL    fNegative;

        SafeMemFree(pszBuff);
        hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
        if (FAILED(hr))
        {
            goto exit;
        }

        // read the SVM weight
        pszDummy = pszBuff;
        if ('\0' == *pszDummy)
        {
            // An empty line can't hold a weight
            hr = E_FAIL;
            goto exit;
        }

        // The first character is the sign position and is always skipped
        fNegative = ('-' == *pszDummy);
        pszDummy++;

        m_rgdblSVMWeights[ulIndex] = StrToDbl(pszDummy, &pszDummy);
        if (FALSE != fNegative)
        {
            m_rgdblSVMWeights[ulIndex] *= -1;
        }

        if (NULL == pszDummy)
        {
            hr = E_FAIL;
            goto exit;
        }

        if ('\0' != *pszDummy)
        {
            pszDummy++; // skip the separator
        }

        bop = boolopOr;
        fContinue = FALSE;
        do
        {
            // Don't write past the component array if the file holds more
            // components than it announced
            if (ulFeatureComp >= cFeatureComponents)
            {
                hr = E_FAIL;
                goto exit;
            }

            pfeaturecomp = &m_rgfeaturecomps[ulFeatureComp++];

            // Skip over white space
            UlStripWhitespace(pszDummy, TRUE, FALSE, NULL);

            // Location (or "special")
            uiLoc = StrToInt(pszDummy);
            pszDummy = StrStr(pszDummy, ":"); // skip the separator
            if (NULL == pszDummy)
            {
                hr = E_FAIL;
                goto exit;
            }
            pszDummy++;

            pfeaturecomp->loc = (FeatureLocation)uiLoc;
            pfeaturecomp->ulFeature = ulIndex;
            pfeaturecomp->bop = bop;

            if (locBody == pfeaturecomp->loc)
            {
                cBodyItems++;
            }

            if (uiLoc == 5)
            {
                // Special rule: just a rule number
                UINT uiRuleNumber = StrToInt(pszDummy);
                pszDummy += StrSpn(pszDummy, "0123456789");
                pfeaturecomp->ulRuleNum = uiRuleNumber;
            }
            else
            {
                // Text feature: "<length>:<text>"
                cchFeature = StrToInt(pszDummy);
                pszDummy = StrStr(pszDummy, ":");
                if (NULL == pszDummy)
                {
                    hr = E_FAIL;
                    goto exit;
                }
                pszDummy++;

                // We only support strings up to USHRT_MAX
                // (checked before narrowing so that the check means something)
                if ((cchFeature < 0) || (cchFeature >= USHRT_MAX))
                {
                    hr = E_OUTOFMEMORY;
                    goto exit;
                }

                // The announced length must actually be present on the line
                if (lstrlen(pszDummy) < cchFeature)
                {
                    hr = E_FAIL;
                    goto exit;
                }

                cbStr = (USHORT) cchFeature;

                hr = HrAlloc((void **) &pszFeature, sizeof(*pszFeature) * (cbStr + 1));
                if (FAILED(hr))
                {
                    goto exit;
                }

                StrCpyN(pszFeature, pszDummy, cbStr + 1);
                pszDummy += cbStr;
                if ('\0' != *pszDummy)
                {
                    pszDummy++; // skip the separator
                }

                pszFeature[cbStr] = '\0';
                Assert(cbStr == strlen(pszFeature));

                // Save off the string (the component owns it from here on)
                pfeaturecomp->pszFeature = pszFeature;
                pszFeature = NULL;
                pfeaturecomp->cchFeature = cbStr;
            }

            // Is there another component for this feature?
            UlStripWhitespace(pszDummy, TRUE, FALSE, NULL);
            switch(*pszDummy)
            {
                case '|':
                    bop = boolopOr;
                    fContinue = TRUE;
                    break;

                case '&':
                    bop = boolopAnd;
                    fContinue = TRUE;
                    break;

                default:
                    fContinue = FALSE;
                    break;
            }
            pszDummy++;
        }
        while (fContinue);
    }

    m_cFeatureComps = ulFeatureComp;

    // Build up body items...
    hr = _HrBuildBodyList(cBodyItems);
    if (FAILED(hr))
    {
        goto exit;
    }

    hr = S_OK;

exit:
    if ((FAILED(hr)) && (FALSE != fModelReset))
    {
        // The old model is gone and the new one is incomplete: free the
        // strings parsed so far and leave an empty, consistent model.
        // ulFeatureComp is non-zero only once the new array exists.
        for (ulIndex = 0; ulIndex < ulFeatureComp; ulIndex++)
        {
            if ((locNil != m_rgfeaturecomps[ulIndex].loc) && (locSpecial != m_rgfeaturecomps[ulIndex].loc))
            {
                SafeMemFree(m_rgfeaturecomps[ulIndex].pszFeature);
            }
        }
        m_cFeatureComps = 0;

        // The body list indexes into the component array, so it goes too
        SafeMemFree(m_pblistBodyList);
        m_cblistBodyList = 0;
        ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList));
    }
    SafeMemFree(pszFeature);
    SafeMemFree(pszBuff);
    return hr;
}
/////////////////////////////////////////////////////////////////////////////
// _FInvokeSpecialRule
//
// Evaluates the built-in ("special") rule with the given number against
// the message currently set on the filter.
// Returns the state of the feature; FALSE for unknown rule numbers and
// for the time-based rules when no sent time is available.
/////////////////////////////////////////////////////////////////////////////
BOOL CJunkFilter::_FInvokeSpecialRule(UINT iRuleNum)
{
    BOOL        fResult = FALSE;
    SYSTEMTIME  stWhen;
    CHAR        szYear[6];
    DWORD       dwTokenFlags = 0;

    switch (iRuleNum)
    {
        // Rules 1-3: body contains part of the user's identity
        case 1:
            fResult = FStreamStringSearch(m_pIStmBody, &dwTokenFlags, m_pszFirstName, m_cchFirstName, 0);
            break;

        case 2:
            fResult = FStreamStringSearch(m_pIStmBody, &dwTokenFlags, m_pszLastName, m_cchLastName, 0);
            break;

        case 3:
            fResult = FStreamStringSearch(m_pIStmBody, &dwTokenFlags, m_pszCompanyName, m_cchCompanyName, 0);
            break;

        // Rule 4: body contains the year of the message's sent time
        case 4:
            if (!FTimeEmpty(&m_ftMessageSent))
            {
                // Break the time down so we can get at the year
                SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stWhen));
                wnsprintf(szYear, ARRAYSIZE(szYear), "%d", stWhen.wYear);

                // The year has to stand alone as a number
                dwTokenFlags = CT_START_SET | CT_START_NUM | CT_END_SET | CT_END_NUM;
                fResult = FStreamStringSearch(m_pIStmBody, &dwTokenFlags, szYear, lstrlen(szYear), SSF_CASESENSITIVE);
            }
            break;

        // Rule 5: sent in the wee hours (>= 7pm or < 6am)
        case 5:
            if (!FTimeEmpty(&m_ftMessageSent))
            {
                // Break the time down so we can get at the hour
                SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stWhen));
                fResult = (stWhen.wHour >= 19) || (stWhen.wHour < 6);
            }
            break;

        // Rule 6: sent on a weekend (day 0 or day 6)
        case 6:
            if (!FTimeEmpty(&m_ftMessageSent))
            {
                // Break the time down so we can get at the day of the week
                SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stWhen));
                fResult = ((0 == stWhen.wDayOfWeek) || (6 == stWhen.wDayOfWeek));
            }
            break;

        // Rules 14 and 17 were evaluated up front, before the text was
        // upper-cased (see _HandleCaseSensitiveSpecialRules)
        case 14:
            fResult = m_fRule14;
            break;

        case 15:
            fResult = FSpecialFeatureNonAlphaStm(m_pIStmBody);
            break;

        case 16:
            fResult = m_fDirectMessage;
            break;

        case 17:
            fResult = m_fRule17;
            break;

        case 18:
            fResult = FSpecialFeatureNonAlpha(m_pszSubject);
            break;

        // Rule 19: missing or empty To line
        case 19:
            fResult = ((NULL == m_pszTo) || ('\0' == *m_pszTo));
            break;

        case 20:
            fResult = m_fHasAttach;
            break;

        // Rules 40-47: body size is at least the given number of bytes
        case 40:
            fResult = (m_cbBody >= 125);
            break;

        case 41:
            fResult = (m_cbBody >= 250);
            break;

        case 42:
            fResult = (m_cbBody >= 500);
            break;

        case 43:
            fResult = (m_cbBody >= 1000);
            break;

        case 44:
            fResult = (m_cbBody >= 2000);
            break;

        case 45:
            fResult = (m_cbBody >= 4000);
            break;

        case 46:
            fResult = (m_cbBody >= 8000);
            break;

        case 47:
            fResult = (m_cbBody >= 16000);
            break;

        default:
            AssertSz(FALSE, "unsupported special feature");
            break;
    }

    return fResult;
}
/////////////////////////////////////////////////////////////////////////////
// _HandleCaseSensitiveSpecialRules
//
// Called from _EvaluateFeatureComponents().
// Special rules 14 and 17 look at upper-case words, so they have to run
// before the header texts are upper-cased. If the model uses them, work
// them out now and stash the answers in m_fRule14 / m_fRule17 for
// _FInvokeSpecialRule to return later.
/////////////////////////////////////////////////////////////////////////////
VOID CJunkFilter::_HandleCaseSensitiveSpecialRules()
{
    ULONG   iComp = 0;

    for (iComp = 0; iComp < m_cFeatureComps; iComp++)
    {
        // Only special rules are of interest here
        if (locSpecial != m_rgfeaturecomps[iComp].loc)
        {
            continue;
        }

        if (14 == m_rgfeaturecomps[iComp].ulRuleNum)
        {
            m_fRule14 = FSpecialFeatureUpperCaseWordsStm(m_pIStmBody);
        }
        else if (17 == m_rgfeaturecomps[iComp].ulRuleNum)
        {
            m_fRule17 = FSpecialFeatureUpperCaseWords(m_pszSubject);
        }
    }
}
// Scan the message body once and mark which body feature components are
// present. For each character of the body, only the components whose
// feature string starts with that character are tried. Does nothing if
// there is no body stream or the body buffer can't be set up.
VOID CJunkFilter::_EvaluateBodyFeatures(VOID)
{
    CBodyBuff   bodyWindow;
    CHAR        chCurr = '\0';
    USHORT      iNode = 0;

    // No body, or no way to read it: nothing to do
    if ((NULL == m_pIStmBody) || (FAILED(bodyWindow.HrInit(0, m_pIStmBody))))
    {
        return;
    }

    // Start with every body feature marked as not found
    for (iNode = 1; iNode < m_cblistBodyList; iNode++)
    {
        m_rgfeaturecomps[m_pblistBodyList[iNode].usItem].fPresent = FALSE;
    }

    // Walk the body one character at a time
    while (S_OK == bodyWindow.HrGetCurrChar(&chCurr))
    {
        // Try each feature that begins with this character
        for (iNode = m_rgiBodyList[(UCHAR) chCurr]; 0 != iNode; iNode = m_pblistBodyList[iNode].iNext)
        {
            FEATURECOMP * pfcompCurr = &(m_rgfeaturecomps[m_pblistBodyList[iNode].usItem]);

            // Once found, a feature doesn't need to be looked at again
            if (FALSE != pfcompCurr->fPresent)
            {
                continue;
            }

            Assert(NULL != pfcompCurr->pszFeature);

            pfcompCurr->fPresent = bodyWindow.FDoMatch(pfcompCurr);
        }

        bodyWindow.HrMoveNext();
    }
}
/////////////////////////////////////////////////////////////////////////////
// _EvaluateFeatureComponents
//
// Evaluates the subject, from, to and special feature components and
// stores the outcome in each component's fPresent. Components with any
// other location are left as they are.
//
// Note: the From, To and Subject strings are upper-cased in place.
/////////////////////////////////////////////////////////////////////////////
VOID CJunkFilter::_EvaluateFeatureComponents(VOID)
{
    ULONG           iComp = 0;
    FEATURECOMP *   pfcompCurr = NULL;

    // These need the original casing, so they go first
    _HandleCaseSensitiveSpecialRules();

    // Upper-case the header texts for the comparisons below
    if (NULL != m_pszFrom)
    {
        CharUpperBuff(m_pszFrom, lstrlen(m_pszFrom));
    }

    if (NULL != m_pszTo)
    {
        CharUpperBuff(m_pszTo, lstrlen(m_pszTo));
    }

    if (NULL != m_pszSubject)
    {
        CharUpperBuff(m_pszSubject, lstrlen(m_pszSubject));
    }

    for (iComp = 0, pfcompCurr = m_rgfeaturecomps; iComp < m_cFeatureComps; iComp++, pfcompCurr++)
    {
        switch (pfcompCurr->loc)
        {
            case locNil:
                // Should never be in the list
                Assert(locNil != pfcompCurr->loc);
                pfcompCurr->fPresent = FALSE;
                break;

            case locSubj:
                pfcompCurr->fPresent = FWordPresent(m_pszSubject, &(pfcompCurr->dwFlags),
                                                    pfcompCurr->pszFeature, pfcompCurr->cchFeature, NULL);
                break;

            case locFrom:
                pfcompCurr->fPresent = FWordPresent(m_pszFrom, &(pfcompCurr->dwFlags),
                                                    pfcompCurr->pszFeature, pfcompCurr->cchFeature, NULL);
                break;

            case locTo:
                pfcompCurr->fPresent = FWordPresent(m_pszTo, &(pfcompCurr->dwFlags),
                                                    pfcompCurr->pszFeature, pfcompCurr->cchFeature, NULL);
                break;

            case locSpecial:
                pfcompCurr->fPresent = _FInvokeSpecialRule(pfcompCurr->ulRuleNum);
                break;
        }
    }
}
/////////////////////////////////////////////////////////////////////////////
// _ProcessFeatureComponentPresence
//
// Folds the presence (or absence) of the individual feature components
// into the status of the feature each belongs to. A feature may be made
// up of several components: the first one seen sets the status, every
// later one is combined with it using the component's boolean operator.
/////////////////////////////////////////////////////////////////////////////
VOID CJunkFilter::_ProcessFeatureComponentPresence(VOID)
{
    ULONG           iComp = 0;
    ULONG           iFeature = 0;
    FEATURECOMP *   pfcompCurr = NULL;

    for (iComp = 0; iComp < m_cFeatureComps; iComp++)
    {
        pfcompCurr = &m_rgfeaturecomps[iComp];
        iFeature = pfcompCurr->ulFeature;

        if (-1 == m_rgulFeatureStatus[iFeature])
        {
            // First component of this feature: it decides on its own
            m_rgulFeatureStatus[iFeature] = (FALSE != pfcompCurr->fPresent) ? 1 : 0;
        }
        else if (boolopOr == pfcompCurr->bop)
        {
            // OR: a present component turns the feature on
            if (pfcompCurr->fPresent)
            {
                m_rgulFeatureStatus[iFeature] = 1;
            }
        }
        else if (boolopAnd == pfcompCurr->bop)
        {
            // AND: a missing component turns the feature off
            if (!pfcompCurr->fPresent)
            {
                m_rgulFeatureStatus[iFeature] = 0;
            }
        }
        else
        {
            Assert(FALSE);
        }
    }
}
/////////////////////////////////////////////////////////////////////////////
// _DblDoSVMCalc
//
// Does the actual support vector machine calculation: adds up the weights
// of the features that are on, subtracts the threshold and pushes the
// result through the sigmoid 1 / (1 + exp(CC * x + DD)).
// Returns the probability that the message is spam
/////////////////////////////////////////////////////////////////////////////
DOUBLE CJunkFilter::_DblDoSVMCalc(VOID)
{
    DOUBLE  dblSum = 0.0;
    ULONG   iFeature = 0;

    for (iFeature = 0; iFeature < m_cFeatures; iFeature++)
    {
        if (1 != m_rgulFeatureStatus[iFeature])
        {
            // Anything other than "off" means the status was never set
            if (0 != m_rgulFeatureStatus[iFeature])
            {
                AssertSz(FALSE, "What happened here!");
            }
            continue;
        }

        dblSum += m_rgdblSVMWeights[iFeature];

#ifdef DEBUG
        if (NULL != m_pILogFile)
        {
            _PrintFeatureToLog(iFeature);
        }
#endif  // DEBUG
    }

    // Apply threshold
    dblSum -= m_dblThresh;

    // Apply sigmoid
    return (1 / (1 + exp((m_dblCC * dblSum) + m_dblDD)));
}
/////////////////////////////////////////////////////////////////////////////
// FCalculateSpamProb
//
// Calculates the probability that the given message is spam.
// The probability is returned in *pdblSpamProb, and *pfIsSpam is set by
// comparing it with the current spam cutoff. The message parts are kept
// in members for the evaluation helpers to use; the header strings are
// upper-cased in place along the way.
// Always returns TRUE.
/////////////////////////////////////////////////////////////////////////////
BOOL CJunkFilter::FCalculateSpamProb(LPSTR pszFrom, LPSTR pszTo, LPSTR pszSubject, IStream * pIStmBody,
                            BOOL fDirectMessage, BOOL fHasAttach, FILETIME * pftMessageSent,
                            DOUBLE * pdblSpamProb, BOOL * pfIsSpam)
{
#ifdef DEBUG
    CHAR    szLogLine[1024];
    DWORD   dwMillionths = 0;
#endif  // DEBUG

    // Remember the message we are working on
    m_pszFrom = pszFrom;
    m_pszTo = pszTo;
    m_pszSubject = pszSubject;
    m_pIStmBody = pIStmBody;
    m_fDirectMessage = fDirectMessage;
    m_fHasAttach = fHasAttach;
    m_ftMessageSent = *pftMessageSent;

    // Work out the size of the body; no stream or no size counts as empty
    if ((NULL == m_pIStmBody) || (FAILED(HrGetStreamSize(m_pIStmBody, &m_cbBody))))
    {
        m_cbBody = 0;
    }

#ifdef DEBUG
    // Open the logfile on first use
    if (NULL == m_pILogFile)
    {
        _HrCreateLogFile();
    }

    // Log what we know about the message and the user
    if (NULL != m_pILogFile)
    {
        PrintToLogFile(m_pILogFile, LOG_TAGLINE, pszSubject);
        PrintToLogFile(m_pILogFile, LOG_FIRSTNAME, m_pszFirstName);
        PrintToLogFile(m_pILogFile, LOG_LASTNAME, m_pszLastName);
        PrintToLogFile(m_pILogFile, LOG_COMPANYNAME, m_pszCompanyName);
    }
#endif  // DEBUG

    // Evaluate the components, fold them into features, run the SVM
    _EvaluateBodyFeatures();
    _EvaluateFeatureComponents();
    _ProcessFeatureComponentPresence();

    *pdblSpamProb = _DblDoSVMCalc();

#ifdef DEBUG
    if (NULL != m_pILogFile)
    {
        // Log the result as whole part and six-digit fraction
        dwMillionths = ( DWORD ) ((*pdblSpamProb * 1000000) + 0.5);
        wnsprintf(szLogLine, ARRAYSIZE(szLogLine), LOG_FINAL, dwMillionths / 1000000, dwMillionths % 1000000);
        m_pILogFile->WriteLog(LOGFILE_DB, szLogLine);
        m_pILogFile->WriteLog(LOGFILE_DB, "");
    }
#endif  // DEBUG

    *pfIsSpam = (*pdblSpamProb > m_dblSpamCutoff);

    return TRUE;
}
/////////////////////////////////////////////////////////////////////////////
// HrReadDefaultSpamCutoff
//
// Reads the default spam cutoff without parsing the entire file.
// Use GetDefaultSpamThresh if a data file has already been loaded.
// static member function
//
// pszFullPath      - path of the SVM data file
// pdblDefCutoff    - receives the default threshold found in the file
//
// Returns E_INVALIDARG for bad parameters, E_FAIL if the 4th line of the
// file has no default threshold, or the error from the parse stream.
/////////////////////////////////////////////////////////////////////////////
HRESULT CJunkFilter::HrReadDefaultSpamCutoff(LPSTR pszFullPath, DOUBLE * pdblDefCutoff)
{
    HRESULT         hr = S_OK;
    CParseStream    parsestm;
    LPSTR           pszBuff = NULL;
    ULONG           cchBuff = 0;
    ULONG           ulIndex = 0;

    if ((NULL == pszFullPath) || ('\0' == *pszFullPath) || (NULL == pdblDefCutoff))
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Get the parse stream
    hr = parsestm.HrSetFile(0, pszFullPath);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Read the first four lines; the thresholds are on the 4th one
    for (ulIndex = 0; ulIndex < 4; ulIndex++)
    {
        SafeMemFree(pszBuff);
        hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
        if (FAILED(hr))
        {
            goto exit;
        }
    }

    // Find the default threshold and grab its value. This goes through
    // the same helper the full loader uses, so both read the same number
    // from the same line (including a leading '-').
    if (FALSE == ::FReadDouble(pszBuff, ::szDefaultThresh, pdblDefCutoff))
    {
        hr = E_FAIL;
        goto exit;
    }

    // Set the proper return value
    hr = S_OK;

exit:
    SafeMemFree(pszBuff);
    return hr;
}
/////////////////////////////////////////////////////////////////////////////
// Constructor
//
// Starts out with no identity, no model and no message; the reference
// count is 0 until the first AddRef.
/////////////////////////////////////////////////////////////////////////////
CJunkFilter::CJunkFilter() :
        m_cRef(0),
        m_pszFirstName(NULL), m_cchFirstName(0),
        m_pszLastName(NULL), m_cchLastName(0),
        m_pszCompanyName(NULL), m_cchCompanyName(0),
        m_pblistBodyList(NULL), m_cblistBodyList(0),
        m_rgfeaturecomps(NULL), m_rgdblSVMWeights(NULL),
        m_dblCC(0), m_dblDD(0),
        m_dblThresh(-1), m_dblDefaultThresh(-1), m_dblMostThresh(0), m_dblLeastThresh(0),
        m_cFeatures(0), m_cFeatureComps(0), m_rgulFeatureStatus(NULL),
        m_pszLOCPath(NULL), m_dblSpamCutoff(0),
        m_pszFrom(NULL), m_pszTo(NULL), m_pszSubject(NULL),
        m_pIStmBody(NULL), m_cbBody(0),
        m_fDirectMessage(FALSE), m_fHasAttach(FALSE),
        m_fRule14(FALSE), m_fRule17(FALSE)
{
    // No body features and no sent time yet
    ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList));
    ZeroMemory(&m_ftMessageSent, sizeof(m_ftMessageSent));

    InitializeCriticalSection(&m_cs);

#ifdef DEBUG
    // Logging is set up lazily
    m_fJunkMailLogInit = FALSE;
    m_pILogFile = NULL;
#endif  // DEBUG
}
// Destructor: frees the identity strings, the loaded model (including the
// feature strings owned by the components) and the critical section.
CJunkFilter::~CJunkFilter()
{
    ULONG   iComp = 0;

    // Identity
    SafeMemFree(m_pszFirstName);
    SafeMemFree(m_pszLastName);
    SafeMemFree(m_pszCompanyName);

#ifdef DEBUG
    SafeRelease(m_pILogFile);
#endif  // DEBUG

    // Only components that are neither nil nor special own a string
    for (iComp = 0; iComp < m_cFeatureComps; iComp++)
    {
        FEATURECOMP * pfcompCurr = &m_rgfeaturecomps[iComp];

        if ((locNil == pfcompCurr->loc) || (locSpecial == pfcompCurr->loc))
        {
            continue;
        }

        SafeMemFree(pfcompCurr->pszFeature);
    }

    // Body lookup list
    SafeMemFree(m_pblistBodyList);
    m_cblistBodyList = 0;
    ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList));

    // Model arrays
    SafeMemFree(m_rgdblSVMWeights);
    SafeMemFree(m_rgulFeatureStatus);
    SafeMemFree(m_rgfeaturecomps);

    DeleteCriticalSection(&m_cs);
}
// IUnknown::AddRef - bumps the reference count and returns the new value.
STDMETHODIMP_(ULONG) CJunkFilter::AddRef()
{
    LONG    cRefNow = ::InterlockedIncrement(&m_cRef);

    return cRefNow;
}
// IUnknown::Release - drops one reference and destroys the object when
// the count reaches zero. Returns the new reference count.
STDMETHODIMP_(ULONG) CJunkFilter::Release()
{
    LONG    cRefNow = ::InterlockedDecrement(&m_cRef);

    // Last one out turns off the lights
    if (0 == cRefNow)
    {
        delete this;
    }

    // Only the local copy is used from here on; the object may be gone
    return cRefNow;
}
// IUnknown::QueryInterface - hands out IUnknown and IOEJunkFilter.
// Returns E_INVALIDARG for a NULL out pointer and E_NOINTERFACE (with
// *ppvObject set to NULL) for any other interface.
STDMETHODIMP CJunkFilter::QueryInterface(REFIID riid, void ** ppvObject)
{
    // Need somewhere to put the answer
    if (NULL == ppvObject)
    {
        return E_INVALIDARG;
    }

    *ppvObject = NULL;

    // Both supported interfaces share the same pointer
    if ((riid == IID_IUnknown) || (riid == IID_IOEJunkFilter))
    {
        *ppvObject = static_cast<IOEJunkFilter *>(this);
    }

    if (NULL == *ppvObject)
    {
        return E_NOINTERFACE;
    }

    // The caller gets its own reference
    reinterpret_cast<IUnknown *>(*ppvObject)->AddRef();

    return S_OK;
}
// Sets the user's first name, last name and company name. Each name is
// copied and upper-cased; passing NULL clears that name. The names are
// replaced one after another, so if a copy fails (E_OUTOFMEMORY) the
// names before it have already been replaced and the ones after it are
// left as they were.
STDMETHODIMP CJunkFilter::SetIdentity(LPCSTR pszFirstName, LPCSTR pszLastName, LPCSTR pszCompanyName)
{
    // First name
    SafeMemFree(m_pszFirstName);
    m_cchFirstName = 0;
    if (NULL != pszFirstName)
    {
        m_pszFirstName = PszDupA(pszFirstName);
        if (NULL == m_pszFirstName)
        {
            return E_OUTOFMEMORY;
        }

        m_cchFirstName = CharUpperBuff(m_pszFirstName, lstrlen(m_pszFirstName));
    }

    // Last name
    SafeMemFree(m_pszLastName);
    m_cchLastName = 0;
    if (NULL != pszLastName)
    {
        m_pszLastName = PszDupA(pszLastName);
        if (NULL == m_pszLastName)
        {
            return E_OUTOFMEMORY;
        }

        m_cchLastName = CharUpperBuff(m_pszLastName, lstrlen(m_pszLastName));
    }

    // Company name
    SafeMemFree(m_pszCompanyName);
    m_cchCompanyName = 0;
    if (NULL != pszCompanyName)
    {
        m_pszCompanyName = PszDupA(pszCompanyName);
        if (NULL == m_pszCompanyName)
        {
            return E_OUTOFMEMORY;
        }

        m_cchCompanyName = CharUpperBuff(m_pszCompanyName, lstrlen(m_pszCompanyName));
    }

    return S_OK;
}
// Loads the SVM data file at pszFilePath into the filter.
// Returns E_INVALIDARG for a NULL or empty path, the loader's error if
// the file can't be read, S_OK otherwise.
STDMETHODIMP CJunkFilter::LoadDataFile(LPCSTR pszFilePath)
{
    HRESULT hrLoad = S_OK;

    // Need a path to work with
    if ((NULL == pszFilePath) || ('\0' == *pszFilePath))
    {
        return E_INVALIDARG;
    }

    hrLoad = _HrReadSVMOutput(pszFilePath);
    if (FAILED(hrLoad))
    {
        AssertSz(FALSE, "Unable to successfully read filter params");
        return hrLoad;
    }

    return S_OK;
}
// Picks the spam cutoff from one of five levels. MOST, DEFAULT and LEAST
// map straight to the thresholds of the loaded model; MORE and LESS sit
// halfway between the default and the respective extreme.
// Returns E_INVALIDARG (cutoff unchanged) for any other value.
STDMETHODIMP CJunkFilter::SetSpamThresh(ULONG ulThresh)
{
    DOUBLE  dblCutoff = 0;

    if (STF_USE_MOST == ulThresh)
    {
        dblCutoff = m_dblMostThresh;
    }
    else if (STF_USE_MORE == ulThresh)
    {
        dblCutoff = m_dblDefaultThresh + ((m_dblMostThresh - m_dblDefaultThresh) / 2);
    }
    else if (STF_USE_DEFAULT == ulThresh)
    {
        dblCutoff = m_dblDefaultThresh;
    }
    else if (STF_USE_LESS == ulThresh)
    {
        dblCutoff = m_dblDefaultThresh - ((m_dblDefaultThresh - m_dblLeastThresh) / 2);
    }
    else if (STF_USE_LEAST == ulThresh)
    {
        dblCutoff = m_dblLeastThresh;
    }
    else
    {
        return E_INVALIDARG;
    }

    m_dblSpamCutoff = dblCutoff;

    return S_OK;
}
// Reports which of the five levels the current spam cutoff corresponds
// to. An exact match with the default, most or least threshold yields
// that level; anything else is reported as MORE when it lies above the
// default and LESS otherwise.
//
// pulThresh - receives one of the STF_USE_* values
//
// Returns E_INVALIDARG if pulThresh is NULL, S_OK otherwise.
STDMETHODIMP CJunkFilter::GetSpamThresh(ULONG * pulThresh)
{
    HRESULT hr = S_OK;
    ULONG   ulThresh = 0;

    // Check the incoming params
    if (NULL == pulThresh)
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Work out which level the cutoff sits on
    if (m_dblDefaultThresh == m_dblSpamCutoff)
    {
        ulThresh = STF_USE_DEFAULT;
    }
    else if (m_dblMostThresh == m_dblSpamCutoff)
    {
        ulThresh = STF_USE_MOST;
    }
    else if (m_dblLeastThresh == m_dblSpamCutoff)
    {
        ulThresh = STF_USE_LEAST;
    }
    else if (m_dblSpamCutoff > m_dblDefaultThresh)
    {
        ulThresh = STF_USE_MORE;
    }
    else
    {
        ulThresh = STF_USE_LESS;
    }

    // Set the outgoing param (without this the caller never gets an answer)
    *pulThresh = ulThresh;

    hr = S_OK;

exit:
    return hr;
}
// Returns the model's default threshold multiplied by 100 in *pdblThresh.
// Returns E_INVALIDARG if pdblThresh is NULL.
STDMETHODIMP CJunkFilter::GetDefaultSpamThresh(DOUBLE * pdblThresh)
{
    // Need somewhere to put the answer
    if (NULL == pdblThresh)
    {
        return E_INVALIDARG;
    }

    *pdblThresh = m_dblDefaultThresh * 100.0;

    return S_OK;
}
// Calculates the junk probability of a message.
//
// dwFlags      - CJPF_SENT_TO_ME if the message was sent directly to the user
// pIMPropSet   - property set the From / To / Subject / sent time are read from
// pIMMsg       - message the flags and the body text are read from
// pdblProb     - receives the probability that the message is junk
//
// Header fields or a body that can't be retrieved are treated as absent
// rather than as errors. The plain text body is preferred; if there is
// none, the HTML body is converted to plain text.
//
// Returns S_OK if the probability is above the spam cutoff, S_FALSE if
// it is not, E_INVALIDARG if any of the pointers is NULL.
STDMETHODIMP CJunkFilter::CalcJunkProb(DWORD dwFlags, IMimePropertySet * pIMPropSet, IMimeMessage * pIMMsg, double * pdblProb)
{
    HRESULT         hr = S_OK;
    BOOL            fSpam = FALSE;
    PROPVARIANT     propvar = {0};
    DWORD           dwFlagsMsg = 0;
    FILETIME        ftMsgSent = {0};
    LPSTR           pszFrom = NULL;
    LPSTR           pszTo = NULL;
    LPSTR           pszSubject = NULL;
    IStream *       pIStmBody = NULL;
    IStream *       pIStmHtml = NULL;
    BOOL            fSentToMe = FALSE;
    BOOL            fHasAttachments = FALSE;

    // Check the incoming params
    // (pdblProb is written unconditionally by FCalculateSpamProb)
    if ((NULL == pIMPropSet) || (NULL == pIMMsg) || (NULL == pdblProb))
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Get Message Flags
    if (SUCCEEDED(pIMMsg->GetFlags(&dwFlagsMsg)))
    {
        fHasAttachments = (0 != (dwFlagsMsg & IMF_ATTACHMENTS));
    }

    // Was the message sent to me
    fSentToMe = (0 != (dwFlags & CJPF_SENT_TO_ME));

    // Get the from field
    propvar.vt = VT_LPSTR;
    hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_FROM), NOFLAGS, &propvar);
    if (SUCCEEDED(hr))
    {
        pszFrom = propvar.pszVal;
    }

    // Get the To field
    propvar.vt = VT_LPSTR;
    hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_TO), NOFLAGS, &propvar);
    if (SUCCEEDED(hr))
    {
        pszTo = propvar.pszVal;
    }

    // Try to Get the Plain Text Stream
    if (FAILED(pIMMsg->GetTextBody(TXT_PLAIN, IET_DECODED, &pIStmBody, NULL)))
    {
        // Try to get the text version from the HTML stream
        if ((FAILED(pIMMsg->GetTextBody(TXT_HTML, IET_DECODED, &pIStmHtml, NULL))) ||
                (FAILED(HrConvertHTMLToPlainText(pIStmHtml, &pIStmBody))))
        {
            pIStmBody = NULL;
        }
    }

    // Get the Subject field
    propvar.vt = VT_LPSTR;
    hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_SUBJECT), NOFLAGS, &propvar);
    if (SUCCEEDED(hr))
    {
        pszSubject = propvar.pszVal;
    }

    // When was the message sent?
    propvar.vt = VT_FILETIME;
    hr = pIMPropSet->GetProp(PIDTOSTR(PID_ATT_SENTTIME), 0, &propvar);
    if (SUCCEEDED(hr))
    {
        ftMsgSent = propvar.filetime;
    }

    // Mark every feature as not yet evaluated
    FillMemory(m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures, -1);

    if (FALSE == FCalculateSpamProb(pszFrom, pszTo, pszSubject, pIStmBody,
                            fSentToMe, fHasAttachments, &ftMsgSent,
                            pdblProb, &fSpam))
    {
        hr = E_FAIL;
        goto exit;
    }

    hr = (FALSE != fSpam) ? S_OK : S_FALSE;

exit:
    SafeRelease(pIStmHtml);
    SafeRelease(pIStmBody);
    SafeMemFree(pszSubject);
    SafeMemFree(pszTo);
    SafeMemFree(pszFrom);
    return hr;
}
///////////////////////////////////////////////////////////////////////////////
//
//  HrCreateJunkFilter
//
//  This creates a junk filter.
//
//  dwFlags         - not used
//  ppIJunkFilter   - pointer to return the junk filter
//
//  Returns:    S_OK, on success
//              E_INVALIDARG, if ppIJunkFilter is NULL
//              E_OUTOFMEMORY, if can't create the Junk Filter object
//
///////////////////////////////////////////////////////////////////////////////
HRESULT WINAPI HrCreateJunkFilter(DWORD dwFlags, IOEJunkFilter ** ppIJunkFilter)
{
    CJunkFilter *   pFilterNew = NULL;
    HRESULT         hrResult = S_OK;

    // Need somewhere to put the filter
    if (NULL == ppIJunkFilter)
    {
        return E_INVALIDARG;
    }

    *ppIJunkFilter = NULL;

    // Create the junk filter object
    pFilterNew = new CJunkFilter;
    if (NULL == pFilterNew)
    {
        return E_OUTOFMEMORY;
    }

    // Get the junk filter interface; on success the caller's reference
    // keeps the object alive
    hrResult = pFilterNew->QueryInterface(IID_IOEJunkFilter, (void **) ppIJunkFilter);
    if (FAILED(hrResult))
    {
        // Nobody holds a reference, so clean up by hand
        delete pFilterNew;
        return hrResult;
    }

    return S_OK;
}
// Looks for pszToken inside pszLine and parses the number that follows it.
//
// pszLine  - the text to search
// pszToken - the label that must precede the value
// pdblVal  - receives the parsed value; left untouched if the token is absent
//
// Returns: TRUE if the token was found, FALSE otherwise
BOOL FReadDouble(LPSTR pszLine, LPSTR pszToken, DOUBLE * pdblVal)
{
    LPSTR   pszCursor = NULL;
    BOOL    fMinus = FALSE;
    DOUBLE  dblParsed = 0;

    // Locate the token; nothing to read if it isn't on this line
    pszCursor = StrStr(pszLine, pszToken);
    if (NULL == pszCursor)
    {
        return FALSE;
    }

    // Step past the token to the start of the value
    pszCursor += lstrlen(pszToken);

    // A leading minus sign is consumed here and applied after parsing
    fMinus = ('-' == *pszCursor);
    if (FALSE != fMinus)
    {
        pszCursor++;
    }

    // Parse the magnitude
    dblParsed = StrToDbl(pszCursor, &pszCursor);

    // Apply the sign if one was present
    if (FALSE != fMinus)
    {
        dblParsed *= -1;
    }

    *pdblVal = dblParsed;
    return TRUE;
}
#ifdef DEBUG
// Log lines for the "special" (non-text-match) features. Each one is written
// verbatim to the junk mail log by CJunkFilter::_PrintSpecialFeatureToLog,
// which selects the string from the feature's rule number.
static const LPSTR LOG_SPECIAL_BODY_FIRSTNAME = "Special: Body contains the First Name";
static const LPSTR LOG_SPECIAL_BODY_LASTNAME = "Special: Body contains the Last Name";
static const LPSTR LOG_SPECIAL_BODY_COMPANYNAME = "Special: Body contains the Company Name";
static const LPSTR LOG_SPECIAL_BODY_YEARRECVD = "Special: Body contains the year message received";
static const LPSTR LOG_SPECIAL_SENTTIME_WEEHRS = "Special: Sent time was between 7PM and 6AM";
static const LPSTR LOG_SPECIAL_SENTTIME_WKEND = "Special: Sent time was on the weekend (Sat or Sun)";
static const LPSTR LOG_SPECIAL_BODY_25PCTUPCWDS = "Special: Body contains 25% uppercase words out of the first 50 words";
static const LPSTR LOG_SPECIAL_BODY_8PCTNONALPHA = "Special: Body contains 8% non-alpha characters out of the first 200 characters";
static const LPSTR LOG_SPECIAL_SENT_DIRECT = "Special: Sent directly to user";
static const LPSTR LOG_SPECIAL_SUBJECT_25PCTUPCWDS = "Special: Subject contains 25% uppercase words out of the first 50 words";
static const LPSTR LOG_SPECIAL_SUBJECT_8PCTNONALPHA = "Special: Subject contains 8% non-alpha characters out of the first 200 characters";
static const LPSTR LOG_SPECIAL_TO_EMPTY = "Special: To line is empty";
static const LPSTR LOG_SPECIAL_HASATTACH = "Special: Message has an attachment";
static const LPSTR LOG_SPECIAL_BODY_GT125B = "Special: Body is greater than 125 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT250B = "Special: Body is greater than 250 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT500B = "Special: Body is greater than 500 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT1000B = "Special: Body is greater than 1000 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT2000B = "Special: Body is greater than 2000 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT4000B = "Special: Body is greater than 4000 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT8000B = "Special: Body is greater than 8000 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT16000B = "Special: Body is greater than 16000 Bytes";
// Writes the log line describing a special feature.
//
// iRuleNum - the rule number of the special feature that fired
//
// An unrecognized rule number asserts and writes nothing.
VOID CJunkFilter::_PrintSpecialFeatureToLog(UINT iRuleNum)
{
    LPSTR pszMsg = NULL;

    Assert(NULL != m_pILogFile);

    // Pick the description that goes with this rule number
    switch (iRuleNum)
    {
        case 1:     pszMsg = LOG_SPECIAL_BODY_FIRSTNAME;        break;
        case 2:     pszMsg = LOG_SPECIAL_BODY_LASTNAME;         break;
        case 3:     pszMsg = LOG_SPECIAL_BODY_COMPANYNAME;      break;
        case 4:     pszMsg = LOG_SPECIAL_BODY_YEARRECVD;        break;
        case 5:     pszMsg = LOG_SPECIAL_SENTTIME_WEEHRS;       break;
        case 6:     pszMsg = LOG_SPECIAL_SENTTIME_WKEND;        break;
        case 14:    pszMsg = LOG_SPECIAL_BODY_25PCTUPCWDS;      break;
        case 15:    pszMsg = LOG_SPECIAL_BODY_8PCTNONALPHA;     break;
        case 16:    pszMsg = LOG_SPECIAL_SENT_DIRECT;           break;
        case 17:    pszMsg = LOG_SPECIAL_SUBJECT_25PCTUPCWDS;   break;
        case 18:    pszMsg = LOG_SPECIAL_SUBJECT_8PCTNONALPHA;  break;
        case 19:    pszMsg = LOG_SPECIAL_TO_EMPTY;              break;
        case 20:    pszMsg = LOG_SPECIAL_HASATTACH;             break;
        case 40:    pszMsg = LOG_SPECIAL_BODY_GT125B;           break;
        case 41:    pszMsg = LOG_SPECIAL_BODY_GT250B;           break;
        case 42:    pszMsg = LOG_SPECIAL_BODY_GT500B;           break;
        case 43:    pszMsg = LOG_SPECIAL_BODY_GT1000B;          break;
        case 44:    pszMsg = LOG_SPECIAL_BODY_GT2000B;          break;
        case 45:    pszMsg = LOG_SPECIAL_BODY_GT4000B;          break;
        case 46:    pszMsg = LOG_SPECIAL_BODY_GT8000B;          break;
        case 47:    pszMsg = LOG_SPECIAL_BODY_GT16000B;         break;

        default:
            AssertSz(FALSE, "unsupported special feature");
            break;
    }

    // Only recognized rules produce a log line
    if (NULL != pszMsg)
    {
        m_pILogFile->WriteLog(LOGFILE_DB, pszMsg);
    }
}
// Writes a log line describing the feature component at ulIndex.
//
// ulIndex - index into m_rgfeaturecomps of the feature to describe
//
// Text features (body/subject/from/to) are logged through PrintToLogFile
// using the tag line for their location; special features are delegated to
// _PrintSpecialFeatureToLog; locNil features are not logged at all.
VOID CJunkFilter::_PrintFeatureToLog(ULONG ulIndex)
{
    LPSTR pszTag = NULL;

    // Figure out which tag line to use
    switch (m_rgfeaturecomps[ulIndex].loc)
    {
        case locBody:
            pszTag = LOG_BODY;
            break;

        case locSubj:
            pszTag = LOG_SUBJECT;
            break;

        case locFrom:
            pszTag = LOG_FROM;
            break;

        case locTo:
            pszTag = LOG_TO;
            break;

        case locSpecial:
            // Special features carry a rule number instead of text
            _PrintSpecialFeatureToLog(m_rgfeaturecomps[ulIndex].ulRuleNum);
            return;

        case locNil:
            // Nothing to report for an empty feature
            return;

        default:
            // Never hand PrintToLogFile a NULL template: it would be used
            // as the format string
            AssertSz(FALSE, "unsupported feature location");
            return;
    }

    // Write out the feature to the log
    PrintToLogFile(m_pILogFile, pszTag, m_rgfeaturecomps[ulIndex].pszFeature);
}
// Creates the junk mail log file, if logging has been turned on.
//
// The attempt is made at most once per object: m_fJunkMailLogInit is set on
// the first call whether or not the log file ends up being created.
//
// Returns: S_OK, if the log file was created and stored in m_pILogFile
//          S_FALSE, if a previous call already made the attempt, or the
//              "JunkMailLog" registry value is missing or zero
//          E_FAIL, if the "InstallRoot" registry value can't be read
//          otherwise the failure code from HrAlloc or CreateLogFile
HRESULT CJunkFilter::_HrCreateLogFile(VOID)
{
    HRESULT     hr = S_OK;
    LPSTR       pszLogFile = NULL;
    ULONG       cbData = 0;
    ULONG       cbBuffer = 0;
    ULONG       cchPath = 0;
    ILogFile *  pILogFile = NULL;
    DWORD       dwData = 0;

    // Have we already tried to set up the log
    if (FALSE != m_fJunkMailLogInit)
    {
        hr = S_FALSE;
        goto exit;
    }

    m_fJunkMailLogInit = TRUE;

    // Is junk mail logging turned on
    cbData = sizeof(dwData);
    if ((ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "JunkMailLog", NULL, (BYTE *) &dwData, &cbData)) ||
        (0 == dwData))
    {
        hr = S_FALSE;
        goto exit;
    }

    // Get the size of the path to Outlook Express
    if (ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "InstallRoot", NULL, NULL, &cbData))
    {
        hr = E_FAIL;
        goto exit;
    }

    // How much room do we need to build up the path
    // (install root + separator + file name + terminator)
    cbData += lstrlen(szJunkMailLog) + 2;

    // Allocate space to hold the path
    hr = HrAlloc((VOID **) &pszLogFile, cbData);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Get the path to Outlook Express
    // (a separate size variable keeps cbData as the full buffer size)
    cbBuffer = cbData;
    if (ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "InstallRoot", NULL, (BYTE *) pszLogFile, &cbBuffer))
    {
        hr = E_FAIL;
        goto exit;
    }

    // Build up the path to the log file, adding a separator only if the
    // install root doesn't already end in one. The last character is found
    // with CharPrev so a multi-byte trail byte isn't mistaken for a backslash.
    cchPath = lstrlen(pszLogFile);
    if ((0 == cchPath) || ('\\' != *CharPrev(pszLogFile, pszLogFile + cchPath)))
    {
        StrCatBuff(pszLogFile, "\\", cbData);
    }

    StrCatBuff(pszLogFile, szJunkMailLog, cbData);

    hr = CreateLogFile(g_hInst, pszLogFile, szJunkMailPrefix, DONT_TRUNCATE, &pILogFile, FILE_SHARE_READ | FILE_SHARE_WRITE);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Replace any previous log file with the new one
    SafeRelease(m_pILogFile);
    m_pILogFile = pILogFile;

    hr = S_OK;

exit:
    SafeMemFree(pszLogFile);
    return hr;
}
// Formats pszArg into pszTmpl and writes the result to the log file.
//
// pILogFile - the log file to write to
// pszTmpl   - the format template for the line
// pszArg    - the argument for the template; NULL is treated as ""
//
// If the line buffer can't be allocated nothing is written.
VOID PrintToLogFile(ILogFile * pILogFile, LPSTR pszTmpl, LPSTR pszArg)
{
    LPSTR   pszLine = NULL;
    ULONG   cchLine = 0;

    Assert(NULL != pILogFile);
    Assert(NULL != pszTmpl);

    // A missing argument is logged as an empty string
    if (NULL == pszArg)
    {
        pszArg = "";
    }

    // Size the buffer from the template and argument lengths, plus two spare characters
    cchLine = lstrlen(pszTmpl) + lstrlen(pszArg) + 2;

    // Only format and write if the buffer could be allocated
    if (SUCCEEDED(HrAlloc((VOID **) &pszLine, cchLine * sizeof(*pszLine))))
    {
        // Create the output string
        wnsprintf(pszLine, cchLine, pszTmpl, pszArg);

        // Print the buffer to the log file
        pILogFile->WriteLog(LOGFILE_DB, pszLine);
    }

    SafeMemFree(pszLine);
}
#endif // DEBUG