Windows2003-3790/inetcore/datacab/tdc/tdcparse.cpp

//------------------------------------------------------------------------
//
//  Tabular Data Control Parsing Module
//  Copyright (C) Microsoft Corporation, 1996, 1997
//
//  File:       TDCParse.cpp
//
//  Contents:   Implementation of CTDCParse classes.
//
//------------------------------------------------------------------------


#include "stdafx.h"
#include <simpdata.h>
#include "TDC.h"
#include <MLang.h>
#include "Notify.h"
#include "TDCParse.h"
#include "TDCArr.h"
#include "locale.h"
#include "wch.h"

//#ifndef DISPID_AMBIENT_CODEPAGE
//#define DISPID_AMBIENT_CODEPAGE (-725)
//#endif

#define BYTE_ORDER_MARK 0xFEFF
#define REVERSE_BYTE_ORDER_MARK 0xFFFE

//------------------------------------------------------------------------
//
//  Function:   IsSpace()
//
//  Synopsis:   Classifies a character as horizontal white-space.  Only
//              the blank (L' ') and the tab (L'\t') qualify; carriage
//              return and line feed are not counted as space here.
//
//  Arguments:  ch            Character to classify.
//
//  Returns:    TRUE if 'ch' is a blank or a tab.
//              FALSE for every other character.
//
//------------------------------------------------------------------------

inline boolean IsSpace(WCHAR ch)
{
    switch (ch)
    {
    case L' ':
    case L'\t':
        return TRUE;

    default:
        return FALSE;
    }
}

//////////////////////////////////////////////////////////////////////////
//
//        CTDCTokenise Class - see comments in file TDCParse.h
//        ------------------
//////////////////////////////////////////////////////////////////////////


//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::InitTokenizer()
//
//  Synopsis:   Prepares the tokenising half of the object: remembers the
//              field sink, records the characters that steer parsing and
//              resets every piece of per-stream parser state.
//
//              Conflicting special characters are resolved by disabling
//              (zeroing) the lower-priority one.  The priority order is
//              field delimiter, row delimiter, quote, escape.
//
//  Arguments:  pFieldSink         Object to send parsed fields to.
//              wchDelimField      Field delimiter.
//              wchDelimRow        Row delimiter; 0 selects
//                                 DEFAULT_ROW_DELIM[0].
//              wchQuote           Quote character (0 == none).
//              wchEscape          Escape character (0 == none).
//
//  Returns:    S_OK indicating success (there is no failure path).
//
//------------------------------------------------------------------------

HRESULT CTDCUnify::InitTokenizer(CTDCFieldSink *pFieldSink, WCHAR wchDelimField,
                                 WCHAR wchDelimRow, WCHAR wchQuote, WCHAR wchEscape)
{
    _ASSERT(pFieldSink != NULL);

    //  Nothing has been parsed yet and no multi-character sequence
    //  (CR-LF, LF-CR, white-space run, escape, quote) is in progress.
    //
    m_ucParsed = 0;
    m_fQuoteActive = FALSE;
    m_fEscapeActive = FALSE;
    m_fIgnoreNextWhiteSpace = FALSE;
    m_fIgnoreNextCR = FALSE;
    m_fIgnoreNextLF = FALSE;
    m_fFoldWhiteSpace = FALSE;

    //  Record the sink and the caller's choice of special characters.
    //
    m_pFieldSink = pFieldSink;
    m_wchDelimField = wchDelimField;
    m_wchDelimRow = wchDelimRow;
    m_wchQuote = wchQuote;
    m_wchEscape = wchEscape;

    //  A row delimiter is mandatory - fall back to the default one.
    //
    if (0 == m_wchDelimRow)
    {
        m_wchDelimRow = DEFAULT_ROW_DELIM[0];
    }

    //  The field delimiter wins over an identical row delimiter.
    //
    if (m_wchDelimField == m_wchDelimRow)
    {
        m_wchDelimRow = 0;
    }

    //  A quote that doubles as a delimiter is switched off.
    //
    if (m_wchQuote != 0 &&
        (m_wchDelimField == m_wchQuote || m_wchDelimRow == m_wchQuote))
    {
        m_wchQuote = 0;
    }

    //  An escape that doubles as a delimiter or as the quote is switched off.
    //
    if (m_wchEscape != 0 &&
        (m_wchDelimField == m_wchEscape ||
         m_wchDelimRow == m_wchEscape ||
         m_wchQuote == m_wchEscape))
    {
        m_wchEscape = 0;
    }

    //  When rows end in CR or LF, CR-LF / LF-CR pairs count as one row end.
    //
    m_fFoldCRLF = (m_wchDelimRow == L'\n' || m_wchDelimRow == L'\r');

    return S_OK;
}

//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::AddWcharBuffer()
//
//  Synopsis:   Tokenises the not-yet-parsed part of the wide-character
//              buffer (m_psWcharBuf[m_ucParsed .. m_ucWcharBufCount)),
//              handing each completed field and each end-of-row to the
//              field sink set up by InitTokenizer().
//
//              Processed characters are compacted towards the start of
//              the buffer as they are read, so that on return the buffer
//              holds only the (possibly empty) incomplete field, and
//              m_ucWcharBufCount / m_ucParsed both equal its length.
//
//  Arguments:  fLastData          TRUE when this is the final call for
//                                 the stream: a trailing incomplete field
//                                 is then flushed as a field followed by
//                                 an end-of-row.
//
//  Returns:    S_OK upon success.
//              E_ABORT if a NUL character is found in the data (the
//                input is treated as binary and rejected).
//              Any failure code returned by the field sink's AddField()
//                or EOLN().
//
//------------------------------------------------------------------------

HRESULT CTDCUnify::AddWcharBuffer(BOOL fLastData)
{

    OutputDebugStringX(_T("CTDCTokenise::AddWcharBuffer called\n"));

    _ASSERT(m_pFieldSink != NULL);

    HRESULT hr = S_OK;

    LPWCH   pwchCurr;   //  Next character to process
    LPWCH   pwchEnd;    //  End-of-buffer marker
    LPWCH   pwchDest;   //  Where to write next char processed
    LPWCH   pwchStart;  //  Beginning of current token

    //  The first m_ucParsed characters are the already-processed start of
    //  the current token; resume reading (and writing) right after them.
    //
    pwchStart = &m_psWcharBuf[0];
    pwchCurr = pwchStart + m_ucParsed;
    pwchDest = pwchCurr;
    pwchEnd = &m_psWcharBuf[m_ucWcharBufCount];

    //  Read up to the next field boundary (field or row delimiter)
    //
    while (pwchCurr < pwchEnd)
    {
        // Security:  If we see a null character, it's not a text file.  Abort the
        // download, so that no one can use the TDC to download .exe's or other
        // binary files.
        if (*pwchCurr == 0)
        {
            hr = E_ABORT;
            goto Cleanup;
        }

        if (m_fIgnoreNextLF)
        {
            //  We're expecting a LF to terminate a CR-LF sequence.
            //
            m_fIgnoreNextLF = FALSE;
            if (*pwchCurr == L'\n')
            {
                //  Found a LF - ignore it
                //
                pwchCurr++;
                continue;
            }

            //  Found something else - carry on ...
            //
        }

        if (m_fIgnoreNextCR)
        {
            //  We're expecting a CR to terminate a LF-CR sequence.
            //
            m_fIgnoreNextCR = FALSE;
            if (*pwchCurr == L'\r')
            {
                //  Found a CR - ignore it
                //
                pwchCurr++;
                continue;
            }

            //  Found something else - carry on ...
            //
        }

        if (m_fIgnoreNextWhiteSpace)
        {
            //  We're expecting the rest of a white-space sequence
            //
            if (IsSpace(*pwchCurr))
            {
                //  Found white-space - ignore it
                //
                pwchCurr++;
                continue;
            }
            m_fIgnoreNextWhiteSpace = FALSE;
        }

        //  Escape characters work, even in quoted strings
        //
        if (m_fEscapeActive)
        {
            //  The character after an escape is always taken literally.
            //
            *pwchDest++ = *pwchCurr++;
            m_fEscapeActive = FALSE;
            continue;
        }
        if (*pwchCurr == m_wchEscape)
        {
            pwchCurr++;
            m_fEscapeActive = TRUE;
            continue;
        }

        //  Quotes activate/deactivate Field/Row delimiters
        //
        if (*pwchCurr == m_wchQuote)
        {
            pwchCurr++;
            m_fQuoteActive = !m_fQuoteActive;
            continue;
        }

        if (m_fQuoteActive)
        {
            *pwchDest++ = *pwchCurr++;
            continue;
        }


        if (*pwchCurr == m_wchDelimField ||
            (m_fFoldWhiteSpace && IsSpace(*pwchCurr)))
        {
            hr = m_pFieldSink->AddField(pwchStart, pwchDest - pwchStart);
            if (!SUCCEEDED(hr))
                goto Cleanup;
            pwchCurr++;

            //  When folding white-space, any blanks/tabs that follow the
            //  delimiter belong to it.  Do NOT peek at *pwchCurr here: it
            //  may be pwchEnd (one past the valid data).  Raising the flag
            //  is sufficient - the check at the top of the loop skips the
            //  white-space run, clears the flag at the first other
            //  character, and keeps working when the run continues in the
            //  next buffer.
            //
            if (m_fFoldWhiteSpace)
                m_fIgnoreNextWhiteSpace = TRUE;

            //  Start collecting the next field at the front of the buffer.
            //
            pwchStart = &m_psWcharBuf[0];
            pwchDest = pwchStart;
            continue;
        }

        if (*pwchCurr == m_wchDelimRow ||
            (m_fFoldCRLF && (*pwchCurr == L'\r' || *pwchCurr == L'\n')))
        {
            //  A row delimiter ends the current field and the current row.
            //
            hr = m_pFieldSink->AddField(pwchStart, pwchDest - pwchStart);
            if (!SUCCEEDED(hr))
                goto Cleanup;
            hr = m_pFieldSink->EOLN();
            if (!SUCCEEDED(hr))
                goto Cleanup;
            if (m_fFoldCRLF)
            {
                //  Remember which half of a CR-LF / LF-CR pair to swallow.
                //
                m_fIgnoreNextLF = (*pwchCurr == L'\r');
                m_fIgnoreNextCR = (*pwchCurr == L'\n');
            }
            pwchCurr++;
            pwchStart = &m_psWcharBuf[0];
            pwchDest = pwchStart;
            continue;
        }

        //  Ordinary character - it is part of the current field.
        //
        *pwchDest++ = *pwchCurr++;
    }

    //  Whatever is left is the beginning of a field whose end has not
    //  arrived yet; it now sits at the start of the buffer.
    //
    m_ucWcharBufCount = pwchDest - pwchStart;
    m_ucParsed = pwchDest - pwchStart;  // amount we've already parsed

    // If this is the last data packet, and there's a fragment left,
    // parse it.
    if (m_ucWcharBufCount && fLastData)
    {
        hr = m_pFieldSink->AddField(pwchStart, m_ucParsed);
        if (!SUCCEEDED(hr))
            goto Cleanup;
        m_ucParsed = 0;
        hr = m_pFieldSink->EOLN();
        return hr;
    }


Cleanup:
    return hr;
}


//////////////////////////////////////////////////////////////////////////
//
//        CTDCUnify Class - see comments in file TDCParse.h
//        ---------------
//////////////////////////////////////////////////////////////////////////

//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::CTDCUnify()
//
//  Synopsis:   Constructor.  Puts every pointer the destructor touches
//              into a known (NULL) state so that an object which is
//              destroyed without Create() ever having been called does
//              not delete [] or Release() an uninitialised pointer.
//
//------------------------------------------------------------------------

CTDCUnify::CTDCUnify()
{
    m_pML = NULL;
    m_pFieldSink = NULL;

    // The destructor unconditionally delete []s both buffers, so they
    // must never hold garbage.
    m_psByteBuf = NULL;
    m_ucByteBufSize = 0;
    m_ucByteBufCount = 0;

    m_psWcharBuf = NULL;
    m_ucWcharBufSize = 0;
    m_ucWcharBufCount = 0;
}

//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::~CTDCUnify()
//
//  Synopsis:   Destructor.  Frees the byte and wide-character buffers
//              and gives back the reference held on the MLang object.
//
//------------------------------------------------------------------------

CTDCUnify::~CTDCUnify()
{
    // Release the staging buffers first ...
    delete [] m_psByteBuf;
    delete [] m_psWcharBuf;

    // ... then the MLang reference, if one is held.
    if (NULL == m_pML)
    {
        return;
    }
    m_pML->Release();
}

//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::Create()
//
//  Synopsis:   Initialise the conversion half of the CTDCUnify object:
//              takes a reference on the MLang object, records the code
//              pages to use and resets the conversion state and buffer
//              bookkeeping.
//
//  Arguments:  nCodePage         Code page requested for the data
//                                (0 == not specified).
//              nAmbientCodePage  Code page of the host, used as a fallback
//                                (0 == not available).
//              pML               MLANG COM object (used for conversions).
//                                Must not be NULL.
//
//              Either code page is reset to 0 if MLang reports that it
//              cannot be converted to Unicode.
//
//  Returns:    S_OK to indicate success.
//              E_POINTER if 'pML' is NULL (the object is left untouched).
//
//------------------------------------------------------------------------

HRESULT CTDCUnify::Create(UINT nCodePage, UINT nAmbientCodePage, IMultiLanguage *pML)
{
    _ASSERT(pML != NULL);
    if (pML == NULL)
    {
        // Nothing below can work without MLang; fail instead of crashing.
        return E_POINTER;
    }

    // AddRef the new object before releasing any previous one, so that
    // calling Create() again with the same object cannot drop its last
    // reference, and so that a repeated Create() does not leak the old one.
    pML->AddRef();
    if (m_pML != NULL)
        m_pML->Release();
    m_pML = pML;

    m_nCodePage = nCodePage;
    m_nAmbientCodePage = nAmbientCodePage;
    m_fDataMarkedUnicode = FALSE;
    m_fDataIsUnicode = FALSE;
    m_dwBytesProcessed = 0;
    m_fCanConvertToUnicode = 0;
    m_nUnicode = 0;
    m_fProcessedAllowDomainList = FALSE;

    // Buffers are allocated lazily by ConvertByteBuffer().
    // NOTE(review): buffers left over from an earlier Create() are not
    // freed here - confirm Create() is only ever called once per object.
    m_dwConvertMode = 0;
    m_ucByteBufSize = 0;
    m_ucByteBufCount = 0;
    m_psByteBuf = NULL;

    m_ucWcharBufSize = 0;
    m_ucWcharBufCount = 0;
    m_psWcharBuf = NULL;

    // A code page MLang cannot convert from is as good as no code page.
    if (m_nCodePage && S_OK != m_pML->IsConvertible(m_nCodePage, UNICODE_CP))
    {
        m_nCodePage = 0;
    }

    if (m_nAmbientCodePage && S_OK != m_pML->IsConvertible(m_nAmbientCodePage, UNICODE_CP))
    {
        m_nAmbientCodePage = 0;
    }

    return S_OK;
}

//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::IsUnicode
//
//  Synopsis:   Determines if our text buffer is Unicode or not by looking
//              for a byte-order mark in its first WCHAR.  Should only be
//              called once on the FIRST text buffer.
//
//              Assume if the data is marked as Unicode, that it's correct.
//
//              The determination this routine makes will override any
//              single byte codepage the user may have specified.
//
//
//  Arguments:  pBytes            Buffer containing the raw bytes to examine.
//              dwSize            Number of significant bytes in 'pBytes'.
//
//  Returns:    UNICODE_CP          buffer starts with BYTE_ORDER_MARK
//              UNICODE_REVERSE_CP  buffer starts with REVERSE_BYTE_ORDER_MARK
//              0                   no signature found, or fewer than
//                                  sizeof(WCHAR) bytes are available
//
//
//------------------------------------------------------------------------
int
CTDCUnify::IsUnicode(BYTE * pBytes, DWORD dwSize)
{
    // A signature occupies one whole WCHAR.  With less data than that
    // (or no buffer at all) there is nothing we may legally look at.
    if (pBytes == NULL || dwSize < sizeof(WCHAR))
        return 0;

    const WCHAR wchSignature = *(WCHAR *)pBytes;

    if (BYTE_ORDER_MARK == wchSignature)
        return UNICODE_CP;

    if (REVERSE_BYTE_ORDER_MARK == wchSignature)
        return UNICODE_REVERSE_CP;

    return 0;
}

//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::ConvertByteBuffer()
//
//  Synopsis:   Appends a packet of raw bytes to the byte buffer, then
//              converts as much of the byte buffer as possible into wide
//              characters appended to the wide-character buffer
//              (m_psWcharBuf / m_ucWcharBufCount).  Bytes that could not
//              be converted yet (e.g. half of a character) stay in the
//              byte buffer for the next call.
//
//              On the first data, the code page is determined via
//              DetermineCodePage() and a leading Unicode byte-order mark
//              is stripped.
//
//  Arguments:  pBytes            Buffer containing bytes to be converted.
//              dwSize            Number of significant bytes in 'pBytes'
//                                  dwSize == 0 means "End-of-stream"
//
//  Returns:    S_OK upon success (conversion errors reported by MLang are
//                deliberately swallowed so the rest of the data is still
//                examined).
//              S_FALSE if not enough data has shown up yet to determine
//                the code page; nothing has been converted.
//              E_OUTOFMEMORY if a buffer cannot be grown.
//
//------------------------------------------------------------------------

HRESULT CTDCUnify::ConvertByteBuffer(BYTE *pBytes, DWORD dwSize)
{
    OutputDebugStringX(_T("CTDCUnify::ConvertByteBuffer called\n"));

    _ASSERT(pBytes != NULL || dwSize == 0);

    HRESULT     hr = S_OK;
    UINT        ucBytes;
    UINT        ucWchars;

    // Is there enough space in Byte buffer for this packet?
    if (dwSize > (m_ucByteBufSize - m_ucByteBufCount))
    {
        // The data comes off the network - make sure the new size cannot
        // wrap around and yield an undersized allocation.
        if (dwSize > ((UINT)-1) - m_ucByteBufCount)
        {
            hr = E_OUTOFMEMORY;
            goto Done;
        }

        // No, the current buffer is too small, make a new one.
        BYTE * psTemp = new BYTE[m_ucByteBufCount + dwSize];
        if (psTemp==NULL)
        {
            hr = E_OUTOFMEMORY;

            goto Done;
        }

        if (m_psByteBuf != NULL)        // if not first time
        {
            memmove(psTemp, m_psByteBuf, m_ucByteBufCount);
            delete [] m_psByteBuf;
        }
        m_ucByteBufSize = m_ucByteBufCount + dwSize;
        m_psByteBuf = psTemp;
    }

    // Append the new data to the old data.  (Nothing to do at end-of-stream,
    // where 'pBytes' is allowed to be NULL.)
    if (dwSize != 0)
    {
        memmove(m_psByteBuf + m_ucByteBufCount, pBytes, dwSize);
        m_ucByteBufCount += dwSize;
    }

    // Is there enough space in the Wchar buffer for the converted data?
    // We make a very conservative assumption here that N source buffer bytes
    // convert to N Wchar buffer chars (or 2*N bytes).  This will ensure that
    // our call to ConvertToUnicode will never not finish because there wasn't
    // enough room in the output buffer.
    if (m_ucByteBufCount > (m_ucWcharBufSize - m_ucWcharBufCount))
    {
        // Neither the element count nor the byte size of the new array
        // may overflow.
        const UINT ucMaxWchars = (UINT)(((UINT)-1) / sizeof(WCHAR));
        if (m_ucWcharBufCount > ucMaxWchars ||
            m_ucByteBufCount > ucMaxWchars - m_ucWcharBufCount)
        {
            hr = E_OUTOFMEMORY;
            goto Done;
        }

        // The current buffer is too small, make a new one.
        WCHAR * psTemp = new WCHAR[m_ucWcharBufCount + m_ucByteBufCount];
        if (psTemp==NULL)
        {
            hr = E_OUTOFMEMORY;
            goto Done;
        }

        if (m_psWcharBuf != NULL)       // if not first time
        {
            memmove(psTemp, m_psWcharBuf,
                    m_ucWcharBufCount*sizeof(WCHAR));
            delete [] m_psWcharBuf;
        }
        m_psWcharBuf = psTemp;
        m_ucWcharBufSize = m_ucWcharBufCount + m_ucByteBufCount;
    }

    if (0 == m_dwBytesProcessed)
    {
        // if we can't determine the codepage yet, try again later
        // (at end-of-stream we force a decision)
        if (!DetermineCodePage(dwSize==0))
        {
            hr = S_FALSE;
            goto Done;
        }
    }

    // Convert as many source bytes as we can to Unicode chars
    ucBytes = m_ucByteBufCount;
    ucWchars = m_ucWcharBufSize - m_ucWcharBufCount;

    // ConvertStringToUnicode won't convert Unicode to Unicode for us.
    // So we'll do it ourselves.
    if (m_nUnicode)
    {
        _ASSERT( ucWchars * sizeof(WCHAR) >= ucBytes);

        // This might copy an odd extra byte
        memmove((BYTE *)(m_psWcharBuf + m_ucWcharBufCount), m_psByteBuf,
                ucBytes);

        // But we only count the number of complete WCHAR's we copied.
        ucWchars = ucBytes / sizeof(WCHAR);
        ucBytes = ucWchars * sizeof(WCHAR);

        if (UNICODE_REVERSE_CP == m_nUnicode)
        {
            // need to byte swap
            BYTE *pByteSwap = (BYTE *)(m_psWcharBuf + m_ucWcharBufCount);
            BYTE bTemp;
            for (ULONG i = ucWchars; i != 0; i--)
            {
                // Well, OK, we've kind of hardwired WCHAR == 2 here, but ..
                bTemp = pByteSwap[0];
                pByteSwap[0] = pByteSwap[1];
                pByteSwap[1] = bTemp;
                pByteSwap += 2;
            }
        }

        // On first packet, need to remove Unicode signature.
        // Only need to look for BYTE_ORDER_MARK (0xFEFF) -- a reversed
        // signature has already been byte swapped above.
        if (0 == m_dwBytesProcessed && ucWchars != 0 &&
            m_psWcharBuf[0] == BYTE_ORDER_MARK)
        {
            ucWchars--;

            // Slide the remaining characters down by one WCHAR.  The size
            // is in units of WCHAR (not of the counter variable's type).
            memmove(m_psWcharBuf, m_psWcharBuf + 1,
                    ucWchars*sizeof(WCHAR));
        }
    }
    else
    {
        hr = m_pML->ConvertStringToUnicode(&m_dwConvertMode, m_nCodePage,
                                           (char *)m_psByteBuf, &ucBytes,
                                           m_psWcharBuf +m_ucWcharBufCount,
                                           &ucWchars);

        // Some character(s) failed conversion.  The best we can do is
        // attempt to skip the character that failed conversion.
        if (FAILED(hr))
        {
            // Did we come back around and try the unconvertible portion again?
            // (Only skip if there actually is a byte to skip, otherwise the
            // byte count below would underflow.)
            if (ucBytes==0 && m_ucByteBufCount != 0)
            {
                // Yes, and it made no progress.  Skip a char to try to make
                // forward progress.
                ucBytes++;
            }
            // We can't return this error, or we won't look at the rest of the
            // file.
            hr = S_OK;
        }

    }

    // Never consume more than we hold.
    if (ucBytes > m_ucByteBufCount)
        ucBytes = m_ucByteBufCount;

    // Move any leftover source characters to the start of the buffer.
    // These are probably split Unicode chars, lead bytes without trail
    // bytes, etc.
    m_ucByteBufCount -= ucBytes;
    if (m_ucByteBufCount != 0)
    {
        memmove(m_psByteBuf, m_psByteBuf + ucBytes,
                m_ucByteBufCount);
    }

    // The number of useful chars in the output buf is increased by the
    // number we managed to convert.
    // NOTE(review): despite its name, m_dwBytesProcessed is advanced by the
    // number of WCHARs produced, so a first packet that consists of nothing
    // but a byte-order mark leaves it at zero and the code page is
    // re-determined on the next call - confirm this is intended.
    m_ucWcharBufCount += ucWchars;
    m_dwBytesProcessed += ucWchars;

Done:
    return hr;
}


//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::DetermineCodePage()
//
//  Synopsis:   Figures out what codepage to use to read the data.
//              Sets m_nCodePage and m_nUnicode appropriately.
//
//              The order of preference is:
//                1. a Unicode byte-order mark at the start of the data
//                2. the code page already stored in m_nCodePage
//                3. IMultiLanguage2 auto-detection (if available)
//                4. MLang conversion with CP_AUTO
//                5. (forced only) ambient code page, GetACP(), CP_1252
//
//  Arguments:  fForce      determine the answer, no matter what
//
//  Returns:    TRUE        the codepage is determined.
//              FALSE       not enough data yet to determine
//
//------------------------------------------------------------------------

BOOL
CTDCUnify::DetermineCodePage(BOOL fForce)
{
    DWORD   dwConvertMode = 0;
    HRESULT hr;
    UINT    ucBytes = m_ucByteBufCount;
    UINT    ucWchars = m_ucWcharBufSize - m_ucWcharBufCount;
    UINT    cpDetected;
    IMultiLanguage2 *pML2 = NULL;

    _ASSERT(m_dwBytesProcessed == 0 && m_pML);

    // First look for Unicode.  Assume it's not Unicode to start.
    m_nUnicode = 0;

    // Need at least 2 bytes for Unicode signature (0xFFFE or 0xFEFF)
    if (m_ucByteBufCount > 1)
    {
        // If we detect Unicode, it overrides any user specified code page.
        m_nUnicode = IsUnicode(m_psByteBuf, m_ucByteBufCount);
        if (m_nUnicode)
        {
            m_nCodePage = m_nUnicode;
            return TRUE;
        }

        // It's not Unicode.  If the user specified a code page, use it.
        if (m_nCodePage)
        {
            return TRUE;
        }
    }

    // if we need an answer and user specified a code page, use it
    if (fForce && m_nCodePage)
    {
        return TRUE;
    }

    // At this point, we have to guess.  If we have enough input or if we
    // need an answer now, use MLang to do the guessing
    if (fForce || m_ucByteBufCount >= CODEPAGE_BYTE_THRESHOLD)
    {
        // First see if the auto-detect interface is available.
        hr = m_pML->QueryInterface(IID_IMultiLanguage2, (void**)&pML2);
        if (S_OK == hr && pML2 != NULL)
        {
            DetectEncodingInfo info[N_DETECTENCODINGINFO];
            int nInfo = N_DETECTENCODINGINFO;

            // The source size is an in/out parameter.  Give the detector
            // its own copy so that whatever it writes back cannot change
            // the byte count handed to the fallback conversion below.
            int cbDetect = (int)m_ucByteBufCount;

            // auto-detect
            hr = pML2->DetectInputCodepage(
                            MLDETECTCP_NONE,
                            CP_ACP,
                            (char *)m_psByteBuf,
                            &cbDetect,
                            info,
                            &nInfo);
            pML2->Release();

            if (S_OK == hr)
            {
                // if one of the returned codepages is "good enough", use it.
                for (int i=0; i<nInfo; ++i)
                {
                    if (info[i].nConfidence >= 90 && info[i].nDocPercent >= 90)
                    {
                        if (S_OK == m_pML->IsConvertible(info[i].nCodePage, UNICODE_CP))
                        {
                            m_nCodePage = info[i].nCodePage;
                            return TRUE;
                        }
                    }
                }
            }
        }

        // Try plain old MLang.
        // Ask MLang to convert the input using the "auto-detect" codepage.
        // The free tail of the wide-character buffer is used as scratch
        // space only; m_ucWcharBufCount is not advanced here.
        hr = m_pML->ConvertStringToUnicode(&dwConvertMode, CP_AUTO,
                                           (char *)m_psByteBuf, &ucBytes,
                                           m_psWcharBuf + m_ucWcharBufCount,
                                           &ucWchars);
        cpDetected = HIWORD(dwConvertMode);

        // if MLang detected a codepage, use it
        if (S_OK == hr && cpDetected != 0)
        {
            if (S_OK == m_pML->IsConvertible(cpDetected, UNICODE_CP))
            {
                m_nCodePage = cpDetected;
                return TRUE;
            }
        }
    }

    // guessing didn't work.  If we don't have to decide now, try again later
    if (!fForce)
    {
        return FALSE;
    }

    // if we have to decide and all else has failed, use the host page's
    // encoding.  If even that isn't available, use the machine's ANSI codepage.
    m_nCodePage = m_nAmbientCodePage ? m_nAmbientCodePage : GetACP();

    // and if this still isn't convertible to Unicode, use windows-1252
    if (m_nCodePage == 0 || S_OK != m_pML->IsConvertible(m_nCodePage, UNICODE_CP))
    {
        m_nCodePage = CP_1252;
    }

    return TRUE;
}


// Steps over a run of blanks/tabs that begins at 'pwchCurr' and returns the
// address of the first character that is not white-space (which is
// 'pwchCurr' itself when it does not point at white-space).  The scan has
// no length limit; it relies on a non-space character being present.
LPWCH SkipSpace(LPWCH pwchCurr)
{
    LPWCH pwchScan = pwchCurr;

    for (; IsSpace(*pwchScan); ++pwchScan)
    {
        // nothing to do - the loop header does the walking
    }

    return pwchScan;
}

// Returns TRUE if 'ch' ends a line: NUL, carriage return or line feed.
static
boolean IsEnd(WCHAR ch)
{
    switch (ch)
    {
    case 0:
    case L'\r':
    case L'\n':
        return TRUE;

    default:
        return FALSE;
    }
}

// Returns TRUE if 'ch' ends one entry of a list: either the ';' separator
// or anything that ends the whole line (see IsEnd).
static
boolean IsBreak(WCHAR ch)
{
    if (L';' == ch)
    {
        return TRUE;
    }

    return IsEnd(ch);
}

// Compares one entry of a ';'-separated name list against a host name.
//
//   pwchMatchName  Start of the entry.  The entry runs up to (not including)
//                  the next ';', CR, LF or NUL; trailing blanks/tabs are
//                  ignored.
//   pwzHostName    NUL-terminated host name to test.
//   ppwchAdvance   Receives the position at which the caller should resume:
//                  the entry's terminator, or - for an empty entry that is
//                  terminated by ';' - the character after that ';'.
//
// The comparison is exact (case-sensitive) and runs right to left.  An
// entry matches if
//   - it is the single character "*", or
//   - it equals the host name, or
//   - it starts with "*." and everything after the '*' equals the tail of
//     the host name.
//
// Returns FALSE if names didn't match.
// Returns TRUE if they did.
BOOL
MatchName(LPWCH pwchMatchName, LPCWCH pwzHostName, LPWCH *ppwchAdvance)
{
    // handle empty match name
    if (IsBreak(pwchMatchName[0]))
    {
        // be sure to advance (unless at end)
        *ppwchAdvance = IsEnd(pwchMatchName[0]) ? pwchMatchName
                                                : pwchMatchName + 1;
        return FALSE;
    }

    // Find end of Match name.
    LPWCH pwchTerm = pwchMatchName;
    while (!IsBreak(*pwchTerm))
        ++pwchTerm;

    *ppwchAdvance = pwchTerm;           // return pointer to terminator

    // Work with signed indices from here on so that "ran off the front"
    // is simply index -1 - no pointer is ever formed or dereferenced
    // before the start of either string.
    int iMatch = (int)(pwchTerm - pwchMatchName) - 1;

    // ignore trailing whitespace (bounds check first, then look)
    while (iMatch >= 0 && IsSpace(pwchMatchName[iMatch]))
        --iMatch;

    // match full wildcard the easy way
    if (iMatch == 0 && pwchMatchName[0] == L'*')
        return TRUE;

    // match right-to-left, stop at mismatch, at a '*', or at the beginning
    // of either string
    int iHost = (int)ocslen(pwzHostName) - 1;
    for (; iMatch >= 0 && iHost >= 0; --iMatch, --iHost)
    {
        if (pwchMatchName[iMatch] != pwzHostName[iHost] ||
            pwchMatchName[iMatch] == L'*')
            break;
    }

    // it's a match if strings matched completely
    if (iMatch < 0 && iHost < 0)
        return TRUE;

    // or if match name started with "*." and the rest matched a suffix of
    // host name.  (iMatch == 0 after the loop implies the entry is longer
    // than one character, so index 1 is inside the entry or its terminator.)
    if (iMatch == 0 && pwchMatchName[0] == L'*' && pwchMatchName[1] == L'.')
        return TRUE;

    // otherwise it's not a match
    return FALSE;
}

//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::MatchAllowDomainList
//
//  Synopsis:   Parses the remainder of an allow-domain line at the start
//              of the wide-character buffer.  The expected form is
//              '=' followed by a ';'-separated list of names, ended by
//              CR, LF or NUL.  Each name is tested against 'pwzURL' with
//              MatchName().
//
//              Whatever the outcome, the whole line (including its
//              terminator, and the LF of a CR-LF pair when both are in
//              the buffer) is removed from the buffer and
//              m_fProcessedAllowDomainList is set.
//
//  Arguments:  pwzURL      NUL-terminated name to look for in the list.
//                          NOTE(review): it is passed to MatchName() as
//                          the host name, so presumably this is a host
//                          name rather than a full URL - confirm with
//                          the caller.
//
//  Returns:    S_OK        one of the listed names matched.
//              E_FAIL      no name matched, or the line is malformed.
//
//------------------------------------------------------------------------
HRESULT
CTDCUnify::MatchAllowDomainList(LPCWSTR pwzURL)
{
    HRESULT hr = E_FAIL;                // assume failure
    LPWCH pwchCurr = &m_psWcharBuf[0];
    LPWCH pwchEnd = &m_psWcharBuf[m_ucWcharBufCount];
    LPWCH pwchCurr2;

    // With no data there is no line to parse (and nothing we may read).
    if (m_psWcharBuf == NULL || m_ucWcharBufCount == 0)
    {
        m_fProcessedAllowDomainList = TRUE;
        return hr;
    }

    // skip over white space
    pwchCurr = SkipSpace(pwchCurr);
    if (IsEnd(*pwchCurr))
        goto Cleanup;

    // must have the equal sign
    if (*pwchCurr++ != '=' || *pwchCurr == '\0')
        goto Cleanup;

    while (TRUE)
    {
        // skip over white space
        pwchCurr = SkipSpace(pwchCurr);

        if (IsEnd(*pwchCurr))           // terminate on \r, \n, \0
            break;

        if (IsBreak(*pwchCurr))         // Must be ';',
            pwchCurr++;                 // skip it.

        // skip over white space
        pwchCurr = SkipSpace(pwchCurr);

        if (MatchName(pwchCurr, pwzURL, &pwchCurr2))
        {
            hr = S_OK;
            break;
        }
        pwchCurr = pwchCurr2;
    }

Cleanup:
    // Find the end of the line, never looking beyond the valid data.
    while (pwchCurr < pwchEnd && !IsEnd(*pwchCurr))
        pwchCurr++;

    if (pwchCurr < pwchEnd)
    {
        // Skip CRLF combos - but only peek at the LF if it is really part
        // of the data we hold; the CR may be the last character so far.
        if (*pwchCurr == '\r' && pwchCurr + 1 < pwchEnd && pwchCurr[1] == '\n')
            pwchCurr++;

        pwchCurr++;                     // step over the terminator itself
    }
    else
    {
        // No terminator inside the valid data: the line is the whole buffer.
        pwchCurr = pwchEnd;
    }

    // Eat the AllowDomain line so it doesn't screw up the data.
    // pwchCurr <= pwchEnd here, so the count cannot underflow.
    m_ucWcharBufCount -= (ULONG)(pwchCurr - m_psWcharBuf);
    memmove(m_psWcharBuf, pwchCurr, m_ucWcharBufCount*sizeof(WCHAR));

    m_fProcessedAllowDomainList = TRUE;

    return hr;
}

//------------------------------------------------------------------------
//
//  Method:     CTDCUnify::CheckForAllowDomainList
//
//  Synopsis:   Looks at the beginning of the wide-character buffer for
//              the ALLOW_DOMAIN_STRING signature (compared with
//              wch_incmp over the signature's length).  The decision is
//              only made once a complete first line - one that contains
//              a CR, LF or NUL - is in the buffer.  When the signature is
//              found it is removed from the buffer; the rest of the line
//              is left in place.
//
//  Arguments:  uses CTDCUnify state variables for the Wide Char buffer:
//              m_psWcharBuf            the Wide char buffer
//              m_ucWcharBufCount       the # of chars in the wide char buf
//
//  Returns:    ALLOW_DOMAINLIST_NO             signature not found
//              ALLOW_DOMAINLIST_YES            signature was found (and
//                                              has been removed)
//              ALLOW_DOMAINLIST_DONTKNOW       the first line is not
//                                              complete yet
//
//------------------------------------------------------------------------

CTDCUnify::ALLOWDOMAINLIST
CTDCUnify::CheckForAllowDomainList()
{
    ULONG cchSignature = ocslen(ALLOW_DOMAIN_STRING);

    // Locate the first line terminator among the valid characters.
    ULONG ichTerm = 0;
    for (; ichTerm < m_ucWcharBufCount; ++ichTerm)
    {
        if (IsEnd(m_psWcharBuf[ichTerm]))
            break;
    }

    // No terminator yet: the line is still arriving.
    if (ichTerm >= m_ucWcharBufCount)
    {
        return ALLOW_DOMAINLIST_DONTKNOW;
    }

    // A complete line that does not start with the signature settles it.
    if (0 != wch_incmp(m_psWcharBuf, ALLOW_DOMAIN_STRING, cchSignature))
    {
        return ALLOW_DOMAINLIST_NO;
    }

    // Signature present - cut it out of the front of the buffer.
    m_ucWcharBufCount -= cchSignature;
    memmove(m_psWcharBuf, m_psWcharBuf + cchSignature,
            m_ucWcharBufCount*sizeof(WCHAR));

    return ALLOW_DOMAINLIST_YES;
}