//---------------------------------------------------------------------------
//  Copyright (C) Microsoft Corporation, 1997 - 1999
//
//  regexp.c
//
//  Simple regular expression matching.
//
//  Author:
//    06-02-97  Edward Reus    Initial version.
//
//---------------------------------------------------------------------------
|
|
|
|
#include <sysinc.h>
|
|
#include <mbstring.h>
|
|
#include "regexp.h"
|
|
|
|
//-------------------------------------------------------------------------
|
|
// MatchREi()
|
|
//
|
|
// Match the test string (pszString) against the specifed pattern. If they
|
|
// match return TRUE, else return FALSE. This version works with ANSI
|
|
// characters and is case independent.
|
|
//
|
|
// In this function patterns are made up from "literal" characters plus
|
|
// some control characters, "*", "?". Asterix (*) is a place
|
|
// holder for "zero or more" of any character. Question Mark (?) is a place
|
|
// holder for "any single character". The square brackets ([]) contain a
|
|
// list of matching characters, in this case "-" is used to denote a range
|
|
// of characters (i.e. [A-Z] matches any alpha character), but I didn't
|
|
// pub brackets in this one yet...
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
BOOL MatchREi( unsigned char *pszString,
|
|
unsigned char *pszPattern )
|
|
{
|
|
unsigned char *pchRangeLow;
|
|
|
|
while (TRUE)
|
|
{
|
|
// Walk throuh the pattern, matching it against the string.
|
|
switch (*pszPattern)
|
|
{
|
|
case '*':
|
|
// Match zero or more characters.
|
|
pszPattern = _mbsinc(pszPattern);
|
|
while (*pszString)
|
|
{
|
|
if (MatchREi(pszString,pszPattern))
|
|
{
|
|
return TRUE;
|
|
}
|
|
pszString = _mbsinc(pszString);
|
|
}
|
|
return MatchREi(pszString,pszPattern);
|
|
|
|
case '?':
|
|
// Match any single character.
|
|
if (*pszString == 0)
|
|
{
|
|
// Not at end of string, so no match.
|
|
return FALSE;
|
|
}
|
|
pszString = _mbsinc(pszString);
|
|
break;
|
|
|
|
#if FALSE
|
|
case '[':
|
|
// Match a set of characters.
|
|
if (*pszString == 0)
|
|
{
|
|
// Syntax error, no matching close bracket "]".
|
|
return FALSE;
|
|
}
|
|
|
|
pchRangeLow = 0;
|
|
while (*pszPattern)
|
|
{
|
|
if (*pszPattern == ']')
|
|
{
|
|
// End of char set, no match found.
|
|
return FALSE;
|
|
}
|
|
|
|
if (*pszPattern == '-')
|
|
{
|
|
// check a range of chars?
|
|
pszPattern = _mbsinc(pszPattern);
|
|
|
|
// get high limit of range:
|
|
if ((*pszPattern == 0)||(*pszPattern == ']'))
|
|
{
|
|
// Syntax error.
|
|
return FALSE;
|
|
}
|
|
|
|
if ( (_mbsnicoll(pszString,pchRangeLow,1) >= 0)
|
|
&&(_mbsnicoll(pszString,pszPattern,1) <= 0))
|
|
{
|
|
// In range, go to next character.
|
|
break;
|
|
}
|
|
}
|
|
|
|
pchRangeLow = pchPattern;
|
|
|
|
// See if character matches this pattern element.
|
|
if (_mbsnicoll(pszString,pszPattern,1) == 0)
|
|
{
|
|
// Character match, go on.
|
|
break;
|
|
}
|
|
|
|
pszPattern = _mbsinc(pszPattern);
|
|
}
|
|
|
|
// Have a match in the character set, skip to the end of the set.
|
|
while ((*pszPattern != 0)&&(*pszPattern != ']'))
|
|
{
|
|
pszPattern = _mbsinc(pszPattern);
|
|
}
|
|
|
|
break;
|
|
#endif
|
|
|
|
case 0:
|
|
// End of pattern, return TRUE if at end of string.
|
|
return ((*pszString)? FALSE : TRUE);
|
|
|
|
default:
|
|
// Check for exact character match.
|
|
if (_mbsnicoll(pszString,pszPattern,1))
|
|
{
|
|
// No match.
|
|
return FALSE;
|
|
}
|
|
pszString = _mbsinc(pszString);
|
|
break;
|
|
}
|
|
|
|
pszPattern = _mbsinc(pszPattern);
|
|
}
|
|
|
|
// Can never exit from here.
|
|
}
|
|
|
|
#if FALSE
|
|
// ... not currently used ...
|
|
//-------------------------------------------------------------------------
|
|
// MatchRE()
|
|
//
|
|
// Match the test string (pszString) against the specifed pattern. If they
|
|
// match return TRUE, else return FALSE.
|
|
//
|
|
// In this function patterns are made up from "literal" characters plus
|
|
// some control characters, "*", "?", "[" and "]". Asterix (*) is a place
|
|
// holder for "zero or more" of any character. Question Mark (?) is a place
|
|
// holder for "any single character". The square brackets ([]) contain a
|
|
// list of matching characters, in this case "-" is used to denote a range
|
|
// of characters (i.e. [a-zA-Z] matches any alpha character).
|
|
//
|
|
// Note: Currently there is no support for "or" (|) operator.
|
|
//
|
|
// Note: Ranges are simple, there is no support for dash at the begining
|
|
// of a range to denote the dash itself.
|
|
//-------------------------------------------------------------------------
|
|
BOOL MatchRE( unsigned char *pszString,
|
|
unsigned char *pszPattern )
|
|
{
|
|
unsigned char ch;
|
|
unsigned char chPattern;
|
|
unsigned char chRangeLow;
|
|
|
|
while (TRUE)
|
|
{
|
|
// Walk throuh the pattern, matching it against the string.
|
|
switch (chPattern = *pszPattern++)
|
|
{
|
|
case '*':
|
|
// Match zero or more characters.
|
|
while (*pszString)
|
|
{
|
|
if (MatchRE(pszString++,pszPattern))
|
|
{
|
|
return TRUE;
|
|
}
|
|
}
|
|
return MatchRE(pszString,pszPattern);
|
|
|
|
case '?':
|
|
// Match any single character.
|
|
if (*pszString++ == 0)
|
|
{
|
|
// Not at end of string, so no match.
|
|
return FALSE;
|
|
}
|
|
break;
|
|
|
|
case '[':
|
|
// Match a set of characters.
|
|
if ( (ch = *pszString++) == 0)
|
|
{
|
|
// Syntax error, no matching close bracket "]".
|
|
return FALSE;
|
|
}
|
|
|
|
// ch = toupper(ch);
|
|
chRangeLow = 0;
|
|
while (chPattern = *pszPattern++)
|
|
{
|
|
if (chPattern == ']')
|
|
{
|
|
// End of char set, no match found.
|
|
return FALSE;
|
|
}
|
|
|
|
if (chPattern == '-')
|
|
{
|
|
// check a range of chars?
|
|
chPattern = *pszPattern; // get high limit of range
|
|
if ((chPattern == 0)||(chPattern == ']'))
|
|
{
|
|
// Syntax error.
|
|
return FALSE;
|
|
}
|
|
|
|
if ((ch >= chRangeLow)&&(ch <= chPattern))
|
|
{
|
|
// In range, go to next character.
|
|
break;
|
|
}
|
|
}
|
|
|
|
chRangeLow = chPattern;
|
|
// See if character matches this pattern element.
|
|
if (ch == chPattern)
|
|
{
|
|
// Character match, go on.
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Have a match in the character set, skip to the end of the set.
|
|
while ((chPattern)&&(chPattern != ']'))
|
|
{
|
|
chPattern = *pszPattern++;
|
|
}
|
|
|
|
break;
|
|
|
|
case 0:
|
|
// End of pattern, return TRUE if at end of string.
|
|
return ((*pszString)? FALSE : TRUE);
|
|
|
|
default:
|
|
ch = *pszString++;
|
|
// Check for exact character match.
|
|
// Note: CASE doesn't matter...
|
|
if (tolower(ch) != tolower(chPattern))
|
|
{
|
|
// No match.
|
|
return FALSE;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Can never exit from here.
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// MatchREList()
|
|
//
|
|
// Match a string against a list (array) of RE pattens, return TRUE iff
|
|
// the string matches one of the RE patterns. The list of patterns is a
|
|
// NULL terminated array of pointers to RE pattern strings.
|
|
//-------------------------------------------------------------------------
|
|
BOOL MatchREList( unsigned char *pszString,
|
|
unsigned char **ppszREList )
|
|
{
|
|
unsigned char *pszPattern;
|
|
|
|
if (ppszREList)
|
|
{
|
|
pszPattern = *ppszREList;
|
|
while (pszPattern)
|
|
{
|
|
if (MatchRE(pszString,pszPattern))
|
|
{
|
|
return TRUE;
|
|
}
|
|
|
|
pszPattern = *(++ppszREList);
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
//-------------------------------------------------------------------------
|
|
// MatchExactList()
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
BOOL MatchExactList( unsigned char *pszString,
|
|
unsigned char **ppszREList )
|
|
{
|
|
unsigned char *pszPattern;
|
|
|
|
if (ppszREList)
|
|
{
|
|
pszPattern = *ppszREList;
|
|
while (pszPattern)
|
|
{
|
|
if (!_mbsicmp(pszString,pszPattern))
|
|
{
|
|
return TRUE;
|
|
}
|
|
|
|
pszPattern = *(++ppszREList);
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
#endif
|