2020-09-30 16:53:49 +02:00

325 lines
9.9 KiB
C

//---------------------------------------------------------------------------
// Copyright (C) Microsoft Corporation, 1997 - 1999
//
// regexp.c
//
// Simple regular expression matching.
//
// Author:
// 06-02-97 Edward Reus Initial version.
//
//---------------------------------------------------------------------------
#include <sysinc.h>
#include <mbstring.h>
#include "regexp.h"
//-------------------------------------------------------------------------
//  MatchREi()
//
//  Match the test string (pszString) against the specified pattern. If they
//  match return TRUE, else return FALSE. Both arguments are walked one
//  multibyte character at a time (_mbsinc) and literal characters are
//  compared with _mbsnicoll(), i.e. case independent.
//
//  Patterns are made up from "literal" characters plus two control
//  characters:
//
//      *   Asterisk: place holder for "zero or more" of any character.
//      ?   Question mark: place holder for "any single character".
//
//  Bracketed character sets ([A-Z]) are NOT supported by this function;
//  a '[' in the pattern is matched as an ordinary literal character.
//
//  The whole string must be consumed by the pattern for a match.
//
//  Implementation note: rather than recursing on every '*', the matcher
//  remembers only the most recent '*' as a backtrack point. On a mismatch
//  that '*' is made to absorb one more character and matching resumes
//  just after it. This gives the same answers as the recursive
//  formulation but avoids its exponential worst case on patterns that
//  contain several '*'.
//-------------------------------------------------------------------------
BOOL MatchREi( unsigned char *pszString,
               unsigned char *pszPattern )
{
    // Backtrack point for the most recent '*':
    //   pszStarPattern - pattern position just after the '*' (NULL if
    //                    no '*' has been seen yet).
    //   pszStarString  - string position at which the text following the
    //                    '*' is currently being tried.
    unsigned char *pszStarPattern = NULL;
    unsigned char *pszStarString  = NULL;

    while (TRUE)
    {
        // Walk through the pattern, matching it against the string.
        switch (*pszPattern)
        {
        case '*':
            // A run of consecutive '*' is equivalent to a single one.
            do
            {
                pszPattern = _mbsinc(pszPattern);
            } while (*pszPattern == '*');

            if (*pszPattern == 0)
            {
                // Trailing '*' matches whatever is left of the string.
                return TRUE;
            }

            // Start by letting the '*' match zero characters.
            pszStarPattern = pszPattern;
            pszStarString  = pszString;
            continue;

        case '?':
            // Match any single character.
            if (*pszString != 0)
            {
                pszString  = _mbsinc(pszString);
                pszPattern = _mbsinc(pszPattern);
                continue;
            }
            // At end of string, so nothing for '?' to match.
            break;

        case 0:
            // End of pattern, it is a match only if at end of string too.
            if (*pszString == 0)
            {
                return TRUE;
            }
            break;

        default:
            // Check for exact (case independent) character match. The
            // end-of-string test guarantees we never step past the
            // terminator, whatever the collation compare reports.
            if (   (*pszString != 0)
                && (_mbsnicoll(pszString,pszPattern,1) == 0))
            {
                pszString  = _mbsinc(pszString);
                pszPattern = _mbsinc(pszPattern);
                continue;
            }
            break;
        }

        // Mismatch. Without a previous '*', or when that '*' has already
        // been tried against every remaining position, there is no match.
        if ((pszStarPattern == NULL) || (*pszStarString == 0))
        {
            return FALSE;
        }

        // Let the last '*' absorb one more character and retry from there.
        pszStarString = _mbsinc(pszStarString);
        pszString     = pszStarString;
        pszPattern    = pszStarPattern;
    }

    // Can never exit from here.
}
#if FALSE
... not currently used ...
//-------------------------------------------------------------------------
//  MatchRE()
//
//  Match the test string (pszString) against the specified pattern. If they
//  match return TRUE, else return FALSE. Both arguments are processed one
//  byte at a time.
//
//  In this function patterns are made up from "literal" characters plus
//  some control characters, "*", "?", "[" and "]". Asterisk (*) is a place
//  holder for "zero or more" of any character. Question mark (?) is a place
//  holder for "any single character". The square brackets ([]) contain a
//  list of matching characters, in this case "-" is used to denote a range
//  of characters (i.e. [a-zA-Z] matches any alpha character).
//
//  Literal characters are compared case independently (tolower); the
//  members and ranges of a bracketed set are compared exactly.
//
//  Note: Currently there is no support for "or" (|) operator.
//
//  Note: Ranges are simple, there is no support for dash at the beginning
//        of a range to denote the dash itself.
//
//  Note: A malformed set (a range with no upper limit, or a set with no
//        closing "]") is treated as a syntax error and never matches.
//-------------------------------------------------------------------------
BOOL MatchRE( unsigned char *pszString,
              unsigned char *pszPattern )
{
    unsigned char ch;
    unsigned char chPattern;
    unsigned char chRangeLow;

    while (TRUE)
    {
        // Walk through the pattern, matching it against the string.
        switch (chPattern = *pszPattern++)
        {
        case '*':
            // Match zero or more characters: try the rest of the pattern
            // at every remaining position of the string.
            while (*pszString)
            {
                if (MatchRE(pszString,pszPattern))
                {
                    return TRUE;
                }
                pszString++;
            }
            return MatchRE(pszString,pszPattern);

        case '?':
            // Match any single character.
            if (*pszString++ == 0)
            {
                // At end of string, so no match.
                return FALSE;
            }
            break;

        case '[':
            // Match a set of characters.
            if ( (ch = *pszString++) == 0)
            {
                // At end of string, nothing to test against the set.
                return FALSE;
            }

            chRangeLow = 0;
            while ((chPattern = *pszPattern++) != 0)
            {
                if (chPattern == ']')
                {
                    // End of char set, no match found.
                    return FALSE;
                }

                if (chPattern == '-')
                {
                    // Range of chars: the low limit is the previous set
                    // member, the high limit is the next pattern char.
                    chPattern = *pszPattern;
                    if ((chPattern == 0)||(chPattern == ']'))
                    {
                        // Syntax error.
                        return FALSE;
                    }

                    if ((ch >= chRangeLow)&&(ch <= chPattern))
                    {
                        // In range, go to next character.
                        break;
                    }
                }

                chRangeLow = chPattern;

                // See if character matches this pattern element.
                if (ch == chPattern)
                {
                    // Character match, go on.
                    break;
                }
            }

            if (chPattern == 0)
            {
                // Syntax error, no matching close bracket "]". pszPattern
                // is already past the terminator, so stop here rather
                // than read beyond the end of the pattern.
                return FALSE;
            }

            // Have a match in the character set, skip to the end of the set.
            while (chPattern != ']')
            {
                chPattern = *pszPattern++;
                if (chPattern == 0)
                {
                    // Syntax error, no matching close bracket "]".
                    return FALSE;
                }
            }
            break;

        case 0:
            // End of pattern, return TRUE if at end of string.
            return ((*pszString)? FALSE : TRUE);

        default:
            ch = *pszString++;
            // Check for exact character match.
            // Note: CASE doesn't matter...
            if (tolower(ch) != tolower(chPattern))
            {
                // No match.
                return FALSE;
            }
            break;
        }
    }

    // Can never exit from here.
}
//-------------------------------------------------------------------------
//  MatchREList()
//
//  Test a string against a list (array) of RE patterns using MatchRE().
//  Returns TRUE as soon as one pattern matches, FALSE if none does. The
//  list is an array of pointers to RE pattern strings terminated by a
//  NULL pointer; a NULL list pointer is treated as an empty list.
//-------------------------------------------------------------------------
BOOL MatchREList( unsigned char  *pszString,
                  unsigned char **ppszREList )
{
    unsigned char **ppszEntry;

    if (!ppszREList)
    {
        // No list at all, so nothing can match.
        return FALSE;
    }

    // Visit the entries in order, stopping at the NULL terminator.
    for (ppszEntry = ppszREList; *ppszEntry; ppszEntry++)
    {
        if (MatchRE(pszString,*ppszEntry))
        {
            return TRUE;
        }
    }

    return FALSE;
}
//-------------------------------------------------------------------------
//  MatchExactList()
//
//  Test a string against a list (array) of strings using _mbsicmp().
//  Returns TRUE as soon as _mbsicmp() reports an entry equal to the
//  string, FALSE if no entry compares equal. The list is an array of
//  string pointers terminated by a NULL pointer; a NULL list pointer is
//  treated as an empty list.
//-------------------------------------------------------------------------
BOOL MatchExactList( unsigned char  *pszString,
                     unsigned char **ppszREList )
{
    unsigned char **ppszEntry;

    if (!ppszREList)
    {
        // No list at all, so nothing can match.
        return FALSE;
    }

    // Visit the entries in order, stopping at the NULL terminator.
    for (ppszEntry = ppszREList; *ppszEntry; ppszEntry++)
    {
        if (_mbsicmp(pszString,*ppszEntry) == 0)
        {
            return TRUE;
        }
    }

    return FALSE;
}
#endif