/*  reparse.c - parse a regular expression

 *  cl /c /Zep /AM /NT RE /Gs /G2 /Oa /D LINT_ARGS /Fc reparse.c

 *  Modifications:

 *      22-Jul-1986 mz  Hookable allocator (allow Z to create enough free space)
 *      19-Nov-1986 mz  Add RETranslateLength for Z to determine overflows
 *      18-Aug-1987 mz  Add field width and justification in translations
 *      01-Mar-1988 mz  Add in UNIX-like syntax
 *      14-Jun-1988 mz  Fix file parts allowing backslashes
 *      04-Dec-1989 bp  Let :p accept uppercase drive names
 *      20-Dec-1989 ln  capture trailing periods in :p
 *      23-Jan-1990 ln  Handle escaped characters & invalid trailing \ in
 *                      RETranslate.
 *      05-Feb-1991 mz  Merged in KANJI stuff

*/

#include "precomp.h"
#pragma hdrstop

#include  <string.h>   // Move(): memmove()
// Fill(): memset()


char *REmalloc(size_t size);


/*  DEBOUT - debug trace output.
 *
 *  Invoked with a doubly-parenthesized printf argument list, e.g.
 *      DEBOUT(("value is %d\n", value));
 *  In DEBUG builds the text is printed and stdout is flushed; otherwise the
 *  macro does nothing.  Both forms expand to exactly one statement so the
 *  macro can be used safely as the unbraced body of an if/else.
 */
#if DEBUG
#define DEBOUT(x)   do { printf x; fflush (stdout); } while (0)
#else
#define DEBOUT(x)   do { } while (0)
#endif


/*  regular expression compiler.  A regular expression is compiled into pseudo-
 *  machine code.  The principle is portable to other machines and is outlined
 *  below.  We parse by recursive descent.

 *  The pseudo-code is fairly close to normal assembler and can be easily
 *  converted to be real machine code and has been done for the 80*86
 *  processor family.

 *  The basic regular expressions handled are:

 *      letter      matches a single letter
 *      [class]     matches a single character in the class
 *      [~class]    matches a single character not in the class
 *      ^           matches the beginning of the line
 *      $           matches the end of the line
 *      ?           matches any character (except previous two)
 *      \x          literal x
 *      \n          matches the previously tagged/matched expression (n digit)

 *  Regular expressions are now built from the above via:

 *      x*          matches 0 or more x, matching minimal number
 *      x+          matches 1 or more x, matching minimal number
 *      x@          matches 0 or more x, matching maximal number
 *      x#          matches 1 or more x, matching maximal number
 *      (x1!x2!...) matches x1 or x2 or ...
 *      ~x          matches 0 characters but prevents x from occurring
 *      {x}         identifies an argument

 *  The final expression that is matched by the compiler is:

 *      xy          matches x then y


 *  The actual grammar used is:                     Parsing action:

 *      TOP ->  re                                  PROLOG .re. EPILOG


 *      re ->   { re } re   |                       LEFTARG .re. RIGHTARG
 *              e re        |
 *              empty

 *      e ->    se *        |                       SMSTAR .se. SMSTAR1
 *              se +        |
 *              se @        |                       STAR .se. STAR1
 *              se #        |
 *              se

 *      se ->   ( alt )     |
 *              [ ccl ]     |
 *              ?           |                       ANY
 *              ^           |                       BOL
 *              $           |                       EOL
 *              ~ se        |                       NOTSIGN .se. NOTSIGN1
 *              :x          |
 *              \n          |                       PREV
 *              letter                              LETTER x

 *      alt ->  re ! alt    |                       LEFTOR .re. ORSIGN
 *              re                                  LEFTOR .re. ORSIGN RIGHTOR

 *      ccl ->  ~ cset      |                       CCLBEG NOTSIGN .cset. CCLEND
 *              cset                                CCLBEG NULL .cset. CCLEND

 *      cset -> item cset   |
 *              item

 *      item -> letter - letter |                   RANGE x y
 *              letter                              RANGE x x

 *  Abbreviations are introduced by :.

 *      :a      [a-zA-Z0-9]                             alphanumeric
 *      :b      ([<space><tab>]#)                       whitespace
 *      :c      [a-zA-Z]                                alphabetic
 *      :d      [0-9]                                   digit
 *      :f      ([~/\\ \"\[\]\:<|>+=;,.]#!..!.)         file part
 *      :h      ([0-9a-fA-F]#)                          hex number
 *      :i      ([a-zA-Z_$][a-zA-Z0-9_$]@)              identifier
 *      :n      ([0-9]#.[0-9]@![0-9]@.[0-9]#![0-9]#)    number
 *      :p      (([A-Za-z]\:!)(\\!/!)(:f(.:f!)(\\!/))@:f(.:f!.!)) path
 *      :q      ("[~"]@"!'[~']@')                       quoted string
 *      :w      ([a-zA-Z]#)                             word
 *      :z      ([0-9]#)                                integer

*/

extern  char XLTab[256];                /* lower-casing table                */

/*  One-time module initialization.  RECompile calls RE__ModuleInitialize()
 *  whenever RE__hasBeenInitialized is still zero.  The flag is not set
 *  anywhere in this part of the file; presumably the initializer sets it.
 */
static  BOOL  RE__hasBeenInitialized = 0;
static  void  RE__ModuleInitialize(void);

/*  There are several classes of characters:

 *  Closure characters are suffixes that indicate repetition of the previous
 *  RE.

 *  Simple RE chars are characters that indicate a particular type of match

*/

/*  Closure character equates.  These are the values returned by
 *  REClosureChar and dispatched on by REParseE.
*/
#define CC_SMPLUS       0               /* plus closure                      */
#define CC_SMCLOSURE    1               /* star closure                      */
#define CC_POWER        2               /* n repetitions of previous pattern */
#define CC_CLOSURE      3               /* greedy closure                    */
#define CC_PLUS         4               /* greedy plus                       */
#define CC_EMPTY        5               /* no closure follows the simple RE  */
#define CC_ERROR        -1              /* simple RE failed to parse         */

/*  Simple RE character equates.  These are the values returned by
 *  RECharType; the characters that map to each one depend on the syntax
 *  (Z or UNIX) currently selected.
 */
#define SR_BOL          0
#define SR_EOL          1
#define SR_ANY          2
#define SR_CCLBEG       3
#define SR_LEFTOR       4
#define SR_CCLEND       5
#define SR_ABBREV       6
#define SR_RIGHTOR      7
#define SR_ORSIGN       8
#define SR_NOTSIGN      9
#define SR_LEFTARG      10
#define SR_RIGHTARG     11
#define SR_LETTER       12
#define SR_PREV         13

/*  Terminator sets passed to REParseRE as pEnd.  Each is a list of SR_xx
 *  types ended by -1: EndAltRE stops an alternative, EndArg stops a tagged
 *  argument.
 */
int EndAltRE[] = {SR_ORSIGN, SR_RIGHTOR, -1};
int EndArg[] = {SR_RIGHTARG, -1};

/*  Abbreviation table, terminated by NULL.  The first character of each
 *  entry is the abbreviation letter; the remainder is its expansion, written
 *  in Z syntax.  REParseAbbrev matches on the first character and parses the
 *  rest of the string as a simple RE.
 */
char *pAbbrev[] = {
         "a[a-zA-Z0-9]",
         "b([ \t]#)",
         "c[a-zA-Z]",
         "d[0-9]",
         "f([~/\\\\ \\\"\\[\\]\\:<|>+=;,.]#!..!.)",
         "h([0-9a-fA-F]#)",
         "i([a-zA-Z_$][a-zA-Z0-9_$]@)",
         "n([0-9]#.[0-9]@![0-9]@.[0-9]#![0-9]#)",
         "p(([A-Za-z]\\:!)(\\\\!/!)(:f(.:f!)(\\\\!/))@:f(.:f!.!))",
         "q(\"[~\"]@\"!'[~']@')",
         "w([a-zA-Z]#)",
         "z([0-9]#)",
         NULL
};

/*  Character set handed to strbskip when stepping over a decimal number */
static char *digits = "0123456789";

static flagType fZSyntax = TRUE;        /* TRUE => use Z syntax for things */

/*  Number of tagged arguments completed so far in the current compile.
 *  RECompile resets it to 0, REParseRE increments it after each RIGHTARG,
 *  and REParsePrev uses it to reject references to arguments that have not
 *  been completed yet.
 */
static int cArg;

#if defined(KANJI)

/*  Lead byte test for KANJI.  The bits set in the table below correspond to
 *  lead bytes in the ranges 0x81-0x9F and 0xE0-0xFC (the table has no bit
 *  set for 0xA0).  Each table byte covers eight consecutive character codes.
 *  NOTE(review): the code that indexes these tables is not in this part of
 *  the file; the ranges above assume REBTab[c & 7] selects the bit within
 *  REKTab[c >> 3] -- confirm against the lookup macro.
*/
unsigned char REKTab[32] = {0x00,   0x00,   /*  0 .. F  */
                                0x00,   0x00,   /* 10 .. 1F */
                                0x00,   0x00,   /* 20 .. 2F */
                                0x00,   0x00,   /* 30 .. 3F */
                                0x00,   0x00,   /* 40 .. 4F */
                                0x00,   0x00,   /* 50 .. 5F */
                                0x00,   0x00,   /* 60 .. 6F */
                                0x00,   0x00,   /* 70 .. 7F */
                                0x7f,   0xff,   /* 80 .. 8F */
                                0xff,   0xff,   /* 90 .. 9F */
                                0x00,   0x00,   /* A0 .. AF */
                                0x00,   0x00,   /* B0 .. BF */
                                0x00,   0x00,   /* C0 .. CF */
                                0x00,   0x00,   /* D0 .. DF */
                                0xff,   0xff,   /* E0 .. EF */
                                0xff,   0xf8    /* F0 .. FF */
};

/*  Single-bit masks, most significant bit first */
unsigned char REBTab[8] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};

#endif

/*  RECharType - classify the pattern character(s) at p
 *
 *  The classification depends on the syntax currently selected by fZSyntax.
 *
 *  Z syntax:     ^ BOL    $N PREV   $ EOL    ? ANY    [ CCLBEG   ] CCLEND
 *                ( LEFTOR ) RIGHTOR ! ORSIGN ~ NOTSIGN : ABBREV
 *                { LEFTARG  } RIGHTARG
 *  UNIX syntax:  ^ BOL    $ EOL     . ANY    [ CCLBEG   ] CCLEND
 *                \: ABBREV  \( LEFTARG  \) RIGHTARG  \~ NOTSIGN
 *                \{ LEFTOR  \} RIGHTOR  \! ORSIGN    \N PREV
 *
 *  Anything else (including a backslash followed by any other character) is
 *  SR_LETTER.
 *
 *  p           character pointer; p[1] is examined only when *p is '$' or
 *              '\\', so p must point at a NUL-terminated string
 *
 *  returns     type of character (SR_xx)
 */
int pascal INTERNAL RECharType(char *p)
{
    if (fZSyntax) {
        /*  Zibo syntax
         */
        switch (*p) {
        case '^':
            return SR_BOL;
        case '$':
            /*  The cast keeps isdigit defined for bytes with the high bit
             *  set (e.g. DBCS), where plain char may be negative.
             */
            return isdigit((unsigned char)p[1]) ? SR_PREV : SR_EOL;
        case '?':
            return SR_ANY;
        case '[':
            return SR_CCLBEG;
        case '(':
            return SR_LEFTOR;
        case ']':
            return SR_CCLEND;
        case ':':
            return SR_ABBREV;
        case ')':
            return SR_RIGHTOR;
        case '!':
            return SR_ORSIGN;
        case '~':
            return SR_NOTSIGN;
        case '{':
            return SR_LEFTARG;
        case '}':
            return SR_RIGHTARG;
        default:
            return SR_LETTER;
        }
    }

    /*  UNIX syntax
     */
    switch (*p) {
    case '^':
        return SR_BOL;
    case '$':
        return SR_EOL;
    case '.':
        return SR_ANY;
    case '[':
        return SR_CCLBEG;
    case ']':
        return SR_CCLEND;
    case '\\':
        switch (p[1]) {
        case ':':                          /*  \:C */
            return SR_ABBREV;
        case '(':                          /*  \(  */
            return SR_LEFTARG;
        case ')':                          /*  \)  */
            return SR_RIGHTARG;
        case '~':                          /*  \~  */
            return SR_NOTSIGN;
        case '{':                          /*  \{  */
            return SR_LEFTOR;
        case '}':                          /*  \}  */
            return SR_RIGHTOR;
        case '!':                          /*  \!  */
            return SR_ORSIGN;
        default:
            break;
        }
        if (isdigit((unsigned char)p[1]))  /*  \N  */
            return SR_PREV;
        /*  Any other escaped character is an ordinary letter
         */
        return SR_LETTER;
    default:
        return SR_LETTER;
    }
}

/*  RECharLen - length of the special-character token at p
 *
 *  Z syntax:     $N and :C are two characters long, everything else is one.
 *  UNIX syntax:  \{ \} \~ \( \) \! and \N are two characters, \:C is three,
 *                everything else (including a backslash before any other
 *                character) is one.
 *
 *  p           character pointer to type
 *
 *  returns     length in chars of type
 */
int pascal INTERNAL RECharLen(char *p)
{
    if (fZSyntax) {
        int type = RECharType(p);

        /*  $N  and  :N  */
        return (type == SR_PREV || type == SR_ABBREV) ? 2 : 1;
    }

    if (*p != '\\')
        return 1;

    switch (p[1]) {
    case '{':
    case '}':
    case '~':
    case '(':
    case ')':
    case '!':
        return 2;                          /*  \C  */
    case ':':                              /*  \:C */
        return 3;
    default:
        /*  The cast keeps isdigit defined for bytes with the high bit set,
         *  where plain char may be negative.
         */
        return isdigit((unsigned char)p[1]) ? 2 : 1;   /*  \N  */
    }
}

/*  REClosureLen - length of a closure character
 *
 *  Every closure operator is a single character, so the argument is not
 *  examined.
 *
 *  p           character pointer to the closure character (unused)
 *
 *  returns     length in chars of the closure, always 1
 */
int pascal INTERNAL REClosureLen(char *p)
{
    int cch = 1;

    (void) p;
    return cch;
}

/*  REParseRE - parse a general RE up to, but not including, one of the
 *  terminating character types in pEnd, invoking the action routine at each
 *  node of the parse tree.
 *
 *  pAction     Parse action routine to call at particular points in the
 *              parse tree.  The value it returns for LEFTARG is handed back
 *              to it with the matching RIGHTARG.
 *  p           character pointer to string being parsed
 *  pEnd        -1 terminated list of SR_xx types that end the current RE, or
 *              NULL when only end-of-string ends it (the top-level case).
 *
 *  Returns:    pointer to the terminating character (or to the NUL when
 *              pEnd is NULL) if the parse succeeds
 *              NULL on a syntax error, including reaching end of string
 *              while a terminator is still expected.
 */
char * pascal INTERNAL REParseRE(PACT pAction, register char *p, int *pEnd)
{
    UINT_PTR uArg;
    int *pStop;
    int type;

    DEBOUT(("REParseRE (%04x, %s)\n", pAction, p));

    for (;;) {
        /*  End of input is only acceptable when nothing is left open
         */
        if (*p == '\0') {
            if (pEnd != NULL) {
                DEBOUT(("REParse expecting more, ERROR\n"));
                return NULL;
            }
            return p;
        }

        type = RECharType(p);

        /*  Stop at any of the caller's terminators
         */
        if (pEnd != NULL) {
            for (pStop = pEnd; *pStop != -1; pStop++) {
                if (type == *pStop)
                    return p;
            }
        }

        if (type != SR_LEFTARG) {
            /*  Parse .e.
             */
            p = REParseE(pAction, p);
            if (p == NULL)
                return NULL;
            continue;
        }

        /*  Parse LEFTARG .re. RIGHTARG
         */
        uArg = (*pAction) (LEFTARG, 0, 0, 0);
        p = REParseRE(pAction, p + RECharLen(p), EndArg);
        if (p == NULL)
            return NULL;
        (*pAction) (RIGHTARG, uArg, 0, 0);
        cArg++;

        /*  step over the RIGHTARG token itself
         */
        p += RECharLen(p);
    }
}

/*  REParseE - parse a simple regular expression together with any closure
 *  that follows it.
 *
 *  The "one or more" closures are handled by first parsing one mandatory
 *  copy of the simple RE and then parsing the same text again as the
 *  corresponding "zero or more" closure.
 *
 *  pAction     Action to apply at special parse nodes
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (syntax error)
 */
char * pascal INTERNAL REParseE(PACT pAction, register char *p)
{
    int closure;

    DEBOUT(("REParseE (%04x, %s)\n", pAction, p));

    closure = REClosureChar(p);

    if (closure == CC_EMPTY)
        return REParseSE(pAction, p);

    if (closure == CC_POWER)
        return REParsePower(pAction, p);

    /*  plus closures: emit the mandatory first copy
     */
    if (closure == CC_SMPLUS || closure == CC_PLUS) {
        if (REParseSE(pAction, p) == NULL)
            return NULL;
    }

    if (closure == CC_SMPLUS || closure == CC_SMCLOSURE)
        return REParseClosure(pAction, p);

    if (closure == CC_PLUS || closure == CC_CLOSURE)
        return REParseGreedy(pAction, p);

    /*  CC_ERROR or anything unexpected
     */
    return NULL;
}

/*  REParseSE - parse a simple regular expression by dispatching on the type
 *  of the character at p.  Any type without a dedicated parser is treated as
 *  a single character match.
 *
 *  pAction     Action to apply at special parse nodes
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (syntax error)
 */
char * pascal INTERNAL REParseSE(register PACT pAction, register char *p)
{
    int type;

    DEBOUT(("REParseSE (%04x, %s)\n", pAction, p));

    type = RECharType(p);

    if (type == SR_CCLBEG)
        return REParseClass(pAction, p);
    if (type == SR_ANY)
        return REParseAny(pAction, p);
    if (type == SR_BOL)
        return REParseBOL(pAction, p);
    if (type == SR_EOL)
        return REParseEOL(pAction, p);
    if (type == SR_PREV)
        return REParsePrev(pAction, p);
    if (type == SR_LEFTOR)
        return REParseAlt(pAction, p);
    if (type == SR_NOTSIGN)
        return REParseNot(pAction, p);
    if (type == SR_ABBREV)
        return REParseAbbrev(pAction, p);

    return REParseChar(pAction, p);
}

/*  REParseClass - parse a class membership match
 *
 *  The class is opened with CCLNOT when it begins with the negation
 *  character of the current syntax ('~' for Z, '^' for UNIX) and with CCLBEG
 *  otherwise.  Each member is read as a (lead, trail) byte pair: for a
 *  single-byte character the lead is 0 and the trail is the character.  A
 *  backslash quotes the character that follows it.  A final all-zero
 *  RANGEDBCS1/RANGEDBCS2 pair is emitted when the closing bracket is found.
 *
 *  A DBCS lead byte that is immediately followed by the end of the pattern
 *  is treated as a single-byte character, so the terminating NUL is never
 *  consumed as a trail byte.
 *
 *  pAction     Action to apply at beginning of parse and at each range
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (syntax error: unterminated class, or a range
 *              whose ends are not both single-byte or both double-byte)
 */
char * pascal INTERNAL REParseClass(PACT pAction, register char *p)
{
    char c;
    char c2, c3, c4;
    UINT_PTR u;

    DEBOUT(("REParseClass (%04x, %s)\n", pAction, p));

    p += RECharLen(p);
    if ((fZSyntax && *p == '~') || (!fZSyntax && *p == '^')) {
        u = (*pAction) (CCLNOT, 0, 0, 0);
        p += RECharLen(p);
    } else
        u = (*pAction) (CCLBEG, 0, 0, 0);

    while (RECharType(p) != SR_CCLEND) {
        if (*p == '\\')
            p++;
        if (*p == '\0') {
            DEBOUT(("REParseClass expecting more, ERROR\n"));
            return NULL;
        }
        c = *p++;

        /*  Only take a trail byte if there is one; otherwise p would step
         *  over the terminating NUL.
         */
        if (IsDBCSLeadByte((BYTE)c) && *p != '\0')
            c2 = *p++;
        else {
            c2 = c;
            c = 0;
        }
        if (*p == '-') {
            p++;
            if (*p == '\\')
                p++;
            if (*p == '\0') {
                DEBOUT(("REParseClass expecting more, ERROR\n"));
                return NULL;
            }
            c3 = *p;
            if (IsDBCSLeadByte((BYTE)c3) && p[1] != '\0')
                c4 = *++p;
            else {
                c4 = c3;
                c3 = 0;
            }
            /*  Both ends of a range must be the same width
             */
            if ((c == 0 && c3 == 0) || (c != 0 && c3 != 0)) {
                u = (*pAction) (RANGEDBCS1, 0, c, c2);
                (*pAction) (RANGEDBCS2, u, c3, c4);
            } else
                return NULL;
            p++;
        } else
#if defined(KANJI)
        {
            u = (*pAction) (RANGEJ1, 0, c, c2);
            (*pAction) (RANGEJ2, u, c, c2);
        }
#else
            /*  NOTE(review): for a single-byte member c was zeroed above and
             *  the character itself is in c2, yet c is what is passed here.
             *  Confirm against the RANGE action whether c2 was intended.
             */
            (*pAction) (RANGE, u, c, c);
#endif
    }
    c = 0;
    u = (*pAction) (RANGEDBCS1, 0, c, c);
    (*pAction) (RANGEDBCS2, u, c, c);
    return p + RECharLen(p);
}

/*  REParseAny - parse a match-any-character expression
 *
 *  Emits the ANY action and steps over the token.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text (this parser does not fail)
 */
char * pascal INTERNAL REParseAny(PACT pAction, char *p)
{
    DEBOUT(("REParseAny (%04x, %s)\n", pAction, p));

    (*pAction) (ANY, 0, 0, 0);

    p += RECharLen(p);
    return p;
}

/*  REParseBOL - parse a beginning-of-line match
 *
 *  Emits the BOL action and steps over the token.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text (this parser does not fail)
 */
char * pascal INTERNAL REParseBOL(PACT pAction, char *p)
{
    DEBOUT(("REParseBOL (%04x, %s)\n", pAction, p));

    (*pAction) (BOL, 0, 0, 0);

    p += RECharLen(p);
    return p;
}

/*  REParsePrev - parse a previous-match item
 *
 *  The argument number is the digit that follows the introducer at p.  It
 *  must be at least 1 and no greater than the number of tagged arguments
 *  completed so far (cArg).
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (argument number out of range)
 */
char * pascal INTERNAL REParsePrev(PACT pAction, char *p)
{
    UINT_PTR argNum = *(p + 1) - '0';

    DEBOUT(("REParsePrev (%04x, %s)\n", pAction, p));

    if (argNum >= 1 && argNum <= (unsigned) cArg) {
        (*pAction) (PREV, argNum, 0, 0);
        return p + RECharLen(p);
    }

    DEBOUT(("REParsePrev invalid previous number, ERROR\n"));
    return NULL;
}

/*  REParseEOL - parse an end-of-line match
 *
 *  Emits the EOL action and steps over the token.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text (this parser does not fail)
 */
char * pascal INTERNAL REParseEOL(PACT pAction, char *p)
{
    DEBOUT(("REParseEOL (%04x, %s)\n", pAction, p));

    (*pAction) (EOL, 0, 0, 0);

    p += RECharLen(p);
    return p;
}

/*  REParseAlt - parse a series of alternatives
 *
 *  On entry p is at the LEFTOR token.  Each pass steps over the token at p
 *  (LEFTOR the first time, ORSIGN afterwards), brackets one alternative with
 *  the LEFTOR and ORSIGN actions, and stops once p is at RIGHTOR.  The value
 *  returned by each action is threaded into the next one.
 *
 *  pAction     Action to apply before and after each alternative
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (syntax error)
 */
char * pascal INTERNAL REParseAlt(PACT pAction, register char *p)
{
    UINT_PTR uLink = 0;

    DEBOUT(("REParseAlt (%04x, %s)\n", pAction, p));

    for (;;) {
        if (RECharType(p) == SR_RIGHTOR)
            break;

        p += RECharLen(p);
        uLink = (*pAction) (LEFTOR, uLink, 0, 0);

        p = REParseRE(pAction, p, EndAltRE);
        if (p == NULL)
            return NULL;

        uLink = (*pAction) (ORSIGN, uLink, 0, 0);
    }

    (*pAction) (RIGHTOR, uLink, 0, 0);
    return p + RECharLen(p);
}

/*  REParseNot - parse a guard-against match
 *
 *  The guarded simple RE is bracketed by the NOTSIGN and NOTSIGN1 actions.
 *  NOTSIGN1 is issued whether or not the guarded expression parsed.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (nothing follows the not-sign, or the guarded
 *              expression is a syntax error)
 */
char * pascal INTERNAL REParseNot(PACT pAction, register char *p)
{
    UINT_PTR uGuard;
    char *pAfter;

    DEBOUT(("REParseNot (%04x, %s)\n", pAction, p));

    p += RECharLen(p);
    if (*p != '\0') {
        uGuard = (*pAction) (NOTSIGN, 0, 0, 0);
        pAfter = REParseSE(pAction, p);
        (*pAction) (NOTSIGN1, uGuard, 0, 0);
        return pAfter;
    }

    DEBOUT(("REParseNot expecting more, ERROR\n"));
    return NULL;
}

/*  REParseAbbrev - parse and expand an abbreviation
 *
 *  The expansions in pAbbrev are written in Z syntax, so the syntax flag is
 *  switched to Z while the expansion is parsed and restored on every exit.
 *  The pointer is advanced over the abbreviation token first, while the
 *  caller's syntax is still in effect, because the token length differs
 *  between the two syntaxes.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (missing or unknown abbreviation letter, or
 *              the expansion failed to parse)
 */
char * pascal INTERNAL REParseAbbrev(PACT pAction, register char *p)
{
    int iAbbrev;
    flagType fSavedSyntax;
    char *pResult = NULL;

    DEBOUT(("REParseAbbrev (%04x, %s)\n", pAction, p));

    p += RECharLen(p);

    fSavedSyntax = fZSyntax;
    fZSyntax = TRUE;

    /*  p[-1] is the abbreviation letter
     */
    if (p[-1] == '\0') {
        DEBOUT(("REParseAbbrev expecting abbrev char, ERROR\n"));
    } else {
        for (iAbbrev = 0; pAbbrev[iAbbrev] != NULL; iAbbrev++) {
            if (p[-1] == *pAbbrev[iAbbrev])
                break;
        }

        if (pAbbrev[iAbbrev] == NULL) {
            DEBOUT(("REParseAbbrev found invalid abbrev char %s, ERROR\n", p - 1));
        } else if (REParseSE(pAction, pAbbrev[iAbbrev] + 1) != NULL) {
            pResult = p;
        }
    }

    fZSyntax = fSavedSyntax;
    return pResult;
}

/*  REParseChar - parse a single character match
 *
 *  A leading backslash quotes the character that follows it.  A DBCS lead
 *  byte together with its trail byte is emitted as one LETTER.  A lead byte
 *  with no trail byte before the end of the pattern is emitted as a
 *  single-byte LETTER (the same rule RETranslate applies), so the returned
 *  pointer never passes the terminating NUL.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (end of pattern, e.g. a trailing backslash)
 */
char * pascal INTERNAL REParseChar(PACT pAction, register char *p)
{
    DEBOUT(("REParseChar (%04x, %s)\n", pAction, p));

    if (*p == '\\')
        p++;
    if (*p == '\0') {
        DEBOUT(("REParseChar expected more, ERROR\n"));
        return NULL;
    }
    if (IsDBCSLeadByte((BYTE)*p) && *(p + 1) != '\0') {
        (*pAction) (LETTER, 0, *p, *(p + 1));
        return p + 2;
    }

    (*pAction) (LETTER, 0, *p, 0);
    return p + 1;
}

/*  REParseClosure - parse a minimal match closure.  The match occurs by
 *  matching none, then one, ...
 *
 *  The simple RE is bracketed by the SMSTAR and SMSTAR1 actions and the
 *  closure character that follows it is stepped over.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (syntax error)
 */
char * pascal INTERNAL REParseClosure(PACT pAction, register char *p)
{
    UINT_PTR uLoop;
    char *pNext;

    DEBOUT(("REParseaClosure (%04x, %s)\n", pAction, p));

    uLoop = (*pAction) (SMSTAR, 0, 0, 0);

    pNext = REParseSE(pAction, p);
    if (pNext != NULL) {
        (*pAction) (SMSTAR1, uLoop, 0, 0);
        pNext += REClosureLen(pNext);
    }
    return pNext;
}

/*  REParseGreedy - parse a maximal-match closure.  The match occurs by
 *  matching the maximal number and then backing off as failures occur.
 *
 *  The simple RE is bracketed by the STAR and STAR1 actions and the closure
 *  character that follows it is stepped over.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text if successful
 *              NULL otherwise (syntax error)
 */
char * pascal INTERNAL REParseGreedy(PACT pAction, register char *p)
{
    UINT_PTR uLoop;
    char *pNext;

    DEBOUT(("REParseGreedy (%04x, %s)\n", pAction, p));

    uLoop = (*pAction) (STAR, 0, 0, 0);

    pNext = REParseSE(pAction, p);
    if (pNext != NULL) {
        (*pAction) (STAR1, uLoop, 0, 0);
        pNext += REClosureLen(pNext);
    }
    return pNext;
}

/*  REParsePower -  parse a power-closure.  This is merely the simple pattern
 *  repeated the number of times specified by the exponent.
 *
 *  The simple RE is first parsed with NullAction just to find where it ends;
 *  the exponent is read from the text after the closure character, and the
 *  simple RE is then parsed with the real action once per repetition.
 *
 *  pAction     Action to apply
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     pointer past parsed text (past the exponent digits) if
 *              successful
 *              NULL otherwise (syntax error, missing exponent, or negative
 *              exponent)
 */
char * pascal INTERNAL REParsePower(PACT pAction, char *p)
{
    register char *p1;
    int exp;

    DEBOUT(("REParsePower (%04x, %s)\n", pAction, p));

    /*     We have .se. POWER something.  Skip over the .se. and POWER
     *     to make sure that what follows is a valid number
     */
    p1 = REParseSE(NullAction, p);

    if (p1 == NULL)
        /*  Parse of .se. failed
         */
        return NULL;

    /*     skip POWER
     */
    p1 += REClosureLen(p1);

    if (*p1 == '\0') {
        DEBOUT(("REParsePower expecting more, ERROR\n"));
        return NULL;
    }

    /* try to parse off number */
    if (sscanf(p1, "%d", &exp) != 1) {
        DEBOUT(("REParsePower expecting number, ERROR\n"));
        return NULL;
    }

    /* a negative count is meaningless and would make the loop below run
     * until the counter wraps
     */
    if (exp < 0) {
        DEBOUT(("REParsePower negative exponent, ERROR\n"));
        return NULL;
    }

    p1 = strbskip(p1, digits);

    /* iterate the pattern the exponent number of times */
    while (exp-- > 0)
        if (REParseSE(pAction, p) == NULL)
            return NULL;
    return p1;
}

/*  NullAction - a do-nothing action.  Passed as the action routine when a
 *  parse is wanted only for the pointer it returns.
 *
 *  All four arguments are ignored.
 *
 *  Returns     0 always
 */
UINT_PTR  INTERNAL NullAction(OPTYPE type, UINT_PTR u,
                              unsigned char x, unsigned char y)
{
    /*  reference each parameter so none is reported as unused
     */
    (void) type;
    (void) u;
    (void) x;
    (void) y;

    return 0;
}

/*  REClosureChar - return the closure type that follows the simple regular
 *  expression at p.  REParseSE is called with a null action merely to find
 *  the character just beyond that simple regular expression.
 *
 *  Both syntaxes recognise '+' (CC_SMPLUS) and '*' (CC_SMCLOSURE).  Z syntax
 *  additionally recognises '^' (CC_POWER), '#' (CC_PLUS) and '@'
 *  (CC_CLOSURE).
 *
 *  p           character pointer to spot where parsing occurs
 *
 *  Returns     closure type (CC_xx) if a closure character follows
 *              CC_EMPTY if no closure character found
 *              CC_ERROR if the simple regular expression does not parse
 */
char  pascal INTERNAL REClosureChar(char *p)
{
    char *pAfter = REParseSE(NullAction, p);
    char ch;

    if (pAfter == NULL)
        return CC_ERROR;

    ch = *pAfter;

    /*  closures common to both syntaxes
     */
    if (ch == '+')
        return CC_SMPLUS;
    if (ch == '*')
        return CC_SMCLOSURE;

    /*  closures that exist only in Zibo syntax
     */
    if (fZSyntax) {
        if (ch == '^')
            return CC_POWER;
        if (ch == '#')
            return CC_PLUS;
        if (ch == '@')
            return CC_CLOSURE;
    }

    return CC_EMPTY;
}

/*  RECompile - compile a pattern into the internal machine.  Return a
 *  pointer to the match machine.
 *
 *  The compile works through module-level state (fZSyntax, RESize, REPat,
 *  REip, REArg, cArg), so the order of the steps below matters.
 *
 *  p       character pointer to pattern being compiled
 *  fCase   stored unchanged in the fCase field of the compiled pattern
 *  fZS     TRUE => p is in Z syntax, FALSE => p is in UNIX syntax
 *
 *  Returns:    pointer to the internal machine if compilation was successful
 *              NULL if syntax error or not enough memory for malloc
 */
struct patType *RECompile(char *p, flagType fCase, flagType fZS)
{
    if (!RE__hasBeenInitialized) { RE__ModuleInitialize(); }

    /*  select the syntax before anything looks at the pattern
     */
    fZSyntax = fZS;

    /*  REEstimate leaves its result in RESize; -1 means the estimate failed
     */
    REEstimate(p);

    DEBOUT(("Length is %04x\n", RESize));
    if (RESize == -1)
        return NULL;

    if ((REPat = (struct patType *) REmalloc(RESize)) == NULL)
        return NULL;

    /*  Fill the whole machine with -1 bytes, then zero both argument tables
     */
    Fill((char far *) REPat, -1, RESize);
    Fill((char far *) REPat->pArgBeg, 0, sizeof(REPat->pArgBeg));
    Fill((char far *) REPat->pArgEnd, 0, sizeof(REPat->pArgEnd));

    REip = REPat->code;
    REArg = 1;
    REPat->fCase = fCase;
    REPat->fUnix = (flagType)!fZS;

    /*  no tagged arguments have been completed yet
     */
    cArg = 0;

    CompileAction(PROLOG, 0, 0, 0);

    /*  NOTE(review): on a syntax error the block obtained from REmalloc is
     *  not released here and REPat still points at it -- confirm whether the
     *  caller or the allocator hook is expected to reclaim it.
     */
    if (REParseRE(CompileAction, p, NULL) == NULL)
        return NULL;

    CompileAction(EPILOG, 0, 0, 0);

#if DEBUG
    REDump(REPat);
#endif
    return REPat;
}

/*  Escaped - translate the character following a backslash, in the manner
 *  of UNIX C escape conventions.
 *
 *      t => tab        e => ESC (0x1B)     h => backspace (0x08)
 *      g => bell (0x07)    n => lf         r => cr
 *
 *  Every other character, backslash included, stands for itself.
 *
 *  c       character to be translated
 *
 *  Returns:    character as per above
 */
char  pascal INTERNAL Escaped(char c)
{
    char chOut = c;

    if (c == 't')
        chOut = '\t';
    else if (c == 'e')
        chOut = 0x1B;
    else if (c == 'h')
        chOut = 0x08;
    else if (c == 'g')
        chOut = 0x07;
    else if (c == 'n')
        chOut = '\n';
    else if (c == 'r')
        chOut = '\r';

    return chOut;
}

/*  REGetArg - copy argument string out from match.
 *
 *  The destination always receives a NUL-terminated string; it is empty when
 *  the argument's begin pointer is the "not set" marker (char *)-1.  The
 *  destination must be large enough for the argument plus the terminator.
 *
 *  pat     matched pattern
 *  i       index of argument to fetch, 0 is entire pattern
 *  p       destination of argument
 *
 *  Returns:    TRUE if successful, FALSE if i is out of range (negative or
 *              greater than MAXPATARG), in which case p is not written.
 */
flagType REGetArg(struct patType *pat, int i, char *p)
{
    int l = 0;

    /*  A negative index can arrive from a template such as $(-3); reject it
     *  before it is used to index the argument tables.
     */
    if (i < 0 || i > MAXPATARG)
        return FALSE;

    if (pat->pArgBeg[i] != (char *)-1) {
        l = RELength(pat, i);
        Move((char far *)pat->pArgBeg[i], (char far *)p, l);
    }
    p[l] = '\0';
    return TRUE;
}

/*  RETranslate - translate a pattern string and match structure into an
 *  output string.  During pattern search-and-replace, RETranslate is used
 *  to generate an output string based on an input match pattern and a template
 *  that directs the output.
 *
 *  The input match is any patType returned from RECompile that has been passed
 *  to fREMatch and that causes fREMatch to return TRUE.  The template string
 *  is any set of ascii chars.  The argument lead-in character is $ for
 *  patterns compiled in Z syntax and \ for patterns compiled in UNIX syntax
 *  (shown as $ below):
 *
 *      $$ is replaced with $
 *      $0 is replaced with the entire match string
 *      $1-$9 is replaced with the corresponding tagged (by {}) item from
 *          the match.
 *
 *  An alternative method is to specify the argument as:
 *
 *      $([w,]a) where a is the argument number (0-9) and w is an optional field
 *          width that will be used in a printf %ws format.  w may be
 *          negative; a may not.
 *
 *  Elsewhere a backslash introduces an escaped character (see Escaped).
 *
 *  The caller must supply a destination large enough for the result;
 *  RETranslateLength reports the size needed.
 *
 *  buf     pattern matched
 *  src     template for the match
 *  dst     destination of the translation
 *
 *  Returns:    TRUE if translation was successful, FALSE otherwise (no
 *              memory for the work buffer, malformed $(...), argument number
 *              out of range, or a trailing backslash)
 */
flagType RETranslate(struct patType *buf, register char *src, register char *dst)
{
    int i, w;
    char *work;
    char chArg = (char)(buf->fUnix ? '\\' : '$');

    work = REmalloc(MAXLINELEN);

    if (work == NULL)
        return FALSE;

    *dst = '\0';

    while (*src != '\0') {
        /*  Process tagged substitutions first.  The casts keep isdigit
         *  defined for bytes with the high bit set.
         */
        if (*src == chArg && (isdigit((unsigned char)src[1]) || src[1] == '(')) {
            /*     presume 0-width field */
            w = 0;

            /*     skip $ and char */
            src += 2;

            /*     if we saw $n */
            if (isdigit((unsigned char)src[-1]))
                i = src[-1] - '0';
            /*     else we saw $( */
            else {
                /*  get tagged expr number */
                i = atoi(src);

                /*  skip over number */
                if (*src == '-')
                    src++;
                src = strbskip(src, digits);

                /*  was there a comma? */
                if (*src == ',') {
                    /*     We saw field width, parse off expr number */
                    w = i;
                    i = atoi(++src);
                    src = strbskip(src, digits);
                }

                /*  We MUST end with a close paren */
                if (*src++ != ')') {
                    free(work);
                    return FALSE;
                }
            }
            /*     w is field width
             *     i is selected argument
             *
             *     $(-n) with no comma leaves a negative argument number;
             *     refuse it here so it is never used as a table index.
             */
            if (i < 0 || !REGetArg(buf, i, work)) {
                free(work);
                return FALSE;
            }
            sprintf(dst, "%*s", w, work);
            dst += strlen(dst);
        } else if (*src == '\\') {
            /* process escaped characters */
            src++;
            if (!*src) {
                /*  trailing backslash with nothing to escape */
                free(work);
                return FALSE;
            }
            *dst++ = Escaped(*src++);
        } else if (*src == chArg && src[1] == chArg) {
            /*  chArg quotes itself */
            *dst++ = chArg;
            src += 2;
        } else if (IsDBCSLeadByte((unsigned char)*src) && *(src + 1)) {
            /*  copy a double-byte character as a unit */
            *dst++ = *src++;
            *dst++ = *src++;
        } else
            *dst++ = *src++;
    }
    *dst = '\0';
    free(work);
    return TRUE;
}

/*  RETranslateLength - given a matched pattern and a replacement string
 *  return the length of the final replacement
 *
 *  The inputs have the same syntax/semantics as in RETranslate, and the same
 *  templates are rejected: a malformed $(...), an argument number that is
 *  negative or out of range, and a trailing backslash.
 *
 *  buf     pattern matched
 *  src     template for the match
 *
 *  Returns:    number of bytes in total replacement (not counting the
 *              terminating NUL), -1 if error
 */
int RETranslateLength(struct patType *buf, register char *src)
{
    int i, w;
    int length = 0;
    char chArg = (char)(buf->fUnix ? '\\' : '$');

    while (*src != '\0') {
        /*  Process tagged substitutions first.  The casts keep isdigit
         *  defined for bytes with the high bit set.
         */
        if (*src == chArg && (isdigit((unsigned char)src[1]) || src[1] == '(')) {
            w = 0;
            src += 2;
            if (isdigit((unsigned char)src[-1]))
                i = src[-1] - '0';
            else {
                i = atoi(src);
                if (*src == '-')
                    src++;
                src = strbskip(src, digits);
                if (*src == ',') {
                    w = i;
                    i = atoi(++src);
                    src = strbskip(src, digits);
                }
                if (*src++ != ')')
                    return -1;
            }
            /*     w is field width
             *     i is selected argument
             *
             *     $(-n) with no comma leaves a negative argument number;
             *     refuse it before it is used as a table index.
             */
            if (i < 0)
                return -1;
            i = RELength(buf, i);
            if (i < 0)
                /*  argument number too large */
                return -1;
            length += max(i, abs(w));
        } else if (*src == '\\') {
            /* process escaped characters; a backslash with nothing after it
             * is an error, and stepping two characters would pass the NUL
             */
            if (src[1] == '\0')
                return -1;
            src += 2;
            length++;
        } else if (*src == chArg && src[1] == chArg) {
            /*  chArg quotes itself */
            src += 2;
            length++;
        } else if (IsDBCSLeadByte((unsigned char)*src) && *(src + 1)) {
            length += 2;
            src += 2;
        } else {
            length++;
            src++;
        }
    }
    return length;
}

/*  RELength - return length of argument in match.
 *
 *  pat     matched pattern
 *  i       index of argument to examine, 0 is entire pattern
 *
 *  Returns:    length of ith argument, -1 if i is out-of-range (negative
 *              or greater than MAXPATARG).  An argument whose start is
 *              recorded as (char *)-1 has length 0.
 *
 *  NOTE(review): the upper bound is kept as "i > MAXPATARG"; confirm against
 *  the declared size of pArgBeg[]/pArgEnd[] that index MAXPATARG is valid.
 */
int RELength(struct patType *pat, int i)
{
    /*  Reject negative indices too: they would read before the arrays. */
    if (i < 0 || i > MAXPATARG)
        return -1;

    if (pat->pArgBeg[i] == (char *)-1)
        return 0;

    return (int)(pat->pArgEnd[i] - pat->pArgBeg[i]);
}

/*  REStart - return pointer to beginning of match.
 *
 *  pat     matched pattern
 *
 *  Returns:    pointer to the start of argument 0 (the whole match), or
 *              NULL when that start is recorded as (char *)-1.
 */
char *REStart(struct patType *pat)
{
    char *begin = pat->pArgBeg[0];

    if (begin == (char *)-1)
        return NULL;

    return begin;
}


//  void Fill(void FAR * a, char b, unsigned int c) {;}
//  void Move(void FAR * a, void FAR * b, unsigned int c) {;}
/*  XLTab - translation table indexed by character code.  It is filled in
 *  by RE__ModuleInitialize, which stores tolower() of each index.
 */
char XLTab[256];
/*  strbskip - skip leading characters that belong to a set.
 *
 *  a       string to scan
 *  b       set of characters to skip over
 *
 *  Returns:    pointer to the first character of a that is not in b, or to
 *              the terminating NUL of a when every character is in b.
 *
 *  The translate routines call this with the digit set to step over a
 *  number; returning a unchanged would leave them parked on the first digit
 *  and make every "(w,n)" substitution fail its closing-paren check.
 */
char * strbskip(char const * a, char const * b)
{
    return (char *)a + strspn(a, b);
}


/*  Fill - set a block of memory to a single byte value.
 *
 *  a       start of the block to fill
 *  b       byte value to store
 *  c       number of bytes to fill
 */
void Fill(void FAR * a, char b, unsigned int c)
{
    memset(a, b, c);
}


/*  Move - copy a block of memory; the regions may overlap.
 *
 *  a       source block
 *  b       destination block
 *  c       number of bytes to copy
 *
 *  Note the argument order: source first, destination second, the reverse
 *  of memmove().
 */
void Move(void FAR * a, void FAR * b, unsigned int c)
{
    memmove(b, a, c);
}


/*

* void  RE__ModuleInitialize (void)

* "Initialize the Regular Expression module.  Presently, this comprises
*  loading lowercase information into the global(!) array 'XLTab[]'."

* Answers:   <nothing>

* Requires:  true

* Ensures:   The global array 'XLTab[]' has, for each index, the ASCII
*            lowercase equivalent of that index (as defined by invoking
*            'tolower()' on each index value).

*            Only the *first* invocation of this method will do the
*            initialization procedure; subsequent invocations are legal
*            but have no effect.

* Modifies:  XLTab[]
*            RE__hasBeenInitialized

* Raises:    <nothing>

* COMMENTS:  There is a companion array 'XUTab[]' which we ignore because
*            the entire system ignores it, also.

*            We #include <ctype.h> just to be sure, even though it may
*            have already been pulled in somewhere else (surely *all*
*            header files watch for multiple inclusions...).

*            In keeping with windbg philosophy, we do *not* pay attention
*            to Unicode stuff.

*/

#include <ctype.h>    // RE__ModuleInitialize(): tolower()

static
void
RE__ModuleInitialize(void)
{
    int ch;

    /*  Only the first call does any work. */
    if (RE__hasBeenInitialized)
        return;

    /*  Store the lowercase equivalent of every index value. */
    for (ch = 0; ch < (int)sizeof(XLTab); ch++)
        XLTab[ch] = (char)tolower(ch);

    RE__hasBeenInitialized = TRUE;
}