347 lines
9.2 KiB
C
347 lines
9.2 KiB
C
|
|
// deblexr.c - replacement (portable) for deblexer.asm
|
|
|
|
// Copyright (C) 1993, Microsoft Corporation
|
|
|
|
// Revision History:
|
|
|
|
// [] 27-Apr-1993 Dans Created
|
|
|
|
|
|
|
|
#if 0
|
|
; This module implements a very basic transition diagram lexer for
|
|
; use in the QC debugging expression evaluator. It is flexible enough
|
|
; to facilitate future expansion to include more operators.
|
|
;
|
|
; The state tables are fairly simple to operate. Consider, for example,
|
|
; the '>' symbol in C. This can be followed by '>', '=' or something
|
|
; else. If it is followed by '>', it can thereafter be followed by
|
|
; '=' or something else. In all, we have four possibilities:
|
|
;
|
|
; >, >=, >>, >>=
|
|
;
|
|
; The transition diagram would be something like:
|
|
;
|
|
;                '>'            '>'            '='
;       start ------- state1 ------- state2 ------- token('>>=')
;                       |              |
;                       |              |other
;                       |              +----- token('>>')
;                       |  '='
;                       +----- token('>=')
;                       |
;                       |other
;                       +----- token('>')
|
|
;
|
|
; Each entry in LexTable is a single character (thus, a transition to
|
|
; another state based on "char is digit 0..9" CANNOT be handled by this
|
|
; code -- that's why it's simple) followed by either the identifier
|
|
; INTERMEDIATE or ENDSTATE, indicating whether following that edge leads you
|
|
; to a new state or to an actual value (token). If it is followed by
|
|
; INTERMEDIATE, the next word must contain the offset of the new state
|
|
; table. If followed by ENDSTATE, the next word contains the token value.
|
|
;
|
|
; Thus, the above example would look like this (using the macro defined
|
|
; below):
|
|
;
|
|
; LexTable label byte
|
|
;
|
|
; LexEntry '>', INTERMEDIATE, <dataOFFSET LTstate1>
|
|
; ...
|
|
; (other entries)
|
|
; ...
|
|
; LexEntry TABLE_END, 0, 0
|
|
;
|
|
; LTstate1 label byte
|
|
;
|
|
; LexEntry '>', INTERMEDIATE, <dataOFFSET LTstate2>
|
|
; LexEntry '=', ENDSTATE, TOK_GTEQ
|
|
; LexEntry OTHER, ENDSTATE, TOK_GT
|
|
;
|
|
; LTstate2 label byte
|
|
;
|
|
; LexEntry '=', ENDSTATE, TOK_GTGTEQ
|
|
; LexEntry OTHER, ENDSTATE, TOK_GTGT
|
|
;
|
|
; Note that for the intermediate state tables, a TABLE_END entry is
|
|
; unnecessary since the OTHER route is automatically taken.
|
|
;
|
|
; These routines do NOT handle identifiers or constants; only those
|
|
; symbol strings explicitly defined in the state tables will be
|
|
; recognized (i.e., only operators).
|
|
;------------------------------------------------------------
|
|
;
|
|
;------------------------------------------------------------
|
|
; Macro for clean lexer tables
|
|
;------------------------------------------------------------
|
|
|
|
LexEntry macro Character, StateType, NextTableOrTok
|
|
|
|
db Character, StateType
|
|
ifdef HOST32
|
|
dd NextTableOrTok
|
|
else
|
|
dw NextTableOrTok
|
|
endif
|
|
|
|
endm
|
|
|
|
;------------------------------------------------------------
|
|
; Identifiers used for tables
|
|
;------------------------------------------------------------
|
|
|
|
INTERMEDIATE equ 1
|
|
ENDSTATE equ 2
|
|
|
|
|
|
#endif
|
|
|
|
#include <stddef.h>
|
|
#include "debexpr.h"
|
|
|
|
/*
** One edge of the lexer transition diagram.
**
**  ch          - character that selects this edge, or one of the
**                markers OTHER / TABLE_END
**  state       - INTERMEDIATE or ENDSTATE
**  plexentNext - for INTERMEDIATE edges, the state table to continue in;
**                for ENDSTATE edges, the operator token value stored
**                in the pointer via a cast
*/

struct LEXENT;
typedef struct LEXENT   LEXENT;
typedef LEXENT *        PLEXENT;

struct LEXENT {
    unsigned char   ch;
    unsigned char   state;
    PLEXENT         plexentNext;
};
|
|
|
|
/*
** Edge kinds stored in the state field of a lexer table entry
*/

#define INTERMEDIATE    1   /* edge leads to another state table      */
#define ENDSTATE        2   /* edge completes a token (operator)      */
|
|
|
|
/*
** Marker values for the ch field of a lexer table entry.
**
** Using these values as markers assumes that the character string
** being lexed contains only ASCII values 00h <= val <= 7Fh.
*/

#define OTHER       ((unsigned char) 0xFE)  /* wild card: any other character */
#define TABLE_END   ((unsigned char) 0xFF)  /* last entry of the main table   */
|
|
|
|
/*
** Second state intermediate state tables
*/
|
|
|
|
LEXENT LTltlt[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_shleq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_shl
|
|
};
|
|
|
|
LEXENT LTgtgt[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_shreq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_shr
|
|
};
|
|
|
|
LEXENT LTdashgt[] = {
|
|
'*', ENDSTATE, (PLEXENT) OP_pmember,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_pointsto
|
|
};
|
|
|
|
/*
|
|
** First state intermediate state tables
|
|
*/
|
|
|
|
LEXENT LTdash[] = {
|
|
'>', INTERMEDIATE, (PLEXENT) LTdashgt,
|
|
'=', ENDSTATE, (PLEXENT) OP_minuseq,
|
|
'-', ENDSTATE, (PLEXENT) OP_decr,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_negate
|
|
};
|
|
|
|
LEXENT LTbang[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_bangeq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_bang
|
|
};
|
|
|
|
LEXENT LTstar[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_multeq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_fetch
|
|
};
|
|
|
|
LEXENT LTampersand[] = {
|
|
'&', ENDSTATE, (PLEXENT) OP_andand,
|
|
'=', ENDSTATE, (PLEXENT) OP_andeq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_addrof
|
|
};
|
|
|
|
LEXENT LTslash[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_diveq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_div
|
|
};
|
|
|
|
LEXENT LTpct[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_modeq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_mod
|
|
};
|
|
|
|
LEXENT LTplus[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_pluseq,
|
|
'+', ENDSTATE, (PLEXENT) OP_incr,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_uplus
|
|
};
|
|
|
|
LEXENT LTlessthan[] = {
|
|
'<', INTERMEDIATE, (PLEXENT) LTltlt,
|
|
'=', ENDSTATE, (PLEXENT) OP_lteq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_lt
|
|
};
|
|
|
|
LEXENT LTgreaterthan[] = {
|
|
'>', INTERMEDIATE, (PLEXENT) LTgtgt,
|
|
'=', ENDSTATE, (PLEXENT) OP_gteq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_gt
|
|
};
|
|
|
|
LEXENT LTequals[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_eqeq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_eq,
|
|
};
|
|
|
|
LEXENT LTcaret[] = {
|
|
'=', ENDSTATE, (PLEXENT) OP_xoreq,
|
|
|
|
OTHER, ENDSTATE, (PLEXENT) OP_xor
|
|
};
|
|
|
|
LEXENT LTpipe[] = {
|
|
'|', ENDSTATE, (PLEXENT) OP_oror,
|
|
'=', ENDSTATE, (PLEXENT) OP_oreq,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_or
|
|
};
|
|
|
|
LEXENT LTdot[] = {
|
|
'*', ENDSTATE, (PLEXENT) OP_dotmember,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_dot
|
|
};
|
|
|
|
LEXENT LTcolon[] = {
|
|
':', ENDSTATE, (PLEXENT) OP_uscope,
|
|
'>', ENDSTATE, (PLEXENT) OP_baseptr,
|
|
OTHER, ENDSTATE, (PLEXENT) OP_segop
|
|
};
|
|
|
|
|
|
/*
|
|
** main Lexer table
|
|
*/
|
|
|
|
LEXENT LexTable [] = {
|
|
'+', INTERMEDIATE, (PLEXENT) LTplus,
|
|
'-', INTERMEDIATE, (PLEXENT) LTdash,
|
|
'*', INTERMEDIATE, (PLEXENT) LTstar,
|
|
'&', INTERMEDIATE, (PLEXENT) LTampersand,
|
|
'/', INTERMEDIATE, (PLEXENT) LTslash,
|
|
'.', INTERMEDIATE, (PLEXENT) LTdot,
|
|
'!', INTERMEDIATE, (PLEXENT) LTbang,
|
|
'~', ENDSTATE, (PLEXENT) OP_tilde,
|
|
'%', INTERMEDIATE, (PLEXENT) LTpct,
|
|
'<', INTERMEDIATE, (PLEXENT) LTlessthan,
|
|
'>', INTERMEDIATE, (PLEXENT) LTgreaterthan,
|
|
'=', INTERMEDIATE, (PLEXENT) LTequals,
|
|
'^', INTERMEDIATE, (PLEXENT) LTcaret,
|
|
'|', INTERMEDIATE, (PLEXENT) LTpipe,
|
|
':', INTERMEDIATE, (PLEXENT) LTcolon,
|
|
';', ENDSTATE, (PLEXENT) OP_lowprec,
|
|
',', ENDSTATE, (PLEXENT) OP_comma,
|
|
'(', ENDSTATE, (PLEXENT) OP_lparen,
|
|
')', ENDSTATE, (PLEXENT) OP_rparen,
|
|
'[', ENDSTATE, (PLEXENT) OP_lbrack,
|
|
']', ENDSTATE, (PLEXENT) OP_rbrack,
|
|
'{', ENDSTATE, (PLEXENT) OP_lcurly,
|
|
'}', ENDSTATE, (PLEXENT) OP_rcurly,
|
|
|
|
TABLE_END, 0, 0
|
|
};
|
|
|
|
|
|
/*
; EESTATUS ParseOp (pb, lpTok)
; unsigned char *pb;
; token_t *lpTok;
;
; Scans the input string (pb) for the next operator token and stores
; the token type in lpTok->opTok. Also stores a pointer to the first
; character past the token in lpTok->pbEnd so that the caller can
; advance the input stream before calling again. The string need not
; be NUL-terminated: it will only scan as deep as the lexer tables
; indicate.
;
; Returns EENOERROR when an operator is recognized. Otherwise sets
; lpTok->opTok to OP_badtok and returns 10 (marked EESYNTAX in the code).
;------------------------------------------------------------
*/
|
|
|
|
EESTATUS
|
|
ParseOp (
|
|
unsigned char * pb,
|
|
token_t * lpTok
|
|
)
|
|
{
|
|
PLEXENT plexent = &LexTable[0];
|
|
|
|
/*
|
|
** Skip over any leading white space in the string
|
|
** as this is not part of the next token
|
|
*/
|
|
|
|
while (*pb == ' ')
|
|
pb++;
|
|
|
|
while ( TRUE ) {
|
|
/*
|
|
** Check for the end of this lexer table. If we
|
|
** run off the table then we can not recognized this
|
|
** token and return an error.
|
|
*/
|
|
|
|
if (plexent->ch == TABLE_END) {
|
|
lpTok->opTok = OP_badtok;
|
|
return /*EESYNTAX*/ 10;
|
|
}
|
|
|
|
/*
|
|
** Check for the wild card marker. This means that
|
|
** we have found a complete token prior to this character.
|
|
** An example of this is '<a'.
|
|
*/
|
|
|
|
if (plexent->ch == OTHER) {
|
|
// Assert(plexent->state == ENDSTATE);
|
|
lpTok->pbEnd = (char *) pb;
|
|
lpTok->opTok = (op_t) (INT_PTR) plexent->plexentNext;
|
|
return EENOERROR;
|
|
}
|
|
|
|
/*
|
|
** Check for a match of this character against
|
|
** the parser table
|
|
*/
|
|
|
|
if (plexent->ch == *pb) {
|
|
/*
|
|
** It matches -- see if we have found a complete token
|
|
*/
|
|
|
|
pb++;
|
|
if (plexent->state == ENDSTATE) {
|
|
lpTok->pbEnd = (char *) pb;
|
|
lpTok->opTok = (op_t) (INT_PTR) plexent->plexentNext;
|
|
return EENOERROR;
|
|
}
|
|
else {
|
|
plexent = plexent->plexentNext;
|
|
}
|
|
}
|
|
else {
|
|
/*
|
|
** Move to the next entry in the lexer table
|
|
*/
|
|
|
|
plexent++;
|
|
}
|
|
}
|
|
} /* ParseOp() */
|