// deblexr.c - replacement (portable) for deblexer.asm // Copyright (C) 1993, Microsoft Corporation // Revision History: // [] 27-Apr-1993 Dans Created #if 0 ; This module implements a very basic transition diagram lexer for ; use in the QC debugging expression evaluator. It is flexible enough ; to facilitate future expansion to include more operators. ; ; The state tables are fairly simple to operate. Consider, for example, ; the '>' symbol in C. This can be followed by '>', '=' or something ; else. If it is followed by '>', it can thereafter be followed by ; '=' or something else. In all, we have four possibilities: ; ; >, >=, >>, >>= ; ; The transition diagram would be something like: ; ; '>' '>' '=' ; start ----- state1 ----- state2 ----- token('>>=') ; | | ; | |other ; | +----- token('>>') ; | '=' ; +----- token('>=') ; | ; |other ; +----- token('>') ; ; Each entry in LexTable is a single character (thus, a transition to ; another state based on "char is digit 0..9" CANNOT be handled by this ; code -- that's why it's simple) followed by either the identifier ; INTERMEDIATE or ENDSTATE, indicating whether following that edge leads you ; to a new state or to an actual value (token). If it is followed by ; INTERMEDIATE, the next word must contain the offset of the new state ; table. If followed by ENDSTATE, the next word contains the token value. ; ; Thus, the above example would look like this (using the macro defined ; below): ; ; LexTable label byte ; ; LexEntry '>', INTERMEDIATE, ; ... ; (other entries) ; ... ; LexEntry TABLE_END, 0, 0 ; ; LTstate1 label byte ; ; LexEntry '>', INTERMEDIATE, ; LexEntry '=', ENDSTATE, TOK_GTEQ ; LexEntry OTHER, ENDSTATE, TOK_GT ; ; LTstate2 label byte ; ; LexEntry '=', ENDSTATE, TOK_GTGTEQ ; LexEntry OTHER, ENDSTATE, TOK_GTGT ; ; Note that for the intermediate state tables, a TABLE_END entry is ; unnecessary since the OTHER route is automatically taken. ; ; These routines do NOT handle identifiers or constants; only those ; symbol strings explicitly defined in the state tables will be ; recognized (i.e., only operators). ;------------------------------------------------------------ ; ;------------------------------------------------------------ ; Macro for clean lexer tables ;------------------------------------------------------------ LexEntry macro Character, StateType, NextTableOrTok db Character, StateType ifdef HOST32 dd NextTableOrTok else dw NextTableOrTok endif endm ;------------------------------------------------------------ ; Identifiers used for tables ;------------------------------------------------------------ INTERMEDIATE equ 1 ENDSTATE equ 2 #endif #include #include "debexpr.h" typedef struct LEXENT * PLEXENT; typedef struct LEXENT { unsigned char ch; unsigned char state; PLEXENT plexentNext; } LEXENT; /* ** Identifiers used for tables */ #define INTERMEDIATE 1 #define ENDSTATE 2 /* ** The use of the following constants assumes that the character string ** being lexed contains only ASCII values 00h <= val <= 7Fh. */ #define OTHER ((unsigned char) 0xFE) #define TABLE_END ((unsigned char) 0xFF) /* **; Second state intermediate state tables */ LEXENT LTltlt[] = { '=', ENDSTATE, (PLEXENT) OP_shleq, OTHER, ENDSTATE, (PLEXENT) OP_shl }; LEXENT LTgtgt[] = { '=', ENDSTATE, (PLEXENT) OP_shreq, OTHER, ENDSTATE, (PLEXENT) OP_shr }; LEXENT LTdashgt[] = { '*', ENDSTATE, (PLEXENT) OP_pmember, OTHER, ENDSTATE, (PLEXENT) OP_pointsto }; /* ** First state intermediate state tables */ LEXENT LTdash[] = { '>', INTERMEDIATE, (PLEXENT) LTdashgt, '=', ENDSTATE, (PLEXENT) OP_minuseq, '-', ENDSTATE, (PLEXENT) OP_decr, OTHER, ENDSTATE, (PLEXENT) OP_negate }; LEXENT LTbang[] = { '=', ENDSTATE, (PLEXENT) OP_bangeq, OTHER, ENDSTATE, (PLEXENT) OP_bang }; LEXENT LTstar[] = { '=', ENDSTATE, (PLEXENT) OP_multeq, OTHER, ENDSTATE, (PLEXENT) OP_fetch }; LEXENT LTampersand[] = { '&', ENDSTATE, (PLEXENT) OP_andand, '=', ENDSTATE, (PLEXENT) OP_andeq, OTHER, ENDSTATE, (PLEXENT) OP_addrof }; LEXENT LTslash[] = { '=', ENDSTATE, (PLEXENT) OP_diveq, OTHER, ENDSTATE, (PLEXENT) OP_div }; LEXENT LTpct[] = { '=', ENDSTATE, (PLEXENT) OP_modeq, OTHER, ENDSTATE, (PLEXENT) OP_mod }; LEXENT LTplus[] = { '=', ENDSTATE, (PLEXENT) OP_pluseq, '+', ENDSTATE, (PLEXENT) OP_incr, OTHER, ENDSTATE, (PLEXENT) OP_uplus }; LEXENT LTlessthan[] = { '<', INTERMEDIATE, (PLEXENT) LTltlt, '=', ENDSTATE, (PLEXENT) OP_lteq, OTHER, ENDSTATE, (PLEXENT) OP_lt }; LEXENT LTgreaterthan[] = { '>', INTERMEDIATE, (PLEXENT) LTgtgt, '=', ENDSTATE, (PLEXENT) OP_gteq, OTHER, ENDSTATE, (PLEXENT) OP_gt }; LEXENT LTequals[] = { '=', ENDSTATE, (PLEXENT) OP_eqeq, OTHER, ENDSTATE, (PLEXENT) OP_eq, }; LEXENT LTcaret[] = { '=', ENDSTATE, (PLEXENT) OP_xoreq, OTHER, ENDSTATE, (PLEXENT) OP_xor }; LEXENT LTpipe[] = { '|', ENDSTATE, (PLEXENT) OP_oror, '=', ENDSTATE, (PLEXENT) OP_oreq, OTHER, ENDSTATE, (PLEXENT) OP_or }; LEXENT LTdot[] = { '*', ENDSTATE, (PLEXENT) OP_dotmember, OTHER, ENDSTATE, (PLEXENT) OP_dot }; LEXENT LTcolon[] = { ':', ENDSTATE, (PLEXENT) OP_uscope, '>', ENDSTATE, (PLEXENT) OP_baseptr, OTHER, ENDSTATE, (PLEXENT) OP_segop }; /* ** main Lexer table */ LEXENT LexTable [] = { '+', INTERMEDIATE, (PLEXENT) LTplus, '-', INTERMEDIATE, (PLEXENT) LTdash, '*', INTERMEDIATE, (PLEXENT) LTstar, '&', INTERMEDIATE, (PLEXENT) LTampersand, '/', INTERMEDIATE, (PLEXENT) LTslash, '.', INTERMEDIATE, (PLEXENT) LTdot, '!', INTERMEDIATE, (PLEXENT) LTbang, '~', ENDSTATE, (PLEXENT) OP_tilde, '%', INTERMEDIATE, (PLEXENT) LTpct, '<', INTERMEDIATE, (PLEXENT) LTlessthan, '>', INTERMEDIATE, (PLEXENT) LTgreaterthan, '=', INTERMEDIATE, (PLEXENT) LTequals, '^', INTERMEDIATE, (PLEXENT) LTcaret, '|', INTERMEDIATE, (PLEXENT) LTpipe, ':', INTERMEDIATE, (PLEXENT) LTcolon, ';', ENDSTATE, (PLEXENT) OP_lowprec, ',', ENDSTATE, (PLEXENT) OP_comma, '(', ENDSTATE, (PLEXENT) OP_lparen, ')', ENDSTATE, (PLEXENT) OP_rparen, '[', ENDSTATE, (PLEXENT) OP_lbrack, ']', ENDSTATE, (PLEXENT) OP_rbrack, '{', ENDSTATE, (PLEXENT) OP_lcurly, '}', ENDSTATE, (PLEXENT) OP_rcurly, TABLE_END, 0, 0 }; /* ; ptoken_t ParseOp (pb, pTok) ; unsigned char *pb; ; token_t *pTok; ; ; Scans the input string (pb) for the next token and returns ; the token type. Also returns the number of characters in ; the token so that the caller can advance the input stream ; before calling again. The string need not be NULL-terminated: ; it will only scan as deep as the lexer tables indicate. ;------------------------------------------------------------ */ EESTATUS ParseOp ( unsigned char * pb, token_t * lpTok ) { PLEXENT plexent = &LexTable[0]; /* ** Skip over any leading white space in the string ** as this is not part of the next token */ while (*pb == ' ') pb++; while ( TRUE ) { /* ** Check for the end of this lexer table. If we ** run off the table then we can not recognized this ** token and return an error. */ if (plexent->ch == TABLE_END) { lpTok->opTok = OP_badtok; return /*EESYNTAX*/ 10; } /* ** Check for the wild card marker. This means that ** we have found a complete token prior to this character. ** An example of this is 'ch == OTHER) { // Assert(plexent->state == ENDSTATE); lpTok->pbEnd = (char *) pb; lpTok->opTok = (op_t) (INT_PTR) plexent->plexentNext; return EENOERROR; } /* ** Check for a match of this character against ** the parser table */ if (plexent->ch == *pb) { /* ** It matches -- see if we have found a complete token */ pb++; if (plexent->state == ENDSTATE) { lpTok->pbEnd = (char *) pb; lpTok->opTok = (op_t) (INT_PTR) plexent->plexentNext; return EENOERROR; } else { plexent = plexent->plexentNext; } } else { /* ** Move to the next entry in the lexer table */ plexent++; } } } /* ParseOp() */