1103 lines
26 KiB
C
Raw Normal View History

2001-01-01 00:00:00 +01:00
/* RCPP - Resource Compiler Pre-Processor for NT system */
/* SCANNER.C - Routines for token scanning */
/* 29-Nov-90 w-BrianM Update for NT from PM SDK RCPP */
#include "rc.h"
/*
 * ABS(x) - absolute value of x.
 * The argument and the whole expansion are parenthesised so that compound
 * expressions such as ABS(a - b) expand correctly.  x is still evaluated
 * twice, so it must not have side effects.
 */
#define ABS(x) (((x) > 0) ? (x) : -1 * (x))
#define ALERT_CHAR L'\007' /* ANSI alert character is ASCII BEL */
/* Count of newlines seen by do_newline(); passed to UpdateStatus() from there. */
ULONG lCPPTotalLinenumber = 0;
extern int vfCurrFileType; //- Added for 16-bit file support.
/* Local Function Prototypes */
token_t c_size(long); /* token type for an unsuffixed decimal constant */
int ctoi(int); /* digit character -> numeric value */
int escape(int); /* translate the character(s) following a backslash */
token_t get_real(PWCHAR); /* gather fraction/exponent of a real constant */
token_t l_size(long); /* token type for an 'L'-suffixed constant */
long matol(PWCHAR, int); /* digit string -> long in the given radix */
token_t uc_size(long); /* token type for an unsuffixed octal/hex constant */
token_t ul_size(long); /* token type for a 'U'-suffixed constant */
void skip_1comment(void); /* skip to the end of a slash-star comment */
/*
 * local_c_hash - hash a NUL-terminated wide-character name.
 * The hash is the sum of (ch & HASH_MASK) over every character of the name.
 */
hash_t
local_c_hash(
    REG WCHAR *name
    )
{
    REG hash_t sum = 0;

    for( ; *name; name++) {
        sum += (*name & HASH_MASK);
    }
    return(sum);
}
/*
* GETID - Get an identifier or keyword.
* (we know that we're given at least 1 id char)
* in addition, we'll hash the value using 'c'.
*
* c is the first identifier character, already read by the caller.
* The identifier is gathered into Reuse_W and NUL terminated.  On return
* Reuse_W_hash holds the sum of (ch & HASH_MASK) over the stored characters
* and Reuse_W_length holds the stored length including the terminator.
*/
void
getid(
REG UINT c
)
{
REG WCHAR *p;
p = Reuse_W;
*p++ = (WCHAR)c;
c &= HASH_MASK; /* from here on c is the running hash, not a character */
repeat:
while(LXC_IS_IDENT(*p = GETCH())) { /* while it's an id char . . . */
c += (*p & HASH_MASK); /* hash it */
p++;
}
/* *p now holds the first character that is not an identifier character */
if(*p != EOS_CHAR) {
/* backslash followed by a newline: keep scanning; the next read overwrites *p */
if((*p == L'\\') && (checknl())) {
goto repeat;
}
UNGETCH();
/* NOTE(review): this limit is tested only after the characters were stored -- confirm Reuse_W has slack past LIMIT() */
if(p >= LIMIT(Reuse_W)) {
strcpy (Msg_Text, GET_MSG (1067));
fatal(1067);
}
if( ((p - Reuse_W) > LIMIT_ID_LENGTH) && ( ! Prep )) {
/* too long and not just preprocessing: cut the name and rehash the shortened text */
p = Reuse_W + LIMIT_ID_LENGTH;
*p++ = L'\0';
c = local_c_hash(Reuse_W);
Msg_Temp = GET_MSG (4011);
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, Reuse_W);
warning(4011); /* id truncated */
} else {
*p++ = L'\0';
}
Reuse_W_hash = (hash_t)c;
Reuse_W_length = (UINT)(p - Reuse_W);
return;
}
/* EOS_CHAR: a nonzero io_eob() is fatal here; otherwise resume scanning */
if(io_eob()) { /* end of file in middle of id */
strcpy (Msg_Text, GET_MSG (1004));
fatal(1004);
}
goto repeat;
}
/*
** prep_string : outputs char/string constants when preprocessing only
**
** c is the opening quote character, already read by the caller.  The
** constant is copied through Reuse_W to OUTPUTFILE with myfwrite, quotes
** included.  Copying stops at the matching quote, or at a newline, which
** is pushed back.
*/
void
prep_string(
REG WCHAR c
)
{
REG WCHAR *p_buf;
int term_char;
p_buf = Reuse_W;
term_char = c;
*p_buf++ = c; /* save the open quote */
for(;;) {
switch(CHARMAP(c = GETCH())) {
case LX_DQUOTE:
case LX_SQUOTE:
if(c == (WCHAR)term_char) {
*p_buf++ = (WCHAR)term_char;/* save the terminating quote */
goto out_of_loop;
}
/* the other kind of quote is ordinary text; stored below */
break;
case LX_BACKSLASH:
/* NOTE(review): c is stored here and again by the store after the switch -- confirm the doubling is intended */
*p_buf++ = c;
break;
case LX_CR:
/* carriage returns are not copied */
continue;
case LX_NL:
/* constant not closed on this line: push the newline back and stop */
UNGETCH();
goto out_of_loop;
case LX_EOS:
if(c == L'\\') {
/* keep the backslash, then fetch the next character to be stored below */
*p_buf++ = c;
c = get_non_eof();
break;
}
handle_eos();
continue;
}
*p_buf++ = c;
/* buffer is nearly full: write out what has been gathered and start over */
if(p_buf >= &Reuse_W[MED_BUFFER - 1]) {
*p_buf = L'\0';
myfwrite(Reuse_W, (size_t)(p_buf - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
p_buf = Reuse_W;
}
}
out_of_loop:
*p_buf = L'\0';
myfwrite(Reuse_W, (size_t)(p_buf - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
}
/*
** char_const : gather up a character constant
** we're called after finding the opening single quote.
**
** Returns the token type (L_CUNSIGNED when Jflag is set, otherwise
** L_CINTEGER) and leaves the constant, built with build_const, in
** yylval.yy_tree.  Errors reported here: 2137 (empty constant),
** 2001 (newline in constant), 2015 (too many characters) and
** 2016 (missing closing quote).
*/
token_t
char_const(
    void
    )
{
    REG WCHAR c;
    value_t value;
    token_t tok;

    tok = (token_t)(Jflag ? L_CUNSIGNED : L_CINTEGER);
    /*
    ** The LX_BACKSLASH case below assigns nothing, so give the value a
    ** defined starting point; build_const must never see an
    ** indeterminate value.
    */
    value.v_long = 0;

first_switch:
    switch(CHARMAP(c = GETCH())) {
    case LX_BACKSLASH:
        break;
    case LX_SQUOTE:
        strcpy (Msg_Text, GET_MSG (2137)); //"empty character constant"
        error(2137);
        value.v_long = 0;
        UNGETCH();              /* let the closing-quote check below see it */
        break;
    case LX_EOS:                /* ??? assumes i/o buffering > 1 char */
        if(handle_eos() != BACKSLASH_EOS) {
            goto first_switch;
        }
        value.v_long = escape(get_non_eof());
        if( tok == L_CUNSIGNED ) {      /* don't sign extend */
            value.v_long &= 0xff;
        }
        break;
    case LX_NL:
        /* newline in character constant */
        strcpy (Msg_Text, GET_MSG (2001));
        error (2001);
        UNGETCH();
        /*
        ** FALLTHROUGH
        */
    default:
        value.v_long = c;
        break;
    }

    /* the constant must be closed by a single quote right here */
    if((c = get_non_eof()) != L'\'') {
        strcpy (Msg_Text, GET_MSG (2015));
        error (2015);           /* too many chars in constant */
        /* skip ahead to the closing quote, giving up at end of line */
        do {
            if(c == L'\n') {
                strcpy (Msg_Text, GET_MSG (2016));
                error(2016);    /* missing closing ' */
                break;
            }
        } while((c = get_non_eof()) != L'\'');
    }
    yylval.yy_tree = build_const(tok, &value);
    return(tok);
}
/*
** str_const : gather up a string constant
**
** Reads characters up to the closing double quote (or a newline, which is
** an error) into Macro_buffer.  The result is left in yylval.yy_string:
** str_ptr points at Macro_buffer and str_len counts the stored characters
** including the terminating L'\0'.
*/
void
str_const(
VOID
)
{
REG WCHAR c;
REG PWCHAR p_buf;
int not_warned_yet = TRUE;
p_buf = yylval.yy_string.str_ptr = Macro_buffer;
/*
** Is it possible that reading this string during a rescan will
** overwrite the expansion being rescanned? No, because a macro
** expansion is limited to the top half of Macro_buffer.
** For Macro_depth > 0, this is like copying the string from
** somewhere in the top half of Macro_buffer to the bottom half
** of Macro_buffer.
** Note that the restriction on the size of an expanded macro is
** stricter than the limit on an L_STRING length. An expanded
** macro is limited to around 1019 bytes, but an L_STRING is
** limited to 2043 bytes.
*/
for(;;) {
switch(CHARMAP(c = GETCH())) {
case LX_NL:
/* unterminated string: push the newline back, report it, then finish as if closed */
UNGETCH();
strcpy (Msg_Text, GET_MSG (2001));
error(2001);
/*
** FALLTHROUGH
*/
case LX_DQUOTE:
*p_buf++ = L'\0';
yylval.yy_string.str_len = (USHORT)(p_buf-yylval.yy_string.str_ptr);
return;
break; /* not reached */
case LX_EOS:
if(handle_eos() != BACKSLASH_EOS) {
continue;
}
/* while InInclude is set no escape translation is done; c is stored as read */
if(InInclude) {
break;
}
else {
c = (WCHAR)escape(get_non_eof()); /* process escaped char */
}
break;
}
/* store c unless the length limit is passed; excess characters are read and dropped */
if(p_buf - Macro_buffer > LIMIT_STRING_LENGTH) {
if( not_warned_yet ) {
strcpy (Msg_Text, GET_MSG (4009));
warning(4009); /* string too big, truncating */
not_warned_yet = FALSE;
}
} else {
*p_buf++ = c;
}
}
}
/*
** do_newline : does work after a newline has been found.
**
** Counts the line, then consumes what may legally start the following
** lines: byte order marks, carriage returns, '#' directives (handed to
** preprocess()), comments, blank lines and white space.  Returns with the
** first other character pushed back, or when io_eob() reports nonzero.
*/
void
do_newline(
void
)
{
++Linenumber;
for(;;) {
switch(CHARMAP(GETCH())) {
case LX_BOM: // ignore Byte Order Mark
break;
case LX_CR:
break;
case LX_POUND:
preprocess();
break;
case LX_SLASH:
/* a '/' that does not start a comment ends the scan */
if( ! skip_comment()) {
goto leave_do_newline;
}
break;
case LX_NL:
/* another newline: count it, updating status whenever the masked count is zero */
if ((lCPPTotalLinenumber++ & RC_PREPROCESS_UPDATE) == 0)
UpdateStatus(1, lCPPTotalLinenumber);
Linenumber++;
// must manually write '\r' with '\n' when writing 16-bit strings
if( Prep ) { /* preprocessing only */
myfwrite(L"\r", sizeof(WCHAR), 1, OUTPUTFILE);
}
/*
** FALLTHROUGH
*/
case LX_WHITE:
if( Prep ) { /* preprocessing only, output whitespace */
/* presumably PREVCH() is the character just read -- TODO confirm */
myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
} else {
/* not preprocessing: swallow the whole run of white space */
do {
;
} while(LXC_IS_WHITE(GETCH()));
UNGETCH();
}
break;
case LX_EOS:
if(PREVCH() == EOS_CHAR || PREVCH() == CONTROL_Z) {
if(io_eob()) { /* leaves us pointing at a valid char */
return;
}
break;
}
/* otherwise a newline after the character continues the line */
if(checknl()) {
continue;
}
/* it's a backslash */
/*
** FALLTHROUGH
*/
default: /* first non-white is not a '#', leave */
leave_do_newline:
UNGETCH();
return;
}
}
}
/*
* GETNUM - Get a number from the input stream.
* ARGUMENTS
* c - the first character of the number, already read by the caller.
* The radix is derived here: "0x"/"0X" selects 16, any other leading
* '0' selects 8, anything else selects 10.
* RETURNS - type of the token.  L_NOTOKEN is returned when the text was
* only gathered (into the buffer at Exp_ptr, or written to OUTPUTFILE
* when Prep is set).  A number containing '.' or an exponent is handed
* to get_real() and its result returned.
* SIDE EFFECTS -
* does push back on the input stream.
* stores the constant in yylval.yy_tree when a value is produced
* uses buffer Reuse_W (or the buffer at Exp_ptr)
* DESCRIPTION -
* Accumulate the number according to the rules for each radix,
* pick up any L/U suffix, and convert to binary with matol().
* AUTHOR - Ralph Ryan, Sept. 8, 1982
* MODIFICATIONS - none
*/
token_t
getnum(
REG WCHAR c
)
{
REG WCHAR *p;
WCHAR *start;
int radix;
token_t tok;
value_t value;
tok = L_CINTEGER;
/* gather at Exp_ptr while Tiny_lexer_nesting is nonzero, otherwise in Reuse_W */
start = (Tiny_lexer_nesting ? Exp_ptr : Reuse_W);
p = start;
if( c == L'0' ) {
c = get_non_eof();
if( IS_X(c) ) {
radix = 16;
/* the "0x" prefix is kept only for preprocessed output; matol() wants bare digits */
if( Prep ) {
*p++ = L'0';
*p++ = L'x';
}
for(c = get_non_eof(); LXC_IS_XDIGIT(c); c = get_non_eof()) {
/* no check for overflow? */
*p++ = c;
}
/* nothing stored means no hex digits followed the prefix */
/* NOTE(review): with Prep set p is already past "0x", so this test cannot fire in that mode -- confirm intended */
if((p == Reuse_W) && (Tiny_lexer_nesting == 0)) {
strcpy (Msg_Text, GET_MSG (2153));
error(2153);
}
goto check_suffix;
} else {
radix = 8;
*p++ = L'0'; /* for preprocessing or 0.xxx case */
}
} else {
radix = 10;
}
while( LXC_IS_DIGIT((WCHAR)c) ) {
*p++ = c;
c = get_non_eof();
}
/* a '.' or an exponent letter makes this a real constant */
if( IS_DOT(c) || IS_E(c) ) {
UNGETCH();
return(get_real(p));
}
check_suffix:
/* optional suffix: L, LU, U or UL; suffix characters are stored only when Prep is set */
if( IS_EL(c) ) {
if( Prep ) {
*p++ = c;
}
c = get_non_eof();
if( IS_U(c) ) {
if(Prep) {
*p++ = c;
}
tok = L_LONGUNSIGNED;
} else {
tok = L_LONGINT;
UNGETCH();
}
} else if( IS_U(c) ) {
if( Prep ) {
*p++ = c;
}
c = get_non_eof();
if( IS_EL(c) ) {
if( Prep ) {
*p++ = c;
}
tok = L_LONGUNSIGNED;
} else {
tok = L_CUNSIGNED;
UNGETCH();
}
} else {
UNGETCH();
}
*p = L'\0';
if( start == Exp_ptr ) {
/* text was gathered at Exp_ptr: advance it past the number, no token */
Exp_ptr = p;
return(L_NOTOKEN);
} else if( Prep ) {
/* preprocessing only: emit the text, no token */
myfwrite( Reuse_W, (size_t)(p - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
return(L_NOTOKEN);
}
value.v_long = matol(Reuse_W,radix);
/* refine the token type from the value; L_LONGUNSIGNED is left as it is */
switch(tok) {
case L_CINTEGER:
tok = (radix == 10)
? c_size(value.v_long)
: uc_size(value.v_long)
;
break;
case L_LONGINT:
tok = l_size(value.v_long);
break;
case L_CUNSIGNED:
tok = ul_size(value.v_long);
break;
}
yylval.yy_tree = build_const(tok, &value);
return(tok);
}
/*
** get_real : gathers the real part/exponent of a real number.
** Input : p - where the next character is to be stored, i.e. just past
** the whole part already gathered.
** Output : L_CFLOAT ('f' suffix), L_CLDOUBLE ('l' suffix) or L_CDOUBLE,
** or L_NOTOKEN when the text was only gathered (Tiny_lexer_nesting > 0)
** or written to OUTPUTFILE (Prep).  No numeric value is computed here.
** ASSUMES whole part is either at Exp_ptr or Reuse_W.
*/
token_t
get_real(
REG PWCHAR p
)
{
REG int c;
token_t tok;
c = get_non_eof();
/* warn once per file, after which the flag is cleared */
if(Cross_compile && (Tiny_lexer_nesting == 0)) {
strcpy (Msg_Text, GET_MSG (4012));
warning(4012); /* float constant in cross compilation */
Cross_compile = FALSE; /* only one msg per file */
}
/*
** if the next char is a digit, then we've been called after
** finding a '.'. if this is true, then
** we want to find the fractional part of the number.
** if it's a '.', then we've been called after finding
** a whole part, and we want the fraction.
*/
if( LXC_IS_DIGIT((WCHAR)c) || IS_DOT(c) ) {
do {
*p++ = (WCHAR)c;
c = (int)get_non_eof();
} while( LXC_IS_DIGIT((WCHAR)c) );
}
if( IS_E((WCHAR)c) ) { /* now have found the exponent */
*p++ = (WCHAR)c; /* save the 'e' */
c = (WCHAR)get_non_eof(); /* skip it */
if( IS_SIGN(c) ) { /* optional sign */
*p++ = (WCHAR)c; /* save the sign */
c = (int)get_non_eof();
}
if( ! LXC_IS_DIGIT((WCHAR)c)) {
/* no exponent digits: unless Rflag is set, report it (outside the tiny lexer) and store a '0' */
if( ! Rflag ) {
if(Tiny_lexer_nesting == 0) {
Msg_Temp = GET_MSG (2021);
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, c);
error(2021); /* missing or malformed exponent */
}
*p++ = L'0';
}
} else {
do { /* gather the exponent */
*p++ = (WCHAR)c;
c = (int)get_non_eof();
} while( LXC_IS_DIGIT((WCHAR)c) );
}
}
/* a suffix character is consumed, and stored only when Prep is set; anything else is pushed back */
if( IS_F((WCHAR)c) ) {
tok = L_CFLOAT;
if( Prep ) {
*p++ = (WCHAR)c;
}
} else if( IS_EL((WCHAR)c) ) {
tok = L_CLDOUBLE;
if( Prep ) {
*p++ = (WCHAR)c;
}
} else {
UNGETCH();
tok = L_CDOUBLE;
}
*p = L'\0';
if( Tiny_lexer_nesting > 0 ) {
Exp_ptr = p;
return(L_NOTOKEN);
}
else if( Prep ) {
myfwrite( Reuse_W, (size_t)(p - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
return(L_NOTOKEN);
}
/*
** reals aren't used during preprocessing
*/
return(tok);
}
/*
** matol : wide-character digit string to long, given a radix.
**
** p_start - NUL-terminated string of digits (no prefix, no suffix)
** radix   - 8, 10 or 16
**
** Returns the converted value.  A digit that is not valid for the radix
** is reported with error 2020 (unless Prep is set) but still accumulated.
** If the constant does not fit, error 2177 is reported and 0 is returned:
**   - decimal constants must not exceed LONG_MAX;
**   - octal and hex constants must fit in 32 bits, and a result with the
**     top bit set comes back as a negative long.
**
** The value is accumulated as unsigned long, so no signed overflow occurs,
** and the decimal range check is made before the multiply.  A check made
** after the fact (result < previous result) misses wraps that land above
** the previous value, e.g. "10000000000" with a 32-bit long.
*/
long
matol(
    REG PWCHAR p_start,
    REG int radix
    )
{
    unsigned long result = 0;
    int digit;

    while(*p_start) {
        digit = ctoi(*p_start);
        if( (digit >= radix) && (! Prep) ) {
            Msg_Temp = GET_MSG (2020);
            SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, *p_start, radix);
            error(2020);        /* illegal digit % for base % */
        }
        if(radix == 10) {
            /*
            ** would result * 10 + digit exceed LONG_MAX?  leave p_start on
            ** the offending digit so the overflow is reported below.
            */
            if(result > ((unsigned long)LONG_MAX - (unsigned long)digit) / 10) {
                break;
            }
        }
        result = result * (unsigned long)radix + (unsigned long)digit;
        p_start++;
        if((radix != 10) && *p_start) {
            /*
            ** the loop is not finished.
            ** we will multiply by the radix again
            ** check the upper bits. if they're on, then
            ** that mult will overflow the value
            */
            if(radix == 8) {
                if(result & 0xe0000000) {
                    break;
                }
            } else if(result & 0xf0000000) {
                break;
            }
        }
    }
    if(*p_start) {              /* stopped early: the constant did not fit */
        strcpy (Msg_Text, GET_MSG (2177));
        error(2177);            /* constant too big */
        result = 0;
    }
    return((long)result);
}
/*
** uc_size : returns 'int' or 'long' (virtual unsigned).
** if their are no bits in the upper part of the value,
** then it's an int. otherwise, it's a long.
** this is valid too if target sizeof(int) != sizeof(long).
** then L_CINTEGER and L_LONGINT are synonymous.
*/
token_t
uc_size(
long value
)
{
return((token_t)((value > INT_MAX) ? L_CUNSIGNED : L_CINTEGER));
}
/*
** c_size : returns 'int' or 'long' for signed numbers.
** if the sign bit of the lower word is on or any bits
** in the upper word are on, then we must use 'long'.
*/
token_t
c_size(
long value
)
{
return((token_t)((ABS(value) > INT_MAX) ? L_LONGINT : L_CINTEGER));
}
/*
** l_size : returns 'longint' or 'longunsigned' for long numbers.
**      if the sign bit of the high word is on this is 'longunsigned'.
**
**      The value arrives as a long, so "sign bit on" shows up as a
**      negative value.  Comparing the value against LONG_MAX can never be
**      true for a long and would make L_LONGUNSIGNED unreachable.
*/
token_t
l_size(
    long value
    )
{
    return((token_t)((value < 0) ? L_LONGUNSIGNED : L_LONGINT));
}
/*
** ul_size : returns 'unsigned' or 'longunsigned' for unsigned numbers.
** if the number can't be represented as unsigned, it is promoted to
** unsignedlong.
*/
token_t
ul_size(
long value
)
{
return((token_t)((ABS(value) > UINT_MAX-1) ? L_LONGUNSIGNED : L_CUNSIGNED));
}
/*
** ctoi : character to int.
*/
int
ctoi(
int c
)
{
if(LXC_IS_DIGIT((WCHAR)c)) {
return(c - L'0');
} else {
return(towupper((WCHAR)c) - towupper(L'A') + 10);
}
}
/*
* ESCAPE - get an escaped character
* ARGUMENTS
* c - the character that followed the backslash, already read
* RETURNS - value of escaped character
* SIDE EFFECTS - may read further input and push one character back
* DESCRIPTION - An escape ( '\' ) was discovered in the input. Translate
* the next symbol or symbols into an escape sequence.
* AUTHOR - Ralph Ryan, Sept. 7, 1982
* MODIFICATIONS - none
*/
int
escape(
REG int c
)
{
REG int value;
int cnt;
escape_again:
if( LXC_IS_ODIGIT((WCHAR)c) ) {/* \ooo is an octal number, must fit into a byte */
/* take at most three octal digits, c being the first */
cnt = 1;
for(value = ctoi(c), c = get_non_eof();
(cnt < 3) && LXC_IS_ODIGIT((WCHAR)c);
cnt++, c = get_non_eof()
) {
value *= 8;
value += ctoi(c);
}
if( ! Prep ) {
if(value > 255) {
Msg_Temp = GET_MSG (2022);
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, value);
error (2022);
}
}
UNGETCH(); /* the character that ended the digits is not ours */
return((char)value);
}
switch( c ) {
case L'a':
return(ALERT_CHAR);
break;
case L'b':
return(L'\b');
break;
case L'f':
return(L'\f');
break;
case L'n':
/* with fMacRsrcs set, \n and \r exchange values */
return fMacRsrcs ? (L'\r') : (L'\n');
break;
case L'r':
return fMacRsrcs ? (L'\n') : (L'\r');
break;
case L't':
return(L'\t');
break;
case L'v':
return(L'\v');
break;
case L'x':
/* hex escape: at most three hex digits are consumed; none at all is error 2153 */
cnt = 0;
value = 0;
c = get_non_eof();
while((cnt < 3) && LXC_IS_XDIGIT((WCHAR)c)) {
value *= 16;
value += ctoi(c);
c = get_non_eof();
cnt++;
}
if(cnt == 0) {
strcpy (Msg_Text, GET_MSG (2153));
error (2153);
}
UNGETCH();
return((char)value); /* cast to get sign extend */
default:
/* any other character stands for itself */
if(c != L'\\') {
return(c);
} else {
/* a backslash followed by a newline: restart on the next character */
if(checknl()) {
c = get_non_eof();
goto escape_again;
} else {
return(c);
}
}
}
}
/*
 * CHECKOP - test whether the next input character is 'op'.
 * ARGUMENTS
 *      op - the character to be checked against
 * RETURNS
 *      TRUE when the next character (read with get_non_eof) equals op,
 *      in which case it stays consumed; FALSE otherwise.
 * SIDE EFFECTS
 *      On a mismatch the character is pushed back onto the input.
 */
int
checkop(
    int op
    )
{
    int next = (int)get_non_eof();

    if(next != op) {
        UNGETCH();
        return(FALSE);
    }
    return(TRUE);
}
/*
** DumpSlashComment : while skipping a comment, output it.
**
** Handles a double-slash comment whose opening characters were already
** read.  Without Cflag the comment is skipped with skip_NLonly().  With
** Cflag the opening "//" and every following character up to the newline
** are written to OUTPUTFILE; the newline itself is pushed back.
*/
void
DumpSlashComment(
VOID
)
{
if( ! Cflag ) {
skip_NLonly();
return;
}
myfwrite(L"//", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
for(;;) {
WCHAR c;
switch(CHARMAP(c = GETCH())) {
// must manually write '\r' with '\n' when writing 16-bit strings
//case LX_CR:
// continue;
case LX_EOS:
handle_eos();
continue;
case LX_NL:
// end of the comment: leave the newline for the caller
UNGETCH();
return;
}
// any other character is part of the comment text
myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
}
}
/*
** dump_comment : while skipping a comment, output it.
**
** Handles a slash-star comment whose opener was already read.  Without
** Cflag the comment is skipped with skip_1comment().  With Cflag the
** opener, the comment text and the closer are written to OUTPUTFILE.
** Linenumber is incremented for every newline inside the comment.
*/
void
dump_comment(
void
)
{
if( ! Cflag ) {
skip_1comment();
return;
}
myfwrite(L"/*", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
for(;;) {
WCHAR c;
switch(CHARMAP(c = GETCH())) {
case LX_STAR:
// a star followed by a slash closes the comment
if(checkop(L'/')) {
myfwrite(L"*/", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
return;
}
break;
case LX_EOS:
handle_eos();
continue;
case LX_NL:
Linenumber++;
break; /* output below */
// must manually write '\r' with '\n' when writing 16-bit strings
//case LX_CR:
// continue;
}
myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
}
}
/*
** skip_comment : called after a '/' has been read.
**      if the next character opens a comment ('*' or a second '/'),
**      the comment is skipped and TRUE is returned.  otherwise nothing
**      beyond the '/' stays consumed and FALSE is returned.
*/
int
skip_comment(
    void
    )
{
    if(checkop(L'*')) {
        skip_1comment();
        return(TRUE);
    }
    if(checkop(L'/')) {
        skip_NLonly();
        return(TRUE);
    }
    return(FALSE);
}
/*
** skip_1comment : we're called when we're already in a comment.
** we're looking for the comment close. we also count newlines
** and output them if we're preprocessing.
**
** Returns once the closing star-slash pair has been consumed.
*/
void
skip_1comment(
void
)
{
UINT c;
for(;;) {
c = GETCH();
if(c == L'*') {
recheck:
c = GETCH();
if(c == L'/') { /* end of comment */
return;
} else if(c == L'*') {
/*
** if we get another '*' go back and check for a slash
*/
goto recheck;
} else if(c == EOS_CHAR) {
/* end-of-buffer marker right after a '*': handle it and look again */
handle_eos();
goto recheck;
}
}
/*
** note we fall through here. we know this baby is not a '*'
** we used to unget the char and continue. since we check for
** another '*' inside the above test, we can fall through here
** without ungetting/getting and checking again.
*/
if(c <= L'\n') {
/*
** hopefully, the above test is less expensive than doing two tests
*/
if(c == L'\n') {
Linenumber++;
/* keep line structure in preprocessed output */
if(Prep) {
myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
}
} else if(c == EOS_CHAR) {
handle_eos();
}
}
}
}
/*
** skip_cwhite : while the current character is whitespace or a comment.
** a newline is NOT whitespace.
**
** Returns the first character that is neither white space, a carriage
** return nor part of a comment.  A backslash followed by a newline is
** skipped as well.
*/
WCHAR
skip_cwhite(
void
)
{
REG WCHAR c;
skip_cwhite_again:
while((c = GETCH()) <= L'/') { /* many chars are above this */
if(c == L'/') {
/* a '/' that does not open a comment is the result */
if( ! skip_comment()) {
return(L'/');
}
} else if(c > L' ') { /* char is between '!' and '.' */
return(c);
} else {
/* space or a control character: classify it */
switch(CHARMAP(c)) {
case LX_EOS:
handle_eos();
break;
case LX_WHITE:
continue;
break;
case LX_CR:
continue;
break;
default:
return(c);
break;
}
}
}
/* c is above '/': only a backslash followed by a newline keeps us going */
if((c == L'\\') && (checknl())) {
goto skip_cwhite_again;
}
return(c);
}
/*
** checknl : check for newline, skipping carriage return if there is one.
** also increments Linenumber, so this should be used by routines which
** will not push the newline back in such a way that rawtok() will be invoked,
** find the newline and do another increment.
**
** Returns TRUE when a newline was found and consumed.  Returns FALSE, with
** the character pushed back, when the next character is anything else.
*/
int
checknl(
void
)
{
REG WCHAR c;
for(;;) {
c = GETCH();
/* quick reject: everything this routine cares about is at or below '\r' */
if(c > L'\r') {
UNGETCH();
return(FALSE);
}
switch(c) {
case L'\n':
Linenumber++;
// must manually write '\r' with '\n' when writing 16-bit strings
if( Prep ) {
myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
}
return(TRUE);
break;
case L'\r':
continue;
break;
case EOS_CHAR:
handle_eos();
/* store a backslash in the previous-character slot before looking again */
PREVCH() = L'\\'; /* M00HACK - needs pushback */
continue;
break;
default:
UNGETCH();
return(FALSE);
break;
}
}
}
/*
** get_non_eof : get a real char.
**
** Returns the next input character, skipping carriage returns and any
** backslash that is followed by a newline.  EOS_CHAR is passed to
** handle_eos() and reading continues, except while Tiny_lexer_nesting is
** greater than zero, in which case EOS_CHAR itself is returned.
*/
WCHAR
get_non_eof(
void
)
{
WCHAR c;
get_non_eof_again:
/* only characters at or below '\r' need a closer look */
while((c = GETCH()) <= L'\r') {
if(c == L'\r') {
continue;
} else if(c != EOS_CHAR) {
break;
}
/* c is EOS_CHAR from here on */
if(Tiny_lexer_nesting > 0) {
break;
}
handle_eos();
}
if((c == L'\\') && (checknl())) {
goto get_non_eof_again;
}
return(c);
}