2020-09-30 16:53:55 +02:00

796 lines
22 KiB
C++

/*++
Copyright (c) 1997-2000 Microsoft Corporation
Module Name:
EzParse.cpp
Abstract:
Poor man's C/C++/any file parser.
Author:
Gor Nishanov (gorn) 03-Apr-1999
Revision History:
Gor Nishanov (gorn) 03-Apr-1999 -- hacked together to prove that this can work
GorN: 29-Sep-2000 - fix enumeration bug
GorN: 29-Sep-2000 - add support for KdPrintEx like function
GorN: 09-Oct-2000 - fixed "//" in the string bug
GorN: 23-Oct-2000 - IGNORE_CPP_COMMENT, IGNORE_POUND_COMMENT options added
GorN: 16-Apr-2001 - Properly handle \" within a string
ToDo:
Clean it up
--*/
#define STRICT
#include <stdio.h>
#include <windows.h>
#pragma warning(disable: 4100)
#include <algorithm>
#include <xstring>
#include "ezparse.h"
// Running count of reported errors; ExParsePrintErrorPrefix increments it
// every time it is called.
DWORD ErrorCount = 0;
// Context that ExParsePrintErrorPrefix reads the file name and current
// position from. NULL means no context is active and the generic "wpp"
// prefix is printed instead.
PEZPARSE_CONTEXT EzParseCurrentContext = NULL;
// To force build tool to recognize our errors
// BUILD_PREFIX_FNAME expects the file name as its single %s argument;
// BUILD_PREFIX is the fixed variant used when no parse context is set.
#define BUILD_PREFIX_FNAME "cl %s\n"
#define BUILD_PREFIX "cl wpp\n"
void ExParsePrintErrorPrefix(FILE* f, char * func)
{
++ErrorCount;
if (EzParseCurrentContext) {
fprintf(f,BUILD_PREFIX_FNAME "%s(%d) : error : (%s)",
EzParseCurrentContext->filename,
EzParseCurrentContext->filename,
EzGetLineNo(EzParseCurrentContext->currentStart, EzParseCurrentContext),
func);
} else {
fprintf(f,BUILD_PREFIX "wpp : error : (%s)", func);
}
}
/*++
Routine Description:
    Helper for scanning a buffer backwards. On entry q points at a quote
    character (whatever character *q is, is taken as the delimiter).
    - If that quote is preceded by a backslash it is treated as escaped
      and q - 1 (the backslash) is returned so the caller simply moves on.
    - Otherwise the buffer is walked backwards to the nearest earlier
      occurrence of the same character that is not preceded by a
      backslash, and a pointer to it is returned.
Return Value:
    Pointer as described above, or 0 when begin is reached without
    finding a matching delimiter.
--*/
LPCSTR skip_stuff_in_quotes(LPCSTR q, LPCSTR begin)
{
    const char delimiter = *q;

    if (q > begin && q[-1] == '\\') {
        return q - 1;
    }

    while (q != begin) {
        --q;
        if (*q != delimiter) {
            continue;
        }
        // A delimiter at the very start of the buffer cannot be escaped.
        if (q == begin || q[-1] != '\\') {
            return q;
        }
    }
    return 0;
}
void
adjust_pair( STR_PAIR& str )
/*++
Routine Description:
    Shrink the pair to remove leading and trailing whitespace.
    str.beg is advanced and str.end is pulled back in place; the pair may
    become empty (str.beg == str.end) but never inverted.
--*/
{
    // isspace() has undefined behaviour for negative arguments other than
    // EOF, so every char is converted to unsigned char first. This matters
    // for non-ASCII bytes when plain char is signed.
    while (str.beg < str.end && isspace(static_cast<unsigned char>(*str.beg))) { ++str.beg; }
    while (str.beg < str.end && isspace(static_cast<unsigned char>(str.end[-1]))) { --str.end; }
}
void
remove_cpp_comment(STR_PAIR& str)
/*++
Routine Description:
    Strips C++ style ("//") comments from the range [str.beg, str.end).
    Phase 1 advances str.beg over leading whitespace and over any leading
    "//" comments (each comment ends at the first '\r' or '\n').
    Phase 2 looks for the first "//" that is not inside a double-quoted
    string and pulls str.end back to it.
Notes:
    - If a leading comment runs to str.end without a line break, the
      function returns with the pair still covering that comment.
    - Inside a string a '"' preceded by a backslash is treated as escaped;
      an escaped backslash right before the closing quote is not
      recognised.
    - Single-quoted character literals are not treated specially.
--*/
{
    LPCSTR p = str.beg;

    // Phase 1: cut whitespace and comments at the beginning of the pair.
    for(;;) {
        // skip the whitespace (unsigned char cast: isspace() is undefined
        // for negative values)
        for(;;) {
            if (p == str.end) return;
            if (!isspace(static_cast<unsigned char>(*p))) break;
            ++p;
        }
        str.beg = p;
        if (p + 1 == str.end) return;   // a single char cannot start "//"
        if (!(p[0] == '/' && p[1] == '/')) {
            // no leading comment
            break;
        }
        // we have a comment. Need to get to the end of the comment
        p += 2;
        for(;;) {
            if (p == str.end) return;
            if (*p == '\r' || *p == '\n') {
                str.beg = p;
                break;
            }
            ++p;
        }
    }

    // Phase 2: find a trailing comment outside of string literals.
    for(;;) {
        if (p == str.end) return;
        if (*p == '"') {
            // don't look for comments within a string
            for(;;) {
                if (++p == str.end) return;
                if (*p == '"' && p[-1] != '\\') break;
            }
            ++p;
            continue;
        }
        if (p + 1 == str.end) return;
        if (p[0] == '/' && p[1] == '/') {
            break;
        }
        // Advance one character at a time. Stepping over two characters
        // after a lone '/' would skip a '"' that directly follows it and
        // let a "//" inside that string be mistaken for a comment.
        ++p;
    }
    str.end = p;
}
DWORD
ScanForFunctionCallsEx(
IN LPCSTR begin,
IN LPCSTR end,
IN EZPARSE_CALLBACK Callback,
IN PVOID Context,
IN OUT PEZPARSE_CONTEXT ParseContext,
IN DWORD Options
)
/*++
Routine Description:
Scan the buffer for expressions that look like function calls,
i.e name(sd,sdf,sdf,sdf,sdf); . It will treat variable declaration
with constructor call as a function call as well.
The scan moves forward from ';' to ';'. For every ';' that is preceded
(ignoring whitespace) by ')', the text is walked backwards to collect the
top-level arguments and the name in front of the matching '('. Each
accepted candidate is handed to Callback as an array of STR_PAIRs:
element 0 is the name, the following elements are the arguments in
source order.
Inputs:
begin, end -- pointers to the beginning and the end of the buffer
Callback -- to be called for every function
Context -- opaque context to be passed to callback
ParseContext -- holds current parse state information; macroEnd,
currentStart, currentEnd and doubleParent are written here
Options -- bit flags; this routine tests NO_SEMICOLON, IGNORE_COMMENT,
IGNORE_CPP_COMMENT and IGNORE_POUND_COMMENT
Return Value:
ERROR_SUCCESS once the end of the buffer is reached, or the first
status other than ERROR_SUCCESS returned by Callback.
--*/
{
LPCSTR p = begin;
LPCSTR q, funcNameEnd;
DWORD Status = ERROR_SUCCESS;
bool double_par = FALSE;
// Every rejected candidate, and every processed one, comes back here to
// continue with the next ';'.
no_match:
if (Options & NO_SEMICOLON) {
// The buffer holds a single expression without a terminating ';':
// treat 'end' as the terminator for the first attempt only.
q = end;
Options &= ~NO_SEMICOLON;
} else {
// Advance to the next ';'. Note that p is incremented before it is
// tested, so the character at the starting position is not examined.
do {
++p;
if (p == end) {
return Status;
}
} while ( *p != ';' );
// Ok. Now p points to ';' //
q = p;
}
// Step backwards over whitespace in front of the terminator.
do {
if (--q <= begin) {
goto no_match;
}
} while ( isspace(*q) );
// Now q points on the first non white space character //
// If it is not a ')' then we need to search for the next ';' //
if (*q != ')') {
goto no_match;
}
// Remember the closing parenthesis of the whole expression.
ParseContext->macroEnd = q;
// Ok. This is a function call (definition).
// Now, let's go and collect all the arguments of the first level and
// get to the name of the function
// HACKHACK
// We need a special case for functions that looks like
// KdPrintEx((Level, Indent, Msg, ...));
// Essentially, we need to treat them as
// KdPrintEx(Level, Indent, Msg, ...);
const char *r = q;
// check if we have ));
do {
if (--r <= begin) break; // no "));"
} while ( isspace(*r) );
double_par = r > begin && *r == ')';
if (double_par) {
// Start collecting arguments from the inner ')'.
q = r;
// we assume that this is KdPrint((a,b,c,d,...)); at the moment
// if our assumption is wrong, we will retry the loop below
}
retry:
{
// Nesting depth of parentheses seen while walking backwards.
int level = 0;
// ends[] records separator positions from right to left: ends[0] is the
// closing ')', then each top-level ',', and finally the opening '('.
LPCSTR ends[128], *current = ends;
// strs[0] receives the name, strs[1..] the arguments.
STR_PAIR strs[128];
// LPCSTR closing_parenthisis = q;
*current = q;
for(;;) {
--q;
if (q <= begin) {
goto no_match;
}
switch (*q) {
// A comma separates arguments only at nesting level 0. The 127 check
// keeps one slot of ends[] free for the opening '('.
case ',': if (!level) {
if (current - ends == 127) goto no_match;
*++current = q;
}
break;
// '(' at level 0 is the parenthesis matching ends[0].
case '(': if (level) --level; else goto maybe_match; break;
case ')': ++level; break;
// Quote characters: let skip_stuff_in_quotes move q past the quoted text
// (or past an escaping backslash); a null result rejects the candidate.
case '\'':
case '"':
q = skip_stuff_in_quotes(q, begin); if(!q) goto no_match;
}
}
maybe_match:
if (current - ends == 127) goto no_match;
*++current = q;
funcNameEnd = q;
// now q point to '(' we need to find name of the function //
do {
--q;
if (q <= begin) {
goto no_match;
}
} while(isspace(*q));
// now q points to first not white character
if (double_par) {
// if we see )); and found a matching
// parenthesis for the inner one, we can have
// one of two cases
// 1) KdPrint((a,b,c,d,...));
// or
// 2) DebugPrint(a,b,(c,d));
// If it is the latter, we just need to
// retry the scanning, now using leftmost bracket as a starting point
if (*q != '(') {
// restore q to the rightmost parenthesis
q = ParseContext->macroEnd;
double_par = FALSE;
goto retry;
}
// Case 1: q is the outer '('; the name ends here.
funcNameEnd = q;
// now q point to '(' we need to find name of the function //
do {
--q;
if (q <= begin) {
goto no_match;
}
} while(isspace(*q));
}
// now q points to first non white character
// BUGBUG '{' and '}' are allowed only in config files
// A "{...}" group directly in front of '(' is skipped by walking back to
// the nearest '{' (no nesting is tracked) and then one character further.
if (*q == '}') {
for(;;) {
if (--q < begin) goto no_match;
if (*q == '{') break;
}
if (--q < begin) goto no_match;
}
// The character in front of '(' must be able to end an identifier.
if (!(isalpha(*q) || isdigit(*q) || *q == '_')) {
goto no_match;
}
// Walk back over identifier characters; reaching the start of the buffer
// accepts the candidate right away.
do {
--q;
if (q <= begin) {
goto found;
}
} while ( isalpha(*q) || isdigit(*q) || *q == '_');
// q stopped on the character in front of the identifier; step onto its
// first character and reject names that start with a digit.
++q;
if (isdigit(*q)) {
goto no_match;
}
found:
if (Options & IGNORE_COMMENT)
// Verify that it is not a comment
// # sign in the beginning of the line
{
LPCSTR line = q;
//
// Find the beginning of the line or file
//
for(;;) {
if (line == begin) {
// Beginning of the file. Good enough
break;
}
if (Options & IGNORE_CPP_COMMENT && line[0] == '/' && line[1] == '/') {
// C++ comment. Ignore
goto no_match;
}
// 13 and 10 are '\r' and '\n'; the line starts right after them.
if (*line == 13 || *line == 10) {
++line;
break;
}
--line;
}
//
// If the first non-white character is #, ignore it
//
while (line <= q) {
if ( *line != ' ' && *line != '\t' ) {
break;
}
++line;
}
if (Options & IGNORE_POUND_COMMENT && *line == '#') {
goto no_match;
}
}
{
// Convert the recorded separators into trimmed STR_PAIRs. 'current'
// starts at the opening '(' entry and walks back towards ends[0], which
// yields the arguments in source order.
int i = 0;
strs[0].beg = q;
strs[0].end = funcNameEnd;
adjust_pair(strs[0]);
while (current != ends) {
// putchar('<');printrange(current[0]+1, current[-1]); putchar('>');
++i;
strs[i].beg = current[0]+1;
--current;
strs[i].end = current[0];
adjust_pair(strs[i]);
remove_cpp_comment(strs[i]);
}
ParseContext->currentStart = strs[0].beg;
ParseContext->currentEnd = strs[0].end;
ParseContext->doubleParent = double_par;
// The count passed to the callback includes the name in strs[0].
Status = Callback(strs, i+1, Context, ParseContext);
if (Status != ERROR_SUCCESS) {
return Status;
}
}
// Continue with the next ';' after the one just processed.
goto no_match;
}
// return ERROR_SUCCESS; // unreachable code
}
/*++
Routine Description:
    Convenience wrapper around ScanForFunctionCallsEx that always scans
    with the IGNORE_COMMENT option and forwards every other argument
    unchanged.
Return Value:
    Whatever ScanForFunctionCallsEx returns.
--*/
DWORD
ScanForFunctionCalls(
    IN LPCSTR begin,
    IN LPCSTR end,
    IN EZPARSE_CALLBACK Callback,
    IN PVOID Context,
    IN OUT PEZPARSE_CONTEXT ParseContext
    )
{
    const DWORD scanOptions = IGNORE_COMMENT;

    return ScanForFunctionCallsEx(begin, end, Callback, Context, ParseContext, scanOptions);
}
DWORD
EzGetLineNo(
    IN LPCSTR Ptr,
    IN OUT PEZPARSE_CONTEXT ParseContext
    )
/*++
Routine Description:
    Computes the line number of Ptr by counting '\n' characters.
    The previous answer (lastScanned / scannedLineCount) is kept in
    ParseContext, so only the characters after lastScanned, up to and
    including Ptr, are examined. When Ptr lies before lastScanned the
    count restarts at line 1 from ParseContext->start.
Return Value:
    The line number, which is also stored back into ParseContext together
    with Ptr.
--*/
{
    int lineNo = ParseContext->scannedLineCount;
    LPCSTR stopAt = ParseContext->lastScanned;

    if (stopAt > Ptr) {
        // The cached position is past Ptr; the cache cannot be used.
        lineNo = 1;
        stopAt = ParseContext->start;
    }

    for (LPCSTR cursor = Ptr; cursor > stopAt; --cursor) {
        if (*cursor == '\n') {
            ++lineNo;
        }
    }

    ParseContext->scannedLineCount = lineNo;
    ParseContext->lastScanned = Ptr;
    return lineNo;
}
// Marker and keyword strings searched for by SmartScan and DoEnum.
// begin_wpp / end_wpp delimit an annotated block, define_ introduces a
// macro inside a "func" block and enum_ (note the trailing space)
// introduces an enum inside an "enum" block.
const char begin_wpp[] = "begin_wpp";
const char end_wpp[] = "end_wpp";
const char define_[] = "#define";
const char enum_[] = "enum ";
// Lengths of the strings above, not counting the terminating NUL.
enum {
begin_wpp_size = (sizeof(begin_wpp)-1),
end_wpp_size = (sizeof(end_wpp)-1),
define_size = (sizeof(define_)-1),
enum_size = (sizeof(enum_)-1),
};
// State shared by SmartScan, DoEnum and DoEnumItems.
typedef struct _SmartContext {
// Callback, Context and ParseContext are passed through unchanged to
// ScanForFunctionCallsEx.
EZPARSE_CALLBACK Callback;
PVOID Context;
OUT PEZPARSE_CONTEXT ParseContext;
// Scratch buffer holding the synthesized text that is handed to
// ScanForFunctionCallsEx (see DoEnumItems and the "func" branch of
// SmartScan).
std::string buf;
} SMART_CONTEXT, *PSMART_CONTEXT;
/*++
Routine Description:
    Builds the text
        CUSTOM_TYPE(<name>, ItemListLong(<item>,<item>,...) )
    in ctx->buf from the enumerator list in [begin, end) and feeds it to
    ScanForFunctionCallsEx with the NO_SEMICOLON option. Only enumerator
    names are emitted; an "= value" part is skipped.
Inputs:
    name       -- enum name placed as the first CUSTOM_TYPE argument
    begin, end -- the text between '{' and '}' of the enum
    ctx        -- supplies the scratch buffer and the callback data
Notes:
    - Collection stops at the first empty item.
    - When no item is collected the opening "(" of the list is never
      written, so the generated text is unbalanced (as before).
    - The status returned by ScanForFunctionCallsEx is ignored.
--*/
void DoEnumItems(PSTR_PAIR name, LPCSTR begin, LPCSTR end, PSMART_CONTEXT ctx)
{
    BOOL First = TRUE;

    ctx->buf.assign("CUSTOM_TYPE(");
    ctx->buf.append(name->beg, name->end);
    ctx->buf.append(", ItemListLong");

    // Drop trailing whitespace and at most one trailing comma, so that
    // 'end' is the exclusive end of the last enumerator. Testing end[-1]
    // (instead of pre-decrementing) keeps a body whose only non-space
    // character is the first one, e.g. "{A}".
    while (begin < end && isspace(static_cast<unsigned char>(end[-1]))) { --end; }
    if (begin < end && end[-1] == ',') { --end; }

    LPCSTR p = begin;
    while (p < end) {
        STR_PAIR Item;
        Item.beg = p;

        // The item name ends at ',', '}', '=' or at the end of the list.
        // Reaching 'end' is the normal way for the last enumerator to
        // terminate; it must not be discarded.
        LPCSTR q = p;
        while (q != end && *q != ',' && *q != '}' && *q != '=') { ++q; }
        Item.end = q;

        if (q != end && *q == '=') {
            // need to calc the value. Skip for now //
            while (q < end && *q != ',') { ++q; }
        }

        adjust_pair(Item);
        if (Item.beg == Item.end) {
            break;
        }

        if (First) {ctx->buf.append("("); First = FALSE;} else ctx->buf.append(",");
        ctx->buf.append(Item.beg, Item.end);

        if (q == end) break;
        p = q+1;
    }

    ctx->buf.append(") )");
    ScanForFunctionCallsEx(
        &ctx->buf[0], &ctx->buf[0] + ctx->buf.size(), ctx->Callback, ctx->Context,
        ctx->ParseContext, NO_SEMICOLON);
    Flood("enum %s\n", ctx->buf.c_str());
}
/*++
Routine Description:
    Walks [begin, end) looking for "enum " ... '{' ... '}' sequences.
    The text between the keyword and '{' (trimmed, with one leading '_'
    removed) is the enum name; the text between the braces is passed to
    DoEnumItems. An enum with an empty name is reported as an error.
    Scanning stops as soon as the keyword, '{' or '}' cannot be found.
--*/
void DoEnum(LPCSTR begin, LPCSTR end, PSMART_CONTEXT Ctx)
{
    LPCSTR cursor = begin;

    for(;;) {
        LPCSTR keyword = std::search(cursor, end, enum_, enum_ + enum_size);
        if (keyword == end) {
            return;
        }

        LPCSTR openBrace = std::find(keyword, end, '{');
        if (openBrace == end) {
            return;
        }

        // let's figure out enum name //
        STR_PAIR name;
        name.beg = keyword + enum_size;
        name.end = openBrace;
        adjust_pair(name);
        if ( *name.beg == '_' ) {
            ++name.beg;
        }

        LPCSTR bodyStart = openBrace + 1; // past "{"
        LPCSTR closeBrace = std::find(bodyStart, end, '}');
        if (closeBrace == end) {
            return;
        }

        if (name.end > name.beg) {
            DoEnumItems(&name, bodyStart, closeBrace, Ctx);
        } else {
            ReportError("Cannot handle tagless enums yet");
        }

        // Resume the search at the closing brace.
        cursor = closeBrace;
    }
}
DWORD
SmartScan(
    IN LPCSTR begin,
    IN LPCSTR end,
    IN EZPARSE_CALLBACK Callback,
    IN PVOID Context,
    IN OUT PEZPARSE_CONTEXT ParseContext
    )
/*++
Routine Description:
    Processes the regions of the buffer delimited by "begin_wpp" and
    "end_wpp". The word that follows "begin_wpp" (one separator character
    is skipped) selects the handling:
        enum   -- every enum in the block is converted by DoEnum
        config -- the block is scanned with IGNORE_POUND_COMMENT
        func   -- every "#define NAME" or "#define NAME(args)" is scanned
                  as "NAME(MSGARGS)" or "NAME(args)" respectively
        define -- accepted, nothing is done
    Any other word is reported as an error.
    When the scan position never moves away from 'begin', the whole
    buffer is scanned with ScanForFunctionCalls instead.
Inputs:
    begin, end   -- the buffer to process
    Callback     -- called for every recognised expression
    Context      -- opaque value passed to Callback
    ParseContext -- parse state passed to the scanners
Return Value:
    Always ERROR_SUCCESS; statuses returned by the scanners are ignored.
--*/
{
    LPCSTR block_start, block_end, current = begin;
    SMART_CONTEXT Ctx;
    Ctx.Callback = Callback;
    Ctx.Context = Context;
    Ctx.ParseContext = ParseContext;

    for(;;) {
        block_start = std::search(current, end, begin_wpp, begin_wpp + begin_wpp_size);
        if (block_start == end) break;
        current = block_start;
        block_end = std::search(block_start, end, end_wpp, end_wpp + end_wpp_size);
        if (block_end == end) break;
        Flood("Block Found\n");
        // determine block type //
        // begin_wpp enum
        // begin_wpp config
        // begin_wpp func
        // begin_wpp define
        LPCSTR block_type = block_start + begin_wpp_size + 1;
        Flood("block_type = %c%c%c%c\n", block_type[0],block_type[1],block_type[2],block_type[3]);
        if (memcmp(block_type, "enum", 4) == 0) {
            // do enum block //
            DoEnum( block_type + 4, block_end, &Ctx );
        } else if (memcmp(block_type, "config", 6) == 0) {
            // do config block //
            ScanForFunctionCallsEx(block_type + 6, block_end, Callback, Context, ParseContext, IGNORE_POUND_COMMENT);
        } else if (memcmp(block_type, "func", 4) == 0) {
            LPCSTR func_start, func_end;
            // Start right after the 4-character keyword. Starting 6
            // characters in (the length of "config") misses a "#define"
            // that follows "func" after a single line-feed.
            current = block_type + 4;
            for(;;) {
                func_start = std::search(current, block_end, define_, define_ + define_size);
                if (func_start == block_end) break;
                func_start += define_size;
                // "#define" that ends exactly at the block end has no name.
                if (func_start == block_end) break;
                // Skip the whitespace between "#define" and the name.
                while (isspace(static_cast<unsigned char>(*func_start))) {
                    if(++func_start == block_end) goto no_func;
                }
                // The name runs up to the first whitespace or '('.
                func_end = func_start;
                while (!isspace(static_cast<unsigned char>(*func_end))) {
                    if(*func_end == '(') break;
                    if(++func_end == block_end) goto no_func;
                }
                if(*func_end != '(') {
                    // Object-like macro: give it a placeholder argument list.
                    Ctx.buf.assign(func_start, func_end);
                    Ctx.buf.append("(MSGARGS)");
                } else {
                    func_end = std::find(func_start, block_end, ')');
                    if (func_end == block_end) break;
                    ++func_end; // include ")"
                    Ctx.buf.assign(func_start, func_end);
                }
                Flood("Func %s\n", Ctx.buf.c_str());
                // Pass plain character pointers; std::string iterators are
                // not guaranteed to convert to LPCSTR.
                ScanForFunctionCallsEx(
                    Ctx.buf.c_str(), Ctx.buf.c_str() + Ctx.buf.size(), Callback, Context,
                    ParseContext, NO_SEMICOLON);
                current = func_end;
            }
            no_func:;
        } else if (memcmp(block_type, "define", 6) == 0) {
            // do define block
        } else {
            ReportError("Unknown block");
        }
        current = block_end + end_wpp_size;
    }
    if (current == begin) {
        // file without marking, let's do default processing
        Unusual("Reverting back to plain scan\n");
        ScanForFunctionCalls(begin, end, Callback, Context, ParseContext);
    }
    return ERROR_SUCCESS;
}
/*++
Routine Description:
    Parses the given file with ScanForFunctionCalls as the scanner and
    IGNORE_POUND_COMMENT as the option value handed to EzParseEx.
Return Value:
    The status returned by EzParseEx.
--*/
DWORD
EzParse(
    IN LPCSTR filename,
    IN EZPARSE_CALLBACK Callback,
    IN PVOID Context)
{
    // Alternative kept for reference:
    // return EzParseEx(filename, SmartScan, Callback, Context);
    const DWORD defaultOptions = IGNORE_POUND_COMMENT;

    return EzParseEx(filename, ScanForFunctionCalls, Callback, Context, defaultOptions);
}
/*++
Routine Description:
    Parses the given file with ScanForFunctionCalls as the scanner,
    passing the caller's Options value through to EzParseEx.
Return Value:
    The status returned by EzParseEx.
--*/
DWORD
EzParseWithOptions(
    IN LPCSTR filename,
    IN EZPARSE_CALLBACK Callback,
    IN PVOID Context,
    IN DWORD Options)
{
    const DWORD parseStatus =
        EzParseEx(filename, ScanForFunctionCalls, Callback, Context, Options);

    return parseStatus;
}
DWORD
EzParseEx(
    IN LPCSTR filename,
    IN PROCESSFILE_CALLBACK ProcessData,
    IN EZPARSE_CALLBACK Callback,
    IN PVOID Context,
    IN DWORD Options
    )
/*++
Routine Description:
    Opens the file for reading, maps it into memory and runs ProcessData
    over the mapped bytes. While ProcessData runs, a fresh parse context
    for this file is installed as EzParseCurrentContext; the previous
    context is restored afterwards.
Inputs:
    filename    -- file to parse
    ProcessData -- scanner invoked on the mapped buffer
    Callback    -- forwarded to ProcessData
    Context     -- forwarded to ProcessData
    Options     -- stored in the parse context
Return Value:
    The status returned by ProcessData, or the GetLastError() value of
    the Win32 call that failed (which is also reported via ReportError).
--*/
{
    DWORD Status = ERROR_SUCCESS;

    HANDLE file = CreateFileA(filename,
                              GENERIC_READ, FILE_SHARE_READ, NULL,
                              OPEN_EXISTING, 0, 0);
    if (file == INVALID_HANDLE_VALUE) {
        Status = GetLastError();
        ReportError("Cannot open file %s, error %u\n", filename, Status );
        return Status;
    }

    // The size is taken before mapping; it defines the end of the buffer.
    DWORD size = GetFileSize(file, 0);

    HANDLE mapping = CreateFileMapping(file,0,PAGE_READONLY,0,0, 0);
    if (!mapping) {
        Status = GetLastError();
        ReportError("Cannot create mapping, error %u\n", Status );
        CloseHandle(file);
        return Status;
    }

    PCHAR buf = (PCHAR)MapViewOfFileEx(mapping, FILE_MAP_READ,0,0,0,0);
    if (!buf) {
        Status = GetLastError();
        ReportError("MapViewOfFileEx failed, error %u\n", Status );
    } else {
        EZPARSE_CONTEXT ParseContext;
        ZeroMemory(&ParseContext, sizeof(ParseContext) );
        ParseContext.start = buf;
        ParseContext.filename = filename;
        ParseContext.scannedLineCount = 1;
        ParseContext.lastScanned = buf;
        ParseContext.previousContext = EzParseCurrentContext;
        ParseContext.Options = Options;

        // Make this file the current one for error reporting, then
        // restore the previous context once the scan is done.
        EzParseCurrentContext = &ParseContext;
        Status = (*ProcessData)(buf, buf + size, Callback, Context, &ParseContext);
        EzParseCurrentContext = ParseContext.previousContext;

        UnmapViewOfFile( buf );
    }

    CloseHandle(mapping);
    CloseHandle(file);
    return Status;
}
DWORD
EzParseResourceEx(
    IN LPCSTR ResName,
    IN PROCESSFILE_CALLBACK ProcessData,
    IN EZPARSE_CALLBACK Callback,
    IN PVOID Context)
/*++
Routine Description:
    Locates the RT_RCDATA resource ResName in the current module, loads
    and locks it, and runs ProcessData over its bytes. While ProcessData
    runs, a fresh parse context (named after the resource) is installed
    as EzParseCurrentContext; the previous context is restored afterwards.
Inputs:
    ResName     -- name of the resource to parse
    ProcessData -- scanner invoked on the resource data
    Callback    -- forwarded to ProcessData
    Context     -- forwarded to ProcessData
Return Value:
    The status returned by ProcessData, or the GetLastError() value of
    the Win32 call that failed (which is also reported via ReportError).
--*/
{
    DWORD Status = ERROR_SUCCESS;
    HRSRC hRsrc;
    hRsrc = FindResource(
        NULL, //this Module
        ResName,
        RT_RCDATA);
    if (hRsrc == NULL) {
        Status = GetLastError();
        ReportError("Cannot open resource %s, error %u\n", ResName, Status );
        return Status;
    }
    HGLOBAL hGlobal = LoadResource(NULL, hRsrc);
    if (!hGlobal) {
        Status = GetLastError();
        // Name the call that actually failed; this used to say
        // "LockResource", which made the two failure paths
        // indistinguishable.
        ReportError("LoadResource failed, error %u\n", Status );
        return Status;
    }
    DWORD size = SizeofResource(NULL, hRsrc);
    PCHAR buf = (PCHAR)LockResource(hGlobal);
    if (buf) {
        EZPARSE_CONTEXT ParseContext;
        // All fields not set below (Options included) stay zero.
        ZeroMemory(&ParseContext, sizeof(ParseContext) );
        ParseContext.start = buf;
        ParseContext.filename = ResName;
        ParseContext.scannedLineCount = 1;
        ParseContext.lastScanned = buf;
        ParseContext.previousContext = EzParseCurrentContext;
        EzParseCurrentContext = &ParseContext;
        Status = (*ProcessData)(buf, buf + size, Callback, Context, &ParseContext);
        EzParseCurrentContext = ParseContext.previousContext;
    } else {
        Status = GetLastError();
        ReportError("LockResource failed, error %u\n", Status );
    }
    // According to MSDN. There is no need to call Unlock/Free Resource
    return Status;
}