NT4/private/windows/media/tools/extract/bfuncs.c
2020-09-30 17:12:29 +02:00

628 lines
18 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "extract.h"
#include "tags.h"
/* True when c is a space, tab, or newline. The argument is expanded up to
 * three times, so it must not have side effects.
 */
#define SPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
/* Output routine error messages. printf-style format; the single %s
 * receives the source file name.
 */
char errOut[] = "%s: Error writing to file.\n";
/* Standard templated error messages. printf-style format taking the
 * source file name, a line number, and the message text (see PrintError).
 */
static char errmsg[] = "%s (%u): %s\n";
/* File private functions
*/
/* Shared worker for GetFirstBlock and GetNextBlock, defined later in this file. */
static WORD CommonGetBlock(NPSourceFile sf, PSTR p);
/*
* @doc EXTRACT
*
* @api void | OutputTag | Print a tag name to the output file.
*
* @parm NPSourceFile | sf | Specifies the source file buffer block.
*
* @parm WORD | wBlock | Specifies the current outerlevel block type.
*
* @parm WORD | wTag | Gives the index of the tag to print.
*
* @comm Prints the innerlevel block tag specified by <p wTag>, as
* determined from the global tag array. The output tag printed is
* affected by the current outerlevel block type, so that different
* outerlevel blocks will generate different output tags for the same
* input tag.
*
* The tag text is followed by a tab character. No output will occur if
* the global fNoOutput flag is True.
*
*/
void OutputTag(NPSourceFile sf, WORD wBlock, WORD wTag)
{
    /* Output is globally suppressed: nothing to do. */
    if (fNoOutput)
        return;

    /* Validate the tag lookup before anything is written. */
    assert(wBlock < NUM_LEVELS);
    /* Make sure there's a valid output tag to print */
    assert(DocTags[wBlock][wTag] != NULL);

    /* Emit the tag prefix character, the tag text, then a tab.
     *
     * fputs() signals failure by returning EOF; on success it may return
     * ANY non-negative value, not just zero, so the result must be compared
     * against EOF rather than tested for truth.  putc() is checked the same
     * way.  If an error occurs, exit() for now. HACK!
     */
    if (putc(TAG, fpOutput) == EOF
        || fputs(DocTags[wBlock][wTag], fpOutput) == EOF
        || putc('\t', fpOutput) == EOF) {
        fprintf(stderr, errOut, sf->fileEntry->filename);
        exit(4);
    }
}
/*
* @doc EXTRACT
* @api void | OutputTagText | Print a tag to the output file, where
* the tag is specified by an immediate string.
*
* @parm NPSourceFile | sf | Specifies the source file buffer block.
* @parm PSTR | szTag | Specifies the tag to output.
*
* @comm Prints tag <p szTag> to the output file. The tag text is
* followed by a tab character. <p szTag> should not include the tag
* prefix character (ie the '@') sign, as this is printed automatically.
* No output will occur if the global flag fNoOutput is set.
*
*/
void OutputTagText(NPSourceFile sf, PSTR szTag)
{
    /* Output is globally suppressed: nothing to do. */
    if (fNoOutput)
        return;

    /* Emit the tag prefix character, the caller's tag text, then a tab.
     *
     * fputs() signals failure by returning EOF; on success it may return
     * ANY non-negative value, not just zero, so the result is compared
     * against EOF rather than tested for truth.  If an error occurs,
     * exit() for now. HACK!
     */
    if (putc(TAG, fpOutput) == EOF
        || fputs(szTag, fpOutput) == EOF
        || putc('\t', fpOutput) == EOF) {
        fprintf(stderr, errOut, sf->fileEntry->filename);
        exit(4);
    }
}
/*
* @doc EXTRACT
* @api void | OutputRegion | Print the text between the point and
* the mark, inclusive.
*
* @parm NPSourceFile | sf | Specifies the source file buffer block,
* containing the output buffer, point, and mark.
* @parm char | chPost | Specifies the character to output after the
* text region has been written, usually a newline. No character is
* output if this is the NUL character ('\0').
*
* @comm Prints the region given by the pt and mark fields of <p sf>.
* The text printed is inclusive from the point to the mark. The
* <p chPost> character is appended to the output if <p chPost> is
* non-NULL (useful for printing newlines or tabs).
*
* If a write error occurs, an error message is printed to stderr and
* the program exited.
*
* No output occurs if the global fNoOutput flag is TRUE.
*
*/
void OutputRegion(NPSourceFile sf, char chPost)
{
    char chSaved = '\0';    /* character temporarily overwritten after the mark */
    int fPatched;           /* non-zero if the buffer was patched with a NUL */

    /* Output is globally suppressed: nothing to do. */
    if (fNoOutput)
        return;

    /* fputs() needs a terminated string, so save the char following the
     * mark and replace it with a NUL for printing.  This is skipped when
     * the mark itself already sits on a NUL.
     */
    fPatched = (*sf->mark != '\0');
    if (fPatched) {
        chSaved = *(sf->mark + 1);
        *(sf->mark + 1) = '\0';
    }

    /* fputs() reports failure as EOF; any non-negative value is success. */
    if (fputs(sf->pt, fpOutput) == EOF)
        goto TextOutputError;

    /* Send the trailing character (usually a newline) if one was asked for */
    if (chPost && putc(chPost, fpOutput) == EOF)
        goto TextOutputError;

    /* Restore the NULed-over character so the comment buffer is unchanged */
    if (fPatched)
        *(sf->mark + 1) = chSaved;
    return;

TextOutputError:
    /* The program exits here, so the buffer is deliberately not restored. */
    fprintf(stderr, errOut, sf->fileEntry->filename);
    exit(4);
}
/*
* @doc EXTRACT
* @api void | OutputText | Outputs an arbitrary text string to the
* output file.
*
* @parm NPSourceFile | sf | Specifies the source file buffer block.
* @parm PSTR | szText | Specifies the text string to print.
*
* @comm Prints <p szText> to the output file. If a write error
* occurs, an error message is printed and the program exited. If the
* global fNoOutput flag is set, no output occurs. No newlines or other
* formatting characters are appended to the output.
*
*/
void OutputText(NPSourceFile sf, PSTR szText)
{
    /* Output is globally suppressed: nothing to do. */
    if (fNoOutput)
        return;

    /* fputs() reports failure as EOF; on success it may return any
     * non-negative value, so compare against EOF instead of testing for
     * a non-zero result.
     */
    if (fputs(szText, fpOutput) == EOF) {
        fprintf(stderr, errOut, sf->fileEntry->filename);
        exit(4);
    }
}
/*
* @doc EXTRACT
* @api void | CopyRegion | Copies the current region from point to
* mark inclusive into a null terminated buffer.
* @parm NPSourceFile | sf | Specifies the source file buffer block.
* @parm PSTR | buf | Pointer to buffer into which region will be
* placed.
* @parm WORD | wLen | Length in bytes of buffer <p buf>.
*
* @comm Copies the region from point to mark inclusive into the
* <p buf>. Up to <p wLen> - 1 characters will be copied, and <p buf> is
* guaranteed to be NULL terminated.
*
*/
void CopyRegion(NPSourceFile sf, PSTR buf, WORD wLen)
{
    PSTR src;
    int cchCopy;    /* number of characters still allowed to be copied */
    int cchMax;     /* capacity of buf, excluding the terminator */

    /* A zero-length buffer cannot even hold the terminator: leave it alone. */
    if (wLen == 0)
        return;

    /* Fixup the count to the smaller of the region length and the buffer
     * capacity.  One byte of buf is reserved for the terminating NUL, so
     * at most wLen - 1 characters are copied and buf is never overrun.
     *
     * NOTE(review): the count is (mark - pt), so the character AT the mark
     * is not copied, although the header describes the region as
     * inclusive.  That behaviour is kept as-is -- confirm against callers.
     */
    cchCopy = (int) (sf->mark - sf->pt);
    cchMax = (int) wLen - 1;
    if (cchCopy > cchMax)
        cchCopy = cchMax;

    /* Copy, stopping early if the source string ends first.  A negative
     * count (mark before point) copies nothing.
     */
    for (src = sf->pt; cchCopy > 0 && *src; cchCopy--)
        *buf++ = *src++;
    *buf = '\0';
}
/*
* @doc EXTRACT
* @api BOOL | FindNextTag | Moves the point forward until it points
* to the next tag in a comment block, and moves the mark to the end of
* the tag word.
*
* @parm NPSourceFile | sf | Specifies the source file buffer block.
*
* @rdesc Returns TRUE if a tag was found, or FALSE if no tag was found
* in the comment block.
*
* @comm Starting from the current point, moves the point forward to
* the next tag in the block. The mark is moved to the end of the tag
* that is found. If no next tag exists in the buffer, FALSE is
* returned and the mark and point are undefined.
*
* Note that multiple calls to <f FindNextTag> without intervening calls
* to move the point will cause the same tag to be repeatedly
* found, as the search for tags begins at the point.
*
*/
BOOL FindNextTag(NPSourceFile sf)
{
    PSTR p;

    /* move forward until finding next tag, put point there */
    p = sf->pt;
    for (;;) {
        /* Advance to the next TAG character or the terminating NUL. */
        while (*p && *p != TAG)
            p++;

        /* End of comment block.  This must be tested BEFORE the line-start
         * check below; otherwise a buffer whose last character is not a
         * newline would make the scan step over the terminator and run off
         * the end of the buffer.
         */
        if (!*p)
            return FALSE;

        /* Make sure that this is a tag: it must be the first character of
         * the buffer or be preceded by a newline.
         */
        if (p <= sf->lpbuf || *(p - 1) == '\n')
            break;

        /* A TAG character in the middle of a line: skip it, keep looking. */
        p++;
    }

    /* Step over the TAG character.  If nothing but whitespace or the end
     * of the buffer follows, there is no tag word: leave the mark there
     * and report failure.
     */
    p++;
    if (!*p || SPACE(*p)) {
        sf->mark = p;
        return FALSE;
    }

    /* save beginning of tag (the TAG character itself) */
    sf->pt = p - 1;

    /* now move forward until finding next space, set mark there */
    while (*p && !SPACE(*p))
        p++;
    sf->mark = p;
    return TRUE;
}
/*
* @doc EXTRACT
* @api WORD | GetFirstBlock | Moves the point and mark to surround
* the first block of text following a tag that has been located with
* <f FindNextTag>.
*
* @parm NPSourceFile | sf | Identifies the source file buffer
* block.
*
* @rdesc If the call succeeds, the point is set to the start of the
* text block that immediately follows the tag. The mark is set to the
* end of this block, and either RET_ENDTAG or RET_ENDBLOCK is returned,
* depending on if there are no more blocks in the tag or if there is a block
* following respectively.
*
* If the call fails, the point is set to the start of the next tag or
* the end of the comment buffer if no more tags exist, and
* RET_EMPTYBLOCK is returned.
*
* In any case, if this function is followed by a call to
* <f FindNextTag>, no problems will result.
*
* @comm This call expects the point to be pointing the beginning of
* the tag upon entry (as setup by <f FindNextTag>). Error conditions
* should be checked upon exit from this function.
*
*/
WORD GetFirstBlock(NPSourceFile sf)
{
    PSTR cursor = sf->pt;

    /* Caller contract: the point sits on the tag character. */
    assert(*cursor == TAG);

    /* Step over the tag word itself ... */
    while (*cursor && !SPACE(*cursor))
        cursor++;

    /* ... and over the whitespace separating it from its text. */
    while (*cursor && SPACE(*cursor))
        cursor++;

    /* The text starts here; the common scanner finds where it ends. */
    sf->pt = cursor;
    return CommonGetBlock(sf, cursor);
}
/*
* @doc EXTRACT
* @api WORD | GetNextBlock | Moves the point and mark to surround
* the next block of text of a particular tag.
*
* @parm NPSourceFile | sf | Identifies the source file buffer
* information.
*
* @rdesc If the call succeeds, the point is set to the start of the
* text block that follows the initial mark. The mark upon return is
* set the end of the next text block. Either RET_ENDTAG or
* RET_ENDBLOCK is returned.
*
* If the call fails due to a nonexistent block, or encountering the
* end of the comment buffer, RET_EMPTYBLOCK is returned and the point
* is set to the start of the next tag or the end of the comment
* buffer.
*
* @comm This procedure, in combination with <f GetFirstBlock>, allows
* the tag reader to step through the text fields associated with a tag.
* Contiguous calls to <f GetNextBlock> are possible, which will
* move the region forward to surround each field. If the tag's text
* fields end prematurely, RET_EMPTYBLOCK will be returned as an error
* flag.
*
* Calls to <f GetNextBlock> may always be followed by a call to
* <f FindNextTag>.
*
*/
WORD GetNextBlock(NPSourceFile sf)
{
    PSTR p;

    /* Entry: mark is at end of previous block of text. Move forward
     * to find the start of the next block (the one we want).
     */
    p = sf->mark;

    /* Step past the last character of the previous block.  Two cases
     * must NOT advance:
     *  - the mark is on a block char: an empty block is being exited and
     *    that separator is the one we are looking for;
     *  - the mark is on the terminating NUL: advancing would read beyond
     *    the end of the comment buffer.
     */
    if (*p && *p != BLOCK)
        p++;

    /* Skip whitespace, till `|' char found */
    while (*p && SPACE(*p))
        p++;

    /* This should be the start of next block. If not (other text, a new
     * tag, or the end of the comment buffer), report an empty block and
     * reset mark and point for FindNextTag.
     */
    if (*p != BLOCK) {
        sf->pt = sf->mark = p;
        return RET_EMPTYBLOCK;
    }

    /* Skip the '|' char and any whitespace up to the start of the actual
     * text, and set the point there.  If the buffer ends here instead,
     * CommonGetBlock returns RET_EMPTYBLOCK for this case.
     */
    p++;
    while (*p && SPACE(*p))
        p++;
    sf->pt = p;     // point at beginning of text
    return CommonGetBlock(sf, p);
}
/*
* @doc EXTRACT
* @api WORD | CommonGetBlock | Common block searcher routine for use
* by <f GetFirstBlock> and <f GetNextBlock>.
*
* @parm NPSourceFile | sf | Blah.
* @parm PSTR | p | Point to start searching for the beginning of a
* text block from.
*
* @rdesc Returns RET_ENDBLOCK when there are text blocks following
* this tag, RET_ENDTAG when no more text blocks follow for this tag, or
* RET_EMPTYBLOCK when this block has no text. Current region (point to
* mark inclusive) is set to the selected block.
*
* @comm Performs magic. This does the real work for <f GetNextBlock>
* and <f GetFirstBlock>.
*
*/
static WORD CommonGetBlock(NPSourceFile sf, PSTR p)
{
    PSTR porig;
    PSTR psave;
    WORD ret;

    /* Entry: Save the initial p, as this is assumed to be the
     * start of the current block.
     */
    porig = p;

    /* Scan forward until end of this block, either @ or | or EOF */
    for (;;) {
        while (*p && *p != TAG && *p != BLOCK)
            p++;

        /* A block char preceded by a backslash is escaped: not a separator */
        if (*p == BLOCK && p > sf->lpbuf && *(p - 1) == '\\') {
            p++;
            continue;
        }

        /* Tag must be on start of new line, so if not there, kick it out */
        if (*p == TAG && p > sf->lpbuf && *(p - 1) != '\n') {
            p++;
            continue;
        }

        break;
    }

    /* Encountered another tag, another block, or the end of the buffer.
     * RET_ENDBLOCK if another block follows, RET_ENDTAG otherwise.
     */
    ret = (*p == BLOCK) ? RET_ENDBLOCK : RET_ENDTAG;

    /* Now back up whitespaces until last non-whitespace is found.
     * If we end up backing up over the original setting of p on entry,
     * then this is an empty block, and return error condition.
     *
     * The bounds test comes first so that nothing in front of porig is
     * ever dereferenced.
     */
    psave = p;      // hang onto this location, if EMPTYBLOCK occurs.
    for (p--; p >= porig && SPACE(*p); p--)
        ;

    if (p < porig) {
        /* emptyblock: point and mark both go to the terminator found,
         * i.e. the next tag, the next block char, or the end of buffer.
         */
        sf->mark = sf->pt = psave;
        return RET_EMPTYBLOCK;
    }

    /* normal: backed up to last character of the block, set mark there. */
    sf->mark = p;
    return ret;
}
/*
* @doc EXTRACT
* @api WORD | FixLineCounts | Returns the source line number of a
* point within the comment buffer, for error reporting purposes.
*
* @parm NPSourceFile | sf | Blah.
*
* @parm PSTR | pt | Point to return the line number of. This must be
* a valid point within the comment buffer of <p sf>.
*
* @rdesc Returns the line number of point <p pt> within the comment
* buffer of <p sf>. Newlines are counted to determine the line offset
* within the buffer, and the resulting number of newlines added to the
* initial line number of the first line of the comment buffer. This
* value is returned. It is thus important for other tag reader
* routines not to alter the original comment buffer, as the line number
* returned from this routine would then be invalid.
*
*/
WORD FixLineCounts(NPSourceFile sf, PSTR pt)
{
    PSTR scan;
    WORD cNewlines = 0;

    /* A point in front of the buffer can never be reached by the walk
     * below: something bogus happened, so report line 0.
     */
    if (pt < sf->lpbuf)
        return 0;

    /* Count every newline from the start of the buffer up to and
     * including the point itself.
     */
    scan = sf->lpbuf;
    while (1) {
        if ('\n' == *scan)
            cNewlines++;
        if (scan == pt)
            break;
        scan++;
    }

    /* Offset by the line number on which the comment buffer starts. */
    return (sf->wLineBuf + cNewlines);
}
/*
* @doc EXTRACT
* @api void | PrintError | Prints an error message in a standard
* format, and sets the exit condition flag for the source file block.
*
* @parm NPSourceFile | sf | Specifies the source file buffer block.
* @parm PSTR | szMessage | Error message to print.
* @parm BOOL | fExit | Indicates whether this is a fatal exit. If
* TRUE, the program will exit when the current file has been completely
* parsed.
*
* @comm Prints the source file filename and the line number of the
* current point to standard error, followed by <p szMessage>.
*
*/
void PrintError(NPSourceFile sf, PSTR szMessage, BOOL fExit)
{
    /* Line number of the current point, used as the message prefix. */
    WORD wLine = FixLineCounts(sf, sf->pt);

    fprintf(stderr, errmsg, sf->fileEntry->filename, wLine, szMessage);

    /* Only a fatal error sets the flag; a non-fatal one never clears it. */
    if (!fExit)
        return;
    sf->fExitAfter = TRUE;
}
/*
* @doc EXTRACT
* @api WORD | ProcessWordList | Process a whitespace or comma
* separated list of words following a tag, formatting
* them as a space separated list of words.
*
* @parm NPSourceFile | sf | Blah.
* @parm PSTR * | bufPt | Pointer to a buffer pointer, which should
* initially contain a near buffer obtained with <f NearMalloc>, where
* the formatted word list will be placed. The buffer pointed to will
* be automatically expanded as necessary.
*
* @parm BOOL | fCap | Specifies whether to convert to uppercase
* the processed list of words.
*
* @rdesc Returns either RET_ENDBLOCK or RET_ENDTAG, depending on
* whether there are following blocks within the tag's text or not,
* respectively. (What a horrible sentence). The point and mark will
* be at the end of the text block upon return. If there is no text
* block following the tag, then RET_EMPTYBLOCK is returned, and the
* point and mark point to the next tag in the comment block, or the
* end of the comment block.
*
*/
#define SEPSPACE(c) ((c)==' ' || (c)=='\n' || (c)=='\t' ||(c)==','||(c)==';')
WORD ProcessWordList(NPSourceFile sf, PSTR *bufPt, BOOL fCap)
{
    WORD ret;           // hold return code
    PSTR pNew;          // runner on copy buffer
    PSTR pOldMark;      // one past the last character of the text block
    PSTR p;             // runner on comment block

    /* Get the text of the first block, ie the doclevel specification */
    ret = GetFirstBlock(sf);
    if (ret == RET_EMPTYBLOCK)
        return ret;
    /* Extra text blocks on the tag (ret == RET_ENDBLOCK) are not warned
     * about here; the code is simply handed back to the caller.
     */

    /* Grow the memory copy buffer if needed.
     * NOTE(review): the result of NearRealloc is used unchecked; its
     * failure behaviour is not visible from this file -- confirm.
     */
    if (NearSize(*bufPt) < (int) (sf->mark - sf->pt) + 5)
        *bufPt = NearRealloc(*bufPt, (WORD) (sf->mark - sf->pt) + 10);

    /* Save away copy buffer status */
    pNew = *bufPt;
    pOldMark = sf->mark + 1;    // save mark plus one
    p = sf->pt;

    /* Start with an empty result, so the buffer is a valid string even
     * when the block holds nothing but separators and no word is copied.
     */
    *pNew = '\0';

    while (1) {
        /* skip separators before the next word */
        for (; p < pOldMark && SEPSPACE(*p); p++);
        if (p >= pOldMark) {
            dprintf("ProcessWordList: Breaking loop after space skip\n");
            break;
        }

        /* Save this location, beginning of word, and copy to end of word.
         * toupper() is only defined for values representable as unsigned
         * char (or EOF), so the character is converted before the call.
         */
        for (sf->pt = p; p < pOldMark && !SEPSPACE(*p); p++) {
            if (fCap)
                *pNew++ = (char) toupper((unsigned char) *p);
            else
                *pNew++ = *p;
        }

        /* Put a space between the words, and then null terminate in
         * case this is the last word in a list
         */
        *pNew++ = ' ';
        *pNew = '\0';

        /* Check if we're at end of buffer */
        if (p >= pOldMark) {
            dprintf("ProcessWordList: Breaking loop after word copy.\n");
            break;      // get out of loop
        }
    } // while loop

    /* Restore point and mark to the end of @doc text block */
    sf->pt = sf->mark = pOldMark - 1;
    return ret;
}
/*
* @doc EXTRACT
* @api void | OutputFileHeader | Prints an output file header using
* compiled in constants and system information.
*
* @parm FILE * | fpOut | File pointer to which to write header.
*
* @comm Currently, only the program name, version, and the current
* time (in UNIX <f asctime> format) are written. The file header is
* surrounded by header begin and end tags.
*
*/
#include <time.h>
#include "version.h"
void OutputFileHeader(FILE *fpOut)
{
    time_t curtime;
    struct tm *ptmLocal;

    /* Header begin tag, then the program name and version lines. */
    fprintf(fpOut, "@%s\t\n", T2TEXT_BEGINHEADER);
    fprintf(fpOut, "@%s\t%s\n", T2TEXT_EXTRACTID, VERSIONNAME);
    fprintf(fpOut, "@%s\t%d.%d.%d\n", T2TEXT_EXTRACTVER, rmj, rmm, rup);

    /* Date line.  asctime() supplies the trailing newline itself, which is
     * why the format string has none.  localtime() may return NULL when the
     * time cannot be converted; passing that to asctime() is undefined, so
     * in that case the tag is written with an empty value to keep the
     * header well formed.
     */
    time(&curtime);
    ptmLocal = localtime(&curtime);
    if (ptmLocal != NULL)
        fprintf(fpOut, "@%s\t%s", T2TEXT_EXTRACTDATE, asctime(ptmLocal));
    else
        fprintf(fpOut, "@%s\t\n", T2TEXT_EXTRACTDATE);

    /* Header end tag. */
    fprintf(fpOut, "@%s\t\n", T2TEXT_ENDHEADER);
}