671 lines
18 KiB
C
671 lines
18 KiB
C
/*
|
|
* mktable - table-building program to ease table maintenance problems
|
|
|
|
* DESCRIPTION
|
|
* Several parts of the FORTRAN compiler need large tables.
|
|
* For example, the lexer contains tables of keywords and multicharacter
|
|
* tokens; the intrinsic-function handler contains a table of all the
|
|
* FORTRAN intrinsic functions.
|
|
* Maintaining these tables can be aggravating, since they are typically
|
|
* large and involve lots of drudge work (like changing many sequentially-
|
|
* numbered macro definitions) to modify.
|
|
|
|
* `mktable' can be used to build tables automatically as part of the
|
|
* usual compiler building process. Its usages and semantics are as
|
|
* follows.
|
|
|
|
* `mktable' takes a "table" file on its standard input. Each line of
|
|
* the table file has one of the following forms:
|
|
|
|
* # commentary information
|
|
* "key-string" [index-macro-name [arbitrary-stuff]]
|
|
* <blank line>
|
|
|
|
* The key string and arbitrary-stuff form the contents of a single
|
|
* table record. The index-macro-name is #define'd to be the index
|
|
* of the given record in the table. If the index-macro-name is absent or
|
|
* is an empty string ("") then no macro definition is produced for the
|
|
* record.
|
|
|
|
* `mktable' produces its output on four files:
* mktable.key: the key strings
* mktable.def: #define <index_macro_name> <index into mktable.key>
* mktable.ind: contains the initialization part of a definition
* for an index array for key-letter indexed tables,
* or the initialization part of a collision-resolution
* table for linear-list hashed tables.
* (not generated for sorted or open-addressed tables.)
* mktable.inf: contains arbitrary-stuff
|
|
|
|
* For example, if the table to be defined were named "symtab" and the
|
|
* table being constructed was of the "sorted" type (suitable for binary
|
|
* search),
|
|
|
|
* # contents of symtab:
|
|
* "alpha" ST_ALPHA 2, 4, MONADIC
|
|
* "gamma" ST_GAMMA 2, 3, MONADIC
|
|
* "delta" ST_DELTA 2, 1, DYADIC
|
|
* "epsilon"
|
|
|
|
* then `mktable' produces the following in mktable.key:
|
|
|
|
* "alpha","delta","epsilon","gamma"
|
|
|
|
* and the following in mktable.def:
|
|
|
|
* #define ST_ALPHA 0
|
|
* #define ST_DELTA 1
|
|
* #define ST_GAMMA 2
|
|
|
|
* and in mktable.inf:
|
|
|
|
* {2, 4, MONADIC}, {2, 1, DYADIC}, {0}, {2, 3, MONADIC}
|
|
|
|
* The files might be included in a C source program in the
|
|
* following way:
|
|
|
|
* #include "mktable.def"
|
|
* ...
|
|
* char *symname[] = {
|
|
* # include "mktable.key"
|
|
* };
|
|
* struct syminfo
|
|
* {
|
|
* int size;
|
|
* int cycles;
|
|
* int arity;
|
|
* };
|
|
* struct syminfo symtab[] = {
|
|
* # include "mktable.inf"
|
|
* };
|
|
|
|
* The `mktable' command itself is used in one of the following ways:
|
|
|
|
* mktable "open" size <tablefile
|
|
* This form creates an open-addressed hash table, keyed on
|
|
* the string fields at the beginning of each record in the
|
|
* table file. The hash function used is the absolute value
|
|
* of the sum of all the characters in a key, modulo the table
|
|
* size. The collision resolution function is simply one plus
|
|
* the last hash, modulo the table size.
|
|
* Since some of the entries in the hash table may be empty,
|
|
* and `mktable' has no way of knowing how to fill them,
|
|
* one of the records supplied by the user will be replicated
|
|
* in the empty entries with its key value set to NULL.
|
|
* "table.c" will be created with the hash table itself, and
|
|
* "table.h" will be created with index-macro definitions that
|
|
* may be used to index directly into the table in "table.c".
|
|
|
|
* mktable "hashed" size <tablefile
|
|
* This form creates a hash table keyed on the string fields
|
|
* at the beginning of each table file record. The hash function
|
|
* is the absolute value of the sum of all the characters in a
|
|
* key, modulo the table size. Collision resolution is handled
|
|
* with linear chaining, as follows: If two keys hash to the
|
|
* same table location, the first one will be placed in the table,
|
|
* and the corresponding entry of the collision resolution vector
|
|
* will contain the (integer) index of the next table slot to be
|
|
* checked for the hash synonym. When the collision resolution
|
|
* vector entry is -1, the end of the chain has been reached.
|
|
* Note that since all entries are stored in the main table, the
|
|
* `size' must be at least as large as the number of entries.
|
|
* As with open addressing, some slots in the table may be
|
|
* padded with a replicated entry (key value set to NULL).
|
|
* "table.c" receives the hash table. "table.h" receives the
|
|
* index-macro definitions that will index into the table in
|
|
* "table.c". "tabindex.c" receives the conflict resolution
|
|
* vector.
|
|
|
|
* mktable "sorted" <tablefile
|
|
* This form creates a table sorted in ascending order, keyed
|
|
* on the string fields at the beginning of each record in the
|
|
* table file. Comparisons are ordered according to the ASCII
|
|
* values of the characters being compared.
|
|
* "table.c" will be created with the sorted table itself, and
|
|
* "table.h" will be created with index-macro definitions that
|
|
* may be used to index directly into the table in "table.c".
|
|
|
|
* mktable "key-letter" <tablefile
|
|
* This form creates a key-letter-indexed table.
|
|
* The string fields serve as the
|
|
* key letter. An auxiliary table indexed from 'A' to 'Z'+1
|
|
* gives the starting index of all the entries whose keys begin
|
|
* with each letter (the last entry duplicates the entry for 'Z').
|
|
* "table.c" will contain the sorted table. "tabindex.c" will
|
|
* contain the auxiliary index table information. "table.h" will
|
|
* contain the index-macro definitions that may be used to index
|
|
* directly into the "table.c" table.
|
|
* Note that key-letter tables are sorted in a peculiar way;
|
|
* in ascending order by first letter of the key, but descending
|
|
* order by the remainder of the key. This is required by
|
|
* FORTRAN, to ensure that longer keywords are matched before
|
|
* shorter keywords that are initial substrings of the longer
|
|
* keywords.
|
|
* Also note that the key strings themselves are missing the first
|
|
* char, since by indexing into the table, we are always assured
|
|
* of having matched the first char.
|
|
|
|
* AUTHOR
|
|
* February, 1984 Allen Akin
|
|
|
|
* MODIFICATIONS
|
|
* March 8, 1984 Allen Akin
|
|
* Added linear-list resolved hashing.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* Size limits. */
#define MAXRECORDS  300     /* most records a sorted or key-letter table can hold */
#define MAXLINE     82      /* size of one input line buffer (incl "\n\0") */

/* Values for the `method' argument of load(). */
#define HASHED      0       /* chained hashing, as used by hash_linear() */
#define LINEAR      1       /* store records in input order */
#define OPENADDR    2       /* open addressing, as used by open_addr() */
|
|
|
|
/* Names of the files written by startoutput() and the out*() routines. */
#define KEYFILE     "mktable.key"   /* key strings, one per table slot */
#define DEFFILE     "mktable.def"   /* #define lines for the index macros */
#define INDEXFILE   "mktable.ind"   /* index / collision-resolution vector */
#define INFOFILE    "mktable.inf"   /* the "arbitrary stuff" of each record */
|
|
|
|
/*
 * One table record, as parsed by load() from a line of the table file.
 * The string fields point into the line buffer the record was read from.
 */
typedef struct rec {
    char        *key;       /* key string (without the surrounding quotes) */
    char        *id;        /* index macro name, or NULL if none was given */
    char        *other;     /* rest of the line; written out unchanged */
    struct rec  *link;      /* next record in the list of hash synonyms */
} Rec_t;
|
|
|
|
/* Set to 1 by main() when "-U" is given; makes load() upper-case the keys. */
int Upper = 0;

/* Output streams; opened by startoutput(), closed by endoutput(). */
FILE *Fkeys;    /* KEYFILE   */
FILE *Findex;   /* INDEXFILE */
FILE *Fdefs;    /* DEFFILE   */
FILE *Finfo;    /* INFOFILE  */
|
|
|
|
|
|
/* Function Prototypes */
|
|
|
|
void main (int argc, char **argv);
|
|
void usage (void);
|
|
void error(char * message);
|
|
void open_addr(int size);
|
|
void hash_linear(int size);
|
|
void sorted(void);
|
|
void key_letter(void);
|
|
int load(Rec_t *record, int method, int size);
|
|
void startoutput(void);
|
|
void endoutput(void);
|
|
void outrec(Rec_t *rec);
|
|
void outdef(char *name, int value);
|
|
void outinx(int value);
|
|
void sortrec(Rec_t **rptr, int size);
|
|
int hash(register char *name);
|
|
|
|
|
|
|
|
/* Program code */
|
|
|
|
void __cdecl
|
|
main (
|
|
int argc,
|
|
char **argv
|
|
)
|
|
{
|
|
if (argc <= 1)
|
|
usage();
|
|
|
|
if(strcmp(argv[1], "-U") == 0) {
|
|
Upper = 1;
|
|
argv++;
|
|
argc--;
|
|
}
|
|
|
|
if (strcmp(argv[1], "open") == 0) {
|
|
if (argc != 3)
|
|
usage();
|
|
open_addr(atoi(argv[2]));
|
|
} else if (strcmp(argv[1], "hashed") == 0) {
|
|
if (argc != 3)
|
|
usage();
|
|
hash_linear(atoi(argv[2]));
|
|
} else if (strcmp(argv[1], "sorted") == 0) {
|
|
if (argc != 2)
|
|
usage();
|
|
sorted();
|
|
} else if (strcmp(argv[1], "key-letter") == 0) {
|
|
if (argc != 2)
|
|
usage();
|
|
key_letter();
|
|
} else
|
|
usage();
|
|
exit(0);
|
|
}
|
|
|
|
/*
 * usage - report the accepted command-line forms through error().
 *
 * Does not return: error() prints the message and exits.
 * The message lists the optional "-U" flag that main() accepts.
 */
void
usage (
    void
    )
{
    error("usage: mktable [-U] (open SIZE | hashed SIZE | sorted | key-letter) <table-master");
}
|
|
|
|
/*
 * error - write `message' and a newline to stderr, then exit with status 1.
 *
 * Never returns to the caller.
 */
void
error(
    char * message
    )
{
    fputs(message, stderr);
    fputc('\n', stderr);
    exit(1);
}
|
|
|
|
void
|
|
open_addr(
|
|
int size
|
|
)
|
|
{
|
|
register Rec_t *record; /* points to array storing all records */
|
|
Rec_t defrec; /* "default" record for empty array slot */
|
|
register int i;
|
|
|
|
if (size <= 0)
|
|
error("hash table size specified is less than zero");
|
|
|
|
if ((record = (Rec_t *)calloc(size, sizeof(Rec_t))) == NULL)
|
|
error("insufficient memory for hash table");
|
|
|
|
for (i = 0; i < size; ++i)
|
|
record[i].key = NULL;
|
|
|
|
if (load(record, OPENADDR, size) == 0)
|
|
error("couldn't find any input records");
|
|
|
|
defrec.key = NULL;
|
|
defrec.id = NULL;
|
|
for (i = 0; i < size; ++i)
|
|
if (record[i].key != NULL)
|
|
break;
|
|
defrec.other = record[i].other;
|
|
|
|
startoutput();
|
|
|
|
for (i = 0; i < size; ++i) {
|
|
if (record[i].key == NULL) {
|
|
outrec(&defrec);
|
|
} else {
|
|
outrec(&record[i]);
|
|
outdef(record[i].id, i);
|
|
}
|
|
}
|
|
|
|
endoutput();
|
|
_unlink(INDEXFILE);
|
|
}
|
|
|
|
void
|
|
hash_linear(
|
|
int size
|
|
)
|
|
{
|
|
register Rec_t *record, /* stores some records, all buckets */
|
|
*rp;
|
|
Rec_t defrec; /* default record for empty hash table slots */
|
|
register int i,
|
|
nextslot, /* next empty slot in main hash table */
|
|
prev;
|
|
|
|
if (size <= 0)
|
|
error("hash table size specified is less than zero");
|
|
|
|
if ((record = (Rec_t *)calloc(size, sizeof(Rec_t))) == NULL)
|
|
error("insufficient memory for hash table");
|
|
|
|
for (i = 0; i < size; ++i) {
|
|
record[i].key = NULL;
|
|
record[i].link = NULL;
|
|
}
|
|
|
|
if ((i = load(record, HASHED, size)) == 0)
|
|
error("couldn't find any input records");
|
|
|
|
if (i > size)
|
|
error("too many records to hold in table");
|
|
|
|
defrec.key = NULL;
|
|
defrec.id = NULL;
|
|
for (i = 0; i < size; ++i) {
|
|
if (record[i].key != NULL)
|
|
break;
|
|
}
|
|
defrec.other = record[i].other;
|
|
defrec.link = NULL;
|
|
/*
|
|
* The `load' routine has built a hash table `record'.
|
|
* Each entry in `record' is either empty (key == NULL) or contains a record.
|
|
* Each record may have a NULL link field, or a link field that points to
|
|
* a hash synonym.
|
|
* With this section of code, we rearrange the linked lists of hash synonyms
|
|
* so that all the entries are stored in `record'.
|
|
*/
|
|
nextslot = 0;
|
|
for (i = 0; i < size; ++i) {
|
|
if ((record[i].key != NULL) &&
|
|
(record[i].link != NULL) &&
|
|
((record[i].link < record) || (record[i].link >= (record + size))))
|
|
{
|
|
for (prev = i, rp = record[i].link; rp != NULL; rp = rp->link) {
|
|
while (record[nextslot].key != NULL)
|
|
++nextslot;
|
|
record[prev].link = &record[nextslot];
|
|
record[nextslot] = *rp;
|
|
prev = nextslot;
|
|
}
|
|
}
|
|
}
|
|
|
|
startoutput();
|
|
|
|
for (i = 0; i < size; ++i) {
|
|
if (record[i].key == NULL) {
|
|
outrec(&defrec);
|
|
outinx(-1);
|
|
} else {
|
|
outrec(&record[i]);
|
|
if (record[i].link == NULL)
|
|
outinx(-1);
|
|
else
|
|
outinx(record[i].link - record); /* cvt. to inx in table */
|
|
outdef(record[i].id, i);
|
|
}
|
|
}
|
|
|
|
endoutput();
|
|
}
|
|
|
|
void
|
|
sorted(
|
|
void
|
|
)
|
|
{
|
|
Rec_t record[MAXRECORDS],
|
|
*rptr[MAXRECORDS];
|
|
register int i, size;
|
|
|
|
size = load(record, LINEAR, MAXRECORDS);
|
|
|
|
for (i = 0; i < size; ++i)
|
|
rptr[i] = &record[i];
|
|
|
|
sortrec(rptr, size);
|
|
|
|
startoutput();
|
|
|
|
for (i = 0; i < size; ++i) {
|
|
outrec(rptr[i]);
|
|
outdef(rptr[i]->id, i);
|
|
}
|
|
|
|
endoutput();
|
|
_unlink(INDEXFILE);
|
|
}
|
|
|
|
void
|
|
key_letter(
|
|
void
|
|
)
|
|
{
|
|
Rec_t record[MAXRECORDS],
|
|
*rptr[MAXRECORDS],
|
|
*temp;
|
|
register int i, size, j, k, l;
|
|
|
|
register char lastletter;
|
|
|
|
size = load(record, LINEAR, MAXRECORDS);
|
|
|
|
for (i = 0; i < size; ++i)
|
|
rptr[i] = &record[i];
|
|
|
|
sortrec(rptr, size);
|
|
|
|
for (i = 0; i < size; i = j) {
|
|
for (j = i; j < size; ++j) {
|
|
if (rptr[i]->key[0] != rptr[j]->key[0])
|
|
break;
|
|
}
|
|
|
|
l = j - 1;
|
|
|
|
for (k = i; k < l; ++k, --l) {
|
|
temp = rptr[k];
|
|
rptr[k] = rptr[l];
|
|
rptr[l] = temp;
|
|
}
|
|
}
|
|
|
|
startoutput();
|
|
|
|
lastletter = (char)((Upper ? 'A' : '_') - 1);
|
|
for (i = 0; i < size; ++i)
|
|
{
|
|
while (rptr[i]->key[0] > lastletter) {
|
|
outinx(i);
|
|
++lastletter;
|
|
}
|
|
outrec(rptr[i]);
|
|
outdef(rptr[i]->id, i);
|
|
}
|
|
|
|
|
|
for (; lastletter < (char)((Upper ? 'Z' : 'z') + 1); ++lastletter)
|
|
outinx(size);
|
|
|
|
endoutput();
|
|
}
|
|
|
|
int
|
|
load(
|
|
Rec_t *record,
|
|
int method,
|
|
int size
|
|
)
|
|
{
|
|
char *line;
|
|
register char *p;
|
|
int rec, h, chainlen, maxchainlen = 0, collisions = 0;
|
|
Rec_t r;
|
|
|
|
for (rec = 0; ; ++rec)
|
|
{
|
|
if ((line = malloc(MAXLINE)) == NULL)
|
|
error("insufficient memory to load records");
|
|
|
|
if (fgets(line, MAXLINE, stdin) == NULL)
|
|
break;
|
|
|
|
if (rec >= size)
|
|
error("too many records to handle");
|
|
|
|
r.key = r.id = r.other = NULL;
|
|
r.link = NULL;
|
|
|
|
for (p = line; *p && isspace(*p); ++p)
|
|
;
|
|
if (*p != '"') {
|
|
free(line);
|
|
--rec;
|
|
continue;
|
|
}
|
|
r.key = ++p;
|
|
for (; *p != '"'; ++p) {
|
|
if(Upper && (islower(*p)))
|
|
*p = (char)toupper(*p);
|
|
}
|
|
|
|
*p++ = '\0';
|
|
|
|
for (; *p && isspace(*p); ++p) /* skip space key and id */
|
|
;
|
|
if (*p == '"' && *(p + 1) == '"') { /* no id */
|
|
r.id = NULL;
|
|
p += 2;
|
|
} else if (*p) {
|
|
r.id = p++; /* id start */
|
|
for (; *p && ( ! isspace(*p)); ++p) /* til first space */
|
|
;
|
|
if(*p) {
|
|
*p++ = '\0'; /* terminate id */
|
|
}
|
|
}
|
|
|
|
for (; *p && isspace(*p); ++p) /* skip space til other info */
|
|
;
|
|
if(*p) {
|
|
r.other = p++;
|
|
for (; *p != '\n' && *p != '\0'; ++p)
|
|
;
|
|
*p = '\0';
|
|
}
|
|
|
|
if (method == LINEAR) {
|
|
record[rec] = r;
|
|
} else if (method == OPENADDR) {
|
|
chainlen = 0;
|
|
for(h = hash(r.key) % size; record[h].key; h = (h+1) % size) {
|
|
++chainlen;
|
|
++collisions;
|
|
}
|
|
maxchainlen = (chainlen < maxchainlen)? maxchainlen: chainlen;
|
|
record[h] = r;
|
|
} else { /* method == HASHED */
|
|
Rec_t *rp;
|
|
|
|
h = hash(r.key) % size;
|
|
if (record[h].key == NULL) {
|
|
record[h] = r;
|
|
} else {
|
|
if ((rp = (Rec_t *)malloc(sizeof(Rec_t))) == NULL)
|
|
error("insufficient memory to store all records");
|
|
*rp = record[h];
|
|
r.link = rp;
|
|
record[h] = r;
|
|
++collisions;
|
|
chainlen = 1;
|
|
for (rp = &record[h]; rp->link != NULL; rp = rp->link)
|
|
++chainlen;
|
|
maxchainlen = (chainlen < maxchainlen)? maxchainlen: chainlen;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (method == HASHED || method == OPENADDR)
|
|
fprintf(stderr, "%d collisions, max chain length %d\n", collisions, maxchainlen);
|
|
|
|
return rec;
|
|
}
|
|
|
|
void
|
|
startoutput(
|
|
void
|
|
)
|
|
{
|
|
if ((Fkeys = fopen(KEYFILE, "w")) == NULL)
|
|
error("can't open keys output file");
|
|
|
|
if ((Findex = fopen(INDEXFILE, "w")) == NULL)
|
|
error("can't open index output file");
|
|
|
|
if ((Fdefs = fopen(DEFFILE, "w")) == NULL)
|
|
error("can't open definitions output file");
|
|
|
|
if ((Finfo = fopen(INFOFILE, "w")) == NULL)
|
|
error("can't open info output file");
|
|
}
|
|
|
|
void
|
|
endoutput(
|
|
void
|
|
)
|
|
{
|
|
fclose(Fkeys);
|
|
fclose(Findex);
|
|
fclose(Fdefs);
|
|
fclose(Finfo);
|
|
}
|
|
|
|
/*
 * outrec - write one table entry to the keys and info files.
 *
 * Keys file: the key as a quoted string followed by a comma, or NULL, if
 * the record has no key.  Note that the key is written starting from its
 * second character.
 * Info file: the record's "other" text wrapped in braces, or {0} if the
 * record has none.
 */
void outrec(Rec_t *rec)
{
    if (rec->key != NULL)
        fprintf(Fkeys, "\"%s\",\n", ((rec->key) + 1));
    else
        fprintf(Fkeys, "NULL,\n");

    if (rec->other != NULL)
        fprintf(Finfo, "{%s},\n", rec->other);
    else
        fprintf(Finfo, "{0},\n");
}
|
|
|
|
void
|
|
outdef(
|
|
char *name,
|
|
int value
|
|
)
|
|
{
|
|
if (name != NULL)
|
|
fprintf(Fdefs, "#define %s %d\n", name, value);
|
|
}
|
|
|
|
void
|
|
outinx(
|
|
int value
|
|
)
|
|
{
|
|
fprintf(Findex, "%d,\n", value);
|
|
}
|
|
/*
|
|
* Following code defines the hash function used in `mktable' and in
|
|
* the compiler. Since we must guarantee they are the same function,
|
|
* we use a single source file.
|
|
|
|
* `mktable' does not use the standard include file that the compiler
|
|
* uses, so we define the allowable register declarations here.
|
|
*/
|
|
/* Register-declaration macros; each one expands to `register'. */
#define REG1    register
#define REG2    register
#define REG3    register
|
|
|
|
void
|
|
sortrec(
|
|
Rec_t **rptr,
|
|
int size
|
|
)
|
|
{
|
|
register int j, i, gap;
|
|
Rec_t *temp;
|
|
|
|
for (gap = size / 2; gap > 0; gap /= 2) {
|
|
for (i = gap; i < size; ++i) {
|
|
for (j = i - gap; j >= 0; j -= gap) {
|
|
if (strcmp(rptr[j]->key, rptr[j + gap]->key) <= 0)
|
|
break;
|
|
temp = rptr[j];
|
|
rptr[j] = rptr[j + gap];
|
|
rptr[j + gap] = temp;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * hash - return the sum of the characters in the string `name'.
 *
 * The characters are added as plain `char' values and the sum is returned
 * as is; callers reduce it modulo their table size.  An empty string
 * hashes to 0.
 */
int
hash(
    register char *name
    )
{
    register int sum = 0;

    for (; *name != '\0'; ++name)
        sum += *name;

    return sum;
}
|