/* Version banner; rdinput() also copies it into the generated program. */
char *wartv =
    "Wart Version 1A(003) 27 May 85";
   2: 
   3: /* W A R T */
   4: 
   5: /*
   6:  pre-process a lex-like file into a C program.
   7: 
   8:  Author: Jeff Damens, Columbia University Center for Computing Activities, 11/84.
   9:  Copyright (C) 1985, Trustees of Columbia University in the City of New York.
  10:  Permission is granted to any individual or institution to use, copy, or
  11:  redistribute this software so long as it is not sold for profit, provided this
  12:  copyright notice is retained.
  13: 
  14:  * input format is:
  15:  *  lines to be copied | %states <state names...>
  16:  *  %%
  17:  * <state> | <state,state,...> CHAR  { actions }
  18:  * ...
  19:  *  %%
  20:  */
  21: 
  22: #include "ckcdeb.h"         /* Includes */
  23: #include <stdio.h>
  24: #include <ctype.h>
  25: 
  26: #define C_L 014             /* Formfeed */
  27: 
  28: #define SEP 1                           /* Token types */
  29: #define LBRACK 2
  30: #define RBRACK 3
  31: #define WORD 4
  32: #define COMMA 5
  33: 
  34: /* Storage sizes */
  35: 
  36: #define MAXSTATES 50            /* max number of states */
  37: #define MAXWORD 50          /* max # of chars/word */
  38: #define SBYTES ((MAXSTATES+7)/8)    /* # of bytes for state bitmask */
  39: 
  40: /* Name of wart function in generated program */
  41: 
  42: #ifndef FNAME
  43: #define FNAME "wart"
  44: #endif
  45: 
  46: /* Structure for state information */
  47: 
  48: struct trans { CHAR states[SBYTES]; /* included states */
  49:                int anyst;       /* true if this good from any state */
  50:                CHAR inchr;      /* input character */
  51:            int actno;       /* associated action */
  52:            struct trans *nxt; };    /* next transition */
  53: 
  54: typedef struct trans *Trans;
  55: 
  56: char *malloc();             /* Returns pointer (not int) */
  57: 
  58: 
  59: /* Variables and tables */
  60: 
  61: int lines,nstates,nacts;
  62: 
  63: char tokval[MAXWORD];
  64: 
  65: int tbl[MAXSTATES*128];
  66: 
  67: 
  68: 
  69: char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\n";
  70: 
  71: char *fname = FNAME;        /* function name goes here */
  72: 
  73: /* rest of program... */
  74: 
  75: char *txt2 = "()\n\
  76: {\n\
  77:   int c,actno;\n\
  78:   extern int tbl[];\n\
  79:   while (1) {\n\
  80: 	c = input();\n\
  81: 	if ((actno = tbl[c + state*128]) != -1)\n\
  82: 	  switch(actno) {\n";
  83: 
  84: /* this program's output goes here, followed by final text... */
  85: 
  86: char *txt3 = "\n    }\n  }\n\}\n\n";
  87: 
  88: 
  89: /*
  90:  * turn on the bit associated with the given state
  91:  *
  92:  */
  93: setstate(state,t)
  94: int state;
  95: Trans t;
  96: {
  97:   int idx,msk;
  98:   idx = state/8;            /* byte associated with state */
  99:   msk = 0x80 >> (state % 8);        /* bit mask for state */
 100:   t->states[idx] |= msk;
 101: }
 102: 
 103: /*
 104:  * see if the state is involved in the transition
 105:  *
 106:  */
 107: 
 108: teststate(state,t)
 109: int state;
 110: Trans t;
 111: {
 112:   int idx,msk;
 113:   idx = state/8;
 114:   msk = 0x80 >> (state % 8);
 115:   return(t->states[idx] & msk);
 116: }
 117: 
 118: 
 119: /*
 120:  * read input from here...
 121:  *
 122:  */
 123: 
 124: Trans
 125: rdinput(infp,outfp)
 126: FILE *infp,*outfp;
 127: {
 128:   Trans x,rdrules();
 129:   lines = 1;                /* line counter */
 130:   nstates = 0;              /* no states */
 131:   nacts = 0;                /* no actions yet */
 132:   fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
 133:   fprintf(outfp,"Wart preprocessor. */\n");
 134:   fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/');
 135:   fprintf(outfp,"source file instead, */\n");
 136:   fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/');
 137:   fprintf(outfp,"C source file.     */\n\n");
 138:   fprintf(outfp,"%c* Wart Version Info: */\n",'/');
 139:   fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv);
 140: 
 141:   initial(infp,outfp);          /* read state names, initial defs */
 142:   prolog(outfp);            /* write out our initial code */
 143:   x = rdrules(infp,outfp);      /* read rules */
 144:   epilogue(outfp);          /* write out epilogue code */
 145:   return(x);
 146: }
 147: 
 148: 
 149: /*
 150:  * initial - read initial definitions and state names.  Returns
 151:  * on EOF or %%.
 152:  *
 153:  */
 154: 
 155: initial(infp,outfp)
 156: FILE *infp,*outfp;
 157: {
 158:   int c;
 159:   char wordbuf[MAXWORD];
 160:   while ((c = getc(infp)) != EOF) {
 161:     if (c == '%') {
 162:             rdword(infp,wordbuf);
 163:             if (strcmp(wordbuf,"states") == 0)
 164:                 rdstates(infp,outfp);
 165:             else if (strcmp(wordbuf,"%") == 0) return;
 166:             else fprintf(outfp,"%%%s",wordbuf);
 167:               }
 168:     else putc(c,outfp);
 169:     if (c == '\n') lines++;
 170:      }
 171: }
 172: 
 173: /*
 174:  * boolean function to tell if the given character can be part of
 175:  * a word.
 176:  *
 177:  */
 178: isin(s,c) char *s; int c; {
 179:    for (; *s != '\0'; s++)
 180:       if (*s == c) return(1);
 181:    return(0);
 182: }
 183: isword(c)
 184: int c;
 185: {
 186:   static char special[] = ".%_-$@"; /* these are allowable */
 187:   return(isalnum(c) || isin(special,c));
 188: }
 189: 
 190: /*
 191:  * read the next word into the given buffer.
 192:  *
 193:  */
 194: rdword(fp,buf)
 195: FILE *fp;
 196: char *buf;
 197: {
 198:   int len = 0,c;
 199:   while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c;
 200:   *buf++ = '\0';            /* tie off word */
 201:   ungetc(c,fp);             /* put break char back */
 202: }
 203: 
 204: 
 205: /*
 206:  * read state names, up to a newline.
 207:  *
 208:  */
 209: 
 210: rdstates(fp,ofp)
 211: FILE *fp,*ofp;
 212: {
 213:   int c;
 214:   char wordbuf[MAXWORD];
 215:   while ((c = getc(fp)) != EOF && c != '\n')
 216:   {
 217:     if (isspace(c) || c == C_L) continue;   /* skip whitespace */
 218:     ungetc(c,fp);           /* put char back */
 219:     rdword(fp,wordbuf);     /* read the whole word */
 220:     enter(wordbuf,++nstates);   /* put into symbol tbl */
 221:     fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
 222:   }
 223:   lines++;
 224: }
 225: 
 226: /*
 227:  * allocate a new, empty transition node
 228:  *
 229:  */
 230: 
 231: Trans
 232: newtrans()
 233: {
 234:   Trans new;
 235:   int i;
 236:   new = (Trans) malloc(sizeof (struct trans));
 237:   for (i=0; i<SBYTES; i++) new->states[i] = 0;
 238:   new->anyst = 0;
 239:   new->nxt = NULL;
 240:   return(new);
 241: }
 242: 
 243: 
 244: /*
 245:  * read all the rules.
 246:  *
 247:  */
 248: 
 249: Trans
 250: rdrules(fp,out)
 251: FILE *fp,*out;
 252: {
 253:   Trans head,cur,prev;
 254:   int curtok,i;
 255:   head = cur = NULL;
 256:   while ((curtok = gettoken(fp)) != SEP)
 257: 
 258:     switch(curtok) {
 259:         case LBRACK: if (cur == NULL) cur = newtrans();
 260:                      else fatal("duplicate state list");
 261:                  statelist(fp,cur);/* set states */
 262:                  continue;  /* prepare to read char */
 263: 
 264:         case WORD:   if (strlen(tokval) != 1)
 265:                     fatal("multiple chars in state");
 266:                  if (cur == NULL) {
 267:                 cur = newtrans();
 268:                 cur->anyst = 1;
 269:                 }
 270:                  cur->actno = ++nacts;
 271:                  cur->inchr = tokval[0];
 272:                  if (head == NULL) head = cur;
 273:                  else prev->nxt = cur;
 274:                  prev = cur;
 275:                  cur = NULL;
 276:                  copyact(fp,out,nacts);
 277:                  break;
 278:          default: fatal("bad input format");
 279:          }
 280: 
 281:    return(head);
 282: }
 283: 
 284: 
 285: /*
 286:  * read a list of (comma-separated) states, set them in the
 287:  * given transition.
 288:  *
 289:  */
 290: statelist(fp,t)
 291: FILE *fp;
 292: Trans t;
 293: {
 294:   int curtok,sval;
 295:   curtok = COMMA;
 296:   while (curtok != RBRACK) {
 297:     if (curtok != COMMA) fatal("missing comma");
 298:     if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
 299:         if ((sval = lkup(tokval)) == -1) {
 300:         fprintf(stderr,"state %s undefined\n",tokval);
 301:         fatal("undefined state");
 302:        }
 303:         setstate(sval,t);
 304:     curtok = gettoken(fp);
 305:    }
 306: }
 307: 
 308: /*
 309:  * copy an action from the input to the output file
 310:  *
 311:  */
 312: copyact(inp,outp,actno)
 313: FILE *inp,*outp;
 314: int actno;
 315: {
 316:   int c,bcnt;
 317:   fprintf(outp,"case %d:\n",actno);
 318:   while (((c = getc(inp)) != '\n') && (isspace(c) || c == C_L));
 319:   if (c == '{') {
 320:      bcnt = 1;
 321:      putc(c,outp);
 322:      while (bcnt > 0 && (c = getc(inp)) != EOF) {
 323:     if (c == '{') bcnt++;
 324:     else if (c == '}') bcnt--;
 325:     else if (c == '\n') lines++;
 326:     putc(c,outp);
 327:       }
 328:      if (bcnt > 0) fatal("action doesn't end");
 329:     }
 330:    else {
 331:       while (c != '\n' && c != EOF) {
 332:         putc(c,outp);
 333:         c = getc(inp);
 334:         }
 335:       lines++;
 336:     }
 337:    fprintf(outp,"\nbreak;\n");
 338: }
 339: 
 340: 
 341: /*
 342:  * find the action associated with a given character and state.
 343:  * returns -1 if one can't be found.
 344:  *
 345:  */
 346: faction(hd,state,chr)
 347: Trans hd;
 348: int state,chr;
 349: {
 350:   while (hd != NULL) {
 351:     if (hd->anyst || teststate(state,hd))
 352:       if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno);
 353:     hd = hd->nxt;
 354:     }
 355:   return(-1);
 356: }
 357: 
 358: 
 359: /*
 360:  * empty the table...
 361:  *
 362:  */
 363: emptytbl()
 364: {
 365:   int i;
 366:   for (i=0; i<nstates*128; i++) tbl[i] = -1;
 367: }
 368: 
 369: /*
 370:  * add the specified action to the output for the given state and chr.
 371:  *
 372:  */
 373: 
 374: addaction(act,state,chr)
 375: int act,state,chr;
 376: {
 377:  tbl[state*128 + chr] = act;
 378: }
 379: 
 380: writetbl(fp)
 381: FILE *fp;
 382: {
 383:   warray(fp,"tbl",tbl,128*(nstates+1));
 384: }
 385: 
 386: 
 387: /*
 388:  * write an array to the output file, given its name and size.
 389:  *
 390:  */
 391: warray(fp,nam,cont,siz)
 392: FILE *fp;
 393: char *nam;
 394: int cont[],siz;
 395: {
 396:   int i;
 397:   fprintf(fp,"int %s[] = {\n",nam);
 398:   for (i = 0; i < siz; i++) {
 399:     fprintf(fp,"%d, ",cont[i]);
 400:     if ((i % 20) == 0) putc('\n',fp);
 401:     }
 402:   fprintf(fp,"};\n");
 403: }
 404: 
 405: main(argc,argv)
 406: int argc;
 407: char *argv[];
 408: {
 409:   Trans head;
 410:   int state,c;
 411:   FILE *infile,*outfile;
 412: 
 413:   if (argc > 1) {
 414:     if ((infile = fopen(argv[1],"r")) == NULL) {
 415:         fprintf(stderr,"Can't open %s\n",argv[1]);
 416:     fatal("unreadable input file"); } }
 417:   else infile = stdin;
 418: 
 419:   if (argc > 2) {
 420:     if ((outfile = fopen(argv[2],"w")) == NULL) {
 421:         fprintf(stderr,"Can't write to %s\n",argv[2]);
 422:     fatal("bad output file"); } }
 423:   else outfile = stdout;
 424: 
 425:   clrhash();                /* empty hash table */
 426:   head = rdinput(infile,outfile);   /* read input file */
 427:   emptytbl();               /* empty our tables */
 428:   for (state = 0; state <= nstates; state++)
 429:     for (c = 1; c < 128; c++)
 430:      addaction(faction(head,state,c),state,c);  /* find actions, add to tbl */
 431:   writetbl(outfile);
 432:   copyrest(infile,outfile);
 433:   fprintf(stderr,"%d states, %d actions\n",nstates,nacts);
 434: #ifdef undef
 435:   for (state = 1; state <= nstates; state ++)
 436:     for (c = 1; c < 128; c++)
 437:        if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n",
 438:         state,c,tbl[state*128 + c]);
 439: #endif
 440:   exit(GOOD_EXIT);
 441: }
 442: 
 443: 
 444: /*
 445:  * fatal error handler
 446:  *
 447:  */
 448: 
 449: fatal(msg)
 450: char *msg;
 451: {
 452:   fprintf(stderr,"error in line %d: %s\n",lines,msg);
 453:   exit(BAD_EXIT);
 454: }
 455: 
 456: prolog(outfp)
 457: FILE *outfp;
 458: {
 459:   int c;
 460:   while ((c = *txt1++) != '\0')  putc(c,outfp);
 461:   while ((c = *fname++) != '\0') putc(c,outfp);
 462:   while ((c = *txt2++) != '\0')  putc(c,outfp);
 463: }
 464: 
 465: epilogue(outfp)
 466: FILE *outfp;
 467: {
 468:   int c;
 469:   while ((c = *txt3++) != '\0') putc(c,outfp);
 470: }
 471: 
 472: copyrest(in,out)
 473: FILE *in,*out;
 474: {
 475:   int c;
 476:   while ((c = getc(in)) != EOF) putc(c,out);
 477: }
 478: 
 479: 
 480: /*
 481:  * gettoken - returns token type of next token, sets tokval
 482:  * to the string value of the token if appropriate.
 483:  *
 484:  */
 485: 
 486: gettoken(fp)
 487: FILE *fp;
 488: {
 489:   int c;
 490:   while (1) {               /* loop if reading comments... */
 491:     do {
 492:       c = getc(fp);
 493:       if (c == '\n') lines++;
 494:        } while ((isspace(c) || c == C_L)); /* skip whitespace */
 495:     switch(c) {
 496:       case EOF: return(SEP);
 497:       case '%': if ((c = getc(fp)) == '%') return(SEP);
 498:             tokval[0] = '%';
 499:             tokval[1] = c;
 500:             rdword(fp,tokval+2);
 501:             return(WORD);
 502:       case '<': return(LBRACK);
 503:       case '>': return(RBRACK);
 504:       case ',': return(COMMA);
 505:       case '/': if ((c = getc(fp)) == '*') {
 506:                   rdcmnt(fp);   /* skip over the comment */
 507:               continue; }   /* and keep looping */
 508:             else {
 509:             ungetc(c);  /* put this back into input */
 510:             c = '/'; }  /* put character back, fall thru */
 511: 
 512:       default: if (isword(c)) {
 513:               ungetc(c,fp);
 514:               rdword(fp,tokval);
 515:               return(WORD);
 516:                 }
 517:            else fatal("Invalid character in input");
 518:          }
 519:   }
 520: }
 521: 
 522: /*
 523:  * skip over a comment
 524:  *
 525:  */
 526: 
 527: rdcmnt(fp)
 528: FILE *fp;
 529: {
 530:   int c,star,prcnt;
 531:   prcnt = star = 0;         /* no star seen yet */
 532:   while (!((c = getc(fp)) == '/' && star)) {
 533:     if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
 534:     prcnt = (c == '%');
 535:     star = (c == '*');
 536:     if (c == '\n') lines++; }
 537: }
 538: 
 539: 
 540: 
 541: /*
 542:  * symbol table management for wart
 543:  *
 544:  * entry points:
 545:  *   clrhash - empty hash table.
 546:  *   enter - enter a name into the symbol table
 547:  *   lkup - find a name's value in the symbol table.
 548:  *
 549:  */
 550: 
 551: #define HASHSIZE 101            /* # of entries in hash table */
 552: 
 553: struct sym { char *name;        /* symbol name */
 554:          int val;           /* value */
 555:          struct sym *hnxt; }    /* next on collision chain */
 556:     *htab[HASHSIZE];            /* the hash table */
 557: 
 558: 
 559: /*
 560:  * empty the hash table before using it...
 561:  *
 562:  */
 563: clrhash()
 564: {
 565:   int i;
 566:   for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
 567: }
 568: 
 569: /*
 570:  * compute the value of the hash for a symbol
 571:  *
 572:  */
 573: hash(name)
 574: char *name;
 575: {
 576:   int sum;
 577:   for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
 578:   sum %= HASHSIZE;          /* take sum mod hashsize */
 579:   if (sum < 0) sum += HASHSIZE;     /* disallow negative hash value */
 580:   return(sum);
 581: }
 582: 
 583: /*
 584:  * make a private copy of a string...
 585:  *
 586:  */
 587: char *
 588: copy(s)
 589: char *s;
 590: {
 591:   char *new;
 592:   new = (char *) malloc(strlen(s) + 1);
 593:   strcpy(new,s);
 594:   return(new);
 595: }
 596: 
 597: 
 598: /*
 599:  * enter state name into the hash table
 600:  *
 601:  */
 602: enter(name,svalue)
 603: char *name;
 604: int svalue;
 605: {
 606:   int h;
 607:   struct sym *cur;
 608:   if (lkup(name) != -1) {
 609:     fprintf(stderr,"state %s appears twice...\n");
 610:     exit(BAD_EXIT); }
 611:   h = hash(name);
 612:   cur = (struct sym *)malloc(sizeof (struct sym));
 613:   cur->name = copy(name);
 614:   cur->val = svalue;
 615:   cur->hnxt = htab[h];
 616:   htab[h] = cur;
 617: }
 618: 
 619: /*
 620:  * find name in the symbol table, return its value.  Returns -1
 621:  * if not found.
 622:  *
 623:  */
 624: lkup(name)
 625: char *name;
 626: {
 627:   struct sym *cur;
 628:   for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
 629:     if (strcmp(cur->name,name) == 0) return(cur->val);
 630:   return(-1);
 631: }

Defined functions

addaction defined in line 374; used 1 times
clrhash defined in line 563; used 1 times
copy defined in line 587; used 1 times
copyact defined in line 312; used 1 times
copyrest defined in line 472; used 1 times
emptytbl defined in line 363; used 1 times
enter defined in line 602; used 1 times
epilogue defined in line 465; used 1 times
faction defined in line 346; used 1 times
fatal defined in line 449; used 11 times
gettoken defined in line 486; used 3 times
hash defined in line 573; used 2 times
initial defined in line 155; used 1 times
isin defined in line 178; used 1 times
isword defined in line 183; used 2 times
lkup defined in line 624; used 2 times
main defined in line 405; never used
newtrans defined in line 231; used 2 times
prolog defined in line 456; used 1 times
rdcmnt defined in line 527; used 1 times
rdinput defined in line 124; used 1 times
rdrules defined in line 249; used 2 times
rdstates defined in line 210; used 1 times
rdword defined in line 194; used 4 times
setstate defined in line 93; used 1 times
statelist defined in line 290; used 1 times
teststate defined in line 108; used 1 times
warray defined in line 391; used 1 times
writetbl defined in line 380; used 1 times

Defined variables

fname defined in line 71; used 1 times
htab defined in line 556; used 4 times
lines defined in line 61; used 8 times
nacts defined in line 61; used 4 times
nstates defined in line 61; used 8 times
tbl defined in line 65; used 5 times
tokval defined in line 63; used 8 times
txt1 defined in line 69; used 1 times
txt2 defined in line 75; used 1 times
txt3 defined in line 86; used 1 times
wartv defined in line 1; used 1 times

Defined struct's

sym defined in line 553; used 10 times
trans defined in line 48; used 6 times

Defined typedef's

Trans defined in line 54; used 12 times

Defined macros

COMMA defined in line 32; used 3 times
C_L defined in line 26; used 3 times
FNAME defined in line 43; used 2 times
HASHSIZE defined in line 551; used 4 times
LBRACK defined in line 29; used 1 times
MAXSTATES defined in line 36; used 2 times
MAXWORD defined in line 37; used 4 times
RBRACK defined in line 30; used 2 times
SBYTES defined in line 38; used 2 times
SEP defined in line 28; used 3 times
WORD defined in line 31; used 3 times
Last modified: 1986-04-26
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1652
Valid CSS Valid XHTML 1.0 Strict