/* Version string; rdinput() also copies it into the generated C file. */
char *wartv = "Wart Version 2A(009) 14 Jan 92";

/*
 * When MDEBUG is defined, any macro definitions of the allocator names
 * are removed so that this module calls the real library functions.
 */
#ifdef MDEBUG
/* Use the real ones in this module only */
#ifdef malloc
#undef malloc
#endif /* malloc */
#ifdef calloc
#undef calloc
#endif /* calloc */
#ifdef realloc
#undef realloc
#endif /* realloc */
#ifdef free
#undef free
#endif /* free */
#endif /* MDEBUG */

/* On the MAC build, VOID is defined here as the "void" keyword. */
#ifdef MAC
#define VOID void
#endif /* MAC */
  23: /* W A R T */
/*
  A small subset of "lex".

  Authors: Jeff Damens, Frank da Cruz
  Columbia University Center for Computing Activities.
  First released November 1984.
  Copyright (C) 1984, 1992, Trustees of Columbia University in the City of New
  York.  Permission is granted to any individual or institution to use this
  software as long as it is not sold for profit.  This copyright notice must be
  retained.  This software may not be included in commercial products without
  written permission of Columbia University.
*/
/*
 * input format is:
 *  lines to be copied | %states <state names...>
 *  %%
 * <state> | <state,state,...> CHAR  { actions }
 * ...
 *  %%
 *  more lines to be copied
 */
  48: #include "ckcdeb.h"         /* Includes */
  50: /*
  51:  The following "char" should be changed to "short", "int", or "long" if your
  52:  wart program will generate more than 127 states.  Since wart is used mainly
  53:  with C-Kermit, which has about 50 states, "char" is adequate.  This
  54:  keeps the program about 3K-4K smaller.
  55: */
  57: #define TBL_TYPE "char"         /* C data type of state table */
  59: #define C_L 014             /* Formfeed */
  61: #define SEP 1                           /* Token types */
  62: #define LBRACK 2
  63: #define RBRACK 3
  64: #define WORD 4
  65: #define COMMA 5
  67: /* Storage sizes */
  69: #define MAXSTATES 50            /* max number of states */
  70: #define MAXWORD 50          /* max # of chars/word */
  71: #define SBYTES ((MAXSTATES+6)/8)    /* # of bytes for state bitmask */
  73: /* Name of wart function in generated program */
  75: #ifndef FNAME
  76: #define FNAME "wart"
  77: #endif /* FNAME */
  79: /* Structure for state information */
  81: struct transx {
  82:     CHAR states[SBYTES];        /* included states */
  83:     int anyst;              /* true if this good from any state */
  84:     CHAR inchr;             /* input character */
  85:     int actno;              /* associated action */
  86:     struct transx *nxt;
  87: };                  /* next transition */
  88: typedef struct transx *trans;
  90: /* Function prototypes */
  92: _PROTOTYP( VOID setwstate, (int, trans) );
  93: _PROTOTYP( int teststate, (int, trans) );
  94: _PROTOTYP( trans rdinput, (FILE *, FILE *) );
  95: _PROTOTYP( VOID initial, (FILE *, FILE *) );
  96: _PROTOTYP( int isin, (char *, int) );
  97: _PROTOTYP( int isword, (int) );
  98: _PROTOTYP( VOID rdword, (FILE *, char *) );
  99: _PROTOTYP( VOID rdstates, (FILE *, FILE *) );
 100: _PROTOTYP( trans newtrans, (void) );
 101: _PROTOTYP( trans rdrules, (FILE *, FILE *) );
 102: _PROTOTYP( VOID statelist, (FILE *, trans) );
 103: _PROTOTYP( VOID copyact, (FILE *, FILE *, int) );
 104: _PROTOTYP( int faction, (trans, int, int) );
 105: _PROTOTYP( VOID emptytbl, (void) );
 106: _PROTOTYP( VOID addaction, (int, int, int) );
 107: _PROTOTYP( VOID writetbl, (FILE *) );
 108: _PROTOTYP( VOID warray, (FILE *, char *, int [], int, char *) );
 109: _PROTOTYP( VOID fatal, (char *) );
 110: _PROTOTYP( VOID prolog, (FILE *) );
 111: _PROTOTYP( VOID epilogue, (FILE *) );
 112: _PROTOTYP( VOID copyrest, (FILE *, FILE *) );
 113: _PROTOTYP( int gettoken, (FILE *) );
 114: _PROTOTYP( VOID rdcmnt, (FILE *) );
 115: _PROTOTYP( VOID clrhash, (void) );
 116: _PROTOTYP( int hash, (char *) );
 117: _PROTOTYP( VOID enter, (char *, int) );
 118: _PROTOTYP( int lkup, (char *) );
 119: _PROTOTYP( static char* copy, (char *s) );
 121: /* Variables and tables */
 123: /* lt 1992-10-08 Begin
 124:  * provide definition for deblog variable
 125:  * ckcdeb.h declares as extern. DECC AXP is strict about ref/def model
 126:  * Variable is unused herein, to the best of my knowledge.
 127:  */
 128: #ifdef VMS
 129: int deblog;
 130: #endif /* VMS */
 131: /* lt 1992-10-08 End
 132:  */
 134: static int lines, nstates, nacts;
 136: static char tokval[MAXWORD];
 138: static int tbl[MAXSTATES*96];
 140: char *tbl_type = TBL_TYPE;
 142: char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\nint\n";
 144: char *fname = FNAME;            /* Generated function name goes here */
 146: /* rest of program... */
 148: char *txt2 = "()\n\
 149: {\n\
 150:     int c,actno;\n\
 151:     extern ";
 153: /* Data type of state table is inserted here (short or int) */
 155: char *txt2a = " tbl[];\n    while (1) {\n	c = input() - 32;\n\
 156:         if (c < 0 || c > 95) c = 0;\n";
 158: char *txt2b = "	if ((actno = tbl[c + state*96]) != -1)\n\
 159: 	    switch(actno) {\n";
 161: /* this program's output goes here, followed by final text... */
 163: char *txt3 = "\n	    }\n    }\n}\n\n";
 166: /*
 167:  * turn on the bit associated with the given state
 168:  *
 169:  */
 170: VOID
 171: setwstate(state,t) int state; trans t; {
 172:     int idx,msk;
 173:     idx = state/8;          /* byte associated with state */
 174:     msk = 0x80 >> (state % 8);      /* bit mask for state */
 175:     t->states[idx] |= msk;
 176: }
 178: /*
 179:  * see if the state is involved in the transition
 180:  *
 181:  */
 182: int
 183: teststate(state,t) int state; trans t; {
 184:     int idx,msk;
 185:     idx = state/8;
 186:     msk = 0x80 >> (state % 8);
 187:     return(t->states[idx] & msk);
 188: }
 191: /*
 192:  * read input from here...
 193:  *
 194:  */
 196: trans
 197: rdinput(infp,outfp) FILE *infp,*outfp; {
 198:     trans x,rdrules();
 199:     lines = 1;              /* line counter */
 200:     nstates = 0;            /* no states */
 201:     nacts = 0;              /* no actions yet */
 202:     fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
 203:     fprintf(outfp,"Wart preprocessor. */\n");
 204:     fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/');
 205:     fprintf(outfp,"source file instead, */\n");
 206:     fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/');
 207:     fprintf(outfp,"C source file.     */\n\n");
 208:     fprintf(outfp,"%c* Wart Version Info: */\n",'/');
 209:     fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv);
 211:     initial(infp,outfp);        /* read state names, initial defs */
 212:     prolog(outfp);          /* write out our initial code */
 213:     x = rdrules(infp,outfp);        /* read rules */
 214:     epilogue(outfp);            /* write out epilogue code */
 215:     return(x);
 216: }
 219: /*
 220:  * initial - read initial definitions and state names.  Returns
 221:  * on EOF or %%.
 222:  *
 223:  */
 224: VOID
 225: initial(infp,outfp) FILE *infp, *outfp; {
 226:     int c;
 227:     char wordbuf[MAXWORD];
 228:     while ((c = getc(infp)) != EOF) {
 229:     if (c == '%') {
 230:         rdword(infp,wordbuf);
 231:         if (strcmp(wordbuf,"states") == 0)
 232:           rdstates(infp,outfp);
 233:         else if (strcmp(wordbuf,"%") == 0) return;
 234:         else fprintf(outfp,"%%%s",wordbuf);
 235:     }
 236:     else putc(c,outfp);
 237:     if (c == '\n') lines++;
 238:     }
 239: }
 241: /*
 242:  * boolean function to tell if the given character can be part of
 243:  * a word.
 244:  *
 245:  */
 246: int
 247: isin(s,c) char *s; int c; {
 248:     for (; *s != '\0'; s++)
 249:       if (*s == (char) c) return(1);
 250:     return(0);
 251: }
 252: int
 253: isword(c) int c; {
 254:     static char special[] = ".%_-$@";   /* these are allowable */
 255:     return(isalnum(c) || isin(special,c));
 256: }
 258: /*
 259:  * read the next word into the given buffer.
 260:  *
 261:  */
 262: VOID
 263: rdword(fp,buf) FILE *fp; char *buf; {
 264:     int len = 0,c;
 265:     while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = (char) c;
 266:     *buf++ = '\0';          /* tie off word */
 267:     ungetc(c,fp);           /* put break char back */
 268: }
 270: /*
 271:  * read state names, up to a newline.
 272:  *
 273:  */
 274: VOID
 275: rdstates(fp,ofp) FILE *fp,*ofp; {
 276:     int c;
 277:     char wordbuf[MAXWORD];
 278:     while ((c = getc(fp)) != EOF && c != '\n')   {
 279:     if (isspace(c) || c == C_L) continue;   /* skip whitespace */
 280:     ungetc(c,fp);           /* put char back */
 281:     rdword(fp,wordbuf);     /* read the whole word */
 282:     enter(wordbuf,++nstates);   /* put into symbol tbl */
 283:     fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
 284:     }
 285:     lines++;
 286: }
 288: /*
 289:  * allocate a new, empty transition node
 290:  *
 291:  */
 292: trans
 293: newtrans() {
 294:     trans new;
 295:     int i;
 296:     new = (trans) malloc(sizeof (struct transx));
 297:     for (i=0; i<SBYTES; i++) new->states[i] = 0;
 298:     new->anyst = 0;
 299:     new->nxt = NULL;
 300:     return(new);
 301: }
 304: /*
 305:  * read all the rules.
 306:  *
 307:  */
 309: trans
 310: rdrules(fp,out) FILE *fp,*out; {
 311:     trans head,cur,prev;
 312:     int curtok;
 313:     head = cur = prev = NULL;
 314:     while ((curtok = gettoken(fp)) != SEP)
 316:       switch(curtok) {
 317:     case LBRACK:
 318:       if (cur == NULL)
 319:         cur = newtrans();
 320:       else
 321:         fatal("duplicate state list");
 322:       statelist(fp,cur);        /* set states */
 323:       continue;         /* prepare to read char */
 325:     case WORD:
 326:       if ((int)strlen(tokval) != 1)
 327:         fatal("multiple chars in state");
 328:       if (cur == NULL) {
 329:           cur = newtrans();
 330:           cur->anyst = 1;
 331:       }
 332:       cur->actno = ++nacts;
 333:       cur->inchr = (char) (tokval[0] - 32);
 334:       if (head == NULL)
 335:         head = cur;
 336:       else
 337:         prev->nxt = cur;
 338:       prev = cur;
 339:       cur = NULL;
 340:       copyact(fp,out,nacts);
 341:       break;
 342:     default: fatal("bad input format");
 343:       }
 344:     return(head);
 345: }
 347: /*
 348:  * read a list of (comma-separated) states, set them in the
 349:  * given transition.
 350:  *
 351:  */
 352: VOID
 353: statelist(fp,t) FILE *fp; trans t; {
 354:     int curtok,sval;
 355:     curtok = COMMA;
 356:     while (curtok != RBRACK) {
 357:     if (curtok != COMMA) fatal("missing comma");
 358:     if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
 359:         if ((sval = lkup(tokval)) == -1) {
 360:         fprintf(stderr,"state %s undefined\n",tokval);
 361:         fatal("undefined state");
 362:     }
 363:         setwstate(sval,t);
 364:     curtok = gettoken(fp);
 365:     }
 366: }
 368: /*
 369:  * copy an action from the input to the output file
 370:  *
 371:  */
 372: VOID
 373: copyact(inp,outp,actno) FILE *inp,*outp; int actno; {
 374:     int c,bcnt;
 375:     fprintf(outp,"case %d:\n",actno);
 376:     while (c = getc(inp), (isspace(c) || c == C_L))
 377:       if (c == '\n') lines++;
 378:     if (c == '{') {
 379:     bcnt = 1;
 380:     fputs("    {",outp);
 381:     while (bcnt > 0 && (c = getc(inp)) != EOF) {
 382:         if (c == '{') bcnt++;
 383:         else if (c == '}') bcnt--;
 384:         else if (c == '\n') lines++;
 385:         putc(c,outp);
 386:     }
 387:     if (bcnt > 0) fatal("action doesn't end");
 388:     } else {
 389:     while (c != '\n' && c != EOF) {
 390:         putc(c,outp);
 391:         c = getc(inp);
 392:     }
 393:     lines++;
 394:     }
 395:     fprintf(outp,"\n    break;\n");
 396: }
 398: /*
 399:  * find the action associated with a given character and state.
 400:  * returns -1 if one can't be found.
 401:  *
 402:  */
 403: int
 404: faction(hd,state,chr) trans hd; int state,chr; {
 405:     while (hd != NULL) {
 406:     if (hd->anyst || teststate(state,hd))
 407:       if (hd->inchr == ('.' - 32) || hd->inchr == (char) chr)
 408:         return(hd->actno);
 409:     hd = hd->nxt;
 410:     }
 411:     return(-1);
 412: }
 414: /*
 415:  * empty the table...
 416:  *
 417:  */
 418: VOID
 419: emptytbl() {
 420:     int i;
 421:     for (i=0; i<nstates*96; i++) tbl[i] = -1;
 422: }
 424: /*
 425:  * add the specified action to the output for the given state and chr.
 426:  *
 427:  */
 428: VOID
 429: addaction(act,state,chr) int act,state,chr; {
 430:     tbl[state*96 + chr] = act;
 431: }
 433: VOID
 434: writetbl(fp) FILE *fp; {
 435:     warray(fp,"tbl",tbl,96*(nstates+1),TBL_TYPE);
 436: }
 439: /*
 440:  * write an array to the output file, given its name and size.
 441:  *
 442:  */
 443: VOID
 444: warray(fp,nam,cont,siz,typ) FILE *fp; char *nam; int cont[],siz; char *typ; {
 445:     int i;
 446:     fprintf(fp,"%s %s[] = {\n",typ,nam);
 447:     for (i = 0; i < siz - 1; ) {
 448:     fprintf(fp,"%2d, ",cont[i]);
 449:     if ((++i % 16) == 0) putc('\n',fp);
 450:     }
 451:     fprintf(fp,"%2d ",cont[siz-1]);
 452:     fprintf(fp,"};\n");
 453: }
 455: VOID
 456: main(argc,argv) int argc; char *argv[]; {
 457:     trans head;
 458:     int state,c;
 459:     FILE *infile,*outfile;
 461:     if (argc > 1) {
 462:     if ((infile = fopen(argv[1],"r")) == NULL) {
 463:         fprintf(stderr,"Can't open %s\n",argv[1]);
 464:         fatal("unreadable input file");
 465:     }
 466:     } else infile = stdin;
 468:     if (argc > 2) {
 469:     if ((outfile = fopen(argv[2],"w")) == NULL) {
 470:         fprintf(stderr,"Can't write to %s\n",argv[2]);
 471:         fatal("bad output file");
 472:     }
 473:     } else outfile = stdout;
 475:     clrhash();              /* empty hash table */
 476:     head = rdinput(infile,outfile); /* read input file */
 477:     emptytbl();             /* empty our tables */
 478:     for (state = 0; state <= nstates; state++)
 479:       for (c = 1; c < 96; c++)      /* find actions, */
 480:     addaction(faction(head,state,c),state,c); /* add to tbl */
 481:     writetbl(outfile);
 482:     copyrest(infile,outfile);
 483:     printf("%d states, %d actions\n",nstates,nacts);
 484:     exit(GOOD_EXIT);
 485: }
 488: /*
 489:  * fatal error handler
 490:  *
 491:  */
 493: VOID
 494: fatal(msg) char *msg; {
 495:     fprintf(stderr,"error in line %d: %s\n",lines,msg);
 496:     exit(BAD_EXIT);
 497: }
 499: VOID
 500: prolog(outfp) FILE *outfp; {
 501:     int c;
 502:     while ((c = *txt1++)     != '\0') putc(c,outfp);
 503:     while ((c = *fname++)    != '\0') putc(c,outfp);
 504:     while ((c = *txt2++)     != '\0') putc(c,outfp);
 505:     while ((c = *tbl_type++) != '\0') putc(c,outfp);
 506:     while ((c = *txt2a++)    != '\0') putc(c,outfp);
 507:     while ((c = *txt2b++)    != '\0') putc(c,outfp);
 508: }
 510: VOID
 511: epilogue(outfp) FILE *outfp; {
 512:     int c;
 513:     while ((c = *txt3++) != '\0') putc(c,outfp);
 514: }
 516: VOID
 517: copyrest(in,out) FILE *in,*out; {
 518:     int c;
 519:     while ((c = getc(in)) != EOF) putc(c,out);
 520: }
 522: /*
 523:  * gettoken - returns token type of next token, sets tokval
 524:  * to the string value of the token if appropriate.
 525:  *
 526:  */
 528: int
 529: gettoken(fp) FILE *fp; {
 530:     int c;
 531:     while (1) {             /* loop if reading comments... */
 532:     do {
 533:         c = getc(fp);
 534:         if (c == '\n') lines++;
 535:     } while ((isspace(c) || c == C_L)); /* skip whitespace */
 536:     switch(c) {
 537:       case EOF:
 538:         return(SEP);
 539:       case '%':
 540:         if ((c = getc(fp)) == '%') return(SEP);
 541:         tokval[0] = '%';
 542:         tokval[1] = (char) c;
 543:         rdword(fp,tokval+2);
 544:         return(WORD);
 545:       case '<':
 546:         return(LBRACK);
 547:       case '>':
 548:         return(RBRACK);
 549:       case ',':
 550:         return(COMMA);
 551:       case '/':
 552:         if ((c = getc(fp)) == '*') {
 553:         rdcmnt(fp);     /* skip over the comment */
 554:         continue;
 555:         } else {            /* and keep looping */
 556:         ungetc(c,fp);       /* put this back into input */
 557:         c = '/';        /* put character back, fall thru */
 558:         }
 560:       default:
 561:         if (isword(c)) {
 562:         ungetc(c,fp);
 563:         rdword(fp,tokval);
 564:         return(WORD);
 565:         } else fatal("Invalid character in input");
 566:     }
 567:     }
 568: }
 570: /*
 571:  * skip over a comment
 572:  *
 573:  */
 575: VOID
 576: rdcmnt(fp) FILE *fp; {
 577:     int c,star,prcnt;
 578:     prcnt = star = 0;           /* no star seen yet */
 579:     while (!((c = getc(fp)) == '/' && star)) {
 580:     if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
 581:     prcnt = (c == '%');
 582:     star = (c == '*');
 583:     if (c == '\n') lines++;
 584:     }
 585: }
 587: /*
 588:  * symbol table management for wart
 589:  *
 590:  * entry points:
 591:  *   clrhash - empty hash table.
 592:  *   enter - enter a name into the symbol table
 593:  *   lkup - find a name's value in the symbol table.
 594:  *
 595:  */
 597: #define HASHSIZE 101            /* # of entries in hash table */
 599: struct sym {
 600:     char *name;             /* symbol name */
 601:     int val;                /* value */
 602:     struct sym *hnxt;           /* next on collision chain */
 603: } *htab[HASHSIZE];          /* the hash table */
 605: /*
 606:  * empty the hash table before using it...
 607:  *
 608:  */
 609: VOID
 610: clrhash() {
 611:     int i;
 612:     for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
 613: }
 615: /*
 616:  * compute the value of the hash for a symbol
 617:  *
 618:  */
 619: int
 620: hash(name) char *name; {
 621:     int sum;
 622:     for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
 623:     sum %= HASHSIZE;            /* take sum mod hashsize */
 624:     if (sum < 0) sum += HASHSIZE;   /* disallow negative hash value */
 625:     return(sum);
 626: }
 628: /*
 629:  * make a private copy of a string...
 630:  *
 631:  */
 632: static char*
 633: copy(s) char *s; {
 634:     char *new;
 635:     new = (char *) malloc((int)strlen(s) + 1);
 636:     strcpy(new,s);
 637:     return(new);
 638: }
 640: /*
 641:  * enter state name into the hash table
 642:  *
 643:  */
 644: VOID
 645: enter(name,svalue) char *name; int svalue; {
 646:     int h;
 647:     struct sym *cur;
 648:     if (lkup(name) != -1) {
 649:     fprintf(stderr,"state \"%s\" appears twice...\n", name);
 650:     exit(BAD_EXIT);
 651:     }
 652:     h = hash(name);
 653:     cur = (struct sym *)malloc(sizeof (struct sym));
 654:     cur->name = copy(name);
 655:     cur->val = svalue;
 656:     cur->hnxt = htab[h];
 657:     htab[h] = cur;
 658: }
 660: /*
 661:  * find name in the symbol table, return its value.  Returns -1
 662:  * if not found.
 663:  *
 664:  */
 665: int
 666: lkup(name) char *name; {
 667:     struct sym *cur;
 668:     for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
 669:       if (strcmp(cur->name,name) == 0) return(cur->val);
 670:     return(-1);
 671: }

