1: /* 2: * RCS file input 3: */ 4: static char rcsid[]= 5: "$Header: /usr/wft/RCS/SRC/RCS/rcslex.c,v 3.3 82/12/10 16:22:37 wft Exp $ Purdue CS"; 6: /********************************************************************************* 7: * Lexical Analysis. 8: * Character mapping table, 9: * hashtable, Lexinit, nextlex, getlex, getkey, 10: * getid, getnum, readstring, printstring, savestring, 11: * checkid, serror, fatserror, error, faterror, warn, diagnose 12: * fflsbuf, puts, fprintf 13: * Testprogram: define LEXDB 14: ********************************************************************************* 15: * 16: * Copyright (C) 1982 by Walter F. Tichy 17: * Purdue University 18: * Computer Science Department 19: * West Lafayette, IN 47907 20: * 21: * All rights reserved. No part of this software may be sold or distributed 22: * in any form or by any means without the prior written permission of the 23: * author. 24: * Report problems and direct all inquiries to Tichy@purdue (ARPA net). 25: */ 26: 27: /* $Log: rcslex.c,v $ 28: * Revision 3.3 82/12/10 16:22:37 wft 29: * Improved error messages, changed exit status on error to 1. 30: * 31: * Revision 3.2 82/11/28 21:27:10 wft 32: * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h. 33: * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations 34: * properly in case there is an IO-error (e.g., file system full). 35: * 36: * Revision 3.1 82/10/11 19:43:56 wft 37: * removed unused label out:; 38: * made sure all calls to getc() return into an integer, not a char. 39: */ 40: 41: 42: /* 43: #define LEXDB 44: /* version LEXDB is for testing the lexical analyzer. The testprogram 45: * reads a stream of lexemes, enters the revision numbers into the 46: * hashtable, and prints the recognized tokens. Keywords are recognized 47: * as identifiers. 48: */ 49: 50: 51: 52: #include "rcsbase.h" 53: 54: 55: 56: /* character mapping table */ 57: enum tokens map[] = { 58: EOFILE, /* this will end up at ctab[-1] */ 59: UNKN, INSERT, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 60: UNKN, SPACE, NEWLN, UNKN, SPACE, UNKN, UNKN, UNKN, 61: UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 62: UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 63: SPACE, EXCLA, DQUOTE, HASH, DOLLAR, PERCNT, AMPER, SQUOTE, 64: LPARN, RPARN, TIMES, PLUS, COMMA, MINUS, PERIOD, DIVIDE, 65: DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, 66: DIGIT, DIGIT, COLON, SEMI, LESS, EQUAL, GREAT, QUEST, 67: AT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 68: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 69: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 70: LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR, UNDER, 71: ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 72: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 73: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 74: LETTER, LETTER, LETTER, LBRACE, BAR, RBRACE, TILDE, UNKN 75: }; 76: 77: 78: 79: 80: struct hshentry * nexthsh; /*pointer to next hashtable-entry, set by lookup*/ 81: 82: enum tokens nexttok; /*next token, set by nextlex */ 83: 84: int hshenter /*if true, next suitable lexeme will be entered */ 85: = true; /*into the symbol table. Handle with care. */ 86: int nextc; /*next input character, initialized by Lexinit */ 87: 88: int eof /*end-of-file indicator, set to >0 on end of file*/ 89: = 0; 90: int line /*current line-number of input */ 91: = 1; 92: int nerror /*counter for errors */ 93: = 0; 94: int nwarn /*counter for warnings */ 95: = 0; 96: char * cmdid /*command identification for error messages */ 97: = nil; 98: int quietflag /*indicates quiet mode */ 99: = false; 100: FILE * finptr; /*input file descriptor */ 101: 102: FILE * frewrite; /*file descriptor for echoing input */ 103: 104: int rewriteflag;/*indicates whether to echo to frewrite */ 105: 106: char StringTab[strtsize]; /* string table and heap */ 107: 108: char * NextString /*pointer to next identifier in StringTab*/ 109: = nil; 110: char * Topchar /*pointer to next free byte in StringTab*/ 111: = &StringTab[0]; /*set by nextlex, lookup */ 112: struct hshentry hshtab[hshsize]; /*hashtable */ 113: 114: 115: 116: 117: 118: lookup() { 119: 120: /* Function: Looks up the character string pointed to by NextString in the 121: * hashtable. If the string is not present, a new entry for it is created. 122: * If the string is present, TopChar is moved back to save the space for 123: * the string, and NextString is set to point to the original string. 124: * In any case, the address of the corresponding hashtable entry is placed 125: * into nexthsh. 126: * Algorithm: Quadratic hash, covering all entries. 127: * Assumptions: NextString points at the first character of the string. 128: * Topchar points at the first empty byte after the string. 129: */ 130: 131: register int ihash; /* index into hashtable */ 132: register char * sp, * np; 133: int c, delta, final, FirstScan; /*loop control*/ 134: 135: /* calculate hash code */ 136: sp = NextString; 137: ihash = 0; 138: while (*sp) ihash += *sp++; 139: 140: /* set up first search loop (c=0,step=1,until (hshsiz-1)/2 */ 141: c=0;delta=1;final=(hshsize-1)/2; 142: FirstScan=true; /*first loop */ 143: 144: for (;;) { 145: ihash = (ihash+c)%hshsize; /*next index*/ 146: 147: if (hshtab[ihash].num == nil) { 148: /*empty slot found*/ 149: hshtab[ihash].num = NextString; 150: nexthsh= &hshtab[ihash];/*save hashtable address*/ 151: # ifdef LEXDB 152: printf("\nEntered: %s at %d ",nexthsh->num, ihash); 153: # endif 154: return; 155: } 156: /* compare strings */ 157: sp=NextString;np=hshtab[ihash].num; 158: while (*sp == *np++) { 159: if (*sp == 0) { 160: /* match found */ 161: nexthsh= &hshtab[ihash]; 162: Topchar = NextString; 163: NextString = nexthsh->num; 164: return; 165: } else sp++; 166: } 167: 168: /* neither empty slot nor string found */ 169: /* calculate next index and repeat */ 170: if (c != final) 171: c += delta; 172: else { 173: if (FirstScan) { 174: /*set up second sweep*/ 175: delta = -1; final = 1; FirstScan= false; 176: } else { 177: fatserror("Hashtable overflow"); 178: } 179: } 180: } 181: }; 182: 183: 184: 185: 186: 187: 188: Lexinit() 189: /* Function: Initialization of lexical analyzer: 190: * initializes the hastable, 191: * initializes nextc, nexttok if finptr != NULL 192: */ 193: { register int i; 194: 195: for (i=hshsize-1; i>=0; i--) { 196: hshtab[i].num = nil; 197: } 198: 199: hshenter=true; eof=0; line=1; nerror=0; nwarn=0; 200: NextString=nil; Topchar = &StringTab[0]; 201: if (finptr) { 202: nextc = GETC(finptr,frewrite,rewriteflag); /*initial character*/ 203: nextlex(); /*initial token*/ 204: } else { 205: nextc = '\0'; 206: nexttok=EOFILE; 207: } 208: } 209: 210: 211: 212: 213: 214: 215: 216: nextlex() 217: 218: /* Function: Reads the next token and sets nexttok to the next token code. 219: * Only if the hshenter==true, a revision number is entered into the 220: * hashtable and a pointer to it is placed into nexthsh. 221: * This is useful for avoiding that dates are placed into the hashtable. 222: * For ID's and NUM's, NextString is set to the character string in the 223: * string table. Assumption: nextc contains the next character. 224: */ 225: { register c; 226: register char * sp; 227: register enum tokens d; 228: 229: if (eof) { 230: nexttok=EOFILE; 231: return; 232: } 233: loop: 234: switch(nexttok=ctab[nextc]) { 235: 236: case UNKN: 237: case IDCHAR: 238: case PERIOD: 239: serror("unknown Character: %c",nextc); 240: nextc=GETC(finptr,frewrite,rewriteflag); 241: goto loop; 242: 243: case NEWLN: 244: line++; 245: # ifdef LEXDB 246: putchar('\n'); 247: # endif 248: /* Note: falls into next case */ 249: 250: case SPACE: 251: nextc=GETC(finptr,frewrite,rewriteflag); 252: goto loop; 253: 254: case EOFILE: 255: eof++; 256: nexttok=EOFILE; 257: return; 258: 259: case DIGIT: 260: NextString = sp = Topchar; 261: *sp++ = nextc; 262: while ((d=ctab[c=GETC(finptr,frewrite,rewriteflag)])==DIGIT || 263: d==PERIOD) { 264: *sp++ = c; /* 1.2. and 1.2 are different */ 265: } 266: *sp++ = '\0'; 267: if (sp >= StringTab+strtsize) { 268: /*may have written outside stringtable already*/ 269: fatserror("Stringtable overflow"); 270: } 271: Topchar = sp; 272: nextc = c; 273: if (hshenter == true) 274: lookup(); /* lookup updates NextString, Topchar*/ 275: nexttok = NUM; 276: return; 277: 278: 279: case LETTER: 280: NextString = sp = Topchar; 281: *sp++ = nextc; 282: while ((d=ctab[c=GETC(finptr,frewrite,rewriteflag)])==LETTER || 283: d==DIGIT || d==IDCHAR) { 284: *sp++ = c; 285: } 286: *sp++ = '\0'; 287: if (sp >= StringTab+strtsize) { 288: /*may have written outside stringtable already*/ 289: fatserror("Stringtable overflow"); 290: } 291: Topchar = sp; 292: nextc = c; 293: nexttok = ID; /* may be ID or keyword */ 294: return; 295: 296: case SBEGIN: /* long string */ 297: nexttok = STRING; 298: /* note: only the initial SBEGIN has been read*/ 299: /* read the string, and reset nextc afterwards*/ 300: return; 301: 302: default: 303: nextc=GETC(finptr,frewrite,rewriteflag); 304: return; 305: } 306: } 307: 308: 309: int getlex(token) 310: enum tokens token; 311: /* Function: Checks if nexttok is the same as token. If so, 312: * advances the input by calling nextlex and returns true. 313: * otherwise returns false. 314: * Doesn't work for strings and keywords; loses the character string for ids. 315: */ 316: { 317: if (nexttok==token) { 318: nextlex(); 319: return(true); 320: } else return(false); 321: } 322: 323: int getkey (key) 324: char * key; 325: /* Function: If the current token is a keyword identical to key, 326: * getkey advances the input by calling nextlex and returns true; 327: * otherwise returns false. 328: */ 329: { 330: register char *s1,*s2; 331: 332: if (nexttok==ID) { 333: s1=key; s2=NextString; 334: while(*s1 == *s2++) 335: if (*s1++ == '\0') { 336: /* match found */ 337: Topchar = NextString; /*reset Topchar */ 338: nextlex(); 339: return(true); 340: } 341: } 342: return(false); 343: } 344: 345: 346: 347: char * getid() 348: /* Function: Checks if nexttok is an identifier. If so, 349: * advances the input by calling nextlex and returns a pointer 350: * to the identifier; otherwise returns nil. 351: * Treats keywords as identifiers. 352: */ 353: { 354: register char * name; 355: if (nexttok==ID) { 356: name = NextString; 357: nextlex(); 358: return name; 359: } else return nil; 360: } 361: 362: 363: struct hshentry * getnum() 364: /* Function: Checks if nexttok is a number. If so, 365: * advances the input by calling nextlex and returns a pointer 366: * to the hashtable entry. Otherwise returns nil. 367: * Doesn't work if hshenter is false. 368: */ 369: { 370: register struct hshentry * num; 371: if (nexttok==NUM) { 372: num=nexthsh; 373: nextlex(); 374: return num; 375: } else return nil; 376: } 377: 378: 379: readstring() 380: /* skip over characters until terminating single SDELIM */ 381: /* if rewriteflag==true, copy every character read to frewrite.*/ 382: /* Does not advance nextlex at the end. */ 383: { register c; 384: if (rewriteflag) { 385: /* copy string verbatim to frewrite */ 386: while ((c=putc(getc(finptr),frewrite)) != EOF) { 387: if (c==SDELIM) { 388: if ((c=putc(getc(finptr),frewrite)) != SDELIM) { 389: /* end of string */ 390: nextc=c; 391: return; 392: } 393: } 394: } 395: } else { 396: /* skip string */ 397: while ((c=getc(finptr)) != EOF) { 398: if (c==SDELIM) { 399: if ((c=getc(finptr)) != SDELIM) { 400: /* end of string */ 401: nextc=c; 402: return; 403: } 404: } 405: } 406: } 407: nextc = c; 408: error("Unterminated string"); 409: } 410: 411: 412: printstring() 413: /* Function: copy a string to stdout, until terminated with a single SDELIM. 414: * Does not advance nextlex at the end. 415: */ 416: { 417: register c; 418: while ((c=getc(finptr)) != EOF) { 419: if (c==SDELIM) { 420: if ((c=getc(finptr)) != SDELIM) { 421: /* end of string */ 422: nextc=c; 423: return; 424: } 425: } 426: putchar(c); 427: } 428: nextc = c; 429: error("Unterminated string"); 430: } 431: 432: 433: 434: int savestring(target,length) 435: char * target; int length; 436: /* copies a string terminated with SDELIM from file finptr to buffer target, 437: * but not more than length bytes. If the string is longer than length, 438: * the extra characters are skipped. The string may be empty, in which 439: * case a '\0' is placed into target. 440: * Double SDELIM is replaced with SDELIM. 441: * If rewriteflag==true, the string is also copied unchanged to frewrite. 442: * Returns the length of the saved string. 443: * Does not advance nextlex at the end. 444: */ 445: { 446: register char * tp, * max; 447: register c; 448: 449: tp=target; max= target+length; /*max is one too large*/ 450: while ((c=GETC(finptr,frewrite,rewriteflag))!=EOF) { 451: *tp++ =c; 452: if (c== SDELIM) { 453: if ((c=GETC(finptr,frewrite,rewriteflag))!=SDELIM) { 454: /* end of string */ 455: *(tp-1)='\0'; 456: nextc=c; 457: return tp-target; 458: } 459: } 460: if (tp >= max) { 461: /* overflow */ 462: error("string buffer overflow -- truncating string"); 463: target[length-1]='\0'; 464: /* skip rest of string */ 465: while ((c=GETC(finptr,frewrite,rewriteflag))!=EOF) { 466: if ((c==SDELIM) && ((c=GETC(finptr,frewrite,rewriteflag))!=SDELIM)) { 467: /* end of string */ 468: nextc=c; 469: return length; 470: } 471: } 472: nextc = c; 473: error("Can't find %c to terminate string before end of file",SDELIM); 474: return length; 475: } 476: } 477: nextc = c; 478: error("Can't find %c to terminate string before end of file",SDELIM); 479: return length; 480: } 481: 482: 483: char *checkid(id, delim) 484: char *id, delim; 485: /* Function: check whether the string starting at id is an */ 486: /* identifier and return a pointer to the last char*/ 487: /* of the identifer. White space, delim and '\0' */ 488: /* are legal delimeters. Aborts the program if not */ 489: /* a legal identifier. Useful for checking commands*/ 490: { 491: register enum tokens d; 492: register char *temp; 493: register char c,tc; 494: 495: temp = id; 496: if ( ctab[*id] == LETTER ) { 497: while( (d=ctab[c=(*++id)]) == LETTER || d==DIGIT || d==IDCHAR) ; 498: if ( c!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) { 499: /* append \0 to end of id before error message */ 500: tc = c; 501: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ; 502: *id = '\0'; 503: faterror("Invalid character %c in identifier %s",tc,temp); 504: return nil ; 505: } else 506: return id; 507: } else { 508: /* append \0 to end of id before error message */ 509: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ; 510: *id = '\0'; 511: faterror("Identifier %s does not start with letter",temp); 512: return nil; 513: } 514: } 515: 516: 517: serror(e,e1,e2,e3,e4,e5) 518: char * e, * e1; 519: /* non-fatal syntax error */ 520: { nerror++; 521: fprintf(stderr,"%s error, line %d: ", cmdid, line); 522: fprintf(stderr,e, e1, e2, e3, e4, e5); 523: putc('\n',stderr); 524: } 525: 526: error(e,e1,e2,e3,e4,e5) 527: char * e, * e1; 528: /* non-fatal error */ 529: { nerror++; 530: fprintf(stderr,"%s error: ",cmdid); 531: fprintf(stderr,e, e1, e2, e3, e4, e5); 532: putc('\n',stderr); 533: } 534: 535: fatserror(e,e1,e2,e3,e4,e5) 536: char * e, * e1; 537: /* fatal syntax error */ 538: { nerror++; 539: fprintf(stderr,"%s error, line %d: ", cmdid,line); 540: fprintf(stderr,e, e1, e2, e3, e4, e5); 541: fprintf(stderr,"\n%s aborted\n",cmdid); 542: cleanup(); 543: exit(1); 544: } 545: 546: faterror(e,e1,e2,e3,e4,e5) 547: char * e, * e1; 548: /* fatal error, terminates program after cleanup */ 549: { nerror++; 550: fprintf(stderr,"%s error: ",cmdid); 551: fprintf(stderr,e, e1, e2, e3, e4, e5); 552: fprintf(stderr,"\n%s aborted\n",cmdid); 553: cleanup(); 554: exit(1); 555: } 556: 557: warn(e,e1,e2,e3,e4,e5) 558: char * e, * e1; 559: /* prints a warning message */ 560: { nwarn++; 561: fprintf(stderr,"%s warning: ",cmdid); 562: fprintf(stderr,e, e1, e2, e3, e4, e5); 563: putc('\n',stderr); 564: } 565: 566: 567: diagnose(e,e1,e2,e3,e4,e5) 568: char * e, * e1; 569: /* prints a diagnostic message */ 570: { 571: if (!quietflag) { 572: fprintf(stderr,e, e1, e2, e3, e4, e5); 573: putc('\n',stderr); 574: } 575: } 576: 577: 578: 579: fflsbuf(c, iop) 580: int c; register FILE * iop; 581: /* Function: Flush iop. 582: * Same routine as _flsbuf in stdio, but aborts program on error. 583: */ 584: { register result; 585: if ((result=_flsbuf(c,iop))==EOF) 586: faterror("write error"); 587: return result; 588: } 589: 590: 591: fputs(s, iop) 592: register char *s; 593: register FILE *iop; 594: /* Function: Put string s on file iop, abort on error. 595: * Same as puts in stdio, but with different putc macro. 596: */ 597: { 598: register r; 599: register c; 600: 601: while (c = *s++) 602: r = putc(c, iop); 603: return(r); 604: } 605: 606: 607: 608: fprintf(iop, fmt, args) 609: FILE *iop; 610: char *fmt; 611: /* Function: formatted output. Same as fprintf in stdio, 612: * but aborts program on error 613: */ 614: { 615: _doprnt(fmt, &args, iop); 616: if (ferror(iop)) { 617: faterror("write error"); 618: return EOF; 619: } else return 0; 620: } 621: 622: 623: 624: #ifdef LEXDB 625: /* test program reading a stream of lexems and printing the tokens. 626: */ 627: 628: 629: 630: main(argc,argv) 631: int argc; char * argv[]; 632: { 633: cmdid="lextest"; 634: if (argc<2) { 635: fputs("No input file\n",stderr); 636: exit(1); 637: } 638: if ((finptr=fopen(argv[1], "r")) == NULL) { 639: faterror("Can't open input file %s\n",argv[1]); 640: } 641: Lexinit(); 642: rewriteflag=false; 643: while (nexttok != EOFILE) { 644: switch (nexttok) { 645: 646: case ID: 647: printf("ID: %s",NextString); 648: break; 649: 650: case NUM: 651: if (hshenter==true) 652: printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab); 653: else 654: printf("NUM, unentered: %s",NextString); 655: hshenter = !hshenter; /*alternate between dates and numbers*/ 656: break; 657: 658: case COLON: 659: printf("COLON"); break; 660: 661: case SEMI: 662: printf("SEMI"); break; 663: 664: case STRING: 665: readstring(); 666: printf("STRING"); break; 667: 668: case UNKN: 669: printf("UNKN"); break; 670: 671: default: 672: printf("DEFAULT"); break; 673: } 674: printf(" | "); 675: nextlex(); 676: } 677: printf("\nEnd of lexical analyzer test\n"); 678: } 679: 680: cleanup() 681: /* dummy */ 682: {} 683: 684: 685: #endif