1: /* 2: * egrep -- print lines containing (or not containing) a regular expression 3: * 4: * status returns: 5: * 0 - ok, and some matches 6: * 1 - ok, but no matches 7: * 2 - some error 8: */ 9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10: %left OR 11: %left CHAR DOT CCL NCCL '(' 12: %left CAT 13: %left STAR PLUS QUEST 14: 15: %{ 16: #include <stdio.h> 17: 18: #define MAXLIN 350 19: #define MAXPOS 4000 20: #define NCHARS 128 21: #define NSTATES 128 22: #define FINAL -1 23: char gotofn[NSTATES][NCHARS]; 24: int state[NSTATES]; 25: char out[NSTATES]; 26: int line 1; 27: int name[MAXLIN]; 28: int left[MAXLIN]; 29: int right[MAXLIN]; 30: int parent[MAXLIN]; 31: int foll[MAXLIN]; 32: int positions[MAXPOS]; 33: char chars[MAXLIN]; 34: int nxtpos; 35: int nxtchar 0; 36: int tmpstat[MAXLIN]; 37: int initstat[MAXLIN]; 38: int xstate; 39: int count; 40: int icount; 41: char *input; 42: 43: long lnum; 44: int bflag; 45: int cflag; 46: int fflag; 47: int lflag; 48: int nflag; 49: int hflag = 1; 50: int sflag; 51: int vflag; 52: int nfile; 53: long blkno; 54: long tln; 55: int nsucc; 56: 57: int f; 58: int fname; 59: %} 60: 61: %% 62: s: t 63: ={ unary(FINAL, $1); 64: line--; 65: } 66: ; 67: t: b r 68: ={ $$ = node(CAT, $1, $2); } 69: | OR b r OR 70: ={ $$ = node(CAT, $2, $3); } 71: | OR b r 72: ={ $$ = node(CAT, $2, $3); } 73: | b r OR 74: ={ $$ = node(CAT, $1, $2); } 75: ; 76: b: 77: ={ $$ = enter(DOT); 78: $$ = unary(STAR, $$); } 79: ; 80: r: CHAR 81: ={ $$ = enter($1); } 82: | DOT 83: ={ $$ = enter(DOT); } 84: | CCL 85: ={ $$ = cclenter(CCL); } 86: | NCCL 87: ={ $$ = cclenter(NCCL); } 88: ; 89: 90: r: r OR r 91: ={ $$ = node(OR, $1, $3); } 92: | r r %prec CAT 93: ={ $$ = node(CAT, $1, $2); } 94: | r STAR 95: ={ $$ = unary(STAR, $1); } 96: | r PLUS 97: ={ $$ = unary(PLUS, $1); } 98: | r QUEST 99: ={ $$ = unary(QUEST, $1); } 100: | '(' r ')' 101: ={ $$ = $2; } 102: | error 103: ; 104: 105: %% 106: yyerror(s) { 107: fprintf(stderr, "egrep: %s\n", s); 108: exit(2); 109: } 110: 111: yylex() { 112: extern int yylval; 113: int cclcnt, x; 114: register char c, d; 115: switch(c = nextch()) { 116: case '$': 117: case '^': c = '\n'; 118: goto defchar; 119: case '|': return (OR); 120: case '*': return (STAR); 121: case '+': return (PLUS); 122: case '?': return (QUEST); 123: case '(': return (c); 124: case ')': return (c); 125: case '.': return (DOT); 126: case '\0': return (0); 127: case '\n': return (OR); 128: case '[': 129: x = CCL; 130: cclcnt = 0; 131: count = nxtchar++; 132: if ((c = nextch()) == '^') { 133: x = NCCL; 134: c = nextch(); 135: } 136: do { 137: if (c == '\0') synerror(); 138: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 139: if ((d = nextch()) != 0) { 140: c = chars[nxtchar-1]; 141: while (c < d) { 142: if (nxtchar >= MAXLIN) overflo(); 143: chars[nxtchar++] = ++c; 144: cclcnt++; 145: } 146: continue; 147: } 148: } 149: if (nxtchar >= MAXLIN) overflo(); 150: chars[nxtchar++] = c; 151: cclcnt++; 152: } while ((c = nextch()) != ']'); 153: chars[count] = cclcnt; 154: return (x); 155: case '\\': 156: if ((c = nextch()) == '\0') synerror(); 157: defchar: 158: default: yylval = c; return (CHAR); 159: } 160: } 161: nextch() { 162: register char c; 163: if (fflag) { 164: if ((c = getc(stdin)) == EOF) return(0); 165: } 166: else c = *input++; 167: return(c); 168: } 169: 170: synerror() { 171: fprintf(stderr, "egrep: syntax error\n"); 172: exit(2); 173: } 174: 175: enter(x) int x; { 176: if(line >= MAXLIN) overflo(); 177: name[line] = x; 178: left[line] = 0; 179: right[line] = 0; 180: return(line++); 181: } 182: 183: cclenter(x) int x; { 184: register linno; 185: linno = enter(x); 186: right[linno] = count; 187: return (linno); 188: } 189: 190: node(x, l, r) { 191: if(line >= MAXLIN) overflo(); 192: name[line] = x; 193: left[line] = l; 194: right[line] = r; 195: parent[l] = line; 196: parent[r] = line; 197: return(line++); 198: } 199: 200: unary(x, d) { 201: if(line >= MAXLIN) overflo(); 202: name[line] = x; 203: left[line] = d; 204: right[line] = 0; 205: parent[d] = line; 206: return(line++); 207: } 208: overflo() { 209: fprintf(stderr, "egrep: regular expression too long\n"); 210: exit(2); 211: } 212: 213: cfoll(v) { 214: register i; 215: if (left[v] == 0) { 216: count = 0; 217: for (i=1; i<=line; i++) tmpstat[i] = 0; 218: follow(v); 219: add(foll, v); 220: } 221: else if (right[v] == 0) cfoll(left[v]); 222: else { 223: cfoll(left[v]); 224: cfoll(right[v]); 225: } 226: } 227: cgotofn() { 228: register c, i, k; 229: int n, s; 230: char symbol[NCHARS]; 231: int j, nc, pc, pos; 232: int curpos, num; 233: int number, newpos; 234: count = 0; 235: for (n=3; n<=line; n++) tmpstat[n] = 0; 236: if (cstate(line-1)==0) { 237: tmpstat[line] = 1; 238: count++; 239: out[0] = 1; 240: } 241: for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 242: count--; /*leave out position 1 */ 243: icount = count; 244: tmpstat[1] = 0; 245: add(state, 0); 246: n = 0; 247: for (s=0; s<=n; s++) { 248: if (out[s] == 1) continue; 249: for (i=0; i<NCHARS; i++) symbol[i] = 0; 250: num = positions[state[s]]; 251: count = icount; 252: for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 253: pos = state[s] + 1; 254: for (i=0; i<num; i++) { 255: curpos = positions[pos]; 256: if ((c = name[curpos]) >= 0) { 257: if (c < NCHARS) symbol[c] = 1; 258: else if (c == DOT) { 259: for (k=0; k<NCHARS; k++) 260: if (k!='\n') symbol[k] = 1; 261: } 262: else if (c == CCL) { 263: nc = chars[right[curpos]]; 264: pc = right[curpos] + 1; 265: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 266: } 267: else if (c == NCCL) { 268: nc = chars[right[curpos]]; 269: for (j = 0; j < NCHARS; j++) { 270: pc = right[curpos] + 1; 271: for (k = 0; k < nc; k++) 272: if (j==chars[pc++]) goto cont; 273: if (j!='\n') symbol[j] = 1; 274: cont:; 275: } 276: } 277: else printf("something's funny\n"); 278: } 279: pos++; 280: } 281: for (c=0; c<NCHARS; c++) { 282: if (symbol[c] == 1) { /* nextstate(s,c) */ 283: count = icount; 284: for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 285: pos = state[s] + 1; 286: for (i=0; i<num; i++) { 287: curpos = positions[pos]; 288: if ((k = name[curpos]) >= 0) 289: if ( 290: (k == c) 291: | (k == DOT) 292: | (k == CCL && member(c, right[curpos], 1)) 293: | (k == NCCL && member(c, right[curpos], 0)) 294: ) { 295: number = positions[foll[curpos]]; 296: newpos = foll[curpos] + 1; 297: for (k=0; k<number; k++) { 298: if (tmpstat[positions[newpos]] != 1) { 299: tmpstat[positions[newpos]] = 1; 300: count++; 301: } 302: newpos++; 303: } 304: } 305: pos++; 306: } /* end nextstate */ 307: if (notin(n)) { 308: if (n >= NSTATES) overflo(); 309: add(state, ++n); 310: if (tmpstat[line] == 1) out[n] = 1; 311: gotofn[s][c] = n; 312: } 313: else { 314: gotofn[s][c] = xstate; 315: } 316: } 317: } 318: } 319: } 320: 321: cstate(v) { 322: register b; 323: if (left[v] == 0) { 324: if (tmpstat[v] != 1) { 325: tmpstat[v] = 1; 326: count++; 327: } 328: return(1); 329: } 330: else if (right[v] == 0) { 331: if (cstate(left[v]) == 0) return (0); 332: else if (name[v] == PLUS) return (1); 333: else return (0); 334: } 335: else if (name[v] == CAT) { 336: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 337: else return (1); 338: } 339: else { /* name[v] == OR */ 340: b = cstate(right[v]); 341: if (cstate(left[v]) == 0 || b == 0) return (0); 342: else return (1); 343: } 344: } 345: 346: 347: member(symb, set, torf) { 348: register i, num, pos; 349: num = chars[set]; 350: pos = set + 1; 351: for (i=0; i<num; i++) 352: if (symb == chars[pos++]) return (torf); 353: return (!torf); 354: } 355: 356: notin(n) { 357: register i, j, pos; 358: for (i=0; i<=n; i++) { 359: if (positions[state[i]] == count) { 360: pos = state[i] + 1; 361: for (j=0; j < count; j++) 362: if (tmpstat[positions[pos++]] != 1) goto nxt; 363: xstate = i; 364: return (0); 365: } 366: nxt: ; 367: } 368: return (1); 369: } 370: 371: add(array, n) int *array; { 372: register i; 373: if (nxtpos + count > MAXPOS) overflo(); 374: array[n] = nxtpos; 375: positions[nxtpos++] = count; 376: for (i=3; i <= line; i++) { 377: if (tmpstat[i] == 1) { 378: positions[nxtpos++] = i; 379: } 380: } 381: } 382: 383: follow(v) int v; { 384: int p; 385: if (v == line) return; 386: p = parent[v]; 387: switch(name[p]) { 388: case STAR: 389: case PLUS: cstate(v); 390: follow(p); 391: return; 392: 393: case OR: 394: case QUEST: follow(p); 395: return; 396: 397: case CAT: if (v == left[p]) { 398: if (cstate(right[p]) == 0) { 399: follow(p); 400: return; 401: } 402: } 403: else follow(p); 404: return; 405: case FINAL: if (tmpstat[line] != 1) { 406: tmpstat[line] = 1; 407: count++; 408: } 409: return; 410: } 411: } 412: 413: 414: main(argc, argv) 415: char **argv; 416: { 417: while (--argc > 0 && (++argv)[0][0]=='-') 418: switch (argv[0][1]) { 419: 420: case 's': 421: sflag++; 422: continue; 423: 424: case 'h': 425: hflag = 0; 426: continue; 427: 428: case 'b': 429: bflag++; 430: continue; 431: 432: case 'c': 433: cflag++; 434: continue; 435: 436: case 'e': 437: argc--; 438: argv++; 439: goto out; 440: 441: case 'f': 442: fflag++; 443: continue; 444: 445: case 'l': 446: lflag++; 447: continue; 448: 449: case 'n': 450: nflag++; 451: continue; 452: 453: case 'v': 454: vflag++; 455: continue; 456: 457: default: 458: fprintf(stderr, "egrep: unknown flag\n"); 459: continue; 460: } 461: out: 462: if (argc<=0) 463: exit(2); 464: if (fflag) { 465: if (freopen(fname = *argv, "r", stdin) == NULL) { 466: fprintf(stderr, "egrep: can't open %s\n", fname); 467: exit(2); 468: } 469: } 470: else input = *argv; 471: argc--; 472: argv++; 473: 474: yyparse(); 475: 476: cfoll(line-1); 477: cgotofn(); 478: nfile = argc; 479: if (argc<=0) { 480: if (lflag) exit(1); 481: execute(0); 482: } 483: else while (--argc >= 0) { 484: execute(*argv); 485: argv++; 486: } 487: exit(nsucc == 0); 488: } 489: 490: execute(file) 491: char *file; 492: { 493: register char *p; 494: register cstat; 495: register ccount; 496: char buf[1024]; 497: char *nlp; 498: int istat; 499: if (file) { 500: if ((f = open(file, 0)) < 0) { 501: fprintf(stderr, "egrep: can't open %s\n", file); 502: exit(2); 503: } 504: } 505: else f = 0; 506: ccount = 0; 507: lnum = 1; 508: tln = 0; 509: p = buf; 510: nlp = p; 511: if ((ccount = read(f,p,512))<=0) goto done; 512: blkno = ccount; 513: istat = cstat = gotofn[0]['\n']; 514: if (out[cstat]) goto found; 515: for (;;) { 516: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 517: if (out[cstat]) { 518: found: for(;;) { 519: if (*p++ == '\n') { 520: if (vflag == 0) { 521: succeed: nsucc = 1; 522: if (cflag) tln++; 523: else if (sflag) 524: ; /* ugh */ 525: else if (lflag) { 526: printf("%s\n", file); 527: close(f); 528: return; 529: } 530: else { 531: if (nfile > 1 && hflag) printf("%s:", file); 532: if (bflag) printf("%ld:", (blkno-ccount-1)/512); 533: if (nflag) printf("%ld:", lnum); 534: if (p <= nlp) { 535: while (nlp < &buf[1024]) putchar(*nlp++); 536: nlp = buf; 537: } 538: while (nlp < p) putchar(*nlp++); 539: } 540: } 541: lnum++; 542: nlp = p; 543: if ((out[(cstat=istat)]) == 0) goto brk2; 544: } 545: cfound: 546: if (--ccount <= 0) { 547: if (p <= &buf[512]) { 548: if ((ccount = read(f, p, 512)) <= 0) goto done; 549: } 550: else if (p == &buf[1024]) { 551: p = buf; 552: if ((ccount = read(f, p, 512)) <= 0) goto done; 553: } 554: else { 555: if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; 556: } 557: if(nlp>p && nlp<=p+ccount) 558: nlp = p+ccount; 559: blkno += ccount; 560: } 561: } 562: } 563: if (*p++ == '\n') { 564: if (vflag) goto succeed; 565: else { 566: lnum++; 567: nlp = p; 568: if (out[(cstat=istat)]) goto cfound; 569: } 570: } 571: brk2: 572: if (--ccount <= 0) { 573: if (p <= &buf[512]) { 574: if ((ccount = read(f, p, 512)) <= 0) break; 575: } 576: else if (p == &buf[1024]) { 577: p = buf; 578: if ((ccount = read(f, p, 512)) <= 0) break; 579: } 580: else { 581: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 582: } 583: if(nlp>p && nlp<=p+ccount) 584: nlp = p+ccount; 585: blkno += ccount; 586: } 587: } 588: done: close(f); 589: if (cflag) { 590: if (nfile > 1) 591: printf("%s:", file); 592: printf("%ld\n", tln); 593: } 594: }