1: /*
   2:  * egrep -- print lines containing (or not containing) a regular expression
   3:  *
   4:  *	status returns:
   5:  *		0 - ok, and some matches
   6:  *		1 - ok, but no matches
   7:  *		2 - some error
   8:  */
   9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
  10: %left OR
  11: %left CHAR DOT CCL NCCL '('
  12: %left CAT
  13: %left STAR PLUS QUEST
  14: 
  15: %{
  16: #include <stdio.h>
  17: 
  18: #define MAXLIN 350
  19: #define MAXPOS 4000
  20: #define NCHARS 128
  21: #define NSTATES 128
  22: #define FINAL -1
  23: char gotofn[NSTATES][NCHARS];
  24: int state[NSTATES];
  25: char out[NSTATES];
  26: int line 1;
  27: int name[MAXLIN];
  28: int left[MAXLIN];
  29: int right[MAXLIN];
  30: int parent[MAXLIN];
  31: int foll[MAXLIN];
  32: int positions[MAXPOS];
  33: char chars[MAXLIN];
  34: int nxtpos;
  35: int nxtchar 0;
  36: int tmpstat[MAXLIN];
  37: int initstat[MAXLIN];
  38: int xstate;
  39: int count;
  40: int icount;
  41: char *input;
  42: 
  43: long    lnum;
  44: int bflag;
  45: int cflag;
  46: int fflag;
  47: int lflag;
  48: int nflag;
  49: int hflag   = 1;
  50: int sflag;
  51: int vflag;
  52: int nfile;
  53: long    blkno;
  54: long    tln;
  55: int nsucc;
  56: 
  57: int f;
  58: int fname;
  59: %}
  60: 
  61: %%
  62: s:  t
  63:         ={ unary(FINAL, $1);
  64:           line--;
  65:         }
  66:     ;
  67: t:  b r
  68:         ={ $$ = node(CAT, $1, $2); }
  69:     | OR b r OR
  70:         ={ $$ = node(CAT, $2, $3); }
  71:     | OR b r
  72:         ={ $$ = node(CAT, $2, $3); }
  73:     | b r OR
  74:         ={ $$ = node(CAT, $1, $2); }
  75:     ;
  76: b:
  77:         ={ $$ = enter(DOT);
  78:            $$ = unary(STAR, $$); }
  79:     ;
  80: r:  CHAR
  81:         ={ $$ = enter($1); }
  82:     | DOT
  83:         ={ $$ = enter(DOT); }
  84:     | CCL
  85:         ={ $$ = cclenter(CCL); }
  86:     | NCCL
  87:         ={ $$ = cclenter(NCCL); }
  88:     ;
  89: 
  90: r:  r OR r
  91:         ={ $$ = node(OR, $1, $3); }
  92:     | r r %prec CAT
  93:         ={ $$ = node(CAT, $1, $2); }
  94:     | r STAR
  95:         ={ $$ = unary(STAR, $1); }
  96:     | r PLUS
  97:         ={ $$ = unary(PLUS, $1); }
  98:     | r QUEST
  99:         ={ $$ = unary(QUEST, $1); }
 100:     | '(' r ')'
 101:         ={ $$ = $2; }
 102:     | error
 103:     ;
 104: 
 105: %%
 106: yyerror(s) {
 107:     fprintf(stderr, "egrep: %s\n", s);
 108:     exit(2);
 109: }
 110: 
 111: yylex() {
 112:     extern int yylval;
 113:     int cclcnt, x;
 114:     register char c, d;
 115:     switch(c = nextch()) {
 116:         case '$':
 117:         case '^': c = '\n';
 118:             goto defchar;
 119:         case '|': return (OR);
 120:         case '*': return (STAR);
 121:         case '+': return (PLUS);
 122:         case '?': return (QUEST);
 123:         case '(': return (c);
 124:         case ')': return (c);
 125:         case '.': return (DOT);
 126:         case '\0': return (0);
 127:         case '\n': return (OR);
 128:         case '[':
 129:             x = CCL;
 130:             cclcnt = 0;
 131:             count = nxtchar++;
 132:             if ((c = nextch()) == '^') {
 133:                 x = NCCL;
 134:                 c = nextch();
 135:             }
 136:             do {
 137:                 if (c == '\0') synerror();
 138:                 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
 139:                     if ((d = nextch()) != 0) {
 140:                         c = chars[nxtchar-1];
 141:                         while (c < d) {
 142:                             if (nxtchar >= MAXLIN) overflo();
 143:                             chars[nxtchar++] = ++c;
 144:                             cclcnt++;
 145:                         }
 146:                         continue;
 147:                     }
 148:                 }
 149:                 if (nxtchar >= MAXLIN) overflo();
 150:                 chars[nxtchar++] = c;
 151:                 cclcnt++;
 152:             } while ((c = nextch()) != ']');
 153:             chars[count] = cclcnt;
 154:             return (x);
 155:         case '\\':
 156:             if ((c = nextch()) == '\0') synerror();
 157:         defchar:
 158:         default: yylval = c; return (CHAR);
 159:     }
 160: }
 161: nextch() {
 162:     register char c;
 163:     if (fflag) {
 164:         if ((c = getc(stdin)) == EOF) return(0);
 165:     }
 166:     else c = *input++;
 167:     return(c);
 168: }
 169: 
 170: synerror() {
 171:     fprintf(stderr, "egrep: syntax error\n");
 172:     exit(2);
 173: }
 174: 
 175: enter(x) int x; {
 176:     if(line >= MAXLIN) overflo();
 177:     name[line] = x;
 178:     left[line] = 0;
 179:     right[line] = 0;
 180:     return(line++);
 181: }
 182: 
 183: cclenter(x) int x; {
 184:     register linno;
 185:     linno = enter(x);
 186:     right[linno] = count;
 187:     return (linno);
 188: }
 189: 
 190: node(x, l, r) {
 191:     if(line >= MAXLIN) overflo();
 192:     name[line] = x;
 193:     left[line] = l;
 194:     right[line] = r;
 195:     parent[l] = line;
 196:     parent[r] = line;
 197:     return(line++);
 198: }
 199: 
 200: unary(x, d) {
 201:     if(line >= MAXLIN) overflo();
 202:     name[line] = x;
 203:     left[line] = d;
 204:     right[line] = 0;
 205:     parent[d] = line;
 206:     return(line++);
 207: }
 208: overflo() {
 209:     fprintf(stderr, "egrep: regular expression too long\n");
 210:     exit(2);
 211: }
 212: 
 213: cfoll(v) {
 214:     register i;
 215:     if (left[v] == 0) {
 216:         count = 0;
 217:         for (i=1; i<=line; i++) tmpstat[i] = 0;
 218:         follow(v);
 219:         add(foll, v);
 220:     }
 221:     else if (right[v] == 0) cfoll(left[v]);
 222:     else {
 223:         cfoll(left[v]);
 224:         cfoll(right[v]);
 225:     }
 226: }
 227: cgotofn() {
 228:     register c, i, k;
 229:     int n, s;
 230:     char symbol[NCHARS];
 231:     int j, nc, pc, pos;
 232:     int curpos, num;
 233:     int number, newpos;
 234:     count = 0;
 235:     for (n=3; n<=line; n++) tmpstat[n] = 0;
 236:     if (cstate(line-1)==0) {
 237:         tmpstat[line] = 1;
 238:         count++;
 239:         out[0] = 1;
 240:     }
 241:     for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
 242:     count--;        /*leave out position 1 */
 243:     icount = count;
 244:     tmpstat[1] = 0;
 245:     add(state, 0);
 246:     n = 0;
 247:     for (s=0; s<=n; s++)  {
 248:         if (out[s] == 1) continue;
 249:         for (i=0; i<NCHARS; i++) symbol[i] = 0;
 250:         num = positions[state[s]];
 251:         count = icount;
 252:         for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
 253:         pos = state[s] + 1;
 254:         for (i=0; i<num; i++) {
 255:             curpos = positions[pos];
 256:             if ((c = name[curpos]) >= 0) {
 257:                 if (c < NCHARS) symbol[c] = 1;
 258:                 else if (c == DOT) {
 259:                     for (k=0; k<NCHARS; k++)
 260:                         if (k!='\n') symbol[k] = 1;
 261:                 }
 262:                 else if (c == CCL) {
 263:                     nc = chars[right[curpos]];
 264:                     pc = right[curpos] + 1;
 265:                     for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
 266:                 }
 267:                 else if (c == NCCL) {
 268:                     nc = chars[right[curpos]];
 269:                     for (j = 0; j < NCHARS; j++) {
 270:                         pc = right[curpos] + 1;
 271:                         for (k = 0; k < nc; k++)
 272:                             if (j==chars[pc++]) goto cont;
 273:                         if (j!='\n') symbol[j] = 1;
 274:                         cont:;
 275:                     }
 276:                 }
 277:                 else printf("something's funny\n");
 278:             }
 279:             pos++;
 280:         }
 281:         for (c=0; c<NCHARS; c++) {
 282:             if (symbol[c] == 1) { /* nextstate(s,c) */
 283:                 count = icount;
 284:                 for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
 285:                 pos = state[s] + 1;
 286:                 for (i=0; i<num; i++) {
 287:                     curpos = positions[pos];
 288:                     if ((k = name[curpos]) >= 0)
 289:                         if (
 290:                             (k == c)
 291:                             | (k == DOT)
 292:                             | (k == CCL && member(c, right[curpos], 1))
 293:                             | (k == NCCL && member(c, right[curpos], 0))
 294:                         ) {
 295:                             number = positions[foll[curpos]];
 296:                             newpos = foll[curpos] + 1;
 297:                             for (k=0; k<number; k++) {
 298:                                 if (tmpstat[positions[newpos]] != 1) {
 299:                                     tmpstat[positions[newpos]] = 1;
 300:                                     count++;
 301:                                 }
 302:                                 newpos++;
 303:                             }
 304:                         }
 305:                     pos++;
 306:                 } /* end nextstate */
 307:                 if (notin(n)) {
 308:                     if (n >= NSTATES) overflo();
 309:                     add(state, ++n);
 310:                     if (tmpstat[line] == 1) out[n] = 1;
 311:                     gotofn[s][c] = n;
 312:                 }
 313:                 else {
 314:                     gotofn[s][c] = xstate;
 315:                 }
 316:             }
 317:         }
 318:     }
 319: }
 320: 
 321: cstate(v) {
 322:     register b;
 323:     if (left[v] == 0) {
 324:         if (tmpstat[v] != 1) {
 325:             tmpstat[v] = 1;
 326:             count++;
 327:         }
 328:         return(1);
 329:     }
 330:     else if (right[v] == 0) {
 331:         if (cstate(left[v]) == 0) return (0);
 332:         else if (name[v] == PLUS) return (1);
 333:         else return (0);
 334:     }
 335:     else if (name[v] == CAT) {
 336:         if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
 337:         else return (1);
 338:     }
 339:     else { /* name[v] == OR */
 340:         b = cstate(right[v]);
 341:         if (cstate(left[v]) == 0 || b == 0) return (0);
 342:         else return (1);
 343:     }
 344: }
 345: 
 346: 
 347: member(symb, set, torf) {
 348:     register i, num, pos;
 349:     num = chars[set];
 350:     pos = set + 1;
 351:     for (i=0; i<num; i++)
 352:         if (symb == chars[pos++]) return (torf);
 353:     return (!torf);
 354: }
 355: 
 356: notin(n) {
 357:     register i, j, pos;
 358:     for (i=0; i<=n; i++) {
 359:         if (positions[state[i]] == count) {
 360:             pos = state[i] + 1;
 361:             for (j=0; j < count; j++)
 362:                 if (tmpstat[positions[pos++]] != 1) goto nxt;
 363:             xstate = i;
 364:             return (0);
 365:         }
 366:         nxt: ;
 367:     }
 368:     return (1);
 369: }
 370: 
 371: add(array, n) int *array; {
 372:     register i;
 373:     if (nxtpos + count > MAXPOS) overflo();
 374:     array[n] = nxtpos;
 375:     positions[nxtpos++] = count;
 376:     for (i=3; i <= line; i++) {
 377:         if (tmpstat[i] == 1) {
 378:             positions[nxtpos++] = i;
 379:         }
 380:     }
 381: }
 382: 
 383: follow(v) int v; {
 384:     int p;
 385:     if (v == line) return;
 386:     p = parent[v];
 387:     switch(name[p]) {
 388:         case STAR:
 389:         case PLUS:  cstate(v);
 390:                 follow(p);
 391:                 return;
 392: 
 393:         case OR:
 394:         case QUEST: follow(p);
 395:                 return;
 396: 
 397:         case CAT:   if (v == left[p]) {
 398:                     if (cstate(right[p]) == 0) {
 399:                         follow(p);
 400:                         return;
 401:                     }
 402:                 }
 403:                 else follow(p);
 404:                 return;
 405:         case FINAL: if (tmpstat[line] != 1) {
 406:                     tmpstat[line] = 1;
 407:                     count++;
 408:                 }
 409:                 return;
 410:     }
 411: }
 412: 
 413: 
 414: main(argc, argv)
 415: char **argv;
 416: {
 417:     while (--argc > 0 && (++argv)[0][0]=='-')
 418:         switch (argv[0][1]) {
 419: 
 420:         case 's':
 421:             sflag++;
 422:             continue;
 423: 
 424:         case 'h':
 425:             hflag = 0;
 426:             continue;
 427: 
 428:         case 'b':
 429:             bflag++;
 430:             continue;
 431: 
 432:         case 'c':
 433:             cflag++;
 434:             continue;
 435: 
 436:         case 'e':
 437:             argc--;
 438:             argv++;
 439:             goto out;
 440: 
 441:         case 'f':
 442:             fflag++;
 443:             continue;
 444: 
 445:         case 'l':
 446:             lflag++;
 447:             continue;
 448: 
 449:         case 'n':
 450:             nflag++;
 451:             continue;
 452: 
 453:         case 'v':
 454:             vflag++;
 455:             continue;
 456: 
 457:         default:
 458:             fprintf(stderr, "egrep: unknown flag\n");
 459:             continue;
 460:         }
 461: out:
 462:     if (argc<=0)
 463:         exit(2);
 464:     if (fflag) {
 465:         if (freopen(fname = *argv, "r", stdin) == NULL) {
 466:             fprintf(stderr, "egrep: can't open %s\n", fname);
 467:             exit(2);
 468:         }
 469:     }
 470:     else input = *argv;
 471:     argc--;
 472:     argv++;
 473: 
 474:     yyparse();
 475: 
 476:     cfoll(line-1);
 477:     cgotofn();
 478:     nfile = argc;
 479:     if (argc<=0) {
 480:         if (lflag) exit(1);
 481:         execute(0);
 482:     }
 483:     else while (--argc >= 0) {
 484:         execute(*argv);
 485:         argv++;
 486:     }
 487:     exit(nsucc == 0);
 488: }
 489: 
 490: execute(file)
 491: char *file;
 492: {
 493:     register char *p;
 494:     register cstat;
 495:     register ccount;
 496:     char buf[1024];
 497:     char *nlp;
 498:     int istat;
 499:     if (file) {
 500:         if ((f = open(file, 0)) < 0) {
 501:             fprintf(stderr, "egrep: can't open %s\n", file);
 502:             exit(2);
 503:         }
 504:     }
 505:     else f = 0;
 506:     ccount = 0;
 507:     lnum = 1;
 508:     tln = 0;
 509:     p = buf;
 510:     nlp = p;
 511:     if ((ccount = read(f,p,512))<=0) goto done;
 512:     blkno = ccount;
 513:     istat = cstat = gotofn[0]['\n'];
 514:     if (out[cstat]) goto found;
 515:     for (;;) {
 516:         cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
 517:         if (out[cstat]) {
 518:         found:  for(;;) {
 519:                 if (*p++ == '\n') {
 520:                     if (vflag == 0) {
 521:                 succeed:    nsucc = 1;
 522:                         if (cflag) tln++;
 523:                         else if (sflag)
 524:                             ;   /* ugh */
 525:                         else if (lflag) {
 526:                             printf("%s\n", file);
 527:                             close(f);
 528:                             return;
 529:                         }
 530:                         else {
 531:                             if (nfile > 1 && hflag) printf("%s:", file);
 532:                             if (bflag) printf("%ld:", (blkno-ccount-1)/512);
 533:                             if (nflag) printf("%ld:", lnum);
 534:                             if (p <= nlp) {
 535:                                 while (nlp < &buf[1024]) putchar(*nlp++);
 536:                                 nlp = buf;
 537:                             }
 538:                             while (nlp < p) putchar(*nlp++);
 539:                         }
 540:                     }
 541:                     lnum++;
 542:                     nlp = p;
 543:                     if ((out[(cstat=istat)]) == 0) goto brk2;
 544:                 }
 545:                 cfound:
 546:                 if (--ccount <= 0) {
 547:                     if (p <= &buf[512]) {
 548:                         if ((ccount = read(f, p, 512)) <= 0) goto done;
 549:                     }
 550:                     else if (p == &buf[1024]) {
 551:                         p = buf;
 552:                         if ((ccount = read(f, p, 512)) <= 0) goto done;
 553:                     }
 554:                     else {
 555:                         if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
 556:                     }
 557:                     if(nlp>p && nlp<=p+ccount)
 558:                         nlp = p+ccount;
 559:                     blkno += ccount;
 560:                 }
 561:             }
 562:         }
 563:         if (*p++ == '\n') {
 564:             if (vflag) goto succeed;
 565:             else {
 566:                 lnum++;
 567:                 nlp = p;
 568:                 if (out[(cstat=istat)]) goto cfound;
 569:             }
 570:         }
 571:         brk2:
 572:         if (--ccount <= 0) {
 573:             if (p <= &buf[512]) {
 574:                 if ((ccount = read(f, p, 512)) <= 0) break;
 575:             }
 576:             else if (p == &buf[1024]) {
 577:                 p = buf;
 578:                 if ((ccount = read(f, p, 512)) <= 0) break;
 579:             }
 580:             else {
 581:                 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
 582:             }
 583:             if(nlp>p && nlp<=p+ccount)
 584:                 nlp = p+ccount;
 585:             blkno += ccount;
 586:         }
 587:     }
 588: done:   close(f);
 589:     if (cflag) {
 590:         if (nfile > 1)
 591:             printf("%s:", file);
 592:         printf("%ld\n", tln);
 593:     }
 594: }

Defined functions

_add defined in line 371; used 3 times
_cclenter defined in line 183; used 2 times
_cfoll defined in line 213; used 4 times
_cgotofn defined in line 227; used 1 times
_cstate defined in line 321; used 8 times
_enter defined in line 175; used 4 times
_execute defined in line 490; used 2 times
_follow defined in line 383; used 5 times
_main defined in line 414; never used
_member defined in line 347; used 2 times
_nextch defined in line 161; used 6 times
_node defined in line 190; used 6 times
_notin defined in line 356; used 1 times
_overflo defined in line 208; used 7 times
_synerror defined in line 170; used 2 times
_unary defined in line 200; used 5 times
_yyerror defined in line 105; never used
_yylex defined in line 111; never used

Defined macros

FINAL defined in line 22; used 1 times
  • in line 63
MAXLIN defined in line 18; used 13 times
MAXPOS defined in line 19; used 2 times
NCHARS defined in line 20; used 7 times
NSTATES defined in line 21; used 4 times
Last modified: 1979-02-26
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1506
Valid CSS Valid XHTML 1.0 Strict