regex.c

Functions

error             X
init_syntax_once  X
main              X
print_buf         X
printchar         X
re_comp           X
re_compile_fastmapX
re_compile_patternX
re_exec           X
re_match          X
re_match_2        X
re_search         X
re_search_2       X

Macros

BYTEWIDTH         X
EXTEND_BUFFER     X
NFAILURES         X
PATFETCH          X
PATFETCH_RAW      X
PATPUSH           X
PATUNFETCH        X
PREFETCH          X
SIGN_EXTEND_CHAR  X
SYNTAX            X
Sword             X

enums's

regexpcode        X

   1: /* Extended regular expression matching and search.
   2:    Copyright (C) 1985 Richard M. Stallman
   3: 
   4: This program is distributed in the hope that it will be useful,
   5: but without any warranty.  No author or distributor
   6: accepts responsibility to anyone for the consequences of using it
   7: or for whether it serves any particular purpose or works at all,
   8: unless he says so in writing.
   9: 
  10:    Permission is granted to anyone to distribute verbatim copies
  11:    of this program's source code as received, in any medium, provided that
  12:    the copyright notice, the nonwarraty notice above
  13:    and this permission notice are preserved,
  14:    and that the distributor grants the recipient all rights
  15:    for further redistribution as permitted by this notice,
  16:    and informs him of these rights.
  17: 
  18:    Permission is granted to distribute modified versions of this
  19:    program's source code, or of portions of it, under the above
  20:    conditions, plus the conditions that all changed files carry
  21:    prominent notices stating who last changed them and that the
  22:    derived material, including anything packaged together with it and
  23:    conceptually functioning as a modification of it rather than an
  24:    application of it, is in its entirety subject to a permission
  25:    notice identical to this one.
  26: 
  27:    Permission is granted to distribute this program (verbatim or
  28:    as modified) in compiled or executable form, provided verbatim
  29:    redistribution is permitted as stated above for source code, and
  30:     A.  it is accompanied by the corresponding machine-readable
  31:       source code, under the above conditions, or
  32:     B.  it is accompanied by a written offer, with no time limit,
  33:       to distribute the corresponding machine-readable source code,
  34:       under the above conditions, to any one, in return for reimbursement
  35:       of the cost of distribution.   Verbatim redistribution of the
  36:       written offer must be permitted.  Or,
  37:     C.  it is distributed by someone who received only the
  38:       compiled or executable form, and is accompanied by a copy of the
  39:       written offer of source code which he received along with it.
  40: 
  41:    Permission is granted to distribute this program (verbatim or as modified)
  42:    in executable form as part of a larger system provided that the source
  43:    code for this program, including any modifications used,
  44:    is also distributed or offered as stated in the preceding paragraph.
  45: 
  46: In other words, you are welcome to use, share and improve this program.
  47: You are forbidden to forbid anyone else to use, share and improve
  48: what you give them.   Help stamp out software-hoarding!  */
  49: 
  50: 
  51: /* To test, compile with -Dtest.
  52:  This Dtestable feature turns this into a self-contained program
  53:  which reads a pattern, describes how it compiles,
  54:  then reads a string and searches for it.  */
  55: 
  56: 
  57: #ifdef emacs
  58: 
  59: /* The `emacs' switch turns on certain special matching commands
  60:  that make sense only in emacs. */
  61: 
  62: #include "config.h"
  63: #include "lisp.h"
  64: #include "buffer.h"
  65: #include "syntax.h"
  66: 
  67: #else  /* not emacs */
  68: 
  69: /*
  70:  * Define the syntax stuff, so we can do the \<...\> things.
  71:  */
  72: #define Sword 1
  73: 
  74: #define SYNTAX(c) syntax_table[c]
  75: 
  76: static char syntax_table[256];
  77: 
  78: #endif /* not emacs */
  79: 
  80: #include "regex.h"
  81: 
  82: /* Number of failure points to allocate space for initially,
  83:  when matching.  If this number is exceeded, more space is allocated,
  84:  so it is not a hard limit.  */
  85: 
  86: #ifndef NFAILURES
  87: #define NFAILURES 80
  88: #endif NFAILURES
  89: 
  90: /* width of a byte in bits */
  91: 
  92: #define BYTEWIDTH 8
  93: 
  94: /* These are the command codes that appear in compiled regular expressions, one per byte.
  95:   Some command codes are followed by argument bytes.
  96:   A command code can specify any interpretation whatever for its arguments.
  97:   Zero-bytes may appear in the compiled regular expression. */
  98: 
  99: enum regexpcode
 100:   {
 101:     unused,
 102:     exactn,    /* followed by one byte giving n, and then by n literal bytes */
 103:     begline,   /* fails unless at beginning of line */
 104:     endline,   /* fails unless at end of line */
 105:     jump,    /* followed by two bytes giving relative address to jump to */
 106:     on_failure_jump,     /* followed by two bytes giving relative address of place
 107: 		            to resume at in case of failure. */
 108:     finalize_jump,   /* Throw away latest failure point and then jump to address. */
 109:     maybe_finalize_jump, /* Like jump but finalize if safe to do so.
 110: 			    This is used to jump back to the beginning
 111: 			    of a repeat.  If the command that follows
 112: 			    this jump is clearly incompatible with the
 113: 			    one at the beginning of the repeat, such that
 114: 			    we can be sure that there is no use backtracking
 115: 			    out of repetitions already completed,
 116: 			    then we finalize. */
 117:     dummy_failure_jump,  /* jump, and push a dummy failure point.
 118: 			    This failure point will be thrown away
 119: 			    if an attempt is made to use it for a failure.
 120: 			    A + construct makes this before the first repeat.  */
 121:     anychar,     /* matches any one character */
 122:     charset,     /* matches any one char belonging to specified set.
 123: 		    First following byte is # bitmap bytes.
 124: 		    Then come bytes for a bit-map saying which chars are in.
 125: 		    Bits in each byte are ordered low-bit-first.
 126: 		    A character is in the set if its bit is 1.
 127: 		    A character too large to have a bit in the map
 128: 		    is automatically not in the set */
 129:     charset_not, /* similar but match any character that is NOT one of those specified */
 130:     start_memory, /* starts remembering the text that is matched
 131: 		    and stores it in a memory register.
 132: 		    followed by one byte containing the register number.
 133: 		    Register numbers must be in the range 0 through NREGS. */
 134:     stop_memory, /* stops remembering the text that is matched
 135: 		    and stores it in a memory register.
 136: 		    followed by one byte containing the register number.
 137: 		    Register numbers must be in the range 0 through NREGS. */
 138:     duplicate,    /* match a duplicate of something remembered.
 139: 		    Followed by one byte containing the index of the memory register. */
 140:     before_dot,  /* Succeeds if before dot */
 141:     at_dot,  /* Succeeds if at dot */
 142:     after_dot,   /* Succeeds if after dot */
 143:     begbuf,      /* Succeeds if at beginning of buffer */
 144:     endbuf,      /* Succeeds if at end of buffer */
 145:     wordchar,    /* Matches any word-constituent character */
 146:     notwordchar, /* Matches any char that is not a word-constituent */
 147:     wordbeg,     /* Succeeds if at word beginning */
 148:     wordend,     /* Succeeds if at word end */
 149:     wordbound,   /* Succeeds if at a word boundary */
 150:     notwordbound, /* Succeeds if not at a word boundary */
 151:     syntaxspec,  /* Matches any character whose syntax is specified.
 152: 		    followed by a byte which contains a syntax code, Sword or such like */
 153:     notsyntaxspec /* Matches any character whose syntax differs from the specified. */
 154:   };
 155: 
 156: #ifndef SIGN_EXTEND_CHAR
 157: #define SIGN_EXTEND_CHAR(x) (x)
 158: #endif
 159: 
 160: /* compile_pattern takes a regular-expression descriptor string in the user's format
 161:   and converts it into a buffer full of byte commands for matching.
 162: 
 163:   pattern   is the address of the pattern string
 164:   size      is the length of it.
 165:   bufp	    is a  struct re_pattern_buffer *  which points to the info
 166: 	    on where to store the byte commands.
 167: 	    This structure contains a  char *  which points to the
 168: 	    actual space, which should have been obtained with malloc.
 169: 	    compile_pattern may use  realloc  to grow the buffer space.
 170: 
 171:   The number of bytes of commands can be found out by looking in
 172:   the  struct re_pattern_buffer  that bufp pointed to,
 173:   after compile_pattern returns.
 174: */
 175: 
 176: #define PATPUSH(ch) (*b++ = (char) (ch))
 177: 
 178: #define PATFETCH(c) \
 179:  {if (p == pend) goto end_of_pattern; \
 180:   c = * (unsigned char *) p++; \
 181:   if (translate) c = translate[c]; }
 182: 
 183: #define PATFETCH_RAW(c) \
 184:  {if (p == pend) goto end_of_pattern; \
 185:   c = * (unsigned char *) p++; }
 186: 
 187: #define PATUNFETCH p--
 188: 
 189: #define EXTEND_BUFFER \
 190:   { old_buffer = bufp->buffer; \
 191:     if (bufp->allocated == (1<<16)) goto too_big; \
 192:     bufp->allocated *= 2; \
 193:     if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
 194:     if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
 195:       goto memory_exhausted; \
 196:     c = bufp->buffer - old_buffer; \
 197:     b += c; \
 198:     if (fixup_jump) \
 199:       fixup_jump += c; \
 200:     if (laststart) \
 201:       laststart += c; \
 202:     begalt += c; \
 203:     if (pending_exact) \
 204:       pending_exact += c; \
 205:   }
 206: 
 207: static int store_jump (), insert_jump ();
 208: 
 209: char *
 210: re_compile_pattern (pattern, size, bufp)
 211:      char *pattern;
 212:      int size;
 213:      struct re_pattern_buffer *bufp;
 214: {
 215:   register char *b = bufp->buffer;
 216:   register char *p = pattern;
 217:   char *pend = pattern + size;
 218:   register unsigned c, c1;
 219:   char *p1;
 220:   unsigned char *translate = (unsigned char *) bufp->translate;
 221: 
 222:   /* Temporary used when buffer is made bigger. */
 223: 
 224:   char *old_buffer;
 225: 
 226:   /* address of the count-byte of the most recently inserted "exactn" command.
 227:     This makes it possible to tell whether a new exact-match character
 228:     can be added to that command or requires a new "exactn" command. */
 229: 
 230:   char *pending_exact = 0;
 231: 
 232:   /* address of the place where a forward-jump should go
 233:     to the end of the containing expression.
 234:     Each alternative of an "or", except the last, ends with a forward-jump
 235:     of this sort. */
 236: 
 237:   char *fixup_jump = 0;
 238: 
 239:   /* address of start of the most recently finished expression.
 240:     This tells postfix * where to find the start of its operand. */
 241: 
 242:   char *laststart = 0;
 243: 
 244:   /* In processing a repeat, 1 means zero matches is allowed */
 245: 
 246:   char zero_times_ok;
 247: 
 248:   /* In processing a repeat, 1 means many matches is allowed */
 249: 
 250:   char many_times_ok;
 251: 
 252:   /* address of beginning of regexp, or inside of last \( */
 253: 
 254:   char *begalt = b;
 255: 
 256:   /* Stack of information saved by \( and restored by \).
 257:      Four stack elements are pushed by each \(:
 258:        First, the value of b.
 259:        Second, the value of fixup_jump.
 260:        Third, the value of regnum.
 261:        Fourth, the value of begalt.  */
 262: 
 263:   int stackb[40];
 264:   int *stackp = stackb;
 265:   int *stacke = stackb + 40;
 266:   int *stackt;
 267: 
 268:   /* Counts \('s as they are encountered.  Remembered for the matching \),
 269:      where it becomes the "register number" to put in the stop_memory command */
 270: 
 271:   int regnum = 1;
 272: 
 273:   bufp->fastmap_accurate = 0;
 274: 
 275: #ifndef emacs
 276:   /*
 277:    * Initialize the syntax table.
 278:    */
 279:    init_syntax_once();
 280: #endif emacs
 281: 
 282:   while (p != pend)
 283:     {
 284:       if (b - bufp->buffer
 285:       > bufp->allocated - 10)
 286:     /* Note that EXTEND_BUFFER clobbers c */
 287:     EXTEND_BUFFER;
 288: 
 289:       PATFETCH (c);
 290: 
 291:       switch (c)
 292:     {
 293:     case '$':
 294:       /* $ means succeed if at end of line, but only in special contexts.
 295: 	    If randonly in the middle of a pattern, it is a normal character. */
 296:       if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|')))
 297:         {
 298:           PATPUSH (endline);
 299:           break;
 300:         }
 301:       goto normal_char;
 302: 
 303:     case '^':
 304:       /* ^ means succeed if at beg of line, but only if no preceding pattern. */
 305:       if (laststart) goto normal_char;
 306:       PATPUSH (begline);
 307:       break;
 308: 
 309:     case '*':
 310:     case '+':
 311:     case '?':
 312:       /* If there is no previous pattern, char not special. */
 313:       if (!laststart)
 314:         goto normal_char;
 315:       /* If there is a sequence of repetition chars,
 316: 	     collapse it down to equivalent to just one.  */
 317:       zero_times_ok = 0;
 318:       many_times_ok = 0;
 319:       while (1)
 320:         {
 321:           zero_times_ok |= c != '+';
 322:           many_times_ok |= c != '?';
 323:           if (p == pend)
 324:         break;
 325:           PATFETCH (c);
 326:           if (!(c == '*' || c == '+' || c == '?'))
 327:         {
 328:           PATUNFETCH;
 329:           break;
 330:         }
 331:         }
 332: 
 333:       /* Now we know whether 0 matches is allowed,
 334: 	     and whether 2 or more matches is allowed.  */
 335:       if (many_times_ok)
 336:         {
 337:           /* If more than one repetition is allowed,
 338: 		 put in a backward jump at the end.  */
 339:           store_jump (b, maybe_finalize_jump, laststart - 3);
 340:           b += 3;
 341:         }
 342:       insert_jump (on_failure_jump, laststart, b + 3, b);
 343:       pending_exact = 0;
 344:       b += 3;
 345:       if (!zero_times_ok)
 346:         {
 347:           /* At least one repetition required: insert before the loop
 348: 		 a skip over the initial on-failure-jump instruction */
 349:           insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
 350:           b += 3;
 351:         }
 352:       break;
 353: 
 354:     case '.':
 355:       laststart = b;
 356:       PATPUSH (anychar);
 357:       break;
 358: 
 359:     case '[':
 360:       if (b - bufp->buffer
 361:           > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
 362:         /* Note that EXTEND_BUFFER clobbers c */
 363:         EXTEND_BUFFER;
 364: 
 365:       laststart = b;
 366:       if (*p == '^')
 367:         PATPUSH (charset_not), p++;
 368:       else
 369:         PATPUSH (charset);
 370:       p1 = p;
 371: 
 372:       PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
 373:       /* Clear the whole map */
 374:       bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
 375:       /* Read in characters and ranges, setting map bits */
 376:       while (1)
 377:         {
 378:           PATFETCH (c);
 379:           if (c == ']' && p != p1 + 1) break;
 380:           if (*p == '-')
 381:         {
 382:           PATFETCH (c1);
 383:           PATFETCH (c1);
 384:           while (c <= c1)
 385:             b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
 386:         }
 387:           else
 388:         {
 389:           b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
 390:         }
 391:         }
 392:       /* Discard any bitmap bytes that are all 0 at the end of the map.
 393: 	     Decrement the map-length byte too. */
 394:       while (b[-1] > 0 && b[b[-1] - 1] == 0)
 395:         b[-1]--;
 396:       b += b[-1];
 397:       break;
 398: 
 399:         case '\\':
 400:       if (p == pend) goto invalid_pattern;
 401:       PATFETCH_RAW (c);
 402:       switch (c)
 403:         {
 404:         case '(':
 405:           if (stackp == stacke) goto nesting_too_deep;
 406:           if (regnum < RE_NREGS)
 407:             {
 408:           PATPUSH (start_memory);
 409:           PATPUSH (regnum);
 410:             }
 411:           *stackp++ = b - bufp->buffer;
 412:           *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
 413:           *stackp++ = regnum++;
 414:           *stackp++ = begalt - bufp->buffer;
 415:           fixup_jump = 0;
 416:           laststart = 0;
 417:           begalt = b;
 418:           break;
 419: 
 420:         case ')':
 421:           if (stackp == stackb) goto unmatched_close;
 422:           begalt = *--stackp + bufp->buffer;
 423:           if (fixup_jump)
 424:         store_jump (fixup_jump, jump, b);
 425:           if (stackp[-1] < RE_NREGS)
 426:         {
 427:           PATPUSH (stop_memory);
 428:           PATPUSH (stackp[-1]);
 429:         }
 430:           stackp -= 2;
 431:           fixup_jump = 0;
 432:           if (*stackp)
 433:         fixup_jump = *stackp + bufp->buffer - 1;
 434:           laststart = *--stackp + bufp->buffer;
 435:           break;
 436: 
 437:         case '|':
 438:           insert_jump (on_failure_jump, begalt, b + 6, b);
 439:           pending_exact = 0;
 440:           b += 3;
 441:           if (fixup_jump)
 442:         store_jump (fixup_jump, jump, b);
 443:           fixup_jump = b;
 444:           b += 3;
 445:           laststart = 0;
 446:           begalt = b;
 447:           break;
 448: 
 449: #ifdef emacs
 450:         case '=':
 451:           PATPUSH (at_dot);
 452:           break;
 453: 
 454:         case 's':
 455:           laststart = b;
 456:           PATPUSH (syntaxspec);
 457:           PATFETCH (c);
 458:           PATPUSH (syntax_spec_code[c]);
 459:           break;
 460: 
 461:         case 'S':
 462:           laststart = b;
 463:           PATPUSH (notsyntaxspec);
 464:           PATFETCH (c);
 465:           PATPUSH (syntax_spec_code[c]);
 466:           break;
 467: #endif emacs
 468: 
 469:         case 'w':
 470:           laststart = b;
 471:           PATPUSH (wordchar);
 472:           break;
 473: 
 474:         case 'W':
 475:           laststart = b;
 476:           PATPUSH (notwordchar);
 477:           break;
 478: 
 479:         case '<':
 480:           PATPUSH (wordbeg);
 481:           break;
 482: 
 483:         case '>':
 484:           PATPUSH (wordend);
 485:           break;
 486: 
 487:         case 'b':
 488:           PATPUSH (wordbound);
 489:           break;
 490: 
 491:         case 'B':
 492:           PATPUSH (notwordbound);
 493:           break;
 494: 
 495:         case '`':
 496:           PATPUSH (begbuf);
 497:           break;
 498: 
 499:         case '\'':
 500:           PATPUSH (endbuf);
 501:           break;
 502: 
 503:         case '1':
 504:         case '2':
 505:         case '3':
 506:         case '4':
 507:         case '5':
 508:         case '6':
 509:         case '7':
 510:         case '8':
 511:         case '9':
 512:           c1 = c - '0';
 513:           if (c1 >= regnum)
 514:         goto normal_char;
 515:           for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
 516:         if (*stackt == c1)
 517:           goto normal_char;
 518:           laststart = b;
 519:           PATPUSH (duplicate);
 520:           PATPUSH (c1);
 521:           break;
 522:         default:
 523:           goto normal_char;
 524:         }
 525:       break;
 526: 
 527:     default:
 528:     normal_char:
 529:       if (!pending_exact || pending_exact + *pending_exact + 1 != b
 530:           || *pending_exact == 0177 || *p == '*' || *p == '^'
 531:           || *p == '+' || *p == '?')
 532:         {
 533:           laststart = b;
 534:           PATPUSH (exactn);
 535:           pending_exact = b;
 536:           PATPUSH (0);
 537:         }
 538:       PATPUSH (c);
 539:       (*pending_exact)++;
 540:     }
 541:     }
 542: 
 543:   if (fixup_jump)
 544:     store_jump (fixup_jump, jump, b);
 545: 
 546:   if (stackp != stackb) goto unmatched_open;
 547: 
 548:   bufp->used = b - bufp->buffer;
 549:   return 0;
 550: 
 551:  invalid_pattern:
 552:   return "Invalid regular expression";
 553: 
 554:  unmatched_open:
 555:   return "Unmatched \\(";
 556: 
 557:  unmatched_close:
 558:   return "Unmatched \\)";
 559: 
 560:  end_of_pattern:
 561:   return "Premature end of regular expression";
 562: 
 563:  nesting_too_deep:
 564:   return "Nesting too deep";
 565: 
 566:  too_big:
 567:   return "Regular expression too big";
 568: 
 569:  memory_exhausted:
 570:   return "Memory exhausted";
 571: }
 572: 
 573: #ifndef emacs
 574: init_syntax_once ()
 575: {
 576:    register int c;
 577:    static int done = 0;
 578: 
 579:    if (done)
 580:      return;
 581: 
 582:    bzero (syntax_table, sizeof syntax_table);
 583: 
 584:    for (c = 'a'; c <= 'z'; c++)
 585:      syntax_table[c] = Sword;
 586: 
 587:    for (c = 'A'; c <= 'Z'; c++)
 588:      syntax_table[c] = Sword;
 589: 
 590:    for (c = '0'; c <= '9'; c++)
 591:      syntax_table[c] = Sword;
 592: 
 593:    done = 1;
 594: }
 595: #endif not emacs
 596: 
 597: /* Store where `from' points a jump operation to jump to where `to' points.
 598:   `opcode' is the opcode to store. */
 599: 
 600: static int
 601: store_jump (from, opcode, to)
 602:      char *from, *to;
 603:      char opcode;
 604: {
 605:   from[0] = opcode;
 606:   from[1] = (to - (from + 3)) & 0377;
 607:   from[2] = (to - (from + 3)) >> 8;
 608: }
 609: 
 610: /* Open up space at char FROM, and insert there a jump to TO.
 611:    CURRENT_END gives te end of the storage no in use,
 612:    so we know how much data to copy up.
 613:    OP is the opcode of the jump to insert.
 614: 
 615:    If you call this function, you must zero out pending_exact.  */
 616: 
 617: static int
 618: insert_jump (op, from, to, current_end)
 619:      char op;
 620:      char *from, *to, *current_end;
 621: {
 622:   register char *pto = current_end + 3;
 623:   register char *pfrom = current_end;
 624:   while (pfrom != from)
 625:     *--pto = *--pfrom;
 626:   store_jump (from, op, to);
 627: }
 628: 
 629: /* Given a pattern, compute a fastmap from it.
 630:  The fastmap records which of the (1 << BYTEWIDTH) possible characters
 631:  can start a string that matches the pattern.
 632:  This fastmap is used by re_search to skip quickly over totally implausible text.
 633: 
 634:  The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
 635:  as bufp->fastmap.
 636:  The other components of bufp describe the pattern to be used.  */
 637: 
 638: re_compile_fastmap (bufp)
 639:      struct re_pattern_buffer *bufp;
 640: {
 641:   char *pattern = bufp->buffer;
 642:   int size = bufp->used;
 643:   register char *fastmap = bufp->fastmap;
 644:   register char *p = pattern;
 645:   register char *pend = pattern + size;
 646:   register int j, k;
 647:   unsigned char *translate = (unsigned char *) bufp->translate;
 648: 
 649:   char *stackb[NFAILURES];
 650:   char **stackp = stackb;
 651: 
 652:   bzero (fastmap, (1 << BYTEWIDTH));
 653:   bufp->fastmap_accurate = 1;
 654:   bufp->can_be_null = 0;
 655: 
 656:   while (p)
 657:     {
 658:       if (p == pend)
 659:     {
 660:       bufp->can_be_null = 1;
 661:       break;
 662:     }
 663: #ifdef SWITCH_ENUM_BUG
 664:       switch ((int) ((enum regexpcode) *p++))
 665: #else
 666:       switch ((enum regexpcode) *p++)
 667: #endif
 668:     {
 669:     case exactn:
 670:       if (translate)
 671:         fastmap[translate[p[1]]] = 1;
 672:       else
 673:         fastmap[p[1]] = 1;
 674:       break;
 675: 
 676:         case begline:
 677:         case before_dot:
 678:     case at_dot:
 679:     case after_dot:
 680:     case begbuf:
 681:     case endbuf:
 682:     case wordbound:
 683:     case notwordbound:
 684:     case wordbeg:
 685:     case wordend:
 686:       continue;
 687: 
 688:     case endline:
 689:       if (translate)
 690:         fastmap[translate['\n']] = 1;
 691:       else
 692:         fastmap['\n'] = 1;
 693:       bufp->can_be_null = 1;
 694:       break;
 695: 
 696:     case finalize_jump:
 697:     case maybe_finalize_jump:
 698:     case jump:
 699:     case dummy_failure_jump:
 700:       bufp->can_be_null = 1;
 701:       j = *p++ & 0377;
 702:       j += SIGN_EXTEND_CHAR (*p++) << 8;
 703:       p += j;
 704:       if (j > 0)
 705:         continue;
 706:       /* Jump backward reached implies we just went through
 707: 	     the body of a loop and matched nothing.
 708: 	     Opcode jumped to should be an on_failure_jump.
 709: 	     Just treat it like an ordinary jump.
 710: 	     For a * loop, it has pushed its failure point already;
 711: 	     if so, discard that as redundant.  */
 712:       if ((enum regexpcode) *p != on_failure_jump)
 713:         continue;
 714:       p++;
 715:       j = *p++ & 0377;
 716:       j += SIGN_EXTEND_CHAR (*p++) << 8;
 717:       p += j;
 718:       if (stackp != stackb && *stackp == p)
 719:         stackp--;
 720:       continue;
 721: 
 722:     case on_failure_jump:
 723:       j = *p++ & 0377;
 724:       j += SIGN_EXTEND_CHAR (*p++) << 8;
 725:       *++stackp = p + j;
 726:       continue;
 727: 
 728:     case start_memory:
 729:     case stop_memory:
 730:       p++;
 731:       continue;
 732: 
 733:     case duplicate:
 734:       bufp->can_be_null = 1;
 735:     case anychar:
 736:       for (j = 0; j < (1 << BYTEWIDTH); j++)
 737:         fastmap[j] = 1;
 738:       return;
 739: 
 740:     case wordchar:
 741:       for (j = 0; j < (1 << BYTEWIDTH); j++)
 742:         if (SYNTAX (j) == Sword)
 743:           fastmap[j] = 1;
 744:       break;
 745: 
 746:     case notwordchar:
 747:       for (j = 0; j < (1 << BYTEWIDTH); j++)
 748:         if (SYNTAX (j) != Sword)
 749:           fastmap[j] = 1;
 750:       break;
 751: 
 752: #ifdef emacs
 753:     case syntaxspec:
 754:       k = *p++;
 755:       for (j = 0; j < (1 << BYTEWIDTH); j++)
 756:         if (SYNTAX (j) == (enum syntaxcode) k)
 757:           fastmap[j] = 1;
 758:       break;
 759: 
 760:     case notsyntaxspec:
 761:       for (j = 0; j < (1 << BYTEWIDTH); j++)
 762:         if (SYNTAX (j) != (enum syntaxcode) k)
 763:           fastmap[j] = 1;
 764:       break;
 765: #endif emacs
 766: 
 767:     case charset:
 768:       for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
 769:         if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
 770:           {
 771:         if (translate)
 772:           fastmap[translate[j]] = 1;
 773:         else
 774:           fastmap[j] = 1;
 775:           }
 776:       break;
 777: 
 778:     case charset_not:
 779:       /* Chars beyond end of map must be allowed */
 780:       for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
 781:         if (translate)
 782:           fastmap[translate[j]] = 1;
 783:         else
 784:           fastmap[j] = 1;
 785: 
 786:       for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
 787:         if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
 788:           {
 789:         if (translate)
 790:           fastmap[translate[j]] = 1;
 791:         else
 792:           fastmap[j] = 1;
 793:           }
 794:       break;
 795:     }
 796: 
 797:       /* Get here means we have successfully found the possible starting characters
 798: 	 of one path of the pattern.  We need not follow this path any farther.
 799: 	 Instead, look at the next alternative remembered in the stack. */
 800:       if (stackp != stackb)
 801:     p = *stackp--;
 802:       else
 803:     break;
 804:     }
 805: }
 806: 
 807: /* Like re_search_2, below, but only one string is specified. */
 808: 
 809: re_search (pbufp, string, size, startpos, range, regs)
 810:      struct re_pattern_buffer *pbufp;
 811:      char *string;
 812:      int size, startpos, range;
 813:      struct re_registers *regs;
 814: {
 815:   return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
 816: }
 817: 
 818: /* Like re_match_2 but tries first a match starting at index `startpos',
 819:  then at startpos + 1, and so on.
 820:  `range' is the number of places to try before giving up.
 821:  If `range' is negative, the starting positions tried are
 822:   startpos, startpos - 1, etc.
 823:  It is up to the caller to make sure that range is not so large
 824:   as to take the starting position outside of the input strings.
 825: 
 826: The value returned is the position at which the match was found,
 827:  or -1 if no match was found. */
 828: 
 829: int
 830: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
 831:      struct re_pattern_buffer *pbufp;
 832:      char *string1, *string2;
 833:      int size1, size2;
 834:      int startpos;
 835:      register int range;
 836:      struct re_registers *regs;
 837:      int mstop;
 838: {
 839:   register char *fastmap = pbufp->fastmap;
 840:   register char *translate = pbufp->translate;
 841:   int total = size1 + size2;
 842: 
 843:   /* Update the fastmap now if not correct already */
 844:   if (fastmap && !pbufp->fastmap_accurate)
 845:     re_compile_fastmap (pbufp);
 846: 
 847:   while (1)
 848:     {
 849:       /* If a fastmap is supplied, skip quickly over characters
 850: 	 that cannot possibly be the start of a match.
 851: 	 Note, however, that if the pattern can possibly match
 852: 	 the null string, we must test it at each starting point
 853: 	 so that we take the first null string we get.  */
 854: 
 855:       if (fastmap && startpos < total && !pbufp->can_be_null)
 856:     {
 857:       if (range > 0)
 858:         {
 859:           register int lim = 0;
 860:           register char *p;
 861:           int irange = range;
 862:           if (startpos < size1 && startpos + range >= size1)
 863:         lim = range - (size1 - startpos);
 864: 
 865:           p = &(startpos >= size1 ? string2 - size1 : string1)[startpos];
 866: 
 867:           if (translate)
 868:         {
 869:           while (range > lim && !fastmap[translate[*p++]])
 870:             range--;
 871:         }
 872:           else
 873:         {
 874:           while (range > lim && !fastmap[*p++])
 875:             range--;
 876:         }
 877:           startpos += irange - range;
 878:         }
 879:       else
 880:         {
 881:           register char c;
 882:           if (startpos >= size1) c = string2[startpos - size1];
 883:           else c = string1[startpos];
 884:           if (translate ? !fastmap[translate[c]] : !fastmap[c])
 885:         goto advance;
 886:         }
 887:     }
 888: 
 889:       if (range >= 0 && startpos == total
 890:       && fastmap && !pbufp->can_be_null)
 891:     return -1;
 892: 
 893:       if (0 <= re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop))
 894:     return startpos;
 895: 
 896:     advance:
 897:       if (!range) break;
 898:       if (range > 0) range--, startpos++; else range++, startpos--;
 899:     }
 900:   return -1;
 901: }
 902: 
 903: #ifndef emacs   /* emacs never uses this */
 904: re_match (pbufp, string, size, pos, regs)
 905:      struct re_pattern_buffer *pbufp;
 906:      char *string;
 907:      int size, pos;
 908:      struct re_registers *regs;
 909: {
 910:   return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
 911: }
 912: #endif /* emacs */
 913: 
 914: /* Match the pattern described by `pbufp'
 915:   against data which is the virtual concatenation of `string1' and `string2'.
 916:   `size1' and `size2' are the sizes of the two data strings.
 917:   Start the match at position `pos'.
 918:   Do not consider matching past the position `mstop'.
 919: 
 920:   If pbufp->fastmap is nonzero, then it had better be up to date.
 921: 
 922:   The reason that the data to match is specified as two components
 923:   which are to be regarded as concatenated
 924:   is so that this function can be used directly on the contents of an Emacs buffer.
 925: 
 926:   -1 is returned if there is no match.  Otherwise the value is the length
 927:   of the substring which was matched.
 928: */
 929: 
 930: int
 931: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
 932:      struct re_pattern_buffer *pbufp;
 933:      char *string1, *string2;
 934:      int size1, size2;
 935:      int pos;
 936:      struct re_registers *regs;
 937:      int mstop;
 938: {
 939:   register char *p = pbufp->buffer;
 940:   register char *pend = p + pbufp->used;
 941:   /* End of first string */
 942:   char *end1;
 943:   /* End of second string */
 944:   char *end2;
 945:   /* Pointer just past last char to consider matching */
 946:   char *end_match_1, *end_match_2;
 947:   register char *d, *dend;
 948:   register int mcnt;
 949:   char *translate = pbufp->translate;
 950: 
 951:  /* Failure point stack.  Each place that can handle a failure further down the line
 952:     pushes a failure point on this stack.  It consists of two char *'s.
 953:     The first one pushed is where to resume scanning the pattern;
 954:     the second pushed is where to resume scanning the strings.
 955:     If the latter is zero, the failure point is a "dummy".
 956:     If a failure happens and the innermost failure point is dormant,
 957:     it discards that failure point and tries the next one. */
 958: 
 959:   char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *));
 960:   char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
 961: 
 962:   /* Information on the "contents" of registers.
 963:      These are pointers into the input strings; they record
 964:      just what was matched (on this attempt) by some part of the pattern.
 965:      The start_memory command stores the start of a register's contents
 966:      and the stop_memory command stores the end.
 967: 
 968:      At that point, regstart[regnum] points to the first character in the register,
 969:      regend[regnum] points to the first character beyond the end of the register,
 970:      and regstart_segend[regnum] is either the same as regend[regnum]
 971:      or else points to the end of the input string into which regstart[regnum] points.
 972:      The latter case happens when regstart[regnum] is in string1 and
 973:      regend[regnum] is in string2.  */
 974: 
 975:   char *regstart[RE_NREGS];
 976:   char *regstart_segend[RE_NREGS];
 977:   char *regend[RE_NREGS];
 978: 
 979:   /* Set up pointers to ends of strings.
 980:      Don't allow the second string to be empty unless both are empty.  */
 981:   if (!size2)
 982:     {
 983:       string2 = string1;
 984:       size2 = size1;
 985:       string1 = 0;
 986:       size1 = 0;
 987:     }
 988:   end1 = string1 + size1;
 989:   end2 = string2 + size2;
 990: 
 991:   /* Compute where to stop matching, within the two strings */
 992:   if (mstop <= size1)
 993:     {
 994:       end_match_1 = string1 + mstop;
 995:       end_match_2 = string2;
 996:     }
 997:   else
 998:     {
 999:       end_match_1 = end1;
1000:       end_match_2 = string2 + mstop - size1;
1001:     }
1002: 
1003:   /* Initialize \( and \) text positions to -1
1004:      to mark ones that no \( or \) has been seen for.  */
1005: 
1006:   for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++)
1007:     regstart[mcnt] = (char *) -1;
1008: 
1009:   /* `p' scans through the pattern as `d' scans through the data.
1010:      `dend' is the end of the input string that `d' points within.
1011:      `d' is advanced into the following input string whenever necessary,
1012:      but this happens before fetching;
1013:      therefore, at the beginning of the loop,
1014:      `d' can be pointing at the end of a string,
1015:      but it cannot equal string2.  */
1016: 
1017:   if (pos <= size1)
1018:     d = string1 + pos, dend = end_match_1;
1019:   else
1020:     d = string2 + pos - size1, dend = end_match_2;
1021: 
1022: /* Write PREFETCH; just before fetching a character with *d.  */
1023: #define PREFETCH \
1024:  while (d == dend)                          \
1025:   { if (dend == end_match_2) goto fail;  /* end of string2 => failure */   \
1026:     d = string2;  /* end of string1 => advance to string2. */       \
1027:     dend = end_match_2; }
1028: 
1029:   /* This loop loops over pattern commands.
1030:      It exits by returning from the function if match is complete,
1031:      or it drops through if match fails at this starting point in the input data. */
1032: 
1033:   while (1)
1034:     {
1035:       if (p == pend)
1036:     /* End of pattern means we have succeeded! */
1037:     {
1038:       /* If caller wants register contents data back, convert it to indices */
1039:       if (regs)
1040:         {
1041:           bzero (regs, sizeof (*regs));
1042: 
1043:           regend[0] = d;
1044:           regstart[0] = string1;
1045:           for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
1046:         {
1047:           if (mcnt && regstart[mcnt] == (char *) -1) continue;
1048:           if (regstart[mcnt] - string1 < 0 || regstart[mcnt] - string1 > size1)
1049:             regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1050:           else
1051:             regs->start[mcnt] = regstart[mcnt] - string1;
1052:           if (regend[mcnt] - string1 < 0 || regend[mcnt] - string1 > size1)
1053:             regs->end[mcnt] = regend[mcnt] - string2 + size1;
1054:           else
1055:             regs->end[mcnt] = regend[mcnt] - string1;
1056:         }
1057:           regs->start[0] = pos;
1058:         }
1059:       if (d - string1 >= 0 && d - string1 <= size1)
1060:         return d - string1 - pos;
1061:       else
1062:         return d - string2 + size1 - pos;
1063:     }
1064: 
1065:       /* Otherwise match next pattern command */
1066: #ifdef SWITCH_ENUM_BUG
1067:       switch ((int) ((enum regexpcode) *p++))
1068: #else
1069:       switch ((enum regexpcode) *p++)
1070: #endif
1071:     {
1072: 
1073:     /* \( is represented by a start_memory, \) by a stop_memory.
1074: 	    Both of those commands contain a "register number" argument.
1075: 	    The text matched within the \( and \) is recorded under that number.
1076: 	    Then, \<digit> turns into a `duplicate' command which
1077: 	    is followed by the numeric value of <digit> as the register number. */
1078: 
1079:     case start_memory:
1080:       regstart[*p] = d;
1081:       regstart_segend[*p++] = dend;
1082:       break;
1083: 
1084:     case stop_memory:
1085:       regend[*p] = d;
1086:       if (regstart_segend[*p] == dend)
1087:         regstart_segend[*p] = d;
1088:       p++;
1089:       break;
1090: 
1091:     case duplicate:
1092:       {
1093:         int regno = *p++;   /* Get which register to match against */
1094:         register char *d2, *dend2;
1095: 
1096:         d2 = regstart[regno];
1097:         dend2 = regstart_segend[regno];
1098:         while (1)
1099:           {
1100:         /* Advance to next segment in register contents, if necessary */
1101:         while (d2 == dend2)
1102:           {
1103:             if (dend2 == end_match_2) break;
1104:             if (dend2 == regend[regno]) break;
1105:             d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
1106:           }
1107:         /* At end of register contents => success */
1108:         if (d2 == dend2) break;
1109: 
1110:         /* Advance to next segment in data being matched, if necessary */
1111:         PREFETCH;
1112: 
1113:         /* mcnt gets # consecutive chars to compare */
1114:         mcnt = dend - d;
1115:         if (mcnt > dend2 - d2)
1116:           mcnt = dend2 - d2;
1117:         /* Compare that many; failure if mismatch, else skip them. */
1118:         if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1119:           goto fail;
1120:         d += mcnt, d2 += mcnt;
1121:           }
1122:       }
1123:       break;
1124: 
1125:     case anychar:
1126:       /* fetch a data character */
1127:       PREFETCH;
1128:       /* Match anything but a newline.  */
1129:       if ((translate ? translate[*d++] : *d++) == '\n')
1130:         goto fail;
1131:       break;
1132: 
1133:     case charset:
1134:     case charset_not:
1135:       {
1136:         /* Nonzero for charset_not */
1137:         int not = 0;
1138:         register int c;
1139:         if (*(p - 1) == (char) charset_not)
1140:           not = 1;
1141: 
1142:         /* fetch a data character */
1143:         PREFETCH;
1144: 
1145:         if (translate)
1146:           c = translate [*(unsigned char *)d];
1147:         else
1148:           c = *(unsigned char *)d;
1149: 
1150:         if (c < *p * BYTEWIDTH
1151:         && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1152:           not = !not;
1153: 
1154:         p += 1 + *p;
1155: 
1156:         if (!not) goto fail;
1157:         d++;
1158:         break;
1159:       }
1160: 
1161:     case begline:
1162:       if (d == string1 || d[-1] == '\n')
1163:         break;
1164:       goto fail;
1165: 
1166:     case endline:
1167:       if (d == end2
1168:           || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1169:         break;
1170:       goto fail;
1171: 
1172:     /* "or" constructs ("|") are handled by starting each alternative
1173: 	    with an on_failure_jump that points to the start of the next alternative.
1174: 	    Each alternative except the last ends with a jump to the joining point.
1175: 	    (Actually, each jump except for the last one really jumps
1176: 	     to the following jump, because tensioning the jumps is a hassle.) */
1177: 
1178:     /* The start of a stupid repeat has an on_failure_jump that points
1179: 	   past the end of the repeat text.
1180: 	   This makes a failure point so that, on failure to match a repetition,
1181: 	   matching restarts past as many repetitions have been found
1182: 	   with no way to fail and look for another one.  */
1183: 
1184:     /* A smart repeat is similar but loops back to the on_failure_jump
1185: 	   so that each repetition makes another failure point. */
1186: 
1187:     case on_failure_jump:
1188:       if (stackp == stacke)
1189:         {
1190:           char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
1191:           bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1192:           stackp += stackx - stackb;
1193:           stacke = stackx + 2 * (stacke - stackb);
1194:           stackb = stackx;
1195:         }
1196:       mcnt = *p++ & 0377;
1197:       mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
1198:       *stackp++ = mcnt + p;
1199:       *stackp++ = d;
1200:       break;
1201: 
1202:     /* The end of a smart repeat has an maybe_finalize_jump back.
1203: 	   Change it either to a finalize_jump or an ordinary jump. */
1204: 
1205:     case maybe_finalize_jump:
1206:       mcnt = *p++ & 0377;
1207:       mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
1208:       /* Compare what follows with the begining of the repeat.
1209: 	     If we can establish that there is nothing that they would
1210: 	     both match, we can change to finalize_jump */
1211:       if (p == pend)
1212:         p[-3] = (char) finalize_jump;
1213:       else if (*p == (char) exactn || *p == (char) endline)
1214:         {
1215:           register int c = *p == (char) endline ? '\n' : p[2];
1216:           register char *p1 = p + mcnt;
1217:           /* p1[0] ... p1[2] are an on_failure_jump.
1218: 		 Examine what follows that */
1219:           if (p1[3] == (char) exactn && p1[5] != c)
1220:         p[-3] = (char) finalize_jump;
1221:           else if (p1[3] == (char) charset || p1[3] == (char) charset_not)
1222:         {
1223:           int not = p1[3] == (char) charset_not;
1224:           if (c < p1[4] * BYTEWIDTH
1225:               && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1226:             not = !not;
1227:           /* not is 1 if c would match */
1228:           /* That means it is not safe to finalize */
1229:           if (!not)
1230:             p[-3] = (char) finalize_jump;
1231:         }
1232:         }
1233:       p -= 2;
1234:       if (p[-1] != (char) finalize_jump)
1235:         {
1236:           p[-1] = (char) jump;
1237:           goto nofinalize;
1238:         }
1239: 
1240:     /* The end of a stupid repeat has a finalize-jump
1241: 	   back to the start, where another failure point will be made
1242: 	   which will point after all the repetitions found so far. */
1243: 
1244:     case finalize_jump:
1245:       stackp -= 2;
1246: 
1247:     case jump:
1248:     nofinalize:
1249:       mcnt = *p++ & 0377;
1250:       mcnt += SIGN_EXTEND_CHAR (*p++) << 8;
1251:       p += mcnt;
1252:       break;
1253: 
1254:     case dummy_failure_jump:
1255:       if (stackp == stacke)
1256:         {
1257:           char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
1258:           bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1259:           stackp += stackx - stackb;
1260:           stacke = stackx + 2 * (stacke - stackb);
1261:           stackb = stackx;
1262:         }
1263:       *stackp++ = 0;
1264:       *stackp++ = 0;
1265:       goto nofinalize;
1266: 
1267:     case wordbound:
1268:       if (d == string1  /* Points to first char */
1269:           || d == end2  /* Points to end */
1270:           || (d == end1 && size2 == 0)) /* Points to end */
1271:         break;
1272:       if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
1273:           != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
1274:         break;
1275:       goto fail;
1276: 
1277:     case notwordbound:
1278:       if (d == string1  /* Points to first char */
1279:           || d == end2  /* Points to end */
1280:           || (d == end1 && size2 == 0)) /* Points to end */
1281:         goto fail;
1282:       if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
1283:           != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword))
1284:         goto fail;
1285:       break;
1286: 
1287:     case wordbeg:
1288:       if (d == end2  /* Points to end */
1289:           || (d == end1 && size2 == 0) /* Points to end */
1290:           || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1291:         goto fail;
1292:       if (d == string1  /* Points to first char */
1293:           || SYNTAX (((unsigned char *)d)[-1]) != Sword)  /* prev char not letter */
1294:         break;
1295:       goto fail;
1296: 
1297:     case wordend:
1298:       if (d == string1  /* Points to first char */
1299:           || SYNTAX (((unsigned char *)d)[-1]) != Sword)  /* prev char not letter */
1300:         goto fail;
1301:       if (d == end2  /* Points to end */
1302:           || (d == end1 && size2 == 0) /* Points to end */
1303:           || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */
1304:         break;
1305:       goto fail;
1306: 
1307: #ifdef emacs
1308:     case before_dot:
1309:       if (((d - string2 <= (unsigned) size2)
1310:            ? d - (char *) bf_p2 : d - (char *) bf_p1)
1311:           <= point)
1312:         goto fail;
1313:       break;
1314: 
1315:     case at_dot:
1316:       if (((d - string2 <= (unsigned) size2)
1317:            ? d - (char *) bf_p2 : d - (char *) bf_p1)
1318:           == point)
1319:         goto fail;
1320:       break;
1321: 
1322:     case after_dot:
1323:       if (((d - string2 <= (unsigned) size2)
1324:            ? d - (char *) bf_p2 : d - (char *) bf_p1)
1325:           >= point)
1326:         goto fail;
1327:       break;
1328: 
1329:     case wordchar:
1330:       mcnt = (int) Sword;
1331:       goto matchsyntax;
1332: 
1333:     case syntaxspec:
1334:       mcnt = *p++;
1335:     matchsyntax:
1336:       PREFETCH;
1337:       if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail;
1338:       break;
1339: 
1340:     case notwordchar:
1341:       mcnt = (int) Sword;
1342:       goto matchnotsyntax;
1343: 
1344:     case notsyntaxspec:
1345:       mcnt = *p++;
1346:     matchnotsyntax:
1347:       PREFETCH;
1348:       if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail;
1349:       break;
1350: #else
1351:     case wordchar:
1352:       PREFETCH;
1353:       if (SYNTAX (*(unsigned char *)d++) == 0) goto fail;
1354:       break;
1355: 
1356:     case notwordchar:
1357:       PREFETCH;
1358:       if (SYNTAX (*(unsigned char *)d++) != 0) goto fail;
1359:       break;
1360: #endif not emacs
1361: 
1362:     case begbuf:
1363:       if (d == string1) /* Note, d cannot equal string2 */
1364:         break;      /* unless string1 == string2.  */
1365:       goto fail;
1366: 
1367:     case endbuf:
1368:       if (d == end2 || (d == end1 && size2 == 0))
1369:         break;
1370:       goto fail;
1371: 
1372:     case exactn:
1373:       /* Match the next few pattern characters exactly.
1374: 	     mcnt is how many characters to match. */
1375:       mcnt = *p++;
1376:       if (translate)
1377:         {
1378:           do
1379:         {
1380:           PREFETCH;
1381:           if (translate[*(unsigned char *)d++] != *p++) goto fail;
1382:         }
1383:           while (--mcnt);
1384:         }
1385:       else
1386:         {
1387:           do
1388:         {
1389:           PREFETCH;
1390:           if (*d++ != *p++) goto fail;
1391:         }
1392:           while (--mcnt);
1393:         }
1394:       break;
1395:     }
1396:       continue;    /* Successfully matched one pattern command; keep matching */
1397: 
1398:       /* Jump here if any matching operation fails. */
1399:     fail:
1400:       if (stackp != stackb)
1401:     /* A restart point is known.  Restart there and pop it. */
1402:     {
1403:       if (!stackp[-2])
1404:         {   /* If innermost failure point is dormant, flush it and keep looking */
1405:           stackp -= 2;
1406:           goto fail;
1407:         }
1408:       d = *--stackp;
1409:       p = *--stackp;
1410:       if (d >= string1 && d <= end1)
1411:         dend = end_match_1;
1412:     }
1413:       else break;   /* Matching at this starting point really fails! */
1414:     }
1415:   return -1;         /* Failure to match */
1416: }
1417: 
1418: static int
1419: bcmp_translate (s1, s2, len, translate)
1420:      char *s1, *s2;
1421:      register int len;
1422:      char *translate;
1423: {
1424:   register char *p1 = s1, *p2 = s2;
1425:   while (len)
1426:     {
1427:       if (translate [*p1++] != translate [*p2++]) return 1;
1428:       len--;
1429:     }
1430:   return 0;
1431: }
1432: 
1433: /* Entry points compatible with bsd4.2 regex library */
1434: 
1435: #ifndef emacs
1436: 
1437: static struct re_pattern_buffer re_comp_buf;
1438: 
1439: char *
1440: re_comp (s)
1441:      char *s;
1442: {
1443:   if (!s)
1444:     {
1445:       if (!re_comp_buf.buffer)
1446:     return "No previous regular expression";
1447:       return 0;
1448:     }
1449: 
1450:   if (!re_comp_buf.buffer)
1451:     {
1452:       if (!(re_comp_buf.buffer = (char *) malloc (200)))
1453:     return "Memory exhausted";
1454:       re_comp_buf.allocated = 200;
1455:       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1456:     return "Memory exhausted";
1457:     }
1458:   return re_compile_pattern (s, strlen (s), &re_comp_buf);
1459: }
1460: 
1461: int
1462: re_exec (s)
1463:      char *s;
1464: {
1465:   int len = strlen (s);
1466:   return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1467: }
1468: 
1469: #endif /* emacs */
1470: 
1471: #ifdef test
1472: 
1473: #include <stdio.h>
1474: 
1475: /* Indexed by a character, gives the upper case equivalent of the character */
1476: 
1477: static char upcase[0400] =
1478:   { 000, 001, 002, 003, 004, 005, 006, 007,
1479:     010, 011, 012, 013, 014, 015, 016, 017,
1480:     020, 021, 022, 023, 024, 025, 026, 027,
1481:     030, 031, 032, 033, 034, 035, 036, 037,
1482:     040, 041, 042, 043, 044, 045, 046, 047,
1483:     050, 051, 052, 053, 054, 055, 056, 057,
1484:     060, 061, 062, 063, 064, 065, 066, 067,
1485:     070, 071, 072, 073, 074, 075, 076, 077,
1486:     0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1487:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1488:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1489:     0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1490:     0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1491:     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1492:     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1493:     0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1494:     0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1495:     0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1496:     0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1497:     0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1498:     0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1499:     0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1500:     0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1501:     0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1502:     0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1503:     0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1504:     0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1505:     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1506:     0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1507:     0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1508:     0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1509:     0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1510:   };
1511: 
1512: main ()
1513: {
1514:   char pat[80];
1515:   struct re_pattern_buffer buf;
1516:   int i;
1517:   char c;
1518:   char fastmap[(1 << BYTEWIDTH)];
1519: 
1520:   buf.allocated = 40;
1521:   buf.buffer = (char *) malloc (buf.allocated);
1522:   buf.fastmap = fastmap;
1523:   buf.translate = upcase;
1524: 
1525:   while (1)
1526:     {
1527:       gets (pat);
1528: 
1529:       if (*pat)
1530:     {
1531:           re_compile_pattern (pat, strlen(pat), &buf);
1532: 
1533:       for (i = 0; i < buf.used; i++)
1534:         printchar (buf.buffer[i]);
1535: 
1536:       putchar ('\n');
1537: 
1538:       printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1539: 
1540:       re_compile_fastmap (&buf);
1541:       printf ("Allowed by fastmap: ");
1542:       for (i = 0; i < (1 << BYTEWIDTH); i++)
1543:         if (fastmap[i]) printchar (i);
1544:       putchar ('\n');
1545:     }
1546: 
1547:       gets (pat);   /* Now read the string to match against */
1548: 
1549:       i = re_match (&buf, pat, strlen (pat), 0, 0);
1550:       printf ("Match value %d.\n", i);
1551:     }
1552: }
1553: 
1554: #ifdef NOTDEF
1555: print_buf (bufp)
1556:      struct re_pattern_buffer *bufp;
1557: {
1558:   int i;
1559: 
1560:   printf ("buf is :\n----------------\n");
1561:   for (i = 0; i < bufp->used; i++)
1562:     printchar (bufp->buffer[i]);
1563: 
1564:   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1565: 
1566:   printf ("Allowed by fastmap: ");
1567:   for (i = 0; i < (1 << BYTEWIDTH); i++)
1568:     if (bufp->fastmap[i])
1569:       printchar (i);
1570:   printf ("\nAllowed by translate: ");
1571:   if (bufp->translate)
1572:     for (i = 0; i < (1 << BYTEWIDTH); i++)
1573:       if (bufp->translate[i])
1574:     printchar (i);
1575:   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1576:   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1577: }
1578: #endif
1579: 
1580: printchar (c)
1581:      char c;
1582: {
1583:   if (c < 041 || c >= 0177)
1584:     {
1585:       putchar ('\\');
1586:       putchar (((c >> 6) & 3) + '0');
1587:       putchar (((c >> 3) & 7) + '0');
1588:       putchar ((c & 7) + '0');
1589:     }
1590:   else
1591:     putchar (c);
1592: }
1593: 
1594: error (string)
1595:      char *string;
1596: {
1597:   puts (string);
1598:   exit (1);
1599: }
1600: 
1601: #endif test