1: /* Extended regular expression matching and search. 2: Copyright (C) 1985 Richard M. Stallman 3: 4: This program is distributed in the hope that it will be useful, 5: but without any warranty. No author or distributor 6: accepts responsibility to anyone for the consequences of using it 7: or for whether it serves any particular purpose or works at all, 8: unless he says so in writing. 9: 10: Permission is granted to anyone to distribute verbatim copies 11: of this program's source code as received, in any medium, provided that 12: the copyright notice, the nonwarraty notice above 13: and this permission notice are preserved, 14: and that the distributor grants the recipient all rights 15: for further redistribution as permitted by this notice, 16: and informs him of these rights. 17: 18: Permission is granted to distribute modified versions of this 19: program's source code, or of portions of it, under the above 20: conditions, plus the conditions that all changed files carry 21: prominent notices stating who last changed them and that the 22: derived material, including anything packaged together with it and 23: conceptually functioning as a modification of it rather than an 24: application of it, is in its entirety subject to a permission 25: notice identical to this one. 26: 27: Permission is granted to distribute this program (verbatim or 28: as modified) in compiled or executable form, provided verbatim 29: redistribution is permitted as stated above for source code, and 30: A. it is accompanied by the corresponding machine-readable 31: source code, under the above conditions, or 32: B. it is accompanied by a written offer, with no time limit, 33: to distribute the corresponding machine-readable source code, 34: under the above conditions, to any one, in return for reimbursement 35: of the cost of distribution. Verbatim redistribution of the 36: written offer must be permitted. Or, 37: C. it is distributed by someone who received only the 38: compiled or executable form, and is accompanied by a copy of the 39: written offer of source code which he received along with it. 40: 41: Permission is granted to distribute this program (verbatim or as modified) 42: in executable form as part of a larger system provided that the source 43: code for this program, including any modifications used, 44: is also distributed or offered as stated in the preceding paragraph. 45: 46: In other words, you are welcome to use, share and improve this program. 47: You are forbidden to forbid anyone else to use, share and improve 48: what you give them. Help stamp out software-hoarding! */ 49: 50: 51: /* To test, compile with -Dtest. 52: This Dtestable feature turns this into a self-contained program 53: which reads a pattern, describes how it compiles, 54: then reads a string and searches for it. */ 55: 56: 57: #ifdef emacs 58: 59: /* The `emacs' switch turns on certain special matching commands 60: that make sense only in emacs. */ 61: 62: #include "config.h" 63: #include "lisp.h" 64: #include "buffer.h" 65: #include "syntax.h" 66: 67: #else /* not emacs */ 68: 69: /* 70: * Define the syntax stuff, so we can do the \<...\> things. 71: */ 72: #define Sword 1 73: 74: #define SYNTAX(c) syntax_table[c] 75: 76: static char syntax_table[256]; 77: 78: #endif /* not emacs */ 79: 80: #include "regex.h" 81: 82: /* Number of failure points to allocate space for initially, 83: when matching. If this number is exceeded, more space is allocated, 84: so it is not a hard limit. */ 85: 86: #ifndef NFAILURES 87: #define NFAILURES 80 88: #endif NFAILURES 89: 90: /* width of a byte in bits */ 91: 92: #define BYTEWIDTH 8 93: 94: /* These are the command codes that appear in compiled regular expressions, one per byte. 95: Some command codes are followed by argument bytes. 96: A command code can specify any interpretation whatever for its arguments. 97: Zero-bytes may appear in the compiled regular expression. */ 98: 99: enum regexpcode 100: { 101: unused, 102: exactn, /* followed by one byte giving n, and then by n literal bytes */ 103: begline, /* fails unless at beginning of line */ 104: endline, /* fails unless at end of line */ 105: jump, /* followed by two bytes giving relative address to jump to */ 106: on_failure_jump, /* followed by two bytes giving relative address of place 107: to resume at in case of failure. */ 108: finalize_jump, /* Throw away latest failure point and then jump to address. */ 109: maybe_finalize_jump, /* Like jump but finalize if safe to do so. 110: This is used to jump back to the beginning 111: of a repeat. If the command that follows 112: this jump is clearly incompatible with the 113: one at the beginning of the repeat, such that 114: we can be sure that there is no use backtracking 115: out of repetitions already completed, 116: then we finalize. */ 117: dummy_failure_jump, /* jump, and push a dummy failure point. 118: This failure point will be thrown away 119: if an attempt is made to use it for a failure. 120: A + construct makes this before the first repeat. */ 121: anychar, /* matches any one character */ 122: charset, /* matches any one char belonging to specified set. 123: First following byte is # bitmap bytes. 124: Then come bytes for a bit-map saying which chars are in. 125: Bits in each byte are ordered low-bit-first. 126: A character is in the set if its bit is 1. 127: A character too large to have a bit in the map 128: is automatically not in the set */ 129: charset_not, /* similar but match any character that is NOT one of those specified */ 130: start_memory, /* starts remembering the text that is matched 131: and stores it in a memory register. 132: followed by one byte containing the register number. 133: Register numbers must be in the range 0 through NREGS. */ 134: stop_memory, /* stops remembering the text that is matched 135: and stores it in a memory register. 136: followed by one byte containing the register number. 137: Register numbers must be in the range 0 through NREGS. */ 138: duplicate, /* match a duplicate of something remembered. 139: Followed by one byte containing the index of the memory register. */ 140: before_dot, /* Succeeds if before dot */ 141: at_dot, /* Succeeds if at dot */ 142: after_dot, /* Succeeds if after dot */ 143: begbuf, /* Succeeds if at beginning of buffer */ 144: endbuf, /* Succeeds if at end of buffer */ 145: wordchar, /* Matches any word-constituent character */ 146: notwordchar, /* Matches any char that is not a word-constituent */ 147: wordbeg, /* Succeeds if at word beginning */ 148: wordend, /* Succeeds if at word end */ 149: wordbound, /* Succeeds if at a word boundary */ 150: notwordbound, /* Succeeds if not at a word boundary */ 151: syntaxspec, /* Matches any character whose syntax is specified. 152: followed by a byte which contains a syntax code, Sword or such like */ 153: notsyntaxspec /* Matches any character whose syntax differs from the specified. */ 154: }; 155: 156: #ifndef SIGN_EXTEND_CHAR 157: #define SIGN_EXTEND_CHAR(x) (x) 158: #endif 159: 160: /* compile_pattern takes a regular-expression descriptor string in the user's format 161: and converts it into a buffer full of byte commands for matching. 162: 163: pattern is the address of the pattern string 164: size is the length of it. 165: bufp is a struct re_pattern_buffer * which points to the info 166: on where to store the byte commands. 167: This structure contains a char * which points to the 168: actual space, which should have been obtained with malloc. 169: compile_pattern may use realloc to grow the buffer space. 170: 171: The number of bytes of commands can be found out by looking in 172: the struct re_pattern_buffer that bufp pointed to, 173: after compile_pattern returns. 174: */ 175: 176: #define PATPUSH(ch) (*b++ = (char) (ch)) 177: 178: #define PATFETCH(c) \ 179: {if (p == pend) goto end_of_pattern; \ 180: c = * (unsigned char *) p++; \ 181: if (translate) c = translate[c]; } 182: 183: #define PATFETCH_RAW(c) \ 184: {if (p == pend) goto end_of_pattern; \ 185: c = * (unsigned char *) p++; } 186: 187: #define PATUNFETCH p-- 188: 189: #define EXTEND_BUFFER \ 190: { old_buffer = bufp->buffer; \ 191: if (bufp->allocated == (1<<16)) goto too_big; \ 192: bufp->allocated *= 2; \ 193: if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \ 194: if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \ 195: goto memory_exhausted; \ 196: c = bufp->buffer - old_buffer; \ 197: b += c; \ 198: if (fixup_jump) \ 199: fixup_jump += c; \ 200: if (laststart) \ 201: laststart += c; \ 202: begalt += c; \ 203: if (pending_exact) \ 204: pending_exact += c; \ 205: } 206: 207: static int store_jump (), insert_jump (); 208: 209: char * 210: re_compile_pattern (pattern, size, bufp) 211: char *pattern; 212: int size; 213: struct re_pattern_buffer *bufp; 214: { 215: register char *b = bufp->buffer; 216: register char *p = pattern; 217: char *pend = pattern + size; 218: register unsigned c, c1; 219: char *p1; 220: unsigned char *translate = (unsigned char *) bufp->translate; 221: 222: /* Temporary used when buffer is made bigger. */ 223: 224: char *old_buffer; 225: 226: /* address of the count-byte of the most recently inserted "exactn" command. 227: This makes it possible to tell whether a new exact-match character 228: can be added to that command or requires a new "exactn" command. */ 229: 230: char *pending_exact = 0; 231: 232: /* address of the place where a forward-jump should go 233: to the end of the containing expression. 234: Each alternative of an "or", except the last, ends with a forward-jump 235: of this sort. */ 236: 237: char *fixup_jump = 0; 238: 239: /* address of start of the most recently finished expression. 240: This tells postfix * where to find the start of its operand. */ 241: 242: char *laststart = 0; 243: 244: /* In processing a repeat, 1 means zero matches is allowed */ 245: 246: char zero_times_ok; 247: 248: /* In processing a repeat, 1 means many matches is allowed */ 249: 250: char many_times_ok; 251: 252: /* address of beginning of regexp, or inside of last \( */ 253: 254: char *begalt = b; 255: 256: /* Stack of information saved by \( and restored by \). 257: Four stack elements are pushed by each \(: 258: First, the value of b. 259: Second, the value of fixup_jump. 260: Third, the value of regnum. 261: Fourth, the value of begalt. */ 262: 263: int stackb[40]; 264: int *stackp = stackb; 265: int *stacke = stackb + 40; 266: int *stackt; 267: 268: /* Counts \('s as they are encountered. Remembered for the matching \), 269: where it becomes the "register number" to put in the stop_memory command */ 270: 271: int regnum = 1; 272: 273: bufp->fastmap_accurate = 0; 274: 275: #ifndef emacs 276: /* 277: * Initialize the syntax table. 278: */ 279: init_syntax_once(); 280: #endif emacs 281: 282: while (p != pend) 283: { 284: if (b - bufp->buffer 285: > bufp->allocated - 10) 286: /* Note that EXTEND_BUFFER clobbers c */ 287: EXTEND_BUFFER; 288: 289: PATFETCH (c); 290: 291: switch (c) 292: { 293: case '$': 294: /* $ means succeed if at end of line, but only in special contexts. 295: If randonly in the middle of a pattern, it is a normal character. */ 296: if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|'))) 297: { 298: PATPUSH (endline); 299: break; 300: } 301: goto normal_char; 302: 303: case '^': 304: /* ^ means succeed if at beg of line, but only if no preceding pattern. */ 305: if (laststart) goto normal_char; 306: PATPUSH (begline); 307: break; 308: 309: case '*': 310: case '+': 311: case '?': 312: /* If there is no previous pattern, char not special. */ 313: if (!laststart) 314: goto normal_char; 315: /* If there is a sequence of repetition chars, 316: collapse it down to equivalent to just one. */ 317: zero_times_ok = 0; 318: many_times_ok = 0; 319: while (1) 320: { 321: zero_times_ok |= c != '+'; 322: many_times_ok |= c != '?'; 323: if (p == pend) 324: break; 325: PATFETCH (c); 326: if (!(c == '*' || c == '+' || c == '?')) 327: { 328: PATUNFETCH; 329: break; 330: } 331: } 332: 333: /* Now we know whether 0 matches is allowed, 334: and whether 2 or more matches is allowed. */ 335: if (many_times_ok) 336: { 337: /* If more than one repetition is allowed, 338: put in a backward jump at the end. */ 339: store_jump (b, maybe_finalize_jump, laststart - 3); 340: b += 3; 341: } 342: insert_jump (on_failure_jump, laststart, b + 3, b); 343: pending_exact = 0; 344: b += 3; 345: if (!zero_times_ok) 346: { 347: /* At least one repetition required: insert before the loop 348: a skip over the initial on-failure-jump instruction */ 349: insert_jump (dummy_failure_jump, laststart, laststart + 6, b); 350: b += 3; 351: } 352: break; 353: 354: case '.': 355: laststart = b; 356: PATPUSH (anychar); 357: break; 358: 359: case '[': 360: if (b - bufp->buffer 361: > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) 362: /* Note that EXTEND_BUFFER clobbers c */ 363: EXTEND_BUFFER; 364: 365: laststart = b; 366: if (*p == '^') 367: PATPUSH (charset_not), p++; 368: else 369: PATPUSH (charset); 370: p1 = p; 371: 372: PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 373: /* Clear the whole map */ 374: bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 375: /* Read in characters and ranges, setting map bits */ 376: while (1) 377: { 378: PATFETCH (c); 379: if (c == ']' && p != p1 + 1) break; 380: if (*p == '-') 381: { 382: PATFETCH (c1); 383: PATFETCH (c1); 384: while (c <= c1) 385: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++; 386: } 387: else 388: { 389: b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH); 390: } 391: } 392: /* Discard any bitmap bytes that are all 0 at the end of the map. 393: Decrement the map-length byte too. */ 394: while (b[-1] > 0 && b[b[-1] - 1] == 0) 395: b[-1]--; 396: b += b[-1]; 397: break; 398: 399: case '\\': 400: if (p == pend) goto invalid_pattern; 401: PATFETCH_RAW (c); 402: switch (c) 403: { 404: case '(': 405: if (stackp == stacke) goto nesting_too_deep; 406: if (regnum < RE_NREGS) 407: { 408: PATPUSH (start_memory); 409: PATPUSH (regnum); 410: } 411: *stackp++ = b - bufp->buffer; 412: *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; 413: *stackp++ = regnum++; 414: *stackp++ = begalt - bufp->buffer; 415: fixup_jump = 0; 416: laststart = 0; 417: begalt = b; 418: break; 419: 420: case ')': 421: if (stackp == stackb) goto unmatched_close; 422: begalt = *--stackp + bufp->buffer; 423: if (fixup_jump) 424: store_jump (fixup_jump, jump, b); 425: if (stackp[-1] < RE_NREGS) 426: { 427: PATPUSH (stop_memory); 428: PATPUSH (stackp[-1]); 429: } 430: stackp -= 2; 431: fixup_jump = 0; 432: if (*stackp) 433: fixup_jump = *stackp + bufp->buffer - 1; 434: laststart = *--stackp + bufp->buffer; 435: break; 436: 437: case '|': 438: insert_jump (on_failure_jump, begalt, b + 6, b); 439: pending_exact = 0; 440: b += 3; 441: if (fixup_jump) 442: store_jump (fixup_jump, jump, b); 443: fixup_jump = b; 444: b += 3; 445: laststart = 0; 446: begalt = b; 447: break; 448: 449: #ifdef emacs 450: case '=': 451: PATPUSH (at_dot); 452: break; 453: 454: case 's': 455: laststart = b; 456: PATPUSH (syntaxspec); 457: PATFETCH (c); 458: PATPUSH (syntax_spec_code[c]); 459: break; 460: 461: case 'S': 462: laststart = b; 463: PATPUSH (notsyntaxspec); 464: PATFETCH (c); 465: PATPUSH (syntax_spec_code[c]); 466: break; 467: #endif emacs 468: 469: case 'w': 470: laststart = b; 471: PATPUSH (wordchar); 472: break; 473: 474: case 'W': 475: laststart = b; 476: PATPUSH (notwordchar); 477: break; 478: 479: case '<': 480: PATPUSH (wordbeg); 481: break; 482: 483: case '>': 484: PATPUSH (wordend); 485: break; 486: 487: case 'b': 488: PATPUSH (wordbound); 489: break; 490: 491: case 'B': 492: PATPUSH (notwordbound); 493: break; 494: 495: case '`': 496: PATPUSH (begbuf); 497: break; 498: 499: case '\'': 500: PATPUSH (endbuf); 501: break; 502: 503: case '1': 504: case '2': 505: case '3': 506: case '4': 507: case '5': 508: case '6': 509: case '7': 510: case '8': 511: case '9': 512: c1 = c - '0'; 513: if (c1 >= regnum) 514: goto normal_char; 515: for (stackt = stackp - 2; stackt > stackb; stackt -= 4) 516: if (*stackt == c1) 517: goto normal_char; 518: laststart = b; 519: PATPUSH (duplicate); 520: PATPUSH (c1); 521: break; 522: default: 523: goto normal_char; 524: } 525: break; 526: 527: default: 528: normal_char: 529: if (!pending_exact || pending_exact + *pending_exact + 1 != b 530: || *pending_exact == 0177 || *p == '*' || *p == '^' 531: || *p == '+' || *p == '?') 532: { 533: laststart = b; 534: PATPUSH (exactn); 535: pending_exact = b; 536: PATPUSH (0); 537: } 538: PATPUSH (c); 539: (*pending_exact)++; 540: } 541: } 542: 543: if (fixup_jump) 544: store_jump (fixup_jump, jump, b); 545: 546: if (stackp != stackb) goto unmatched_open; 547: 548: bufp->used = b - bufp->buffer; 549: return 0; 550: 551: invalid_pattern: 552: return "Invalid regular expression"; 553: 554: unmatched_open: 555: return "Unmatched \\("; 556: 557: unmatched_close: 558: return "Unmatched \\)"; 559: 560: end_of_pattern: 561: return "Premature end of regular expression"; 562: 563: nesting_too_deep: 564: return "Nesting too deep"; 565: 566: too_big: 567: return "Regular expression too big"; 568: 569: memory_exhausted: 570: return "Memory exhausted"; 571: } 572: 573: #ifndef emacs 574: init_syntax_once () 575: { 576: register int c; 577: static int done = 0; 578: 579: if (done) 580: return; 581: 582: bzero (syntax_table, sizeof syntax_table); 583: 584: for (c = 'a'; c <= 'z'; c++) 585: syntax_table[c] = Sword; 586: 587: for (c = 'A'; c <= 'Z'; c++) 588: syntax_table[c] = Sword; 589: 590: for (c = '0'; c <= '9'; c++) 591: syntax_table[c] = Sword; 592: 593: done = 1; 594: } 595: #endif not emacs 596: 597: /* Store where `from' points a jump operation to jump to where `to' points. 598: `opcode' is the opcode to store. */ 599: 600: static int 601: store_jump (from, opcode, to) 602: char *from, *to; 603: char opcode; 604: { 605: from[0] = opcode; 606: from[1] = (to - (from + 3)) & 0377; 607: from[2] = (to - (from + 3)) >> 8; 608: } 609: 610: /* Open up space at char FROM, and insert there a jump to TO. 611: CURRENT_END gives te end of the storage no in use, 612: so we know how much data to copy up. 613: OP is the opcode of the jump to insert. 614: 615: If you call this function, you must zero out pending_exact. */ 616: 617: static int 618: insert_jump (op, from, to, current_end) 619: char op; 620: char *from, *to, *current_end; 621: { 622: register char *pto = current_end + 3; 623: register char *pfrom = current_end; 624: while (pfrom != from) 625: *--pto = *--pfrom; 626: store_jump (from, op, to); 627: } 628: 629: /* Given a pattern, compute a fastmap from it. 630: The fastmap records which of the (1 << BYTEWIDTH) possible characters 631: can start a string that matches the pattern. 632: This fastmap is used by re_search to skip quickly over totally implausible text. 633: 634: The caller must supply the address of a (1 << BYTEWIDTH)-byte data area 635: as bufp->fastmap. 636: The other components of bufp describe the pattern to be used. */ 637: 638: re_compile_fastmap (bufp) 639: struct re_pattern_buffer *bufp; 640: { 641: char *pattern = bufp->buffer; 642: int size = bufp->used; 643: register char *fastmap = bufp->fastmap; 644: register char *p = pattern; 645: register char *pend = pattern + size; 646: register int j, k; 647: unsigned char *translate = (unsigned char *) bufp->translate; 648: 649: char *stackb[NFAILURES]; 650: char **stackp = stackb; 651: 652: bzero (fastmap, (1 << BYTEWIDTH)); 653: bufp->fastmap_accurate = 1; 654: bufp->can_be_null = 0; 655: 656: while (p) 657: { 658: if (p == pend) 659: { 660: bufp->can_be_null = 1; 661: break; 662: } 663: #ifdef SWITCH_ENUM_BUG 664: switch ((int) ((enum regexpcode) *p++)) 665: #else 666: switch ((enum regexpcode) *p++) 667: #endif 668: { 669: case exactn: 670: if (translate) 671: fastmap[translate[p[1]]] = 1; 672: else 673: fastmap[p[1]] = 1; 674: break; 675: 676: case begline: 677: case before_dot: 678: case at_dot: 679: case after_dot: 680: case begbuf: 681: case endbuf: 682: case wordbound: 683: case notwordbound: 684: case wordbeg: 685: case wordend: 686: continue; 687: 688: case endline: 689: if (translate) 690: fastmap[translate['\n']] = 1; 691: else 692: fastmap['\n'] = 1; 693: bufp->can_be_null = 1; 694: break; 695: 696: case finalize_jump: 697: case maybe_finalize_jump: 698: case jump: 699: case dummy_failure_jump: 700: bufp->can_be_null = 1; 701: j = *p++ & 0377; 702: j += SIGN_EXTEND_CHAR (*p++) << 8; 703: p += j; 704: if (j > 0) 705: continue; 706: /* Jump backward reached implies we just went through 707: the body of a loop and matched nothing. 708: Opcode jumped to should be an on_failure_jump. 709: Just treat it like an ordinary jump. 710: For a * loop, it has pushed its failure point already; 711: if so, discard that as redundant. */ 712: if ((enum regexpcode) *p != on_failure_jump) 713: continue; 714: p++; 715: j = *p++ & 0377; 716: j += SIGN_EXTEND_CHAR (*p++) << 8; 717: p += j; 718: if (stackp != stackb && *stackp == p) 719: stackp--; 720: continue; 721: 722: case on_failure_jump: 723: j = *p++ & 0377; 724: j += SIGN_EXTEND_CHAR (*p++) << 8; 725: *++stackp = p + j; 726: continue; 727: 728: case start_memory: 729: case stop_memory: 730: p++; 731: continue; 732: 733: case duplicate: 734: bufp->can_be_null = 1; 735: case anychar: 736: for (j = 0; j < (1 << BYTEWIDTH); j++) 737: fastmap[j] = 1; 738: return; 739: 740: case wordchar: 741: for (j = 0; j < (1 << BYTEWIDTH); j++) 742: if (SYNTAX (j) == Sword) 743: fastmap[j] = 1; 744: break; 745: 746: case notwordchar: 747: for (j = 0; j < (1 << BYTEWIDTH); j++) 748: if (SYNTAX (j) != Sword) 749: fastmap[j] = 1; 750: break; 751: 752: #ifdef emacs 753: case syntaxspec: 754: k = *p++; 755: for (j = 0; j < (1 << BYTEWIDTH); j++) 756: if (SYNTAX (j) == (enum syntaxcode) k) 757: fastmap[j] = 1; 758: break; 759: 760: case notsyntaxspec: 761: for (j = 0; j < (1 << BYTEWIDTH); j++) 762: if (SYNTAX (j) != (enum syntaxcode) k) 763: fastmap[j] = 1; 764: break; 765: #endif emacs 766: 767: case charset: 768: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 769: if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 770: { 771: if (translate) 772: fastmap[translate[j]] = 1; 773: else 774: fastmap[j] = 1; 775: } 776: break; 777: 778: case charset_not: 779: /* Chars beyond end of map must be allowed */ 780: for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 781: if (translate) 782: fastmap[translate[j]] = 1; 783: else 784: fastmap[j] = 1; 785: 786: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 787: if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 788: { 789: if (translate) 790: fastmap[translate[j]] = 1; 791: else 792: fastmap[j] = 1; 793: } 794: break; 795: } 796: 797: /* Get here means we have successfully found the possible starting characters 798: of one path of the pattern. We need not follow this path any farther. 799: Instead, look at the next alternative remembered in the stack. */ 800: if (stackp != stackb) 801: p = *stackp--; 802: else 803: break; 804: } 805: } 806: 807: /* Like re_search_2, below, but only one string is specified. */ 808: 809: re_search (pbufp, string, size, startpos, range, regs) 810: struct re_pattern_buffer *pbufp; 811: char *string; 812: int size, startpos, range; 813: struct re_registers *regs; 814: { 815: return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size); 816: } 817: 818: /* Like re_match_2 but tries first a match starting at index `startpos', 819: then at startpos + 1, and so on. 820: `range' is the number of places to try before giving up. 821: If `range' is negative, the starting positions tried are 822: startpos, startpos - 1, etc. 823: It is up to the caller to make sure that range is not so large 824: as to take the starting position outside of the input strings. 825: 826: The value returned is the position at which the match was found, 827: or -1 if no match was found. */ 828: 829: int 830: re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop) 831: struct re_pattern_buffer *pbufp; 832: char *string1, *string2; 833: int size1, size2; 834: int startpos; 835: register int range; 836: struct re_registers *regs; 837: int mstop; 838: { 839: register char *fastmap = pbufp->fastmap; 840: register char *translate = pbufp->translate; 841: int total = size1 + size2; 842: 843: /* Update the fastmap now if not correct already */ 844: if (fastmap && !pbufp->fastmap_accurate) 845: re_compile_fastmap (pbufp); 846: 847: while (1) 848: { 849: /* If a fastmap is supplied, skip quickly over characters 850: that cannot possibly be the start of a match. 851: Note, however, that if the pattern can possibly match 852: the null string, we must test it at each starting point 853: so that we take the first null string we get. */ 854: 855: if (fastmap && startpos < total && !pbufp->can_be_null) 856: { 857: if (range > 0) 858: { 859: register int lim = 0; 860: register char *p; 861: int irange = range; 862: if (startpos < size1 && startpos + range >= size1) 863: lim = range - (size1 - startpos); 864: 865: p = &(startpos >= size1 ? string2 - size1 : string1)[startpos]; 866: 867: if (translate) 868: { 869: while (range > lim && !fastmap[translate[*p++]]) 870: range--; 871: } 872: else 873: { 874: while (range > lim && !fastmap[*p++]) 875: range--; 876: } 877: startpos += irange - range; 878: } 879: else 880: { 881: register char c; 882: if (startpos >= size1) c = string2[startpos - size1]; 883: else c = string1[startpos]; 884: if (translate ? !fastmap[translate[c]] : !fastmap[c]) 885: goto advance; 886: } 887: } 888: 889: if (range >= 0 && startpos == total 890: && fastmap && !pbufp->can_be_null) 891: return -1; 892: 893: if (0 <= re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop)) 894: return startpos; 895: 896: advance: 897: if (!range) break; 898: if (range > 0) range--, startpos++; else range++, startpos--; 899: } 900: return -1; 901: } 902: 903: #ifndef emacs /* emacs never uses this */ 904: re_match (pbufp, string, size, pos, regs) 905: struct re_pattern_buffer *pbufp; 906: char *string; 907: int size, pos; 908: struct re_registers *regs; 909: { 910: return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size); 911: } 912: #endif /* emacs */ 913: 914: /* Match the pattern described by `pbufp' 915: against data which is the virtual concatenation of `string1' and `string2'. 916: `size1' and `size2' are the sizes of the two data strings. 917: Start the match at position `pos'. 918: Do not consider matching past the position `mstop'. 919: 920: If pbufp->fastmap is nonzero, then it had better be up to date. 921: 922: The reason that the data to match is specified as two components 923: which are to be regarded as concatenated 924: is so that this function can be used directly on the contents of an Emacs buffer. 925: 926: -1 is returned if there is no match. Otherwise the value is the length 927: of the substring which was matched. 928: */ 929: 930: int 931: re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop) 932: struct re_pattern_buffer *pbufp; 933: char *string1, *string2; 934: int size1, size2; 935: int pos; 936: struct re_registers *regs; 937: int mstop; 938: { 939: register char *p = pbufp->buffer; 940: register char *pend = p + pbufp->used; 941: /* End of first string */ 942: char *end1; 943: /* End of second string */ 944: char *end2; 945: /* Pointer just past last char to consider matching */ 946: char *end_match_1, *end_match_2; 947: register char *d, *dend; 948: register int mcnt; 949: char *translate = pbufp->translate; 950: 951: /* Failure point stack. Each place that can handle a failure further down the line 952: pushes a failure point on this stack. It consists of two char *'s. 953: The first one pushed is where to resume scanning the pattern; 954: the second pushed is where to resume scanning the strings. 955: If the latter is zero, the failure point is a "dummy". 956: If a failure happens and the innermost failure point is dormant, 957: it discards that failure point and tries the next one. */ 958: 959: char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *)); 960: char **stackp = stackb, **stacke = &stackb[2 * NFAILURES]; 961: 962: /* Information on the "contents" of registers. 963: These are pointers into the input strings; they record 964: just what was matched (on this attempt) by some part of the pattern. 965: The start_memory command stores the start of a register's contents 966: and the stop_memory command stores the end. 967: 968: At that point, regstart[regnum] points to the first character in the register, 969: regend[regnum] points to the first character beyond the end of the register, 970: and regstart_segend[regnum] is either the same as regend[regnum] 971: or else points to the end of the input string into which regstart[regnum] points. 972: The latter case happens when regstart[regnum] is in string1 and 973: regend[regnum] is in string2. */ 974: 975: char *regstart[RE_NREGS]; 976: char *regstart_segend[RE_NREGS]; 977: char *regend[RE_NREGS]; 978: 979: /* Set up pointers to ends of strings. 980: Don't allow the second string to be empty unless both are empty. */ 981: if (!size2) 982: { 983: string2 = string1; 984: size2 = size1; 985: string1 = 0; 986: size1 = 0; 987: } 988: end1 = string1 + size1; 989: end2 = string2 + size2; 990: 991: /* Compute where to stop matching, within the two strings */ 992: if (mstop <= size1) 993: { 994: end_match_1 = string1 + mstop; 995: end_match_2 = string2; 996: } 997: else 998: { 999: end_match_1 = end1; 1000: end_match_2 = string2 + mstop - size1; 1001: } 1002: 1003: /* Initialize \( and \) text positions to -1 1004: to mark ones that no \( or \) has been seen for. */ 1005: 1006: for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++) 1007: regstart[mcnt] = (char *) -1; 1008: 1009: /* `p' scans through the pattern as `d' scans through the data. 1010: `dend' is the end of the input string that `d' points within. 1011: `d' is advanced into the following input string whenever necessary, 1012: but this happens before fetching; 1013: therefore, at the beginning of the loop, 1014: `d' can be pointing at the end of a string, 1015: but it cannot equal string2. */ 1016: 1017: if (pos <= size1) 1018: d = string1 + pos, dend = end_match_1; 1019: else 1020: d = string2 + pos - size1, dend = end_match_2; 1021: 1022: /* Write PREFETCH; just before fetching a character with *d. */ 1023: #define PREFETCH \ 1024: while (d == dend) \ 1025: { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \ 1026: d = string2; /* end of string1 => advance to string2. */ \ 1027: dend = end_match_2; } 1028: 1029: /* This loop loops over pattern commands. 1030: It exits by returning from the function if match is complete, 1031: or it drops through if match fails at this starting point in the input data. */ 1032: 1033: while (1) 1034: { 1035: if (p == pend) 1036: /* End of pattern means we have succeeded! */ 1037: { 1038: /* If caller wants register contents data back, convert it to indices */ 1039: if (regs) 1040: { 1041: bzero (regs, sizeof (*regs)); 1042: 1043: regend[0] = d; 1044: regstart[0] = string1; 1045: for (mcnt = 0; mcnt < RE_NREGS; mcnt++) 1046: { 1047: if (mcnt && regstart[mcnt] == (char *) -1) continue; 1048: if (regstart[mcnt] - string1 < 0 || regstart[mcnt] - string1 > size1) 1049: regs->start[mcnt] = regstart[mcnt] - string2 + size1; 1050: else 1051: regs->start[mcnt] = regstart[mcnt] - string1; 1052: if (regend[mcnt] - string1 < 0 || regend[mcnt] - string1 > size1) 1053: regs->end[mcnt] = regend[mcnt] - string2 + size1; 1054: else 1055: regs->end[mcnt] = regend[mcnt] - string1; 1056: } 1057: regs->start[0] = pos; 1058: } 1059: if (d - string1 >= 0 && d - string1 <= size1) 1060: return d - string1 - pos; 1061: else 1062: return d - string2 + size1 - pos; 1063: } 1064: 1065: /* Otherwise match next pattern command */ 1066: #ifdef SWITCH_ENUM_BUG 1067: switch ((int) ((enum regexpcode) *p++)) 1068: #else 1069: switch ((enum regexpcode) *p++) 1070: #endif 1071: { 1072: 1073: /* \( is represented by a start_memory, \) by a stop_memory. 1074: Both of those commands contain a "register number" argument. 1075: The text matched within the \( and \) is recorded under that number. 1076: Then, \<digit> turns into a `duplicate' command which 1077: is followed by the numeric value of <digit> as the register number. */ 1078: 1079: case start_memory: 1080: regstart[*p] = d; 1081: regstart_segend[*p++] = dend; 1082: break; 1083: 1084: case stop_memory: 1085: regend[*p] = d; 1086: if (regstart_segend[*p] == dend) 1087: regstart_segend[*p] = d; 1088: p++; 1089: break; 1090: 1091: case duplicate: 1092: { 1093: int regno = *p++; /* Get which register to match against */ 1094: register char *d2, *dend2; 1095: 1096: d2 = regstart[regno]; 1097: dend2 = regstart_segend[regno]; 1098: while (1) 1099: { 1100: /* Advance to next segment in register contents, if necessary */ 1101: while (d2 == dend2) 1102: { 1103: if (dend2 == end_match_2) break; 1104: if (dend2 == regend[regno]) break; 1105: d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */ 1106: } 1107: /* At end of register contents => success */ 1108: if (d2 == dend2) break; 1109: 1110: /* Advance to next segment in data being matched, if necessary */ 1111: PREFETCH; 1112: 1113: /* mcnt gets # consecutive chars to compare */ 1114: mcnt = dend - d; 1115: if (mcnt > dend2 - d2) 1116: mcnt = dend2 - d2; 1117: /* Compare that many; failure if mismatch, else skip them. */ 1118: if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt)) 1119: goto fail; 1120: d += mcnt, d2 += mcnt; 1121: } 1122: } 1123: break; 1124: 1125: case anychar: 1126: /* fetch a data character */ 1127: PREFETCH; 1128: /* Match anything but a newline. */ 1129: if ((translate ? translate[*d++] : *d++) == '\n') 1130: goto fail; 1131: break; 1132: 1133: case charset: 1134: case charset_not: 1135: { 1136: /* Nonzero for charset_not */ 1137: int not = 0; 1138: register int c; 1139: if (*(p - 1) == (char) charset_not) 1140: not = 1; 1141: 1142: /* fetch a data character */ 1143: PREFETCH; 1144: 1145: if (translate) 1146: c = translate [*(unsigned char *)d]; 1147: else 1148: c = *(unsigned char *)d; 1149: 1150: if (c < *p * BYTEWIDTH 1151: && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 1152: not = !not; 1153: 1154: p += 1 + *p; 1155: 1156: if (!not) goto fail; 1157: d++; 1158: break; 1159: } 1160: 1161: case begline: 1162: if (d == string1 || d[-1] == '\n') 1163: break; 1164: goto fail; 1165: 1166: case endline: 1167: if (d == end2 1168: || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n')) 1169: break; 1170: goto fail; 1171: 1172: /* "or" constructs ("|") are handled by starting each alternative 1173: with an on_failure_jump that points to the start of the next alternative. 1174: Each alternative except the last ends with a jump to the joining point. 1175: (Actually, each jump except for the last one really jumps 1176: to the following jump, because tensioning the jumps is a hassle.) */ 1177: 1178: /* The start of a stupid repeat has an on_failure_jump that points 1179: past the end of the repeat text. 1180: This makes a failure point so that, on failure to match a repetition, 1181: matching restarts past as many repetitions have been found 1182: with no way to fail and look for another one. */ 1183: 1184: /* A smart repeat is similar but loops back to the on_failure_jump 1185: so that each repetition makes another failure point. */ 1186: 1187: case on_failure_jump: 1188: if (stackp == stacke) 1189: { 1190: char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *)); 1191: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); 1192: stackp += stackx - stackb; 1193: stacke = stackx + 2 * (stacke - stackb); 1194: stackb = stackx; 1195: } 1196: mcnt = *p++ & 0377; 1197: mcnt += SIGN_EXTEND_CHAR (*p++) << 8; 1198: *stackp++ = mcnt + p; 1199: *stackp++ = d; 1200: break; 1201: 1202: /* The end of a smart repeat has an maybe_finalize_jump back. 1203: Change it either to a finalize_jump or an ordinary jump. */ 1204: 1205: case maybe_finalize_jump: 1206: mcnt = *p++ & 0377; 1207: mcnt += SIGN_EXTEND_CHAR (*p++) << 8; 1208: /* Compare what follows with the begining of the repeat. 1209: If we can establish that there is nothing that they would 1210: both match, we can change to finalize_jump */ 1211: if (p == pend) 1212: p[-3] = (char) finalize_jump; 1213: else if (*p == (char) exactn || *p == (char) endline) 1214: { 1215: register int c = *p == (char) endline ? '\n' : p[2]; 1216: register char *p1 = p + mcnt; 1217: /* p1[0] ... p1[2] are an on_failure_jump. 1218: Examine what follows that */ 1219: if (p1[3] == (char) exactn && p1[5] != c) 1220: p[-3] = (char) finalize_jump; 1221: else if (p1[3] == (char) charset || p1[3] == (char) charset_not) 1222: { 1223: int not = p1[3] == (char) charset_not; 1224: if (c < p1[4] * BYTEWIDTH 1225: && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 1226: not = !not; 1227: /* not is 1 if c would match */ 1228: /* That means it is not safe to finalize */ 1229: if (!not) 1230: p[-3] = (char) finalize_jump; 1231: } 1232: } 1233: p -= 2; 1234: if (p[-1] != (char) finalize_jump) 1235: { 1236: p[-1] = (char) jump; 1237: goto nofinalize; 1238: } 1239: 1240: /* The end of a stupid repeat has a finalize-jump 1241: back to the start, where another failure point will be made 1242: which will point after all the repetitions found so far. */ 1243: 1244: case finalize_jump: 1245: stackp -= 2; 1246: 1247: case jump: 1248: nofinalize: 1249: mcnt = *p++ & 0377; 1250: mcnt += SIGN_EXTEND_CHAR (*p++) << 8; 1251: p += mcnt; 1252: break; 1253: 1254: case dummy_failure_jump: 1255: if (stackp == stacke) 1256: { 1257: char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *)); 1258: bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); 1259: stackp += stackx - stackb; 1260: stacke = stackx + 2 * (stacke - stackb); 1261: stackb = stackx; 1262: } 1263: *stackp++ = 0; 1264: *stackp++ = 0; 1265: goto nofinalize; 1266: 1267: case wordbound: 1268: if (d == string1 /* Points to first char */ 1269: || d == end2 /* Points to end */ 1270: || (d == end1 && size2 == 0)) /* Points to end */ 1271: break; 1272: if ((SYNTAX (((unsigned char *)d)[-1]) == Sword) 1273: != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword)) 1274: break; 1275: goto fail; 1276: 1277: case notwordbound: 1278: if (d == string1 /* Points to first char */ 1279: || d == end2 /* Points to end */ 1280: || (d == end1 && size2 == 0)) /* Points to end */ 1281: goto fail; 1282: if ((SYNTAX (((unsigned char *)d)[-1]) == Sword) 1283: != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword)) 1284: goto fail; 1285: break; 1286: 1287: case wordbeg: 1288: if (d == end2 /* Points to end */ 1289: || (d == end1 && size2 == 0) /* Points to end */ 1290: || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */ 1291: goto fail; 1292: if (d == string1 /* Points to first char */ 1293: || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */ 1294: break; 1295: goto fail; 1296: 1297: case wordend: 1298: if (d == string1 /* Points to first char */ 1299: || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */ 1300: goto fail; 1301: if (d == end2 /* Points to end */ 1302: || (d == end1 && size2 == 0) /* Points to end */ 1303: || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */ 1304: break; 1305: goto fail; 1306: 1307: #ifdef emacs 1308: case before_dot: 1309: if (((d - string2 <= (unsigned) size2) 1310: ? d - (char *) bf_p2 : d - (char *) bf_p1) 1311: <= point) 1312: goto fail; 1313: break; 1314: 1315: case at_dot: 1316: if (((d - string2 <= (unsigned) size2) 1317: ? d - (char *) bf_p2 : d - (char *) bf_p1) 1318: == point) 1319: goto fail; 1320: break; 1321: 1322: case after_dot: 1323: if (((d - string2 <= (unsigned) size2) 1324: ? d - (char *) bf_p2 : d - (char *) bf_p1) 1325: >= point) 1326: goto fail; 1327: break; 1328: 1329: case wordchar: 1330: mcnt = (int) Sword; 1331: goto matchsyntax; 1332: 1333: case syntaxspec: 1334: mcnt = *p++; 1335: matchsyntax: 1336: PREFETCH; 1337: if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail; 1338: break; 1339: 1340: case notwordchar: 1341: mcnt = (int) Sword; 1342: goto matchnotsyntax; 1343: 1344: case notsyntaxspec: 1345: mcnt = *p++; 1346: matchnotsyntax: 1347: PREFETCH; 1348: if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail; 1349: break; 1350: #else 1351: case wordchar: 1352: PREFETCH; 1353: if (SYNTAX (*(unsigned char *)d++) == 0) goto fail; 1354: break; 1355: 1356: case notwordchar: 1357: PREFETCH; 1358: if (SYNTAX (*(unsigned char *)d++) != 0) goto fail; 1359: break; 1360: #endif not emacs 1361: 1362: case begbuf: 1363: if (d == string1) /* Note, d cannot equal string2 */ 1364: break; /* unless string1 == string2. */ 1365: goto fail; 1366: 1367: case endbuf: 1368: if (d == end2 || (d == end1 && size2 == 0)) 1369: break; 1370: goto fail; 1371: 1372: case exactn: 1373: /* Match the next few pattern characters exactly. 1374: mcnt is how many characters to match. */ 1375: mcnt = *p++; 1376: if (translate) 1377: { 1378: do 1379: { 1380: PREFETCH; 1381: if (translate[*(unsigned char *)d++] != *p++) goto fail; 1382: } 1383: while (--mcnt); 1384: } 1385: else 1386: { 1387: do 1388: { 1389: PREFETCH; 1390: if (*d++ != *p++) goto fail; 1391: } 1392: while (--mcnt); 1393: } 1394: break; 1395: } 1396: continue; /* Successfully matched one pattern command; keep matching */ 1397: 1398: /* Jump here if any matching operation fails. */ 1399: fail: 1400: if (stackp != stackb) 1401: /* A restart point is known. Restart there and pop it. */ 1402: { 1403: if (!stackp[-2]) 1404: { /* If innermost failure point is dormant, flush it and keep looking */ 1405: stackp -= 2; 1406: goto fail; 1407: } 1408: d = *--stackp; 1409: p = *--stackp; 1410: if (d >= string1 && d <= end1) 1411: dend = end_match_1; 1412: } 1413: else break; /* Matching at this starting point really fails! */ 1414: } 1415: return -1; /* Failure to match */ 1416: } 1417: 1418: static int 1419: bcmp_translate (s1, s2, len, translate) 1420: char *s1, *s2; 1421: register int len; 1422: char *translate; 1423: { 1424: register char *p1 = s1, *p2 = s2; 1425: while (len) 1426: { 1427: if (translate [*p1++] != translate [*p2++]) return 1; 1428: len--; 1429: } 1430: return 0; 1431: } 1432: 1433: /* Entry points compatible with bsd4.2 regex library */ 1434: 1435: #ifndef emacs 1436: 1437: static struct re_pattern_buffer re_comp_buf; 1438: 1439: char * 1440: re_comp (s) 1441: char *s; 1442: { 1443: if (!s) 1444: { 1445: if (!re_comp_buf.buffer) 1446: return "No previous regular expression"; 1447: return 0; 1448: } 1449: 1450: if (!re_comp_buf.buffer) 1451: { 1452: if (!(re_comp_buf.buffer = (char *) malloc (200))) 1453: return "Memory exhausted"; 1454: re_comp_buf.allocated = 200; 1455: if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH))) 1456: return "Memory exhausted"; 1457: } 1458: return re_compile_pattern (s, strlen (s), &re_comp_buf); 1459: } 1460: 1461: int 1462: re_exec (s) 1463: char *s; 1464: { 1465: int len = strlen (s); 1466: return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0); 1467: } 1468: 1469: #endif /* emacs */ 1470: 1471: #ifdef test 1472: 1473: #include <stdio.h> 1474: 1475: /* Indexed by a character, gives the upper case equivalent of the character */ 1476: 1477: static char upcase[0400] = 1478: { 000, 001, 002, 003, 004, 005, 006, 007, 1479: 010, 011, 012, 013, 014, 015, 016, 017, 1480: 020, 021, 022, 023, 024, 025, 026, 027, 1481: 030, 031, 032, 033, 034, 035, 036, 037, 1482: 040, 041, 042, 043, 044, 045, 046, 047, 1483: 050, 051, 052, 053, 054, 055, 056, 057, 1484: 060, 061, 062, 063, 064, 065, 066, 067, 1485: 070, 071, 072, 073, 074, 075, 076, 077, 1486: 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 1487: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 1488: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 1489: 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, 1490: 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 1491: 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 1492: 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 1493: 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, 1494: 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, 1495: 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, 1496: 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, 1497: 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, 1498: 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, 1499: 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 1500: 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, 1501: 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, 1502: 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 1503: 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, 1504: 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, 1505: 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, 1506: 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, 1507: 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, 1508: 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, 1509: 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 1510: }; 1511: 1512: main () 1513: { 1514: char pat[80]; 1515: struct re_pattern_buffer buf; 1516: int i; 1517: char c; 1518: char fastmap[(1 << BYTEWIDTH)]; 1519: 1520: buf.allocated = 40; 1521: buf.buffer = (char *) malloc (buf.allocated); 1522: buf.fastmap = fastmap; 1523: buf.translate = upcase; 1524: 1525: while (1) 1526: { 1527: gets (pat); 1528: 1529: if (*pat) 1530: { 1531: re_compile_pattern (pat, strlen(pat), &buf); 1532: 1533: for (i = 0; i < buf.used; i++) 1534: printchar (buf.buffer[i]); 1535: 1536: putchar ('\n'); 1537: 1538: printf ("%d allocated, %d used.\n", buf.allocated, buf.used); 1539: 1540: re_compile_fastmap (&buf); 1541: printf ("Allowed by fastmap: "); 1542: for (i = 0; i < (1 << BYTEWIDTH); i++) 1543: if (fastmap[i]) printchar (i); 1544: putchar ('\n'); 1545: } 1546: 1547: gets (pat); /* Now read the string to match against */ 1548: 1549: i = re_match (&buf, pat, strlen (pat), 0, 0); 1550: printf ("Match value %d.\n", i); 1551: } 1552: } 1553: 1554: #ifdef NOTDEF 1555: print_buf (bufp) 1556: struct re_pattern_buffer *bufp; 1557: { 1558: int i; 1559: 1560: printf ("buf is :\n----------------\n"); 1561: for (i = 0; i < bufp->used; i++) 1562: printchar (bufp->buffer[i]); 1563: 1564: printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used); 1565: 1566: printf ("Allowed by fastmap: "); 1567: for (i = 0; i < (1 << BYTEWIDTH); i++) 1568: if (bufp->fastmap[i]) 1569: printchar (i); 1570: printf ("\nAllowed by translate: "); 1571: if (bufp->translate) 1572: for (i = 0; i < (1 << BYTEWIDTH); i++) 1573: if (bufp->translate[i]) 1574: printchar (i); 1575: printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); 1576: printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not"); 1577: } 1578: #endif 1579: 1580: printchar (c) 1581: char c; 1582: { 1583: if (c < 041 || c >= 0177) 1584: { 1585: putchar ('\\'); 1586: putchar (((c >> 6) & 3) + '0'); 1587: putchar (((c >> 3) & 7) + '0'); 1588: putchar ((c & 7) + '0'); 1589: } 1590: else 1591: putchar (c); 1592: } 1593: 1594: error (string) 1595: char *string; 1596: { 1597: puts (string); 1598: exit (1); 1599: } 1600: 1601: #endif test