1: /*
   2:  * Copyright (c) 1980 Regents of the University of California.
   3:  * All rights reserved.  The Berkeley software License Agreement
   4:  * specifies the terms and conditions for redistribution.
   5:  */
   6: 
   7: #ifndef lint
   8: static char sccsid[] = "@(#)regexp.c	5.1 (Berkeley) 6/5/85";
   9: #endif not lint
  10: 
  11: #include <ctype.h>
  12: 
  13: typedef int boolean;
  14: #define TRUE    1
  15: #define FALSE   0
  16: #define NIL 0
  17: 
  18: boolean l_onecase;  /* true if upper and lower equivalent */
  19: 
  20: #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
  21: 
  22: /*  STRNCMP -	like strncmp except that we convert the
  23:  *	 	first string to lower case before comparing
  24:  *		if l_onecase is set.
  25:  */
  26: 
  27: STRNCMP(s1, s2, len)
  28:     register char *s1,*s2;
  29:     register int len;
  30: {
  31:     if (l_onecase) {
  32:         do
  33:         if (*s2 - makelower(*s1))
  34:             return (*s2 - makelower(*s1));
  35:         else {
  36:             s2++;
  37:             s1++;
  38:         }
  39:         while (--len);
  40:     } else {
  41:         do
  42:         if (*s2 - *s1)
  43:             return (*s2 - *s1);
  44:         else {
  45:             s2++;
  46:             s1++;
  47:         }
  48:         while (--len);
  49:     }
  50:     return(0);
  51: }
  52: 
  53: /*	The following routine converts an irregular expression to
  54:  *	internal format.
  55:  *
  56:  *	Either meta symbols (\a \d or \p) or character strings or
  57:  *	operations ( alternation or perenthesizing ) can be
  58:  *	specified.  Each starts with a descriptor byte.  The descriptor
  59:  *	byte has STR set for strings, META set for meta symbols
  60:  *	and OPER set for operations.
  61:  *	The descriptor byte can also have the OPT bit set if the object
  62:  *	defined is optional.  Also ALT can be set to indicate an alternation.
  63:  *
  64:  *	For metasymbols the byte following the descriptor byte identities
  65:  *	the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
  66:  *	strings the byte after the descriptor is a character count for
  67:  *	the string:
  68:  *
  69:  *		meta symbols := descriptor
  70:  *				symbol
  71:  *
  72:  *		strings :=	descriptor
  73:  *				character count
  74:  *				the string
  75:  *
  76:  *		operatins :=	descriptor
  77:  *				symbol
  78:  *				character count
  79:  */
  80: 
  81: /*
  82:  *  handy macros for accessing parts of match blocks
  83:  */
  84: #define MSYM(A) (*(A+1))    /* symbol in a meta symbol block */
  85: #define MNEXT(A) (A+2)      /* character following a metasymbol block */
  86: 
  87: #define OSYM(A) (*(A+1))    /* symbol in an operation block */
  88: #define OCNT(A) (*(A+2))    /* character count */
  89: #define ONEXT(A) (A+3)      /* next character after the operation */
  90: #define OPTR(A) (A+*(A+2))  /* place pointed to by the operator */
  91: 
  92: #define SCNT(A) (*(A+1))    /* byte count of a string */
  93: #define SSTR(A) (A+2)       /* address of the string */
  94: #define SNEXT(A) (A+2+*(A+1))   /* character following the string */
  95: 
  96: /*
  97:  *  bit flags in the descriptor
  98:  */
  99: #define OPT 1
 100: #define STR 2
 101: #define META 4
 102: #define ALT 8
 103: #define OPER 16
 104: 
 105: char *ure;      /* pointer current position in unconverted exp */
 106: char *ccre;     /* pointer to current position in converted exp*/
 107: char *malloc();
 108: 
 109: char *
 110: convexp(re)
 111:     char *re;       /* unconverted irregular expression */
 112: {
 113:     register char *cre;     /* pointer to converted regular expression */
 114: 
 115:     /* allocate room for the converted expression */
 116:     if (re == NIL)
 117:     return (NIL);
 118:     if (*re == '\0')
 119:     return (NIL);
 120:     cre = malloc (4 * strlen(re) + 3);
 121:     ccre = cre;
 122:     ure = re;
 123: 
 124:     /* start the conversion with a \a */
 125:     *cre = META | OPT;
 126:     MSYM(cre) = 'a';
 127:     ccre = MNEXT(cre);
 128: 
 129:     /* start the conversion (its recursive) */
 130:     expconv ();
 131:     *ccre = 0;
 132:     return (cre);
 133: }
 134: 
 135: expconv()
 136: {
 137:     register char *cs;      /* pointer to current symbol in converted exp */
 138:     register char c;        /* character being processed */
 139:     register char *acs;     /* pinter to last alternate */
 140:     register int temp;
 141: 
 142:     /* let the conversion begin */
 143:     acs = NIL;
 144:     cs = NIL;
 145:     while (*ure != NIL) {
 146:     switch (c = *ure++) {
 147: 
 148:     case '\\':
 149:         switch (c = *ure++) {
 150: 
 151:         /* escaped characters are just characters */
 152:         default:
 153:         if (cs == NIL || (*cs & STR) == 0) {
 154:             cs = ccre;
 155:             *cs = STR;
 156:             SCNT(cs) = 1;
 157:             ccre += 2;
 158:         } else
 159:             SCNT(cs)++;
 160:         *ccre++ = c;
 161:         break;
 162: 
 163:         /* normal(?) metacharacters */
 164:         case 'a':
 165:         case 'd':
 166:         case 'e':
 167:         case 'p':
 168:         if (acs != NIL && acs != cs) {
 169:             do {
 170:             temp = OCNT(acs);
 171:             OCNT(acs) = ccre - acs;
 172:             acs -= temp;
 173:             } while (temp != 0);
 174:             acs = NIL;
 175:         }
 176:         cs = ccre;
 177:         *cs = META;
 178:         MSYM(cs) = c;
 179:         ccre = MNEXT(cs);
 180:         break;
 181:         }
 182:         break;
 183: 
 184:     /* just put the symbol in */
 185:     case '^':
 186:     case '$':
 187:         if (acs != NIL && acs != cs) {
 188:         do {
 189:             temp = OCNT(acs);
 190:             OCNT(acs) = ccre - acs;
 191:             acs -= temp;
 192:         } while (temp != 0);
 193:         acs = NIL;
 194:         }
 195:         cs = ccre;
 196:         *cs = META;
 197:         MSYM(cs) = c;
 198:         ccre = MNEXT(cs);
 199:         break;
 200: 
 201:     /* mark the last match sequence as optional */
 202:     case '?':
 203:         if (cs)
 204:             *cs = *cs | OPT;
 205:         break;
 206: 
 207:     /* recurse and define a subexpression */
 208:     case '(':
 209:         if (acs != NIL && acs != cs) {
 210:         do {
 211:             temp = OCNT(acs);
 212:             OCNT(acs) = ccre - acs;
 213:             acs -= temp;
 214:         } while (temp != 0);
 215:         acs = NIL;
 216:         }
 217:         cs = ccre;
 218:         *cs = OPER;
 219:         OSYM(cs) = '(';
 220:         ccre = ONEXT(cs);
 221:         expconv ();
 222:         OCNT(cs) = ccre - cs;       /* offset to next symbol */
 223:         break;
 224: 
 225:     /* return from a recursion */
 226:     case ')':
 227:         if (acs != NIL) {
 228:         do {
 229:             temp = OCNT(acs);
 230:             OCNT(acs) = ccre - acs;
 231:             acs -= temp;
 232:         } while (temp != 0);
 233:         acs = NIL;
 234:         }
 235:         cs = ccre;
 236:         *cs = META;
 237:         MSYM(cs) = c;
 238:         ccre = MNEXT(cs);
 239:         return;
 240: 
 241:     /* mark the last match sequence as having an alternate */
 242:     /* the third byte will contain an offset to jump over the */
 243:     /* alternate match in case the first did not fail */
 244:     case '|':
 245:         if (acs != NIL && acs != cs)
 246:         OCNT(ccre) = ccre - acs;    /* make a back pointer */
 247:         else
 248:         OCNT(ccre) = 0;
 249:         *cs |= ALT;
 250:         cs = ccre;
 251:         *cs = OPER;
 252:         OSYM(cs) = '|';
 253:         ccre = ONEXT(cs);
 254:         acs = cs;   /* remember that the pointer is to be filles */
 255:         break;
 256: 
 257:     /* if its not a metasymbol just build a scharacter string */
 258:     default:
 259:         if (cs == NIL || (*cs & STR) == 0) {
 260:         cs = ccre;
 261:         *cs = STR;
 262:         SCNT(cs) = 1;
 263:         ccre = SSTR(cs);
 264:         } else
 265:         SCNT(cs)++;
 266:         *ccre++ = c;
 267:         break;
 268:     }
 269:     }
 270:     if (acs != NIL) {
 271:     do {
 272:         temp = OCNT(acs);
 273:         OCNT(acs) = ccre - acs;
 274:         acs -= temp;
 275:     } while (temp != 0);
 276:     acs = NIL;
 277:     }
 278:     return;
 279: }
 280: /* end of convertre */
 281: 
 282: 
 283: /*
 284:  *	The following routine recognises an irregular expresion
 285:  *	with the following special characters:
 286:  *
 287:  *		\?	-	means last match was optional
 288:  *		\a	-	matches any number of characters
 289:  *		\d	-	matches any number of spaces and tabs
 290:  *		\p	-	matches any number of alphanumeric
 291:  *				characters. The
 292:  *				characters matched will be copied into
 293:  *				the area pointed to by 'name'.
 294:  *		\|	-	alternation
 295:  *		\( \)	-	grouping used mostly for alternation and
 296:  *				optionality
 297:  *
 298:  *	The irregular expression must be translated to internal form
 299:  *	prior to calling this routine
 300:  *
 301:  *	The value returned is the pointer to the first non \a
 302:  *	character matched.
 303:  */
 304: 
 305: boolean _escaped;       /* true if we are currently _escaped */
 306: char *_start;           /* start of string */
 307: 
 308: char *
 309: expmatch (s, re, mstring)
 310:     register char *s;       /* string to check for a match in */
 311:     register char *re;      /* a converted irregular expression */
 312:     register char *mstring; /* where to put whatever matches a \p */
 313: {
 314:     register char *cs;      /* the current symbol */
 315:     register char *ptr,*s1; /* temporary pointer */
 316:     boolean matched;        /* a temporary boolean */
 317: 
 318:     /* initial conditions */
 319:     if (re == NIL)
 320:     return (NIL);
 321:     cs = re;
 322:     matched = FALSE;
 323: 
 324:     /* loop till expression string is exhausted (or at least pretty tired) */
 325:     while (*cs) {
 326:     switch (*cs & (OPER | STR | META)) {
 327: 
 328:     /* try to match a string */
 329:     case STR:
 330:         matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
 331:         if (matched) {
 332: 
 333:         /* hoorah it matches */
 334:         s += SCNT(cs);
 335:         cs = SNEXT(cs);
 336:         } else if (*cs & ALT) {
 337: 
 338:         /* alternation, skip to next expression */
 339:         cs = SNEXT(cs);
 340:         } else if (*cs & OPT) {
 341: 
 342:         /* the match is optional */
 343:         cs = SNEXT(cs);
 344:         matched = 1;        /* indicate a successful match */
 345:         } else {
 346: 
 347:         /* no match, error return */
 348:         return (NIL);
 349:         }
 350:         break;
 351: 
 352:     /* an operator, do something fancy */
 353:     case OPER:
 354:         switch (OSYM(cs)) {
 355: 
 356:         /* this is an alternation */
 357:         case '|':
 358:         if (matched)
 359: 
 360:             /* last thing in the alternation was a match, skip ahead */
 361:             cs = OPTR(cs);
 362:         else
 363: 
 364:             /* no match, keep trying */
 365:             cs = ONEXT(cs);
 366:         break;
 367: 
 368:         /* this is a grouping, recurse */
 369:         case '(':
 370:         ptr = expmatch (s, ONEXT(cs), mstring);
 371:         if (ptr != NIL) {
 372: 
 373:             /* the subexpression matched */
 374:             matched = 1;
 375:             s = ptr;
 376:         } else if (*cs & ALT) {
 377: 
 378:             /* alternation, skip to next expression */
 379:             matched = 0;
 380:         } else if (*cs & OPT) {
 381: 
 382:             /* the match is optional */
 383:             matched = 1;    /* indicate a successful match */
 384:         } else {
 385: 
 386:             /* no match, error return */
 387:             return (NIL);
 388:         }
 389:         cs = OPTR(cs);
 390:         break;
 391:         }
 392:         break;
 393: 
 394:     /* try to match a metasymbol */
 395:     case META:
 396:         switch (MSYM(cs)) {
 397: 
 398:         /* try to match anything and remember what was matched */
 399:         case 'p':
 400:         /*
 401: 		 *  This is really the same as trying the match the
 402: 		 *  remaining parts of the expression to any subset
 403: 		 *  of the string.
 404: 		 */
 405:         s1 = s;
 406:         do {
 407:             ptr = expmatch (s1, MNEXT(cs), mstring);
 408:             if (ptr != NIL && s1 != s) {
 409: 
 410:             /* we have a match, remember the match */
 411:             strncpy (mstring, s, s1 - s);
 412:             mstring[s1 - s] = '\0';
 413:             return (ptr);
 414:             } else if (ptr != NIL && (*cs & OPT)) {
 415: 
 416:             /* it was aoptional so no match is ok */
 417:             return (ptr);
 418:             } else if (ptr != NIL) {
 419: 
 420:             /* not optional and we still matched */
 421:             return (NIL);
 422:             }
 423:             if (!isalnum(*s1) && *s1 != '_')
 424:             return (NIL);
 425:             if (*s1 == '\\')
 426:             _escaped = _escaped ? FALSE : TRUE;
 427:             else
 428:             _escaped = FALSE;
 429:         } while (*s1++);
 430:         return (NIL);
 431: 
 432:         /* try to match anything */
 433:         case 'a':
 434:         /*
 435: 		 *  This is really the same as trying the match the
 436: 		 *  remaining parts of the expression to any subset
 437: 		 *  of the string.
 438: 		 */
 439:         s1 = s;
 440:         do {
 441:             ptr = expmatch (s1, MNEXT(cs), mstring);
 442:             if (ptr != NIL && s1 != s) {
 443: 
 444:             /* we have a match */
 445:             return (ptr);
 446:             } else if (ptr != NIL && (*cs & OPT)) {
 447: 
 448:             /* it was aoptional so no match is ok */
 449:             return (ptr);
 450:             } else if (ptr != NIL) {
 451: 
 452:             /* not optional and we still matched */
 453:             return (NIL);
 454:             }
 455:             if (*s1 == '\\')
 456:             _escaped = _escaped ? FALSE : TRUE;
 457:             else
 458:             _escaped = FALSE;
 459:         } while (*s1++);
 460:         return (NIL);
 461: 
 462:         /* fail if we are currently _escaped */
 463:         case 'e':
 464:         if (_escaped)
 465:             return(NIL);
 466:         cs = MNEXT(cs);
 467:         break;
 468: 
 469:         /* match any number of tabs and spaces */
 470:         case 'd':
 471:         ptr = s;
 472:         while (*s == ' ' || *s == '\t')
 473:             s++;
 474:         if (s != ptr || s == _start) {
 475: 
 476:             /* match, be happy */
 477:             matched = 1;
 478:             cs = MNEXT(cs);
 479:         } else if (*s == '\n' || *s == '\0') {
 480: 
 481:             /* match, be happy */
 482:             matched = 1;
 483:             cs = MNEXT(cs);
 484:         } else if (*cs & ALT) {
 485: 
 486:             /* try the next part */
 487:             matched = 0;
 488:             cs = MNEXT(cs);
 489:         } else if (*cs & OPT) {
 490: 
 491:             /* doesn't matter */
 492:             matched = 1;
 493:             cs = MNEXT(cs);
 494:         } else
 495: 
 496:             /* no match, error return */
 497:             return (NIL);
 498:         break;
 499: 
 500:         /* check for end of line */
 501:         case '$':
 502:         if (*s == '\0' || *s == '\n') {
 503: 
 504:             /* match, be happy */
 505:             s++;
 506:             matched = 1;
 507:             cs = MNEXT(cs);
 508:         } else if (*cs & ALT) {
 509: 
 510:             /* try the next part */
 511:             matched = 0;
 512:             cs = MNEXT(cs);
 513:         } else if (*cs & OPT) {
 514: 
 515:             /* doesn't matter */
 516:             matched = 1;
 517:             cs = MNEXT(cs);
 518:         } else
 519: 
 520:             /* no match, error return */
 521:             return (NIL);
 522:         break;
 523: 
 524:         /* check for start of line */
 525:         case '^':
 526:         if (s == _start) {
 527: 
 528:             /* match, be happy */
 529:             matched = 1;
 530:             cs = MNEXT(cs);
 531:         } else if (*cs & ALT) {
 532: 
 533:             /* try the next part */
 534:             matched = 0;
 535:             cs = MNEXT(cs);
 536:         } else if (*cs & OPT) {
 537: 
 538:             /* doesn't matter */
 539:             matched = 1;
 540:             cs = MNEXT(cs);
 541:         } else
 542: 
 543:             /* no match, error return */
 544:             return (NIL);
 545:         break;
 546: 
 547:         /* end of a subexpression, return success */
 548:         case ')':
 549:         return (s);
 550:         }
 551:         break;
 552:     }
 553:     }
 554:     return (s);
 555: }

Defined functions

STRNCMP defined in line 27; used 3 times
convexp defined in line 109; used 14 times
expconv defined in line 135; used 2 times
expmatch defined in line 308; used 17 times

Defined variables

_escaped defined in line 305; used 7 times
_start defined in line 306; used 2 times
ccre defined in line 106; used 28 times
l_onecase defined in line 18; used 1 times
  • in line 31
sccsid defined in line 8; never used
ure defined in line 105; used 4 times

Defined typedef's

boolean defined in line 13; used 3 times

Defined macros

ALT defined in line 102; used 6 times
FALSE defined in line 15; used 5 times
META defined in line 101; used 5 times
MNEXT defined in line 85; used 17 times
MSYM defined in line 84; used 5 times
NIL defined in line 16; used 39 times
OCNT defined in line 88; used 13 times
ONEXT defined in line 89; used 4 times
OPER defined in line 103; used 3 times
OPT defined in line 99; used 9 times
OPTR defined in line 90; used 2 times
OSYM defined in line 87; used 3 times
SCNT defined in line 92; used 6 times
SNEXT defined in line 94; used 3 times
SSTR defined in line 93; used 2 times
STR defined in line 100; used 5 times
TRUE defined in line 14; used 2 times
makelower defined in line 20; used 2 times
Last modified: 1987-02-17
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 3647
Valid CSS Valid XHTML 1.0 Strict