1: # include   <stdio.h>
   2: # include   <ctype.h>
   3: # include   "strfile.h"
   4: 
   5: /*
 *	This program takes a file composed of strings separated by
 * lines starting with two consecutive delimiting characters (default
   8:  * character is '%') and creates another file which consists of a table
   9:  * describing the file (structure from "strfile.h"), a table of seek
 * pointers to the start of the strings, and the strings, each terminated
  11:  * by a null byte.  Usage:
  12:  *
  13:  *	% strfile [ - ] [ -cC ] [ -sv ] [ -oir ] sourcefile [ datafile ]
  14:  *
  15:  *	- - Give a usage summary useful for jogging the memory
  16:  *	c - Change delimiting character from '%' to 'C'
  17:  *	s - Silent.  Give no summary of data processed at the end of
  18:  *	    the run.
  19:  *	v - Verbose.  Give summary of data processed.  (Default)
  20:  *	o - order the strings in alphabetic order
  21:  *	i - if ordering, ignore case
  22:  *	r - randomize the order of the strings
  23:  *
  24:  *		Ken Arnold	Sept. 7, 1978 --
  25:  *
  26:  *	Added method to indicate dividers.  A "%-" will cause the address
  27:  * to be added to the structure in one of the pointer elements.
  28:  *
  29:  *		Ken Arnold	Nov., 1984 --
  30:  *
  31:  *	Added ordering options.
  32:  */
  33: 
/* Boolean constants used for the option flags and loop state. */
# define    TRUE    1
# define    FALSE   0

/*
 * Second character of a delimiter line that marks a divider: a line
 * starting with the delimiting character followed by DELIM_CH ends the
 * current string and also records the index of the next string in
 * Tbl.str_delims.
 */
# define    DELIM_CH    '-'
  38: 
/*
 * Sort record used when ordering the strings.  main() fills in one
 * record per string; do_order() sorts the records with cmp_str() and
 * copies the resulting positions back into the seek pointer table.
 */
typedef struct {
    char    first;      /* first alphanumeric character of the string
                         * (lower-cased when Iflag is set) */
    long    pos;        /* offset of the string in the output file */
} STR;
  43: 
  44: char    *Infile     = NULL,     /* input file name */
  45:     Outfile[100]    = "",       /* output file name */
  46:     Delimch     = '%',      /* delimiting character */
  47:     *Usage[]    = {     /* usage summary */
  48:        "usage:	strfile [ - ] [ -cC ] [ -sv ] [ -oir ] inputfile [ datafile ]",
  49:        "	- - Give this usage summary",
  50:        "	c - Replace delimiting character with 'C'",
  51:        "	s - Silent.  Give no summary",
  52:        "	v - Verbose.  Give summary.  (default)",
  53:        "	o - order strings alphabetically",
  54:        "	i - ignore case in ordering",
  55:        "	r - randomize the order of the strings",
  56:        "	Default \"datafile\" is inputfile.dat",
  57:     NULL
  58:     };
  59: 
int Sflag       = FALSE;    /* silent run flag (-s); cleared again by -v */
int Oflag       = FALSE;    /* ordering flag (-o) */
int Iflag       = FALSE;    /* ignore case flag (-i) */
int Rflag       = FALSE;    /* randomize order flag (-r) */
int Delim       = 0;        /* number of entries used in Tbl.str_delims */

long    *Seekpts;           /* seek pointer table; set by do_order() */

FILE    *Sort_1, *Sort_2;       /* pointers for sorting: two read streams
                                 * on Outfile, positioned by cmp_str() */

STRFILE Tbl;                /* statistics table; written at the very
                             * start of the output file */

STR *Firstch;           /* first chars of each string; allocated by
                         * main() only when Oflag is set */

/* Old-style (unprototyped) declarations of the library routines used. */
char    *fgets(), *malloc(), *strcpy(), *strcat();

long    ftell();
  77: 
  78: main(ac, av)
  79: int ac;
  80: char    **av;
  81: {
  82:     register char       *sp, dc;
  83:     register long       *lp;
  84:     register unsigned int   curseek;    /* number of strings */
  85:     register long       *seekpts, li;   /* table of seek pointers */
  86:     register FILE       *inf, *outf;
  87:     register int        first;
  88:     register char       *nsp;
  89:     register STR        *fp;
  90:     static char     string[257];
  91: 
  92:     getargs(ac, av);        /* evalute arguments */
  93: 
  94:     /*
  95: 	 * initial counting of input file
  96: 	 */
  97: 
  98:     dc = Delimch;
  99:     if ((inf = fopen(Infile, "r")) == NULL) {
 100:         perror(Infile);
 101:         exit(-1);
 102:     }
 103:     for (curseek = 0; (sp = fgets(string, 256, inf)) != NULL; )
 104:         if (*sp++ == dc && (*sp == dc || *sp == DELIM_CH))
 105:             curseek++;
 106:     curseek++;
 107: 
 108:     /*
 109: 	 * save space at begginning of file for tables
 110: 	 */
 111: 
 112:     if ((outf = fopen(Outfile, "w")) == NULL) {
 113:         perror(Outfile);
 114:         exit(-1);
 115:     }
 116: 
 117:     /*
 118: 	 * Allocate space for the pointers, adding one to the end so the
 119: 	 * length of the final string can be calculated.
 120: 	 */
 121:     ++curseek;
 122:     seekpts = (long *) malloc(sizeof *seekpts * curseek);   /* NOSTRICT */
 123:     if (seekpts == NULL) {
 124:         perror("calloc");
 125:         exit(-1);
 126:     }
 127:     if (Oflag) {
 128:         Firstch = (STR *) malloc(sizeof *Firstch * curseek);
 129:         if (Firstch == NULL) {
 130:             perror("calloc");
 131:             exit(-1);
 132:         }
 133:     }
 134: 
 135:     (void) fseek(outf, (long) (sizeof Tbl + sizeof *seekpts * curseek), 0);
 136:     (void) fseek(inf, (long) 0, 0);     /* goto start of input */
 137: 
 138:     /*
 139: 	 * write the strings onto the file
 140: 	 */
 141: 
 142:     Tbl.str_longlen = 0;
 143:     Tbl.str_shortlen = (unsigned int) 0xffffffff;
 144:     lp = seekpts;
 145:     first = Oflag;
 146:     *seekpts = ftell(outf);
 147:     fp = Firstch;
 148:     do {
 149:         sp = fgets(string, 256, inf);
 150:         if (sp == NULL ||
 151:             (*sp == dc && (sp[1] == dc || sp[1] == DELIM_CH))) {
 152:             putc('\0', outf);
 153:             *++lp = ftell(outf);
 154:             li = ftell(outf) - lp[-1] - 1;
 155:             if (Tbl.str_longlen < li)
 156:                 Tbl.str_longlen = li;
 157:             if (Tbl.str_shortlen > li)
 158:                 Tbl.str_shortlen = li;
 159:             if (sp && sp[1] == DELIM_CH && Delim < MAXDELIMS)
 160:                 Tbl.str_delims[Delim++] = lp - seekpts;
 161:             first = Oflag;
 162:         }
 163:         else {
 164:             if (first) {
 165:                 for (nsp = sp; !isalnum(*nsp); nsp++)
 166:                     continue;
 167:                 if (Iflag && isupper(*nsp))
 168:                     fp->first = tolower(*nsp);
 169:                 else
 170:                     fp->first = *nsp;
 171:                 fp->pos = *lp;
 172:                 fp++;
 173:                 first = FALSE;
 174:             }
 175:             fputs(sp, outf);
 176:         }
 177:     } while (sp != NULL);
 178: 
 179:     /*
 180: 	 * write the tables in
 181: 	 */
 182: 
 183:     (void) fclose(inf);
 184:     Tbl.str_numstr = curseek - 1;
 185: 
 186:     if (Oflag)
 187:         do_order(seekpts, outf);
 188:     else if (Rflag)
 189:         randomize(seekpts);
 190: 
 191:     (void) fseek(outf, (long) 0, 0);
 192:     (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf);
 193:     (void) fwrite((char *) seekpts, sizeof *seekpts, curseek, outf);
 194:     (void) fclose(outf);
 195: 
 196:     if (!Sflag) {
 197:         printf("\"%s\" converted to \"%s\"\n", Infile, Outfile);
 198:         if (curseek == 0)
 199:             puts("There was 1 string");
 200:         else
 201:             printf("There were %u strings\n", curseek - 1);
 202:         printf("Longest string: %u byte%s\n", Tbl.str_longlen,
 203:                Tbl.str_longlen == 1 ? "" : "s");
 204:         printf("Shortest string: %u byte%s\n", Tbl.str_shortlen,
 205:                Tbl.str_shortlen == 1 ? "" : "s");
 206:     }
 207:     exit(0);
 208: }
 209: 
 210: /*
 211:  *	This routine evaluates arguments from the command line
 212:  */
 213: getargs(ac, av)
 214: register int    ac;
 215: register char   **av;
 216: {
 217:     register char   *sp;
 218:     register int    i;
 219:     register int    bad, j;
 220: 
 221:     bad = 0;
 222:     for (i = 1; i < ac; i++)
 223:         if (*av[i] == '-' && av[i][1]) {
 224:             for (sp = &av[i][1]; *sp; sp++)
 225:                 switch (*sp) {
 226:                   case 'c': /* new delimiting char */
 227:                     if ((Delimch = *++sp) == '\0') {
 228:                         --sp;
 229:                         Delimch = *av[++i];
 230:                     }
 231:                     if (Delimch <= 0 || Delimch > '~' ||
 232:                         Delimch == DELIM_CH) {
 233:                         printf("bad delimiting character: '\\%o\n'",
 234:                                Delimch);
 235:                         bad++;
 236:                     }
 237:                     break;
 238:                   case 's': /* silent */
 239:                     Sflag++;
 240:                     break;
 241:                   case 'v': /* verbose */
 242:                     Sflag = 0;
 243:                     break;
 244:                   case 'o': /* order strings */
 245:                     Oflag++;
 246:                     break;
 247:                   case 'i': /* ignore case in ordering */
 248:                     Iflag++;
 249:                     break;
 250:                   case 'r': /* ignore case in ordering */
 251:                     Rflag++;
 252:                     break;
 253:                   default:  /* unknown flag */
 254:                     bad++;
 255:                     printf("bad flag: '%c'\n", *sp);
 256:                     break;
 257:                 }
 258:         }
 259:         else if (*av[i] == '-') {
 260:             for (j = 0; Usage[j]; j++)
 261:                 puts(Usage[j]);
 262:             exit(0);
 263:         }
 264:         else if (Infile)
 265:             (void) strcpy(Outfile, av[i]);
 266:         else
 267:             Infile = av[i];
 268:     if (!Infile) {
 269:         bad++;
 270:         puts("No input file name");
 271:     }
 272:     if (*Outfile == '\0' && !bad) {
 273:         (void) strcpy(Outfile, Infile);
 274:         (void) strcat(Outfile, ".dat");
 275:     }
 276:     if (bad) {
 277:         puts("use \"strfile -\" to get usage");
 278:         exit(-1);
 279:     }
 280: }
 281: 
 282: /*
 283:  * do_order:
 284:  *	Order the strings alphabetically (possibly ignoring case).
 285:  */
 286: do_order(seekpts, outf)
 287: long    *seekpts;
 288: FILE    *outf;
 289: {
 290:     register int    i;
 291:     register long   *lp;
 292:     register STR    *fp;
 293:     extern int  cmp_str();
 294: 
 295:     (void) fflush(outf);
 296:     Sort_1 = fopen(Outfile, "r");
 297:     Sort_2 = fopen(Outfile, "r");
 298:     Seekpts = seekpts;
 299:     qsort((char *) Firstch, Tbl.str_numstr, sizeof *Firstch, cmp_str);
 300:     i = Tbl.str_numstr;
 301:     lp = seekpts;
 302:     fp = Firstch;
 303:     while (i--)
 304:         *lp++ = fp++->pos;
 305:     (void) fclose(Sort_1);
 306:     (void) fclose(Sort_2);
 307:     Tbl.str_flags |= STR_ORDERED;
 308: }
 309: 
 310: /*
 311:  * cmp_str:
 312:  *	Compare two strings in the file
 313:  */
 314: cmp_str(p1, p2)
 315: STR *p1, *p2;
 316: {
 317:     register int    c1, c2;
 318: 
 319:     c1 = p1->first;
 320:     c2 = p2->first;
 321:     if (c1 != c2)
 322:         return c1 - c2;
 323: 
 324:     (void) fseek(Sort_1, p1->pos, 0);
 325:     (void) fseek(Sort_2, p2->pos, 0);
 326: 
 327:     while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0')
 328:         continue;
 329:     while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0')
 330:         continue;
 331: 
 332:     while (c1 != '\0' && c2 != '\0') {
 333:         if (Iflag) {
 334:             if (isupper(c1))
 335:                 c1 = tolower(c1);
 336:             if (isupper(c2))
 337:                 c2 = tolower(c2);
 338:         }
 339:         if (c1 != c2)
 340:             return c1 - c2;
 341:         c1 = getc(Sort_1);
 342:         c2 = getc(Sort_2);
 343:     }
 344:     return c1 - c2;
 345: }
 346: 
 347: /*
 348:  * randomize:
 349:  *	Randomize the order of the string table.  We must be careful
 350:  *	not to randomize across delimiter boundaries.  All
 351:  *	randomization is done within each block.
 352:  */
 353: randomize(seekpts)
 354: register long   *seekpts;
 355: {
 356:     register int    cnt, i, j, start;
 357:     register long   tmp;
 358:     register long   *origsp;
 359: 
 360:     Tbl.str_flags |= STR_RANDOM;
 361:     srnd(time((long *) NULL) + getpid());
 362:     origsp = seekpts;
 363:     for (j = 0; j <= Delim; j++) {
 364: 
 365:         /*
 366: 		 * get the starting place for the block
 367: 		 */
 368: 
 369:         if (j == 0)
 370:             start = 0;
 371:         else
 372:             start = Tbl.str_delims[j - 1];
 373: 
 374:         /*
 375: 		 * get the ending point
 376: 		 */
 377: 
 378:         if (j == Delim)
 379:             cnt = Tbl.str_numstr;
 380:         else
 381:             cnt = Tbl.str_delims[j];
 382: 
 383:         /*
 384: 		 * move things around randomly
 385: 		 */
 386: 
 387:         for (seekpts = &origsp[start]; cnt > start; cnt--, seekpts++) {
 388:             i = rnd(cnt - start);
 389:             tmp = seekpts[0];
 390:             seekpts[0] = seekpts[i];
 391:             seekpts[i] = tmp;
 392:         }
 393:     }
 394: }

Defined functions

cmp_str defined in line 314; used 2 times
do_order defined in line 286; used 1 times
getargs defined in line 213; used 1 times
  • in line 92
main defined in line 78; never used
randomize defined in line 353; used 1 times

Defined variables

Delim defined in line 64; used 4 times
Iflag defined in line 62; used 3 times
Infile defined in line 44; used 7 times
Oflag defined in line 61; used 5 times
Rflag defined in line 63; used 2 times
Seekpts defined in line 66; used 1 times
Sflag defined in line 60; used 3 times
Tbl defined in line 70; used 22 times

Defined macros

DELIM_CH defined in line 37; used 4 times
FALSE defined in line 35; used 5 times
TRUE defined in line 34; never used
Last modified: 1986-01-10
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 3697
Valid CSS Valid XHTML 1.0 Strict