#ifndef lint
/*  SCCS identification string for this source file; omitted under lint  */
static char sccsid[] = "@(#)invert.c	2.5	9/10/85";
#endif /* not lint */
   4: #
   5: /*  input:  records of lines, separated by blank lines
   6:     output: key:file1 start/length ... start/length:file2 start/length ...
   7: */
   8: 
   9: # include "stdio.h"
  10: # include "streams.h"
  11: # include "bib.h"
  12: # define isnull(x)  (*(x) == NULL)
  13: # define makelow(c) ('A'<=(c) && (c)<='Z' ? (c)-'A'+'a' : c)
  14: 
int     max_kcnt = 100;     /*  max number of keys per record (-k flag)     */
int     max_klen =   6;     /*  max length of keys (-l flag)                */
char    *ignore =           /*  string of line starts to ignore (-% flag)   */
            "CNOPVX";
char    *common =           /*  name of file of common words (-c flag)      */
            COMFILE;
char    *INDEX=             /*  name of output file (-p flag)               */
            INDXFILE;

/*  NOTE(review): main() hands tmpfile to mktemp(), which rewrites its
    argument in place; this presumes INVTEMPFILE yields writable storage --
    confirm against bib.h.                                                  */
char    *tmpfile =          /*  name of temporary file                      */
            INVTEMPFILE;

int silent = 0;     /*  0 => statistics printed (default)       */
                /*  1 => no statistics printed (-s flag)    */

/*  printf-style template for the sort command; main() substitutes the
    temporary file name for both %s (sort input and -o output)              */
char *sort_it =
        "sort -u +0 -1 +1 -2 +2n -3 +3n %s -o %s";
char sortcmd[maxstr];       /*  holds the expanded sort command             */

/*  command-line arguments not yet consumed; set up by main() and
    advanced by both main() and flags()                                     */
int     argc;
char    **argv;
  36: 
  37: main(argcount,arglist)
  38: int argcount;
  39: char **arglist;
  40: {   char            *filename;
  41:     FILE            *input, *output;
  42:     long int        start,length;
  43:     char            word[maxstr];
  44:     int             kcnt;
  45:     char            tag_line[maxstr];
  46: 
  47:     long int        records = 0;  /*  number of records read           */
  48:     long int        keys    = 0;  /*  number of keys read (occurences) */
  49:     long int        distinct;     /*  number of distinct keys          */
  50:     long int        shorten();
  51: 
  52:     strcpy(COMFILE, N_COMFILE);
  53:     strcpy(BMACLIB, N_BMACLIB);
  54: 
  55:     argc= argcount-1;
  56:     argv= arglist+1;
  57:     mktemp(tmpfile);
  58:     output= fopen(tmpfile,"w");
  59: 
  60:     for ( flags() ; argc>0 ; argc--, argv++ ,flags() )
  61:     {   /* open input file              */
  62:             filename=   *argv;
  63:             input=      fopen(filename,"r");
  64:             if (input==NULL)
  65:             {   fprintf(stderr, "invert: error in open of %s\n", filename);
  66:                 continue;
  67:             }
  68:             start=      0L;
  69:             length=     0L;
  70: 
  71:         for(;;) /* each record  */
  72:         {   /* find start of next record (exit if none)     */
  73:                 start= nextrecord(input,start+length);
  74:                 if (start==EOF)   break;
  75:             records++;
  76:         kcnt= 0;
  77:             length= recsize(input,start);
  78:             sprintf(tag_line, " %s %d %d\n", filename, start, length);
  79: 
  80:             while (ftell(input) < start+length && kcnt < max_kcnt)
  81:             {   getword(input,word,ignore);
  82:                 makekey(word,max_klen,common);
  83:                 if (!isnull(word))
  84:                 {   fputs(word,output); fputs(tag_line,output);
  85:                     kcnt++; keys++;
  86:                 }
  87:             }
  88:         }
  89:         fclose(input);
  90:     }
  91:     fclose(output);
  92: 
  93:     sprintf(sortcmd, sort_it, tmpfile, tmpfile);
  94:     system(sortcmd);
  95: 
  96:     distinct = shorten(tmpfile,INDEX);
  97:     if( silent == 0 )
  98:     fprintf(stderr,
  99:         "%d documents   %d distinct keys  %d key occurrences\n",
 100:         records, distinct, keys);
 101:     exit(0);
 102: }
 103: 
 104: 
 105: 
 106: /*  Flag    Meaning                             Default
 107:     -ki     Keys per record                     100
 108:     -li     max Length of keys                  6
 109:     -%str   ignore lines that begin with %x     CNOPVX
 110:             where x is in str
 111:             str is a seq of chars
 112:     -cfile  file contains Common words          /usr/new/lib/bib/common
 113:             do not use common words as keys
 114:     -pfile  name of output file                 INDEX
 115:     -s	    do not print statistics		statistics printed
 116: */
 117: 
 118: # define    operand     (strlen(*argv+2)==0 ? (argv++,argc--,*argv) : *argv+2)
 119: 
 120: flags()
 121: {
 122:     char *p;
 123:     for (; argc>0 && *argv[0]=='-';  argc--,argv++)
 124:     {   switch ((*argv)[1])
 125:         {   case 'k':   max_kcnt= atoi(operand);
 126:                         break;
 127:             case 'l':   max_klen= atoi(operand);
 128:                         break;
 129:             case 'c':   common=  operand;
 130:                         break;
 131:             case '%':   ignore=  *argv+2;
 132:                         break;
 133:             case 'p':   INDEX=  operand;
 134:                         break;
 135:         case 's':   silent= 1;
 136:             break;
 137:         case 'd':
 138:         p = &argv[0][2];
 139:         if (!p) {
 140:             argv++;
 141:             p = &argv[0][0];
 142:         }
 143:         strreplace(COMFILE, BMACLIB, p);
 144:         strcpy(BMACLIB, p);
 145:         break;
 146:             default:    fprintf(stderr, "unknown flag '%s'\n", *argv);
 147:         }
 148:     }
 149: }
 150: 
 151: 
 152: /*  shorten(inf,outf): file "inf" consists of lines of the form:
 153:         key file start length
 154:     sorted by key and file.  replace lines with the same key
 155:     with one line of the form:
 156:         key:file1 start/length ... start/length:file2 start/length ...
 157:     rename as file "outf"
 158:     returns number of lines in output
 159: */
 160: long shorten(inf,outf)
 161: char *inf, *outf;
 162: {   FILE *in, *out;
 163:     char line[maxstr];
 164:     char key[maxstr],  newkey[maxstr],
 165:          file[maxstr], newfile[maxstr];
 166:     long int start, length;
 167:     long int lines = 0;
 168: 
 169:     in=  fopen(inf, "r");
 170:     out= fopen(outf, "w");
 171:     if (in==NULL || out==NULL)
 172:     {   fprintf(stderr, "invert: error in opening file for compression\n");
 173:         return(0);
 174:     }
 175: 
 176:     getline(in,line);
 177:     sscanf(line,"%s%s%d%d", key, file, &start, &length);
 178:     fprintf(out, "%s :%s %d/%d", key, file, start, length);
 179:     for ( getline(in, line) ; !feof(in);  getline(in, line))
 180:     {   sscanf(line,"%s%s%d%d", newkey, newfile, &start, &length);
 181:         if (strcmp(key,newkey)!=0)
 182:         {   strcpy(key, newkey);
 183:             strcpy(file, newfile);
 184:             fprintf(out, "\n%s :%s %d/%d",  key, file, start, length);
 185:         lines++;
 186:         }
 187:         else if (strcmp(file,newfile)!=0)
 188:         {   strcpy(file,newfile);
 189:             fprintf(out, ":%s %d/%d", file, start, length);
 190:         }
 191:         else
 192:             fprintf(out, " %d/%d", start, length);
 193:     }
 194:     fprintf(out, "\n");
 195:     lines++;
 196: 
 197:     fclose(in); fclose(out);
 198:     unlink(inf);
 199:     return (lines);
 200: }

Defined functions

flags defined in line 120; used 2 times
  • in line 60(2)
main defined in line 37; never used
shorten defined in line 160; used 2 times

Defined variables

INDEX defined in line 21; used 2 times
argc defined in line 34; used 6 times
argv defined in line 35; used 15 times
common defined in line 19; used 2 times
ignore defined in line 17; used 2 times
max_kcnt defined in line 15; used 2 times
max_klen defined in line 16; used 2 times
sccsid defined in line 2; never used
silent defined in line 27; used 2 times
sort_it defined in line 30; used 1 times
  • in line 93
sortcmd defined in line 32; used 2 times
tmpfile defined in line 24; used 5 times

Defined macros

isnull defined in line 12; used 1 times
  • in line 83
makelow defined in line 13; never used
operand defined in line 118; used 4 times
Last modified: 1986-03-12
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1320
Valid CSS Valid XHTML 1.0 Strict