1: /* decompr16: decompress 16bit compressed files on a 16bit Intel processor
   2:  *    running minix.
   3:  * This kludge was hacked together by John N. White on 6/30/91 and
   4:  *    is Public Domain.
   5:  * Use chmem =500 decompr16.
   6:  * decompr16 needs about 280k to run in pipe mode. (56k per copy)
   7:  *
   8:  * This program acts as a filter:
   9:  *    decompr16 < compressed_file > decompressed_file
  10:  *
  11:  * The arguments -0 to -4 causes only the corresponding pass to be done. Thus:
  12:  *    decompr16 -0 < compressed_file | decompr16 -1 | decompr16 -2 |
  13:  *         decompr16 -3 | decompr16 -4 > decompressed_file
  14:  * will also work.
  15:  * If RAM is scarce these passes can be done sequentially using tmp files.
  16:  *
  17:  * Compress uses a modified LZW compression algorithm. A compressed file
  18:  * is a set of indices into a dictionary of strings. The number of bits
  19:  * used to store each index depends on the number of entries currently
  20:  * in the dictionary. If there are between 257 and 512 entries, 9 bits
  21:  * are used. With 513 entries, 10 bits are used, etc. The initial dictionary
  22:  * consists of 0-255 (which are the corresponding chars) and 256 (which
  23:  * is a special CLEAR code). As each index in the compressed file is read,
  24:  * a new entry is added to the dictionary consisting of the current string
  25:  * with the first char of the next string appended. When the dictionary
  26:  * is full, no further entries are added. If a CLEAR code is received,
  27:  * the dictionary will be completely reset. The first two bytes of the
  28:  * compressed file are a magic number, and the third byte indicates the
  29:  * maximum number of bits, and whether the CLEAR code is used (older versions
  30:  * of compress didn't have CLEAR).
  31:  *
  32:  * This program works by forking four more copies of itself. The five
  33:  * programs form a pipeline. Copy 0 reads from stdin and writes to
  34:  * copy 1, which writes to copy 2, then to copy 3, then to copy 4 (the
  35:  * original) which writes to stdout. If given a switch -#, where # is a
  36:  * digit from 0 to 4 (example: -2), the program will run as that copy,
  37:  * reading from stdin and writing to stdout. This allows decompressing
  38:  * with very limited RAM because only one of the five passes is in
  39:  * memory at a time.
  40:  *
  41:  * The compressed data is a series of string indices (and a header at
  42:  * the beginning and an occasional CLEAR code). As these indices flow
  43:  * through the pipes, each program decodes the ones it can. The result
  44:  * of each decoding will be indices that the following programs can handle.
  45:  *
  46:  * Each of the 65536 strings in the dictionary is an earlier string with
  47:  * some character added to the end (except for the the 256 predefined
  48:  * single char strings). When new entries are made to the dictionary,
  49:  * the string index part will just be the last index to pass through.
  50:  * But the char part is the first char of the next string, which isn't
  51:  * known yet. So the string can be stored as a pair of indices. When
  52:  * this string is specified, it is converted to this pair of indices,
  53:  * which are flagged so that the first will be decoded in full while
  54:  * the second will be decoded to its first char. The dictionary takes
  55:  * 256k to store (64k strings of 2 indices of 2 bytes each). This is
  56:  * too big for a 64k data segment, so it is divided into 5 equal parts.
  57:  * Copy 0 of the program maintains the high part and copy 4 holds the
  58:  * low part.
  59:  */
  60: 
  61: #define BUFSZ       256     /* size of i/o buffers */
  62: #define BUFSZ_2     (BUFSZ/2)   /* # of unsigned shorts in i/o bufs */
  63: #define DICTSZ      (unsigned)13056 /* # of local dictionary entries */
  64: #define EOF_INDEX   (unsigned short)0xFFFF  /* EOF flag for pipeline */
  65: 
  66: int pipein=0, pipeout=1;    /* file nums for pipeline input and output */
  67: int fildes[2];          /* for use with pipe() */
  68: int ibufp, obufp, ibufe;    /* pointers to bufs, (inp, output, end) */
  69: int ipbufp=BUFSZ_2, opbufp; /* pointers to pipe bufs */
  70: int pnum= -1;           /* ID of this copy */
  71: unsigned short ipbuf[BUFSZ_2];  /* for buffering input */
  72: unsigned short opbuf[BUFSZ_2];  /* for buffering output */
  73: unsigned char *ibuf=(unsigned char*)ipbuf, *obuf=(unsigned char*)opbuf;
  74: 
  75: unsigned short  dindex[DICTSZ]; /* dictionary: index to substring */
  76: unsigned short  dchar[DICTSZ];  /* dictionary: last char of string */
  77: unsigned iindex, tindex, tindex2;/* holds index being processed */
  78: unsigned base;          /* where in global dict local dict starts */
  79: unsigned tbase;
  80: unsigned locend;        /* where in global dict local dict ends */
  81: unsigned curend=256;        /* current end of global dict */
  82: unsigned maxend;        /* max end of global dict */
  83: int dcharp;         /* ptr to dchar that needs next index entry */
  84: int curbits;            /* number of bits for getbits() to read */
  85: int maxbits;            /* limit on number of bits */
  86: int clearflg;           /* if set, allow CLEAR */
  87: int inmod;          /* mod 8 for getbits() */
  88: 
  89: main(argc, argv) char **argv; {
  90:   int i;
  91: 
  92:   /* check for arguments */
  93:   if(argc>=2){
  94:     if(**++argv != '-' || (pnum = (*argv)[1]-'0') < 0 || pnum > 4)
  95:       die("bad args\n");
  96:   }
  97: 
  98:   if(pnum<=0){              /* if this is pass 0 */
  99:     /* check header of compressed file */
 100:     if(mygetc() != 0x1F || mygetc() != 0x9D)/* check magic number */
 101:       die("not a compressed file\n");
 102:     iindex=mygetc();            /* get compression style */
 103:   } else getpipe();             /* get compression style */
 104:   maxbits = iindex&0x1F;
 105:   clearflg = (iindex&0x80) != 0;
 106:   if(maxbits<9 || maxbits>16)           /* check for valid maxbits */
 107:     die("can't decompress\n");
 108:   if((pnum & ~3) == 0) putpipe(iindex, 0);  /* pass style to next copy */
 109: 
 110:   if(pnum<0){           /* if decompr should set up pipeline */
 111:     /* fork copies and setup pipeline */
 112:     for(pnum=0; pnum<4; pnum++){        /* do four forks */
 113:         if(pipe(fildes)) die("pipe() error\n");
 114:         if((i=fork()) == -1) die("fork() error\n");
 115:         if(i){              /* if this is the copy */
 116:             pipeout = fildes[1];
 117:             break;
 118:         }
 119:         /* if this is the original */
 120:         pipein = fildes[0];
 121:         close(fildes[1]);       /* don't accumulate these */
 122:     }
 123:   }
 124: 
 125:   /* Preliminary inits. Note: end/maxend/curend are highest, not highest+1 */
 126:   base = DICTSZ*(4-pnum) + 256, locend = base+DICTSZ-1;
 127:   maxend = (1<<maxbits)-1;
 128:   if(maxend>locend) maxend=locend;
 129: 
 130:   for(;;){
 131:     curend = 255+clearflg;      /* init dictionary */
 132:     dcharp = DICTSZ;        /* flag for none needed */
 133:     curbits = 9;            /* init curbits (for copy 0) */
 134:     for(;;){    /* for each index in input */
 135:         if(pnum) getpipe(); /* get next index */
 136:         else{           /* get index using getbits() */
 137:             if(curbits<maxbits && (1<<curbits)<=curend){
 138:                 /* curbits needs to be increased */
 139:                 /* due to uglyness in compress, these indices
 140: 				 * in the compressed file are wasted */
 141:                 while(inmod) getbits();
 142:                 curbits++;
 143:             }
 144:             getbits();
 145:         }
 146:         if(iindex==256 && clearflg){
 147:             if(pnum<4) putpipe(iindex, 0);
 148:             /* due to uglyness in compress, these indices
 149: 			 * in the compressed file are wasted */
 150:             while(inmod) getbits();
 151:             break;
 152:         }
 153:         tindex=iindex;
 154:         /* convert the index part, ignoring spawned chars */
 155:         while(tindex>=base) tindex = dindex[tindex-base];
 156:         putpipe(tindex, 0); /* pass on the index */
 157:         /* save the char of the last added entry, if any */
 158:         if(dcharp<DICTSZ) dchar[dcharp++] = tindex;
 159:         if(curend<maxend)   /* if still building dictionary */
 160:           if(++curend >= base)
 161:             dindex[dcharp=(curend-base)] = iindex;
 162:         /* Do spawned chars. They are naturally produced in the wrong
 163: 		 * order. To get them in the right order without using memory,
 164: 		 * a series of passes, progressively less deep, are used */
 165:         tbase = base;
 166:         while((tindex=iindex) >= tbase){/* for each char to spawn */
 167:             while((tindex2=dindex[tindex-base]) >= tbase)
 168:               tindex=tindex2;   /* scan to desired char */
 169:             putpipe(dchar[tindex-base], 1);/* put it to the pipe */
 170:             tbase=tindex+1;
 171:         }
 172:     }
 173:   }
 174: }
 175: 
 176: /* If s, write to stderr. Flush buffers if needed. Then exit. */
 177: die(s) char *s; {
 178:   static int recurseflag=0;
 179:   if(recurseflag++) exit(1);
 180:   if(s) while(*s) write(2, s++, 1);
 181:   if(obufp) write(1, obuf, obufp);     /* flush stdout buf if needed */
 182:   do putpipe(EOF_INDEX, 0); while(opbufp); /* flush pipe if needed */
 183:   exit(s ? 1 : 0);
 184: }
 185: 
 186: /* Put a char to stdout. */
 187: myputc(c){
 188:   obuf[obufp++] = c;
 189:   if(obufp >= BUFSZ){           /* if stdout buf full */
 190:     write(1, obuf, BUFSZ);      /*   flush to stdout */
 191:     obufp=0;
 192:   }
 193: }
 194: 
 195: /* Get a char from stdin. If EOF, then die() and exit. */
 196: mygetc(){
 197:   unsigned u;
 198:   if(ibufp >= ibufe){       /* if stdin buf empty */
 199:     if((ibufe = read(0, ibuf, BUFSZ)) <= 0)
 200:       die(0);       /* if EOF, do normal exit */
 201:     ibufp=0;
 202:   }
 203:   return(ibuf[ibufp++]);
 204: }
 205: 
 206: /* Put curbits bits into index from stdin. Note: only copy 0 uses this.
 207:  * The bits within a byte are in the correct order. But when the bits
 208:  * cross a byte boundry, the lowest bits will be in the higher part of
 209:  * the current byte, and the higher bits will be in the lower part of
 210:  * the next byte. */
 211: getbits(){
 212:   int have;
 213:   static unsigned curbyte;  /* byte having bits extracted from it */
 214:   static int left;      /* how many bits are left in curbyte */
 215: 
 216:   inmod = (inmod+1) & 7;    /* count input mod 8 */
 217:   iindex=curbyte, have=left;
 218:   if(curbits-have > 8) iindex |= (unsigned)mygetc() << have, have+=8;
 219:   iindex |= ((curbyte=mygetc()) << have) & ~((unsigned)0xFFFF << curbits);
 220:   curbyte >>= curbits-have, left = 8 - (curbits-have);
 221: }
 222: 
 223: /* Get an index from the pipeline. If flagged firstonly, handle it here. */
 224: getpipe(){
 225:   static short flags;
 226:   static int n=0;   /* number of flags in flags */
 227: 
 228:   for(;;){      /* while index with firstonly flag set */
 229:     if(n<=0){
 230:         if(ipbufp >= BUFSZ_2){  /* if pipe input buf empty */
 231:             if(read(pipein, (char*)ipbuf, BUFSZ) != BUFSZ)
 232:               die("bad pipe read\n");
 233:             ipbufp=0;
 234:         }
 235:         flags = ipbuf[ipbufp++];
 236:         n = 15;
 237:     }
 238:     iindex = ipbuf[ipbufp++];
 239:     if(iindex>curend)
 240:       die(iindex == EOF_INDEX ? 0 : "invalid data\n");
 241:     flags<<=1, n--;
 242:     /* assume flags<0 if highest remaining flag is set */
 243:     if(flags>=0)        /* if firstonly flag for index is not set */
 244:       return;       /* return with valid non-firstonly index */
 245:     /* handle firstonly case here */
 246:     while(iindex>=base) iindex = dindex[iindex-base];
 247:     putpipe(iindex, 1);
 248:   }
 249: }
 250: 
 251: /* put an index to the pipeline */
 252: putpipe(u, flag) unsigned u; {
 253:   static unsigned short flags, *flagp;
 254:   static int n=0;   /* number of flags in flags */
 255: 
 256:   if(pnum==4){      /* if we should write to stdout */
 257:     myputc(u);  /* index will BE the char value */
 258:     return;
 259:   }
 260: 
 261:   if(n==0)          /* if we need to reserve a flag entry */
 262:     flags=0, flagp = opbuf + opbufp++;
 263:   opbuf[opbufp++] = u;      /* add index to buffer */
 264:   flags = (flags<<1) | flag;    /* add firstonly flag */
 265:   if(++n >= 15){        /* if block of 15 indicies */
 266:     n=0, *flagp=flags;  /*   insert flags entry */
 267:     if(opbufp >= BUFSZ_2){  /* if pipe out buf full */
 268:         opbufp=0;
 269:         if(write(pipeout, (char*)opbuf, BUFSZ) != BUFSZ)
 270:           die("bad pipe write\n");
 271:     }
 272:   }
 273: }

Defined functions

die defined in line 177; used 9 times
getbits defined in line 211; used 3 times
getpipe defined in line 224; used 2 times
main defined in line 89; never used
mygetc defined in line 196; used 5 times
myputc defined in line 187; used 1 times
putpipe defined in line 252; used 6 times

Defined variables

base defined in line 78; used 11 times
clearflg defined in line 86; used 3 times
curbits defined in line 84; used 8 times
curend defined in line 81; used 6 times
dchar defined in line 76; used 2 times
dcharp defined in line 83; used 4 times
dindex defined in line 75; used 4 times
fildes defined in line 67; used 4 times
ibuf defined in line 73; used 2 times
ibufe defined in line 68; used 2 times
ibufp defined in line 68; used 3 times
iindex defined in line 77; used 19 times
inmod defined in line 87; used 4 times
ipbuf defined in line 71; used 4 times
ipbufp defined in line 69; used 4 times
locend defined in line 80; used 3 times
maxbits defined in line 85; used 5 times
maxend defined in line 82; used 4 times
obufp defined in line 68; used 5 times
opbuf defined in line 72; used 4 times
pipein defined in line 66; used 2 times
pnum defined in line 70; used 12 times
tbase defined in line 79; used 4 times
tindex defined in line 77; used 11 times
tindex2 defined in line 77; used 2 times

Defined macros

BUFSZ defined in line 61; used 8 times
BUFSZ_2 defined in line 62; used 5 times
DICTSZ defined in line 63; used 6 times
EOF_INDEX defined in line 64; used 2 times
Last modified: 1992-12-05
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1667
Valid CSS Valid XHTML 1.0 Strict