1 /* 2 * Copyright (c) 1985, 1986 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods, derived from original work by Spencer Thomas 7 * and Joseph Orost. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 char copyright[] = 40 "@(#) Copyright (c) 1985, 1986 The Regents of the University of California.\n\ 41 All rights reserved.\n"; 42 #endif /* not lint */ 43 44 #ifndef lint 45 /*static char sccsid[] = "from: @(#)compress.c 5.19 (Berkeley) 3/18/91";*/ 46 static char rcsid[] = "$Id: compress.c,v 1.5 1993/08/01 18:17:18 mycroft Exp $"; 47 #endif /* not lint */ 48 49 /* 50 * compress.c - File compression ala IEEE Computer, June 1984. 51 * 52 * Authors: Spencer W. Thomas (decvax!utah-cs!thomas) 53 * Jim McKie (decvax!mcvax!jim) 54 * Steve Davies (decvax!vax135!petsd!peora!srd) 55 * Ken Turkowski (decvax!decwrl!turtlevax!ken) 56 * James A. Woods (decvax!ihnp4!ames!jaw) 57 * Joe Orost (decvax!vax135!petsd!joe) 58 */ 59 60 #include <sys/param.h> 61 #include <sys/stat.h> 62 #include <signal.h> 63 #include <utime.h> 64 #include <errno.h> 65 #include <unistd.h> 66 #include <stdio.h> 67 #include <ctype.h> 68 #include <stdlib.h> 69 #include <string.h> 70 71 /* 72 * Set USERMEM to the maximum amount of physical user memory available 73 * in bytes. USERMEM is used to determine the maximum BITS that can be used 74 * for compression. 75 * 76 * SACREDMEM is the amount of physical memory saved for others; compress 77 * will hog the rest. 78 */ 79 #ifndef SACREDMEM 80 #define SACREDMEM 0 81 #endif 82 83 #ifndef USERMEM 84 # define USERMEM 450000 /* default user memory */ 85 #endif 86 87 #ifdef pdp11 88 # define BITS 12 /* max bits/code for 16-bit machine */ 89 # define NO_UCHAR /* also if "unsigned char" functions as signed char */ 90 # undef USERMEM 91 #endif /* pdp11 */ /* don't forget to compile with -i */ 92 93 #ifdef USERMEM 94 # if USERMEM >= (433484+SACREDMEM) 95 # define PBITS 16 96 # else 97 # if USERMEM >= (229600+SACREDMEM) 98 # define PBITS 15 99 # else 100 # if USERMEM >= (127536+SACREDMEM) 101 # define PBITS 14 102 # else 103 # if USERMEM >= (73464+SACREDMEM) 104 # define PBITS 13 105 # else 106 # define PBITS 12 107 # endif 108 # endif 109 # endif 110 # endif 111 # undef USERMEM 112 #endif /* USERMEM */ 113 114 #ifdef PBITS /* Preferred BITS for this memory size */ 115 # ifndef BITS 116 # define BITS PBITS 117 # endif BITS 118 #endif /* PBITS */ 119 120 #if BITS == 16 121 # define HSIZE 69001 /* 95% occupancy */ 122 #endif 123 #if BITS == 15 124 # define HSIZE 35023 /* 94% occupancy */ 125 #endif 126 #if BITS == 14 127 # define HSIZE 18013 /* 91% occupancy */ 128 #endif 129 #if BITS == 13 130 # define HSIZE 9001 /* 91% occupancy */ 131 #endif 132 #if BITS <= 12 133 # define HSIZE 5003 /* 80% occupancy */ 134 #endif 135 136 /* 137 * a code_int must be able to hold 2**BITS values of type int, and also -1 138 */ 139 #if BITS > 15 140 typedef long int code_int; 141 #else 142 typedef int code_int; 143 #endif 144 145 #ifdef SIGNED_COMPARE_SLOW 146 typedef unsigned long int count_int; 147 typedef unsigned short int count_short; 148 #else 149 typedef long int count_int; 150 #endif 151 152 #ifdef NO_UCHAR 153 typedef char char_type; 154 #else 155 typedef unsigned char char_type; 156 #endif /* UCHAR */ 157 char_type magic_header[] = { "\037\235" }; /* 1F 9D */ 158 159 /* Defines for third byte of header */ 160 #define BIT_MASK 0x1f 161 #define BLOCK_MASK 0x80 162 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 163 a fourth header byte (for expansion). 164 */ 165 #define INIT_BITS 9 /* initial number of bits/code */ 166 167 int n_bits; /* number of bits/code */ 168 int maxbits = BITS; /* user settable max # bits/code */ 169 code_int maxcode; /* maximum code, given n_bits */ 170 code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */ 171 #ifdef COMPATIBLE /* But wrong! */ 172 # define MAXCODE(n_bits) (1 << (n_bits) - 1) 173 #else 174 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 175 #endif /* COMPATIBLE */ 176 177 count_int htab [HSIZE]; 178 unsigned short codetab [HSIZE]; 179 180 #define htabof(i) htab[i] 181 #define codetabof(i) codetab[i] 182 code_int hsize = HSIZE; /* for dynamic table sizing */ 183 count_int fsize; 184 185 /* 186 * To save much memory, we overlay the table used by compress() with those 187 * used by decompress(). The tab_prefix table is the same size and type 188 * as the codetab. The tab_suffix table needs 2**BITS characters. We 189 * get this from the beginning of htab. The output stack uses the rest 190 * of htab, and contains characters. There is plenty of room for any 191 * possible stack (stack used to be 8000 characters). 192 */ 193 194 #define tab_prefixof(i) codetabof(i) 195 # define tab_suffixof(i) ((char_type *)(htab))[i] 196 # define de_stack ((char_type *)&tab_suffixof(1<<BITS)) 197 198 code_int free_ent = 0; /* first unused entry */ 199 int exit_stat = 0; /* per-file status */ 200 int perm_stat = 0; /* permanent status */ 201 202 code_int getcode(); 203 204 int nomagic = 0; /* Use a 3-byte magic number header, unless old file */ 205 int zcat_flg = 0; /* Write output on stdout, suppress messages */ 206 int precious = 1; /* Don't unlink output file on interrupt */ 207 int quiet = 1; /* don't tell me about compression */ 208 209 /* 210 * block compression parameters -- after all codes are used up, 211 * and compression rate changes, start over. 212 */ 213 int block_compress = BLOCK_MASK; 214 int clear_flg = 0; 215 long int ratio = 0; 216 #define CHECK_GAP 10000 /* ratio check interval */ 217 count_int checkpoint = CHECK_GAP; 218 /* 219 * the next two codes should not be changed lightly, as they must not 220 * lie within the contiguous general code space. 221 */ 222 #define FIRST 257 /* first free entry */ 223 #define CLEAR 256 /* table clear output code */ 224 225 int force = 0; 226 char ofname [MAXPATHLEN]; 227 #ifdef DEBUG 228 int debug, verbose; 229 #endif 230 sig_t oldint; 231 int bgnd_flag; 232 233 int do_decomp = 0; 234 235 /*- 236 * Algorithm from "A Technique for High Performance Data Compression", 237 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. 238 * 239 * Usage: compress [-dfvc] [-b bits] [file ...] 240 * Inputs: 241 * -d: If given, decompression is done instead. 242 * 243 * -c: Write output on stdout, don't remove original. 244 * 245 * -b: Parameter limits the max number of bits/code. 246 * 247 * -f: Forces output file to be generated, even if one already 248 * exists, and even if no space is saved by compressing. 249 * If -f is not used, the user will be prompted if stdin is 250 * a tty, otherwise, the output file will not be overwritten. 251 * 252 * -v: Write compression statistics 253 * 254 * file ...: Files to be compressed. If none specified, stdin 255 * is used. 256 * Outputs: 257 * file.Z: Compressed form of file with same mode, owner, and utimes 258 * or stdout (if stdin used as input) 259 * 260 * Assumptions: 261 * When filenames are given, replaces with the compressed version 262 * (.Z suffix) only if the file decreases in size. 263 * Algorithm: 264 * Modified Lempel-Ziv method (LZW). Basically finds common 265 * substrings and replaces them with a variable size code. This is 266 * deterministic, and can be done on the fly. Thus, the decompression 267 * procedure needs no input table, but tracks the way the table was built. 268 */ 269 270 main(argc, argv) 271 int argc; 272 char **argv; 273 { 274 extern int optind; 275 extern char *optarg; 276 struct stat statbuf; 277 int ch, overwrite; 278 char **filelist, **fileptr, *cp, tempname[MAXPATHLEN]; 279 void onintr(), oops(); 280 281 /* This bg check only works for sh. */ 282 if ((oldint = signal(SIGINT, SIG_IGN)) != SIG_IGN) { 283 (void)signal(SIGINT, onintr); 284 (void)signal(SIGSEGV, oops); /* XXX */ 285 } 286 bgnd_flag = oldint != SIG_DFL; 287 288 #ifdef COMPATIBLE 289 nomagic = 1; /* Original didn't have a magic number */ 290 #endif 291 292 if (cp = rindex(argv[0], '/')) 293 ++cp; 294 else 295 cp = argv[0]; 296 if (strcmp(cp, "uncompress") == 0) 297 do_decomp = 1; 298 else if(strcmp(cp, "zcat") == 0) { 299 do_decomp = 1; 300 zcat_flg = 1; 301 } 302 303 /* 304 * -b maxbits => maxbits. 305 * -C => generate output compatible with compress 2.0. 306 * -c => cat all output to stdout 307 * -D => debug 308 * -d => do_decomp 309 * -f => force overwrite of output file 310 * -n => no header: useful to uncompress old files 311 * -V => print Version; debug verbose 312 * -v => unquiet 313 */ 314 315 overwrite = 0; 316 #ifdef DEBUG 317 while ((ch = getopt(argc, argv, "b:CcDdfnVv")) != EOF) 318 #else 319 while ((ch = getopt(argc, argv, "b:Ccdfnv")) != EOF) 320 #endif 321 switch(ch) { 322 case 'b': 323 maxbits = atoi(optarg); 324 break; 325 case 'C': 326 block_compress = 0; 327 break; 328 case 'c': 329 zcat_flg = 1; 330 break; 331 #ifdef DEBUG 332 case 'D': 333 debug = 1; 334 break; 335 #endif 336 case 'd': 337 do_decomp = 1; 338 break; 339 case 'f': 340 overwrite = 1; 341 force = 1; 342 break; 343 case 'n': 344 nomagic = 1; 345 break; 346 case 'q': 347 quiet = 1; 348 break; 349 #ifdef DEBUG 350 case 'V': 351 verbose = 1; 352 break; 353 #endif 354 case 'v': 355 quiet = 0; 356 break; 357 case '?': 358 default: 359 usage(); 360 } 361 argc -= optind; 362 argv += optind; 363 364 if (maxbits < INIT_BITS) 365 maxbits = INIT_BITS; 366 if (maxbits > BITS) 367 maxbits = BITS; 368 maxmaxcode = 1 << maxbits; 369 370 filelist = fileptr = argv; 371 372 if (*filelist != NULL) { 373 for (fileptr = filelist; *fileptr; fileptr++) { 374 exit_stat = 0; 375 if (do_decomp) { /* DECOMPRESSION */ 376 /* Check for .Z suffix */ 377 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { 378 /* No .Z: tack one on */ 379 strcpy(tempname, *fileptr); 380 strcat(tempname, ".Z"); 381 *fileptr = tempname; 382 } 383 /* Open input file */ 384 if ((freopen(*fileptr, "r", stdin)) == NULL) { 385 perror(*fileptr); 386 perm_stat = 1; 387 continue; 388 } 389 /* Check the magic number */ 390 if (nomagic == 0) { 391 if ((getchar() != (magic_header[0] & 0xFF)) 392 || (getchar() != (magic_header[1] & 0xFF))) { 393 fprintf(stderr, "%s: not in compressed format\n", 394 *fileptr); 395 continue; 396 } 397 maxbits = getchar(); /* set -b from file */ 398 block_compress = maxbits & BLOCK_MASK; 399 maxbits &= BIT_MASK; 400 maxmaxcode = 1 << maxbits; 401 if(maxbits > BITS) { 402 fprintf(stderr, 403 "%s: compressed with %d bits, can only handle %d bits\n", 404 *fileptr, maxbits, BITS); 405 continue; 406 } 407 } 408 /* Generate output filename */ 409 strcpy(ofname, *fileptr); 410 ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ 411 } else { /* COMPRESSION */ 412 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { 413 fprintf(stderr, "%s: already has .Z suffix -- no change\n", 414 *fileptr); 415 continue; 416 } 417 /* Open input file */ 418 if ((freopen(*fileptr, "r", stdin)) == NULL) { 419 perror(*fileptr); 420 perm_stat = 1; 421 continue; 422 } 423 stat ( *fileptr, &statbuf ); 424 fsize = (long) statbuf.st_size; 425 /* 426 * tune hash table size for small files -- ad hoc, 427 * but the sizes match earlier #defines, which 428 * serve as upper bounds on the number of output codes. 429 */ 430 hsize = HSIZE; 431 if ( fsize < (1 << 12) ) 432 hsize = MIN ( 5003, HSIZE ); 433 else if ( fsize < (1 << 13) ) 434 hsize = MIN ( 9001, HSIZE ); 435 else if ( fsize < (1 << 14) ) 436 hsize = MIN ( 18013, HSIZE ); 437 else if ( fsize < (1 << 15) ) 438 hsize = MIN ( 35023, HSIZE ); 439 else if ( fsize < 47000 ) 440 hsize = MIN ( 50021, HSIZE ); 441 442 /* Generate output filename */ 443 strcpy(ofname, *fileptr); 444 strcat(ofname, ".Z"); 445 } 446 /* Check for overwrite of existing file */ 447 if (overwrite == 0 && zcat_flg == 0) { 448 if (stat(ofname, &statbuf) == 0) { 449 char response[2]; 450 response[0] = 'n'; 451 fprintf(stderr, "%s already exists;", ofname); 452 if (bgnd_flag == 0 && isatty(2)) { 453 fprintf(stderr, " do you wish to overwrite %s (y or n)? ", 454 ofname); 455 fflush(stderr); 456 read(2, response, 2); 457 while (response[1] != '\n') { 458 if (read(2, response+1, 1) < 0) { /* Ack! */ 459 perror("stderr"); break; 460 } 461 } 462 } 463 if (response[0] != 'y') { 464 fprintf(stderr, "\tnot overwritten\n"); 465 continue; 466 } 467 } 468 } 469 if(zcat_flg == 0) { /* Open output file */ 470 if (freopen(ofname, "w", stdout) == NULL) { 471 perror(ofname); 472 perm_stat = 1; 473 continue; 474 } 475 precious = 0; 476 if(!quiet) 477 fprintf(stderr, "%s: ", *fileptr); 478 } 479 480 /* Actually do the compression/decompression */ 481 if (do_decomp == 0) compress(); 482 #ifndef DEBUG 483 else decompress(); 484 #else 485 else if (debug == 0) decompress(); 486 else printcodes(); 487 if (verbose) dump_tab(); 488 #endif /* DEBUG */ 489 if(zcat_flg == 0) { 490 copystat(*fileptr, ofname); /* Copy stats */ 491 precious = 1; 492 if((exit_stat == 1) || (!quiet)) 493 putc('\n', stderr); 494 } 495 } 496 } else { /* Standard input */ 497 if (do_decomp == 0) { 498 compress(); 499 #ifdef DEBUG 500 if(verbose) dump_tab(); 501 #endif /* DEBUG */ 502 if(!quiet) 503 putc('\n', stderr); 504 } else { 505 /* Check the magic number */ 506 if (nomagic == 0) { 507 if ((getchar()!=(magic_header[0] & 0xFF)) 508 || (getchar()!=(magic_header[1] & 0xFF))) { 509 fprintf(stderr, "stdin: not in compressed format\n"); 510 exit(1); 511 } 512 maxbits = getchar(); /* set -b from file */ 513 block_compress = maxbits & BLOCK_MASK; 514 maxbits &= BIT_MASK; 515 maxmaxcode = 1 << maxbits; 516 fsize = 100000; /* assume stdin large for USERMEM */ 517 if(maxbits > BITS) { 518 fprintf(stderr, 519 "stdin: compressed with %d bits, can only handle %d bits\n", 520 maxbits, BITS); 521 exit(1); 522 } 523 } 524 #ifndef DEBUG 525 decompress(); 526 #else 527 if (debug == 0) decompress(); 528 else printcodes(); 529 if (verbose) dump_tab(); 530 #endif /* DEBUG */ 531 } 532 } 533 exit(perm_stat ? perm_stat : exit_stat); 534 } 535 536 static int offset; 537 long int in_count = 1; /* length of input */ 538 long int bytes_out; /* length of compressed output */ 539 long int out_count = 0; /* # of codes output (for debugging) */ 540 541 /* 542 * compress stdin to stdout 543 * 544 * Algorithm: use open addressing double hashing (no chaining) on the 545 * prefix code / next character combination. We do a variant of Knuth's 546 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime 547 * secondary probe. Here, the modular division first probe is gives way 548 * to a faster exclusive-or manipulation. Also do block compression with 549 * an adaptive reset, whereby the code table is cleared when the compression 550 * ratio decreases, but after the table fills. The variable-length output 551 * codes are re-sized at this point, and a special CLEAR code is generated 552 * for the decompressor. Late addition: construct the table according to 553 * file size for noticeable speed improvement on small files. Please direct 554 * questions about this implementation to ames!jaw. 555 */ 556 557 compress() 558 { 559 register long fcode; 560 register code_int i = 0; 561 register int c; 562 register code_int ent; 563 register int disp; 564 register code_int hsize_reg; 565 register int hshift; 566 567 #ifndef COMPATIBLE 568 if (nomagic == 0) { 569 putchar(magic_header[0]); 570 putchar(magic_header[1]); 571 putchar((char)(maxbits | block_compress)); 572 if(ferror(stdout)) 573 writeerr(); 574 } 575 #endif /* COMPATIBLE */ 576 577 offset = 0; 578 bytes_out = 3; /* includes 3-byte header mojo */ 579 out_count = 0; 580 clear_flg = 0; 581 ratio = 0; 582 in_count = 1; 583 checkpoint = CHECK_GAP; 584 maxcode = MAXCODE(n_bits = INIT_BITS); 585 free_ent = ((block_compress) ? FIRST : 256 ); 586 587 ent = getchar (); 588 589 hshift = 0; 590 for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L ) 591 hshift++; 592 hshift = 8 - hshift; /* set hash code range bound */ 593 594 hsize_reg = hsize; 595 cl_hash( (count_int) hsize_reg); /* clear hash table */ 596 597 #ifdef SIGNED_COMPARE_SLOW 598 while ( (c = getchar()) != (unsigned) EOF ) { 599 #else 600 while ( (c = getchar()) != EOF ) { 601 #endif 602 in_count++; 603 fcode = (long) (((long) c << maxbits) + ent); 604 i = ((c << hshift) ^ ent); /* xor hashing */ 605 606 if ( htabof (i) == fcode ) { 607 ent = codetabof (i); 608 continue; 609 } else if ( (long)htabof (i) < 0 ) /* empty slot */ 610 goto nomatch; 611 disp = hsize_reg - i; /* secondary hash (after G. Knott) */ 612 if ( i == 0 ) 613 disp = 1; 614 probe: 615 if ( (i -= disp) < 0 ) 616 i += hsize_reg; 617 618 if ( htabof (i) == fcode ) { 619 ent = codetabof (i); 620 continue; 621 } 622 if ( (long)htabof (i) > 0 ) 623 goto probe; 624 nomatch: 625 output ( (code_int) ent ); 626 out_count++; 627 ent = c; 628 #ifdef SIGNED_COMPARE_SLOW 629 if ( (unsigned) free_ent < (unsigned) maxmaxcode) { 630 #else 631 if ( free_ent < maxmaxcode ) { 632 #endif 633 codetabof (i) = free_ent++; /* code -> hashtable */ 634 htabof (i) = fcode; 635 } 636 else if ( (count_int)in_count >= checkpoint && block_compress ) 637 cl_block (); 638 } 639 /* 640 * Put out the final code. 641 */ 642 output( (code_int)ent ); 643 out_count++; 644 output( (code_int)-1 ); 645 646 /* 647 * Print out stats on stderr 648 */ 649 if(zcat_flg == 0 && !quiet) { 650 #ifdef DEBUG 651 fprintf( stderr, 652 "%ld chars in, %ld codes (%ld bytes) out, compression factor: ", 653 in_count, out_count, bytes_out ); 654 prratio( stderr, in_count, bytes_out ); 655 fprintf( stderr, "\n"); 656 fprintf( stderr, "\tCompression as in compact: " ); 657 prratio( stderr, in_count-bytes_out, in_count ); 658 fprintf( stderr, "\n"); 659 fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n", 660 free_ent - 1, n_bits ); 661 #else /* !DEBUG */ 662 fprintf( stderr, "Compression: " ); 663 prratio( stderr, in_count-bytes_out, in_count ); 664 #endif /* DEBUG */ 665 } 666 if(bytes_out > in_count) /* exit(2) if no savings */ 667 exit_stat = 2; 668 return; 669 } 670 671 /*- 672 * Output the given code. 673 * Inputs: 674 * code: A n_bits-bit integer. If == -1, then EOF. This assumes 675 * that n_bits =< (long)wordsize - 1. 676 * Outputs: 677 * Outputs code to the file. 678 * Assumptions: 679 * Chars are 8 bits long. 680 * Algorithm: 681 * Maintain a BITS character long buffer (so that 8 codes will 682 * fit in it exactly). Use the VAX insv instruction to insert each 683 * code in turn. When the buffer fills up empty it and start over. 684 */ 685 686 static char buf[BITS]; 687 688 #ifndef vax 689 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; 690 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 691 #endif /* vax */ 692 693 output( code ) 694 code_int code; 695 { 696 #ifdef DEBUG 697 static int col = 0; 698 #endif /* DEBUG */ 699 700 /* 701 * On the VAX, it is important to have the register declarations 702 * in exactly the order given, or the asm will break. 703 */ 704 register int r_off = offset, bits= n_bits; 705 register char * bp = buf; 706 707 #ifdef DEBUG 708 if ( verbose ) 709 fprintf( stderr, "%5d%c", code, 710 (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 711 #endif /* DEBUG */ 712 if ( code >= 0 ) { 713 #if defined(vax) && !defined(__GNUC__) 714 /* 715 * VAX and PCC DEPENDENT!! Implementation on other machines is 716 * below. 717 * 718 * Translation: Insert BITS bits from the argument starting at 719 * offset bits from the beginning of buf. 720 */ 721 0; /* Work around for pcc -O bug with asm and if stmt */ 722 asm( "insv 4(ap),r11,r10,(r9)" ); 723 #else 724 /* 725 * byte/bit numbering on the VAX is simulated by the following code 726 */ 727 /* 728 * Get to the first byte. 729 */ 730 bp += (r_off >> 3); 731 r_off &= 7; 732 /* 733 * Since code is always >= 8 bits, only need to mask the first 734 * hunk on the left. 735 */ 736 *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; 737 bp++; 738 bits -= (8 - r_off); 739 code >>= 8 - r_off; 740 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 741 if ( bits >= 8 ) { 742 *bp++ = code; 743 code >>= 8; 744 bits -= 8; 745 } 746 /* Last bits. */ 747 if(bits) 748 *bp = code; 749 #endif /* vax */ 750 offset += n_bits; 751 if ( offset == (n_bits << 3) ) { 752 bp = buf; 753 bits = n_bits; 754 bytes_out += bits; 755 do { 756 putchar(*bp++); 757 if (ferror(stdout)) 758 writeerr(); 759 } while(--bits); 760 offset = 0; 761 } 762 763 /* 764 * If the next entry is going to be too big for the code size, 765 * then increase it, if possible. 766 */ 767 if ( free_ent > maxcode || (clear_flg > 0)) 768 { 769 /* 770 * Write the whole buffer, because the input side won't 771 * discover the size increase until after it has read it. 772 */ 773 if ( offset > 0 ) { 774 if( fwrite( buf, 1, n_bits, stdout ) != n_bits) 775 writeerr(); 776 bytes_out += n_bits; 777 } 778 offset = 0; 779 780 if ( clear_flg ) { 781 maxcode = MAXCODE (n_bits = INIT_BITS); 782 clear_flg = 0; 783 } 784 else { 785 n_bits++; 786 if ( n_bits == maxbits ) 787 maxcode = maxmaxcode; 788 else 789 maxcode = MAXCODE(n_bits); 790 } 791 #ifdef DEBUG 792 if ( debug ) { 793 fprintf( stderr, "\nChange to %d bits\n", n_bits ); 794 col = 0; 795 } 796 #endif /* DEBUG */ 797 } 798 } else { 799 /* 800 * At EOF, write the rest of the buffer. 801 */ 802 if ( offset > 0 ) { 803 offset = (offset + 7) / 8; 804 if( fwrite( buf, 1, offset, stdout ) != offset ) 805 writeerr(); 806 bytes_out += offset; 807 } 808 offset = 0; 809 (void)fflush( stdout ); 810 if( ferror( stdout ) ) 811 writeerr(); 812 #ifdef DEBUG 813 if ( verbose ) 814 fprintf( stderr, "\n" ); 815 #endif 816 } 817 } 818 819 /* 820 * Decompress stdin to stdout. This routine adapts to the codes in the 821 * file building the "string" table on-the-fly; requiring no table to 822 * be stored in the compressed file. The tables used herein are shared 823 * with those of the compress() routine. See the definitions above. 824 */ 825 826 decompress() { 827 register char_type *stackp; 828 register int finchar; 829 register code_int code, oldcode, incode; 830 int n, nwritten, offset; /* Variables for buffered write */ 831 char buff[BUFSIZ]; /* Buffer for buffered write */ 832 833 834 /* 835 * As above, initialize the first 256 entries in the table. 836 */ 837 maxcode = MAXCODE(n_bits = INIT_BITS); 838 for ( code = 255; code >= 0; code-- ) { 839 tab_prefixof(code) = 0; 840 tab_suffixof(code) = (char_type)code; 841 } 842 free_ent = ((block_compress) ? FIRST : 256 ); 843 844 finchar = oldcode = getcode(); 845 if(oldcode == -1) /* EOF already? */ 846 return; /* Get out of here */ 847 848 /* first code must be 8 bits = char */ 849 n=0; 850 buff[n++] = (char)finchar; 851 852 stackp = de_stack; 853 854 while ( (code = getcode()) > -1 ) { 855 856 if ( (code == CLEAR) && block_compress ) { 857 for ( code = 255; code >= 0; code-- ) 858 tab_prefixof(code) = 0; 859 clear_flg = 1; 860 free_ent = FIRST - 1; 861 if ( (code = getcode ()) == -1 ) /* O, untimely death! */ 862 break; 863 } 864 incode = code; 865 /* 866 * Special case for KwKwK string. 867 */ 868 if ( code >= free_ent ) { 869 *stackp++ = finchar; 870 code = oldcode; 871 } 872 873 /* 874 * Generate output characters in reverse order 875 */ 876 #ifdef SIGNED_COMPARE_SLOW 877 while ( ((unsigned long)code) >= ((unsigned long)256) ) { 878 #else 879 while ( code >= 256 ) { 880 #endif 881 *stackp++ = tab_suffixof(code); 882 code = tab_prefixof(code); 883 } 884 *stackp++ = finchar = tab_suffixof(code); 885 886 /* 887 * And put them out in forward order 888 */ 889 do { 890 /* 891 * About 60% of the time is spent in the putchar() call 892 * that appeared here. It was originally 893 * putchar ( *--stackp ); 894 * If we buffer the writes ourselves, we can go faster (about 895 * 30%). 896 * 897 * At this point, the next line is the next *big* time 898 * sink in the code. It takes up about 10% of the time. 899 */ 900 buff[n++] = *--stackp; 901 if (n == BUFSIZ) { 902 offset = 0; 903 do { 904 nwritten = write(fileno(stdout), &buff[offset], n); 905 if (nwritten < 0) 906 writeerr(); 907 offset += nwritten; 908 } while ((n -= nwritten) > 0); 909 } 910 } while ( stackp > de_stack ); 911 912 /* 913 * Generate the new entry. 914 */ 915 if ( (code=free_ent) < maxmaxcode ) { 916 tab_prefixof(code) = (unsigned short)oldcode; 917 tab_suffixof(code) = finchar; 918 free_ent = code+1; 919 } 920 /* 921 * Remember previous code. 922 */ 923 oldcode = incode; 924 } 925 /* 926 * Flush the stuff remaining in our buffer... 927 */ 928 offset = 0; 929 while (n > 0) { 930 nwritten = write(fileno(stdout), &buff[offset], n); 931 if (nwritten < 0) 932 writeerr(); 933 offset += nwritten; 934 n -= nwritten; 935 } 936 } 937 938 /*- 939 * Read one code from the standard input. If EOF, return -1. 940 * Inputs: 941 * stdin 942 * Outputs: 943 * code or -1 is returned. 944 */ 945 code_int 946 getcode() { 947 /* 948 * On the VAX, it is important to have the register declarations 949 * in exactly the order given, or the asm will break. 950 */ 951 register code_int code; 952 static int offset = 0, size = 0; 953 static char_type buf[BITS]; 954 register int r_off, bits; 955 register char_type *bp = buf; 956 957 if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { 958 /* 959 * If the next entry will be too big for the current code 960 * size, then we must increase the size. This implies reading 961 * a new buffer full, too. 962 */ 963 if ( free_ent > maxcode ) { 964 n_bits++; 965 if ( n_bits == maxbits ) 966 maxcode = maxmaxcode; /* won't get any bigger now */ 967 else 968 maxcode = MAXCODE(n_bits); 969 } 970 if ( clear_flg > 0) { 971 maxcode = MAXCODE (n_bits = INIT_BITS); 972 clear_flg = 0; 973 } 974 size = fread( buf, 1, n_bits, stdin ); 975 if ( size <= 0 ) 976 return -1; /* end of file */ 977 offset = 0; 978 /* Round size down to integral number of codes */ 979 size = (size << 3) - (n_bits - 1); 980 } 981 r_off = offset; 982 bits = n_bits; 983 #ifdef vax 984 asm( "extzv r10,r9,(r8),r11" ); 985 #else /* not a vax */ 986 /* 987 * Get to the first byte. 988 */ 989 bp += (r_off >> 3); 990 r_off &= 7; 991 /* Get first part (low order bits) */ 992 #ifdef NO_UCHAR 993 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff; 994 #else 995 code = (*bp++ >> r_off); 996 #endif /* NO_UCHAR */ 997 bits -= (8 - r_off); 998 r_off = 8 - r_off; /* now, offset into code word */ 999 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 1000 if ( bits >= 8 ) { 1001 #ifdef NO_UCHAR 1002 code |= (*bp++ & 0xff) << r_off; 1003 #else 1004 code |= *bp++ << r_off; 1005 #endif /* NO_UCHAR */ 1006 r_off += 8; 1007 bits -= 8; 1008 } 1009 /* high order bits. */ 1010 code |= (*bp & rmask[bits]) << r_off; 1011 #endif /* vax */ 1012 offset += n_bits; 1013 1014 return code; 1015 } 1016 1017 #ifdef DEBUG 1018 printcodes() 1019 { 1020 /* 1021 * Just print out codes from input file. For debugging. 1022 */ 1023 code_int code; 1024 int col = 0, bits; 1025 1026 bits = n_bits = INIT_BITS; 1027 maxcode = MAXCODE(n_bits); 1028 free_ent = ((block_compress) ? FIRST : 256 ); 1029 while ( ( code = getcode() ) >= 0 ) { 1030 if ( (code == CLEAR) && block_compress ) { 1031 free_ent = FIRST - 1; 1032 clear_flg = 1; 1033 } 1034 else if ( free_ent < maxmaxcode ) 1035 free_ent++; 1036 if ( bits != n_bits ) { 1037 fprintf(stderr, "\nChange to %d bits\n", n_bits ); 1038 bits = n_bits; 1039 col = 0; 1040 } 1041 fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 1042 } 1043 putc( '\n', stderr ); 1044 exit( 0 ); 1045 } 1046 1047 code_int sorttab[1<<BITS]; /* sorted pointers into htab */ 1048 1049 dump_tab() /* dump string table */ 1050 { 1051 register int i, first; 1052 register ent; 1053 #define STACK_SIZE 15000 1054 int stack_top = STACK_SIZE; 1055 register c; 1056 1057 if(do_decomp == 0) { /* compressing */ 1058 register int flag = 1; 1059 1060 for(i=0; i<hsize; i++) { /* build sort pointers */ 1061 if((long)htabof(i) >= 0) { 1062 sorttab[codetabof(i)] = i; 1063 } 1064 } 1065 first = block_compress ? FIRST : 256; 1066 for(i = first; i < free_ent; i++) { 1067 fprintf(stderr, "%5d: \"", i); 1068 de_stack[--stack_top] = '\n'; 1069 de_stack[--stack_top] = '"'; 1070 stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff, 1071 stack_top); 1072 for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1); 1073 ent > 256; 1074 ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) { 1075 stack_top = in_stack(htabof(sorttab[ent]) >> maxbits, 1076 stack_top); 1077 } 1078 stack_top = in_stack(ent, stack_top); 1079 fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr); 1080 stack_top = STACK_SIZE; 1081 } 1082 } else if(!debug) { /* decompressing */ 1083 1084 for ( i = 0; i < free_ent; i++ ) { 1085 ent = i; 1086 c = tab_suffixof(ent); 1087 if ( isascii(c) && isprint(c) ) 1088 fprintf( stderr, "%5d: %5d/'%c' \"", 1089 ent, tab_prefixof(ent), c ); 1090 else 1091 fprintf( stderr, "%5d: %5d/\\%03o \"", 1092 ent, tab_prefixof(ent), c ); 1093 de_stack[--stack_top] = '\n'; 1094 de_stack[--stack_top] = '"'; 1095 for ( ; ent != NULL; 1096 ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) { 1097 stack_top = in_stack(tab_suffixof(ent), stack_top); 1098 } 1099 fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr ); 1100 stack_top = STACK_SIZE; 1101 } 1102 } 1103 } 1104 1105 int 1106 in_stack(c, stack_top) 1107 register c, stack_top; 1108 { 1109 if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) { 1110 de_stack[--stack_top] = c; 1111 } else { 1112 switch( c ) { 1113 case '\n': de_stack[--stack_top] = 'n'; break; 1114 case '\t': de_stack[--stack_top] = 't'; break; 1115 case '\b': de_stack[--stack_top] = 'b'; break; 1116 case '\f': de_stack[--stack_top] = 'f'; break; 1117 case '\r': de_stack[--stack_top] = 'r'; break; 1118 case '\\': de_stack[--stack_top] = '\\'; break; 1119 default: 1120 de_stack[--stack_top] = '0' + c % 8; 1121 de_stack[--stack_top] = '0' + (c / 8) % 8; 1122 de_stack[--stack_top] = '0' + c / 64; 1123 break; 1124 } 1125 de_stack[--stack_top] = '\\'; 1126 } 1127 return stack_top; 1128 } 1129 #endif /* DEBUG */ 1130 1131 writeerr() 1132 { 1133 (void)fprintf(stderr, "compress: %s: %s\n", 1134 ofname[0] ? ofname : "stdout", strerror(errno)); 1135 (void)unlink(ofname); 1136 exit(1); 1137 } 1138 1139 copystat(ifname, ofname) 1140 char *ifname, *ofname; 1141 { 1142 struct stat statbuf; 1143 int mode; 1144 struct utimbuf tp; 1145 1146 fclose(stdout); 1147 if (stat(ifname, &statbuf)) { /* Get stat on input file */ 1148 perror(ifname); 1149 return; 1150 } 1151 if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) { 1152 if(quiet) 1153 fprintf(stderr, "%s: ", ifname); 1154 fprintf(stderr, " -- not a regular file: unchanged"); 1155 exit_stat = 1; 1156 perm_stat = 1; 1157 } else if (statbuf.st_nlink > 1) { 1158 if(quiet) 1159 fprintf(stderr, "%s: ", ifname); 1160 fprintf(stderr, " -- has %d other links: unchanged", 1161 statbuf.st_nlink - 1); 1162 exit_stat = 1; 1163 perm_stat = 1; 1164 } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */ 1165 if(!quiet) 1166 fprintf(stderr, " -- file unchanged"); 1167 } else { /* ***** Successful Compression ***** */ 1168 exit_stat = 0; 1169 mode = statbuf.st_mode & 07777; 1170 if (chmod(ofname, mode)) /* Copy modes */ 1171 perror(ofname); 1172 chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ 1173 tp.actime = statbuf.st_atime; 1174 tp.modtime = statbuf.st_mtime; 1175 utime(ofname, &tp); /* Update last accessed and modified times */ 1176 if (unlink(ifname)) /* Remove input file */ 1177 perror(ifname); 1178 if(!quiet) 1179 fprintf(stderr, " -- replaced with %s", ofname); 1180 return; /* Successful return */ 1181 } 1182 1183 /* Unsuccessful return -- one of the tests failed */ 1184 if (unlink(ofname)) 1185 perror(ofname); 1186 } 1187 1188 void 1189 onintr ( ) 1190 { 1191 if (!precious) 1192 unlink ( ofname ); 1193 exit ( 1 ); 1194 } 1195 1196 void 1197 oops ( ) /* wild pointer -- assume bad input */ 1198 { 1199 if ( do_decomp ) 1200 fprintf ( stderr, "uncompress: corrupt input\n" ); 1201 unlink ( ofname ); 1202 exit ( 1 ); 1203 } 1204 1205 cl_block () /* table clear for block compress */ 1206 { 1207 register long int rat; 1208 1209 checkpoint = in_count + CHECK_GAP; 1210 #ifdef DEBUG 1211 if ( debug ) { 1212 fprintf ( stderr, "count: %ld, ratio: ", in_count ); 1213 prratio ( stderr, in_count, bytes_out ); 1214 fprintf ( stderr, "\n"); 1215 } 1216 #endif /* DEBUG */ 1217 1218 if(in_count > 0x007fffff) { /* shift will overflow */ 1219 rat = bytes_out >> 8; 1220 if(rat == 0) { /* Don't divide by zero */ 1221 rat = 0x7fffffff; 1222 } else { 1223 rat = in_count / rat; 1224 } 1225 } else { 1226 rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ 1227 } 1228 if ( rat > ratio ) { 1229 ratio = rat; 1230 } else { 1231 ratio = 0; 1232 #ifdef DEBUG 1233 if(verbose) 1234 dump_tab(); /* dump string table */ 1235 #endif 1236 cl_hash ( (count_int) hsize ); 1237 free_ent = FIRST; 1238 clear_flg = 1; 1239 output ( (code_int) CLEAR ); 1240 #ifdef DEBUG 1241 if(debug) 1242 fprintf ( stderr, "clear\n" ); 1243 #endif /* DEBUG */ 1244 } 1245 } 1246 1247 cl_hash(hsize) /* reset code table */ 1248 register count_int hsize; 1249 { 1250 register count_int *htab_p = htab+hsize; 1251 register long i; 1252 register long m1 = -1; 1253 1254 i = hsize - 16; 1255 do { /* might use Sys V memset(3) here */ 1256 *(htab_p-16) = m1; 1257 *(htab_p-15) = m1; 1258 *(htab_p-14) = m1; 1259 *(htab_p-13) = m1; 1260 *(htab_p-12) = m1; 1261 *(htab_p-11) = m1; 1262 *(htab_p-10) = m1; 1263 *(htab_p-9) = m1; 1264 *(htab_p-8) = m1; 1265 *(htab_p-7) = m1; 1266 *(htab_p-6) = m1; 1267 *(htab_p-5) = m1; 1268 *(htab_p-4) = m1; 1269 *(htab_p-3) = m1; 1270 *(htab_p-2) = m1; 1271 *(htab_p-1) = m1; 1272 htab_p -= 16; 1273 } while ((i -= 16) >= 0); 1274 for ( i += 16; i > 0; i-- ) 1275 *--htab_p = m1; 1276 } 1277 1278 prratio(stream, num, den) 1279 FILE *stream; 1280 long int num, den; 1281 { 1282 register int q; /* Doesn't need to be long */ 1283 1284 if(num > 214748L) { /* 2147483647/10000 */ 1285 q = num / (den / 10000L); 1286 } else { 1287 q = 10000L * num / den; /* Long calculations, though */ 1288 } 1289 if (q < 0) { 1290 putc('-', stream); 1291 q = -q; 1292 } 1293 fprintf(stream, "%d.%02d%%", q / 100, q % 100); 1294 } 1295 1296 usage() 1297 { 1298 (void)fprintf(stderr, 1299 #ifdef DEBUG 1300 "compress [-CDVcdfnv] [-b maxbits] [file ...]\n"); 1301 #else 1302 "compress [-Ccdfnv] [-b maxbits] [file ...]\n"); 1303 #endif 1304 exit(1); 1305 } 1306