1 /* 2 * Copyright (c) 1985, 1986 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods, derived from original work by Spencer Thomas 7 * and Joseph Orost. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 char copyright[] = 40 "@(#) Copyright (c) 1985, 1986 The Regents of the University of California.\n\ 41 All rights reserved.\n"; 42 #endif /* not lint */ 43 44 #ifndef lint 45 static char sccsid[] = "@(#)compress.c 5.19 (Berkeley) 3/18/91"; 46 #endif /* not lint */ 47 48 /* 49 * compress.c - File compression ala IEEE Computer, June 1984. 50 * 51 * Authors: Spencer W. Thomas (decvax!utah-cs!thomas) 52 * Jim McKie (decvax!mcvax!jim) 53 * Steve Davies (decvax!vax135!petsd!peora!srd) 54 * Ken Turkowski (decvax!decwrl!turtlevax!ken) 55 * James A. Woods (decvax!ihnp4!ames!jaw) 56 * Joe Orost (decvax!vax135!petsd!joe) 57 */ 58 59 #include <sys/param.h> 60 #include <sys/stat.h> 61 #include <signal.h> 62 #include <utime.h> 63 #include <errno.h> 64 #include <unistd.h> 65 #include <stdio.h> 66 #include <ctype.h> 67 #include <stdlib.h> 68 #include <string.h> 69 70 /* 71 * Set USERMEM to the maximum amount of physical user memory available 72 * in bytes. USERMEM is used to determine the maximum BITS that can be used 73 * for compression. 74 * 75 * SACREDMEM is the amount of physical memory saved for others; compress 76 * will hog the rest. 77 */ 78 #ifndef SACREDMEM 79 #define SACREDMEM 0 80 #endif 81 82 #ifndef USERMEM 83 # define USERMEM 450000 /* default user memory */ 84 #endif 85 86 #ifdef pdp11 87 # define BITS 12 /* max bits/code for 16-bit machine */ 88 # define NO_UCHAR /* also if "unsigned char" functions as signed char */ 89 # undef USERMEM 90 #endif /* pdp11 */ /* don't forget to compile with -i */ 91 92 #ifdef USERMEM 93 # if USERMEM >= (433484+SACREDMEM) 94 # define PBITS 16 95 # else 96 # if USERMEM >= (229600+SACREDMEM) 97 # define PBITS 15 98 # else 99 # if USERMEM >= (127536+SACREDMEM) 100 # define PBITS 14 101 # else 102 # if USERMEM >= (73464+SACREDMEM) 103 # define PBITS 13 104 # else 105 # define PBITS 12 106 # endif 107 # endif 108 # endif 109 # endif 110 # undef USERMEM 111 #endif /* USERMEM */ 112 113 #ifdef PBITS /* Preferred BITS for this memory size */ 114 # ifndef BITS 115 # define BITS PBITS 116 # endif BITS 117 #endif /* PBITS */ 118 119 #if BITS == 16 120 # define HSIZE 69001 /* 95% occupancy */ 121 #endif 122 #if BITS == 15 123 # define HSIZE 35023 /* 94% occupancy */ 124 #endif 125 #if BITS == 14 126 # define HSIZE 18013 /* 91% occupancy */ 127 #endif 128 #if BITS == 13 129 # define HSIZE 9001 /* 91% occupancy */ 130 #endif 131 #if BITS <= 12 132 # define HSIZE 5003 /* 80% occupancy */ 133 #endif 134 135 /* 136 * a code_int must be able to hold 2**BITS values of type int, and also -1 137 */ 138 #if BITS > 15 139 typedef long int code_int; 140 #else 141 typedef int code_int; 142 #endif 143 144 #ifdef SIGNED_COMPARE_SLOW 145 typedef unsigned long int count_int; 146 typedef unsigned short int count_short; 147 #else 148 typedef long int count_int; 149 #endif 150 151 #ifdef NO_UCHAR 152 typedef char char_type; 153 #else 154 typedef unsigned char char_type; 155 #endif /* UCHAR */ 156 char_type magic_header[] = { "\037\235" }; /* 1F 9D */ 157 158 /* Defines for third byte of header */ 159 #define BIT_MASK 0x1f 160 #define BLOCK_MASK 0x80 161 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 162 a fourth header byte (for expansion). 163 */ 164 #define INIT_BITS 9 /* initial number of bits/code */ 165 166 int n_bits; /* number of bits/code */ 167 int maxbits = BITS; /* user settable max # bits/code */ 168 code_int maxcode; /* maximum code, given n_bits */ 169 code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */ 170 #ifdef COMPATIBLE /* But wrong! */ 171 # define MAXCODE(n_bits) (1 << (n_bits) - 1) 172 #else 173 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 174 #endif /* COMPATIBLE */ 175 176 count_int htab [HSIZE]; 177 unsigned short codetab [HSIZE]; 178 179 #define htabof(i) htab[i] 180 #define codetabof(i) codetab[i] 181 code_int hsize = HSIZE; /* for dynamic table sizing */ 182 count_int fsize; 183 184 /* 185 * To save much memory, we overlay the table used by compress() with those 186 * used by decompress(). The tab_prefix table is the same size and type 187 * as the codetab. The tab_suffix table needs 2**BITS characters. We 188 * get this from the beginning of htab. The output stack uses the rest 189 * of htab, and contains characters. There is plenty of room for any 190 * possible stack (stack used to be 8000 characters). 191 */ 192 193 #define tab_prefixof(i) codetabof(i) 194 # define tab_suffixof(i) ((char_type *)(htab))[i] 195 # define de_stack ((char_type *)&tab_suffixof(1<<BITS)) 196 197 code_int free_ent = 0; /* first unused entry */ 198 int exit_stat = 0; /* per-file status */ 199 int perm_stat = 0; /* permanent status */ 200 201 code_int getcode(); 202 203 int nomagic = 0; /* Use a 3-byte magic number header, unless old file */ 204 int zcat_flg = 0; /* Write output on stdout, suppress messages */ 205 int precious = 1; /* Don't unlink output file on interrupt */ 206 int quiet = 1; /* don't tell me about compression */ 207 208 /* 209 * block compression parameters -- after all codes are used up, 210 * and compression rate changes, start over. 211 */ 212 int block_compress = BLOCK_MASK; 213 int clear_flg = 0; 214 long int ratio = 0; 215 #define CHECK_GAP 10000 /* ratio check interval */ 216 count_int checkpoint = CHECK_GAP; 217 /* 218 * the next two codes should not be changed lightly, as they must not 219 * lie within the contiguous general code space. 220 */ 221 #define FIRST 257 /* first free entry */ 222 #define CLEAR 256 /* table clear output code */ 223 224 int force = 0; 225 char ofname [100]; 226 #ifdef DEBUG 227 int debug, verbose; 228 #endif 229 sig_t oldint; 230 int bgnd_flag; 231 232 int do_decomp = 0; 233 234 /*- 235 * Algorithm from "A Technique for High Performance Data Compression", 236 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. 237 * 238 * Usage: compress [-dfvc] [-b bits] [file ...] 239 * Inputs: 240 * -d: If given, decompression is done instead. 241 * 242 * -c: Write output on stdout, don't remove original. 243 * 244 * -b: Parameter limits the max number of bits/code. 245 * 246 * -f: Forces output file to be generated, even if one already 247 * exists, and even if no space is saved by compressing. 248 * If -f is not used, the user will be prompted if stdin is 249 * a tty, otherwise, the output file will not be overwritten. 250 * 251 * -v: Write compression statistics 252 * 253 * file ...: Files to be compressed. If none specified, stdin 254 * is used. 255 * Outputs: 256 * file.Z: Compressed form of file with same mode, owner, and utimes 257 * or stdout (if stdin used as input) 258 * 259 * Assumptions: 260 * When filenames are given, replaces with the compressed version 261 * (.Z suffix) only if the file decreases in size. 262 * Algorithm: 263 * Modified Lempel-Ziv method (LZW). Basically finds common 264 * substrings and replaces them with a variable size code. This is 265 * deterministic, and can be done on the fly. Thus, the decompression 266 * procedure needs no input table, but tracks the way the table was built. 267 */ 268 269 main(argc, argv) 270 int argc; 271 char **argv; 272 { 273 extern int optind; 274 extern char *optarg; 275 struct stat statbuf; 276 int ch, overwrite; 277 char **filelist, **fileptr, *cp, tempname[MAXPATHLEN]; 278 void onintr(), oops(); 279 280 /* This bg check only works for sh. */ 281 if ((oldint = signal(SIGINT, SIG_IGN)) != SIG_IGN) { 282 (void)signal(SIGINT, onintr); 283 (void)signal(SIGSEGV, oops); /* XXX */ 284 } 285 bgnd_flag = oldint != SIG_DFL; 286 287 #ifdef COMPATIBLE 288 nomagic = 1; /* Original didn't have a magic number */ 289 #endif 290 291 if (cp = rindex(argv[0], '/')) 292 ++cp; 293 else 294 cp = argv[0]; 295 if (strcmp(cp, "uncompress") == 0) 296 do_decomp = 1; 297 else if(strcmp(cp, "zcat") == 0) { 298 do_decomp = 1; 299 zcat_flg = 1; 300 } 301 302 /* 303 * -b maxbits => maxbits. 304 * -C => generate output compatible with compress 2.0. 305 * -c => cat all output to stdout 306 * -D => debug 307 * -d => do_decomp 308 * -f => force overwrite of output file 309 * -n => no header: useful to uncompress old files 310 * -V => print Version; debug verbose 311 * -v => unquiet 312 */ 313 314 overwrite = 0; 315 #ifdef DEBUG 316 while ((ch = getopt(argc, argv, "b:CcDdfnVv")) != EOF) 317 #else 318 while ((ch = getopt(argc, argv, "b:Ccdfnv")) != EOF) 319 #endif 320 switch(ch) { 321 case 'b': 322 maxbits = atoi(optarg); 323 break; 324 case 'C': 325 block_compress = 0; 326 break; 327 case 'c': 328 zcat_flg = 1; 329 break; 330 #ifdef DEBUG 331 case 'D': 332 debug = 1; 333 break; 334 #endif 335 case 'd': 336 do_decomp = 1; 337 break; 338 case 'f': 339 overwrite = 1; 340 force = 1; 341 break; 342 case 'n': 343 nomagic = 1; 344 break; 345 case 'q': 346 quiet = 1; 347 break; 348 #ifdef DEBUG 349 case 'V': 350 verbose = 1; 351 break; 352 #endif 353 case 'v': 354 quiet = 0; 355 break; 356 case '?': 357 default: 358 usage(); 359 } 360 argc -= optind; 361 argv += optind; 362 363 if (maxbits < INIT_BITS) 364 maxbits = INIT_BITS; 365 if (maxbits > BITS) 366 maxbits = BITS; 367 maxmaxcode = 1 << maxbits; 368 369 /* Build useless input file list. */ 370 filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); 371 while (*argv) 372 *fileptr++ = *argv++; 373 *fileptr = NULL; 374 375 if (*filelist != NULL) { 376 for (fileptr = filelist; *fileptr; fileptr++) { 377 exit_stat = 0; 378 if (do_decomp) { /* DECOMPRESSION */ 379 /* Check for .Z suffix */ 380 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { 381 /* No .Z: tack one on */ 382 strcpy(tempname, *fileptr); 383 strcat(tempname, ".Z"); 384 *fileptr = tempname; 385 } 386 /* Open input file */ 387 if ((freopen(*fileptr, "r", stdin)) == NULL) { 388 perror(*fileptr); 389 perm_stat = 1; 390 continue; 391 } 392 /* Check the magic number */ 393 if (nomagic == 0) { 394 if ((getchar() != (magic_header[0] & 0xFF)) 395 || (getchar() != (magic_header[1] & 0xFF))) { 396 fprintf(stderr, "%s: not in compressed format\n", 397 *fileptr); 398 continue; 399 } 400 maxbits = getchar(); /* set -b from file */ 401 block_compress = maxbits & BLOCK_MASK; 402 maxbits &= BIT_MASK; 403 maxmaxcode = 1 << maxbits; 404 if(maxbits > BITS) { 405 fprintf(stderr, 406 "%s: compressed with %d bits, can only handle %d bits\n", 407 *fileptr, maxbits, BITS); 408 continue; 409 } 410 } 411 /* Generate output filename */ 412 strcpy(ofname, *fileptr); 413 ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ 414 } else { /* COMPRESSION */ 415 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { 416 fprintf(stderr, "%s: already has .Z suffix -- no change\n", 417 *fileptr); 418 continue; 419 } 420 /* Open input file */ 421 if ((freopen(*fileptr, "r", stdin)) == NULL) { 422 perror(*fileptr); 423 perm_stat = 1; 424 continue; 425 } 426 stat ( *fileptr, &statbuf ); 427 fsize = (long) statbuf.st_size; 428 /* 429 * tune hash table size for small files -- ad hoc, 430 * but the sizes match earlier #defines, which 431 * serve as upper bounds on the number of output codes. 432 */ 433 hsize = HSIZE; 434 if ( fsize < (1 << 12) ) 435 hsize = MIN ( 5003, HSIZE ); 436 else if ( fsize < (1 << 13) ) 437 hsize = MIN ( 9001, HSIZE ); 438 else if ( fsize < (1 << 14) ) 439 hsize = MIN ( 18013, HSIZE ); 440 else if ( fsize < (1 << 15) ) 441 hsize = MIN ( 35023, HSIZE ); 442 else if ( fsize < 47000 ) 443 hsize = MIN ( 50021, HSIZE ); 444 445 /* Generate output filename */ 446 strcpy(ofname, *fileptr); 447 strcat(ofname, ".Z"); 448 } 449 /* Check for overwrite of existing file */ 450 if (overwrite == 0 && zcat_flg == 0) { 451 if (stat(ofname, &statbuf) == 0) { 452 char response[2]; 453 response[0] = 'n'; 454 fprintf(stderr, "%s already exists;", ofname); 455 if (bgnd_flag == 0 && isatty(2)) { 456 fprintf(stderr, " do you wish to overwrite %s (y or n)? ", 457 ofname); 458 fflush(stderr); 459 read(2, response, 2); 460 while (response[1] != '\n') { 461 if (read(2, response+1, 1) < 0) { /* Ack! */ 462 perror("stderr"); break; 463 } 464 } 465 } 466 if (response[0] != 'y') { 467 fprintf(stderr, "\tnot overwritten\n"); 468 continue; 469 } 470 } 471 } 472 if(zcat_flg == 0) { /* Open output file */ 473 if (freopen(ofname, "w", stdout) == NULL) { 474 perror(ofname); 475 perm_stat = 1; 476 continue; 477 } 478 precious = 0; 479 if(!quiet) 480 fprintf(stderr, "%s: ", *fileptr); 481 } 482 483 /* Actually do the compression/decompression */ 484 if (do_decomp == 0) compress(); 485 #ifndef DEBUG 486 else decompress(); 487 #else 488 else if (debug == 0) decompress(); 489 else printcodes(); 490 if (verbose) dump_tab(); 491 #endif /* DEBUG */ 492 if(zcat_flg == 0) { 493 copystat(*fileptr, ofname); /* Copy stats */ 494 precious = 1; 495 if((exit_stat == 1) || (!quiet)) 496 putc('\n', stderr); 497 } 498 } 499 } else { /* Standard input */ 500 if (do_decomp == 0) { 501 compress(); 502 #ifdef DEBUG 503 if(verbose) dump_tab(); 504 #endif /* DEBUG */ 505 if(!quiet) 506 putc('\n', stderr); 507 } else { 508 /* Check the magic number */ 509 if (nomagic == 0) { 510 if ((getchar()!=(magic_header[0] & 0xFF)) 511 || (getchar()!=(magic_header[1] & 0xFF))) { 512 fprintf(stderr, "stdin: not in compressed format\n"); 513 exit(1); 514 } 515 maxbits = getchar(); /* set -b from file */ 516 block_compress = maxbits & BLOCK_MASK; 517 maxbits &= BIT_MASK; 518 maxmaxcode = 1 << maxbits; 519 fsize = 100000; /* assume stdin large for USERMEM */ 520 if(maxbits > BITS) { 521 fprintf(stderr, 522 "stdin: compressed with %d bits, can only handle %d bits\n", 523 maxbits, BITS); 524 exit(1); 525 } 526 } 527 #ifndef DEBUG 528 decompress(); 529 #else 530 if (debug == 0) decompress(); 531 else printcodes(); 532 if (verbose) dump_tab(); 533 #endif /* DEBUG */ 534 } 535 } 536 exit(perm_stat ? perm_stat : exit_stat); 537 } 538 539 static int offset; 540 long int in_count = 1; /* length of input */ 541 long int bytes_out; /* length of compressed output */ 542 long int out_count = 0; /* # of codes output (for debugging) */ 543 544 /* 545 * compress stdin to stdout 546 * 547 * Algorithm: use open addressing double hashing (no chaining) on the 548 * prefix code / next character combination. We do a variant of Knuth's 549 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime 550 * secondary probe. Here, the modular division first probe is gives way 551 * to a faster exclusive-or manipulation. Also do block compression with 552 * an adaptive reset, whereby the code table is cleared when the compression 553 * ratio decreases, but after the table fills. The variable-length output 554 * codes are re-sized at this point, and a special CLEAR code is generated 555 * for the decompressor. Late addition: construct the table according to 556 * file size for noticeable speed improvement on small files. Please direct 557 * questions about this implementation to ames!jaw. 558 */ 559 560 compress() 561 { 562 register long fcode; 563 register code_int i = 0; 564 register int c; 565 register code_int ent; 566 register int disp; 567 register code_int hsize_reg; 568 register int hshift; 569 570 #ifndef COMPATIBLE 571 if (nomagic == 0) { 572 putchar(magic_header[0]); 573 putchar(magic_header[1]); 574 putchar((char)(maxbits | block_compress)); 575 if(ferror(stdout)) 576 writeerr(); 577 } 578 #endif /* COMPATIBLE */ 579 580 offset = 0; 581 bytes_out = 3; /* includes 3-byte header mojo */ 582 out_count = 0; 583 clear_flg = 0; 584 ratio = 0; 585 in_count = 1; 586 checkpoint = CHECK_GAP; 587 maxcode = MAXCODE(n_bits = INIT_BITS); 588 free_ent = ((block_compress) ? FIRST : 256 ); 589 590 ent = getchar (); 591 592 hshift = 0; 593 for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L ) 594 hshift++; 595 hshift = 8 - hshift; /* set hash code range bound */ 596 597 hsize_reg = hsize; 598 cl_hash( (count_int) hsize_reg); /* clear hash table */ 599 600 #ifdef SIGNED_COMPARE_SLOW 601 while ( (c = getchar()) != (unsigned) EOF ) { 602 #else 603 while ( (c = getchar()) != EOF ) { 604 #endif 605 in_count++; 606 fcode = (long) (((long) c << maxbits) + ent); 607 i = ((c << hshift) ^ ent); /* xor hashing */ 608 609 if ( htabof (i) == fcode ) { 610 ent = codetabof (i); 611 continue; 612 } else if ( (long)htabof (i) < 0 ) /* empty slot */ 613 goto nomatch; 614 disp = hsize_reg - i; /* secondary hash (after G. Knott) */ 615 if ( i == 0 ) 616 disp = 1; 617 probe: 618 if ( (i -= disp) < 0 ) 619 i += hsize_reg; 620 621 if ( htabof (i) == fcode ) { 622 ent = codetabof (i); 623 continue; 624 } 625 if ( (long)htabof (i) > 0 ) 626 goto probe; 627 nomatch: 628 output ( (code_int) ent ); 629 out_count++; 630 ent = c; 631 #ifdef SIGNED_COMPARE_SLOW 632 if ( (unsigned) free_ent < (unsigned) maxmaxcode) { 633 #else 634 if ( free_ent < maxmaxcode ) { 635 #endif 636 codetabof (i) = free_ent++; /* code -> hashtable */ 637 htabof (i) = fcode; 638 } 639 else if ( (count_int)in_count >= checkpoint && block_compress ) 640 cl_block (); 641 } 642 /* 643 * Put out the final code. 644 */ 645 output( (code_int)ent ); 646 out_count++; 647 output( (code_int)-1 ); 648 649 /* 650 * Print out stats on stderr 651 */ 652 if(zcat_flg == 0 && !quiet) { 653 #ifdef DEBUG 654 fprintf( stderr, 655 "%ld chars in, %ld codes (%ld bytes) out, compression factor: ", 656 in_count, out_count, bytes_out ); 657 prratio( stderr, in_count, bytes_out ); 658 fprintf( stderr, "\n"); 659 fprintf( stderr, "\tCompression as in compact: " ); 660 prratio( stderr, in_count-bytes_out, in_count ); 661 fprintf( stderr, "\n"); 662 fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n", 663 free_ent - 1, n_bits ); 664 #else /* !DEBUG */ 665 fprintf( stderr, "Compression: " ); 666 prratio( stderr, in_count-bytes_out, in_count ); 667 #endif /* DEBUG */ 668 } 669 if(bytes_out > in_count) /* exit(2) if no savings */ 670 exit_stat = 2; 671 return; 672 } 673 674 /*- 675 * Output the given code. 676 * Inputs: 677 * code: A n_bits-bit integer. If == -1, then EOF. This assumes 678 * that n_bits =< (long)wordsize - 1. 679 * Outputs: 680 * Outputs code to the file. 681 * Assumptions: 682 * Chars are 8 bits long. 683 * Algorithm: 684 * Maintain a BITS character long buffer (so that 8 codes will 685 * fit in it exactly). Use the VAX insv instruction to insert each 686 * code in turn. When the buffer fills up empty it and start over. 687 */ 688 689 static char buf[BITS]; 690 691 #ifndef vax 692 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; 693 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 694 #endif /* vax */ 695 696 output( code ) 697 code_int code; 698 { 699 #ifdef DEBUG 700 static int col = 0; 701 #endif /* DEBUG */ 702 703 /* 704 * On the VAX, it is important to have the register declarations 705 * in exactly the order given, or the asm will break. 706 */ 707 register int r_off = offset, bits= n_bits; 708 register char * bp = buf; 709 710 #ifdef DEBUG 711 if ( verbose ) 712 fprintf( stderr, "%5d%c", code, 713 (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 714 #endif /* DEBUG */ 715 if ( code >= 0 ) { 716 #if defined(vax) && !defined(__GNUC__) 717 /* 718 * VAX and PCC DEPENDENT!! Implementation on other machines is 719 * below. 720 * 721 * Translation: Insert BITS bits from the argument starting at 722 * offset bits from the beginning of buf. 723 */ 724 0; /* Work around for pcc -O bug with asm and if stmt */ 725 asm( "insv 4(ap),r11,r10,(r9)" ); 726 #else 727 /* 728 * byte/bit numbering on the VAX is simulated by the following code 729 */ 730 /* 731 * Get to the first byte. 732 */ 733 bp += (r_off >> 3); 734 r_off &= 7; 735 /* 736 * Since code is always >= 8 bits, only need to mask the first 737 * hunk on the left. 738 */ 739 *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; 740 bp++; 741 bits -= (8 - r_off); 742 code >>= 8 - r_off; 743 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 744 if ( bits >= 8 ) { 745 *bp++ = code; 746 code >>= 8; 747 bits -= 8; 748 } 749 /* Last bits. */ 750 if(bits) 751 *bp = code; 752 #endif /* vax */ 753 offset += n_bits; 754 if ( offset == (n_bits << 3) ) { 755 bp = buf; 756 bits = n_bits; 757 bytes_out += bits; 758 do { 759 putchar(*bp++); 760 if (ferror(stdout)) 761 writeerr(); 762 } while(--bits); 763 offset = 0; 764 } 765 766 /* 767 * If the next entry is going to be too big for the code size, 768 * then increase it, if possible. 769 */ 770 if ( free_ent > maxcode || (clear_flg > 0)) 771 { 772 /* 773 * Write the whole buffer, because the input side won't 774 * discover the size increase until after it has read it. 775 */ 776 if ( offset > 0 ) { 777 if( fwrite( buf, 1, n_bits, stdout ) != n_bits) 778 writeerr(); 779 bytes_out += n_bits; 780 } 781 offset = 0; 782 783 if ( clear_flg ) { 784 maxcode = MAXCODE (n_bits = INIT_BITS); 785 clear_flg = 0; 786 } 787 else { 788 n_bits++; 789 if ( n_bits == maxbits ) 790 maxcode = maxmaxcode; 791 else 792 maxcode = MAXCODE(n_bits); 793 } 794 #ifdef DEBUG 795 if ( debug ) { 796 fprintf( stderr, "\nChange to %d bits\n", n_bits ); 797 col = 0; 798 } 799 #endif /* DEBUG */ 800 } 801 } else { 802 /* 803 * At EOF, write the rest of the buffer. 804 */ 805 if ( offset > 0 ) { 806 offset = (offset + 7) / 8; 807 if( fwrite( buf, 1, offset, stdout ) != offset ) 808 writeerr(); 809 bytes_out += offset; 810 } 811 offset = 0; 812 (void)fflush( stdout ); 813 if( ferror( stdout ) ) 814 writeerr(); 815 #ifdef DEBUG 816 if ( verbose ) 817 fprintf( stderr, "\n" ); 818 #endif 819 } 820 } 821 822 /* 823 * Decompress stdin to stdout. This routine adapts to the codes in the 824 * file building the "string" table on-the-fly; requiring no table to 825 * be stored in the compressed file. The tables used herein are shared 826 * with those of the compress() routine. See the definitions above. 827 */ 828 829 decompress() { 830 register char_type *stackp; 831 register int finchar; 832 register code_int code, oldcode, incode; 833 int n, nwritten, offset; /* Variables for buffered write */ 834 char buff[BUFSIZ]; /* Buffer for buffered write */ 835 836 837 /* 838 * As above, initialize the first 256 entries in the table. 839 */ 840 maxcode = MAXCODE(n_bits = INIT_BITS); 841 for ( code = 255; code >= 0; code-- ) { 842 tab_prefixof(code) = 0; 843 tab_suffixof(code) = (char_type)code; 844 } 845 free_ent = ((block_compress) ? FIRST : 256 ); 846 847 finchar = oldcode = getcode(); 848 if(oldcode == -1) /* EOF already? */ 849 return; /* Get out of here */ 850 851 /* first code must be 8 bits = char */ 852 n=0; 853 buff[n++] = (char)finchar; 854 855 stackp = de_stack; 856 857 while ( (code = getcode()) > -1 ) { 858 859 if ( (code == CLEAR) && block_compress ) { 860 for ( code = 255; code >= 0; code-- ) 861 tab_prefixof(code) = 0; 862 clear_flg = 1; 863 free_ent = FIRST - 1; 864 if ( (code = getcode ()) == -1 ) /* O, untimely death! */ 865 break; 866 } 867 incode = code; 868 /* 869 * Special case for KwKwK string. 870 */ 871 if ( code >= free_ent ) { 872 *stackp++ = finchar; 873 code = oldcode; 874 } 875 876 /* 877 * Generate output characters in reverse order 878 */ 879 #ifdef SIGNED_COMPARE_SLOW 880 while ( ((unsigned long)code) >= ((unsigned long)256) ) { 881 #else 882 while ( code >= 256 ) { 883 #endif 884 *stackp++ = tab_suffixof(code); 885 code = tab_prefixof(code); 886 } 887 *stackp++ = finchar = tab_suffixof(code); 888 889 /* 890 * And put them out in forward order 891 */ 892 do { 893 /* 894 * About 60% of the time is spent in the putchar() call 895 * that appeared here. It was originally 896 * putchar ( *--stackp ); 897 * If we buffer the writes ourselves, we can go faster (about 898 * 30%). 899 * 900 * At this point, the next line is the next *big* time 901 * sink in the code. It takes up about 10% of the time. 902 */ 903 buff[n++] = *--stackp; 904 if (n == BUFSIZ) { 905 offset = 0; 906 do { 907 nwritten = write(fileno(stdout), &buff[offset], n); 908 if (nwritten < 0) 909 writeerr(); 910 offset += nwritten; 911 } while ((n -= nwritten) > 0); 912 } 913 } while ( stackp > de_stack ); 914 915 /* 916 * Generate the new entry. 917 */ 918 if ( (code=free_ent) < maxmaxcode ) { 919 tab_prefixof(code) = (unsigned short)oldcode; 920 tab_suffixof(code) = finchar; 921 free_ent = code+1; 922 } 923 /* 924 * Remember previous code. 925 */ 926 oldcode = incode; 927 } 928 /* 929 * Flush the stuff remaining in our buffer... 930 */ 931 offset = 0; 932 while (n > 0) { 933 nwritten = write(fileno(stdout), &buff[offset], n); 934 if (nwritten < 0) 935 writeerr(); 936 offset += nwritten; 937 n -= nwritten; 938 } 939 } 940 941 /*- 942 * Read one code from the standard input. If EOF, return -1. 943 * Inputs: 944 * stdin 945 * Outputs: 946 * code or -1 is returned. 947 */ 948 code_int 949 getcode() { 950 /* 951 * On the VAX, it is important to have the register declarations 952 * in exactly the order given, or the asm will break. 953 */ 954 register code_int code; 955 static int offset = 0, size = 0; 956 static char_type buf[BITS]; 957 register int r_off, bits; 958 register char_type *bp = buf; 959 960 if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { 961 /* 962 * If the next entry will be too big for the current code 963 * size, then we must increase the size. This implies reading 964 * a new buffer full, too. 965 */ 966 if ( free_ent > maxcode ) { 967 n_bits++; 968 if ( n_bits == maxbits ) 969 maxcode = maxmaxcode; /* won't get any bigger now */ 970 else 971 maxcode = MAXCODE(n_bits); 972 } 973 if ( clear_flg > 0) { 974 maxcode = MAXCODE (n_bits = INIT_BITS); 975 clear_flg = 0; 976 } 977 size = fread( buf, 1, n_bits, stdin ); 978 if ( size <= 0 ) 979 return -1; /* end of file */ 980 offset = 0; 981 /* Round size down to integral number of codes */ 982 size = (size << 3) - (n_bits - 1); 983 } 984 r_off = offset; 985 bits = n_bits; 986 #ifdef vax 987 asm( "extzv r10,r9,(r8),r11" ); 988 #else /* not a vax */ 989 /* 990 * Get to the first byte. 991 */ 992 bp += (r_off >> 3); 993 r_off &= 7; 994 /* Get first part (low order bits) */ 995 #ifdef NO_UCHAR 996 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff; 997 #else 998 code = (*bp++ >> r_off); 999 #endif /* NO_UCHAR */ 1000 bits -= (8 - r_off); 1001 r_off = 8 - r_off; /* now, offset into code word */ 1002 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 1003 if ( bits >= 8 ) { 1004 #ifdef NO_UCHAR 1005 code |= (*bp++ & 0xff) << r_off; 1006 #else 1007 code |= *bp++ << r_off; 1008 #endif /* NO_UCHAR */ 1009 r_off += 8; 1010 bits -= 8; 1011 } 1012 /* high order bits. */ 1013 code |= (*bp & rmask[bits]) << r_off; 1014 #endif /* vax */ 1015 offset += n_bits; 1016 1017 return code; 1018 } 1019 1020 #ifdef DEBUG 1021 printcodes() 1022 { 1023 /* 1024 * Just print out codes from input file. For debugging. 1025 */ 1026 code_int code; 1027 int col = 0, bits; 1028 1029 bits = n_bits = INIT_BITS; 1030 maxcode = MAXCODE(n_bits); 1031 free_ent = ((block_compress) ? FIRST : 256 ); 1032 while ( ( code = getcode() ) >= 0 ) { 1033 if ( (code == CLEAR) && block_compress ) { 1034 free_ent = FIRST - 1; 1035 clear_flg = 1; 1036 } 1037 else if ( free_ent < maxmaxcode ) 1038 free_ent++; 1039 if ( bits != n_bits ) { 1040 fprintf(stderr, "\nChange to %d bits\n", n_bits ); 1041 bits = n_bits; 1042 col = 0; 1043 } 1044 fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 1045 } 1046 putc( '\n', stderr ); 1047 exit( 0 ); 1048 } 1049 1050 code_int sorttab[1<<BITS]; /* sorted pointers into htab */ 1051 1052 dump_tab() /* dump string table */ 1053 { 1054 register int i, first; 1055 register ent; 1056 #define STACK_SIZE 15000 1057 int stack_top = STACK_SIZE; 1058 register c; 1059 1060 if(do_decomp == 0) { /* compressing */ 1061 register int flag = 1; 1062 1063 for(i=0; i<hsize; i++) { /* build sort pointers */ 1064 if((long)htabof(i) >= 0) { 1065 sorttab[codetabof(i)] = i; 1066 } 1067 } 1068 first = block_compress ? FIRST : 256; 1069 for(i = first; i < free_ent; i++) { 1070 fprintf(stderr, "%5d: \"", i); 1071 de_stack[--stack_top] = '\n'; 1072 de_stack[--stack_top] = '"'; 1073 stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff, 1074 stack_top); 1075 for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1); 1076 ent > 256; 1077 ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) { 1078 stack_top = in_stack(htabof(sorttab[ent]) >> maxbits, 1079 stack_top); 1080 } 1081 stack_top = in_stack(ent, stack_top); 1082 fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr); 1083 stack_top = STACK_SIZE; 1084 } 1085 } else if(!debug) { /* decompressing */ 1086 1087 for ( i = 0; i < free_ent; i++ ) { 1088 ent = i; 1089 c = tab_suffixof(ent); 1090 if ( isascii(c) && isprint(c) ) 1091 fprintf( stderr, "%5d: %5d/'%c' \"", 1092 ent, tab_prefixof(ent), c ); 1093 else 1094 fprintf( stderr, "%5d: %5d/\\%03o \"", 1095 ent, tab_prefixof(ent), c ); 1096 de_stack[--stack_top] = '\n'; 1097 de_stack[--stack_top] = '"'; 1098 for ( ; ent != NULL; 1099 ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) { 1100 stack_top = in_stack(tab_suffixof(ent), stack_top); 1101 } 1102 fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr ); 1103 stack_top = STACK_SIZE; 1104 } 1105 } 1106 } 1107 1108 int 1109 in_stack(c, stack_top) 1110 register c, stack_top; 1111 { 1112 if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) { 1113 de_stack[--stack_top] = c; 1114 } else { 1115 switch( c ) { 1116 case '\n': de_stack[--stack_top] = 'n'; break; 1117 case '\t': de_stack[--stack_top] = 't'; break; 1118 case '\b': de_stack[--stack_top] = 'b'; break; 1119 case '\f': de_stack[--stack_top] = 'f'; break; 1120 case '\r': de_stack[--stack_top] = 'r'; break; 1121 case '\\': de_stack[--stack_top] = '\\'; break; 1122 default: 1123 de_stack[--stack_top] = '0' + c % 8; 1124 de_stack[--stack_top] = '0' + (c / 8) % 8; 1125 de_stack[--stack_top] = '0' + c / 64; 1126 break; 1127 } 1128 de_stack[--stack_top] = '\\'; 1129 } 1130 return stack_top; 1131 } 1132 #endif /* DEBUG */ 1133 1134 writeerr() 1135 { 1136 (void)fprintf(stderr, "compress: %s: %s\n", 1137 ofname[0] ? ofname : "stdout", strerror(errno)); 1138 (void)unlink(ofname); 1139 exit(1); 1140 } 1141 1142 copystat(ifname, ofname) 1143 char *ifname, *ofname; 1144 { 1145 struct stat statbuf; 1146 int mode; 1147 struct utimbuf tp; 1148 1149 fclose(stdout); 1150 if (stat(ifname, &statbuf)) { /* Get stat on input file */ 1151 perror(ifname); 1152 return; 1153 } 1154 if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) { 1155 if(quiet) 1156 fprintf(stderr, "%s: ", ifname); 1157 fprintf(stderr, " -- not a regular file: unchanged"); 1158 exit_stat = 1; 1159 perm_stat = 1; 1160 } else if (statbuf.st_nlink > 1) { 1161 if(quiet) 1162 fprintf(stderr, "%s: ", ifname); 1163 fprintf(stderr, " -- has %d other links: unchanged", 1164 statbuf.st_nlink - 1); 1165 exit_stat = 1; 1166 perm_stat = 1; 1167 } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */ 1168 if(!quiet) 1169 fprintf(stderr, " -- file unchanged"); 1170 } else { /* ***** Successful Compression ***** */ 1171 exit_stat = 0; 1172 mode = statbuf.st_mode & 07777; 1173 if (chmod(ofname, mode)) /* Copy modes */ 1174 perror(ofname); 1175 chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ 1176 tp.actime = statbuf.st_atime; 1177 tp.modtime = statbuf.st_mtime; 1178 utime(ofname, &tp); /* Update last accessed and modified times */ 1179 if (unlink(ifname)) /* Remove input file */ 1180 perror(ifname); 1181 if(!quiet) 1182 fprintf(stderr, " -- replaced with %s", ofname); 1183 return; /* Successful return */ 1184 } 1185 1186 /* Unsuccessful return -- one of the tests failed */ 1187 if (unlink(ofname)) 1188 perror(ofname); 1189 } 1190 1191 void 1192 onintr ( ) 1193 { 1194 if (!precious) 1195 unlink ( ofname ); 1196 exit ( 1 ); 1197 } 1198 1199 void 1200 oops ( ) /* wild pointer -- assume bad input */ 1201 { 1202 if ( do_decomp ) 1203 fprintf ( stderr, "uncompress: corrupt input\n" ); 1204 unlink ( ofname ); 1205 exit ( 1 ); 1206 } 1207 1208 cl_block () /* table clear for block compress */ 1209 { 1210 register long int rat; 1211 1212 checkpoint = in_count + CHECK_GAP; 1213 #ifdef DEBUG 1214 if ( debug ) { 1215 fprintf ( stderr, "count: %ld, ratio: ", in_count ); 1216 prratio ( stderr, in_count, bytes_out ); 1217 fprintf ( stderr, "\n"); 1218 } 1219 #endif /* DEBUG */ 1220 1221 if(in_count > 0x007fffff) { /* shift will overflow */ 1222 rat = bytes_out >> 8; 1223 if(rat == 0) { /* Don't divide by zero */ 1224 rat = 0x7fffffff; 1225 } else { 1226 rat = in_count / rat; 1227 } 1228 } else { 1229 rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ 1230 } 1231 if ( rat > ratio ) { 1232 ratio = rat; 1233 } else { 1234 ratio = 0; 1235 #ifdef DEBUG 1236 if(verbose) 1237 dump_tab(); /* dump string table */ 1238 #endif 1239 cl_hash ( (count_int) hsize ); 1240 free_ent = FIRST; 1241 clear_flg = 1; 1242 output ( (code_int) CLEAR ); 1243 #ifdef DEBUG 1244 if(debug) 1245 fprintf ( stderr, "clear\n" ); 1246 #endif /* DEBUG */ 1247 } 1248 } 1249 1250 cl_hash(hsize) /* reset code table */ 1251 register count_int hsize; 1252 { 1253 register count_int *htab_p = htab+hsize; 1254 register long i; 1255 register long m1 = -1; 1256 1257 i = hsize - 16; 1258 do { /* might use Sys V memset(3) here */ 1259 *(htab_p-16) = m1; 1260 *(htab_p-15) = m1; 1261 *(htab_p-14) = m1; 1262 *(htab_p-13) = m1; 1263 *(htab_p-12) = m1; 1264 *(htab_p-11) = m1; 1265 *(htab_p-10) = m1; 1266 *(htab_p-9) = m1; 1267 *(htab_p-8) = m1; 1268 *(htab_p-7) = m1; 1269 *(htab_p-6) = m1; 1270 *(htab_p-5) = m1; 1271 *(htab_p-4) = m1; 1272 *(htab_p-3) = m1; 1273 *(htab_p-2) = m1; 1274 *(htab_p-1) = m1; 1275 htab_p -= 16; 1276 } while ((i -= 16) >= 0); 1277 for ( i += 16; i > 0; i-- ) 1278 *--htab_p = m1; 1279 } 1280 1281 prratio(stream, num, den) 1282 FILE *stream; 1283 long int num, den; 1284 { 1285 register int q; /* Doesn't need to be long */ 1286 1287 if(num > 214748L) { /* 2147483647/10000 */ 1288 q = num / (den / 10000L); 1289 } else { 1290 q = 10000L * num / den; /* Long calculations, though */ 1291 } 1292 if (q < 0) { 1293 putc('-', stream); 1294 q = -q; 1295 } 1296 fprintf(stream, "%d.%02d%%", q / 100, q % 100); 1297 } 1298 1299 usage() 1300 { 1301 (void)fprintf(stderr, 1302 #ifdef DEBUG 1303 "compress [-CDVcdfnv] [-b maxbits] [file ...]\n"); 1304 #else 1305 "compress [-Ccdfnv] [-b maxbits] [file ...]\n"); 1306 #endif 1307 exit(1); 1308 } 1309