1 /* 2 * Copyright (c) 1985, 1986 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods, derived from original work by Spencer Thomas 7 * and Joseph Orost. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 char copyright[] = 40 "@(#) Copyright (c) 1985, 1986 The Regents of the University of California.\n\ 41 All rights reserved.\n"; 42 #endif /* not lint */ 43 44 #ifndef lint 45 static char sccsid[] = "@(#)compress.c 5.19 (Berkeley) 3/18/91"; 46 #endif /* not lint */ 47 48 /* 49 * compress.c - File compression ala IEEE Computer, June 1984. 50 * 51 * Authors: Spencer W. Thomas (decvax!utah-cs!thomas) 52 * Jim McKie (decvax!mcvax!jim) 53 * Steve Davies (decvax!vax135!petsd!peora!srd) 54 * Ken Turkowski (decvax!decwrl!turtlevax!ken) 55 * James A. Woods (decvax!ihnp4!ames!jaw) 56 * Joe Orost (decvax!vax135!petsd!joe) 57 */ 58 59 #include <sys/param.h> 60 #include <sys/stat.h> 61 #include <signal.h> 62 #include <utime.h> 63 #include <errno.h> 64 #include <unistd.h> 65 #include <stdio.h> 66 #include <ctype.h> 67 #include <stdlib.h> 68 #include <string.h> 69 70 /* 71 * Set USERMEM to the maximum amount of physical user memory available 72 * in bytes. USERMEM is used to determine the maximum BITS that can be used 73 * for compression. 74 * 75 * SACREDMEM is the amount of physical memory saved for others; compress 76 * will hog the rest. 77 */ 78 #ifndef SACREDMEM 79 #define SACREDMEM 0 80 #endif 81 82 #ifndef USERMEM 83 # define USERMEM 450000 /* default user memory */ 84 #endif 85 86 #ifdef pdp11 87 # define BITS 12 /* max bits/code for 16-bit machine */ 88 # define NO_UCHAR /* also if "unsigned char" functions as signed char */ 89 # undef USERMEM 90 #endif /* pdp11 */ /* don't forget to compile with -i */ 91 92 #ifdef USERMEM 93 # if USERMEM >= (433484+SACREDMEM) 94 # define PBITS 16 95 # else 96 # if USERMEM >= (229600+SACREDMEM) 97 # define PBITS 15 98 # else 99 # if USERMEM >= (127536+SACREDMEM) 100 # define PBITS 14 101 # else 102 # if USERMEM >= (73464+SACREDMEM) 103 # define PBITS 13 104 # else 105 # define PBITS 12 106 # endif 107 # endif 108 # endif 109 # endif 110 # undef USERMEM 111 #endif /* USERMEM */ 112 113 #ifdef PBITS /* Preferred BITS for this memory size */ 114 # ifndef BITS 115 # define BITS PBITS 116 # endif BITS 117 #endif /* PBITS */ 118 119 #if BITS == 16 120 # define HSIZE 69001 /* 95% occupancy */ 121 #endif 122 #if BITS == 15 123 # define HSIZE 35023 /* 94% occupancy */ 124 #endif 125 #if BITS == 14 126 # define HSIZE 18013 /* 91% occupancy */ 127 #endif 128 #if BITS == 13 129 # define HSIZE 9001 /* 91% occupancy */ 130 #endif 131 #if BITS <= 12 132 # define HSIZE 5003 /* 80% occupancy */ 133 #endif 134 135 /* 136 * a code_int must be able to hold 2**BITS values of type int, and also -1 137 */ 138 #if BITS > 15 139 typedef long int code_int; 140 #else 141 typedef int code_int; 142 #endif 143 144 #ifdef SIGNED_COMPARE_SLOW 145 typedef unsigned long int count_int; 146 typedef unsigned short int count_short; 147 #else 148 typedef long int count_int; 149 #endif 150 151 #ifdef NO_UCHAR 152 typedef char char_type; 153 #else 154 typedef unsigned char char_type; 155 #endif /* UCHAR */ 156 char_type magic_header[] = { "\037\235" }; /* 1F 9D */ 157 158 /* Defines for third byte of header */ 159 #define BIT_MASK 0x1f 160 #define BLOCK_MASK 0x80 161 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 162 a fourth header byte (for expansion). 163 */ 164 #define INIT_BITS 9 /* initial number of bits/code */ 165 166 int n_bits; /* number of bits/code */ 167 int maxbits = BITS; /* user settable max # bits/code */ 168 code_int maxcode; /* maximum code, given n_bits */ 169 code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */ 170 #ifdef COMPATIBLE /* But wrong! */ 171 # define MAXCODE(n_bits) (1 << (n_bits) - 1) 172 #else 173 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 174 #endif /* COMPATIBLE */ 175 176 count_int htab [HSIZE]; 177 unsigned short codetab [HSIZE]; 178 179 #define htabof(i) htab[i] 180 #define codetabof(i) codetab[i] 181 code_int hsize = HSIZE; /* for dynamic table sizing */ 182 count_int fsize; 183 184 /* 185 * To save much memory, we overlay the table used by compress() with those 186 * used by decompress(). The tab_prefix table is the same size and type 187 * as the codetab. The tab_suffix table needs 2**BITS characters. We 188 * get this from the beginning of htab. The output stack uses the rest 189 * of htab, and contains characters. There is plenty of room for any 190 * possible stack (stack used to be 8000 characters). 191 */ 192 193 #define tab_prefixof(i) codetabof(i) 194 # define tab_suffixof(i) ((char_type *)(htab))[i] 195 # define de_stack ((char_type *)&tab_suffixof(1<<BITS)) 196 197 code_int free_ent = 0; /* first unused entry */ 198 int exit_stat = 0; /* per-file status */ 199 int perm_stat = 0; /* permanent status */ 200 201 code_int getcode(); 202 203 int nomagic = 0; /* Use a 3-byte magic number header, unless old file */ 204 int zcat_flg = 0; /* Write output on stdout, suppress messages */ 205 int precious = 1; /* Don't unlink output file on interrupt */ 206 int quiet = 1; /* don't tell me about compression */ 207 208 /* 209 * block compression parameters -- after all codes are used up, 210 * and compression rate changes, start over. 211 */ 212 int block_compress = BLOCK_MASK; 213 int clear_flg = 0; 214 long int ratio = 0; 215 #define CHECK_GAP 10000 /* ratio check interval */ 216 count_int checkpoint = CHECK_GAP; 217 /* 218 * the next two codes should not be changed lightly, as they must not 219 * lie within the contiguous general code space. 220 */ 221 #define FIRST 257 /* first free entry */ 222 #define CLEAR 256 /* table clear output code */ 223 224 int force = 0; 225 char ofname [MAXPATHLEN]; 226 #ifdef DEBUG 227 int debug, verbose; 228 #endif 229 sig_t oldint; 230 int bgnd_flag; 231 232 int do_decomp = 0; 233 234 /*- 235 * Algorithm from "A Technique for High Performance Data Compression", 236 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. 237 * 238 * Usage: compress [-dfvc] [-b bits] [file ...] 239 * Inputs: 240 * -d: If given, decompression is done instead. 241 * 242 * -c: Write output on stdout, don't remove original. 243 * 244 * -b: Parameter limits the max number of bits/code. 245 * 246 * -f: Forces output file to be generated, even if one already 247 * exists, and even if no space is saved by compressing. 248 * If -f is not used, the user will be prompted if stdin is 249 * a tty, otherwise, the output file will not be overwritten. 250 * 251 * -v: Write compression statistics 252 * 253 * file ...: Files to be compressed. If none specified, stdin 254 * is used. 255 * Outputs: 256 * file.Z: Compressed form of file with same mode, owner, and utimes 257 * or stdout (if stdin used as input) 258 * 259 * Assumptions: 260 * When filenames are given, replaces with the compressed version 261 * (.Z suffix) only if the file decreases in size. 262 * Algorithm: 263 * Modified Lempel-Ziv method (LZW). Basically finds common 264 * substrings and replaces them with a variable size code. This is 265 * deterministic, and can be done on the fly. Thus, the decompression 266 * procedure needs no input table, but tracks the way the table was built. 267 */ 268 269 main(argc, argv) 270 int argc; 271 char **argv; 272 { 273 extern int optind; 274 extern char *optarg; 275 struct stat statbuf; 276 int ch, overwrite; 277 char **filelist, **fileptr, *cp, tempname[MAXPATHLEN]; 278 void onintr(), oops(); 279 280 /* This bg check only works for sh. */ 281 if ((oldint = signal(SIGINT, SIG_IGN)) != SIG_IGN) { 282 (void)signal(SIGINT, onintr); 283 (void)signal(SIGSEGV, oops); /* XXX */ 284 } 285 bgnd_flag = oldint != SIG_DFL; 286 287 #ifdef COMPATIBLE 288 nomagic = 1; /* Original didn't have a magic number */ 289 #endif 290 291 if (cp = rindex(argv[0], '/')) 292 ++cp; 293 else 294 cp = argv[0]; 295 if (strcmp(cp, "uncompress") == 0) 296 do_decomp = 1; 297 else if(strcmp(cp, "zcat") == 0) { 298 do_decomp = 1; 299 zcat_flg = 1; 300 } 301 302 /* 303 * -b maxbits => maxbits. 304 * -C => generate output compatible with compress 2.0. 305 * -c => cat all output to stdout 306 * -D => debug 307 * -d => do_decomp 308 * -f => force overwrite of output file 309 * -n => no header: useful to uncompress old files 310 * -V => print Version; debug verbose 311 * -v => unquiet 312 */ 313 314 overwrite = 0; 315 #ifdef DEBUG 316 while ((ch = getopt(argc, argv, "b:CcDdfnVv")) != EOF) 317 #else 318 while ((ch = getopt(argc, argv, "b:Ccdfnv")) != EOF) 319 #endif 320 switch(ch) { 321 case 'b': 322 maxbits = atoi(optarg); 323 break; 324 case 'C': 325 block_compress = 0; 326 break; 327 case 'c': 328 zcat_flg = 1; 329 break; 330 #ifdef DEBUG 331 case 'D': 332 debug = 1; 333 break; 334 #endif 335 case 'd': 336 do_decomp = 1; 337 break; 338 case 'f': 339 overwrite = 1; 340 force = 1; 341 break; 342 case 'n': 343 nomagic = 1; 344 break; 345 case 'q': 346 quiet = 1; 347 break; 348 #ifdef DEBUG 349 case 'V': 350 verbose = 1; 351 break; 352 #endif 353 case 'v': 354 quiet = 0; 355 break; 356 case '?': 357 default: 358 usage(); 359 } 360 argc -= optind; 361 argv += optind; 362 363 if (maxbits < INIT_BITS) 364 maxbits = INIT_BITS; 365 if (maxbits > BITS) 366 maxbits = BITS; 367 maxmaxcode = 1 << maxbits; 368 369 filelist = fileptr = argv; 370 371 if (*filelist != NULL) { 372 for (fileptr = filelist; *fileptr; fileptr++) { 373 exit_stat = 0; 374 if (do_decomp) { /* DECOMPRESSION */ 375 /* Check for .Z suffix */ 376 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { 377 /* No .Z: tack one on */ 378 strcpy(tempname, *fileptr); 379 strcat(tempname, ".Z"); 380 *fileptr = tempname; 381 } 382 /* Open input file */ 383 if ((freopen(*fileptr, "r", stdin)) == NULL) { 384 perror(*fileptr); 385 perm_stat = 1; 386 continue; 387 } 388 /* Check the magic number */ 389 if (nomagic == 0) { 390 if ((getchar() != (magic_header[0] & 0xFF)) 391 || (getchar() != (magic_header[1] & 0xFF))) { 392 fprintf(stderr, "%s: not in compressed format\n", 393 *fileptr); 394 continue; 395 } 396 maxbits = getchar(); /* set -b from file */ 397 block_compress = maxbits & BLOCK_MASK; 398 maxbits &= BIT_MASK; 399 maxmaxcode = 1 << maxbits; 400 if(maxbits > BITS) { 401 fprintf(stderr, 402 "%s: compressed with %d bits, can only handle %d bits\n", 403 *fileptr, maxbits, BITS); 404 continue; 405 } 406 } 407 /* Generate output filename */ 408 strcpy(ofname, *fileptr); 409 ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ 410 } else { /* COMPRESSION */ 411 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { 412 fprintf(stderr, "%s: already has .Z suffix -- no change\n", 413 *fileptr); 414 continue; 415 } 416 /* Open input file */ 417 if ((freopen(*fileptr, "r", stdin)) == NULL) { 418 perror(*fileptr); 419 perm_stat = 1; 420 continue; 421 } 422 stat ( *fileptr, &statbuf ); 423 fsize = (long) statbuf.st_size; 424 /* 425 * tune hash table size for small files -- ad hoc, 426 * but the sizes match earlier #defines, which 427 * serve as upper bounds on the number of output codes. 428 */ 429 hsize = HSIZE; 430 if ( fsize < (1 << 12) ) 431 hsize = MIN ( 5003, HSIZE ); 432 else if ( fsize < (1 << 13) ) 433 hsize = MIN ( 9001, HSIZE ); 434 else if ( fsize < (1 << 14) ) 435 hsize = MIN ( 18013, HSIZE ); 436 else if ( fsize < (1 << 15) ) 437 hsize = MIN ( 35023, HSIZE ); 438 else if ( fsize < 47000 ) 439 hsize = MIN ( 50021, HSIZE ); 440 441 /* Generate output filename */ 442 strcpy(ofname, *fileptr); 443 strcat(ofname, ".Z"); 444 } 445 /* Check for overwrite of existing file */ 446 if (overwrite == 0 && zcat_flg == 0) { 447 if (stat(ofname, &statbuf) == 0) { 448 char response[2]; 449 response[0] = 'n'; 450 fprintf(stderr, "%s already exists;", ofname); 451 if (bgnd_flag == 0 && isatty(2)) { 452 fprintf(stderr, " do you wish to overwrite %s (y or n)? ", 453 ofname); 454 fflush(stderr); 455 read(2, response, 2); 456 while (response[1] != '\n') { 457 if (read(2, response+1, 1) < 0) { /* Ack! */ 458 perror("stderr"); break; 459 } 460 } 461 } 462 if (response[0] != 'y') { 463 fprintf(stderr, "\tnot overwritten\n"); 464 continue; 465 } 466 } 467 } 468 if(zcat_flg == 0) { /* Open output file */ 469 if (freopen(ofname, "w", stdout) == NULL) { 470 perror(ofname); 471 perm_stat = 1; 472 continue; 473 } 474 precious = 0; 475 if(!quiet) 476 fprintf(stderr, "%s: ", *fileptr); 477 } 478 479 /* Actually do the compression/decompression */ 480 if (do_decomp == 0) compress(); 481 #ifndef DEBUG 482 else decompress(); 483 #else 484 else if (debug == 0) decompress(); 485 else printcodes(); 486 if (verbose) dump_tab(); 487 #endif /* DEBUG */ 488 if(zcat_flg == 0) { 489 copystat(*fileptr, ofname); /* Copy stats */ 490 precious = 1; 491 if((exit_stat == 1) || (!quiet)) 492 putc('\n', stderr); 493 } 494 } 495 } else { /* Standard input */ 496 if (do_decomp == 0) { 497 compress(); 498 #ifdef DEBUG 499 if(verbose) dump_tab(); 500 #endif /* DEBUG */ 501 if(!quiet) 502 putc('\n', stderr); 503 } else { 504 /* Check the magic number */ 505 if (nomagic == 0) { 506 if ((getchar()!=(magic_header[0] & 0xFF)) 507 || (getchar()!=(magic_header[1] & 0xFF))) { 508 fprintf(stderr, "stdin: not in compressed format\n"); 509 exit(1); 510 } 511 maxbits = getchar(); /* set -b from file */ 512 block_compress = maxbits & BLOCK_MASK; 513 maxbits &= BIT_MASK; 514 maxmaxcode = 1 << maxbits; 515 fsize = 100000; /* assume stdin large for USERMEM */ 516 if(maxbits > BITS) { 517 fprintf(stderr, 518 "stdin: compressed with %d bits, can only handle %d bits\n", 519 maxbits, BITS); 520 exit(1); 521 } 522 } 523 #ifndef DEBUG 524 decompress(); 525 #else 526 if (debug == 0) decompress(); 527 else printcodes(); 528 if (verbose) dump_tab(); 529 #endif /* DEBUG */ 530 } 531 } 532 exit(perm_stat ? perm_stat : exit_stat); 533 } 534 535 static int offset; 536 long int in_count = 1; /* length of input */ 537 long int bytes_out; /* length of compressed output */ 538 long int out_count = 0; /* # of codes output (for debugging) */ 539 540 /* 541 * compress stdin to stdout 542 * 543 * Algorithm: use open addressing double hashing (no chaining) on the 544 * prefix code / next character combination. We do a variant of Knuth's 545 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime 546 * secondary probe. Here, the modular division first probe is gives way 547 * to a faster exclusive-or manipulation. Also do block compression with 548 * an adaptive reset, whereby the code table is cleared when the compression 549 * ratio decreases, but after the table fills. The variable-length output 550 * codes are re-sized at this point, and a special CLEAR code is generated 551 * for the decompressor. Late addition: construct the table according to 552 * file size for noticeable speed improvement on small files. Please direct 553 * questions about this implementation to ames!jaw. 554 */ 555 556 compress() 557 { 558 register long fcode; 559 register code_int i = 0; 560 register int c; 561 register code_int ent; 562 register int disp; 563 register code_int hsize_reg; 564 register int hshift; 565 566 #ifndef COMPATIBLE 567 if (nomagic == 0) { 568 putchar(magic_header[0]); 569 putchar(magic_header[1]); 570 putchar((char)(maxbits | block_compress)); 571 if(ferror(stdout)) 572 writeerr(); 573 } 574 #endif /* COMPATIBLE */ 575 576 offset = 0; 577 bytes_out = 3; /* includes 3-byte header mojo */ 578 out_count = 0; 579 clear_flg = 0; 580 ratio = 0; 581 in_count = 1; 582 checkpoint = CHECK_GAP; 583 maxcode = MAXCODE(n_bits = INIT_BITS); 584 free_ent = ((block_compress) ? FIRST : 256 ); 585 586 ent = getchar (); 587 588 hshift = 0; 589 for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L ) 590 hshift++; 591 hshift = 8 - hshift; /* set hash code range bound */ 592 593 hsize_reg = hsize; 594 cl_hash( (count_int) hsize_reg); /* clear hash table */ 595 596 #ifdef SIGNED_COMPARE_SLOW 597 while ( (c = getchar()) != (unsigned) EOF ) { 598 #else 599 while ( (c = getchar()) != EOF ) { 600 #endif 601 in_count++; 602 fcode = (long) (((long) c << maxbits) + ent); 603 i = ((c << hshift) ^ ent); /* xor hashing */ 604 605 if ( htabof (i) == fcode ) { 606 ent = codetabof (i); 607 continue; 608 } else if ( (long)htabof (i) < 0 ) /* empty slot */ 609 goto nomatch; 610 disp = hsize_reg - i; /* secondary hash (after G. Knott) */ 611 if ( i == 0 ) 612 disp = 1; 613 probe: 614 if ( (i -= disp) < 0 ) 615 i += hsize_reg; 616 617 if ( htabof (i) == fcode ) { 618 ent = codetabof (i); 619 continue; 620 } 621 if ( (long)htabof (i) > 0 ) 622 goto probe; 623 nomatch: 624 output ( (code_int) ent ); 625 out_count++; 626 ent = c; 627 #ifdef SIGNED_COMPARE_SLOW 628 if ( (unsigned) free_ent < (unsigned) maxmaxcode) { 629 #else 630 if ( free_ent < maxmaxcode ) { 631 #endif 632 codetabof (i) = free_ent++; /* code -> hashtable */ 633 htabof (i) = fcode; 634 } 635 else if ( (count_int)in_count >= checkpoint && block_compress ) 636 cl_block (); 637 } 638 /* 639 * Put out the final code. 640 */ 641 output( (code_int)ent ); 642 out_count++; 643 output( (code_int)-1 ); 644 645 /* 646 * Print out stats on stderr 647 */ 648 if(zcat_flg == 0 && !quiet) { 649 #ifdef DEBUG 650 fprintf( stderr, 651 "%ld chars in, %ld codes (%ld bytes) out, compression factor: ", 652 in_count, out_count, bytes_out ); 653 prratio( stderr, in_count, bytes_out ); 654 fprintf( stderr, "\n"); 655 fprintf( stderr, "\tCompression as in compact: " ); 656 prratio( stderr, in_count-bytes_out, in_count ); 657 fprintf( stderr, "\n"); 658 fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n", 659 free_ent - 1, n_bits ); 660 #else /* !DEBUG */ 661 fprintf( stderr, "Compression: " ); 662 prratio( stderr, in_count-bytes_out, in_count ); 663 #endif /* DEBUG */ 664 } 665 if(bytes_out > in_count) /* exit(2) if no savings */ 666 exit_stat = 2; 667 return; 668 } 669 670 /*- 671 * Output the given code. 672 * Inputs: 673 * code: A n_bits-bit integer. If == -1, then EOF. This assumes 674 * that n_bits =< (long)wordsize - 1. 675 * Outputs: 676 * Outputs code to the file. 677 * Assumptions: 678 * Chars are 8 bits long. 679 * Algorithm: 680 * Maintain a BITS character long buffer (so that 8 codes will 681 * fit in it exactly). Use the VAX insv instruction to insert each 682 * code in turn. When the buffer fills up empty it and start over. 683 */ 684 685 static char buf[BITS]; 686 687 #ifndef vax 688 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; 689 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 690 #endif /* vax */ 691 692 output( code ) 693 code_int code; 694 { 695 #ifdef DEBUG 696 static int col = 0; 697 #endif /* DEBUG */ 698 699 /* 700 * On the VAX, it is important to have the register declarations 701 * in exactly the order given, or the asm will break. 702 */ 703 register int r_off = offset, bits= n_bits; 704 register char * bp = buf; 705 706 #ifdef DEBUG 707 if ( verbose ) 708 fprintf( stderr, "%5d%c", code, 709 (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 710 #endif /* DEBUG */ 711 if ( code >= 0 ) { 712 #if defined(vax) && !defined(__GNUC__) 713 /* 714 * VAX and PCC DEPENDENT!! Implementation on other machines is 715 * below. 716 * 717 * Translation: Insert BITS bits from the argument starting at 718 * offset bits from the beginning of buf. 719 */ 720 0; /* Work around for pcc -O bug with asm and if stmt */ 721 asm( "insv 4(ap),r11,r10,(r9)" ); 722 #else 723 /* 724 * byte/bit numbering on the VAX is simulated by the following code 725 */ 726 /* 727 * Get to the first byte. 728 */ 729 bp += (r_off >> 3); 730 r_off &= 7; 731 /* 732 * Since code is always >= 8 bits, only need to mask the first 733 * hunk on the left. 734 */ 735 *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; 736 bp++; 737 bits -= (8 - r_off); 738 code >>= 8 - r_off; 739 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 740 if ( bits >= 8 ) { 741 *bp++ = code; 742 code >>= 8; 743 bits -= 8; 744 } 745 /* Last bits. */ 746 if(bits) 747 *bp = code; 748 #endif /* vax */ 749 offset += n_bits; 750 if ( offset == (n_bits << 3) ) { 751 bp = buf; 752 bits = n_bits; 753 bytes_out += bits; 754 do { 755 putchar(*bp++); 756 if (ferror(stdout)) 757 writeerr(); 758 } while(--bits); 759 offset = 0; 760 } 761 762 /* 763 * If the next entry is going to be too big for the code size, 764 * then increase it, if possible. 765 */ 766 if ( free_ent > maxcode || (clear_flg > 0)) 767 { 768 /* 769 * Write the whole buffer, because the input side won't 770 * discover the size increase until after it has read it. 771 */ 772 if ( offset > 0 ) { 773 if( fwrite( buf, 1, n_bits, stdout ) != n_bits) 774 writeerr(); 775 bytes_out += n_bits; 776 } 777 offset = 0; 778 779 if ( clear_flg ) { 780 maxcode = MAXCODE (n_bits = INIT_BITS); 781 clear_flg = 0; 782 } 783 else { 784 n_bits++; 785 if ( n_bits == maxbits ) 786 maxcode = maxmaxcode; 787 else 788 maxcode = MAXCODE(n_bits); 789 } 790 #ifdef DEBUG 791 if ( debug ) { 792 fprintf( stderr, "\nChange to %d bits\n", n_bits ); 793 col = 0; 794 } 795 #endif /* DEBUG */ 796 } 797 } else { 798 /* 799 * At EOF, write the rest of the buffer. 800 */ 801 if ( offset > 0 ) { 802 offset = (offset + 7) / 8; 803 if( fwrite( buf, 1, offset, stdout ) != offset ) 804 writeerr(); 805 bytes_out += offset; 806 } 807 offset = 0; 808 (void)fflush( stdout ); 809 if( ferror( stdout ) ) 810 writeerr(); 811 #ifdef DEBUG 812 if ( verbose ) 813 fprintf( stderr, "\n" ); 814 #endif 815 } 816 } 817 818 /* 819 * Decompress stdin to stdout. This routine adapts to the codes in the 820 * file building the "string" table on-the-fly; requiring no table to 821 * be stored in the compressed file. The tables used herein are shared 822 * with those of the compress() routine. See the definitions above. 823 */ 824 825 decompress() { 826 register char_type *stackp; 827 register int finchar; 828 register code_int code, oldcode, incode; 829 int n, nwritten, offset; /* Variables for buffered write */ 830 char buff[BUFSIZ]; /* Buffer for buffered write */ 831 832 833 /* 834 * As above, initialize the first 256 entries in the table. 835 */ 836 maxcode = MAXCODE(n_bits = INIT_BITS); 837 for ( code = 255; code >= 0; code-- ) { 838 tab_prefixof(code) = 0; 839 tab_suffixof(code) = (char_type)code; 840 } 841 free_ent = ((block_compress) ? FIRST : 256 ); 842 843 finchar = oldcode = getcode(); 844 if(oldcode == -1) /* EOF already? */ 845 return; /* Get out of here */ 846 847 /* first code must be 8 bits = char */ 848 n=0; 849 buff[n++] = (char)finchar; 850 851 stackp = de_stack; 852 853 while ( (code = getcode()) > -1 ) { 854 855 if ( (code == CLEAR) && block_compress ) { 856 for ( code = 255; code >= 0; code-- ) 857 tab_prefixof(code) = 0; 858 clear_flg = 1; 859 free_ent = FIRST - 1; 860 if ( (code = getcode ()) == -1 ) /* O, untimely death! */ 861 break; 862 } 863 incode = code; 864 /* 865 * Special case for KwKwK string. 866 */ 867 if ( code >= free_ent ) { 868 *stackp++ = finchar; 869 code = oldcode; 870 } 871 872 /* 873 * Generate output characters in reverse order 874 */ 875 #ifdef SIGNED_COMPARE_SLOW 876 while ( ((unsigned long)code) >= ((unsigned long)256) ) { 877 #else 878 while ( code >= 256 ) { 879 #endif 880 *stackp++ = tab_suffixof(code); 881 code = tab_prefixof(code); 882 } 883 *stackp++ = finchar = tab_suffixof(code); 884 885 /* 886 * And put them out in forward order 887 */ 888 do { 889 /* 890 * About 60% of the time is spent in the putchar() call 891 * that appeared here. It was originally 892 * putchar ( *--stackp ); 893 * If we buffer the writes ourselves, we can go faster (about 894 * 30%). 895 * 896 * At this point, the next line is the next *big* time 897 * sink in the code. It takes up about 10% of the time. 898 */ 899 buff[n++] = *--stackp; 900 if (n == BUFSIZ) { 901 offset = 0; 902 do { 903 nwritten = write(fileno(stdout), &buff[offset], n); 904 if (nwritten < 0) 905 writeerr(); 906 offset += nwritten; 907 } while ((n -= nwritten) > 0); 908 } 909 } while ( stackp > de_stack ); 910 911 /* 912 * Generate the new entry. 913 */ 914 if ( (code=free_ent) < maxmaxcode ) { 915 tab_prefixof(code) = (unsigned short)oldcode; 916 tab_suffixof(code) = finchar; 917 free_ent = code+1; 918 } 919 /* 920 * Remember previous code. 921 */ 922 oldcode = incode; 923 } 924 /* 925 * Flush the stuff remaining in our buffer... 926 */ 927 offset = 0; 928 while (n > 0) { 929 nwritten = write(fileno(stdout), &buff[offset], n); 930 if (nwritten < 0) 931 writeerr(); 932 offset += nwritten; 933 n -= nwritten; 934 } 935 } 936 937 /*- 938 * Read one code from the standard input. If EOF, return -1. 939 * Inputs: 940 * stdin 941 * Outputs: 942 * code or -1 is returned. 943 */ 944 code_int 945 getcode() { 946 /* 947 * On the VAX, it is important to have the register declarations 948 * in exactly the order given, or the asm will break. 949 */ 950 register code_int code; 951 static int offset = 0, size = 0; 952 static char_type buf[BITS]; 953 register int r_off, bits; 954 register char_type *bp = buf; 955 956 if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { 957 /* 958 * If the next entry will be too big for the current code 959 * size, then we must increase the size. This implies reading 960 * a new buffer full, too. 961 */ 962 if ( free_ent > maxcode ) { 963 n_bits++; 964 if ( n_bits == maxbits ) 965 maxcode = maxmaxcode; /* won't get any bigger now */ 966 else 967 maxcode = MAXCODE(n_bits); 968 } 969 if ( clear_flg > 0) { 970 maxcode = MAXCODE (n_bits = INIT_BITS); 971 clear_flg = 0; 972 } 973 size = fread( buf, 1, n_bits, stdin ); 974 if ( size <= 0 ) 975 return -1; /* end of file */ 976 offset = 0; 977 /* Round size down to integral number of codes */ 978 size = (size << 3) - (n_bits - 1); 979 } 980 r_off = offset; 981 bits = n_bits; 982 #ifdef vax 983 asm( "extzv r10,r9,(r8),r11" ); 984 #else /* not a vax */ 985 /* 986 * Get to the first byte. 987 */ 988 bp += (r_off >> 3); 989 r_off &= 7; 990 /* Get first part (low order bits) */ 991 #ifdef NO_UCHAR 992 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff; 993 #else 994 code = (*bp++ >> r_off); 995 #endif /* NO_UCHAR */ 996 bits -= (8 - r_off); 997 r_off = 8 - r_off; /* now, offset into code word */ 998 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 999 if ( bits >= 8 ) { 1000 #ifdef NO_UCHAR 1001 code |= (*bp++ & 0xff) << r_off; 1002 #else 1003 code |= *bp++ << r_off; 1004 #endif /* NO_UCHAR */ 1005 r_off += 8; 1006 bits -= 8; 1007 } 1008 /* high order bits. */ 1009 code |= (*bp & rmask[bits]) << r_off; 1010 #endif /* vax */ 1011 offset += n_bits; 1012 1013 return code; 1014 } 1015 1016 #ifdef DEBUG 1017 printcodes() 1018 { 1019 /* 1020 * Just print out codes from input file. For debugging. 1021 */ 1022 code_int code; 1023 int col = 0, bits; 1024 1025 bits = n_bits = INIT_BITS; 1026 maxcode = MAXCODE(n_bits); 1027 free_ent = ((block_compress) ? FIRST : 256 ); 1028 while ( ( code = getcode() ) >= 0 ) { 1029 if ( (code == CLEAR) && block_compress ) { 1030 free_ent = FIRST - 1; 1031 clear_flg = 1; 1032 } 1033 else if ( free_ent < maxmaxcode ) 1034 free_ent++; 1035 if ( bits != n_bits ) { 1036 fprintf(stderr, "\nChange to %d bits\n", n_bits ); 1037 bits = n_bits; 1038 col = 0; 1039 } 1040 fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 1041 } 1042 putc( '\n', stderr ); 1043 exit( 0 ); 1044 } 1045 1046 code_int sorttab[1<<BITS]; /* sorted pointers into htab */ 1047 1048 dump_tab() /* dump string table */ 1049 { 1050 register int i, first; 1051 register ent; 1052 #define STACK_SIZE 15000 1053 int stack_top = STACK_SIZE; 1054 register c; 1055 1056 if(do_decomp == 0) { /* compressing */ 1057 register int flag = 1; 1058 1059 for(i=0; i<hsize; i++) { /* build sort pointers */ 1060 if((long)htabof(i) >= 0) { 1061 sorttab[codetabof(i)] = i; 1062 } 1063 } 1064 first = block_compress ? FIRST : 256; 1065 for(i = first; i < free_ent; i++) { 1066 fprintf(stderr, "%5d: \"", i); 1067 de_stack[--stack_top] = '\n'; 1068 de_stack[--stack_top] = '"'; 1069 stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff, 1070 stack_top); 1071 for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1); 1072 ent > 256; 1073 ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) { 1074 stack_top = in_stack(htabof(sorttab[ent]) >> maxbits, 1075 stack_top); 1076 } 1077 stack_top = in_stack(ent, stack_top); 1078 fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr); 1079 stack_top = STACK_SIZE; 1080 } 1081 } else if(!debug) { /* decompressing */ 1082 1083 for ( i = 0; i < free_ent; i++ ) { 1084 ent = i; 1085 c = tab_suffixof(ent); 1086 if ( isascii(c) && isprint(c) ) 1087 fprintf( stderr, "%5d: %5d/'%c' \"", 1088 ent, tab_prefixof(ent), c ); 1089 else 1090 fprintf( stderr, "%5d: %5d/\\%03o \"", 1091 ent, tab_prefixof(ent), c ); 1092 de_stack[--stack_top] = '\n'; 1093 de_stack[--stack_top] = '"'; 1094 for ( ; ent != NULL; 1095 ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) { 1096 stack_top = in_stack(tab_suffixof(ent), stack_top); 1097 } 1098 fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr ); 1099 stack_top = STACK_SIZE; 1100 } 1101 } 1102 } 1103 1104 int 1105 in_stack(c, stack_top) 1106 register c, stack_top; 1107 { 1108 if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) { 1109 de_stack[--stack_top] = c; 1110 } else { 1111 switch( c ) { 1112 case '\n': de_stack[--stack_top] = 'n'; break; 1113 case '\t': de_stack[--stack_top] = 't'; break; 1114 case '\b': de_stack[--stack_top] = 'b'; break; 1115 case '\f': de_stack[--stack_top] = 'f'; break; 1116 case '\r': de_stack[--stack_top] = 'r'; break; 1117 case '\\': de_stack[--stack_top] = '\\'; break; 1118 default: 1119 de_stack[--stack_top] = '0' + c % 8; 1120 de_stack[--stack_top] = '0' + (c / 8) % 8; 1121 de_stack[--stack_top] = '0' + c / 64; 1122 break; 1123 } 1124 de_stack[--stack_top] = '\\'; 1125 } 1126 return stack_top; 1127 } 1128 #endif /* DEBUG */ 1129 1130 writeerr() 1131 { 1132 (void)fprintf(stderr, "compress: %s: %s\n", 1133 ofname[0] ? ofname : "stdout", strerror(errno)); 1134 (void)unlink(ofname); 1135 exit(1); 1136 } 1137 1138 copystat(ifname, ofname) 1139 char *ifname, *ofname; 1140 { 1141 struct stat statbuf; 1142 int mode; 1143 struct utimbuf tp; 1144 1145 fclose(stdout); 1146 if (stat(ifname, &statbuf)) { /* Get stat on input file */ 1147 perror(ifname); 1148 return; 1149 } 1150 if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) { 1151 if(quiet) 1152 fprintf(stderr, "%s: ", ifname); 1153 fprintf(stderr, " -- not a regular file: unchanged"); 1154 exit_stat = 1; 1155 perm_stat = 1; 1156 } else if (statbuf.st_nlink > 1) { 1157 if(quiet) 1158 fprintf(stderr, "%s: ", ifname); 1159 fprintf(stderr, " -- has %d other links: unchanged", 1160 statbuf.st_nlink - 1); 1161 exit_stat = 1; 1162 perm_stat = 1; 1163 } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */ 1164 if(!quiet) 1165 fprintf(stderr, " -- file unchanged"); 1166 } else { /* ***** Successful Compression ***** */ 1167 exit_stat = 0; 1168 mode = statbuf.st_mode & 07777; 1169 if (chmod(ofname, mode)) /* Copy modes */ 1170 perror(ofname); 1171 chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ 1172 tp.actime = statbuf.st_atime; 1173 tp.modtime = statbuf.st_mtime; 1174 utime(ofname, &tp); /* Update last accessed and modified times */ 1175 if (unlink(ifname)) /* Remove input file */ 1176 perror(ifname); 1177 if(!quiet) 1178 fprintf(stderr, " -- replaced with %s", ofname); 1179 return; /* Successful return */ 1180 } 1181 1182 /* Unsuccessful return -- one of the tests failed */ 1183 if (unlink(ofname)) 1184 perror(ofname); 1185 } 1186 1187 void 1188 onintr ( ) 1189 { 1190 if (!precious) 1191 unlink ( ofname ); 1192 exit ( 1 ); 1193 } 1194 1195 void 1196 oops ( ) /* wild pointer -- assume bad input */ 1197 { 1198 if ( do_decomp ) 1199 fprintf ( stderr, "uncompress: corrupt input\n" ); 1200 unlink ( ofname ); 1201 exit ( 1 ); 1202 } 1203 1204 cl_block () /* table clear for block compress */ 1205 { 1206 register long int rat; 1207 1208 checkpoint = in_count + CHECK_GAP; 1209 #ifdef DEBUG 1210 if ( debug ) { 1211 fprintf ( stderr, "count: %ld, ratio: ", in_count ); 1212 prratio ( stderr, in_count, bytes_out ); 1213 fprintf ( stderr, "\n"); 1214 } 1215 #endif /* DEBUG */ 1216 1217 if(in_count > 0x007fffff) { /* shift will overflow */ 1218 rat = bytes_out >> 8; 1219 if(rat == 0) { /* Don't divide by zero */ 1220 rat = 0x7fffffff; 1221 } else { 1222 rat = in_count / rat; 1223 } 1224 } else { 1225 rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ 1226 } 1227 if ( rat > ratio ) { 1228 ratio = rat; 1229 } else { 1230 ratio = 0; 1231 #ifdef DEBUG 1232 if(verbose) 1233 dump_tab(); /* dump string table */ 1234 #endif 1235 cl_hash ( (count_int) hsize ); 1236 free_ent = FIRST; 1237 clear_flg = 1; 1238 output ( (code_int) CLEAR ); 1239 #ifdef DEBUG 1240 if(debug) 1241 fprintf ( stderr, "clear\n" ); 1242 #endif /* DEBUG */ 1243 } 1244 } 1245 1246 cl_hash(hsize) /* reset code table */ 1247 register count_int hsize; 1248 { 1249 register count_int *htab_p = htab+hsize; 1250 register long i; 1251 register long m1 = -1; 1252 1253 i = hsize - 16; 1254 do { /* might use Sys V memset(3) here */ 1255 *(htab_p-16) = m1; 1256 *(htab_p-15) = m1; 1257 *(htab_p-14) = m1; 1258 *(htab_p-13) = m1; 1259 *(htab_p-12) = m1; 1260 *(htab_p-11) = m1; 1261 *(htab_p-10) = m1; 1262 *(htab_p-9) = m1; 1263 *(htab_p-8) = m1; 1264 *(htab_p-7) = m1; 1265 *(htab_p-6) = m1; 1266 *(htab_p-5) = m1; 1267 *(htab_p-4) = m1; 1268 *(htab_p-3) = m1; 1269 *(htab_p-2) = m1; 1270 *(htab_p-1) = m1; 1271 htab_p -= 16; 1272 } while ((i -= 16) >= 0); 1273 for ( i += 16; i > 0; i-- ) 1274 *--htab_p = m1; 1275 } 1276 1277 prratio(stream, num, den) 1278 FILE *stream; 1279 long int num, den; 1280 { 1281 register int q; /* Doesn't need to be long */ 1282 1283 if(num > 214748L) { /* 2147483647/10000 */ 1284 q = num / (den / 10000L); 1285 } else { 1286 q = 10000L * num / den; /* Long calculations, though */ 1287 } 1288 if (q < 0) { 1289 putc('-', stream); 1290 q = -q; 1291 } 1292 fprintf(stream, "%d.%02d%%", q / 100, q % 100); 1293 } 1294 1295 usage() 1296 { 1297 (void)fprintf(stderr, 1298 #ifdef DEBUG 1299 "compress [-CDVcdfnv] [-b maxbits] [file ...]\n"); 1300 #else 1301 "compress [-Ccdfnv] [-b maxbits] [file ...]\n"); 1302 #endif 1303 exit(1); 1304 } 1305