xref: /plan9/sys/src/cmd/compress/compress.c (revision f9e1cf08d3be51592e03e639fc848a68dc31a55e)
1 /*
2  * compress - File compression ala IEEE Computer, June 1984.
3  *
4  * Algorithm from "A Technique for High Performance Data Compression",
5  * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
6  *
7  * Usage: compress [-dfvc] [-b bits] [file ...]
8  * Inputs:
9  *	-b:	limit the max number of bits/code.
10  *	-c:	write output on stdout, don't remove original.
11  *	-d:	decompress instead.
12  *	-f:	Forces output file to be generated, even if one already
13  *		exists, and even if no space is saved by compressing.
14  *		If -f is not used, the user will be prompted if stdin is
15  *		a tty, otherwise, the output file will not be overwritten.
16  *	-v:	Write compression statistics
17  *
18  * 	file ...: Files to be compressed.  If none specified, stdin is used.
19  * Outputs:
20  *	file.Z:	Compressed form of file with same mode, owner, and utimes
21  * 		or stdout (if stdin used as input)
22  *
23  * Assumptions:
24  *	When filenames are given, replaces with the compressed version
25  *	(.Z suffix) only if the file decreases in size.
26  * Algorithm:
27  * 	Modified Lempel-Ziv method (LZW).  Basically finds common
28  * substrings and replaces them with a variable size code.  This is
29  * deterministic, and can be done on the fly.  Thus, the decompression
30  * procedure needs no input table, but tracks the way the table was built.
31 
32  * Authors:	Spencer W. Thomas	(decvax!harpo!utah-cs!utah-gr!thomas)
33  *		Jim McKie		(decvax!mcvax!jim)
34  *		Steve Davies		(decvax!vax135!petsd!peora!srd)
35  *		Ken Turkowski		(decvax!decwrl!turtlevax!ken)
36  *		James A. Woods		(decvax!ihnp4!ames!jaw)
37  *		Joe Orost		(decvax!vax135!petsd!joe)
38  */
39 #define _PLAN9_SOURCE
40 
41 #include <u.h>
42 #include <stdio.h>
43 #include <ctype.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <signal.h>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 
50 #define	min(a,b)	((a>b) ? b : a)
51 
52 #define BITS	16
53 #define HSIZE	69001		/* 95% occupancy */
54 
55 /*
56  * a code_int must be able to hold 2**BITS values of type int, and also -1
57  */
58 typedef long	code_int;
59 typedef long	count_int;
60 
61 static char rcs_ident[] = "$Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $";
62 
63 uchar magic_header[] = { 0x1F, 0x9D };	/* 1F 9D */
64 
65 /* Defines for third byte of header */
66 #define BIT_MASK	0x1f
67 #define BLOCK_MASK	0x80
68 /* Masks 0x40 and 0x20 are free.  I think 0x20 should mean that there is
69 	a fourth header byte (for expansion).
70 */
71 #define INIT_BITS 9			/* initial number of bits/code */
72 
73 void onintr(int);
74 void oops(int);
75 
76 #define ARGVAL() (*++(*argv) || (--argc && *++argv))
77 
78 int n_bits;				/* number of bits/code */
79 int maxbits = BITS;			/* user settable max # bits/code */
80 code_int maxcode;			/* maximum code, given n_bits */
81 code_int maxmaxcode = 1 << BITS;	/* should NEVER generate this code */
82 
83 #define MAXCODE(n_bits)	((1 << (n_bits)) - 1)
84 
85 count_int htab[HSIZE];
86 ushort codetab[HSIZE];
87 
88 #define htabof(i)	htab[i]
89 #define codetabof(i)	codetab[i]
90 
91 code_int hsize = HSIZE;			/* for dynamic table sizing */
92 count_int fsize;
93 
94 /*
95  * To save much memory, we overlay the table used by compress() with those
96  * used by decompress().  The tab_prefix table is the same size and type
97  * as the codetab.  The tab_suffix table needs 2**BITS characters.  We
98  * get this from the beginning of htab.  The output stack uses the rest
99  * of htab, and contains characters.  There is plenty of room for any
100  * possible stack (stack used to be 8000 characters).
101  */
102 
103 #define tab_prefixof(i)	codetabof(i)
104 #define tab_suffixof(i)	((uchar *)(htab))[i]
105 #define de_stack		((uchar *)&tab_suffixof(1<<BITS))
106 
107 code_int free_ent = 0;			/* first unused entry */
108 int exit_stat = 0;
109 
110 code_int getcode();
111 
112 Usage()
113 {
114 #ifdef DEBUG
115 	fprintf(stderr,"Usage: compress [-cdfDV] [-b maxbits] [file ...]\n");
116 #else
117 	fprintf(stderr,"Usage: compress [-cdfvV] [-b maxbits] [file ...]\n");
118 #endif /* DEBUG */
119 }
120 
121 int debug = 0;
122 int nomagic = 0;	/* Use a 3-byte magic number header, unless old file */
123 int zcat_flg = 0;	/* Write output on stdout, suppress messages */
124 int quiet = 1;		/* don't tell me about compression */
125 
126 /*
127  * block compression parameters -- after all codes are used up,
128  * and compression rate changes, start over.
129  */
130 int block_compress = BLOCK_MASK;
131 int clear_flg = 0;
132 long ratio = 0;
133 #define CHECK_GAP 10000	/* ratio check interval */
134 count_int checkpoint = CHECK_GAP;
135 /*
136  * the next two codes should not be changed lightly, as they must not
137  * lie within the contiguous general code space.
138  */
139 #define FIRST	257	/* first free entry */
140 #define	CLEAR	256	/* table clear output code */
141 
142 int force = 0;
143 char ofname [100];
144 #ifdef DEBUG
145 int verbose = 0;
146 #endif /* DEBUG */
147 void (*bgnd_flag)(int);
148 
149 int do_decomp = 0;
150 
151 main(argc, argv)
152 int argc;
153 char **argv;
154 {
155 	int overwrite = 0;	/* Do not overwrite unless given -f flag */
156 	char tempname[512];
157 	char **filelist, **fileptr;
158 	char *cp;
159 	struct stat statbuf;
160 
161 	if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) {
162 		signal(SIGINT, onintr);
163 		signal(SIGSEGV, oops);
164 	}
165 
166 	filelist = fileptr = (char **)(malloc(argc * sizeof(*argv)));
167 	*filelist = NULL;
168 
169 	if((cp = strrchr(argv[0], '/')) != 0)
170 		cp++;
171 	else
172 		cp = argv[0];
173 	if(strcmp(cp, "uncompress") == 0)
174 		do_decomp = 1;
175 	else if(strcmp(cp, "zcat") == 0) {
176 		do_decomp = 1;
177 		zcat_flg = 1;
178 	}
179 
180 	/*
181 	 * Argument Processing
182 	 * All flags are optional.
183 	 * -C	generate output compatible with compress 2.0.
184 	 * -D	debug
185 	 * -V	print Version; debug verbose
186 	 * -b maxbits	maxbits.  If -b is specified, then maxbits MUST be
187 	 *		given also.
188 	 * -c	cat all output to stdout
189 	 * -d	do_decomp
190 	 * -f	force overwrite of output file
191 	 * -n	no header: useful to uncompress old files
192 	 * -v	unquiet
193 	 * if a string is left, must be an input filename.
194 	 */
195 	for (argc--, argv++; argc > 0; argc--, argv++) {
196 	if (**argv == '-') {	/* A flag argument */
197 		while (*++(*argv)) {	/* Process all flags in this arg */
198 		switch (**argv) {
199 		case 'C':
200 			block_compress = 0;
201 			break;
202 #ifdef DEBUG
203 		case 'D':
204 			debug = 1;
205 			break;
206 		case 'V':
207 			verbose = 1;
208 			version();
209 			break;
210 #else
211 		case 'V':
212 			version();
213 			break;
214 #endif
215 		case 'b':
216 			if (!ARGVAL()) {
217 				fprintf(stderr, "Missing maxbits\n");
218 				Usage();
219 				exit(1);
220 			}
221 			maxbits = atoi(*argv);
222 			goto nextarg;
223 		case 'c':
224 			zcat_flg = 1;
225 			break;
226 		case 'd':
227 			do_decomp = 1;
228 			break;
229 		case 'f':
230 		case 'F':
231 			overwrite = 1;
232 			force = 1;
233 			break;
234 		case 'n':
235 			nomagic = 1;
236 			break;
237 		case 'q':
238 			quiet = 1;
239 			break;
240 		case 'v':
241 			quiet = 0;
242 			break;
243 		default:
244 			fprintf(stderr, "Unknown flag: '%c'; ", **argv);
245 			Usage();
246 			exit(1);
247 		}
248 		}
249 	} else {			/* Input file name */
250 		*fileptr++ = *argv;	/* Build input file list */
251 		*fileptr = NULL;
252 		/* process nextarg; */
253 	}
254 nextarg:
255 		continue;
256 	}
257 
258 	if(maxbits < INIT_BITS) maxbits = INIT_BITS;
259 	if (maxbits > BITS) maxbits = BITS;
260 	maxmaxcode = 1 << maxbits;
261 
262 	if (*filelist != NULL) {
263 	for (fileptr = filelist; *fileptr; fileptr++) {
264 		exit_stat = 0;
265 		if (do_decomp != 0) {			/* DECOMPRESSION */
266 		/* Check for .Z suffix */
267 		if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) {
268 			/* No .Z: tack one on */
269 			strcpy(tempname, *fileptr);
270 			strcat(tempname, ".Z");
271 			*fileptr = tempname;
272 		}
273 		/* Open input file */
274 		if ((freopen(*fileptr, "r", stdin)) == NULL) {
275 			perror(*fileptr);
276 			continue;
277 		}
278 		/* Check the magic number */
279 		if (nomagic == 0) {
280 			if ((getchar() != (magic_header[0] & 0xFF))
281 			|| (getchar() != (magic_header[1] & 0xFF))) {
282 				fprintf(stderr, "%s: not in compressed format\n",
283 					*fileptr);
284 				continue;
285 			}
286 			maxbits = getchar();	/* set -b from file */
287 			block_compress = maxbits & BLOCK_MASK;
288 			maxbits &= BIT_MASK;
289 			maxmaxcode = 1 << maxbits;
290 			if(maxbits > BITS) {
291 				fprintf(stderr,
292 		"%s: compressed with %d bits, can only handle %d bits\n",
293 					*fileptr, maxbits, BITS);
294 				continue;
295 			}
296 		}
297 		/* Generate output filename */
298 		strcpy(ofname, *fileptr);
299 		ofname[strlen(*fileptr) - 2] = '\0';  /* Strip off .Z */
300 		} else {				/* COMPRESSION */
301 		if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) {
302 			fprintf(stderr,
303 				"%s: already has .Z suffix -- no change\n",
304 				*fileptr);
305 			continue;
306 		}
307 		/* Open input file */
308 		if ((freopen(*fileptr, "r", stdin)) == NULL) {
309 			perror(*fileptr);
310 			continue;
311 		}
312 		(void) stat(*fileptr, &statbuf);
313 		fsize = (long) statbuf.st_size;
314 		/*
315 		 * tune hash table size for small files -- ad hoc,
316 		 * but the sizes match earlier #defines, which
317 		 * serve as upper bounds on the number of output codes.
318 		 */
319 		hsize = HSIZE;
320 		if (fsize < (1 << 12))
321 			hsize = min(5003, HSIZE);
322 		else if (fsize < (1 << 13))
323 			hsize = min(9001, HSIZE);
324 		else if (fsize < (1 << 14))
325 			hsize = min (18013, HSIZE);
326 		else if (fsize < (1 << 15))
327 			hsize = min (35023, HSIZE);
328 		else if (fsize < 47000)
329 			hsize = min (50021, HSIZE);
330 
331 		/* Generate output filename */
332 		strcpy(ofname, *fileptr);
333 #ifndef BSD4_2
334 		if ((cp=strrchr(ofname,'/')) != NULL)
335 			cp++;
336 		else
337 			cp = ofname;
338 		/*
339 		 *** changed 12 to 25;  should be NAMELEN-3, but I don't want
340 		 * to fight the headers.	ehg 5 Nov 92 **
341 		 */
342 		if (strlen(cp) > 25) {
343 			fprintf(stderr, "%s: filename too long to tack on .Z\n",
344 				cp);
345 			continue;
346 		}
347 #endif
348 		strcat(ofname, ".Z");
349 		}
350 		/* Check for overwrite of existing file */
351 		if (overwrite == 0 && zcat_flg == 0 &&
352 		   stat(ofname, &statbuf) == 0) {
353 			char response[2];
354 
355 			response[0] = 'n';
356 			fprintf(stderr, "%s already exists;", ofname);
357 			if (foreground()) {
358 				fprintf(stderr,
359 				    " do you wish to overwrite %s (y or n)? ",
360 					ofname);
361 				fflush(stderr);
362 				(void) read(2, response, 2);
363 				while (response[1] != '\n')
364 					if (read(2, response+1, 1) < 0) {
365 						/* Ack! */
366 						perror("stderr");
367 						break;
368 					}
369 			}
370 			if (response[0] != 'y') {
371 				fprintf(stderr, "\tnot overwritten\n");
372 				continue;
373 			}
374 		}
375 		if(zcat_flg == 0) {		/* Open output file */
376 			if (freopen(ofname, "w", stdout) == NULL) {
377 				perror(ofname);
378 				continue;
379 			}
380 			if(!quiet)
381 				fprintf(stderr, "%s: ", *fileptr);
382 		}
383 
384 		/* Actually do the compression/decompression */
385 		if (do_decomp == 0)
386 			compress();
387 #ifndef DEBUG
388 		else
389 			decompress();
390 #else
391 		else if (debug == 0)
392 			decompress();
393 		else
394 			printcodes();
395 		if (verbose)
396 			dump_tab();
397 #endif						/* DEBUG */
398 		if(zcat_flg == 0) {
399 			copystat(*fileptr, ofname);	/* Copy stats */
400 			if (exit_stat == 1 || !quiet)
401 				putc('\n', stderr);
402 		}
403 	}
404 	} else {		/* Standard input */
405 	if (do_decomp == 0) {
406 		compress();
407 #ifdef DEBUG
408 		if(verbose)
409 			dump_tab();
410 #endif
411 		if(!quiet)
412 			putc('\n', stderr);
413 	} else {
414 		/* Check the magic number */
415 		if (nomagic == 0) {
416 		if ((getchar()!=(magic_header[0] & 0xFF))
417 		 || (getchar()!=(magic_header[1] & 0xFF))) {
418 			fprintf(stderr, "stdin: not in compressed format\n");
419 			exit(1);
420 		}
421 		maxbits = getchar();	/* set -b from file */
422 		block_compress = maxbits & BLOCK_MASK;
423 		maxbits &= BIT_MASK;
424 		maxmaxcode = 1 << maxbits;
425 		fsize = 100000;		/* assume stdin large for USERMEM */
426 		if(maxbits > BITS) {
427 			fprintf(stderr,
428 			"stdin: compressed with %d bits, can only handle %d bits\n",
429 			maxbits, BITS);
430 			exit(1);
431 		}
432 		}
433 #ifndef DEBUG
434 		decompress();
435 #else
436 		if (debug == 0)
437 			decompress();
438 		else
439 			printcodes();
440 		if (verbose)
441 			dump_tab();
442 #endif						/* DEBUG */
443 	}
444 	}
445 	exit(exit_stat);
446 }
447 
448 static int offset;
449 long in_count = 1;		/* length of input */
450 long bytes_out;			/* length of compressed output */
451 long out_count = 0;		/* # of codes output (for debugging) */
452 
453 /*
454  * compress stdin to stdout
455  *
456  * Algorithm:  use open addressing double hashing (no chaining) on the
457  * prefix code / next character combination.  We do a variant of Knuth's
458  * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
459  * secondary probe.  Here, the modular division first probe is gives way
460  * to a faster exclusive-or manipulation.  Also do block compression with
461  * an adaptive reset, whereby the code table is cleared when the compression
462  * ratio decreases, but after the table fills.  The variable-length output
463  * codes are re-sized at this point, and a special CLEAR code is generated
464  * for the decompressor.  Late addition:  construct the table according to
465  * file size for noticeable speed improvement on small files.  Please direct
466  * questions about this implementation to ames!jaw.
467  */
468 compress()
469 {
470 	code_int ent, hsize_reg;
471 	code_int i = 0;
472 	int c, disp, hshift;
473 	long fcode;
474 
475 	if (nomagic == 0) {
476 		putchar(magic_header[0]);
477 		putchar(magic_header[1]);
478 		putchar((char)(maxbits | block_compress));
479 		if(ferror(stdout))
480 			writeerr();
481 	}
482 	offset = 0;
483 	bytes_out = 3;		/* includes 3-byte header mojo */
484 	out_count = 0;
485 	clear_flg = 0;
486 	ratio = 0;
487 	in_count = 1;
488 	checkpoint = CHECK_GAP;
489 	maxcode = MAXCODE(n_bits = INIT_BITS);
490 	free_ent = (block_compress? FIRST: 256);
491 
492 	ent = getchar ();
493 
494 	hshift = 0;
495 	for (fcode = (long)hsize;  fcode < 65536L; fcode *= 2)
496 		hshift++;
497 	hshift = 8 - hshift;		/* set hash code range bound */
498 
499 	hsize_reg = hsize;
500 	cl_hash( (count_int) hsize_reg);		/* clear hash table */
501 
502 	while ((c = getchar()) != EOF) {
503 		in_count++;
504 		fcode = (long) (((long) c << maxbits) + ent);
505 	 	i = ((c << hshift) ^ ent);	/* xor hashing */
506 
507 		if (htabof (i) == fcode) {
508 			ent = codetabof(i);
509 			continue;
510 		} else if ((long)htabof(i) < 0 )	/* empty slot */
511 			goto nomatch;
512 	 	disp = hsize_reg - i;	/* secondary hash (after G. Knott) */
513 		if (i == 0)
514 			disp = 1;
515 probe:
516 		if ((i -= disp) < 0)
517 			i += hsize_reg;
518 
519 		if (htabof (i) == fcode) {
520 			ent = codetabof(i);
521 			continue;
522 		}
523 		if ((long)htabof(i) > 0)
524 			goto probe;
525 nomatch:
526 		output((code_int)ent);
527 		out_count++;
528 	 	ent = c;
529 		if (free_ent < maxmaxcode) {
530 	 		codetabof(i) = free_ent++;	/* code -> hashtable */
531 			htabof(i) = fcode;
532 		} else if ((count_int)in_count >= checkpoint && block_compress)
533 			cl_block ();
534 	}
535 	/*
536 	* Put out the final code.
537 	*/
538 	output( (code_int)ent );
539 	out_count++;
540 	output( (code_int)-1 );
541 
542 	/*
543 	* Print out stats on stderr
544 	*/
545 	if(zcat_flg == 0 && !quiet) {
546 #ifdef DEBUG
547 	fprintf( stderr,
548 		"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
549 		in_count, out_count, bytes_out );
550 	prratio( stderr, in_count, bytes_out );
551 	fprintf( stderr, "\n");
552 	fprintf( stderr, "\tCompression as in compact: " );
553 	prratio( stderr, in_count-bytes_out, in_count );
554 	fprintf( stderr, "\n");
555 	fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
556 		free_ent - 1, n_bits );
557 #else /* !DEBUG */
558 	fprintf( stderr, "Compression: " );
559 	prratio( stderr, in_count-bytes_out, in_count );
560 #endif /* DEBUG */
561 	}
562 	if(bytes_out > in_count)	/* exit(2) if no savings */
563 		exit_stat = 2;
564 }
565 
566 /*
567  * TAG( output )
568  *
569  * Output the given code.
570  * Inputs:
571  * 	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
572  *		that n_bits =< (long)wordsize - 1.
573  * Outputs:
574  * 	Outputs code to the file.
575  * Assumptions:
576  *	Chars are 8 bits long.
577  * Algorithm:
578  * 	Maintain a BITS character long buffer (so that 8 codes will
579  * fit in it exactly).  When the buffer fills up empty it and start over.
580  */
581 
582 static char buf[BITS];
583 
584 uchar lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
585 uchar rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
586 
587 output( code )
588 code_int  code;
589 {
590 #ifdef DEBUG
591 	static int col = 0;
592 #endif
593 	int r_off = offset, bits= n_bits;
594 	char *bp = buf;
595 
596 #ifdef DEBUG
597 	if (verbose)
598 		fprintf(stderr, "%5d%c", code,
599 			(col+=6) >= 74? (col = 0, '\n'): ' ');
600 #endif
601 	if (code >= 0) {
602 		/*
603 		 * byte/bit numbering on the VAX is simulated by the
604 		 * following code
605 		 */
606 		/*
607 		 * Get to the first byte.
608 		 */
609 		bp += (r_off >> 3);
610 		r_off &= 7;
611 		/*
612 		 * Since code is always >= 8 bits, only need to mask the first
613 		 * hunk on the left.
614 		 */
615 		*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
616 		bp++;
617 		bits -=  8 - r_off;
618 		code >>= 8 - r_off;
619 		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
620 		if ( bits >= 8 ) {
621 			*bp++ = code;
622 			code >>= 8;
623 			bits -= 8;
624 		}
625 		/* Last bits. */
626 		if(bits)
627 			*bp = code;
628 
629 		offset += n_bits;
630 		if ( offset == (n_bits << 3) ) {
631 			bp = buf;
632 			bits = n_bits;
633 			bytes_out += bits;
634 			do {
635 				putchar(*bp++);
636 			} while(--bits);
637 			offset = 0;
638 		}
639 
640 		/*
641 		 * If the next entry is going to be too big for the code size,
642 		 * then increase it, if possible.
643 		 */
644 		if ( free_ent > maxcode || (clear_flg > 0)) {
645 			/*
646 			* Write the whole buffer, because the input side won't
647 			* discover the size increase until after it has read it.
648 			*/
649 			if ( offset > 0 ) {
650 				if( fwrite( buf, 1, n_bits, stdout ) != n_bits)
651 					writeerr();
652 				bytes_out += n_bits;
653 			}
654 			offset = 0;
655 
656 			if ( clear_flg ) {
657 				maxcode = MAXCODE (n_bits = INIT_BITS);
658 				clear_flg = 0;
659 			} else {
660 				n_bits++;
661 				if ( n_bits == maxbits )
662 					maxcode = maxmaxcode;
663 				else
664 					maxcode = MAXCODE(n_bits);
665 			}
666 #ifdef DEBUG
667 			if ( debug ) {
668 				fprintf(stderr,
669 					"\nChange to %d bits\n", n_bits);
670 				col = 0;
671 			}
672 #endif
673 		}
674 	} else {
675 		/*
676 		 * At EOF, write the rest of the buffer.
677 		 */
678 		if ( offset > 0 )
679 			fwrite( buf, 1, (offset + 7) / 8, stdout );
680 		bytes_out += (offset + 7) / 8;
681 		offset = 0;
682 		fflush( stdout );
683 #ifdef DEBUG
684 		if ( verbose )
685 			fprintf( stderr, "\n" );
686 #endif
687 		if( ferror( stdout ) )
688 			writeerr();
689 	}
690 }
691 
692 /*
693  * Decompress stdin to stdout.  This routine adapts to the codes in the
694  * file building the "string" table on-the-fly; requiring no table to
695  * be stored in the compressed file.  The tables used herein are shared
696  * with those of the compress() routine.  See the definitions above.
697  */
698 decompress()
699 {
700 	int finchar;
701 	code_int code, oldcode, incode;
702 	uchar *stackp;
703 
704 	/*
705 	* As above, initialize the first 256 entries in the table.
706 	*/
707 	maxcode = MAXCODE(n_bits = INIT_BITS);
708 	for (code = 255; code >= 0; code--) {
709 		tab_prefixof(code) = 0;
710 		tab_suffixof(code) = (uchar)code;
711 	}
712 	free_ent = (block_compress? FIRST: 256);
713 
714 	finchar = oldcode = getcode();
715 	if(oldcode == -1)		/* EOF already? */
716 		return;			/* Get out of here */
717 	putchar((char)finchar);		/* first code must be 8 bits = char */
718 	if(ferror(stdout))		/* Crash if can't write */
719 		writeerr();
720 	stackp = de_stack;
721 
722 	while ((code = getcode()) > -1) {
723 		if ((code == CLEAR) && block_compress) {
724 			for (code = 255; code >= 0; code--)
725 				tab_prefixof(code) = 0;
726 			clear_flg = 1;
727 			free_ent = FIRST - 1;
728 			if ((code = getcode()) == -1)	/* O, untimely death! */
729 				break;
730 		}
731 		incode = code;
732 		/*
733 		 * Special case for KwKwK string.
734 		 */
735 		if (code >= free_ent) {
736 			*stackp++ = finchar;
737 			code = oldcode;
738 		}
739 
740 		/*
741 		 * Generate output characters in reverse order
742 		 */
743 		while (code >= 256) {
744 			*stackp++ = tab_suffixof(code);
745 			code = tab_prefixof(code);
746 		}
747 		*stackp++ = finchar = tab_suffixof(code);
748 
749 		/*
750 		 * And put them out in forward order
751 		 */
752 		do {
753 			putchar(*--stackp);
754 		} while (stackp > de_stack);
755 
756 		/*
757 		 * Generate the new entry.
758 		 */
759 		if ( (code=free_ent) < maxmaxcode ) {
760 			tab_prefixof(code) = (ushort)oldcode;
761 			tab_suffixof(code) = finchar;
762 			free_ent = code+1;
763 		}
764 		/*
765 		 * Remember previous code.
766 		 */
767 		oldcode = incode;
768 	}
769 	fflush(stdout);
770 	if(ferror(stdout))
771 		writeerr();
772 }
773 
774 /*
775  * TAG( getcode )
776  *
777  * Read one code from the standard input.  If EOF, return -1.
778  * Inputs:
779  * 	stdin
780  * Outputs:
781  * 	code or -1 is returned.
782  */
783 code_int
784 getcode()
785 {
786 	int r_off, bits;
787 	code_int code;
788 	static int offset = 0, size = 0;
789 	static uchar buf[BITS];
790 	uchar *bp = buf;
791 
792 	if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) {
793 		/*
794 		 * If the next entry will be too big for the current code
795 		 * size, then we must increase the size.  This implies reading
796 		 * a new buffer full, too.
797 		 */
798 		if ( free_ent > maxcode ) {
799 			n_bits++;
800 			if ( n_bits == maxbits )
801 				maxcode = maxmaxcode; /* won't get any bigger now */
802 			else
803 				maxcode = MAXCODE(n_bits);
804 		}
805 		if ( clear_flg > 0) {
806 			maxcode = MAXCODE(n_bits = INIT_BITS);
807 			clear_flg = 0;
808 		}
809 		size = fread(buf, 1, n_bits, stdin);
810 		if (size <= 0)
811 			return -1;			/* end of file */
812 		offset = 0;
813 		/* Round size down to integral number of codes */
814 		size = (size << 3) - (n_bits - 1);
815 	}
816 	r_off = offset;
817 	bits = n_bits;
818 	/*
819 	 * Get to the first byte.
820 	 */
821 	bp += (r_off >> 3);
822 	r_off &= 7;
823 	/* Get first part (low order bits) */
824 	code = (*bp++ >> r_off);
825 	bits -= (8 - r_off);
826 	r_off = 8 - r_off;		/* now, offset into code word */
827 	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
828 	if (bits >= 8) {
829 		code |= *bp++ << r_off;
830 		r_off += 8;
831 		bits -= 8;
832 	}
833 	/* high order bits. */
834 	code |= (*bp & rmask[bits]) << r_off;
835 	offset += n_bits;
836 	return code;
837 }
838 
839 #ifdef DEBUG
840 printcodes()
841 {
842 	/*
843 	* Just print out codes from input file.  For debugging.
844 	*/
845 	code_int code;
846 	int col = 0, bits;
847 
848 	bits = n_bits = INIT_BITS;
849 	maxcode = MAXCODE(n_bits);
850 	free_ent = ((block_compress) ? FIRST : 256 );
851 	while ( ( code = getcode() ) >= 0 ) {
852 	if ( (code == CLEAR) && block_compress ) {
853 			free_ent = FIRST - 1;
854 			clear_flg = 1;
855 	}
856 	else if ( free_ent < maxmaxcode )
857 		free_ent++;
858 	if ( bits != n_bits ) {
859 		fprintf(stderr, "\nChange to %d bits\n", n_bits );
860 		bits = n_bits;
861 		col = 0;
862 	}
863 	fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
864 	}
865 	putc( '\n', stderr );
866 	exit( 0 );
867 }
868 
869 code_int sorttab[1<<BITS];	/* sorted pointers into htab */
870 
871 #define STACK_SIZE	15000
872 
873 dump_tab()	/* dump string table */
874 {
875 	int i, first, c, ent;
876 	int stack_top = STACK_SIZE;
877 
878 	if(do_decomp == 0) {	/* compressing */
879 	int flag = 1;
880 
881 	for(i=0; i<hsize; i++) {	/* build sort pointers */
882 		if((long)htabof(i) >= 0) {
883 			sorttab[codetabof(i)] = i;
884 		}
885 	}
886 	first = block_compress ? FIRST : 256;
887 	for(i = first; i < free_ent; i++) {
888 		fprintf(stderr, "%5d: \"", i);
889 		de_stack[--stack_top] = '\n';
890 		de_stack[--stack_top] = '"';
891 		stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff,
892 	stack_top);
893 		for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1);
894 			ent > 256;
895 			ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) {
896 			stack_top = in_stack(htabof(sorttab[ent]) >> maxbits,
897 						stack_top);
898 		}
899 		stack_top = in_stack(ent, stack_top);
900 		fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr);
901 			stack_top = STACK_SIZE;
902 	}
903 	} else if(!debug) {	/* decompressing */
904 
905 	for ( i = 0; i < free_ent; i++ ) {
906 		ent = i;
907 		c = tab_suffixof(ent);
908 		if ( isascii(c) && isprint(c) )
909 		fprintf( stderr, "%5d: %5d/'%c'  \"",
910 				ent, tab_prefixof(ent), c );
911 		else
912 		fprintf( stderr, "%5d: %5d/\\%03o \"",
913 				ent, tab_prefixof(ent), c );
914 		de_stack[--stack_top] = '\n';
915 		de_stack[--stack_top] = '"';
916 		for ( ; ent != NULL;
917 			ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) {
918 		stack_top = in_stack(tab_suffixof(ent), stack_top);
919 		}
920 		fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr );
921 		stack_top = STACK_SIZE;
922 	}
923 	}
924 }
925 
926 int
927 in_stack(int c, int stack_top)
928 {
929 	if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) {
930 		de_stack[--stack_top] = c;
931 	} else {
932 		switch( c ) {
933 		case '\n': de_stack[--stack_top] = 'n'; break;
934 		case '\t': de_stack[--stack_top] = 't'; break;
935 		case '\b': de_stack[--stack_top] = 'b'; break;
936 		case '\f': de_stack[--stack_top] = 'f'; break;
937 		case '\r': de_stack[--stack_top] = 'r'; break;
938 		case '\\': de_stack[--stack_top] = '\\'; break;
939 		default:
940 	 	de_stack[--stack_top] = '0' + c % 8;
941 	 	de_stack[--stack_top] = '0' + (c / 8) % 8;
942 	 	de_stack[--stack_top] = '0' + c / 64;
943 	 	break;
944 		}
945 		de_stack[--stack_top] = '\\';
946 	}
947 	return stack_top;
948 }
949 #endif /* DEBUG */
950 
951 writeerr()
952 {
953 	perror(ofname);
954 	unlink(ofname);
955 	exit(1);
956 }
957 
958 copystat(ifname, ofname)
959 char *ifname, *ofname;
960 {
961 	int mode;
962 	time_t timep[2];
963 	struct stat statbuf;
964 
965 	fclose(stdout);
966 	if (stat(ifname, &statbuf)) {		/* Get stat on input file */
967 		perror(ifname);
968 		return;
969 	}
970 	if (!S_ISREG(statbuf.st_mode)) {
971 		if (quiet)
972 			fprintf(stderr, "%s: ", ifname);
973 		fprintf(stderr, " -- not a regular file: unchanged");
974 		exit_stat = 1;
975 	} else if (exit_stat == 2 && !force) {
976 		/* No compression: remove file.Z */
977 		if (!quiet)
978 			fprintf(stderr, " -- file unchanged");
979 	} else {			/* Successful Compression */
980 		exit_stat = 0;
981 		mode = statbuf.st_mode & 0777;
982 		if (chmod(ofname, mode))		/* Copy modes */
983 			perror(ofname);
984 		/* Copy ownership */
985 		chown(ofname, statbuf.st_uid, statbuf.st_gid);
986 		timep[0] = statbuf.st_atime;
987 		timep[1] = statbuf.st_mtime;
988 		/* Update last accessed and modified times */
989 		utime(ofname, timep);
990 //		if (unlink(ifname))	/* Remove input file */
991 //			perror(ifname);
992 		return;			/* success */
993 	}
994 
995 	/* Unsuccessful return -- one of the tests failed */
996 	if (unlink(ofname))
997 		perror(ofname);
998 }
999 
1000 /*
1001  * This routine returns 1 if we are running in the foreground and stderr
1002  * is a tty.
1003  */
1004 foreground()
1005 {
1006 	if(bgnd_flag)			/* background? */
1007 		return 0;
1008 	else				/* foreground */
1009 		return isatty(2);	/* and stderr is a tty */
1010 }
1011 
1012 void
1013 onintr(int x)
1014 {
1015 	unlink(ofname);
1016 	exit(1);
1017 }
1018 
1019 void
1020 oops(int x)		/* wild pointer -- assume bad input */
1021 {
1022 	if (do_decomp == 1)
1023 		fprintf(stderr, "uncompress: corrupt input\n");
1024 	unlink(ofname);
1025 	exit(1);
1026 }
1027 
1028 cl_block ()		/* table clear for block compress */
1029 {
1030 	long rat;
1031 
1032 	checkpoint = in_count + CHECK_GAP;
1033 #ifdef DEBUG
1034 	if ( debug ) {
1035 		fprintf ( stderr, "count: %ld, ratio: ", in_count );
1036 		prratio ( stderr, in_count, bytes_out );
1037 		fprintf ( stderr, "\n");
1038 	}
1039 #endif /* DEBUG */
1040 
1041 	if (in_count > 0x007fffff) {	/* shift will overflow */
1042 		rat = bytes_out >> 8;
1043 		if (rat == 0)		/* Don't divide by zero */
1044 			rat = 0x7fffffff;
1045 		else
1046 			rat = in_count / rat;
1047 	} else
1048 		rat = (in_count << 8) / bytes_out;	/* 8 fractional bits */
1049 	if (rat > ratio)
1050 		ratio = rat;
1051 	else {
1052 		ratio = 0;
1053 #ifdef DEBUG
1054 		if (verbose)
1055 			dump_tab();	/* dump string table */
1056 #endif
1057 		cl_hash((count_int)hsize);
1058 		free_ent = FIRST;
1059 		clear_flg = 1;
1060 		output((code_int)CLEAR);
1061 #ifdef DEBUG
1062 		if (debug)
1063 			fprintf(stderr, "clear\n");
1064 #endif /* DEBUG */
1065 	}
1066 }
1067 
1068 cl_hash(hsize)		/* reset code table */
1069 count_int hsize;
1070 {
1071 	count_int *htab_p = htab+hsize;
1072 	long i;
1073 	long m1 = -1;
1074 
1075 	i = hsize - 16;
1076  	do {				/* might use Sys V memset(3) here */
1077 		*(htab_p-16) = m1;
1078 		*(htab_p-15) = m1;
1079 		*(htab_p-14) = m1;
1080 		*(htab_p-13) = m1;
1081 		*(htab_p-12) = m1;
1082 		*(htab_p-11) = m1;
1083 		*(htab_p-10) = m1;
1084 		*(htab_p-9) = m1;
1085 		*(htab_p-8) = m1;
1086 		*(htab_p-7) = m1;
1087 		*(htab_p-6) = m1;
1088 		*(htab_p-5) = m1;
1089 		*(htab_p-4) = m1;
1090 		*(htab_p-3) = m1;
1091 		*(htab_p-2) = m1;
1092 		*(htab_p-1) = m1;
1093 		htab_p -= 16;
1094 	} while ((i -= 16) >= 0);
1095 	for ( i += 16; i > 0; i-- )
1096 		*--htab_p = m1;
1097 }
1098 
1099 prratio(stream, num, den)
1100 FILE *stream;
1101 long num, den;
1102 {
1103 	int q;				/* Doesn't need to be long */
1104 
1105 	if(num > 214748L)		/* 2147483647/10000 */
1106 		q = num / (den / 10000L);
1107 	else
1108 		q = 10000L * num / den;	/* Long calculations, though */
1109 	if (q < 0) {
1110 		putc('-', stream);
1111 		q = -q;
1112 	}
1113 	fprintf(stream, "%d.%02d%%", q / 100, q % 100);
1114 }
1115 
1116 version()
1117 {
1118 	fprintf(stderr, "%s\n", rcs_ident);
1119 	fprintf(stderr, "Options: ");
1120 #ifdef DEBUG
1121 	fprintf(stderr, "DEBUG, ");
1122 #endif
1123 #ifdef BSD4_2
1124 	fprintf(stderr, "BSD4_2, ");
1125 #endif
1126 	fprintf(stderr, "BITS = %d\n", BITS);
1127 }
1128 
1129 /*
1130  * The revision-history novel:
1131  *
1132  * $Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $
1133  * $Log:	compress.c,v $
1134  * Revision 4.0  85/07/30  12:50:00  joe
1135  * Removed ferror() calls in output routine on every output except first.
1136  * Prepared for release to the world.
1137  *
1138  * Revision 3.6  85/07/04  01:22:21  joe
1139  * Remove much wasted storage by overlaying hash table with the tables
1140  * used by decompress: tab_suffix[1<<BITS], stack[8000].  Updated USERMEM
1141  * computations.  Fixed dump_tab() DEBUG routine.
1142  *
1143  * Revision 3.5  85/06/30  20:47:21  jaw
1144  * Change hash function to use exclusive-or.  Rip out hash cache.  These
1145  * speedups render the megamemory version defunct, for now.  Make decoder
1146  * stack global.  Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply.
1147  *
1148  * Revision 3.4  85/06/27  12:00:00  ken
1149  * Get rid of all floating-point calculations by doing all compression ratio
1150  * calculations in fixed point.
1151  *
1152  * Revision 3.3  85/06/24  21:53:24  joe
1153  * Incorporate portability suggestion for M_XENIX.  Got rid of text on #else
1154  * and #endif lines.  Cleaned up #ifdefs for vax and interdata.
1155  *
1156  * Revision 3.2  85/06/06  21:53:24  jaw
1157  * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.
1158  * Default to "quiet" output (no compression statistics).
1159  *
1160  * Revision 3.1  85/05/12  18:56:13  jaw
1161  * Integrate decompress() stack speedups (from early pointer mods by McKie).
1162  * Repair multi-file USERMEM gaffe.  Unify 'force' flags to mimic semantics
1163  * of SVR2 'pack'.  Streamline block-compress table clear logic.  Increase
1164  * output byte count by magic number size.
1165  *
1166  * Revision 3.0	84/11/27  11:50:00  petsd!joe
1167  * Set HSIZE depending on BITS.  Set BITS depending on USERMEM.  Unrolled
1168  * loops in clear routines.  Added "-C" flag for 2.0 compatibility.  Used
1169  * unsigned compares on Perkin-Elmer.  Fixed foreground check.
1170  *
1171  * Revision 2.7	84/11/16  19:35:39  ames!jaw
1172  * Cache common hash codes based on input statistics; this improves
1173  * performance for low-density raster images.  Pass on #ifdef bundle
1174  * from Turkowski.
1175  *
1176  * Revision 2.6	84/11/05  19:18:21  ames!jaw
1177  * Vary size of hash tables to reduce time for small files.
1178  * Tune PDP-11 hash function.
1179  *
1180  * Revision 2.5	84/10/30  20:15:14  ames!jaw
1181  * Junk chaining; replace with the simpler (and, on the VAX, faster)
1182  * double hashing, discussed within.  Make block compression standard.
1183  *
1184  * Revision 2.4	84/10/16  11:11:11  ames!jaw
1185  * Introduce adaptive reset for block compression, to boost the rate
1186  * another several percent.  (See mailing list notes.)
1187  *
1188  * Revision 2.3	84/09/22  22:00:00  petsd!joe
1189  * Implemented "-B" block compress.  Implemented REVERSE sorting of tab_next.
1190  * Bug fix for last bits.  Changed fwrite to putchar loop everywhere.
1191  *
1192  * Revision 2.2	84/09/18  14:12:21  ames!jaw
1193  * Fold in news changes, small machine typedef from thomas,
1194  * #ifdef interdata from joe.
1195  *
1196  * Revision 2.1	84/09/10  12:34:56  ames!jaw
1197  * Configured fast table lookup for 32-bit machines.
1198  * This cuts user time in half for b <= FBITS, and is useful for news batching
1199  * from VAX to PDP sites.  Also sped up decompress() [fwrite->putc] and
1200  * added signal catcher [plus beef in writeerr()] to delete effluvia.
1201  *
1202  * Revision 2.0	84/08/28  22:00:00  petsd!joe
1203  * Add check for foreground before prompting user.  Insert maxbits into
1204  * compressed file.  Force file being uncompressed to end with ".Z".
1205  * Added "-c" flag and "zcat".  Prepared for release.
1206  *
1207  * Revision 1.10  84/08/24  18:28:00  turtlevax!ken
1208  * Will only compress regular files (no directories), added a magic number
1209  * header (plus an undocumented -n flag to handle old files without headers),
1210  * added -f flag to force overwriting of possibly existing destination file,
1211  * otherwise the user is prompted for a response.  Will tack on a .Z to a
1212  * filename if it doesn't have one when decompressing.  Will only replace
1213  * file if it was compressed.
1214  *
1215  * Revision 1.9  84/08/16  17:28:00  turtlevax!ken
1216  * Removed scanargs(), getopt(), added .Z extension and unlimited number of
1217  * filenames to compress.  Flags may be clustered (-Ddvb12) or separated
1218  * (-D -d -v -b 12), or combination thereof.  Modes and other status is
1219  * copied with copystat().  -O bug for 4.2 seems to have disappeared with
1220  * 1.8.
1221  *
1222  * Revision 1.8  84/08/09  23:15:00  joe
1223  * Made it compatible with vax version, installed jim's fixes/enhancements
1224  *
1225  * Revision 1.6  84/08/01  22:08:00  joe
1226  * Sped up algorithm significantly by sorting the compress chain.
1227  *
1228  * Revision 1.5  84/07/13  13:11:00  srd
1229  * Added C version of vax asm routines.  Changed structure to arrays to
1230  * save much memory.  Do unsigned compares where possible (faster on
1231  * Perkin-Elmer)
1232  *
1233  * Revision 1.4  84/07/05  03:11:11  thomas
1234  * Clean up the code a little and lint it.  (Lint complains about all
1235  * the regs used in the asm, but I'm not going to "fix" this.)
1236  *
1237  * Revision 1.3  84/07/05  02:06:54  thomas
1238  * Minor fixes.
1239  *
1240  * Revision 1.2  84/07/05  00:27:27  thomas
1241  * Add variable bit length output.
1242  */
1243