1*16dce513Schristos /* fitblk.c: example of fitting compressed output to a specified size
2*16dce513Schristos Not copyrighted -- provided to the public domain
3*16dce513Schristos Version 1.1 25 November 2004 Mark Adler */
4*16dce513Schristos
5*16dce513Schristos /* Version history:
6*16dce513Schristos 1.0 24 Nov 2004 First version
7*16dce513Schristos 1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
8*16dce513Schristos Use fixed-size, stack-allocated raw buffers
9*16dce513Schristos Simplify code moving compression to subroutines
10*16dce513Schristos Use assert() for internal errors
11*16dce513Schristos Add detailed description of approach
12*16dce513Schristos */
13*16dce513Schristos
14*16dce513Schristos /* Approach to just fitting a requested compressed size:
15*16dce513Schristos
   fitblk performs three compression passes on a portion of the input
   data in order to determine how much of that input will compress to
   nearly the requested output block size. The first pass generates
   enough deflate blocks to produce output to fill the requested
   output size plus a specified excess amount (see the EXCESS define
   below). The last deflate block may go quite a bit past that, but
   is discarded. The second pass decompresses and recompresses just
   the compressed data that fit in the requested plus excess sized
   buffer. The deflate process is terminated after that amount of
   input, which is less than the amount consumed on the first pass.
   The last deflate block of the result will be of a comparable size
   to the final product, so that the header for that deflate block and
   the compression ratio for that block will be about the same as in
   the final product. The third compression pass decompresses the
   result of the second step, but only the compressed data up to the
   requested size minus an amount to allow the compressed stream to
   complete (see the MARGIN define below). That will result in a
   final compressed stream whose length is less than or equal to the
   requested size. Assuming sufficient input and a requested size
   greater than a few hundred bytes, the shortfall will typically be
   less than ten bytes.
37*16dce513Schristos
   If the input is short enough that the first compression completes
   before filling the requested output size, then that compressed
   stream is returned with no recompression.
41*16dce513Schristos
42*16dce513Schristos EXCESS is chosen to be just greater than the shortfall seen in a
43*16dce513Schristos two pass approach similar to the above. That shortfall is due to
44*16dce513Schristos the last deflate block compressing more efficiently with a smaller
45*16dce513Schristos header on the second pass. EXCESS is set to be large enough so
46*16dce513Schristos that there is enough uncompressed data for the second pass to fill
47*16dce513Schristos out the requested size, and small enough so that the final deflate
48*16dce513Schristos block of the second pass will be close in size to the final deflate
49*16dce513Schristos block of the third and final pass. MARGIN is chosen to be just
50*16dce513Schristos large enough to assure that the final compression has enough room
51*16dce513Schristos to complete in all cases.
52*16dce513Schristos */
53*16dce513Schristos
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "zlib.h"
58*16dce513Schristos
59*16dce513Schristos #define local static
60*16dce513Schristos
61*16dce513Schristos /* print nastygram and leave */
quit(char * why)62*16dce513Schristos local void quit(char *why)
63*16dce513Schristos {
64*16dce513Schristos fprintf(stderr, "fitblk abort: %s\n", why);
65*16dce513Schristos exit(1);
66*16dce513Schristos }
67*16dce513Schristos
68*16dce513Schristos #define RAWLEN 4096 /* intermediate uncompressed buffer size */
69*16dce513Schristos
70*16dce513Schristos /* compress from file to def until provided buffer is full or end of
71*16dce513Schristos input reached; return last deflate() return value, or Z_ERRNO if
72*16dce513Schristos there was read error on the file */
partcompress(FILE * in,z_streamp def)73*16dce513Schristos local int partcompress(FILE *in, z_streamp def)
74*16dce513Schristos {
75*16dce513Schristos int ret, flush;
76*16dce513Schristos unsigned char raw[RAWLEN];
77*16dce513Schristos
78*16dce513Schristos flush = Z_NO_FLUSH;
79*16dce513Schristos do {
80*16dce513Schristos def->avail_in = fread(raw, 1, RAWLEN, in);
81*16dce513Schristos if (ferror(in))
82*16dce513Schristos return Z_ERRNO;
83*16dce513Schristos def->next_in = raw;
84*16dce513Schristos if (feof(in))
85*16dce513Schristos flush = Z_FINISH;
86*16dce513Schristos ret = deflate(def, flush);
87*16dce513Schristos assert(ret != Z_STREAM_ERROR);
88*16dce513Schristos } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
89*16dce513Schristos return ret;
90*16dce513Schristos }
91*16dce513Schristos
92*16dce513Schristos /* recompress from inf's input to def's output; the input for inf and
93*16dce513Schristos the output for def are set in those structures before calling;
94*16dce513Schristos return last deflate() return value, or Z_MEM_ERROR if inflate()
95*16dce513Schristos was not able to allocate enough memory when it needed to */
recompress(z_streamp inf,z_streamp def)96*16dce513Schristos local int recompress(z_streamp inf, z_streamp def)
97*16dce513Schristos {
98*16dce513Schristos int ret, flush;
99*16dce513Schristos unsigned char raw[RAWLEN];
100*16dce513Schristos
101*16dce513Schristos flush = Z_NO_FLUSH;
102*16dce513Schristos do {
103*16dce513Schristos /* decompress */
104*16dce513Schristos inf->avail_out = RAWLEN;
105*16dce513Schristos inf->next_out = raw;
106*16dce513Schristos ret = inflate(inf, Z_NO_FLUSH);
107*16dce513Schristos assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
108*16dce513Schristos ret != Z_NEED_DICT);
109*16dce513Schristos if (ret == Z_MEM_ERROR)
110*16dce513Schristos return ret;
111*16dce513Schristos
112*16dce513Schristos /* compress what was decompresed until done or no room */
113*16dce513Schristos def->avail_in = RAWLEN - inf->avail_out;
114*16dce513Schristos def->next_in = raw;
115*16dce513Schristos if (inf->avail_out != 0)
116*16dce513Schristos flush = Z_FINISH;
117*16dce513Schristos ret = deflate(def, flush);
118*16dce513Schristos assert(ret != Z_STREAM_ERROR);
119*16dce513Schristos } while (ret != Z_STREAM_END && def->avail_out != 0);
120*16dce513Schristos return ret;
121*16dce513Schristos }
122*16dce513Schristos
123*16dce513Schristos #define EXCESS 256 /* empirically determined stream overage */
124*16dce513Schristos #define MARGIN 8 /* amount to back off for completion */
125*16dce513Schristos
126*16dce513Schristos /* compress from stdin to fixed-size block on stdout */
main(int argc,char ** argv)127*16dce513Schristos int main(int argc, char **argv)
128*16dce513Schristos {
129*16dce513Schristos int ret; /* return code */
130*16dce513Schristos unsigned size; /* requested fixed output block size */
131*16dce513Schristos unsigned have; /* bytes written by deflate() call */
132*16dce513Schristos unsigned char *blk; /* intermediate and final stream */
133*16dce513Schristos unsigned char *tmp; /* close to desired size stream */
134*16dce513Schristos z_stream def, inf; /* zlib deflate and inflate states */
135*16dce513Schristos
136*16dce513Schristos /* get requested output size */
137*16dce513Schristos if (argc != 2)
138*16dce513Schristos quit("need one argument: size of output block");
139*16dce513Schristos ret = strtol(argv[1], argv + 1, 10);
140*16dce513Schristos if (argv[1][0] != 0)
141*16dce513Schristos quit("argument must be a number");
142*16dce513Schristos if (ret < 8) /* 8 is minimum zlib stream size */
143*16dce513Schristos quit("need positive size of 8 or greater");
144*16dce513Schristos size = (unsigned)ret;
145*16dce513Schristos
146*16dce513Schristos /* allocate memory for buffers and compression engine */
147*16dce513Schristos blk = malloc(size + EXCESS);
148*16dce513Schristos def.zalloc = Z_NULL;
149*16dce513Schristos def.zfree = Z_NULL;
150*16dce513Schristos def.opaque = Z_NULL;
151*16dce513Schristos ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
152*16dce513Schristos if (ret != Z_OK || blk == NULL)
153*16dce513Schristos quit("out of memory");
154*16dce513Schristos
155*16dce513Schristos /* compress from stdin until output full, or no more input */
156*16dce513Schristos def.avail_out = size + EXCESS;
157*16dce513Schristos def.next_out = blk;
158*16dce513Schristos ret = partcompress(stdin, &def);
159*16dce513Schristos if (ret == Z_ERRNO)
160*16dce513Schristos quit("error reading input");
161*16dce513Schristos
162*16dce513Schristos /* if it all fit, then size was undersubscribed -- done! */
163*16dce513Schristos if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
164*16dce513Schristos /* write block to stdout */
165*16dce513Schristos have = size + EXCESS - def.avail_out;
166*16dce513Schristos if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
167*16dce513Schristos quit("error writing output");
168*16dce513Schristos
169*16dce513Schristos /* clean up and print results to stderr */
170*16dce513Schristos ret = deflateEnd(&def);
171*16dce513Schristos assert(ret != Z_STREAM_ERROR);
172*16dce513Schristos free(blk);
173*16dce513Schristos fprintf(stderr,
174*16dce513Schristos "%u bytes unused out of %u requested (all input)\n",
175*16dce513Schristos size - have, size);
176*16dce513Schristos return 0;
177*16dce513Schristos }
178*16dce513Schristos
179*16dce513Schristos /* it didn't all fit -- set up for recompression */
180*16dce513Schristos inf.zalloc = Z_NULL;
181*16dce513Schristos inf.zfree = Z_NULL;
182*16dce513Schristos inf.opaque = Z_NULL;
183*16dce513Schristos inf.avail_in = 0;
184*16dce513Schristos inf.next_in = Z_NULL;
185*16dce513Schristos ret = inflateInit(&inf);
186*16dce513Schristos tmp = malloc(size + EXCESS);
187*16dce513Schristos if (ret != Z_OK || tmp == NULL)
188*16dce513Schristos quit("out of memory");
189*16dce513Schristos ret = deflateReset(&def);
190*16dce513Schristos assert(ret != Z_STREAM_ERROR);
191*16dce513Schristos
192*16dce513Schristos /* do first recompression close to the right amount */
193*16dce513Schristos inf.avail_in = size + EXCESS;
194*16dce513Schristos inf.next_in = blk;
195*16dce513Schristos def.avail_out = size + EXCESS;
196*16dce513Schristos def.next_out = tmp;
197*16dce513Schristos ret = recompress(&inf, &def);
198*16dce513Schristos if (ret == Z_MEM_ERROR)
199*16dce513Schristos quit("out of memory");
200*16dce513Schristos
201*16dce513Schristos /* set up for next reocmpression */
202*16dce513Schristos ret = inflateReset(&inf);
203*16dce513Schristos assert(ret != Z_STREAM_ERROR);
204*16dce513Schristos ret = deflateReset(&def);
205*16dce513Schristos assert(ret != Z_STREAM_ERROR);
206*16dce513Schristos
207*16dce513Schristos /* do second and final recompression (third compression) */
208*16dce513Schristos inf.avail_in = size - MARGIN; /* assure stream will complete */
209*16dce513Schristos inf.next_in = tmp;
210*16dce513Schristos def.avail_out = size;
211*16dce513Schristos def.next_out = blk;
212*16dce513Schristos ret = recompress(&inf, &def);
213*16dce513Schristos if (ret == Z_MEM_ERROR)
214*16dce513Schristos quit("out of memory");
215*16dce513Schristos assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
216*16dce513Schristos
217*16dce513Schristos /* done -- write block to stdout */
218*16dce513Schristos have = size - def.avail_out;
219*16dce513Schristos if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
220*16dce513Schristos quit("error writing output");
221*16dce513Schristos
222*16dce513Schristos /* clean up and print results to stderr */
223*16dce513Schristos free(tmp);
224*16dce513Schristos ret = inflateEnd(&inf);
225*16dce513Schristos assert(ret != Z_STREAM_ERROR);
226*16dce513Schristos ret = deflateEnd(&def);
227*16dce513Schristos assert(ret != Z_STREAM_ERROR);
228*16dce513Schristos free(blk);
229*16dce513Schristos fprintf(stderr,
230*16dce513Schristos "%u bytes unused out of %u requested (%lu input)\n",
231*16dce513Schristos size - have, size, def.total_in);
232*16dce513Schristos return 0;
233*16dce513Schristos }
234