1*3117ece4Schristos /* fitblk.c: example of fitting compressed output to a specified size 2*3117ece4Schristos Not copyrighted -- provided to the public domain 3*3117ece4Schristos Version 1.1 25 November 2004 Mark Adler */ 4*3117ece4Schristos 5*3117ece4Schristos /* Version history: 6*3117ece4Schristos 1.0 24 Nov 2004 First version 7*3117ece4Schristos 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() 8*3117ece4Schristos Use fixed-size, stack-allocated raw buffers 9*3117ece4Schristos Simplify code moving compression to subroutines 10*3117ece4Schristos Use assert() for internal errors 11*3117ece4Schristos Add detailed description of approach 12*3117ece4Schristos */ 13*3117ece4Schristos 14*3117ece4Schristos /* Approach to just fitting a requested compressed size: 15*3117ece4Schristos 16*3117ece4Schristos fitblk performs three compression passes on a portion of the input 17*3117ece4Schristos data in order to determine how much of that input will compress to 18*3117ece4Schristos nearly the requested output block size. The first pass generates 19*3117ece4Schristos enough deflate blocks to produce output to fill the requested 20*3117ece4Schristos output size plus a specified excess amount (see the EXCESS define 21*3117ece4Schristos below). The last deflate block may go quite a bit past that, but 22*3117ece4Schristos is discarded. The second pass decompresses and recompresses just 23*3117ece4Schristos the compressed data that fit in the requested plus excess sized 24*3117ece4Schristos buffer. The deflate process is terminated after that amount of 25*3117ece4Schristos input, which is less than the amount consumed on the first pass. 26*3117ece4Schristos The last deflate block of the result will be of a comparable size 27*3117ece4Schristos to the final product, so that the header for that deflate block and 28*3117ece4Schristos the compression ratio for that block will be about the same as in 29*3117ece4Schristos the final product. The third compression pass decompresses the 30*3117ece4Schristos result of the second step, but only the compressed data up to the 31*3117ece4Schristos requested size minus an amount to allow the compressed stream to 32*3117ece4Schristos complete (see the MARGIN define below). That will result in a 33*3117ece4Schristos final compressed stream whose length is less than or equal to the 34*3117ece4Schristos requested size. Assuming sufficient input and a requested size 35*3117ece4Schristos greater than a few hundred bytes, the shortfall will typically be 36*3117ece4Schristos less than ten bytes. 37*3117ece4Schristos 38*3117ece4Schristos If the input is short enough that the first compression completes 39*3117ece4Schristos before filling the requested output size, then that compressed 40*3117ece4Schristos stream is return with no recompression. 41*3117ece4Schristos 42*3117ece4Schristos EXCESS is chosen to be just greater than the shortfall seen in a 43*3117ece4Schristos two pass approach similar to the above. That shortfall is due to 44*3117ece4Schristos the last deflate block compressing more efficiently with a smaller 45*3117ece4Schristos header on the second pass. EXCESS is set to be large enough so 46*3117ece4Schristos that there is enough uncompressed data for the second pass to fill 47*3117ece4Schristos out the requested size, and small enough so that the final deflate 48*3117ece4Schristos block of the second pass will be close in size to the final deflate 49*3117ece4Schristos block of the third and final pass. MARGIN is chosen to be just 50*3117ece4Schristos large enough to assure that the final compression has enough room 51*3117ece4Schristos to complete in all cases. 52*3117ece4Schristos */ 53*3117ece4Schristos 54*3117ece4Schristos #include <stdio.h> 55*3117ece4Schristos #include <stdlib.h> 56*3117ece4Schristos #include <assert.h> 57*3117ece4Schristos #include "zlib.h" 58*3117ece4Schristos 59*3117ece4Schristos #define local static 60*3117ece4Schristos 61*3117ece4Schristos /* print nastygram and leave */ 62*3117ece4Schristos local void quit(char *why) 63*3117ece4Schristos { 64*3117ece4Schristos fprintf(stderr, "fitblk abort: %s\n", why); 65*3117ece4Schristos exit(1); 66*3117ece4Schristos } 67*3117ece4Schristos 68*3117ece4Schristos #define RAWLEN 4096 /* intermediate uncompressed buffer size */ 69*3117ece4Schristos 70*3117ece4Schristos /* compress from file to def until provided buffer is full or end of 71*3117ece4Schristos input reached; return last deflate() return value, or Z_ERRNO if 72*3117ece4Schristos there was read error on the file */ 73*3117ece4Schristos local int partcompress(FILE *in, z_streamp def) 74*3117ece4Schristos { 75*3117ece4Schristos int ret, flush; 76*3117ece4Schristos unsigned char raw[RAWLEN]; 77*3117ece4Schristos 78*3117ece4Schristos flush = Z_NO_FLUSH; 79*3117ece4Schristos do { 80*3117ece4Schristos def->avail_in = fread(raw, 1, RAWLEN, in); 81*3117ece4Schristos if (ferror(in)) 82*3117ece4Schristos return Z_ERRNO; 83*3117ece4Schristos def->next_in = raw; 84*3117ece4Schristos if (feof(in)) 85*3117ece4Schristos flush = Z_FINISH; 86*3117ece4Schristos ret = deflate(def, flush); 87*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 88*3117ece4Schristos } while (def->avail_out != 0 && flush == Z_NO_FLUSH); 89*3117ece4Schristos return ret; 90*3117ece4Schristos } 91*3117ece4Schristos 92*3117ece4Schristos /* recompress from inf's input to def's output; the input for inf and 93*3117ece4Schristos the output for def are set in those structures before calling; 94*3117ece4Schristos return last deflate() return value, or Z_MEM_ERROR if inflate() 95*3117ece4Schristos was not able to allocate enough memory when it needed to */ 96*3117ece4Schristos local int recompress(z_streamp inf, z_streamp def) 97*3117ece4Schristos { 98*3117ece4Schristos int ret, flush; 99*3117ece4Schristos unsigned char raw[RAWLEN]; 100*3117ece4Schristos 101*3117ece4Schristos flush = Z_NO_FLUSH; 102*3117ece4Schristos do { 103*3117ece4Schristos /* decompress */ 104*3117ece4Schristos inf->avail_out = RAWLEN; 105*3117ece4Schristos inf->next_out = raw; 106*3117ece4Schristos ret = inflate(inf, Z_NO_FLUSH); 107*3117ece4Schristos assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && 108*3117ece4Schristos ret != Z_NEED_DICT); 109*3117ece4Schristos if (ret == Z_MEM_ERROR) 110*3117ece4Schristos return ret; 111*3117ece4Schristos 112*3117ece4Schristos /* compress what was decompressed until done or no room */ 113*3117ece4Schristos def->avail_in = RAWLEN - inf->avail_out; 114*3117ece4Schristos def->next_in = raw; 115*3117ece4Schristos if (inf->avail_out != 0) 116*3117ece4Schristos flush = Z_FINISH; 117*3117ece4Schristos ret = deflate(def, flush); 118*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 119*3117ece4Schristos } while (ret != Z_STREAM_END && def->avail_out != 0); 120*3117ece4Schristos return ret; 121*3117ece4Schristos } 122*3117ece4Schristos 123*3117ece4Schristos #define EXCESS 256 /* empirically determined stream overage */ 124*3117ece4Schristos #define MARGIN 8 /* amount to back off for completion */ 125*3117ece4Schristos 126*3117ece4Schristos /* compress from stdin to fixed-size block on stdout */ 127*3117ece4Schristos int main(int argc, char **argv) 128*3117ece4Schristos { 129*3117ece4Schristos int ret; /* return code */ 130*3117ece4Schristos unsigned size; /* requested fixed output block size */ 131*3117ece4Schristos unsigned have; /* bytes written by deflate() call */ 132*3117ece4Schristos unsigned char *blk; /* intermediate and final stream */ 133*3117ece4Schristos unsigned char *tmp; /* close to desired size stream */ 134*3117ece4Schristos z_stream def, inf; /* zlib deflate and inflate states */ 135*3117ece4Schristos 136*3117ece4Schristos /* get requested output size */ 137*3117ece4Schristos if (argc != 2) 138*3117ece4Schristos quit("need one argument: size of output block"); 139*3117ece4Schristos ret = strtol(argv[1], argv + 1, 10); 140*3117ece4Schristos if (argv[1][0] != 0) 141*3117ece4Schristos quit("argument must be a number"); 142*3117ece4Schristos if (ret < 8) /* 8 is minimum zlib stream size */ 143*3117ece4Schristos quit("need positive size of 8 or greater"); 144*3117ece4Schristos size = (unsigned)ret; 145*3117ece4Schristos 146*3117ece4Schristos /* allocate memory for buffers and compression engine */ 147*3117ece4Schristos blk = malloc(size + EXCESS); 148*3117ece4Schristos def.zalloc = Z_NULL; 149*3117ece4Schristos def.zfree = Z_NULL; 150*3117ece4Schristos def.opaque = Z_NULL; 151*3117ece4Schristos ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); 152*3117ece4Schristos if (ret != Z_OK || blk == NULL) 153*3117ece4Schristos quit("out of memory"); 154*3117ece4Schristos 155*3117ece4Schristos /* compress from stdin until output full, or no more input */ 156*3117ece4Schristos def.avail_out = size + EXCESS; 157*3117ece4Schristos def.next_out = blk; 158*3117ece4Schristos ret = partcompress(stdin, &def); 159*3117ece4Schristos if (ret == Z_ERRNO) 160*3117ece4Schristos quit("error reading input"); 161*3117ece4Schristos 162*3117ece4Schristos /* if it all fit, then size was undersubscribed -- done! */ 163*3117ece4Schristos if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { 164*3117ece4Schristos /* write block to stdout */ 165*3117ece4Schristos have = size + EXCESS - def.avail_out; 166*3117ece4Schristos if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) 167*3117ece4Schristos quit("error writing output"); 168*3117ece4Schristos 169*3117ece4Schristos /* clean up and print results to stderr */ 170*3117ece4Schristos ret = deflateEnd(&def); 171*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 172*3117ece4Schristos free(blk); 173*3117ece4Schristos fprintf(stderr, 174*3117ece4Schristos "%u bytes unused out of %u requested (all input)\n", 175*3117ece4Schristos size - have, size); 176*3117ece4Schristos return 0; 177*3117ece4Schristos } 178*3117ece4Schristos 179*3117ece4Schristos /* it didn't all fit -- set up for recompression */ 180*3117ece4Schristos inf.zalloc = Z_NULL; 181*3117ece4Schristos inf.zfree = Z_NULL; 182*3117ece4Schristos inf.opaque = Z_NULL; 183*3117ece4Schristos inf.avail_in = 0; 184*3117ece4Schristos inf.next_in = Z_NULL; 185*3117ece4Schristos ret = inflateInit(&inf); 186*3117ece4Schristos tmp = malloc(size + EXCESS); 187*3117ece4Schristos if (ret != Z_OK || tmp == NULL) 188*3117ece4Schristos quit("out of memory"); 189*3117ece4Schristos ret = deflateReset(&def); 190*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 191*3117ece4Schristos 192*3117ece4Schristos /* do first recompression close to the right amount */ 193*3117ece4Schristos inf.avail_in = size + EXCESS; 194*3117ece4Schristos inf.next_in = blk; 195*3117ece4Schristos def.avail_out = size + EXCESS; 196*3117ece4Schristos def.next_out = tmp; 197*3117ece4Schristos ret = recompress(&inf, &def); 198*3117ece4Schristos if (ret == Z_MEM_ERROR) 199*3117ece4Schristos quit("out of memory"); 200*3117ece4Schristos 201*3117ece4Schristos /* set up for next recompression */ 202*3117ece4Schristos ret = inflateReset(&inf); 203*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 204*3117ece4Schristos ret = deflateReset(&def); 205*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 206*3117ece4Schristos 207*3117ece4Schristos /* do second and final recompression (third compression) */ 208*3117ece4Schristos inf.avail_in = size - MARGIN; /* assure stream will complete */ 209*3117ece4Schristos inf.next_in = tmp; 210*3117ece4Schristos def.avail_out = size; 211*3117ece4Schristos def.next_out = blk; 212*3117ece4Schristos ret = recompress(&inf, &def); 213*3117ece4Schristos if (ret == Z_MEM_ERROR) 214*3117ece4Schristos quit("out of memory"); 215*3117ece4Schristos assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ 216*3117ece4Schristos 217*3117ece4Schristos /* done -- write block to stdout */ 218*3117ece4Schristos have = size - def.avail_out; 219*3117ece4Schristos if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) 220*3117ece4Schristos quit("error writing output"); 221*3117ece4Schristos 222*3117ece4Schristos /* clean up and print results to stderr */ 223*3117ece4Schristos free(tmp); 224*3117ece4Schristos ret = inflateEnd(&inf); 225*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 226*3117ece4Schristos ret = deflateEnd(&def); 227*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 228*3117ece4Schristos free(blk); 229*3117ece4Schristos fprintf(stderr, 230*3117ece4Schristos "%u bytes unused out of %u requested (%lu input)\n", 231*3117ece4Schristos size - have, size, def.total_in); 232*3117ece4Schristos return 0; 233*3117ece4Schristos } 234