1*3117ece4Schristos /* fitblk.c contains minimal changes required to be compiled with zlibWrapper: 2*3117ece4Schristos * - #include "zlib.h" was changed to #include "zstd_zlibwrapper.h" 3*3117ece4Schristos * - writing block to stdout was disabled */ 4*3117ece4Schristos 5*3117ece4Schristos /* fitblk.c: example of fitting compressed output to a specified size 6*3117ece4Schristos Not copyrighted -- provided to the public domain 7*3117ece4Schristos Version 1.1 25 November 2004 Mark Adler */ 8*3117ece4Schristos 9*3117ece4Schristos /* Version history: 10*3117ece4Schristos 1.0 24 Nov 2004 First version 11*3117ece4Schristos 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() 12*3117ece4Schristos Use fixed-size, stack-allocated raw buffers 13*3117ece4Schristos Simplify code moving compression to subroutines 14*3117ece4Schristos Use assert() for internal errors 15*3117ece4Schristos Add detailed description of approach 16*3117ece4Schristos */ 17*3117ece4Schristos 18*3117ece4Schristos /* Approach to just fitting a requested compressed size: 19*3117ece4Schristos 20*3117ece4Schristos fitblk performs three compression passes on a portion of the input 21*3117ece4Schristos data in order to determine how much of that input will compress to 22*3117ece4Schristos nearly the requested output block size. The first pass generates 23*3117ece4Schristos enough deflate blocks to produce output to fill the requested 24*3117ece4Schristos output size plus a specified excess amount (see the EXCESS define 25*3117ece4Schristos below). The last deflate block may go quite a bit past that, but 26*3117ece4Schristos is discarded. The second pass decompresses and recompresses just 27*3117ece4Schristos the compressed data that fit in the requested plus excess sized 28*3117ece4Schristos buffer. The deflate process is terminated after that amount of 29*3117ece4Schristos input, which is less than the amount consumed on the first pass. 30*3117ece4Schristos The last deflate block of the result will be of a comparable size 31*3117ece4Schristos to the final product, so that the header for that deflate block and 32*3117ece4Schristos the compression ratio for that block will be about the same as in 33*3117ece4Schristos the final product. The third compression pass decompresses the 34*3117ece4Schristos result of the second step, but only the compressed data up to the 35*3117ece4Schristos requested size minus an amount to allow the compressed stream to 36*3117ece4Schristos complete (see the MARGIN define below). That will result in a 37*3117ece4Schristos final compressed stream whose length is less than or equal to the 38*3117ece4Schristos requested size. Assuming sufficient input and a requested size 39*3117ece4Schristos greater than a few hundred bytes, the shortfall will typically be 40*3117ece4Schristos less than ten bytes. 41*3117ece4Schristos 42*3117ece4Schristos If the input is short enough that the first compression completes 43*3117ece4Schristos before filling the requested output size, then that compressed 44*3117ece4Schristos stream is return with no recompression. 45*3117ece4Schristos 46*3117ece4Schristos EXCESS is chosen to be just greater than the shortfall seen in a 47*3117ece4Schristos two pass approach similar to the above. That shortfall is due to 48*3117ece4Schristos the last deflate block compressing more efficiently with a smaller 49*3117ece4Schristos header on the second pass. EXCESS is set to be large enough so 50*3117ece4Schristos that there is enough uncompressed data for the second pass to fill 51*3117ece4Schristos out the requested size, and small enough so that the final deflate 52*3117ece4Schristos block of the second pass will be close in size to the final deflate 53*3117ece4Schristos block of the third and final pass. MARGIN is chosen to be just 54*3117ece4Schristos large enough to assure that the final compression has enough room 55*3117ece4Schristos to complete in all cases. 56*3117ece4Schristos */ 57*3117ece4Schristos 58*3117ece4Schristos #include <stdio.h> 59*3117ece4Schristos #include <stdlib.h> 60*3117ece4Schristos #include <assert.h> 61*3117ece4Schristos #include "zstd_zlibwrapper.h" 62*3117ece4Schristos 63*3117ece4Schristos #define LOG_FITBLK(...) /*printf(__VA_ARGS__)*/ 64*3117ece4Schristos #define local static 65*3117ece4Schristos 66*3117ece4Schristos /* print nastygram and leave */ 67*3117ece4Schristos local void quit(char *why) 68*3117ece4Schristos { 69*3117ece4Schristos fprintf(stderr, "fitblk abort: %s\n", why); 70*3117ece4Schristos exit(1); 71*3117ece4Schristos } 72*3117ece4Schristos 73*3117ece4Schristos #define RAWLEN 4096 /* intermediate uncompressed buffer size */ 74*3117ece4Schristos 75*3117ece4Schristos /* compress from file to def until provided buffer is full or end of 76*3117ece4Schristos input reached; return last deflate() return value, or Z_ERRNO if 77*3117ece4Schristos there was read error on the file */ 78*3117ece4Schristos local int partcompress(FILE *in, z_streamp def) 79*3117ece4Schristos { 80*3117ece4Schristos int ret, flush; 81*3117ece4Schristos unsigned char raw[RAWLEN]; 82*3117ece4Schristos 83*3117ece4Schristos flush = Z_SYNC_FLUSH; 84*3117ece4Schristos do { 85*3117ece4Schristos def->avail_in = (uInt)fread(raw, 1, RAWLEN, in); 86*3117ece4Schristos if (ferror(in)) 87*3117ece4Schristos return Z_ERRNO; 88*3117ece4Schristos def->next_in = raw; 89*3117ece4Schristos if (feof(in)) 90*3117ece4Schristos flush = Z_FINISH; 91*3117ece4Schristos LOG_FITBLK("partcompress1 avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); 92*3117ece4Schristos ret = deflate(def, flush); 93*3117ece4Schristos LOG_FITBLK("partcompress2 ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); 94*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 95*3117ece4Schristos } while (def->avail_out != 0 && flush == Z_SYNC_FLUSH); 96*3117ece4Schristos return ret; 97*3117ece4Schristos } 98*3117ece4Schristos 99*3117ece4Schristos /* recompress from inf's input to def's output; the input for inf and 100*3117ece4Schristos the output for def are set in those structures before calling; 101*3117ece4Schristos return last deflate() return value, or Z_MEM_ERROR if inflate() 102*3117ece4Schristos was not able to allocate enough memory when it needed to */ 103*3117ece4Schristos local int recompress(z_streamp inf, z_streamp def) 104*3117ece4Schristos { 105*3117ece4Schristos int ret, flush; 106*3117ece4Schristos unsigned char raw[RAWLEN]; 107*3117ece4Schristos 108*3117ece4Schristos flush = Z_NO_FLUSH; 109*3117ece4Schristos LOG_FITBLK("recompress start\n"); 110*3117ece4Schristos do { 111*3117ece4Schristos /* decompress */ 112*3117ece4Schristos inf->avail_out = RAWLEN; 113*3117ece4Schristos inf->next_out = raw; 114*3117ece4Schristos LOG_FITBLK("recompress1inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out); 115*3117ece4Schristos ret = inflate(inf, Z_NO_FLUSH); 116*3117ece4Schristos LOG_FITBLK("recompress2inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out); 117*3117ece4Schristos assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && 118*3117ece4Schristos ret != Z_NEED_DICT); 119*3117ece4Schristos if (ret == Z_MEM_ERROR) 120*3117ece4Schristos return ret; 121*3117ece4Schristos 122*3117ece4Schristos /* compress what was decompressed until done or no room */ 123*3117ece4Schristos def->avail_in = RAWLEN - inf->avail_out; 124*3117ece4Schristos def->next_in = raw; 125*3117ece4Schristos if (inf->avail_out != 0) 126*3117ece4Schristos flush = Z_FINISH; 127*3117ece4Schristos LOG_FITBLK("recompress1deflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); 128*3117ece4Schristos ret = deflate(def, flush); 129*3117ece4Schristos LOG_FITBLK("recompress2deflate ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); 130*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 131*3117ece4Schristos } while (ret != Z_STREAM_END && def->avail_out != 0); 132*3117ece4Schristos return ret; 133*3117ece4Schristos } 134*3117ece4Schristos 135*3117ece4Schristos #define EXCESS 256 /* empirically determined stream overage */ 136*3117ece4Schristos #define MARGIN 8 /* amount to back off for completion */ 137*3117ece4Schristos 138*3117ece4Schristos /* compress from stdin to fixed-size block on stdout */ 139*3117ece4Schristos int main(int argc, char **argv) 140*3117ece4Schristos { 141*3117ece4Schristos int ret; /* return code */ 142*3117ece4Schristos unsigned size; /* requested fixed output block size */ 143*3117ece4Schristos unsigned have; /* bytes written by deflate() call */ 144*3117ece4Schristos unsigned char *blk; /* intermediate and final stream */ 145*3117ece4Schristos unsigned char *tmp; /* close to desired size stream */ 146*3117ece4Schristos z_stream def, inf; /* zlib deflate and inflate states */ 147*3117ece4Schristos 148*3117ece4Schristos /* get requested output size */ 149*3117ece4Schristos if (argc != 2) 150*3117ece4Schristos quit("need one argument: size of output block"); 151*3117ece4Schristos ret = (int)strtol(argv[1], argv + 1, 10); 152*3117ece4Schristos if (argv[1][0] != 0) 153*3117ece4Schristos quit("argument must be a number"); 154*3117ece4Schristos if (ret < 8) /* 8 is minimum zlib stream size */ 155*3117ece4Schristos quit("need positive size of 8 or greater"); 156*3117ece4Schristos size = (unsigned)ret; 157*3117ece4Schristos 158*3117ece4Schristos printf("zlib version %s\n", ZLIB_VERSION); 159*3117ece4Schristos if (ZWRAP_isUsingZSTDcompression()) printf("zstd version %s\n", zstdVersion()); 160*3117ece4Schristos 161*3117ece4Schristos /* allocate memory for buffers and compression engine */ 162*3117ece4Schristos blk = (unsigned char*)malloc(size + EXCESS); 163*3117ece4Schristos def.zalloc = Z_NULL; 164*3117ece4Schristos def.zfree = Z_NULL; 165*3117ece4Schristos def.opaque = Z_NULL; 166*3117ece4Schristos ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); 167*3117ece4Schristos if (ret != Z_OK || blk == NULL) 168*3117ece4Schristos quit("out of memory"); 169*3117ece4Schristos 170*3117ece4Schristos /* compress from stdin until output full, or no more input */ 171*3117ece4Schristos def.avail_out = size + EXCESS; 172*3117ece4Schristos def.next_out = blk; 173*3117ece4Schristos LOG_FITBLK("partcompress1 total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out); 174*3117ece4Schristos ret = partcompress(stdin, &def); 175*3117ece4Schristos printf("partcompress total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out); 176*3117ece4Schristos if (ret == Z_ERRNO) 177*3117ece4Schristos quit("error reading input"); 178*3117ece4Schristos 179*3117ece4Schristos /* if it all fit, then size was undersubscribed -- done! */ 180*3117ece4Schristos if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { 181*3117ece4Schristos /* write block to stdout */ 182*3117ece4Schristos have = size + EXCESS - def.avail_out; 183*3117ece4Schristos /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) 184*3117ece4Schristos * quit("error writing output"); */ 185*3117ece4Schristos 186*3117ece4Schristos /* clean up and print results to stderr */ 187*3117ece4Schristos ret = deflateEnd(&def); 188*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 189*3117ece4Schristos free(blk); 190*3117ece4Schristos fprintf(stderr, 191*3117ece4Schristos "%u bytes unused out of %u requested (all input)\n", 192*3117ece4Schristos size - have, size); 193*3117ece4Schristos return 0; 194*3117ece4Schristos } 195*3117ece4Schristos 196*3117ece4Schristos /* it didn't all fit -- set up for recompression */ 197*3117ece4Schristos inf.zalloc = Z_NULL; 198*3117ece4Schristos inf.zfree = Z_NULL; 199*3117ece4Schristos inf.opaque = Z_NULL; 200*3117ece4Schristos inf.avail_in = 0; 201*3117ece4Schristos inf.next_in = Z_NULL; 202*3117ece4Schristos ret = inflateInit(&inf); 203*3117ece4Schristos tmp = (unsigned char*)malloc(size + EXCESS); 204*3117ece4Schristos if (ret != Z_OK || tmp == NULL) 205*3117ece4Schristos quit("out of memory"); 206*3117ece4Schristos ret = deflateReset(&def); 207*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 208*3117ece4Schristos 209*3117ece4Schristos /* do first recompression close to the right amount */ 210*3117ece4Schristos inf.avail_in = size + EXCESS; 211*3117ece4Schristos inf.next_in = blk; 212*3117ece4Schristos def.avail_out = size + EXCESS; 213*3117ece4Schristos def.next_out = tmp; 214*3117ece4Schristos LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); 215*3117ece4Schristos ret = recompress(&inf, &def); 216*3117ece4Schristos LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); 217*3117ece4Schristos if (ret == Z_MEM_ERROR) 218*3117ece4Schristos quit("out of memory"); 219*3117ece4Schristos 220*3117ece4Schristos /* set up for next recompression */ 221*3117ece4Schristos ret = inflateReset(&inf); 222*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 223*3117ece4Schristos ret = deflateReset(&def); 224*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 225*3117ece4Schristos 226*3117ece4Schristos /* do second and final recompression (third compression) */ 227*3117ece4Schristos inf.avail_in = size - MARGIN; /* assure stream will complete */ 228*3117ece4Schristos inf.next_in = tmp; 229*3117ece4Schristos def.avail_out = size; 230*3117ece4Schristos def.next_out = blk; 231*3117ece4Schristos LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); 232*3117ece4Schristos ret = recompress(&inf, &def); 233*3117ece4Schristos LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); 234*3117ece4Schristos if (ret == Z_MEM_ERROR) 235*3117ece4Schristos quit("out of memory"); 236*3117ece4Schristos assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ 237*3117ece4Schristos 238*3117ece4Schristos /* done -- write block to stdout */ 239*3117ece4Schristos have = size - def.avail_out; 240*3117ece4Schristos /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) 241*3117ece4Schristos * quit("error writing output"); */ 242*3117ece4Schristos 243*3117ece4Schristos /* clean up and print results to stderr */ 244*3117ece4Schristos free(tmp); 245*3117ece4Schristos ret = inflateEnd(&inf); 246*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 247*3117ece4Schristos ret = deflateEnd(&def); 248*3117ece4Schristos assert(ret != Z_STREAM_ERROR); 249*3117ece4Schristos free(blk); 250*3117ece4Schristos fprintf(stderr, 251*3117ece4Schristos "%u bytes unused out of %u requested (%lu input)\n", 252*3117ece4Schristos size - have, size, def.total_in); 253*3117ece4Schristos return 0; 254*3117ece4Schristos } 255