xref: /netbsd-src/external/bsd/zstd/dist/zlibWrapper/examples/fitblk.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /* fitblk.c contains minimal changes required to be compiled with zlibWrapper:
2*3117ece4Schristos  * - #include "zlib.h" was changed to #include "zstd_zlibwrapper.h"
3*3117ece4Schristos  * - writing block to stdout was disabled                          */
4*3117ece4Schristos 
5*3117ece4Schristos /* fitblk.c: example of fitting compressed output to a specified size
6*3117ece4Schristos    Not copyrighted -- provided to the public domain
7*3117ece4Schristos    Version 1.1  25 November 2004  Mark Adler */
8*3117ece4Schristos 
9*3117ece4Schristos /* Version history:
10*3117ece4Schristos    1.0  24 Nov 2004  First version
11*3117ece4Schristos    1.1  25 Nov 2004  Change deflateInit2() to deflateInit()
12*3117ece4Schristos                      Use fixed-size, stack-allocated raw buffers
13*3117ece4Schristos                      Simplify code moving compression to subroutines
14*3117ece4Schristos                      Use assert() for internal errors
15*3117ece4Schristos                      Add detailed description of approach
16*3117ece4Schristos  */
17*3117ece4Schristos 
18*3117ece4Schristos /* Approach to just fitting a requested compressed size:
19*3117ece4Schristos 
20*3117ece4Schristos    fitblk performs three compression passes on a portion of the input
21*3117ece4Schristos    data in order to determine how much of that input will compress to
22*3117ece4Schristos    nearly the requested output block size.  The first pass generates
23*3117ece4Schristos    enough deflate blocks to produce output to fill the requested
24*3117ece4Schristos    output size plus a specified excess amount (see the EXCESS define
25*3117ece4Schristos    below).  The last deflate block may go quite a bit past that, but
26*3117ece4Schristos    is discarded.  The second pass decompresses and recompresses just
27*3117ece4Schristos    the compressed data that fit in the requested plus excess sized
28*3117ece4Schristos    buffer.  The deflate process is terminated after that amount of
29*3117ece4Schristos    input, which is less than the amount consumed on the first pass.
30*3117ece4Schristos    The last deflate block of the result will be of a comparable size
31*3117ece4Schristos    to the final product, so that the header for that deflate block and
32*3117ece4Schristos    the compression ratio for that block will be about the same as in
33*3117ece4Schristos    the final product.  The third compression pass decompresses the
34*3117ece4Schristos    result of the second step, but only the compressed data up to the
35*3117ece4Schristos    requested size minus an amount to allow the compressed stream to
36*3117ece4Schristos    complete (see the MARGIN define below).  That will result in a
37*3117ece4Schristos    final compressed stream whose length is less than or equal to the
38*3117ece4Schristos    requested size.  Assuming sufficient input and a requested size
39*3117ece4Schristos    greater than a few hundred bytes, the shortfall will typically be
40*3117ece4Schristos    less than ten bytes.
41*3117ece4Schristos 
   If the input is short enough that the first compression completes
   before filling the requested output size, then that compressed
   stream is returned with no recompression.
45*3117ece4Schristos 
46*3117ece4Schristos    EXCESS is chosen to be just greater than the shortfall seen in a
47*3117ece4Schristos    two pass approach similar to the above.  That shortfall is due to
48*3117ece4Schristos    the last deflate block compressing more efficiently with a smaller
49*3117ece4Schristos    header on the second pass.  EXCESS is set to be large enough so
50*3117ece4Schristos    that there is enough uncompressed data for the second pass to fill
51*3117ece4Schristos    out the requested size, and small enough so that the final deflate
52*3117ece4Schristos    block of the second pass will be close in size to the final deflate
53*3117ece4Schristos    block of the third and final pass.  MARGIN is chosen to be just
54*3117ece4Schristos    large enough to assure that the final compression has enough room
55*3117ece4Schristos    to complete in all cases.
56*3117ece4Schristos  */
57*3117ece4Schristos 
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "zstd_zlibwrapper.h"
62*3117ece4Schristos 
/* Trace hook used throughout this file.  As written it expands to nothing;
   the printf() call is kept inside a comment so tracing can be re-enabled
   by uncommenting it. */
#define LOG_FITBLK(...)   /*printf(__VA_ARGS__)*/
#define local static

/* Print "fitblk abort: <why>" to stderr and terminate the process with
   exit status 1.  This function does not return.  The message is only
   read, never modified, so it is taken as a pointer to const; string
   literals can then be passed without discarding qualifiers. */
local void quit(const char *why)
{
    fprintf(stderr, "fitblk abort: %s\n", why);
    exit(1);
}
72*3117ece4Schristos 
73*3117ece4Schristos #define RAWLEN 4096    /* intermediate uncompressed buffer size */
74*3117ece4Schristos 
75*3117ece4Schristos /* compress from file to def until provided buffer is full or end of
76*3117ece4Schristos    input reached; return last deflate() return value, or Z_ERRNO if
77*3117ece4Schristos    there was read error on the file */
78*3117ece4Schristos local int partcompress(FILE *in, z_streamp def)
79*3117ece4Schristos {
80*3117ece4Schristos     int ret, flush;
81*3117ece4Schristos     unsigned char raw[RAWLEN];
82*3117ece4Schristos 
83*3117ece4Schristos     flush = Z_SYNC_FLUSH;
84*3117ece4Schristos     do {
85*3117ece4Schristos         def->avail_in = (uInt)fread(raw, 1, RAWLEN, in);
86*3117ece4Schristos         if (ferror(in))
87*3117ece4Schristos             return Z_ERRNO;
88*3117ece4Schristos         def->next_in = raw;
89*3117ece4Schristos         if (feof(in))
90*3117ece4Schristos             flush = Z_FINISH;
91*3117ece4Schristos         LOG_FITBLK("partcompress1 avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
92*3117ece4Schristos         ret = deflate(def, flush);
93*3117ece4Schristos         LOG_FITBLK("partcompress2 ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
94*3117ece4Schristos         assert(ret != Z_STREAM_ERROR);
95*3117ece4Schristos     } while (def->avail_out != 0 && flush == Z_SYNC_FLUSH);
96*3117ece4Schristos     return ret;
97*3117ece4Schristos }
98*3117ece4Schristos 
99*3117ece4Schristos /* recompress from inf's input to def's output; the input for inf and
100*3117ece4Schristos    the output for def are set in those structures before calling;
101*3117ece4Schristos    return last deflate() return value, or Z_MEM_ERROR if inflate()
102*3117ece4Schristos    was not able to allocate enough memory when it needed to */
103*3117ece4Schristos local int recompress(z_streamp inf, z_streamp def)
104*3117ece4Schristos {
105*3117ece4Schristos     int ret, flush;
106*3117ece4Schristos     unsigned char raw[RAWLEN];
107*3117ece4Schristos 
108*3117ece4Schristos     flush = Z_NO_FLUSH;
109*3117ece4Schristos     LOG_FITBLK("recompress start\n");
110*3117ece4Schristos     do {
111*3117ece4Schristos         /* decompress */
112*3117ece4Schristos         inf->avail_out = RAWLEN;
113*3117ece4Schristos         inf->next_out = raw;
114*3117ece4Schristos         LOG_FITBLK("recompress1inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out);
115*3117ece4Schristos         ret = inflate(inf, Z_NO_FLUSH);
116*3117ece4Schristos         LOG_FITBLK("recompress2inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out);
117*3117ece4Schristos         assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
118*3117ece4Schristos                ret != Z_NEED_DICT);
119*3117ece4Schristos         if (ret == Z_MEM_ERROR)
120*3117ece4Schristos             return ret;
121*3117ece4Schristos 
122*3117ece4Schristos         /* compress what was decompressed until done or no room */
123*3117ece4Schristos         def->avail_in = RAWLEN - inf->avail_out;
124*3117ece4Schristos         def->next_in = raw;
125*3117ece4Schristos         if (inf->avail_out != 0)
126*3117ece4Schristos             flush = Z_FINISH;
127*3117ece4Schristos         LOG_FITBLK("recompress1deflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
128*3117ece4Schristos         ret = deflate(def, flush);
129*3117ece4Schristos         LOG_FITBLK("recompress2deflate ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
130*3117ece4Schristos         assert(ret != Z_STREAM_ERROR);
131*3117ece4Schristos     } while (ret != Z_STREAM_END && def->avail_out != 0);
132*3117ece4Schristos     return ret;
133*3117ece4Schristos }
134*3117ece4Schristos 
135*3117ece4Schristos #define EXCESS 256      /* empirically determined stream overage */
136*3117ece4Schristos #define MARGIN 8        /* amount to back off for completion */
137*3117ece4Schristos 
138*3117ece4Schristos /* compress from stdin to fixed-size block on stdout */
139*3117ece4Schristos int main(int argc, char **argv)
140*3117ece4Schristos {
141*3117ece4Schristos     int ret;                /* return code */
142*3117ece4Schristos     unsigned size;          /* requested fixed output block size */
143*3117ece4Schristos     unsigned have;          /* bytes written by deflate() call */
144*3117ece4Schristos     unsigned char *blk;     /* intermediate and final stream */
145*3117ece4Schristos     unsigned char *tmp;     /* close to desired size stream */
146*3117ece4Schristos     z_stream def, inf;      /* zlib deflate and inflate states */
147*3117ece4Schristos 
148*3117ece4Schristos     /* get requested output size */
149*3117ece4Schristos     if (argc != 2)
150*3117ece4Schristos         quit("need one argument: size of output block");
151*3117ece4Schristos     ret = (int)strtol(argv[1], argv + 1, 10);
152*3117ece4Schristos     if (argv[1][0] != 0)
153*3117ece4Schristos         quit("argument must be a number");
154*3117ece4Schristos     if (ret < 8)            /* 8 is minimum zlib stream size */
155*3117ece4Schristos         quit("need positive size of 8 or greater");
156*3117ece4Schristos     size = (unsigned)ret;
157*3117ece4Schristos 
158*3117ece4Schristos     printf("zlib version %s\n", ZLIB_VERSION);
159*3117ece4Schristos     if (ZWRAP_isUsingZSTDcompression()) printf("zstd version %s\n", zstdVersion());
160*3117ece4Schristos 
161*3117ece4Schristos     /* allocate memory for buffers and compression engine */
162*3117ece4Schristos     blk = (unsigned char*)malloc(size + EXCESS);
163*3117ece4Schristos     def.zalloc = Z_NULL;
164*3117ece4Schristos     def.zfree = Z_NULL;
165*3117ece4Schristos     def.opaque = Z_NULL;
166*3117ece4Schristos     ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
167*3117ece4Schristos     if (ret != Z_OK || blk == NULL)
168*3117ece4Schristos         quit("out of memory");
169*3117ece4Schristos 
170*3117ece4Schristos     /* compress from stdin until output full, or no more input */
171*3117ece4Schristos     def.avail_out = size + EXCESS;
172*3117ece4Schristos     def.next_out = blk;
173*3117ece4Schristos     LOG_FITBLK("partcompress1 total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out);
174*3117ece4Schristos     ret = partcompress(stdin, &def);
175*3117ece4Schristos     printf("partcompress total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out);
176*3117ece4Schristos     if (ret == Z_ERRNO)
177*3117ece4Schristos         quit("error reading input");
178*3117ece4Schristos 
179*3117ece4Schristos     /* if it all fit, then size was undersubscribed -- done! */
180*3117ece4Schristos     if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
181*3117ece4Schristos         /* write block to stdout */
182*3117ece4Schristos         have = size + EXCESS - def.avail_out;
183*3117ece4Schristos    /*     if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
184*3117ece4Schristos     *         quit("error writing output"); */
185*3117ece4Schristos 
186*3117ece4Schristos         /* clean up and print results to stderr */
187*3117ece4Schristos         ret = deflateEnd(&def);
188*3117ece4Schristos         assert(ret != Z_STREAM_ERROR);
189*3117ece4Schristos         free(blk);
190*3117ece4Schristos         fprintf(stderr,
191*3117ece4Schristos                 "%u bytes unused out of %u requested (all input)\n",
192*3117ece4Schristos                 size - have, size);
193*3117ece4Schristos         return 0;
194*3117ece4Schristos     }
195*3117ece4Schristos 
196*3117ece4Schristos     /* it didn't all fit -- set up for recompression */
197*3117ece4Schristos     inf.zalloc = Z_NULL;
198*3117ece4Schristos     inf.zfree = Z_NULL;
199*3117ece4Schristos     inf.opaque = Z_NULL;
200*3117ece4Schristos     inf.avail_in = 0;
201*3117ece4Schristos     inf.next_in = Z_NULL;
202*3117ece4Schristos     ret = inflateInit(&inf);
203*3117ece4Schristos     tmp = (unsigned char*)malloc(size + EXCESS);
204*3117ece4Schristos     if (ret != Z_OK || tmp == NULL)
205*3117ece4Schristos         quit("out of memory");
206*3117ece4Schristos     ret = deflateReset(&def);
207*3117ece4Schristos     assert(ret != Z_STREAM_ERROR);
208*3117ece4Schristos 
209*3117ece4Schristos     /* do first recompression close to the right amount */
210*3117ece4Schristos     inf.avail_in = size + EXCESS;
211*3117ece4Schristos     inf.next_in = blk;
212*3117ece4Schristos     def.avail_out = size + EXCESS;
213*3117ece4Schristos     def.next_out = tmp;
214*3117ece4Schristos     LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
215*3117ece4Schristos     ret = recompress(&inf, &def);
216*3117ece4Schristos     LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
217*3117ece4Schristos     if (ret == Z_MEM_ERROR)
218*3117ece4Schristos         quit("out of memory");
219*3117ece4Schristos 
220*3117ece4Schristos     /* set up for next recompression */
221*3117ece4Schristos     ret = inflateReset(&inf);
222*3117ece4Schristos     assert(ret != Z_STREAM_ERROR);
223*3117ece4Schristos     ret = deflateReset(&def);
224*3117ece4Schristos     assert(ret != Z_STREAM_ERROR);
225*3117ece4Schristos 
226*3117ece4Schristos     /* do second and final recompression (third compression) */
227*3117ece4Schristos     inf.avail_in = size - MARGIN;   /* assure stream will complete */
228*3117ece4Schristos     inf.next_in = tmp;
229*3117ece4Schristos     def.avail_out = size;
230*3117ece4Schristos     def.next_out = blk;
231*3117ece4Schristos     LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
232*3117ece4Schristos     ret = recompress(&inf, &def);
233*3117ece4Schristos     LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
234*3117ece4Schristos     if (ret == Z_MEM_ERROR)
235*3117ece4Schristos         quit("out of memory");
236*3117ece4Schristos     assert(ret == Z_STREAM_END);    /* otherwise MARGIN too small */
237*3117ece4Schristos 
238*3117ece4Schristos     /* done -- write block to stdout */
239*3117ece4Schristos     have = size - def.avail_out;
240*3117ece4Schristos     /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
241*3117ece4Schristos      *     quit("error writing output"); */
242*3117ece4Schristos 
243*3117ece4Schristos     /* clean up and print results to stderr */
244*3117ece4Schristos     free(tmp);
245*3117ece4Schristos     ret = inflateEnd(&inf);
246*3117ece4Schristos     assert(ret != Z_STREAM_ERROR);
247*3117ece4Schristos     ret = deflateEnd(&def);
248*3117ece4Schristos     assert(ret != Z_STREAM_ERROR);
249*3117ece4Schristos     free(blk);
250*3117ece4Schristos     fprintf(stderr,
251*3117ece4Schristos             "%u bytes unused out of %u requested (%lu input)\n",
252*3117ece4Schristos             size - have, size, def.total_in);
253*3117ece4Schristos     return 0;
254*3117ece4Schristos }
255