1*b175d1c2Schristos /* zran.c -- example of deflate stream indexing and random access 2*b175d1c2Schristos * Copyright (C) 2005, 2012, 2018, 2023 Mark Adler 3aaf4ece6Schristos * For conditions of distribution and use, see copyright notice in zlib.h 4*b175d1c2Schristos * Version 1.4 13 Apr 2023 Mark Adler */ 5c3423655Schristos 6c3423655Schristos /* Version History: 7c3423655Schristos 1.0 29 May 2005 First version 8c3423655Schristos 1.1 29 Sep 2012 Fix memory reallocation error 9ec47cc4bSchristos 1.2 14 Oct 2018 Handle gzip streams with multiple members 10ec47cc4bSchristos Add a header file to facilitate usage in applications 11*b175d1c2Schristos 1.3 18 Feb 2023 Permit raw deflate streams as well as zlib and gzip 12*b175d1c2Schristos Permit crossing gzip member boundaries when extracting 13*b175d1c2Schristos Support a size_t size when extracting (was an int) 14*b175d1c2Schristos Do a binary search over the index for an access point 15*b175d1c2Schristos Expose the access point type to enable save and load 16*b175d1c2Schristos 1.4 13 Apr 2023 Add a NOPRIME define to not use inflatePrime() 17c3423655Schristos */ 18aaf4ece6Schristos 19*b175d1c2Schristos // Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() 20*b175d1c2Schristos // for random access of a compressed file. A file containing a raw deflate 21*b175d1c2Schristos // stream is provided on the command line. The compressed stream is decoded in 22*b175d1c2Schristos // its entirety, and an index built with access points about every SPAN bytes 23*b175d1c2Schristos // in the uncompressed output. The compressed file is left open, and can then 24*b175d1c2Schristos // be read randomly, having to decompress on the average SPAN/2 uncompressed 25*b175d1c2Schristos // bytes before getting to the desired block of data. 26*b175d1c2Schristos // 27*b175d1c2Schristos // An access point can be created at the start of any deflate block, by saving 28*b175d1c2Schristos // the starting file offset and bit of that block, and the 32K bytes of 29*b175d1c2Schristos // uncompressed data that precede that block. Also the uncompressed offset of 30*b175d1c2Schristos // that block is saved to provide a reference for locating a desired starting 31*b175d1c2Schristos // point in the uncompressed stream. deflate_index_build() decompresses the 32*b175d1c2Schristos // input raw deflate stream a block at a time, and at the end of each block 33*b175d1c2Schristos // decides if enough uncompressed data has gone by to justify the creation of a 34*b175d1c2Schristos // new access point. If so, that point is saved in a data structure that grows 35*b175d1c2Schristos // as needed to accommodate the points. 36*b175d1c2Schristos // 37*b175d1c2Schristos // To use the index, an offset in the uncompressed data is provided, for which 38*b175d1c2Schristos // the latest access point at or preceding that offset is located in the index. 39*b175d1c2Schristos // The input file is positioned to the specified location in the index, and if 40*b175d1c2Schristos // necessary the first few bits of the compressed data is read from the file. 41*b175d1c2Schristos // inflate is initialized with those bits and the 32K of uncompressed data, and 42*b175d1c2Schristos // decompression then proceeds until the desired offset in the file is reached. 43*b175d1c2Schristos // Then decompression continues to read the requested uncompressed data from 44*b175d1c2Schristos // the file. 45*b175d1c2Schristos // 46*b175d1c2Schristos // There is some fair bit of overhead to starting inflation for the random 47*b175d1c2Schristos // access, mainly copying the 32K byte dictionary. If small pieces of the file 48*b175d1c2Schristos // are being accessed, it would make sense to implement a cache to hold some 49*b175d1c2Schristos // lookahead to avoid many calls to deflate_index_extract() for small lengths. 50*b175d1c2Schristos // 51*b175d1c2Schristos // Another way to build an index would be to use inflateCopy(). That would not 52*b175d1c2Schristos // be constrained to have access points at block boundaries, but would require 53*b175d1c2Schristos // more memory per access point, and could not be saved to a file due to the 54*b175d1c2Schristos // use of pointers in the state. The approach here allows for storage of the 55*b175d1c2Schristos // index in a file. 56aaf4ece6Schristos 57aaf4ece6Schristos #include <stdio.h> 58aaf4ece6Schristos #include <stdlib.h> 59aaf4ece6Schristos #include <string.h> 60*b175d1c2Schristos #include <limits.h> 61aaf4ece6Schristos #include "zlib.h" 62ec47cc4bSchristos #include "zran.h" 63aaf4ece6Schristos 64*b175d1c2Schristos #define WINSIZE 32768U // sliding window size 65*b175d1c2Schristos #define CHUNK 16384 // file input buffer size 66aaf4ece6Schristos 67*b175d1c2Schristos // See comments in zran.h. 68*b175d1c2Schristos void deflate_index_free(struct deflate_index *index) { 69aaf4ece6Schristos if (index != NULL) { 70aaf4ece6Schristos free(index->list); 71aaf4ece6Schristos free(index); 72aaf4ece6Schristos } 73aaf4ece6Schristos } 74aaf4ece6Schristos 75*b175d1c2Schristos // Add an access point to the list. If out of memory, deallocate the existing 76*b175d1c2Schristos // list and return NULL. index->mode is temporarily the allocated number of 77*b175d1c2Schristos // access points, until it is time for deflate_index_build() to return. Then 78*b175d1c2Schristos // index->mode is set to the mode of inflation. 79*b175d1c2Schristos static struct deflate_index *add_point(struct deflate_index *index, int bits, 80ec47cc4bSchristos off_t in, off_t out, unsigned left, 81*b175d1c2Schristos unsigned char *window) { 82aaf4ece6Schristos if (index == NULL) { 83*b175d1c2Schristos // The list is empty. Create it, starting with eight access points. 84ec47cc4bSchristos index = malloc(sizeof(struct deflate_index)); 85*b175d1c2Schristos if (index == NULL) 86*b175d1c2Schristos return NULL; 87*b175d1c2Schristos index->have = 0; 88*b175d1c2Schristos index->mode = 8; 89*b175d1c2Schristos index->list = malloc(sizeof(point_t) * index->mode); 90aaf4ece6Schristos if (index->list == NULL) { 91aaf4ece6Schristos free(index); 92aaf4ece6Schristos return NULL; 93aaf4ece6Schristos } 94aaf4ece6Schristos } 95aaf4ece6Schristos 96*b175d1c2Schristos else if (index->have == index->mode) { 97*b175d1c2Schristos // The list is full. Make it bigger. 98*b175d1c2Schristos index->mode <<= 1; 99*b175d1c2Schristos point_t *next = realloc(index->list, sizeof(point_t) * index->mode); 100aaf4ece6Schristos if (next == NULL) { 101ec47cc4bSchristos deflate_index_free(index); 102aaf4ece6Schristos return NULL; 103aaf4ece6Schristos } 104aaf4ece6Schristos index->list = next; 105aaf4ece6Schristos } 106aaf4ece6Schristos 107*b175d1c2Schristos // Fill in the access point and increment how many we have. 108*b175d1c2Schristos point_t *next = (point_t *)(index->list) + index->have++; 109*b175d1c2Schristos if (index->have < 0) { 110*b175d1c2Schristos // Overflowed the int! 111*b175d1c2Schristos deflate_index_free(index); 112*b175d1c2Schristos return NULL; 113*b175d1c2Schristos } 114aaf4ece6Schristos next->out = out; 115*b175d1c2Schristos next->in = in; 116*b175d1c2Schristos next->bits = bits; 117aaf4ece6Schristos if (left) 118aaf4ece6Schristos memcpy(next->window, window + WINSIZE - left, left); 119aaf4ece6Schristos if (left < WINSIZE) 120aaf4ece6Schristos memcpy(next->window + left, window, WINSIZE - left); 121aaf4ece6Schristos 122*b175d1c2Schristos // Return the index, which may have been newly allocated or destroyed. 123aaf4ece6Schristos return index; 124aaf4ece6Schristos } 125aaf4ece6Schristos 126*b175d1c2Schristos // Decompression modes. These are the inflateInit2() windowBits parameter. 127*b175d1c2Schristos #define RAW -15 128*b175d1c2Schristos #define ZLIB 15 129*b175d1c2Schristos #define GZIP 31 130aaf4ece6Schristos 131*b175d1c2Schristos // See comments in zran.h. 132*b175d1c2Schristos int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { 133*b175d1c2Schristos // Set up inflation state. 134*b175d1c2Schristos z_stream strm = {0}; // inflate engine (gets fired up later) 135*b175d1c2Schristos unsigned char buf[CHUNK]; // input buffer 136*b175d1c2Schristos unsigned char win[WINSIZE] = {0}; // output sliding window 137*b175d1c2Schristos off_t totin = 0; // total bytes read from input 138*b175d1c2Schristos off_t totout = 0; // total bytes uncompressed 139*b175d1c2Schristos int mode = 0; // mode: RAW, ZLIB, or GZIP (0 => not set yet) 140aaf4ece6Schristos 141*b175d1c2Schristos // Decompress from in, generating access points along the way. 142*b175d1c2Schristos int ret; // the return value from zlib, or Z_ERRNO 143*b175d1c2Schristos off_t last; // last access point uncompressed offset 144*b175d1c2Schristos struct deflate_index *index = NULL; // list of access points 145aaf4ece6Schristos do { 146*b175d1c2Schristos // Assure available input, at least until reaching EOF. 147aaf4ece6Schristos if (strm.avail_in == 0) { 148*b175d1c2Schristos strm.avail_in = fread(buf, 1, sizeof(buf), in); 149aaf4ece6Schristos totin += strm.avail_in; 150*b175d1c2Schristos strm.next_in = buf; 151*b175d1c2Schristos if (strm.avail_in < sizeof(buf) && ferror(in)) { 152*b175d1c2Schristos ret = Z_ERRNO; 153aaf4ece6Schristos break; 154ec47cc4bSchristos } 155aaf4ece6Schristos 156*b175d1c2Schristos if (mode == 0) { 157*b175d1c2Schristos // At the start of the input -- determine the type. Assume raw 158*b175d1c2Schristos // if it is neither zlib nor gzip. This could in theory result 159*b175d1c2Schristos // in a false positive for zlib, but in practice the fill bits 160*b175d1c2Schristos // after a stored block are always zeros, so a raw stream won't 161*b175d1c2Schristos // start with an 8 in the low nybble. 162*b175d1c2Schristos mode = strm.avail_in == 0 ? RAW : // empty -- will fail 163*b175d1c2Schristos (strm.next_in[0] & 0xf) == 8 ? ZLIB : 164*b175d1c2Schristos strm.next_in[0] == 0x1f ? GZIP : 165*b175d1c2Schristos /* else */ RAW; 166*b175d1c2Schristos ret = inflateInit2(&strm, mode); 167*b175d1c2Schristos if (ret != Z_OK) 168*b175d1c2Schristos break; 169*b175d1c2Schristos } 170*b175d1c2Schristos } 171*b175d1c2Schristos 172*b175d1c2Schristos // Assure available output. This rotates the output through, for use as 173*b175d1c2Schristos // a sliding window on the uncompressed data. 174*b175d1c2Schristos if (strm.avail_out == 0) { 175*b175d1c2Schristos strm.avail_out = sizeof(win); 176*b175d1c2Schristos strm.next_out = win; 177*b175d1c2Schristos } 178*b175d1c2Schristos 179*b175d1c2Schristos if (mode == RAW && index == NULL) 180*b175d1c2Schristos // We skip the inflate() call at the start of raw deflate data in 181*b175d1c2Schristos // order generate an access point there. Set data_type to imitate 182*b175d1c2Schristos // the end of a header. 183*b175d1c2Schristos strm.data_type = 0x80; 184*b175d1c2Schristos else { 185*b175d1c2Schristos // Inflate and update the number of uncompressed bytes. 186*b175d1c2Schristos unsigned before = strm.avail_out; 187*b175d1c2Schristos ret = inflate(&strm, Z_BLOCK); 188*b175d1c2Schristos totout += before - strm.avail_out; 189*b175d1c2Schristos } 190*b175d1c2Schristos 191*b175d1c2Schristos if ((strm.data_type & 0xc0) == 0x80 && 192*b175d1c2Schristos (index == NULL || totout - last >= span)) { 193*b175d1c2Schristos // We are at the end of a header or a non-last deflate block, so we 194*b175d1c2Schristos // can add an access point here. Furthermore, we are either at the 195*b175d1c2Schristos // very start for the first access point, or there has been span or 196*b175d1c2Schristos // more uncompressed bytes since the last access point, so we want 197*b175d1c2Schristos // to add an access point here. 198*b175d1c2Schristos index = add_point(index, strm.data_type & 7, totin - strm.avail_in, 199*b175d1c2Schristos totout, strm.avail_out, win); 200aaf4ece6Schristos if (index == NULL) { 201aaf4ece6Schristos ret = Z_MEM_ERROR; 202*b175d1c2Schristos break; 203aaf4ece6Schristos } 204aaf4ece6Schristos last = totout; 205aaf4ece6Schristos } 206aaf4ece6Schristos 207*b175d1c2Schristos if (ret == Z_STREAM_END && mode == GZIP && 208*b175d1c2Schristos (strm.avail_in || ungetc(getc(in), in) != EOF)) 209*b175d1c2Schristos // There is more input after the end of a gzip member. Reset the 210*b175d1c2Schristos // inflate state to read another gzip member. On success, this will 211*b175d1c2Schristos // set ret to Z_OK to continue decompressing. 212*b175d1c2Schristos ret = inflateReset2(&strm, GZIP); 213*b175d1c2Schristos 214*b175d1c2Schristos // Keep going until Z_STREAM_END or error. If the compressed data ends 215*b175d1c2Schristos // prematurely without a file read error, Z_BUF_ERROR is returned. 216*b175d1c2Schristos } while (ret == Z_OK); 217*b175d1c2Schristos inflateEnd(&strm); 218*b175d1c2Schristos 219*b175d1c2Schristos if (ret != Z_STREAM_END) { 220*b175d1c2Schristos // An error was encountered. Discard the index and return a negative 221*b175d1c2Schristos // error code. 222*b175d1c2Schristos deflate_index_free(index); 223*b175d1c2Schristos return ret == Z_NEED_DICT ? Z_DATA_ERROR : ret; 224*b175d1c2Schristos } 225*b175d1c2Schristos 226*b175d1c2Schristos // Shrink the index to only the occupied access points and return it. 227*b175d1c2Schristos index->mode = mode; 228ec47cc4bSchristos index->length = totout; 229*b175d1c2Schristos point_t *list = realloc(index->list, sizeof(point_t) * index->have); 230*b175d1c2Schristos if (list == NULL) { 231*b175d1c2Schristos // Seems like a realloc() to make something smaller should always work, 232*b175d1c2Schristos // but just in case. 233*b175d1c2Schristos deflate_index_free(index); 234*b175d1c2Schristos return Z_MEM_ERROR; 235*b175d1c2Schristos } 236*b175d1c2Schristos index->list = list; 237aaf4ece6Schristos *built = index; 238ec47cc4bSchristos return index->have; 239aaf4ece6Schristos } 240aaf4ece6Schristos 241*b175d1c2Schristos #ifdef NOPRIME 242*b175d1c2Schristos // Support zlib versions before 1.2.3 (July 2005), or incomplete zlib clones 243*b175d1c2Schristos // that do not have inflatePrime(). 244aaf4ece6Schristos 245*b175d1c2Schristos # define INFLATEPRIME inflatePreface 246*b175d1c2Schristos 247*b175d1c2Schristos // Append the low bits bits of value to in[] at bit position *have, updating 248*b175d1c2Schristos // *have. value must be zero above its low bits bits. bits must be positive. 249*b175d1c2Schristos // This assumes that any bits above the *have bits in the last byte are zeros. 250*b175d1c2Schristos // That assumption is preserved on return, as any bits above *have + bits in 251*b175d1c2Schristos // the last byte written will be set to zeros. 252*b175d1c2Schristos static inline void append_bits(unsigned value, int bits, 253*b175d1c2Schristos unsigned char *in, int *have) { 254*b175d1c2Schristos in += *have >> 3; // where the first bits from value will go 255*b175d1c2Schristos int k = *have & 7; // the number of bits already there 256*b175d1c2Schristos *have += bits; 257*b175d1c2Schristos if (k) 258*b175d1c2Schristos *in |= value << k; // write value above the low k bits 259*b175d1c2Schristos else 260*b175d1c2Schristos *in = value; 261*b175d1c2Schristos k = 8 - k; // the number of bits just appended 262*b175d1c2Schristos while (bits > k) { 263*b175d1c2Schristos value >>= k; // drop the bits appended 264*b175d1c2Schristos bits -= k; 265*b175d1c2Schristos k = 8; // now at a byte boundary 266*b175d1c2Schristos *++in = value; 267*b175d1c2Schristos } 268*b175d1c2Schristos } 269*b175d1c2Schristos 270*b175d1c2Schristos // Insert enough bits in the form of empty deflate blocks in front of the 271*b175d1c2Schristos // low bits bits of value, in order to bring the sequence to a byte boundary. 272*b175d1c2Schristos // Then feed that to inflate(). This does what inflatePrime() does, except that 273*b175d1c2Schristos // a negative value of bits is not supported. bits must be in 0..16. If the 274*b175d1c2Schristos // arguments are invalid, Z_STREAM_ERROR is returned. Otherwise the return 275*b175d1c2Schristos // value from inflate() is returned. 276*b175d1c2Schristos static int inflatePreface(z_stream *strm, int bits, int value) { 277*b175d1c2Schristos // Check input. 278*b175d1c2Schristos if (strm == Z_NULL || bits < 0 || bits > 16) 279*b175d1c2Schristos return Z_STREAM_ERROR; 280*b175d1c2Schristos if (bits == 0) 281*b175d1c2Schristos return Z_OK; 282*b175d1c2Schristos value &= (2 << (bits - 1)) - 1; 283*b175d1c2Schristos 284*b175d1c2Schristos // An empty dynamic block with an odd number of bits (95). The high bit of 285*b175d1c2Schristos // the last byte is unused. 286*b175d1c2Schristos static const unsigned char dyn[] = { 287*b175d1c2Schristos 4, 0xe0, 0x81, 8, 0, 0, 0, 0, 0x20, 0xa8, 0xab, 0x1f 288*b175d1c2Schristos }; 289*b175d1c2Schristos const int dynlen = 95; // number of bits in the block 290*b175d1c2Schristos 291*b175d1c2Schristos // Build an input buffer for inflate that is a multiple of eight bits in 292*b175d1c2Schristos // length, and that ends with the low bits bits of value. 293*b175d1c2Schristos unsigned char in[(dynlen + 3 * 10 + 16 + 7) / 8]; 294*b175d1c2Schristos int have = 0; 295*b175d1c2Schristos if (bits & 1) { 296*b175d1c2Schristos // Insert an empty dynamic block to get to an odd number of bits, so 297*b175d1c2Schristos // when bits bits from value are appended, we are at an even number of 298*b175d1c2Schristos // bits. 299*b175d1c2Schristos memcpy(in, dyn, sizeof(dyn)); 300*b175d1c2Schristos have = dynlen; 301*b175d1c2Schristos } 302*b175d1c2Schristos while ((have + bits) & 7) 303*b175d1c2Schristos // Insert empty fixed blocks until appending bits bits would put us on 304*b175d1c2Schristos // a byte boundary. This will insert at most three fixed blocks. 305*b175d1c2Schristos append_bits(2, 10, in, &have); 306*b175d1c2Schristos 307*b175d1c2Schristos // Append the bits bits from value, which takes us to a byte boundary. 308*b175d1c2Schristos append_bits(value, bits, in, &have); 309*b175d1c2Schristos 310*b175d1c2Schristos // Deliver the input to inflate(). There is no output space provided, but 311*b175d1c2Schristos // inflate() can't get stuck waiting on output not ingesting all of the 312*b175d1c2Schristos // provided input. The reason is that there will be at most 16 bits of 313*b175d1c2Schristos // input from value after the empty deflate blocks (which themselves 314*b175d1c2Schristos // generate no output). At least ten bits are needed to generate the first 315*b175d1c2Schristos // output byte from a fixed block. The last two bytes of the buffer have to 316*b175d1c2Schristos // be ingested in order to get ten bits, which is the most that value can 317*b175d1c2Schristos // occupy. 318*b175d1c2Schristos strm->avail_in = have >> 3; 319*b175d1c2Schristos strm->next_in = in; 320*b175d1c2Schristos strm->avail_out = 0; 321*b175d1c2Schristos strm->next_out = in; // not used, but can't be NULL 322*b175d1c2Schristos return inflate(strm, Z_NO_FLUSH); 323*b175d1c2Schristos } 324*b175d1c2Schristos 325*b175d1c2Schristos #else 326*b175d1c2Schristos # define INFLATEPRIME inflatePrime 327*b175d1c2Schristos #endif 328*b175d1c2Schristos 329*b175d1c2Schristos // See comments in zran.h. 330*b175d1c2Schristos ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, 331*b175d1c2Schristos off_t offset, unsigned char *buf, size_t len) { 332*b175d1c2Schristos // Do a quick sanity check on the index. 333*b175d1c2Schristos if (index == NULL || index->have < 1 || index->list[0].out != 0) 334*b175d1c2Schristos return Z_STREAM_ERROR; 335*b175d1c2Schristos 336*b175d1c2Schristos // If nothing to extract, return zero bytes extracted. 337*b175d1c2Schristos if (len == 0 || offset < 0 || offset >= index->length) 338aaf4ece6Schristos return 0; 339aaf4ece6Schristos 340*b175d1c2Schristos // Find the access point closest to but not after offset. 341*b175d1c2Schristos int lo = -1, hi = index->have; 342*b175d1c2Schristos point_t *point = index->list; 343*b175d1c2Schristos while (hi - lo > 1) { 344*b175d1c2Schristos int mid = (lo + hi) >> 1; 345*b175d1c2Schristos if (offset < point[mid].out) 346*b175d1c2Schristos hi = mid; 347*b175d1c2Schristos else 348*b175d1c2Schristos lo = mid; 349*b175d1c2Schristos } 350*b175d1c2Schristos point += lo; 351aaf4ece6Schristos 352*b175d1c2Schristos // Initialize the input file and prime the inflate engine to start there. 353*b175d1c2Schristos int ret = fseeko(in, point->in - (point->bits ? 1 : 0), SEEK_SET); 354*b175d1c2Schristos if (ret == -1) 355*b175d1c2Schristos return Z_ERRNO; 356*b175d1c2Schristos int ch = 0; 357*b175d1c2Schristos if (point->bits && (ch = getc(in)) == EOF) 358*b175d1c2Schristos return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; 359*b175d1c2Schristos z_stream strm = {0}; 360*b175d1c2Schristos ret = inflateInit2(&strm, RAW); 361aaf4ece6Schristos if (ret != Z_OK) 362aaf4ece6Schristos return ret; 363*b175d1c2Schristos if (point->bits) 364*b175d1c2Schristos INFLATEPRIME(&strm, point->bits, ch >> (8 - point->bits)); 365*b175d1c2Schristos inflateSetDictionary(&strm, point->window, WINSIZE); 366aaf4ece6Schristos 367*b175d1c2Schristos // Skip uncompressed bytes until offset reached, then satisfy request. 368*b175d1c2Schristos unsigned char input[CHUNK]; 369*b175d1c2Schristos unsigned char discard[WINSIZE]; 370*b175d1c2Schristos offset -= point->out; // number of bytes to skip to get to offset 371*b175d1c2Schristos size_t left = len; // number of bytes left to read after offset 372aaf4ece6Schristos do { 373*b175d1c2Schristos if (offset) { 374*b175d1c2Schristos // Discard up to offset uncompressed bytes. 375*b175d1c2Schristos strm.avail_out = offset < WINSIZE ? (unsigned)offset : WINSIZE; 376aaf4ece6Schristos strm.next_out = discard; 377ec47cc4bSchristos } 378ec47cc4bSchristos else { 379*b175d1c2Schristos // Uncompress up to left bytes into buf. 380*b175d1c2Schristos strm.avail_out = left < UINT_MAX ? (unsigned)left : UINT_MAX; 381*b175d1c2Schristos strm.next_out = buf + len - left; 382ec47cc4bSchristos } 383ec47cc4bSchristos 384*b175d1c2Schristos // Uncompress, setting got to the number of bytes uncompressed. 385ec47cc4bSchristos if (strm.avail_in == 0) { 386*b175d1c2Schristos // Assure available input. 387ec47cc4bSchristos strm.avail_in = fread(input, 1, CHUNK, in); 388*b175d1c2Schristos if (strm.avail_in < CHUNK && ferror(in)) { 389ec47cc4bSchristos ret = Z_ERRNO; 390*b175d1c2Schristos break; 391ec47cc4bSchristos } 392ec47cc4bSchristos strm.next_in = input; 393ec47cc4bSchristos } 394*b175d1c2Schristos unsigned got = strm.avail_out; 395*b175d1c2Schristos ret = inflate(&strm, Z_NO_FLUSH); 396*b175d1c2Schristos got -= strm.avail_out; 397ec47cc4bSchristos 398*b175d1c2Schristos // Update the appropriate count. 399*b175d1c2Schristos if (offset) 400*b175d1c2Schristos offset -= got; 401*b175d1c2Schristos else 402*b175d1c2Schristos left -= got; 403*b175d1c2Schristos 404*b175d1c2Schristos // If we're at the end of a gzip member and there's more to read, 405*b175d1c2Schristos // continue to the next gzip member. 406*b175d1c2Schristos if (ret == Z_STREAM_END && index->mode == GZIP) { 407*b175d1c2Schristos // Discard the gzip trailer. 408*b175d1c2Schristos unsigned drop = 8; // length of gzip trailer 409*b175d1c2Schristos if (strm.avail_in >= drop) { 410*b175d1c2Schristos strm.avail_in -= drop; 411*b175d1c2Schristos strm.next_in += drop; 412*b175d1c2Schristos } 413*b175d1c2Schristos else { 414*b175d1c2Schristos // Read and discard the remainder of the gzip trailer. 415*b175d1c2Schristos drop -= strm.avail_in; 416*b175d1c2Schristos strm.avail_in = 0; 417*b175d1c2Schristos do { 418*b175d1c2Schristos if (getc(in) == EOF) 419*b175d1c2Schristos // The input does not have a complete trailer. 420*b175d1c2Schristos return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; 421*b175d1c2Schristos } while (--drop); 422ec47cc4bSchristos } 423ec47cc4bSchristos 424*b175d1c2Schristos if (strm.avail_in || ungetc(getc(in), in) != EOF) { 425*b175d1c2Schristos // There's more after the gzip trailer. Use inflate to skip the 426*b175d1c2Schristos // gzip header and resume the raw inflate there. 427*b175d1c2Schristos inflateReset2(&strm, GZIP); 428*b175d1c2Schristos do { 429*b175d1c2Schristos if (strm.avail_in == 0) { 430*b175d1c2Schristos strm.avail_in = fread(input, 1, CHUNK, in); 431*b175d1c2Schristos if (strm.avail_in < CHUNK && ferror(in)) { 432*b175d1c2Schristos ret = Z_ERRNO; 433aaf4ece6Schristos break; 434*b175d1c2Schristos } 435*b175d1c2Schristos strm.next_in = input; 436*b175d1c2Schristos } 437*b175d1c2Schristos strm.avail_out = WINSIZE; 438*b175d1c2Schristos strm.next_out = discard; 439*b175d1c2Schristos ret = inflate(&strm, Z_BLOCK); // stop at end of header 440*b175d1c2Schristos } while (ret == Z_OK && (strm.data_type & 0x80) == 0); 441*b175d1c2Schristos if (ret != Z_OK) 442*b175d1c2Schristos break; 443*b175d1c2Schristos inflateReset2(&strm, RAW); 444*b175d1c2Schristos } 445*b175d1c2Schristos } 446aaf4ece6Schristos 447*b175d1c2Schristos // Continue until we have the requested data, the deflate data has 448*b175d1c2Schristos // ended, or an error is encountered. 449*b175d1c2Schristos } while (ret == Z_OK && left); 450*b175d1c2Schristos inflateEnd(&strm); 451aaf4ece6Schristos 452*b175d1c2Schristos // Return the number of uncompressed bytes read into buf, or the error. 453*b175d1c2Schristos return ret == Z_OK || ret == Z_STREAM_END ? len - left : ret; 454aaf4ece6Schristos } 455aaf4ece6Schristos 456ec47cc4bSchristos #ifdef TEST 457ec47cc4bSchristos 458*b175d1c2Schristos #define SPAN 1048576L // desired distance between access points 459*b175d1c2Schristos #define LEN 16384 // number of bytes to extract 460ec47cc4bSchristos 461*b175d1c2Schristos // Demonstrate the use of deflate_index_build() and deflate_index_extract() by 462*b175d1c2Schristos // processing the file provided on the command line, and extracting LEN bytes 463*b175d1c2Schristos // from 2/3rds of the way through the uncompressed output, writing that to 464*b175d1c2Schristos // stdout. An offset can be provided as the second argument, in which case the 465*b175d1c2Schristos // data is extracted from there instead. 466*b175d1c2Schristos int main(int argc, char **argv) { 467*b175d1c2Schristos // Open the input file. 468ec47cc4bSchristos if (argc < 2 || argc > 3) { 469*b175d1c2Schristos fprintf(stderr, "usage: zran file.raw [offset]\n"); 470aaf4ece6Schristos return 1; 471aaf4ece6Schristos } 472*b175d1c2Schristos FILE *in = fopen(argv[1], "rb"); 473aaf4ece6Schristos if (in == NULL) { 474aaf4ece6Schristos fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); 475aaf4ece6Schristos return 1; 476aaf4ece6Schristos } 477aaf4ece6Schristos 478*b175d1c2Schristos // Get optional offset. 479*b175d1c2Schristos off_t offset = -1; 480ec47cc4bSchristos if (argc == 3) { 481ec47cc4bSchristos char *end; 482ec47cc4bSchristos offset = strtoll(argv[2], &end, 10); 483ec47cc4bSchristos if (*end || offset < 0) { 484ec47cc4bSchristos fprintf(stderr, "zran: %s is not a valid offset\n", argv[2]); 485ec47cc4bSchristos return 1; 486ec47cc4bSchristos } 487ec47cc4bSchristos } 488ec47cc4bSchristos 489*b175d1c2Schristos // Build index. 490*b175d1c2Schristos struct deflate_index *index = NULL; 491*b175d1c2Schristos int len = deflate_index_build(in, SPAN, &index); 492aaf4ece6Schristos if (len < 0) { 493aaf4ece6Schristos fclose(in); 494aaf4ece6Schristos switch (len) { 495aaf4ece6Schristos case Z_MEM_ERROR: 496aaf4ece6Schristos fprintf(stderr, "zran: out of memory\n"); 497aaf4ece6Schristos break; 498*b175d1c2Schristos case Z_BUF_ERROR: 499*b175d1c2Schristos fprintf(stderr, "zran: %s ended prematurely\n", argv[1]); 500*b175d1c2Schristos break; 501aaf4ece6Schristos case Z_DATA_ERROR: 502aaf4ece6Schristos fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); 503aaf4ece6Schristos break; 504aaf4ece6Schristos case Z_ERRNO: 505aaf4ece6Schristos fprintf(stderr, "zran: read error on %s\n", argv[1]); 506aaf4ece6Schristos break; 507aaf4ece6Schristos default: 508aaf4ece6Schristos fprintf(stderr, "zran: error %d while building index\n", len); 509aaf4ece6Schristos } 510aaf4ece6Schristos return 1; 511aaf4ece6Schristos } 512aaf4ece6Schristos fprintf(stderr, "zran: built index with %d access points\n", len); 513aaf4ece6Schristos 514*b175d1c2Schristos // Use index by reading some bytes from an arbitrary offset. 515*b175d1c2Schristos unsigned char buf[LEN]; 516ec47cc4bSchristos if (offset == -1) 517*b175d1c2Schristos offset = ((index->length + 1) << 1) / 3; 518*b175d1c2Schristos ptrdiff_t got = deflate_index_extract(in, index, offset, buf, LEN); 519*b175d1c2Schristos if (got < 0) 520aaf4ece6Schristos fprintf(stderr, "zran: extraction failed: %s error\n", 521*b175d1c2Schristos got == Z_MEM_ERROR ? "out of memory" : "input corrupted"); 522aaf4ece6Schristos else { 523*b175d1c2Schristos fwrite(buf, 1, got, stdout); 524*b175d1c2Schristos fprintf(stderr, "zran: extracted %ld bytes at %lld\n", got, offset); 525aaf4ece6Schristos } 526aaf4ece6Schristos 527*b175d1c2Schristos // Clean up and exit. 528ec47cc4bSchristos deflate_index_free(index); 529aaf4ece6Schristos fclose(in); 530aaf4ece6Schristos return 0; 531aaf4ece6Schristos } 532ec47cc4bSchristos 533ec47cc4bSchristos #endif 534