xref: /netbsd-src/common/dist/zlib/doc/rfc1950.txt (revision c34236556bea94afcaca1782d7d228301edc3ea0)
1*c3423655Schristos
2*c3423655Schristos
3*c3423655Schristos
4*c3423655Schristos
5*c3423655Schristos
6*c3423655Schristos
7*c3423655SchristosNetwork Working Group                                         P. Deutsch
8*c3423655SchristosRequest for Comments: 1950                           Aladdin Enterprises
9*c3423655SchristosCategory: Informational                                      J-L. Gailly
10*c3423655Schristos                                                                Info-ZIP
11*c3423655Schristos                                                                May 1996
12*c3423655Schristos
13*c3423655Schristos
14*c3423655Schristos         ZLIB Compressed Data Format Specification version 3.3
15*c3423655Schristos
16*c3423655SchristosStatus of This Memo
17*c3423655Schristos
18*c3423655Schristos   This memo provides information for the Internet community.  This memo
19*c3423655Schristos   does not specify an Internet standard of any kind.  Distribution of
20*c3423655Schristos   this memo is unlimited.
21*c3423655Schristos
22*c3423655SchristosIESG Note:
23*c3423655Schristos
24*c3423655Schristos   The IESG takes no position on the validity of any Intellectual
25*c3423655Schristos   Property Rights statements contained in this document.
26*c3423655Schristos
27*c3423655SchristosNotices
28*c3423655Schristos
29*c3423655Schristos   Copyright (c) 1996 L. Peter Deutsch and Jean-Loup Gailly
30*c3423655Schristos
31*c3423655Schristos   Permission is granted to copy and distribute this document for any
32*c3423655Schristos   purpose and without charge, including translations into other
33*c3423655Schristos   languages and incorporation into compilations, provided that the
34*c3423655Schristos   copyright notice and this notice are preserved, and that any
35*c3423655Schristos   substantive changes or deletions from the original are clearly
36*c3423655Schristos   marked.
37*c3423655Schristos
38*c3423655Schristos   A pointer to the latest version of this and related documentation in
39*c3423655Schristos   HTML format can be found at the URL
40*c3423655Schristos   <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
41*c3423655Schristos
42*c3423655SchristosAbstract
43*c3423655Schristos
44*c3423655Schristos   This specification defines a lossless compressed data format.  The
45*c3423655Schristos   data can be produced or consumed, even for an arbitrarily long
46*c3423655Schristos   sequentially presented input data stream, using only an a priori
47*c3423655Schristos   bounded amount of intermediate storage.  The format presently uses
48*c3423655Schristos   the DEFLATE compression method but can be easily extended to use
49*c3423655Schristos   other compression methods.  It can be implemented readily in a manner
50*c3423655Schristos   not covered by patents.  This specification also defines the ADLER-32
51*c3423655Schristos   checksum (an extension and improvement of the Fletcher checksum),
52*c3423655Schristos   used for detection of data corruption, and provides an algorithm for
53*c3423655Schristos   computing it.
54*c3423655Schristos
55*c3423655Schristos
56*c3423655Schristos
57*c3423655Schristos
58*c3423655SchristosDeutsch & Gailly             Informational                      [Page 1]
59*c3423655Schristos
60*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
61*c3423655Schristos
62*c3423655Schristos
63*c3423655SchristosTable of Contents
64*c3423655Schristos
65*c3423655Schristos   1. Introduction ................................................... 2
66*c3423655Schristos      1.1. Purpose ................................................... 2
67*c3423655Schristos      1.2. Intended audience ......................................... 3
68*c3423655Schristos      1.3. Scope ..................................................... 3
69*c3423655Schristos      1.4. Compliance ................................................ 3
70*c3423655Schristos      1.5.  Definitions of terms and conventions used ................ 3
71*c3423655Schristos      1.6. Changes from previous versions ............................ 3
72*c3423655Schristos   2. Detailed specification ......................................... 3
73*c3423655Schristos      2.1. Overall conventions ....................................... 3
74*c3423655Schristos      2.2. Data format ............................................... 4
75*c3423655Schristos      2.3. Compliance ................................................ 7
76*c3423655Schristos   3. References ..................................................... 7
77*c3423655Schristos   4. Source code .................................................... 8
78*c3423655Schristos   5. Security Considerations ........................................ 8
79*c3423655Schristos   6. Acknowledgements ............................................... 8
80*c3423655Schristos   7. Authors' Addresses ............................................. 8
81*c3423655Schristos   8. Appendix: Rationale ............................................ 9
82*c3423655Schristos   9. Appendix: Sample code ..........................................10
83*c3423655Schristos
84*c3423655Schristos1. Introduction
85*c3423655Schristos
86*c3423655Schristos   1.1. Purpose
87*c3423655Schristos
88*c3423655Schristos      The purpose of this specification is to define a lossless
89*c3423655Schristos      compressed data format that:
90*c3423655Schristos
91*c3423655Schristos          * Is independent of CPU type, operating system, file system,
92*c3423655Schristos            and character set, and hence can be used for interchange;
93*c3423655Schristos
94*c3423655Schristos          * Can be produced or consumed, even for an arbitrarily long
95*c3423655Schristos            sequentially presented input data stream, using only an a
96*c3423655Schristos            priori bounded amount of intermediate storage, and hence can
97*c3423655Schristos            be used in data communications or similar structures such as
98*c3423655Schristos            Unix filters;
99*c3423655Schristos
100*c3423655Schristos          * Can use a number of different compression methods;
101*c3423655Schristos
102*c3423655Schristos          * Can be implemented readily in a manner not covered by
103*c3423655Schristos            patents, and hence can be practiced freely.
104*c3423655Schristos
105*c3423655Schristos      The data format defined by this specification does not attempt to
106*c3423655Schristos      allow random access to compressed data.
107*c3423655Schristos
108*c3423655Schristos
109*c3423655Schristos
110*c3423655Schristos
111*c3423655Schristos
112*c3423655Schristos
113*c3423655Schristos
114*c3423655SchristosDeutsch & Gailly             Informational                      [Page 2]
115*c3423655Schristos
116*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
117*c3423655Schristos
118*c3423655Schristos
119*c3423655Schristos   1.2. Intended audience
120*c3423655Schristos
121*c3423655Schristos      This specification is intended for use by implementors of software
122*c3423655Schristos      to compress data into zlib format and/or decompress data from zlib
123*c3423655Schristos      format.
124*c3423655Schristos
125*c3423655Schristos      The text of the specification assumes a basic background in
126*c3423655Schristos      programming at the level of bits and other primitive data
127*c3423655Schristos      representations.
128*c3423655Schristos
129*c3423655Schristos   1.3. Scope
130*c3423655Schristos
131*c3423655Schristos      The specification specifies a compressed data format that can be
132*c3423655Schristos      used for in-memory compression of a sequence of arbitrary bytes.
133*c3423655Schristos
134*c3423655Schristos   1.4. Compliance
135*c3423655Schristos
136*c3423655Schristos      Unless otherwise indicated below, a compliant decompressor must be
137*c3423655Schristos      able to accept and decompress any data set that conforms to all
138*c3423655Schristos      the specifications presented here; a compliant compressor must
139*c3423655Schristos      produce data sets that conform to all the specifications presented
140*c3423655Schristos      here.
141*c3423655Schristos
142*c3423655Schristos   1.5.  Definitions of terms and conventions used
143*c3423655Schristos
144*c3423655Schristos      byte: 8 bits stored or transmitted as a unit (same as an octet).
145*c3423655Schristos      (For this specification, a byte is exactly 8 bits, even on
146*c3423655Schristos      machines which store a character on a number of bits different
147*c3423655Schristos      from 8.) See below, for the numbering of bits within a byte.
148*c3423655Schristos
149*c3423655Schristos   1.6. Changes from previous versions
150*c3423655Schristos
151*c3423655Schristos      Version 3.1 was the first public release of this specification.
152*c3423655Schristos      In version 3.2, some terminology was changed and the Adler-32
153*c3423655Schristos      sample code was rewritten for clarity.  In version 3.3, the
154*c3423655Schristos      support for a preset dictionary was introduced, and the
155*c3423655Schristos      specification was converted to RFC style.
156*c3423655Schristos
157*c3423655Schristos2. Detailed specification
158*c3423655Schristos
159*c3423655Schristos   2.1. Overall conventions
160*c3423655Schristos
161*c3423655Schristos      In the diagrams below, a box like this:
162*c3423655Schristos
163*c3423655Schristos         +---+
164*c3423655Schristos         |   | <-- the vertical bars might be missing
165*c3423655Schristos         +---+
166*c3423655Schristos
167*c3423655Schristos
168*c3423655Schristos
169*c3423655Schristos
170*c3423655SchristosDeutsch & Gailly             Informational                      [Page 3]
171*c3423655Schristos
172*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
173*c3423655Schristos
174*c3423655Schristos
175*c3423655Schristos      represents one byte; a box like this:
176*c3423655Schristos
177*c3423655Schristos         +==============+
178*c3423655Schristos         |              |
179*c3423655Schristos         +==============+
180*c3423655Schristos
181*c3423655Schristos      represents a variable number of bytes.
182*c3423655Schristos
183*c3423655Schristos      Bytes stored within a computer do not have a "bit order", since
184*c3423655Schristos      they are always treated as a unit.  However, a byte considered as
185*c3423655Schristos      an integer between 0 and 255 does have a most- and least-
186*c3423655Schristos      significant bit, and since we write numbers with the most-
187*c3423655Schristos      significant digit on the left, we also write bytes with the most-
188*c3423655Schristos      significant bit on the left.  In the diagrams below, we number the
189*c3423655Schristos      bits of a byte so that bit 0 is the least-significant bit, i.e.,
190*c3423655Schristos      the bits are numbered:
191*c3423655Schristos
192*c3423655Schristos         +--------+
193*c3423655Schristos         |76543210|
194*c3423655Schristos         +--------+
195*c3423655Schristos
196*c3423655Schristos      Within a computer, a number may occupy multiple bytes.  All
197*c3423655Schristos      multi-byte numbers in the format described here are stored with
198*c3423655Schristos      the MOST-significant byte first (at the lower memory address).
199*c3423655Schristos      For example, the decimal number 520 is stored as:
200*c3423655Schristos
201*c3423655Schristos             0     1
202*c3423655Schristos         +--------+--------+
203*c3423655Schristos         |00000010|00001000|
204*c3423655Schristos         +--------+--------+
205*c3423655Schristos          ^        ^
206*c3423655Schristos          |        |
207*c3423655Schristos          |        + less significant byte = 8
208*c3423655Schristos          + more significant byte = 2 x 256
209*c3423655Schristos
210*c3423655Schristos   2.2. Data format
211*c3423655Schristos
212*c3423655Schristos      A zlib stream has the following structure:
213*c3423655Schristos
214*c3423655Schristos           0   1
215*c3423655Schristos         +---+---+
216*c3423655Schristos         |CMF|FLG|   (more-->)
217*c3423655Schristos         +---+---+
218*c3423655Schristos
219*c3423655Schristos
220*c3423655Schristos
221*c3423655Schristos
222*c3423655Schristos
223*c3423655Schristos
224*c3423655Schristos
225*c3423655Schristos
226*c3423655SchristosDeutsch & Gailly             Informational                      [Page 4]
227*c3423655Schristos
228*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
229*c3423655Schristos
230*c3423655Schristos
231*c3423655Schristos      (if FLG.FDICT set)
232*c3423655Schristos
233*c3423655Schristos           0   1   2   3
234*c3423655Schristos         +---+---+---+---+
235*c3423655Schristos         |     DICTID    |   (more-->)
236*c3423655Schristos         +---+---+---+---+
237*c3423655Schristos
238*c3423655Schristos         +=====================+---+---+---+---+
239*c3423655Schristos         |...compressed data...|    ADLER32    |
240*c3423655Schristos         +=====================+---+---+---+---+
241*c3423655Schristos
242*c3423655Schristos      Any data which may appear after ADLER32 are not part of the zlib
243*c3423655Schristos      stream.
244*c3423655Schristos
245*c3423655Schristos      CMF (Compression Method and flags)
246*c3423655Schristos         This byte is divided into a 4-bit compression method and a 4-
247*c3423655Schristos         bit information field depending on the compression method.
248*c3423655Schristos
249*c3423655Schristos            bits 0 to 3  CM     Compression method
250*c3423655Schristos            bits 4 to 7  CINFO  Compression info
251*c3423655Schristos
252*c3423655Schristos      CM (Compression method)
253*c3423655Schristos         This identifies the compression method used in the file. CM = 8
254*c3423655Schristos         denotes the "deflate" compression method with a window size up
255*c3423655Schristos         to 32K.  This is the method used by gzip and PNG (see
256*c3423655Schristos         references [1] and [2] in Chapter 3, below, for the reference
257*c3423655Schristos         documents).  CM = 15 is reserved.  It might be used in a future
258*c3423655Schristos         version of this specification to indicate the presence of an
259*c3423655Schristos         extra field before the compressed data.
260*c3423655Schristos
261*c3423655Schristos      CINFO (Compression info)
262*c3423655Schristos         For CM = 8, CINFO is the base-2 logarithm of the LZ77 window
263*c3423655Schristos         size, minus eight (CINFO=7 indicates a 32K window size). Values
264*c3423655Schristos         of CINFO above 7 are not allowed in this version of the
265*c3423655Schristos         specification.  CINFO is not defined in this specification for
266*c3423655Schristos         CM not equal to 8.
267*c3423655Schristos
268*c3423655Schristos      FLG (FLaGs)
269*c3423655Schristos         This flag byte is divided as follows:
270*c3423655Schristos
271*c3423655Schristos            bits 0 to 4  FCHECK  (check bits for CMF and FLG)
272*c3423655Schristos            bit  5       FDICT   (preset dictionary)
273*c3423655Schristos            bits 6 to 7  FLEVEL  (compression level)
274*c3423655Schristos
275*c3423655Schristos         The FCHECK value must be such that CMF and FLG, when viewed as
276*c3423655Schristos         a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG),
277*c3423655Schristos         is a multiple of 31.
278*c3423655Schristos
279*c3423655Schristos
280*c3423655Schristos
281*c3423655Schristos
282*c3423655SchristosDeutsch & Gailly             Informational                      [Page 5]
283*c3423655Schristos
284*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
285*c3423655Schristos
286*c3423655Schristos
287*c3423655Schristos      FDICT (Preset dictionary)
288*c3423655Schristos         If FDICT is set, a DICT dictionary identifier is present
289*c3423655Schristos         immediately after the FLG byte. The dictionary is a sequence of
290*c3423655Schristos         bytes which are initially fed to the compressor without
291*c3423655Schristos         producing any compressed output. DICT is the Adler-32 checksum
292*c3423655Schristos         of this sequence of bytes (see the definition of ADLER32
293*c3423655Schristos         below).  The decompressor can use this identifier to determine
294*c3423655Schristos         which dictionary has been used by the compressor.
295*c3423655Schristos
296*c3423655Schristos      FLEVEL (Compression level)
297*c3423655Schristos         These flags are available for use by specific compression
298*c3423655Schristos         methods.  The "deflate" method (CM = 8) sets these flags as
299*c3423655Schristos         follows:
300*c3423655Schristos
301*c3423655Schristos            0 - compressor used fastest algorithm
302*c3423655Schristos            1 - compressor used fast algorithm
303*c3423655Schristos            2 - compressor used default algorithm
304*c3423655Schristos            3 - compressor used maximum compression, slowest algorithm
305*c3423655Schristos
306*c3423655Schristos         The information in FLEVEL is not needed for decompression; it
307*c3423655Schristos         is there to indicate if recompression might be worthwhile.
308*c3423655Schristos
309*c3423655Schristos      compressed data
310*c3423655Schristos         For compression method 8, the compressed data is stored in the
311*c3423655Schristos         deflate compressed data format as described in the document
312*c3423655Schristos         "DEFLATE Compressed Data Format Specification" by L. Peter
313*c3423655Schristos         Deutsch. (See reference [3] in Chapter 3, below)
314*c3423655Schristos
315*c3423655Schristos         Other compressed data formats are not specified in this version
316*c3423655Schristos         of the zlib specification.
317*c3423655Schristos
318*c3423655Schristos      ADLER32 (Adler-32 checksum)
319*c3423655Schristos         This contains a checksum value of the uncompressed data
320*c3423655Schristos         (excluding any dictionary data) computed according to Adler-32
321*c3423655Schristos         algorithm. This algorithm is a 32-bit extension and improvement
322*c3423655Schristos         of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073
323*c3423655Schristos         standard. See references [4] and [5] in Chapter 3, below)
324*c3423655Schristos
325*c3423655Schristos         Adler-32 is composed of two sums accumulated per byte: s1 is
326*c3423655Schristos         the sum of all bytes, s2 is the sum of all s1 values. Both sums
327*c3423655Schristos         are done modulo 65521. s1 is initialized to 1, s2 to zero.  The
328*c3423655Schristos         Adler-32 checksum is stored as s2*65536 + s1 in most-
329*c3423655Schristos         significant-byte first (network) order.
330*c3423655Schristos
331*c3423655Schristos
332*c3423655Schristos
333*c3423655Schristos
334*c3423655Schristos
335*c3423655Schristos
336*c3423655Schristos
337*c3423655Schristos
338*c3423655SchristosDeutsch & Gailly             Informational                      [Page 6]
339*c3423655Schristos
340*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
341*c3423655Schristos
342*c3423655Schristos
343*c3423655Schristos   2.3. Compliance
344*c3423655Schristos
345*c3423655Schristos      A compliant compressor must produce streams with correct CMF, FLG
346*c3423655Schristos      and ADLER32, but need not support preset dictionaries.  When the
347*c3423655Schristos      zlib data format is used as part of another standard data format,
348*c3423655Schristos      the compressor may use only preset dictionaries that are specified
349*c3423655Schristos      by this other data format.  If this other format does not use the
350*c3423655Schristos      preset dictionary feature, the compressor must not set the FDICT
351*c3423655Schristos      flag.
352*c3423655Schristos
353*c3423655Schristos      A compliant decompressor must check CMF, FLG, and ADLER32, and
354*c3423655Schristos      provide an error indication if any of these have incorrect values.
355*c3423655Schristos      A compliant decompressor must give an error indication if CM is
356*c3423655Schristos      not one of the values defined in this specification (only the
357*c3423655Schristos      value 8 is permitted in this version), since another value could
358*c3423655Schristos      indicate the presence of new features that would cause subsequent
359*c3423655Schristos      data to be interpreted incorrectly.  A compliant decompressor must
360*c3423655Schristos      give an error indication if FDICT is set and DICTID is not the
361*c3423655Schristos      identifier of a known preset dictionary.  A decompressor may
362*c3423655Schristos      ignore FLEVEL and still be compliant.  When the zlib data format
363*c3423655Schristos      is being used as a part of another standard format, a compliant
364*c3423655Schristos      decompressor must support all the preset dictionaries specified by
365*c3423655Schristos      the other format. When the other format does not use the preset
366*c3423655Schristos      dictionary feature, a compliant decompressor must reject any
367*c3423655Schristos      stream in which the FDICT flag is set.
368*c3423655Schristos
369*c3423655Schristos3. References
370*c3423655Schristos
371*c3423655Schristos   [1] Deutsch, L.P.,"GZIP Compressed Data Format Specification",
372*c3423655Schristos       available in ftp://ftp.uu.net/pub/archiving/zip/doc/
373*c3423655Schristos
374*c3423655Schristos   [2] Thomas Boutell, "PNG (Portable Network Graphics) specification",
375*c3423655Schristos       available in ftp://ftp.uu.net/graphics/png/documents/
376*c3423655Schristos
377*c3423655Schristos   [3] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification",
378*c3423655Schristos       available in ftp://ftp.uu.net/pub/archiving/zip/doc/
379*c3423655Schristos
380*c3423655Schristos   [4] Fletcher, J. G., "An Arithmetic Checksum for Serial
381*c3423655Schristos       Transmissions," IEEE Transactions on Communications, Vol. COM-30,
382*c3423655Schristos       No. 1, January 1982, pp. 247-252.
383*c3423655Schristos
384*c3423655Schristos   [5] ITU-T Recommendation X.224, Annex D, "Checksum Algorithms,"
385*c3423655Schristos       November, 1993, pp. 144, 145. (Available from
386*c3423655Schristos       gopher://info.itu.ch). ITU-T X.244 is also the same as ISO 8073.
387*c3423655Schristos
388*c3423655Schristos
389*c3423655Schristos
390*c3423655Schristos
391*c3423655Schristos
392*c3423655Schristos
393*c3423655Schristos
394*c3423655SchristosDeutsch & Gailly             Informational                      [Page 7]
395*c3423655Schristos
396*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
397*c3423655Schristos
398*c3423655Schristos
399*c3423655Schristos4. Source code
400*c3423655Schristos
401*c3423655Schristos   Source code for a C language implementation of a "zlib" compliant
402*c3423655Schristos   library is available at ftp://ftp.uu.net/pub/archiving/zip/zlib/.
403*c3423655Schristos
404*c3423655Schristos5. Security Considerations
405*c3423655Schristos
406*c3423655Schristos   A decoder that fails to check the ADLER32 checksum value may be
407*c3423655Schristos   subject to undetected data corruption.
408*c3423655Schristos
409*c3423655Schristos6. Acknowledgements
410*c3423655Schristos
411*c3423655Schristos   Trademarks cited in this document are the property of their
412*c3423655Schristos   respective owners.
413*c3423655Schristos
414*c3423655Schristos   Jean-Loup Gailly and Mark Adler designed the zlib format and wrote
415*c3423655Schristos   the related software described in this specification.  Glenn
416*c3423655Schristos   Randers-Pehrson converted this document to RFC and HTML format.
417*c3423655Schristos
418*c3423655Schristos7. Authors' Addresses
419*c3423655Schristos
420*c3423655Schristos   L. Peter Deutsch
421*c3423655Schristos   Aladdin Enterprises
422*c3423655Schristos   203 Santa Margarita Ave.
423*c3423655Schristos   Menlo Park, CA 94025
424*c3423655Schristos
425*c3423655Schristos   Phone: (415) 322-0103 (AM only)
426*c3423655Schristos   FAX:   (415) 322-1734
427*c3423655Schristos   EMail: <ghost@aladdin.com>
428*c3423655Schristos
429*c3423655Schristos
430*c3423655Schristos   Jean-Loup Gailly
431*c3423655Schristos
432*c3423655Schristos   EMail: <gzip@prep.ai.mit.edu>
433*c3423655Schristos
434*c3423655Schristos   Questions about the technical content of this specification can be
435*c3423655Schristos   sent by email to
436*c3423655Schristos
437*c3423655Schristos   Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
438*c3423655Schristos   Mark Adler <madler@alumni.caltech.edu>
439*c3423655Schristos
440*c3423655Schristos   Editorial comments on this specification can be sent by email to
441*c3423655Schristos
442*c3423655Schristos   L. Peter Deutsch <ghost@aladdin.com> and
443*c3423655Schristos   Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
444*c3423655Schristos
445*c3423655Schristos
446*c3423655Schristos
447*c3423655Schristos
448*c3423655Schristos
449*c3423655Schristos
450*c3423655SchristosDeutsch & Gailly             Informational                      [Page 8]
451*c3423655Schristos
452*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
453*c3423655Schristos
454*c3423655Schristos
455*c3423655Schristos8. Appendix: Rationale
456*c3423655Schristos
457*c3423655Schristos   8.1. Preset dictionaries
458*c3423655Schristos
459*c3423655Schristos      A preset dictionary is specially useful to compress short input
460*c3423655Schristos      sequences. The compressor can take advantage of the dictionary
461*c3423655Schristos      context to encode the input in a more compact manner. The
462*c3423655Schristos      decompressor can be initialized with the appropriate context by
463*c3423655Schristos      virtually decompressing a compressed version of the dictionary
464*c3423655Schristos      without producing any output. However for certain compression
465*c3423655Schristos      algorithms such as the deflate algorithm this operation can be
466*c3423655Schristos      achieved without actually performing any decompression.
467*c3423655Schristos
468*c3423655Schristos      The compressor and the decompressor must use exactly the same
469*c3423655Schristos      dictionary. The dictionary may be fixed or may be chosen among a
470*c3423655Schristos      certain number of predefined dictionaries, according to the kind
471*c3423655Schristos      of input data. The decompressor can determine which dictionary has
472*c3423655Schristos      been chosen by the compressor by checking the dictionary
473*c3423655Schristos      identifier. This document does not specify the contents of
474*c3423655Schristos      predefined dictionaries, since the optimal dictionaries are
475*c3423655Schristos      application specific. Standard data formats using this feature of
476*c3423655Schristos      the zlib specification must precisely define the allowed
477*c3423655Schristos      dictionaries.
478*c3423655Schristos
479*c3423655Schristos   8.2. The Adler-32 algorithm
480*c3423655Schristos
481*c3423655Schristos      The Adler-32 algorithm is much faster than the CRC32 algorithm yet
482*c3423655Schristos      still provides an extremely low probability of undetected errors.
483*c3423655Schristos
484*c3423655Schristos      The modulo on unsigned long accumulators can be delayed for 5552
485*c3423655Schristos      bytes, so the modulo operation time is negligible.  If the bytes
486*c3423655Schristos      are a, b, c, the second sum is 3a + 2b + c + 3, and so is position
487*c3423655Schristos      and order sensitive, unlike the first sum, which is just a
488*c3423655Schristos      checksum.  That 65521 is prime is important to avoid a possible
489*c3423655Schristos      large class of two-byte errors that leave the check unchanged.
490*c3423655Schristos      (The Fletcher checksum uses 255, which is not prime and which also
491*c3423655Schristos      makes the Fletcher check insensitive to single byte changes 0 <->
492*c3423655Schristos      255.)
493*c3423655Schristos
494*c3423655Schristos      The sum s1 is initialized to 1 instead of zero to make the length
495*c3423655Schristos      of the sequence part of s2, so that the length does not have to be
496*c3423655Schristos      checked separately. (Any sequence of zeroes has a Fletcher
497*c3423655Schristos      checksum of zero.)
498*c3423655Schristos
499*c3423655Schristos
500*c3423655Schristos
501*c3423655Schristos
502*c3423655Schristos
503*c3423655Schristos
504*c3423655Schristos
505*c3423655Schristos
506*c3423655SchristosDeutsch & Gailly             Informational                      [Page 9]
507*c3423655Schristos
508*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
509*c3423655Schristos
510*c3423655Schristos
511*c3423655Schristos9. Appendix: Sample code
512*c3423655Schristos
513*c3423655Schristos   The following C code computes the Adler-32 checksum of a data buffer.
514*c3423655Schristos   It is written for clarity, not for speed.  The sample code is in the
515*c3423655Schristos   ANSI C programming language. Non C users may find it easier to read
516*c3423655Schristos   with these hints:
517*c3423655Schristos
518*c3423655Schristos      &      Bitwise AND operator.
519*c3423655Schristos      >>     Bitwise right shift operator. When applied to an
520*c3423655Schristos             unsigned quantity, as here, right shift inserts zero bit(s)
521*c3423655Schristos             at the left.
522*c3423655Schristos      <<     Bitwise left shift operator. Left shift inserts zero
523*c3423655Schristos             bit(s) at the right.
524*c3423655Schristos      ++     "n++" increments the variable n.
525*c3423655Schristos      %      modulo operator: a % b is the remainder of a divided by b.
526*c3423655Schristos
527*c3423655Schristos      #define BASE 65521 /* largest prime smaller than 65536 */
528*c3423655Schristos
529*c3423655Schristos      /*
530*c3423655Schristos         Update a running Adler-32 checksum with the bytes buf[0..len-1]
531*c3423655Schristos       and return the updated checksum. The Adler-32 checksum should be
532*c3423655Schristos       initialized to 1.
533*c3423655Schristos
534*c3423655Schristos       Usage example:
535*c3423655Schristos
536*c3423655Schristos         unsigned long adler = 1L;
537*c3423655Schristos
538*c3423655Schristos         while (read_buffer(buffer, length) != EOF) {
539*c3423655Schristos           adler = update_adler32(adler, buffer, length);
540*c3423655Schristos         }
541*c3423655Schristos         if (adler != original_adler) error();
542*c3423655Schristos      */
543*c3423655Schristos      unsigned long update_adler32(unsigned long adler,
544*c3423655Schristos         unsigned char *buf, int len)
545*c3423655Schristos      {
546*c3423655Schristos        unsigned long s1 = adler & 0xffff;
547*c3423655Schristos        unsigned long s2 = (adler >> 16) & 0xffff;
548*c3423655Schristos        int n;
549*c3423655Schristos
550*c3423655Schristos        for (n = 0; n < len; n++) {
551*c3423655Schristos          s1 = (s1 + buf[n]) % BASE;
552*c3423655Schristos          s2 = (s2 + s1)     % BASE;
553*c3423655Schristos        }
554*c3423655Schristos        return (s2 << 16) + s1;
555*c3423655Schristos      }
556*c3423655Schristos
557*c3423655Schristos      /* Return the adler32 of the bytes buf[0..len-1] */
558*c3423655Schristos
559*c3423655Schristos
560*c3423655Schristos
561*c3423655Schristos
562*c3423655SchristosDeutsch & Gailly             Informational                     [Page 10]
563*c3423655Schristos
564*c3423655SchristosRFC 1950       ZLIB Compressed Data Format Specification        May 1996
565*c3423655Schristos
566*c3423655Schristos
567*c3423655Schristos      unsigned long adler32(unsigned char *buf, int len)
568*c3423655Schristos      {
569*c3423655Schristos        return update_adler32(1L, buf, len);
570*c3423655Schristos      }
571*c3423655Schristos
572*c3423655Schristos
573*c3423655Schristos
574*c3423655Schristos
575*c3423655Schristos
576*c3423655Schristos
577*c3423655Schristos
578*c3423655Schristos
579*c3423655Schristos
580*c3423655Schristos
581*c3423655Schristos
582*c3423655Schristos
583*c3423655Schristos
584*c3423655Schristos
585*c3423655Schristos
586*c3423655Schristos
587*c3423655Schristos
588*c3423655Schristos
589*c3423655Schristos
590*c3423655Schristos
591*c3423655Schristos
592*c3423655Schristos
593*c3423655Schristos
594*c3423655Schristos
595*c3423655Schristos
596*c3423655Schristos
597*c3423655Schristos
598*c3423655Schristos
599*c3423655Schristos
600*c3423655Schristos
601*c3423655Schristos
602*c3423655Schristos
603*c3423655Schristos
604*c3423655Schristos
605*c3423655Schristos
606*c3423655Schristos
607*c3423655Schristos
608*c3423655Schristos
609*c3423655Schristos
610*c3423655Schristos
611*c3423655Schristos
612*c3423655Schristos
613*c3423655Schristos
614*c3423655Schristos
615*c3423655Schristos
616*c3423655Schristos
617*c3423655Schristos
618*c3423655SchristosDeutsch & Gailly             Informational                     [Page 11]
619*c3423655Schristos
620