1*c3423655Schristos 2*c3423655Schristos 3*c3423655Schristos 4*c3423655Schristos 5*c3423655Schristos 6*c3423655Schristos 7*c3423655SchristosNetwork Working Group P. Deutsch 8*c3423655SchristosRequest for Comments: 1950 Aladdin Enterprises 9*c3423655SchristosCategory: Informational J-L. Gailly 10*c3423655Schristos Info-ZIP 11*c3423655Schristos May 1996 12*c3423655Schristos 13*c3423655Schristos 14*c3423655Schristos ZLIB Compressed Data Format Specification version 3.3 15*c3423655Schristos 16*c3423655SchristosStatus of This Memo 17*c3423655Schristos 18*c3423655Schristos This memo provides information for the Internet community. This memo 19*c3423655Schristos does not specify an Internet standard of any kind. Distribution of 20*c3423655Schristos this memo is unlimited. 21*c3423655Schristos 22*c3423655SchristosIESG Note: 23*c3423655Schristos 24*c3423655Schristos The IESG takes no position on the validity of any Intellectual 25*c3423655Schristos Property Rights statements contained in this document. 26*c3423655Schristos 27*c3423655SchristosNotices 28*c3423655Schristos 29*c3423655Schristos Copyright (c) 1996 L. Peter Deutsch and Jean-Loup Gailly 30*c3423655Schristos 31*c3423655Schristos Permission is granted to copy and distribute this document for any 32*c3423655Schristos purpose and without charge, including translations into other 33*c3423655Schristos languages and incorporation into compilations, provided that the 34*c3423655Schristos copyright notice and this notice are preserved, and that any 35*c3423655Schristos substantive changes or deletions from the original are clearly 36*c3423655Schristos marked. 37*c3423655Schristos 38*c3423655Schristos A pointer to the latest version of this and related documentation in 39*c3423655Schristos HTML format can be found at the URL 40*c3423655Schristos <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>. 41*c3423655Schristos 42*c3423655SchristosAbstract 43*c3423655Schristos 44*c3423655Schristos This specification defines a lossless compressed data format. The 45*c3423655Schristos data can be produced or consumed, even for an arbitrarily long 46*c3423655Schristos sequentially presented input data stream, using only an a priori 47*c3423655Schristos bounded amount of intermediate storage. The format presently uses 48*c3423655Schristos the DEFLATE compression method but can be easily extended to use 49*c3423655Schristos other compression methods. It can be implemented readily in a manner 50*c3423655Schristos not covered by patents. This specification also defines the ADLER-32 51*c3423655Schristos checksum (an extension and improvement of the Fletcher checksum), 52*c3423655Schristos used for detection of data corruption, and provides an algorithm for 53*c3423655Schristos computing it. 54*c3423655Schristos 55*c3423655Schristos 56*c3423655Schristos 57*c3423655Schristos 58*c3423655SchristosDeutsch & Gailly Informational [Page 1] 59*c3423655Schristos 60*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 61*c3423655Schristos 62*c3423655Schristos 63*c3423655SchristosTable of Contents 64*c3423655Schristos 65*c3423655Schristos 1. Introduction ................................................... 2 66*c3423655Schristos 1.1. Purpose ................................................... 2 67*c3423655Schristos 1.2. Intended audience ......................................... 3 68*c3423655Schristos 1.3. Scope ..................................................... 3 69*c3423655Schristos 1.4. Compliance ................................................ 3 70*c3423655Schristos 1.5. Definitions of terms and conventions used ................ 3 71*c3423655Schristos 1.6. Changes from previous versions ............................ 3 72*c3423655Schristos 2. Detailed specification ......................................... 3 73*c3423655Schristos 2.1. Overall conventions ....................................... 3 74*c3423655Schristos 2.2. Data format ............................................... 4 75*c3423655Schristos 2.3. Compliance ................................................ 7 76*c3423655Schristos 3. References ..................................................... 7 77*c3423655Schristos 4. Source code .................................................... 8 78*c3423655Schristos 5. Security Considerations ........................................ 8 79*c3423655Schristos 6. Acknowledgements ............................................... 8 80*c3423655Schristos 7. Authors' Addresses ............................................. 8 81*c3423655Schristos 8. Appendix: Rationale ............................................ 9 82*c3423655Schristos 9. Appendix: Sample code ..........................................10 83*c3423655Schristos 84*c3423655Schristos1. Introduction 85*c3423655Schristos 86*c3423655Schristos 1.1. Purpose 87*c3423655Schristos 88*c3423655Schristos The purpose of this specification is to define a lossless 89*c3423655Schristos compressed data format that: 90*c3423655Schristos 91*c3423655Schristos * Is independent of CPU type, operating system, file system, 92*c3423655Schristos and character set, and hence can be used for interchange; 93*c3423655Schristos 94*c3423655Schristos * Can be produced or consumed, even for an arbitrarily long 95*c3423655Schristos sequentially presented input data stream, using only an a 96*c3423655Schristos priori bounded amount of intermediate storage, and hence can 97*c3423655Schristos be used in data communications or similar structures such as 98*c3423655Schristos Unix filters; 99*c3423655Schristos 100*c3423655Schristos * Can use a number of different compression methods; 101*c3423655Schristos 102*c3423655Schristos * Can be implemented readily in a manner not covered by 103*c3423655Schristos patents, and hence can be practiced freely. 104*c3423655Schristos 105*c3423655Schristos The data format defined by this specification does not attempt to 106*c3423655Schristos allow random access to compressed data. 107*c3423655Schristos 108*c3423655Schristos 109*c3423655Schristos 110*c3423655Schristos 111*c3423655Schristos 112*c3423655Schristos 113*c3423655Schristos 114*c3423655SchristosDeutsch & Gailly Informational [Page 2] 115*c3423655Schristos 116*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 117*c3423655Schristos 118*c3423655Schristos 119*c3423655Schristos 1.2. Intended audience 120*c3423655Schristos 121*c3423655Schristos This specification is intended for use by implementors of software 122*c3423655Schristos to compress data into zlib format and/or decompress data from zlib 123*c3423655Schristos format. 124*c3423655Schristos 125*c3423655Schristos The text of the specification assumes a basic background in 126*c3423655Schristos programming at the level of bits and other primitive data 127*c3423655Schristos representations. 128*c3423655Schristos 129*c3423655Schristos 1.3. Scope 130*c3423655Schristos 131*c3423655Schristos The specification specifies a compressed data format that can be 132*c3423655Schristos used for in-memory compression of a sequence of arbitrary bytes. 133*c3423655Schristos 134*c3423655Schristos 1.4. Compliance 135*c3423655Schristos 136*c3423655Schristos Unless otherwise indicated below, a compliant decompressor must be 137*c3423655Schristos able to accept and decompress any data set that conforms to all 138*c3423655Schristos the specifications presented here; a compliant compressor must 139*c3423655Schristos produce data sets that conform to all the specifications presented 140*c3423655Schristos here. 141*c3423655Schristos 142*c3423655Schristos 1.5. Definitions of terms and conventions used 143*c3423655Schristos 144*c3423655Schristos byte: 8 bits stored or transmitted as a unit (same as an octet). 145*c3423655Schristos (For this specification, a byte is exactly 8 bits, even on 146*c3423655Schristos machines which store a character on a number of bits different 147*c3423655Schristos from 8.) See below, for the numbering of bits within a byte. 148*c3423655Schristos 149*c3423655Schristos 1.6. Changes from previous versions 150*c3423655Schristos 151*c3423655Schristos Version 3.1 was the first public release of this specification. 152*c3423655Schristos In version 3.2, some terminology was changed and the Adler-32 153*c3423655Schristos sample code was rewritten for clarity. In version 3.3, the 154*c3423655Schristos support for a preset dictionary was introduced, and the 155*c3423655Schristos specification was converted to RFC style. 156*c3423655Schristos 157*c3423655Schristos2. Detailed specification 158*c3423655Schristos 159*c3423655Schristos 2.1. Overall conventions 160*c3423655Schristos 161*c3423655Schristos In the diagrams below, a box like this: 162*c3423655Schristos 163*c3423655Schristos +---+ 164*c3423655Schristos | | <-- the vertical bars might be missing 165*c3423655Schristos +---+ 166*c3423655Schristos 167*c3423655Schristos 168*c3423655Schristos 169*c3423655Schristos 170*c3423655SchristosDeutsch & Gailly Informational [Page 3] 171*c3423655Schristos 172*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 173*c3423655Schristos 174*c3423655Schristos 175*c3423655Schristos represents one byte; a box like this: 176*c3423655Schristos 177*c3423655Schristos +==============+ 178*c3423655Schristos | | 179*c3423655Schristos +==============+ 180*c3423655Schristos 181*c3423655Schristos represents a variable number of bytes. 182*c3423655Schristos 183*c3423655Schristos Bytes stored within a computer do not have a "bit order", since 184*c3423655Schristos they are always treated as a unit. However, a byte considered as 185*c3423655Schristos an integer between 0 and 255 does have a most- and least- 186*c3423655Schristos significant bit, and since we write numbers with the most- 187*c3423655Schristos significant digit on the left, we also write bytes with the most- 188*c3423655Schristos significant bit on the left. In the diagrams below, we number the 189*c3423655Schristos bits of a byte so that bit 0 is the least-significant bit, i.e., 190*c3423655Schristos the bits are numbered: 191*c3423655Schristos 192*c3423655Schristos +--------+ 193*c3423655Schristos |76543210| 194*c3423655Schristos +--------+ 195*c3423655Schristos 196*c3423655Schristos Within a computer, a number may occupy multiple bytes. All 197*c3423655Schristos multi-byte numbers in the format described here are stored with 198*c3423655Schristos the MOST-significant byte first (at the lower memory address). 199*c3423655Schristos For example, the decimal number 520 is stored as: 200*c3423655Schristos 201*c3423655Schristos 0 1 202*c3423655Schristos +--------+--------+ 203*c3423655Schristos |00000010|00001000| 204*c3423655Schristos +--------+--------+ 205*c3423655Schristos ^ ^ 206*c3423655Schristos | | 207*c3423655Schristos | + less significant byte = 8 208*c3423655Schristos + more significant byte = 2 x 256 209*c3423655Schristos 210*c3423655Schristos 2.2. Data format 211*c3423655Schristos 212*c3423655Schristos A zlib stream has the following structure: 213*c3423655Schristos 214*c3423655Schristos 0 1 215*c3423655Schristos +---+---+ 216*c3423655Schristos |CMF|FLG| (more-->) 217*c3423655Schristos +---+---+ 218*c3423655Schristos 219*c3423655Schristos 220*c3423655Schristos 221*c3423655Schristos 222*c3423655Schristos 223*c3423655Schristos 224*c3423655Schristos 225*c3423655Schristos 226*c3423655SchristosDeutsch & Gailly Informational [Page 4] 227*c3423655Schristos 228*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 229*c3423655Schristos 230*c3423655Schristos 231*c3423655Schristos (if FLG.FDICT set) 232*c3423655Schristos 233*c3423655Schristos 0 1 2 3 234*c3423655Schristos +---+---+---+---+ 235*c3423655Schristos | DICTID | (more-->) 236*c3423655Schristos +---+---+---+---+ 237*c3423655Schristos 238*c3423655Schristos +=====================+---+---+---+---+ 239*c3423655Schristos |...compressed data...| ADLER32 | 240*c3423655Schristos +=====================+---+---+---+---+ 241*c3423655Schristos 242*c3423655Schristos Any data which may appear after ADLER32 are not part of the zlib 243*c3423655Schristos stream. 244*c3423655Schristos 245*c3423655Schristos CMF (Compression Method and flags) 246*c3423655Schristos This byte is divided into a 4-bit compression method and a 4- 247*c3423655Schristos bit information field depending on the compression method. 248*c3423655Schristos 249*c3423655Schristos bits 0 to 3 CM Compression method 250*c3423655Schristos bits 4 to 7 CINFO Compression info 251*c3423655Schristos 252*c3423655Schristos CM (Compression method) 253*c3423655Schristos This identifies the compression method used in the file. CM = 8 254*c3423655Schristos denotes the "deflate" compression method with a window size up 255*c3423655Schristos to 32K. This is the method used by gzip and PNG (see 256*c3423655Schristos references [1] and [2] in Chapter 3, below, for the reference 257*c3423655Schristos documents). CM = 15 is reserved. It might be used in a future 258*c3423655Schristos version of this specification to indicate the presence of an 259*c3423655Schristos extra field before the compressed data. 260*c3423655Schristos 261*c3423655Schristos CINFO (Compression info) 262*c3423655Schristos For CM = 8, CINFO is the base-2 logarithm of the LZ77 window 263*c3423655Schristos size, minus eight (CINFO=7 indicates a 32K window size). Values 264*c3423655Schristos of CINFO above 7 are not allowed in this version of the 265*c3423655Schristos specification. CINFO is not defined in this specification for 266*c3423655Schristos CM not equal to 8. 267*c3423655Schristos 268*c3423655Schristos FLG (FLaGs) 269*c3423655Schristos This flag byte is divided as follows: 270*c3423655Schristos 271*c3423655Schristos bits 0 to 4 FCHECK (check bits for CMF and FLG) 272*c3423655Schristos bit 5 FDICT (preset dictionary) 273*c3423655Schristos bits 6 to 7 FLEVEL (compression level) 274*c3423655Schristos 275*c3423655Schristos The FCHECK value must be such that CMF and FLG, when viewed as 276*c3423655Schristos a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG), 277*c3423655Schristos is a multiple of 31. 278*c3423655Schristos 279*c3423655Schristos 280*c3423655Schristos 281*c3423655Schristos 282*c3423655SchristosDeutsch & Gailly Informational [Page 5] 283*c3423655Schristos 284*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 285*c3423655Schristos 286*c3423655Schristos 287*c3423655Schristos FDICT (Preset dictionary) 288*c3423655Schristos If FDICT is set, a DICT dictionary identifier is present 289*c3423655Schristos immediately after the FLG byte. The dictionary is a sequence of 290*c3423655Schristos bytes which are initially fed to the compressor without 291*c3423655Schristos producing any compressed output. DICT is the Adler-32 checksum 292*c3423655Schristos of this sequence of bytes (see the definition of ADLER32 293*c3423655Schristos below). The decompressor can use this identifier to determine 294*c3423655Schristos which dictionary has been used by the compressor. 295*c3423655Schristos 296*c3423655Schristos FLEVEL (Compression level) 297*c3423655Schristos These flags are available for use by specific compression 298*c3423655Schristos methods. The "deflate" method (CM = 8) sets these flags as 299*c3423655Schristos follows: 300*c3423655Schristos 301*c3423655Schristos 0 - compressor used fastest algorithm 302*c3423655Schristos 1 - compressor used fast algorithm 303*c3423655Schristos 2 - compressor used default algorithm 304*c3423655Schristos 3 - compressor used maximum compression, slowest algorithm 305*c3423655Schristos 306*c3423655Schristos The information in FLEVEL is not needed for decompression; it 307*c3423655Schristos is there to indicate if recompression might be worthwhile. 308*c3423655Schristos 309*c3423655Schristos compressed data 310*c3423655Schristos For compression method 8, the compressed data is stored in the 311*c3423655Schristos deflate compressed data format as described in the document 312*c3423655Schristos "DEFLATE Compressed Data Format Specification" by L. Peter 313*c3423655Schristos Deutsch. (See reference [3] in Chapter 3, below) 314*c3423655Schristos 315*c3423655Schristos Other compressed data formats are not specified in this version 316*c3423655Schristos of the zlib specification. 317*c3423655Schristos 318*c3423655Schristos ADLER32 (Adler-32 checksum) 319*c3423655Schristos This contains a checksum value of the uncompressed data 320*c3423655Schristos (excluding any dictionary data) computed according to Adler-32 321*c3423655Schristos algorithm. This algorithm is a 32-bit extension and improvement 322*c3423655Schristos of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 323*c3423655Schristos standard. See references [4] and [5] in Chapter 3, below) 324*c3423655Schristos 325*c3423655Schristos Adler-32 is composed of two sums accumulated per byte: s1 is 326*c3423655Schristos the sum of all bytes, s2 is the sum of all s1 values. Both sums 327*c3423655Schristos are done modulo 65521. s1 is initialized to 1, s2 to zero. The 328*c3423655Schristos Adler-32 checksum is stored as s2*65536 + s1 in most- 329*c3423655Schristos significant-byte first (network) order. 330*c3423655Schristos 331*c3423655Schristos 332*c3423655Schristos 333*c3423655Schristos 334*c3423655Schristos 335*c3423655Schristos 336*c3423655Schristos 337*c3423655Schristos 338*c3423655SchristosDeutsch & Gailly Informational [Page 6] 339*c3423655Schristos 340*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 341*c3423655Schristos 342*c3423655Schristos 343*c3423655Schristos 2.3. Compliance 344*c3423655Schristos 345*c3423655Schristos A compliant compressor must produce streams with correct CMF, FLG 346*c3423655Schristos and ADLER32, but need not support preset dictionaries. When the 347*c3423655Schristos zlib data format is used as part of another standard data format, 348*c3423655Schristos the compressor may use only preset dictionaries that are specified 349*c3423655Schristos by this other data format. If this other format does not use the 350*c3423655Schristos preset dictionary feature, the compressor must not set the FDICT 351*c3423655Schristos flag. 352*c3423655Schristos 353*c3423655Schristos A compliant decompressor must check CMF, FLG, and ADLER32, and 354*c3423655Schristos provide an error indication if any of these have incorrect values. 355*c3423655Schristos A compliant decompressor must give an error indication if CM is 356*c3423655Schristos not one of the values defined in this specification (only the 357*c3423655Schristos value 8 is permitted in this version), since another value could 358*c3423655Schristos indicate the presence of new features that would cause subsequent 359*c3423655Schristos data to be interpreted incorrectly. A compliant decompressor must 360*c3423655Schristos give an error indication if FDICT is set and DICTID is not the 361*c3423655Schristos identifier of a known preset dictionary. A decompressor may 362*c3423655Schristos ignore FLEVEL and still be compliant. When the zlib data format 363*c3423655Schristos is being used as a part of another standard format, a compliant 364*c3423655Schristos decompressor must support all the preset dictionaries specified by 365*c3423655Schristos the other format. When the other format does not use the preset 366*c3423655Schristos dictionary feature, a compliant decompressor must reject any 367*c3423655Schristos stream in which the FDICT flag is set. 368*c3423655Schristos 369*c3423655Schristos3. References 370*c3423655Schristos 371*c3423655Schristos [1] Deutsch, L.P.,"GZIP Compressed Data Format Specification", 372*c3423655Schristos available in ftp://ftp.uu.net/pub/archiving/zip/doc/ 373*c3423655Schristos 374*c3423655Schristos [2] Thomas Boutell, "PNG (Portable Network Graphics) specification", 375*c3423655Schristos available in ftp://ftp.uu.net/graphics/png/documents/ 376*c3423655Schristos 377*c3423655Schristos [3] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification", 378*c3423655Schristos available in ftp://ftp.uu.net/pub/archiving/zip/doc/ 379*c3423655Schristos 380*c3423655Schristos [4] Fletcher, J. G., "An Arithmetic Checksum for Serial 381*c3423655Schristos Transmissions," IEEE Transactions on Communications, Vol. COM-30, 382*c3423655Schristos No. 1, January 1982, pp. 247-252. 383*c3423655Schristos 384*c3423655Schristos [5] ITU-T Recommendation X.224, Annex D, "Checksum Algorithms," 385*c3423655Schristos November, 1993, pp. 144, 145. (Available from 386*c3423655Schristos gopher://info.itu.ch). ITU-T X.244 is also the same as ISO 8073. 387*c3423655Schristos 388*c3423655Schristos 389*c3423655Schristos 390*c3423655Schristos 391*c3423655Schristos 392*c3423655Schristos 393*c3423655Schristos 394*c3423655SchristosDeutsch & Gailly Informational [Page 7] 395*c3423655Schristos 396*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 397*c3423655Schristos 398*c3423655Schristos 399*c3423655Schristos4. Source code 400*c3423655Schristos 401*c3423655Schristos Source code for a C language implementation of a "zlib" compliant 402*c3423655Schristos library is available at ftp://ftp.uu.net/pub/archiving/zip/zlib/. 403*c3423655Schristos 404*c3423655Schristos5. Security Considerations 405*c3423655Schristos 406*c3423655Schristos A decoder that fails to check the ADLER32 checksum value may be 407*c3423655Schristos subject to undetected data corruption. 408*c3423655Schristos 409*c3423655Schristos6. Acknowledgements 410*c3423655Schristos 411*c3423655Schristos Trademarks cited in this document are the property of their 412*c3423655Schristos respective owners. 413*c3423655Schristos 414*c3423655Schristos Jean-Loup Gailly and Mark Adler designed the zlib format and wrote 415*c3423655Schristos the related software described in this specification. Glenn 416*c3423655Schristos Randers-Pehrson converted this document to RFC and HTML format. 417*c3423655Schristos 418*c3423655Schristos7. Authors' Addresses 419*c3423655Schristos 420*c3423655Schristos L. Peter Deutsch 421*c3423655Schristos Aladdin Enterprises 422*c3423655Schristos 203 Santa Margarita Ave. 423*c3423655Schristos Menlo Park, CA 94025 424*c3423655Schristos 425*c3423655Schristos Phone: (415) 322-0103 (AM only) 426*c3423655Schristos FAX: (415) 322-1734 427*c3423655Schristos EMail: <ghost@aladdin.com> 428*c3423655Schristos 429*c3423655Schristos 430*c3423655Schristos Jean-Loup Gailly 431*c3423655Schristos 432*c3423655Schristos EMail: <gzip@prep.ai.mit.edu> 433*c3423655Schristos 434*c3423655Schristos Questions about the technical content of this specification can be 435*c3423655Schristos sent by email to 436*c3423655Schristos 437*c3423655Schristos Jean-Loup Gailly <gzip@prep.ai.mit.edu> and 438*c3423655Schristos Mark Adler <madler@alumni.caltech.edu> 439*c3423655Schristos 440*c3423655Schristos Editorial comments on this specification can be sent by email to 441*c3423655Schristos 442*c3423655Schristos L. Peter Deutsch <ghost@aladdin.com> and 443*c3423655Schristos Glenn Randers-Pehrson <randeg@alumni.rpi.edu> 444*c3423655Schristos 445*c3423655Schristos 446*c3423655Schristos 447*c3423655Schristos 448*c3423655Schristos 449*c3423655Schristos 450*c3423655SchristosDeutsch & Gailly Informational [Page 8] 451*c3423655Schristos 452*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 453*c3423655Schristos 454*c3423655Schristos 455*c3423655Schristos8. Appendix: Rationale 456*c3423655Schristos 457*c3423655Schristos 8.1. Preset dictionaries 458*c3423655Schristos 459*c3423655Schristos A preset dictionary is specially useful to compress short input 460*c3423655Schristos sequences. The compressor can take advantage of the dictionary 461*c3423655Schristos context to encode the input in a more compact manner. The 462*c3423655Schristos decompressor can be initialized with the appropriate context by 463*c3423655Schristos virtually decompressing a compressed version of the dictionary 464*c3423655Schristos without producing any output. However for certain compression 465*c3423655Schristos algorithms such as the deflate algorithm this operation can be 466*c3423655Schristos achieved without actually performing any decompression. 467*c3423655Schristos 468*c3423655Schristos The compressor and the decompressor must use exactly the same 469*c3423655Schristos dictionary. The dictionary may be fixed or may be chosen among a 470*c3423655Schristos certain number of predefined dictionaries, according to the kind 471*c3423655Schristos of input data. The decompressor can determine which dictionary has 472*c3423655Schristos been chosen by the compressor by checking the dictionary 473*c3423655Schristos identifier. This document does not specify the contents of 474*c3423655Schristos predefined dictionaries, since the optimal dictionaries are 475*c3423655Schristos application specific. Standard data formats using this feature of 476*c3423655Schristos the zlib specification must precisely define the allowed 477*c3423655Schristos dictionaries. 478*c3423655Schristos 479*c3423655Schristos 8.2. The Adler-32 algorithm 480*c3423655Schristos 481*c3423655Schristos The Adler-32 algorithm is much faster than the CRC32 algorithm yet 482*c3423655Schristos still provides an extremely low probability of undetected errors. 483*c3423655Schristos 484*c3423655Schristos The modulo on unsigned long accumulators can be delayed for 5552 485*c3423655Schristos bytes, so the modulo operation time is negligible. If the bytes 486*c3423655Schristos are a, b, c, the second sum is 3a + 2b + c + 3, and so is position 487*c3423655Schristos and order sensitive, unlike the first sum, which is just a 488*c3423655Schristos checksum. That 65521 is prime is important to avoid a possible 489*c3423655Schristos large class of two-byte errors that leave the check unchanged. 490*c3423655Schristos (The Fletcher checksum uses 255, which is not prime and which also 491*c3423655Schristos makes the Fletcher check insensitive to single byte changes 0 <-> 492*c3423655Schristos 255.) 493*c3423655Schristos 494*c3423655Schristos The sum s1 is initialized to 1 instead of zero to make the length 495*c3423655Schristos of the sequence part of s2, so that the length does not have to be 496*c3423655Schristos checked separately. (Any sequence of zeroes has a Fletcher 497*c3423655Schristos checksum of zero.) 498*c3423655Schristos 499*c3423655Schristos 500*c3423655Schristos 501*c3423655Schristos 502*c3423655Schristos 503*c3423655Schristos 504*c3423655Schristos 505*c3423655Schristos 506*c3423655SchristosDeutsch & Gailly Informational [Page 9] 507*c3423655Schristos 508*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 509*c3423655Schristos 510*c3423655Schristos 511*c3423655Schristos9. Appendix: Sample code 512*c3423655Schristos 513*c3423655Schristos The following C code computes the Adler-32 checksum of a data buffer. 514*c3423655Schristos It is written for clarity, not for speed. The sample code is in the 515*c3423655Schristos ANSI C programming language. Non C users may find it easier to read 516*c3423655Schristos with these hints: 517*c3423655Schristos 518*c3423655Schristos & Bitwise AND operator. 519*c3423655Schristos >> Bitwise right shift operator. When applied to an 520*c3423655Schristos unsigned quantity, as here, right shift inserts zero bit(s) 521*c3423655Schristos at the left. 522*c3423655Schristos << Bitwise left shift operator. Left shift inserts zero 523*c3423655Schristos bit(s) at the right. 524*c3423655Schristos ++ "n++" increments the variable n. 525*c3423655Schristos % modulo operator: a % b is the remainder of a divided by b. 526*c3423655Schristos 527*c3423655Schristos #define BASE 65521 /* largest prime smaller than 65536 */ 528*c3423655Schristos 529*c3423655Schristos /* 530*c3423655Schristos Update a running Adler-32 checksum with the bytes buf[0..len-1] 531*c3423655Schristos and return the updated checksum. The Adler-32 checksum should be 532*c3423655Schristos initialized to 1. 533*c3423655Schristos 534*c3423655Schristos Usage example: 535*c3423655Schristos 536*c3423655Schristos unsigned long adler = 1L; 537*c3423655Schristos 538*c3423655Schristos while (read_buffer(buffer, length) != EOF) { 539*c3423655Schristos adler = update_adler32(adler, buffer, length); 540*c3423655Schristos } 541*c3423655Schristos if (adler != original_adler) error(); 542*c3423655Schristos */ 543*c3423655Schristos unsigned long update_adler32(unsigned long adler, 544*c3423655Schristos unsigned char *buf, int len) 545*c3423655Schristos { 546*c3423655Schristos unsigned long s1 = adler & 0xffff; 547*c3423655Schristos unsigned long s2 = (adler >> 16) & 0xffff; 548*c3423655Schristos int n; 549*c3423655Schristos 550*c3423655Schristos for (n = 0; n < len; n++) { 551*c3423655Schristos s1 = (s1 + buf[n]) % BASE; 552*c3423655Schristos s2 = (s2 + s1) % BASE; 553*c3423655Schristos } 554*c3423655Schristos return (s2 << 16) + s1; 555*c3423655Schristos } 556*c3423655Schristos 557*c3423655Schristos /* Return the adler32 of the bytes buf[0..len-1] */ 558*c3423655Schristos 559*c3423655Schristos 560*c3423655Schristos 561*c3423655Schristos 562*c3423655SchristosDeutsch & Gailly Informational [Page 10] 563*c3423655Schristos 564*c3423655SchristosRFC 1950 ZLIB Compressed Data Format Specification May 1996 565*c3423655Schristos 566*c3423655Schristos 567*c3423655Schristos unsigned long adler32(unsigned char *buf, int len) 568*c3423655Schristos { 569*c3423655Schristos return update_adler32(1L, buf, len); 570*c3423655Schristos } 571*c3423655Schristos 572*c3423655Schristos 573*c3423655Schristos 574*c3423655Schristos 575*c3423655Schristos 576*c3423655Schristos 577*c3423655Schristos 578*c3423655Schristos 579*c3423655Schristos 580*c3423655Schristos 581*c3423655Schristos 582*c3423655Schristos 583*c3423655Schristos 584*c3423655Schristos 585*c3423655Schristos 586*c3423655Schristos 587*c3423655Schristos 588*c3423655Schristos 589*c3423655Schristos 590*c3423655Schristos 591*c3423655Schristos 592*c3423655Schristos 593*c3423655Schristos 594*c3423655Schristos 595*c3423655Schristos 596*c3423655Schristos 597*c3423655Schristos 598*c3423655Schristos 599*c3423655Schristos 600*c3423655Schristos 601*c3423655Schristos 602*c3423655Schristos 603*c3423655Schristos 604*c3423655Schristos 605*c3423655Schristos 606*c3423655Schristos 607*c3423655Schristos 608*c3423655Schristos 609*c3423655Schristos 610*c3423655Schristos 611*c3423655Schristos 612*c3423655Schristos 613*c3423655Schristos 614*c3423655Schristos 615*c3423655Schristos 616*c3423655Schristos 617*c3423655Schristos 618*c3423655SchristosDeutsch & Gailly Informational [Page 11] 619*c3423655Schristos 620