// Written in the D programming language.

/**
 * Compress/decompress data using the $(HTTP www._zlib.net, _zlib library).
 *
 * Examples:
 *
 * If you have a small buffer you can use $(LREF compress) and
 * $(LREF uncompress) directly.
 *
 * -------
 * import std.zlib;
 *
 * auto src =
 * "the quick brown fox jumps over the lazy dog\r
 *  the quick brown fox jumps over the lazy dog\r";
 *
 * ubyte[] dst;
 * ubyte[] result;
 *
 * dst = compress(src);
 * result = cast(ubyte[]) uncompress(dst);
 * assert(result == src);
 * -------
 *
 * When the data to be compressed doesn't fit in one buffer, use
 * $(LREF Compress) and $(LREF UnCompress).
 *
 * -------
 * import std.zlib;
 * import std.stdio;
 * import std.conv : to;
 * import std.algorithm.iteration : map;
 *
 * UnCompress decmp = new UnCompress;
 * foreach (chunk; stdin.byChunk(4096).map!(x => decmp.uncompress(x)))
 * {
 *     chunk.to!string.write;
 * }
 * // Call decmp.flush() afterwards to collect anything still pending and to
 * // verify that the compressed stream was complete.
 * -------
 *
 * References:
 *  $(HTTP en.wikipedia.org/wiki/Zlib, Wikipedia)
 *
 * Copyright: Copyright Digital Mars 2000 - 2011.
 * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   $(HTTP digitalmars.com, Walter Bright)
 * Source:    $(PHOBOSSRC std/_zlib.d)
 */
/*          Copyright Digital Mars 2000 - 2011.
 * Distributed under the Boost Software License, Version 1.0.
 *    (See accompanying file LICENSE_1_0.txt or copy at
 *          http://www.boost.org/LICENSE_1_0.txt)
 */
module std.zlib;

//debug=zlib;       // uncomment to turn on debugging printf's

import etc.c.zlib;

// Values for 'mode'

enum
{
    Z_NO_FLUSH      = 0,
    Z_SYNC_FLUSH    = 2,
    Z_FULL_FLUSH    = 3,
    Z_FINISH        = 4,
}

/*************************************
 * Errors throw a ZlibException.
 *
 * The exception message is a short English name for the zlib status code
 * passed to the constructor ("unknown error" for unrecognised codes).
 */

class ZlibException : Exception
{
    /// Params: errnum = the zlib status code being reported
    this(int errnum)
    {
        string msg;

        switch (errnum)
        {
            case Z_STREAM_END:      msg = "stream end"; break;
            case Z_NEED_DICT:       msg = "need dict"; break;
            case Z_ERRNO:           msg = "errno"; break;
            case Z_STREAM_ERROR:    msg = "stream error"; break;
            case Z_DATA_ERROR:      msg = "data error"; break;
            case Z_MEM_ERROR:       msg = "mem error"; break;
            case Z_BUF_ERROR:       msg = "buf error"; break;
            case Z_VERSION_ERROR:   msg = "version error"; break;
            default:                msg = "unknown error";  break;
        }
        super(msg);
    }
}

/**
 * $(P Compute the Adler-32 checksum of a buffer's worth of data.)
 *
 * Params:
 *     adler = the starting checksum for the computation. Use 1
 *             for a new checksum. Use the output of this function
 *             for a cumulative checksum.
 *     buf = buffer containing input data
 *
 * Returns:
 *     A $(D uint) checksum for the provided input data and starting checksum
 *
 * See_Also:
 *     $(LINK http://en.wikipedia.org/wiki/Adler-32)
 */

uint adler32(uint adler, const(void)[] buf)
{
    import std.range : chunks;

    // The C routine takes a 32-bit length, so larger buffers are fed to it
    // piecewise and the running checksum is threaded through the calls.
    foreach (chunk; (cast(ubyte[]) buf).chunks(0xFFFF0000))
    {
        adler = etc.c.zlib.adler32(adler, chunk.ptr, cast(uint) chunk.length);
    }
    return adler;
}

///
@system unittest
{
    static ubyte[] data = [1,2,3,4,5,6,7,8,9,10];

    uint adler = adler32(0u, data);
    assert(adler == 0xdc0037);
}

@system unittest
{
    static string data = "test";

    uint adler = adler32(1, data);
    assert(adler == 0x045d01c1);
}

/**
 * $(P Compute the CRC32 checksum of a buffer's worth of data.)
 *
 * Params:
 *     crc = the starting checksum for the computation. Use 0
 *             for a new checksum. Use the output of this function
 *             for a cumulative checksum.
 *     buf = buffer containing input data
 *
 * Returns:
 *     A $(D uint) checksum for the provided input data and starting checksum
 *
 * See_Also:
 *     $(LINK http://en.wikipedia.org/wiki/Cyclic_redundancy_check)
 */

uint crc32(uint crc, const(void)[] buf)
{
    import std.range : chunks;

    // Same piecewise scheme as adler32: the C routine takes a 32-bit length.
    foreach (chunk; (cast(ubyte[]) buf).chunks(0xFFFF0000))
    {
        crc = etc.c.zlib.crc32(crc, chunk.ptr, cast(uint) chunk.length);
    }
    return crc;
}

@system unittest
{
    static ubyte[] data = [1,2,3,4,5,6,7,8,9,10];

    uint crc;

    debug(zlib) printf("D.zlib.crc32.unittest\n");
    crc = crc32(0u, cast(void[]) data);
    debug(zlib) printf("crc = %x\n", crc);
    assert(crc == 0x2520577b);
}

/**
 * $(P Compress data)
 *
 * Params:
 *     srcbuf = buffer containing the data to compress
 *     level = compression level. Legal values are -1 .. 9, with -1 indicating
 *             the default level (6), 0 indicating no compression, 1 being the
 *             least compression and 9 being the most.
 *
 * Returns:
 *     the compressed data
 *
 * Throws:
 *     $(LREF ZlibException) if the underlying `compress2` call reports an error.
 */

ubyte[] compress(const(void)[] srcbuf, int level)
in
{
    assert(-1 <= level && level <= 9);
}
body
{
    import core.memory : GC;

    // Destination sized as source + ~0.1% + 12 bytes of slack; compress2
    // rewrites destlen with the number of bytes actually produced.
    auto destlen = srcbuf.length + ((srcbuf.length + 1023) / 1024) + 12;
    auto destbuf = new ubyte[destlen];
    auto err = etc.c.zlib.compress2(destbuf.ptr, &destlen, cast(ubyte *) srcbuf.ptr, srcbuf.length, level);
    if (err)
    {
        GC.free(destbuf.ptr);
        throw new ZlibException(err);
    }

    destbuf.length = destlen;
    return destbuf;
}

/// ditto
ubyte[] compress(const(void)[] srcbuf)
{
    return compress(srcbuf, Z_DEFAULT_COMPRESSION);
}

/*********************************************
 * Decompresses the data in srcbuf[].
 * Params:
 *  srcbuf  = buffer containing the compressed data.
 *  destlen = size of the uncompressed data.
 *            It need not be accurate, but the decompression will be faster
 *            if the exact size is supplied.
 *  winbits = the base two logarithm of the maximum window size.
 * Returns: the decompressed data.
 * Throws: $(LREF ZlibException) if zlib reports anything other than
 *         `Z_OK` / `Z_STREAM_END` while inflating.
 */

void[] uncompress(const(void)[] srcbuf, size_t destlen = 0u, int winbits = 15)
{
    import std.conv : to;
    int err;
    ubyte[] destbuf;

    if (!destlen)
        destlen = srcbuf.length * 2 + 1;

    etc.c.zlib.z_stream zs;
    zs.next_in = cast(typeof(zs.next_in)) srcbuf.ptr;
    zs.avail_in = to!uint(srcbuf.length);
    err = etc.c.zlib.inflateInit2(&zs, winbits);
    if (err)
    {
        throw new ZlibException(err);
    }

    // From here on the stream owns native resources. Release them on every
    // exit path (including a throwing to!uint or a failed allocation) unless
    // the success path has already called inflateEnd itself.
    bool ended = false;
    scope (exit)
    {
        if (!ended)
            etc.c.zlib.inflateEnd(&zs);
    }

    size_t olddestlen = 0u;

    loop:
    while (true)
    {
        // Grow the output and let inflate continue right after the part
        // that was filled by the previous iteration.
        destbuf.length = destlen;
        zs.next_out = cast(typeof(zs.next_out)) &destbuf[olddestlen];
        zs.avail_out = to!uint(destlen - olddestlen);
        olddestlen = destlen;

        err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH);
        switch (err)
        {
            case Z_OK:
                destlen = destbuf.length * 2;
                continue loop;

            case Z_STREAM_END:
                destbuf.length = zs.total_out;
                ended = true;
                err = etc.c.zlib.inflateEnd(&zs);
                if (err != Z_OK)
                    throw new ZlibException(err);
                return destbuf;

            default:
                // scope (exit) above releases the stream.
                throw new ZlibException(err);
        }
    }
    assert(0);
}

@system unittest
{
    auto src =
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
";
    ubyte[] dst;
    ubyte[] result;

    //arrayPrint(src);
    dst = compress(src);
    //arrayPrint(dst);
    result = cast(ubyte[]) uncompress(dst);
    //arrayPrint(result);
    assert(result == src);
}

@system unittest
{
    ubyte[] src = new ubyte[1000000];
    ubyte[] dst;
    ubyte[] result;

    src[] = 0x80;
    dst = compress(src);
    assert(dst.length*2 + 1 < src.length);
    result = cast(ubyte[]) uncompress(dst);
    assert(result == src);
}

/+
void arrayPrint(ubyte[] array)
{
    //printf("array %p,%d\n", cast(void*) array, array.length);
    for (size_t i = 0; i < array.length; i++)
    {
        printf("%02x ", array[i]);
        if (((i + 1) & 15) == 0)
            printf("\n");
    }
    printf("\n\n");
}
+/

/// the header format the compressed stream is wrapped in
enum HeaderFormat {
    deflate, /// a standard zlib header
    gzip, /// a gzip file format header
    determineFromData /// used when decompressing. Try to automatically detect the stream format by looking at the data
}

/*********************************************
 * Used when the data to be compressed is not all in one buffer.
 */

class Compress
{
    import std.conv : to;

  private:
    z_stream zs;
    int level = Z_DEFAULT_COMPRESSION;
    int inited;             // nonzero while zs holds an initialised deflate stream
    immutable bool gzip;    // true: gzip wrapper, false: zlib wrapper

    // Tear down the stream (if any) and report err as a ZlibException.
    void error(int err)
    {
        if (inited)
        {
            deflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Constructor.
     *
     * Params:
     *    level = compression level. Legal values are -1 .. 9, with -1 indicating
     *            the default level (6), 0 indicating no compression, 1 being the
     *            least compression and 9 being the most.
     *    header = sets the compression type to one of the options available
     *             in $(LREF HeaderFormat). Defaults to HeaderFormat.deflate.
     *
     * See_Also:
     *    $(LREF compress), $(LREF HeaderFormat)
     */
    this(int level, HeaderFormat header = HeaderFormat.deflate)
    in
    {
        // Same range the free function compress() accepts.
        assert(-1 <= level && level <= 9);
    }
    body
    {
        this.level = level;
        this.gzip = header == HeaderFormat.gzip;
    }

    /// ditto
    this(HeaderFormat header = HeaderFormat.deflate)
    {
        this.gzip = header == HeaderFormat.gzip;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            deflateEnd(&zs);
        }
    }

    /**
     * Compress the data in buf and return the compressed data.
     *
     * All of buf is handed to zlib before this function returns, so the
     * caller is free to reuse or overwrite buf afterwards.
     *
     * Params:
     *    buf = data to compress
     *
     * Returns:
     *    the compressed data. The buffers returned from successive calls to this should be concatenated together.
     *    May be empty when zlib is still buffering; `null` if buf is empty.
     *
     * Throws:
     *    $(LREF ZlibException) if zlib reports an error.
     */
    const(void)[] compress(const(void)[] buf)
    {
        import core.memory : GC;
        int err;
        ubyte[] destbuf;

        if (buf.length == 0)
            return null;

        if (!inited)
        {
            // windowBits 15 selects a zlib wrapper; adding 16 selects gzip.
            err = deflateInit2(&zs, level, Z_DEFLATED, 15 + (gzip ? 16 : 0), 8, Z_DEFAULT_STRATEGY);
            if (err)
                error(err);
            inited = 1;
        }

        zs.next_in = cast(typeof(zs.next_in)) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        destbuf = new ubyte[buf.length];
        size_t filled = 0;

        // A single deflate call may stop with input left over when the
        // output buffer fills up. Keep going with more output space until
        // the whole input is consumed, so that no pointer into the caller's
        // buffer survives this call.
        for (;;)
        {
            zs.next_out = destbuf.ptr + filled;
            zs.avail_out = to!uint(destbuf.length - filled);

            err = deflate(&zs, Z_NO_FLUSH);
            if (err != Z_STREAM_END && err != Z_OK)
            {
                GC.free(destbuf.ptr);
                error(err);
            }
            filled = destbuf.length - zs.avail_out;

            if (zs.avail_in == 0)
                break;

            destbuf.length = destbuf.length + buf.length;
        }

        destbuf.length = filled;
        return destbuf;
    }

    /***
     * Compress and return any remaining data.
     * The returned data should be appended to that returned by compress().
     * Params:
     *  mode = one of the following:
     *          $(DL
                    $(DT Z_SYNC_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on.)
                    $(DT Z_FULL_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on,
                        and the decompressor needs to be restartable at this
                        point.)
                    $(DT Z_FINISH)
                    $(DD (default) Used when finished compressing the data. )
                )
     * Returns: the flushed bytes, or `null` if nothing was ever compressed.
     * Throws: $(LREF ZlibException) if zlib reports an error.
     */
    void[] flush(int mode = Z_FINISH)
    in
    {
        assert(mode == Z_FINISH || mode == Z_SYNC_FLUSH || mode == Z_FULL_FLUSH);
    }
    body
    {
        import core.memory : GC;
        ubyte[] destbuf;
        ubyte[512] tmpbuf = void;
        int err;

        if (!inited)
            return null;

        /* may be  zs.avail_out+<some constant>
         * zs.avail_out is set nonzero by deflate in previous compress()
         */
        //tmpbuf = new void[zs.avail_out];
        zs.next_out = tmpbuf.ptr;
        zs.avail_out = tmpbuf.length;

        // Drain zlib through the fixed scratch buffer, appending each full
        // scratch buffer to destbuf.
        while ( (err = deflate(&zs, mode)) != Z_STREAM_END)
        {
            if (err == Z_OK)
            {
                // A sync/full flush is complete once deflate leaves room
                // in the output buffer.
                if (zs.avail_out != 0 && mode != Z_FINISH)
                    break;
                else if (zs.avail_out == 0)
                {
                    destbuf ~= tmpbuf;
                    zs.next_out = tmpbuf.ptr;
                    zs.avail_out = tmpbuf.length;
                    continue;
                }
                // Z_FINISH returned Z_OK although output space was left.
                err = Z_BUF_ERROR;
            }
            GC.free(destbuf.ptr);
            error(err);
        }
        destbuf ~= tmpbuf[0 .. (tmpbuf.length - zs.avail_out)];

        if (mode == Z_FINISH)
        {
            err = deflateEnd(&zs);
            inited = 0;
            if (err)
                error(err);
        }
        return destbuf;
    }
}

/******
 * Used when the data to be decompressed is not all in one buffer.
 */

class UnCompress
{
    import std.conv : to;

  private:
    z_stream zs;
    int inited;             // nonzero while zs holds an initialised inflate stream
    int done;               // set once flush() has run or the object is destroyed
    size_t destbufsize;     // initial/incremental output buffer size; 0 = derive from first input

    HeaderFormat format;

    // Tear down the stream (if any) and report err as a ZlibException.
    void error(int err)
    {
        if (inited)
        {
            inflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Construct. destbufsize is the same as for D.zlib.uncompress().
     *
     * Note that the `destbufsize` overload does not set the header format,
     * which therefore stays at `HeaderFormat.init` (`HeaderFormat.deflate`),
     * whereas the other overload defaults to `HeaderFormat.determineFromData`.
     */
    this(uint destbufsize)
    {
        this.destbufsize = destbufsize;
    }

    /** ditto */
    this(HeaderFormat format = HeaderFormat.determineFromData)
    {
        this.format = format;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            inflateEnd(&zs);
        }
        done = 1;
    }

    /**
     * Decompress the data in buf and return the decompressed data.
     * The buffers returned from successive calls to this should be concatenated
     * together.
     *
     * Everything that can be decoded from the input supplied so far is
     * returned by this call, and no reference to buf is needed by later
     * calls, so the caller is free to reuse or overwrite buf afterwards.
     *
     * Throws: $(LREF ZlibException) if zlib reports an error.
     */
    const(void)[] uncompress(const(void)[] buf)
    in
    {
        assert(!done);
    }
    body
    {
        import core.memory : GC;
        int err;
        ubyte[] destbuf;

        if (buf.length == 0)
            return null;

        if (!inited)
        {
            // 15 = zlib wrapper, +16 = gzip wrapper, +32 = auto-detect.
            int windowBits = 15;
            if (format == HeaderFormat.gzip)
                windowBits += 16;
            else if (format == HeaderFormat.determineFromData)
                windowBits += 32;

            err = inflateInit2(&zs, windowBits);
            if (err)
                error(err);
            inited = 1;
        }

        zs.next_in = cast(ubyte*) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        // Computed in size_t so that doubling cannot wrap in 32 bits.
        if (!destbufsize)
            destbufsize = buf.length * 2;
        destbuf = new ubyte[destbufsize];
        size_t filled = 0;

        for (;;)
        {
            zs.next_out = destbuf.ptr + filled;
            zs.avail_out = to!uint(destbuf.length - filled);

            err = inflate(&zs, Z_NO_FLUSH);
            filled = destbuf.length - zs.avail_out;

            if (err == Z_STREAM_END)
                break;

            // Z_BUF_ERROR is zlib's non-fatal "no progress possible"; with
            // the input exhausted it only means more input is needed.
            if (err == Z_BUF_ERROR && zs.avail_in == 0)
                break;

            if (err != Z_OK)
            {
                GC.free(destbuf.ptr);
                error(err);
            }

            // Input consumed and room left over: nothing more is pending.
            if (zs.avail_in == 0 && zs.avail_out != 0)
                break;

            // Output buffer full: make room and call inflate again, since
            // more output may still be pending.
            destbuf.length = destbuf.length + destbufsize;
        }

        destbuf.length = filled;
        return destbuf;
    }

    /**
     * Decompress and return any remaining data.
     * The returned data should be appended to that returned by uncompress().
     * The UnCompress object cannot be used further.
     *
     * Throws: $(LREF ZlibException) if the compressed stream has not reached
     *         its end or zlib reports an error.
     */
    void[] flush()
    in
    {
        assert(!done);
    }
    out
    {
        assert(done);
    }
    body
    {
        import core.memory : GC;
        ubyte[] extra;      // completely filled buffers from earlier rounds
        ubyte[] destbuf;
        int err;

        done = 1;
        if (!inited)
            return null;

        for (;;)
        {
            destbuf = new ubyte[zs.avail_in * 2 + 100];
            zs.next_out = destbuf.ptr;
            zs.avail_out = to!uint(destbuf.length);

            err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH);
            if (err == Z_OK && zs.avail_out == 0)
            {
                // Buffer filled completely; keep it and fetch the rest.
                extra ~= destbuf;
                continue;
            }
            break;
        }

        if (err != Z_STREAM_END)
        {
            GC.free(destbuf.ptr);
            // Z_OK here means the stream stopped short of its end marker.
            if (err == Z_OK)
                err = Z_BUF_ERROR;
            error(err);
        }
        destbuf = destbuf.ptr[0 .. zs.next_out - destbuf.ptr];
        err = etc.c.zlib.inflateEnd(&zs);
        inited = 0;
        if (err)
            error(err);
        if (extra.length)
            destbuf = extra ~ destbuf;
        return destbuf;
    }
}

/* ========================== unittest ========================= */

import std.random;
import std.stdio;

@system unittest // by Dave
{
    debug(zlib) writeln("std.zlib.unittest");

    bool CompressThenUncompress (void[] src)
    {
        ubyte[] dst = std.zlib.compress(src);
        double ratio = (dst.length / cast(double) src.length);
        debug(zlib) writef("src.length: %1$d, dst: %2$d, Ratio = %3$f", src.length, dst.length, ratio);
        ubyte[] uncompressedBuf;
        uncompressedBuf = cast(ubyte[]) std.zlib.uncompress(dst);
        assert(src.length == uncompressedBuf.length);
        assert(src == uncompressedBuf);

        return true;
    }


    // smallish buffers
    for (int idx = 0; idx < 25; idx++)
    {
        char[] buf = new char[uniform(0, 100)];

        // Alternate between more & less compressible
        foreach (ref char c; buf)
            c = cast(char) (' ' + (uniform(0, idx % 2 ? 91 : 2)));

        if (CompressThenUncompress(buf))
        {
            debug(zlib) writeln("; Success.");
        }
        else
        {
            return;
        }
    }

    // larger buffers
    for (int idx = 0; idx < 25; idx++)
    {
        char[] buf = new char[uniform(0, 1000/*0000*/)];

        // Alternate between more & less compressible
        foreach (ref char c; buf)
            c = cast(char) (' ' + (uniform(0, idx % 2 ? 91 : 10)));

        if (CompressThenUncompress(buf))
        {
            debug(zlib) writefln("; Success.");
        }
        else
        {
            return;
        }
    }

    debug(zlib) writefln("PASSED std.zlib.unittest");
}


@system unittest // by Artem Rebrov
{
    Compress cmp = new Compress;
    UnCompress decmp = new UnCompress;

    const(void)[] input;
    input = "tesatdffadf";

    const(void)[] buf = cmp.compress(input);
    buf ~= cmp.flush();
    const(void)[] output = decmp.uncompress(buf);

    //writefln("input = '%s'", cast(char[]) input);
    //writefln("output = '%s'", cast(char[]) output);
    assert( output[] == input[] );
}

@system unittest // streaming inflate through a reused input buffer
{
    ubyte[] src = new ubyte[100_000];
    src[] = 0x55;

    auto packed = std.zlib.compress(src);

    // A tiny output buffer forces many growth rounds per call, and the
    // input is always presented through the same scratch array.
    auto decmp = new UnCompress(64);
    ubyte[16] scratch;
    ubyte[] result;
    for (size_t pos = 0; pos < packed.length; pos += scratch.length)
    {
        immutable left = packed.length - pos;
        immutable n = left < scratch.length ? left : scratch.length;
        scratch[0 .. n] = packed[pos .. pos + n];
        result ~= cast(const(ubyte)[]) decmp.uncompress(scratch[0 .. n]);
    }
    result ~= cast(ubyte[]) decmp.flush();
    assert(result == src);
}

@system unittest // streaming deflate through a reused input buffer, levels -1 and 0
{
    ubyte[] src = new ubyte[200_000];
    uint seed = 12345;
    foreach (ref b; src)
    {
        seed = seed * 1103515245 + 12345;
        b = cast(ubyte) (seed >> 16);
    }

    foreach (level; [-1, 0])
    {
        auto cmp = new Compress(level);
        ubyte[1000] scratch;
        ubyte[] packed;
        for (size_t pos = 0; pos < src.length; pos += scratch.length)
        {
            scratch[] = src[pos .. pos + scratch.length];
            packed ~= cast(const(ubyte)[]) cmp.compress(scratch[]);
        }
        packed ~= cast(ubyte[]) cmp.flush();

        auto result = cast(ubyte[]) std.zlib.uncompress(packed);
        assert(result == src);
    }
}

@system unittest
{
    static assert(__traits(compiles, etc.c.zlib.gzclose(null))); // bugzilla 15457
}