1# gzip-compatible compression filter. 2 3implement Filter; 4 5include "sys.m"; 6 sys: Sys; 7 8include "filter.m"; 9 10GZMAGIC1: con byte 16r1f; 11GZMAGIC2: con byte 16r8b; 12 13GZDEFLATE: con byte 8; 14 15GZFTEXT: con byte 1 << 0; # file is text 16GZFHCRC: con byte 1 << 1; # crc of header included 17GZFEXTRA: con byte 1 << 2; # extra header included 18GZFNAME: con byte 1 << 3; # name of file included 19GZFCOMMENT: con byte 1 << 4; # header comment included 20GZFMASK: con (byte 1 << 5) - byte 1; # mask of specified bits 21 22GZXFAST: con byte 2; # used fast algorithm little compression 23GZXBEST: con byte 4; # used maximum compression algorithm 24 25GZOSFAT: con byte 0; # FAT file system 26GZOSAMIGA: con byte 1; # Amiga 27GZOSVMS: con byte 2; # VMS or OpenVMS 28GZOSUNIX: con byte 3; # Unix 29GZOSVMCMS: con byte 4; # VM/CMS 30GZOSATARI: con byte 5; # Atari TOS 31GZOSHPFS: con byte 6; # HPFS file system 32GZOSMAC: con byte 7; # Macintosh 33GZOSZSYS: con byte 8; # Z-System 34GZOSCPM: con byte 9; # CP/M 35GZOSTOPS20: con byte 10; # TOPS-20 36GZOSNTFS: con byte 11; # NTFS file system 37GZOSQDOS: con byte 12; # QDOS 38GZOSACORN: con byte 13; # Acorn RISCOS 39GZOSUNK: con byte 255; 40 41GZCRCPOLY: con int 16redb88320; 42GZOSINFERNO: con GZOSUNIX; 43 44 45Hnone, Hgzip, Hzlib: con iota; # LZstate.headers 46LZstate: adt 47{ 48 hist: array of byte; # [HistSize]; 49 epos: int; # end of history buffer 50 pos: int; # current location in history buffer 51 eof: int; 52 hash: array of int; # [Nhash] hash chains 53 nexts: array of int; # [MaxOff] 54 me: int; # pos in hash chains 55 dot: int; # dawn of time in history 56 prevlen: int; # lazy matching state 57 prevoff: int; 58 maxchars: int; # compressor tuning 59 maxdefer: int; 60 level: int; 61 62 crctab: array of int; # for gzip trailer 63 crc: int; 64 tot: int; 65 sum: big; # for zlib trailer 66 headers: int; # which header to print, if any 67 68 outbuf: array of byte; # current output buffer; 69 out: int; # current position in the output buffer 70 bits: int; # bit shift register 71 nbits: int; 72 73 verbose: int; 74 debug: int; 75 76 lzb: ref LZblock; 77 slop: array of byte; 78 dlitlentab: array of Huff; # [Nlitlen] 79 dofftab: array of Huff; # [Noff]; 80 hlitlentab: array of Huff; # [Nlitlen]; 81 dyncode: ref Dyncode; 82 hdyncode: ref Dyncode; 83 c: chan of ref Rq; 84 rc: chan of int; 85}; 86 87# 88# lempel-ziv compressed block 89# 90LZblock: adt 91{ 92 litlen: array of byte; # [MaxUncBlock+1]; 93 off: array of int; # [MaxUncBlock+1]; 94 litlencount: array of int; # [Nlitlen]; 95 offcount: array of int; # [Noff]; 96 entries: int; # entries in litlen & off tables 97 bytes: int; # consumed from the input 98 excost: int; # cost of encoding extra len & off bits 99}; 100 101# 102# encoding of dynamic huffman trees 103# 104Dyncode: adt 105{ 106 nlit: int; 107 noff: int; 108 nclen: int; 109 ncode: int; 110 codetab: array of Huff; # [Nclen]; 111 codes: array of byte; # [Nlitlen+Noff]; 112 codeaux: array of byte; # [Nlitlen+Noff]; 113}; 114 115# 116# huffman code table 117# 118Huff: adt 119{ 120 bits: int; # length of the code 121 encode: int; # the code 122}; 123 124DeflateBlock: con 64*1024-258-1; 125DeflateOut: con 258+10; 126 127 128DeflateUnc: con 0; # uncompressed block 129DeflateFix: con 1; # fixed huffman codes 130DeflateDyn: con 2; # dynamic huffman codes 131 132DeflateEob: con 256; # end of block code in lit/len book 133 134LenStart: con 257; # start of length codes in litlen 135Nlitlen: con 288; # number of litlen codes 136Noff: con 30; # number of offset codes 137Nclen: con 19; # number of codelen codes 138 139MaxLeaf: con Nlitlen; 140MaxHuffBits: con 15; # max bits in a huffman code 141ChainMem: con 2 * MaxHuffBits * (MaxHuffBits + 1); 142 143MaxUncBlock: con 64*1024-1; # maximum size of uncompressed block 144 145MaxOff: con 32*1024; 146MinMatch: con 3; # shortest match possible 147MaxMatch: con 258; # longest match possible 148MinMatchMaxOff: con 4096; # max profitable offset for small match; 149 # assumes 8 bits for len; 5+10 for offset 150HistSlop: con 4096; # slop for fewer calls to lzcomp 151HistSize: con MaxOff + 2*HistSlop; 152 153Hshift: con 4; # nice compromise between space & time 154Nhash: con 1<<(Hshift*MinMatch); 155Hmask: con Nhash-1; 156 157MaxOffCode: con 256; # biggest offset looked up in direct table 158 159EstLitBits: con 8; 160EstLenBits: con 4; 161EstOffBits: con 5; 162 163# conversion from len to code word 164lencode := array[MaxMatch] of int; 165 166# 167# conversion from off to code word 168# off <= MaxOffCode ? offcode[off] : bigoffcode[(off-1) >> 7] 169# 170offcode := array[MaxOffCode + 1] of int; 171bigoffcode := array[256] of int; 172 173# litlen code words LenStart-285 extra bits 174litlenbase := array[Nlitlen-LenStart] of int; 175litlenextra := array[Nlitlen-LenStart] of 176{ 177 0, 0, 0, 178 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 179 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 180 4, 5, 5, 5, 5, 0, 0, 0 181}; 182 183# offset code word extra bits 184offbase := array[Noff] of int; 185offextra := array[] of 186{ 187 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 188 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 189 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 190 0, 0, 191}; 192 193# order code lengths 194clenorder := array[Nclen] of 195{ 196 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 197}; 198 199# static huffman tables 200litlentab : array of Huff; 201offtab : array of Huff; 202hofftab : array of Huff; 203 204# bit reversal for brain dead endian swap in huffman codes 205revtab: array of byte; 206 207init() 208{ 209 sys = load Sys Sys->PATH; 210 211 bitcount := array[MaxHuffBits] of int; 212 i, j, ci, n: int; 213 214 # byte reverse table 215 revtab = array[256] of byte; 216 for(i=0; i<256; i++){ 217 revtab[i] = byte 0; 218 for(j=0; j<8; j++) 219 if(i & (1<<j)) 220 revtab[i] |= byte 16r80 >> j; 221 } 222 223 litlentab = array[Nlitlen] of Huff; 224 offtab = array[Noff] of Huff; 225 hofftab = array[Noff] of { * => Huff(0, 0) }; 226 227 # static Litlen bit lengths 228 for(i=0; i<144; i++) 229 litlentab[i].bits = 8; 230 for(i=144; i<256; i++) 231 litlentab[i].bits = 9; 232 for(i=256; i<280; i++) 233 litlentab[i].bits = 7; 234 for(i=280; i<Nlitlen; i++) 235 litlentab[i].bits = 8; 236 237 for(i = 0; i < 10; i++) 238 bitcount[i] = 0; 239 bitcount[8] += 144 - 0; 240 bitcount[9] += 256 - 144; 241 bitcount[7] += 280 - 256; 242 bitcount[8] += Nlitlen - 280; 243 244 hufftabinit(litlentab, Nlitlen, bitcount, 9); 245 246 # static offset bit lengths 247 for(i = 0; i < Noff; i++) 248 offtab[i].bits = 5; 249 250 for(i = 0; i < 5; i++) 251 bitcount[i] = 0; 252 bitcount[5] = Noff; 253 254 hufftabinit(offtab, Noff, bitcount, 5); 255 256 bitcount[0] = 0; 257 bitcount[1] = 0; 258 mkprecode(hofftab, bitcount, 2, MaxHuffBits); 259 260 # conversion tables for lens & offs to codes 261 ci = 0; 262 for(i = LenStart; i < 286; i++){ 263 n = ci + (1 << litlenextra[i - LenStart]); 264 litlenbase[i - LenStart] = ci; 265 for(; ci < n; ci++) 266 lencode[ci] = i; 267 } 268 # patch up special case for len MaxMatch 269 lencode[MaxMatch-MinMatch] = 285; 270 litlenbase[285-LenStart] = MaxMatch-MinMatch; 271 272 ci = 1; 273 for(i = 0; i < 16; i++){ 274 n = ci + (1 << offextra[i]); 275 offbase[i] = ci; 276 for(; ci < n; ci++) 277 offcode[ci] = i; 278 } 279 280 ci = (LenStart - 1) >> 7; 281 for(; i < 30; i++){ 282 n = ci + (1 << (offextra[i] - 7)); 283 offbase[i] = (ci << 7) + 1; 284 for(; ci < n; ci++) 285 bigoffcode[ci] = i; 286 } 287} 288 289start(param: string): chan of ref Rq 290{ 291 # param contains flags: 292 # [0-9] - compression level 293 # h gzip header/trailer 294 # z zlib header/trailer 295 # v verbose 296 # d debug 297 lz := ref LZstate; 298 lz.level = 6; 299 lz.verbose = lz.debug = 0; 300 lz.headers = Hnone; 301 lz.crc = lz.tot = 0; 302 lz.sum = big 1; 303 # XXX could also put filename and modification time in param 304 for (i := 0; i < len param; i++) { 305 case param[i] { 306 '0' to '9' => 307 lz.level = param[i] - '0'; 308 'v' => 309 lz.verbose = 1; 310 'h' => 311 lz.headers = Hgzip; 312 'z' => 313 lz.headers = Hzlib; 314 'd' => 315 lz.debug = 1; 316 } 317 } 318 319 lz.hist = array[HistSize] of byte; 320 lz.hash = array[Nhash] of int; 321 lz.nexts = array[MaxOff] of int; 322 lz.slop = array[2*MaxMatch] of byte; 323 lz.dlitlentab = array[Nlitlen] of Huff; 324 lz.dofftab = array[Noff] of Huff; 325 lz.hlitlentab = array[Nlitlen] of Huff; 326 327 lz.lzb = ref LZblock; 328 lzb := lz.lzb; 329 lzb.litlen = array[MaxUncBlock+1] of byte; 330 lzb.off = array[MaxUncBlock+1] of int; 331 lzb.litlencount = array[Nlitlen] of int; 332 lzb.offcount = array[Noff] of int; 333 334 lz.dyncode = ref Dyncode; 335 lz.dyncode.codetab =array[Nclen] of Huff; 336 lz.dyncode.codes =array[Nlitlen+Noff] of byte; 337 lz.dyncode.codeaux = array[Nlitlen+Noff] of byte; 338 lz.hdyncode = ref Dyncode; 339 lz.hdyncode.codetab =array[Nclen] of Huff; 340 lz.hdyncode.codes =array[Nlitlen+Noff] of byte; 341 lz.hdyncode.codeaux = array[Nlitlen+Noff] of byte; 342 343 for(i = 0; i < MaxOff; i++) 344 lz.nexts[i] = 0; 345 for(i = 0; i < Nhash; i++) 346 lz.hash[i] = 0; 347 lz.pos = 0; 348 lz.epos = 0; 349 lz.prevlen = MinMatch - 1; 350 lz.prevoff = 0; 351 lz.eof = 0; 352 lz.me = 4 * MaxOff; 353 lz.dot = lz.me; 354 lz.bits = 0; 355 lz.nbits = 0; 356 if(lz.level < 5) { 357 lz.maxchars = 1; 358 lz.maxdefer = 0; 359 } else if(lz.level == 9) { 360 lz.maxchars = 4000; 361 lz.maxdefer = MaxMatch; 362 } else { 363 lz.maxchars = 200; 364 lz.maxdefer = MaxMatch / 4; 365 } 366 if (lz.headers == Hgzip) 367 lz.crctab = mkcrctab(GZCRCPOLY); 368 lz.c = chan of ref Rq; 369 lz.rc = chan of int; 370 spawn deflate(lz); 371 return lz.c; 372} 373 374# return (eof, nbytes) 375fillbuf(lz: ref LZstate, buf: array of byte): (int, int) 376{ 377 n := 0; 378 while (n < len buf) { 379 lz.c <-= ref Rq.Fill(buf[n:], lz.rc); 380 nr := <-lz.rc; 381 if (nr == -1) 382 exit; 383 if (nr == 0) 384 return (1, n); 385 n += nr; 386 } 387 return (0, n); 388} 389 390deflate(lz: ref LZstate) 391{ 392 lz.c <-= ref Rq.Start(sys->pctl(0, nil)); 393 394 header(lz); 395 buf := array[DeflateBlock] of byte; 396 out := array[DeflateBlock + DeflateOut] of byte; 397 eof := 0; 398 for (;;) { 399 nslop := lz.epos - lz.pos; 400 nbuf := 0; 401 if (!eof) { 402 (eof, nbuf) = fillbuf(lz, buf); 403 inblock(lz, buf[0:nbuf]); 404 } 405 if(eof && nbuf == 0 && nslop == 0) { 406 if(lz.nbits) { 407 out[0] = byte lz.bits; 408 lz.nbits = 0; 409 lz.c <-= ref Rq.Result(out[0:1], lz.rc); 410 if (<-lz.rc == -1) 411 exit; 412 continue; 413 } 414 footer(lz); 415 lz.c <-= ref Rq.Finished(nil); 416 exit; 417 } 418 419 lz.outbuf = out; 420 421 if(nslop > 2*MaxMatch) { 422 lz.c <-= ref Rq.Error(sys->sprint("slop too large: %d", nslop)); 423 exit; 424 } 425 lz.slop[0:] = lz.hist[lz.pos:lz.epos]; # memmove(slop, lz.pos, nslop); 426 427 lzb := lz.lzb; 428 for(i := 0; i < Nlitlen; i++) 429 lzb.litlencount[i] = 0; 430 for(i = 0; i < Noff; i++) 431 lzb.offcount[i] = 0; 432 lzb.litlencount[DeflateEob]++; 433 434 lzb.bytes = 0; 435 lzb.entries = 0; 436 lzb.excost = 0; 437 lz.eof = 0; 438 439 n := 0; 440 while(n < nbuf || eof && !lz.eof){ 441 if(!lz.eof) { 442 if(lz.pos >= MaxOff + HistSlop) { 443 lz.pos -= MaxOff + HistSlop; 444 lz.epos -= MaxOff + HistSlop; 445 lz.hist[:] = lz.hist[MaxOff + HistSlop: MaxOff + HistSlop + lz.epos]; 446 } 447 m := HistSlop - (lz.epos - lz.pos); 448 if(lz.epos + m > HistSize) { 449 lz.c <-= ref Rq.Error("read too long"); 450 exit; 451 } 452 if(m >= nbuf - n) { 453 m = nbuf - n; 454 lz.eof = eof; 455 } 456 lz.hist[lz.epos:] = buf[n:n+m]; 457 n += m; 458 lz.epos += m; 459 } 460 lzcomp(lz, lzb, lz.epos - lz.pos); 461 } 462 463 lz.outbuf = out; 464 lz.out = 0; 465 466 nunc := lzb.bytes; 467 if(nunc < nslop) 468 nslop = nunc; 469 470 mkprecode(lz.dlitlentab, lzb.litlencount, Nlitlen, MaxHuffBits); 471 mkprecode(lz.dofftab, lzb.offcount, Noff, MaxHuffBits); 472 473 ndyn := huffcodes(lz.dyncode, lz.dlitlentab, lz.dofftab) 474 + bitcost(lz.dlitlentab, lzb.litlencount, Nlitlen) 475 + bitcost(lz.dofftab, lzb.offcount, Noff) 476 + lzb.excost; 477 478 litcount := array[Nlitlen] of int; 479 for(i = 0; i < Nlitlen; i++) 480 litcount[i] = 0; 481 for(i = 0; i < nslop; i++) 482 litcount[int lz.slop[i]]++; 483 for(i = 0; i < nunc-nslop; i++) 484 litcount[int buf[i]]++; 485 litcount[DeflateEob]++; 486 487 mkprecode(lz.hlitlentab, litcount, Nlitlen, MaxHuffBits); 488 nhuff := huffcodes(lz.hdyncode, lz.hlitlentab, hofftab) 489 + bitcost(lz.hlitlentab, litcount, Nlitlen); 490 491 nfix := bitcost(litlentab, lzb.litlencount, Nlitlen) 492 + bitcost(offtab, lzb.offcount, Noff) 493 + lzb.excost; 494 495 lzput(lz, lz.eof && lz.pos == lz.epos, 1); 496 497 if(lz.verbose) { 498 lz.c <-= ref Rq.Info(sys->sprint("block: %d bytes %d entries %d extra bits", 499 nunc, lzb.entries, lzb.excost)); 500 lz.c <-= ref Rq.Info(sys->sprint("\tuncompressed %d fixed %d dynamic %d huffman %d", 501 (nunc + 4) * 8, nfix, ndyn, nhuff)); 502 } 503 504 if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff) { 505 lzput(lz, DeflateUnc, 2); 506 lzflushbits(lz); 507 508 lz.outbuf[lz.out++] = byte(nunc); 509 lz.outbuf[lz.out++] = byte(nunc >> 8); 510 lz.outbuf[lz.out++] = byte(~nunc); 511 lz.outbuf[lz.out++] = byte(~nunc >> 8); 512 513 lz.outbuf[lz.out:] = lz.slop[:nslop]; 514 lz.out += nslop; 515 lz.outbuf[lz.out:] = buf[:nunc - nslop]; 516 lz.out += nunc - nslop; 517 } else if(ndyn < nfix && ndyn < nhuff) { 518 lzput(lz, DeflateDyn, 2); 519 520 wrdyncode(lz, lz.dyncode); 521 wrblock(lz, lzb.entries, lzb.litlen, lzb.off, lz.dlitlentab, lz.dofftab); 522 lzput(lz, lz.dlitlentab[DeflateEob].encode, lz.dlitlentab[DeflateEob].bits); 523 } else if(nhuff < nfix){ 524 lzput(lz, DeflateDyn, 2); 525 526 wrdyncode(lz, lz.hdyncode); 527 for(i = 0; i < len lzb.off; i++) 528 lzb.off[i] = 0; 529 530 wrblock(lz, nslop, lz.slop, lzb.off, lz.hlitlentab, hofftab); 531 wrblock(lz, nunc-nslop, buf, lzb.off, lz.hlitlentab, hofftab); 532 lzput(lz, lz.hlitlentab[DeflateEob].encode, lz.hlitlentab[DeflateEob].bits); 533 } else { 534 lzput(lz, DeflateFix, 2); 535 536 wrblock(lz, lzb.entries, lzb.litlen, lzb.off, litlentab, offtab); 537 lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits); 538 } 539 540 lz.c <-= ref Rq.Result(out[0:lz.out], lz.rc); 541 if (<-lz.rc == -1) 542 exit; 543 } 544} 545 546headergzip(lz: ref LZstate) 547{ 548 buf := array[20] of byte; 549 i := 0; 550 buf[i++] = byte GZMAGIC1; 551 buf[i++] = byte GZMAGIC2; 552 buf[i++] = byte GZDEFLATE; 553 554 flags := 0; 555 #if(file != nil) 556 # flags |= GZFNAME; 557 buf[i++] = byte flags; 558 559 mtime := 0; 560 buf[i++] = byte(mtime); 561 buf[i++] = byte(mtime>>8); 562 buf[i++] = byte(mtime>>16); 563 buf[i++] = byte(mtime>>24); 564 565 buf[i++] = byte 0; 566 buf[i++] = byte GZOSINFERNO; 567 568 #if((flags & GZFNAME) == GZFNAME){ 569 # bout.puts(file); 570 # bout.putb(byte 0); 571 #} 572 lz.c <-= ref Rq.Result(buf[0:i], lz.rc); 573 if (<-lz.rc == -1) 574 exit; 575} 576 577headerzlib(lz: ref LZstate) 578{ 579 CIshift: con 12; 580 CMdeflate: con 8; 581 CMshift: con 8; 582 LVshift: con 6; 583 LVfastest, LVfast, LVnormal, LVbest: con iota; 584 585 level := LVnormal; 586 if(lz.level < 6) 587 level = LVfastest; 588 else if(lz.level >= 9) 589 level = LVbest; 590 591 h := 0; 592 h |= 7<<CIshift; # value is: (log2 of window size)-8 593 h |= CMdeflate<<CMshift; 594 h |= level<<LVshift; 595 h += 31-(h%31); 596 597 buf := array[2] of byte; 598 buf[0] = byte (h>>8); 599 buf[1] = byte (h>>0); 600 601 lz.c <-= ref Rq.Result(buf, lz.rc); 602 if (<-lz.rc == -1) 603 exit; 604} 605 606header(lz: ref LZstate) 607{ 608 case lz.headers { 609 Hgzip => headergzip(lz); 610 Hzlib => headerzlib(lz); 611 } 612} 613 614footergzip(lz: ref LZstate) 615{ 616 buf := array[8] of byte; 617 i := 0; 618 buf[i++] = byte(lz.crc); 619 buf[i++] = byte(lz.crc>>8); 620 buf[i++] = byte(lz.crc>>16); 621 buf[i++] = byte(lz.crc>>24); 622 623 buf[i++] = byte(lz.tot); 624 buf[i++] = byte(lz.tot>>8); 625 buf[i++] = byte(lz.tot>>16); 626 buf[i++] = byte(lz.tot>>24); 627 lz.c <-= ref Rq.Result(buf[0:i], lz.rc); 628 if (<-lz.rc == -1) 629 exit; 630} 631 632footerzlib(lz: ref LZstate) 633{ 634 buf := array[4] of byte; 635 i := 0; 636 buf[i++] = byte (lz.sum>>24); 637 buf[i++] = byte (lz.sum>>16); 638 buf[i++] = byte (lz.sum>>8); 639 buf[i++] = byte (lz.sum>>0); 640 641 lz.c <-= ref Rq.Result(buf, lz.rc); 642 if(<-lz.rc == -1) 643 exit; 644} 645 646footer(lz: ref LZstate) 647{ 648 case lz.headers { 649 Hgzip => footergzip(lz); 650 Hzlib => footerzlib(lz); 651 } 652} 653 654lzput(lz: ref LZstate, bits, nbits: int): int 655{ 656 bits = (bits << lz.nbits) | lz.bits; 657 for(nbits += lz.nbits; nbits >= 8; nbits -= 8){ 658 lz.outbuf[lz.out++] = byte bits; 659 bits >>= 8; 660 } 661 lz.bits = bits; 662 lz.nbits = nbits; 663 return 0; 664} 665 666lzflushbits(lz: ref LZstate): int 667{ 668 if(lz.nbits & 7) 669 lzput(lz, 0, 8 - (lz.nbits & 7)); 670 return 0; 671} 672 673# 674# write out a block of n samples, 675# given lz encoding and counts for huffman tables 676# todo: inline lzput 677# 678wrblock(lz: ref LZstate, n: int, litlen: array of byte, off: array of int, litlentab, offtab: array of Huff): int 679{ 680 for(i := 0; i < n; i++) { 681 offset := off[i]; 682 lit := int litlen[i]; 683 if(lz.debug) { 684 if(offset == 0) 685 lz.c <-= ref Rq.Info(sys->sprint("\tlit %.2ux %c", lit, lit)); 686 else 687 lz.c <-= ref Rq.Info(sys->sprint("\t<%d, %d>", offset, lit + MinMatch)); 688 } 689 if(offset == 0) 690 lzput(lz, litlentab[lit].encode, litlentab[lit].bits); 691 else { 692 c := lencode[lit]; 693 lzput(lz, litlentab[c].encode, litlentab[c].bits); 694 c -= LenStart; 695 if(litlenextra[c]) 696 lzput(lz, lit - litlenbase[c], litlenextra[c]); 697 698 if(offset <= MaxOffCode) 699 c = offcode[offset]; 700 else 701 c = bigoffcode[(offset - 1) >> 7]; 702 lzput(lz, offtab[c].encode, offtab[c].bits); 703 if(offextra[c]) 704 lzput(lz, offset - offbase[c], offextra[c]); 705 } 706 } 707 708 return n; 709} 710 711lzcomp(lz: ref LZstate, lzb: ref LZblock, max: int) 712{ 713 q, s, es, t: int; 714 you, m: int; 715 716# hashcheck(lz, "start"); 717 718 hist := lz.hist; 719 nexts := lz.nexts; 720 hash := lz.hash; 721 me := lz.me; 722 723 p := lz.pos; 724 ep := lz.epos; 725 if(p + max < ep) 726 ep = p + max; 727 if(lz.prevlen != MinMatch - 1) 728 p++; 729 730 # 731 # hash in the links for any hanging link positions, 732 # and calculate the hash for the current position. 733 # 734 n := MinMatch; 735 if(n > ep - p) 736 n = ep - p; 737 h := 0; 738 for(i := 0; i < n - 1; i++) { 739 m = me - ((MinMatch-1) - i); 740 if(m < lz.dot) 741 continue; 742 s = p - (me - m); 743 if(s < 0) 744 s += MaxOff + HistSlop; 745 h = hashit(s, hist); 746 for(you = hash[h]; me - you < me - m; you = nexts[you & (MaxOff-1)]) 747 ; 748 if(you == m) 749 continue; 750 nexts[m & (MaxOff-1)] = hash[h]; 751 hash[h] = m; 752 } 753 for(i = 0; i < n; i++) 754 h = ((h << Hshift) ^ int hist[p+i]) & Hmask; 755 756 # 757 # me must point to the index in the next/prev arrays 758 # corresponding to p's position in the history 759 # 760 entries := lzb.entries; 761 litlencount := lzb.litlencount; 762 offcount := lzb.offcount; 763 litlen := lzb.litlen; 764 off := lzb.off; 765 prevlen := lz.prevlen; 766 prevoff := lz.prevoff; 767 maxdefer := lz.maxdefer; 768 maxchars := lz.maxchars; 769 excost := 0; 770 for(;;) { 771 es = p + MaxMatch; 772 if(es > ep) { 773 if(!lz.eof || ep != lz.epos || p >= ep) 774 break; 775 es = ep; 776 } 777 778 # 779 # look for the longest, closest string which 780 # matches what we are going to send. the clever 781 # part here is looking for a string 1 longer than 782 # are previous best match. 783 # 784 runlen := prevlen; 785 m = 0; 786 chars := maxchars; 787 matchloop: 788 for(you = hash[h]; me-you <= MaxOff && chars > 0; you = nexts[you & (MaxOff-1)]) { 789 s = p + runlen; 790 if(s >= es) 791 break; 792 t = s - me + you; 793 if(t - runlen < 0) 794 t += MaxOff + HistSlop; 795 for(; s >= p; s--) { 796 if(hist[s] != hist[t]) { 797 chars -= p + runlen - s + 1; 798 continue matchloop; 799 } 800 t--; 801 } 802 803 # 804 # we have a new best match. 805 # extend it to it's maximum length 806 # 807 t += runlen + 2; 808 s += runlen + 2; 809 for(; s < es; s++) { 810 if(hist[s] != hist[t]) 811 break; 812 t++; 813 } 814 runlen = s - p; 815 m = you; 816 if(s == es) 817 break; 818 if(runlen > 7) 819 chars >>= 1; 820 chars -= runlen; 821 } 822 823 # 824 # back out of small matches too far in the past 825 # 826 if(runlen == MinMatch && me - m >= MinMatchMaxOff) { 827 runlen = MinMatch - 1; 828 m = 0; 829 } 830 831 # 832 # record the encoding and increment counts for huffman trees 833 # if we get a match, defer selecting it until we check for 834 # a longer match at the next position. 835 # 836 if(prevlen >= runlen && prevlen != MinMatch - 1) { 837 # 838 # old match at least as good; use that one 839 # 840 n = prevlen - MinMatch; 841 litlen[entries] = byte n; 842 n = lencode[n]; 843 litlencount[n]++; 844 excost += litlenextra[n - LenStart]; 845 846 off[entries++] = prevoff; 847 if(prevoff <= MaxOffCode) 848 n = offcode[prevoff]; 849 else 850 n = bigoffcode[(prevoff - 1) >> 7]; 851 offcount[n]++; 852 excost += offextra[n]; 853 854 runlen = prevlen - 1; 855 prevlen = MinMatch - 1; 856 } else if(runlen == MinMatch - 1) { 857 # 858 # no match; just put out the literal 859 # 860 n = int hist[p]; 861 litlen[entries] = byte n; 862 litlencount[n]++; 863 off[entries++] = 0; 864 runlen = 1; 865 } else { 866 if(prevlen != MinMatch - 1) { 867 # 868 # longer match now. output previous literal, 869 # update current match, and try again 870 # 871 n = int hist[p - 1]; 872 litlen[entries] = byte n; 873 litlencount[n]++; 874 off[entries++] = 0; 875 } 876 877 prevoff = me - m; 878 879 if(runlen < maxdefer) { 880 prevlen = runlen; 881 runlen = 1; 882 } else { 883 n = runlen - MinMatch; 884 litlen[entries] = byte n; 885 n = lencode[n]; 886 litlencount[n]++; 887 excost += litlenextra[n - LenStart]; 888 889 off[entries++] = prevoff; 890 if(prevoff <= MaxOffCode) 891 n = offcode[prevoff]; 892 else 893 n = bigoffcode[(prevoff - 1) >> 7]; 894 offcount[n]++; 895 excost += offextra[n]; 896 prevlen = MinMatch - 1; 897 } 898 } 899 900 # 901 # update the hash for the newly matched data 902 # this is constructed so the link for the old 903 # match in this position must at the end of a chain, 904 # and will expire when this match is added, ie it will 905 # never be examined for by the match loop. 906 # add to the hash chain only if we have the real hash data. 907 # 908 for(q = p + runlen; p != q; p++) { 909 if(p + MinMatch <= ep) { 910 nexts[me & (MaxOff-1)] = hash[h]; 911 hash[h] = me; 912 if(p + MinMatch < ep) 913 h = ((h << Hshift) ^ int hist[p + MinMatch]) & Hmask; 914 } 915 me++; 916 } 917 } 918 919 # 920 # we can just store away the lazy state and 921 # pick it up next time. the last block will have eof 922 # so we won't have any pending matches 923 # however, we need to correct for how much we've encoded 924 # 925 if(prevlen != MinMatch - 1) 926 p--; 927 928 lzb.excost += excost; 929 lzb.bytes += p - lz.pos; 930 lzb.entries = entries; 931 932 lz.pos = p; 933 lz.me = me; 934 lz.prevlen = prevlen; 935 lz.prevoff = prevoff; 936 937# hashcheck(lz, "stop"); 938} 939 940# 941# check all the hash list invariants are really satisfied 942# 943hashcheck(lz: ref LZstate, where: string) 944{ 945 s, age, a, you: int; 946 947 nexts := lz.nexts; 948 hash := lz.hash; 949 me := lz.me; 950 start := lz.pos; 951 if(lz.prevlen != MinMatch-1) 952 start++; 953 found := array [MaxOff] of byte; 954 for(i := 0; i < MaxOff; i++) 955 found[i] = byte 0; 956 for(i = 0; i < Nhash; i++) { 957 age = 0; 958 for(you = hash[i]; me-you <= MaxOff; you = nexts[you & (MaxOff-1)]) { 959 a = me - you; 960 if(a < age) 961 fatal(lz, sys->sprint("%s: out of order links age %d a %d me %d you %d", 962 where, age, a, me, you)); 963 964 age = a; 965 966 s = start - a; 967 if(s < 0) 968 s += MaxOff + HistSlop; 969 970 if(hashit(s, lz.hist) != i) 971 fatal(lz, sys->sprint("%s: bad hash chain you %d me %d s %d start %d chain %d hash %d %d %d", 972 where, you, me, s, start, i, hashit(s - 1, lz.hist), hashit(s, lz.hist), hashit(s + 1, lz.hist))); 973 974 if(found[you & (MaxOff - 1)] != byte 0) 975 fatal(lz, where + ": found link again"); 976 found[you & (MaxOff - 1)] = byte 1; 977 } 978 } 979 980 for(you = me - (MaxOff-1); you != me; you++) 981 found[you & (MaxOff - 1)] = byte 1; 982 983 for(i = 0; i < MaxOff; i++){ 984 if(found[i] == byte 0 && nexts[i] != 0) 985 fatal(lz, sys->sprint("%s: link not found: max %d at %d", where, me & (MaxOff-1), i)); 986 } 987} 988 989hashit(p: int, hist: array of byte): int 990{ 991 h := 0; 992 for(ep := p + MinMatch; p < ep; p++) 993 h = ((h << Hshift) ^ int hist[p]) & Hmask; 994 return h; 995} 996 997# 998# make up the dynamic code tables, and return the number of bits 999# needed to transmit them. 1000# 1001huffcodes(dc: ref Dyncode, littab, offtab: array of Huff): int 1002{ 1003 i, n, m, c, nlit, noff, ncode, nclen: int; 1004 1005 codetab := dc.codetab; 1006 codes := dc.codes; 1007 codeaux := dc.codeaux; 1008 1009 # 1010 # trim the sizes of the tables 1011 # 1012 for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--) 1013 ; 1014 for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--) 1015 ; 1016 1017 # 1018 # make the code-length code 1019 # 1020 for(i = 0; i < nlit; i++) 1021 codes[i] = byte littab[i].bits; 1022 for(i = 0; i < noff; i++) 1023 codes[i + nlit] = byte offtab[i].bits; 1024 1025 # 1026 # run-length compress the code-length code 1027 # 1028 excost := 0; 1029 c = 0; 1030 ncode = nlit+noff; 1031 for(i = 0; i < ncode; ) { 1032 n = i + 1; 1033 v := codes[i]; 1034 while(n < ncode && v == codes[n]) 1035 n++; 1036 n -= i; 1037 i += n; 1038 if(v == byte 0) { 1039 while(n >= 11) { 1040 m = n; 1041 if(m > 138) 1042 m = 138; 1043 codes[c] = byte 18; 1044 codeaux[c++] = byte(m - 11); 1045 n -= m; 1046 excost += 7; 1047 } 1048 if(n >= 3) { 1049 codes[c] = byte 17; 1050 codeaux[c++] = byte(n - 3); 1051 n = 0; 1052 excost += 3; 1053 } 1054 } 1055 while(n--) { 1056 codes[c++] = v; 1057 while(n >= 3) { 1058 m = n; 1059 if(m > 6) 1060 m = 6; 1061 codes[c] = byte 16; 1062 codeaux[c++] = byte(m - 3); 1063 n -= m; 1064 excost += 3; 1065 } 1066 } 1067 } 1068 1069 codecount := array[Nclen] of {* => 0}; 1070 for(i = 0; i < c; i++) 1071 codecount[int codes[i]]++; 1072 mkprecode(codetab, codecount, Nclen, 7); 1073 1074 for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--) 1075 ; 1076 1077 dc.nlit = nlit; 1078 dc.noff = noff; 1079 dc.nclen = nclen; 1080 dc.ncode = c; 1081 1082 return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost; 1083} 1084 1085wrdyncode(out: ref LZstate, dc: ref Dyncode) 1086{ 1087 # 1088 # write out header, then code length code lengths, 1089 # and code lengths 1090 # 1091 lzput(out, dc.nlit-257, 5); 1092 lzput(out, dc.noff-1, 5); 1093 lzput(out, dc.nclen-4, 4); 1094 1095 codetab := dc.codetab; 1096 for(i := 0; i < dc.nclen; i++) 1097 lzput(out, codetab[clenorder[i]].bits, 3); 1098 1099 codes := dc.codes; 1100 codeaux := dc.codeaux; 1101 c := dc.ncode; 1102 for(i = 0; i < c; i++){ 1103 v := int codes[i]; 1104 lzput(out, codetab[v].encode, codetab[v].bits); 1105 if(v >= 16){ 1106 if(v == 16) 1107 lzput(out, int codeaux[i], 2); 1108 else if(v == 17) 1109 lzput(out, int codeaux[i], 3); 1110 else # v == 18 1111 lzput(out, int codeaux[i], 7); 1112 } 1113 } 1114} 1115 1116bitcost(tab: array of Huff, count: array of int, n: int): int 1117{ 1118 tot := 0; 1119 for(i := 0; i < n; i++) 1120 tot += count[i] * tab[i].bits; 1121 return tot; 1122} 1123 1124hufftabinit(tab: array of Huff, n: int, bitcount: array of int, nbits: int) 1125{ 1126 nc := array[MaxHuffBits + 1] of int; 1127 1128 code := 0; 1129 for(bits := 1; bits <= nbits; bits++) { 1130 code = (code + bitcount[bits-1]) << 1; 1131 nc[bits] = code; 1132 } 1133 1134 for(i := 0; i < n; i++) { 1135 bits = tab[i].bits; 1136 if(bits != 0) { 1137 code = nc[bits]++ << (16 - bits); 1138 tab[i].encode = int(revtab[code >> 8]) | (int(revtab[code & 16rff]) << 8); 1139 } 1140 } 1141} 1142 1143Chain: adt 1144{ 1145 count: int; # occurances of everything in the chain 1146 leaf: int; # leaves to the left of chain, or leaf value 1147 col: byte; # ref count for collecting unused chains 1148 gen: byte; # need to generate chains for next lower level 1149 up: int; # Chain up in the lists 1150}; 1151 1152Chains: adt 1153{ 1154 lists: array of int; # [MaxHuffBits * 2] 1155 chains: array of Chain; # [ChainMem] 1156 nleaf: int; # number of leaves 1157 free: int; 1158 col: byte; 1159 nlists: int; 1160}; 1161 1162Nil: con -1; 1163 1164# 1165# fast, low space overhead algorithm for max depth huffman type codes 1166# 1167# J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical 1168# algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms 1169# and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer 1170# Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds., 1171# pp 12-21, Springer Verlag, New York, 1995. 1172# 1173mkprecode(tab: array of Huff, count: array of int, n, maxbits: int) 1174{ 1175 cs := ref Chains(array[MaxHuffBits * 2] of int, array[MaxLeaf+ChainMem] of Chain, 0, 0, byte 0, 0); 1176 bits: int; 1177 1178 for(i := 0; i < n; i++){ 1179 tab[i].bits = 0; 1180 tab[i].encode = 0; 1181 } 1182 1183 # 1184 # set up the sorted list of leaves 1185 # 1186 m := 0; 1187 for(i = 0; i < n; i++) { 1188 if(count[i] != 0){ 1189 cs.chains[m].count = count[i]; 1190 cs.chains[m].leaf = i; 1191 m++; 1192 } 1193 } 1194 if(m < 2) { 1195 if(m != 0) { 1196 m = cs.chains[0].leaf; 1197 tab[m].bits = 1; 1198 tab[m].encode = 0; 1199 } 1200 return; 1201 } 1202 cs.nleaf = m; 1203 csorts(cs.chains, 0, m); 1204 1205 cs.free = cs.nleaf + 2; 1206 cs.col = byte 1; 1207 1208 # 1209 # initialize chains for each list 1210 # 1211 c := cs.chains; 1212 cl := cs.nleaf; 1213 c[cl].count = cs.chains[0].count; 1214 c[cl].leaf = 1; 1215 c[cl].col = cs.col; 1216 c[cl].up = Nil; 1217 c[cl].gen = byte 0; 1218 c[cl + 1] = c[cl]; 1219 c[cl + 1].leaf = 2; 1220 c[cl + 1].count = cs.chains[1].count; 1221 for(i = 0; i < maxbits; i++){ 1222 cs.lists[i * 2] = cl; 1223 cs.lists[i * 2 + 1] = cl + 1; 1224 } 1225 1226 cs.nlists = 2 * maxbits; 1227 m = 2 * m - 2; 1228 for(i = 2; i < m; i++) 1229 nextchain(cs, cs.nlists - 2); 1230 1231 bitcount := array[MaxHuffBits + 1] of int; 1232 bits = 0; 1233 bitcount[0] = cs.nleaf; 1234 for(cl = cs.lists[2 * maxbits - 1]; cl != Nil; cl = c[cl].up) { 1235 m = c[cl].leaf; 1236 for(i = 0; i < m; i++) 1237 tab[cs.chains[i].leaf].bits++; 1238 bitcount[bits++] -= m; 1239 bitcount[bits] = m; 1240 } 1241 1242 hufftabinit(tab, n, bitcount, bits); 1243} 1244 1245# 1246# calculate the next chain on the list 1247# we can always toss out the old chain 1248# 1249nextchain(cs: ref Chains, clist: int) 1250{ 1251 i, nleaf, sumc: int; 1252 1253 oc := cs.lists[clist + 1]; 1254 cs.lists[clist] = oc; 1255 if(oc == Nil) 1256 return; 1257 1258 # 1259 # make sure we have all chains needed to make sumc 1260 # note it is possible to generate only one of these, 1261 # use twice that value for sumc, and then generate 1262 # the second if that preliminary sumc would be chosen. 1263 # however, this appears to be slower on current tests 1264 # 1265 chains := cs.chains; 1266 if(chains[oc].gen != byte 0) { 1267 nextchain(cs, clist - 2); 1268 nextchain(cs, clist - 2); 1269 chains[oc].gen = byte 0; 1270 } 1271 1272 # 1273 # pick up the chain we're going to add; 1274 # collect unused chains no free ones are left 1275 # 1276 for(c := cs.free; ; c++) { 1277 if(c >= ChainMem) { 1278 cs.col++; 1279 for(i = 0; i < cs.nlists; i++) 1280 for(c = cs.lists[i]; c != Nil; c = chains[c].up) 1281 chains[c].col = cs.col; 1282 c = cs.nleaf; 1283 } 1284 if(chains[c].col != cs.col) 1285 break; 1286 } 1287 1288 # 1289 # pick the cheapest of 1290 # 1) the next package from the previous list 1291 # 2) the next leaf 1292 # 1293 nleaf = chains[oc].leaf; 1294 sumc = 0; 1295 if(clist > 0 && cs.lists[clist-1] != Nil) 1296 sumc = chains[cs.lists[clist-2]].count + chains[cs.lists[clist-1]].count; 1297 if(sumc != 0 && (nleaf >= cs.nleaf || chains[nleaf].count > sumc)) { 1298 chains[c].count = sumc; 1299 chains[c].leaf = chains[oc].leaf; 1300 chains[c].up = cs.lists[clist-1]; 1301 chains[c].gen = byte 1; 1302 } else if(nleaf >= cs.nleaf) { 1303 cs.lists[clist + 1] = Nil; 1304 return; 1305 } else { 1306 chains[c].leaf = nleaf + 1; 1307 chains[c].count = chains[nleaf].count; 1308 chains[c].up = chains[oc].up; 1309 chains[c].gen = byte 0; 1310 } 1311 cs.free = c + 1; 1312 1313 cs.lists[clist + 1] = c; 1314 chains[c].col = cs.col; 1315} 1316 1317chaincmp(chain: array of Chain, ai, bi: int): int 1318{ 1319 ac := chain[ai].count; 1320 bc := chain[bi].count; 1321 if(ac < bc) 1322 return -1; 1323 if(ac > bc) 1324 return 1; 1325 ac = chain[ai].leaf; 1326 bc = chain[bi].leaf; 1327 if(ac > bc) 1328 return -1; 1329 return ac < bc; 1330} 1331 1332pivot(chain: array of Chain, a, n: int): int 1333{ 1334 j := n/6; 1335 pi := a + j; # 1/6 1336 j += j; 1337 pj := pi + j; # 1/2 1338 pk := pj + j; # 5/6 1339 if(chaincmp(chain, pi, pj) < 0) { 1340 if(chaincmp(chain, pi, pk) < 0) { 1341 if(chaincmp(chain, pj, pk) < 0) 1342 return pj; 1343 return pk; 1344 } 1345 return pi; 1346 } 1347 if(chaincmp(chain, pj, pk) < 0) { 1348 if(chaincmp(chain, pi, pk) < 0) 1349 return pi; 1350 return pk; 1351 } 1352 return pj; 1353} 1354 1355csorts(chain: array of Chain, a, n: int) 1356{ 1357 j, pi, pj, pn: int; 1358 1359 while(n > 1) { 1360 if(n > 10) 1361 pi = pivot(chain, a, n); 1362 else 1363 pi = a + (n>>1); 1364 1365 t := chain[pi]; 1366 chain[pi] = chain[a]; 1367 chain[a] = t; 1368 pi = a; 1369 pn = a + n; 1370 pj = pn; 1371 for(;;) { 1372 do 1373 pi++; 1374 while(pi < pn && chaincmp(chain, pi, a) < 0); 1375 do 1376 pj--; 1377 while(pj > a && chaincmp(chain, pj, a) > 0); 1378 if(pj < pi) 1379 break; 1380 t = chain[pi]; 1381 chain[pi] = chain[pj]; 1382 chain[pj] = t; 1383 } 1384 t = chain[a]; 1385 chain[a] = chain[pj]; 1386 chain[pj] = t; 1387 j = pj - a; 1388 1389 n = n-j-1; 1390 if(j >= n) { 1391 csorts(chain, a, j); 1392 a += j+1; 1393 } else { 1394 csorts(chain, a + (j+1), n); 1395 n = j; 1396 } 1397 } 1398} 1399 1400mkcrctab(poly: int): array of int 1401{ 1402 crctab := array[256] of int; 1403 for(i := 0; i < 256; i++){ 1404 crc := i; 1405 for(j := 0; j < 8; j++){ 1406 c := crc & 1; 1407 crc = (crc >> 1) & 16r7fffffff; 1408 if(c) 1409 crc ^= poly; 1410 } 1411 crctab[i] = crc; 1412 } 1413 return crctab; 1414} 1415 1416inblockcrc(lz: ref LZstate, buf: array of byte) 1417{ 1418 crc := lz.crc; 1419 n := len buf; 1420 crc ^= int 16rffffffff; 1421 for(i := 0; i < n; i++) 1422 crc = lz.crctab[int(byte crc ^ buf[i])] ^ ((crc >> 8) & 16r00ffffff); 1423 lz.crc = crc ^ int 16rffffffff; 1424 lz.tot += n; 1425} 1426 1427inblockadler(lz: ref LZstate, buf: array of byte) 1428{ 1429 ZLADLERBASE: con big 65521; 1430 1431 s1 := lz.sum & big 16rffff; 1432 s2 := (lz.sum>>16) & big 16rffff; 1433 1434 for(i := 0; i < len buf; i++) { 1435 s1 = (s1 + big buf[i]) % ZLADLERBASE; 1436 s2 = (s2 + s1) % ZLADLERBASE; 1437 } 1438 lz.sum = (s2<<16) + s1; 1439} 1440 1441inblock(lz: ref LZstate, buf: array of byte) 1442{ 1443 case lz.headers { 1444 Hgzip => inblockcrc(lz, buf); 1445 Hzlib => inblockadler(lz, buf); 1446 } 1447} 1448 1449fatal(lz: ref LZstate, s: string) 1450{ 1451 lz.c <-= ref Rq.Error(s); 1452 exit; 1453} 1454