1 #include "lib9.h"
2 #include <flate.h>
3
4 typedef struct Chain Chain;
5 typedef struct Chains Chains;
6 typedef struct Dyncode Dyncode;
7 typedef struct Huff Huff;
8 typedef struct LZblock LZblock;
9 typedef struct LZstate LZstate;
10
11 enum
12 {
13 /*
14 * deflate format paramaters
15 */
16 DeflateUnc = 0, /* uncompressed block */
17 DeflateFix = 1, /* fixed huffman codes */
18 DeflateDyn = 2, /* dynamic huffman codes */
19
20 DeflateEob = 256, /* end of block code in lit/len book */
21 DeflateMaxBlock = 64*1024-1, /* maximum size of uncompressed block */
22
23 DeflateMaxExp = 10, /* maximum expansion for a block */
24
25 LenStart = 257, /* start of length codes in litlen */
26 Nlitlen = 288, /* number of litlen codes */
27 Noff = 30, /* number of offset codes */
28 Nclen = 19, /* number of codelen codes */
29
30 MaxOff = 32*1024,
31 MinMatch = 3, /* shortest match possible */
32 MaxMatch = 258, /* longest match possible */
33
34 /*
35 * huffman code paramaters
36 */
37 MaxLeaf = Nlitlen,
38 MaxHuffBits = 16, /* max bits in a huffman code */
39 ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits,
40
41 /*
42 * coding of the lz parse
43 */
44 LenFlag = 1 << 3,
45 LenShift = 4, /* leaves enough space for MinMatchMaxOff */
46 MaxLitRun = LenFlag - 1,
47
48 /*
49 * internal lz paramaters
50 */
51 DeflateOut = 4096, /* output buffer size */
52 BlockSize = 8192, /* attempted input read quanta */
53 DeflateBlock = DeflateMaxBlock & ~(BlockSize - 1),
54 MinMatchMaxOff = 4096, /* max profitable offset for small match;
55 * assumes 8 bits for len, 5+10 for offset
56 * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS
57 */
58 HistSlop = 512, /* must be at lead MaxMatch */
59 HistBlock = 64*1024,
60 HistSize = HistBlock + HistSlop,
61
62 HashLog = 13,
63 HashSize = 1<<HashLog,
64
65 MaxOffCode = 256, /* biggest offset looked up in direct table */
66
67 EstLitBits = 8,
68 EstLenBits = 4,
69 EstOffBits = 5,
70 };
71
72 /*
73 * knuth vol. 3 multiplicative hashing
74 * each byte x chosen according to rules
75 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
76 * with reasonable spread between the bytes & their complements
77 *
78 * the 3 byte value appears to be as almost good as the 4 byte value,
79 * and might be faster on some machines
80 */
81 /*
82 #define hashit(c) (((ulong)(c) * 0x6b43a9) >> (24 - HashLog))
83 */
84 #define hashit(c) ((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
85
86 /*
87 * lempel-ziv style compression state
88 */
89 struct LZstate
90 {
91 uchar hist[HistSize];
92 ulong pos; /* current location in history buffer */
93 ulong avail; /* data available after pos */
94 int eof;
95 ushort hash[HashSize]; /* hash chains */
96 ushort nexts[MaxOff];
97 int now; /* pos in hash chains */
98 int dot; /* dawn of time in history */
99 int prevlen; /* lazy matching state */
100 int prevoff;
101 int maxcheck; /* compressor tuning */
102
103 uchar obuf[DeflateOut];
104 uchar *out; /* current position in the output buffer */
105 uchar *eout;
106 ulong bits; /* bit shift register */
107 int nbits;
108 int rbad; /* got an error reading the buffer */
109 int wbad; /* got an error writing the buffer */
110 int (*w)(void*, void*, int);
111 void *wr;
112
113 ulong totr; /* total input size */
114 ulong totw; /* total output size */
115 int debug;
116 };
117
118 struct LZblock
119 {
120 ushort parse[DeflateMaxBlock / 2 + 1];
121 int lastv; /* value being constucted for parse */
122 ulong litlencount[Nlitlen];
123 ulong offcount[Noff];
124 ushort *eparse; /* limit for parse table */
125 int bytes; /* consumed from the input */
126 int excost; /* cost of encoding extra len & off bits */
127 };
128
129 /*
130 * huffman code table
131 */
132 struct Huff
133 {
134 short bits; /* length of the code */
135 ushort encode; /* the code */
136 };
137
138 /*
139 * encoding of dynamic huffman trees
140 */
141 struct Dyncode
142 {
143 int nlit;
144 int noff;
145 int nclen;
146 int ncode;
147 Huff codetab[Nclen];
148 uchar codes[Nlitlen+Noff];
149 uchar codeaux[Nlitlen+Noff];
150 };
151
152 static int deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int));
153 static int lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish);
154 static void wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*);
155 static int bitcost(Huff*, ulong*, int);
156 static int huffcodes(Dyncode*, Huff*, Huff*);
157 static void wrdyncode(LZstate*, Dyncode*);
158 static void lzput(LZstate*, ulong bits, int nbits);
159 static void lzflushbits(LZstate*);
160 static void lzflush(LZstate *lz);
161 static void lzwrite(LZstate *lz, void *buf, int n);
162
163 static int hufftabinit(Huff*, int, ulong*, int);
164 static int mkgzprecode(Huff*, ulong *, int, int);
165
166 static int mkprecode(Huff*, ulong *, int, int, ulong*);
167 static void nextchain(Chains*, int);
168 static void leafsort(ulong*, ushort*, int, int);
169
170 /* conversion from len to code word */
171 static int lencode[MaxMatch];
172
173 /*
174 * conversion from off to code word
175 * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]
176 */
177 static int offcode[MaxOffCode];
178 static int bigoffcode[256];
179
180 /* litlen code words LenStart-285 extra bits */
181 static int litlenbase[Nlitlen-LenStart];
182 static int litlenextra[Nlitlen-LenStart] =
183 {
184 /* 257 */ 0, 0, 0,
185 /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
186 /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
187 /* 280 */ 4, 5, 5, 5, 5, 0, 0, 0
188 };
189
190 /* offset code word extra bits */
191 static int offbase[Noff];
192 static int offextra[] =
193 {
194 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
195 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
196 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
197 0, 0,
198 };
199
200 /* order code lengths */
201 static int clenorder[Nclen] =
202 {
203 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
204 };
205
206 /* static huffman tables */
207 static Huff litlentab[Nlitlen];
208 static Huff offtab[Noff];
209 static Huff hofftab[Noff];
210
211 /* bit reversal for brain dead endian swap in huffman codes */
212 static uchar revtab[256];
213 static ulong nlits;
214 static ulong nmatches;
215
216 int
deflateinit(void)217 deflateinit(void)
218 {
219 ulong bitcount[MaxHuffBits];
220 int i, j, ci, n;
221
222 /* byte reverse table */
223 for(i=0; i<256; i++)
224 for(j=0; j<8; j++)
225 if(i & (1<<j))
226 revtab[i] |= 0x80 >> j;
227
228 /* static Litlen bit lengths */
229 for(i=0; i<144; i++)
230 litlentab[i].bits = 8;
231 for(i=144; i<256; i++)
232 litlentab[i].bits = 9;
233 for(i=256; i<280; i++)
234 litlentab[i].bits = 7;
235 for(i=280; i<Nlitlen; i++)
236 litlentab[i].bits = 8;
237
238 memset(bitcount, 0, sizeof(bitcount));
239 bitcount[8] += 144 - 0;
240 bitcount[9] += 256 - 144;
241 bitcount[7] += 280 - 256;
242 bitcount[8] += Nlitlen - 280;
243
244 if(!hufftabinit(litlentab, Nlitlen, bitcount, 9))
245 return FlateInternal;
246
247 /* static offset bit lengths */
248 for(i = 0; i < Noff; i++)
249 offtab[i].bits = 5;
250
251 memset(bitcount, 0, sizeof(bitcount));
252 bitcount[5] = Noff;
253
254 if(!hufftabinit(offtab, Noff, bitcount, 5))
255 return FlateInternal;
256
257 bitcount[0] = 0;
258 bitcount[1] = 0;
259 if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits))
260 return FlateInternal;
261
262 /* conversion tables for lens & offs to codes */
263 ci = 0;
264 for(i = LenStart; i < 286; i++){
265 n = ci + (1 << litlenextra[i - LenStart]);
266 litlenbase[i - LenStart] = ci;
267 for(; ci < n; ci++)
268 lencode[ci] = i;
269 }
270 /* patch up special case for len MaxMatch */
271 lencode[MaxMatch-MinMatch] = 285;
272 litlenbase[285-LenStart] = MaxMatch-MinMatch;
273
274 ci = 0;
275 for(i = 0; i < 16; i++){
276 n = ci + (1 << offextra[i]);
277 offbase[i] = ci;
278 for(; ci < n; ci++)
279 offcode[ci] = i;
280 }
281
282 ci = ci >> 7;
283 for(; i < 30; i++){
284 n = ci + (1 << (offextra[i] - 7));
285 offbase[i] = ci << 7;
286 for(; ci < n; ci++)
287 bigoffcode[ci] = i;
288 }
289 return FlateOk;
290 }
291
292 static void
deflatereset(LZstate * lz,int level,int debug)293 deflatereset(LZstate *lz, int level, int debug)
294 {
295 memset(lz->nexts, 0, sizeof lz->nexts);
296 memset(lz->hash, 0, sizeof lz->hash);
297 lz->totr = 0;
298 lz->totw = 0;
299 lz->pos = 0;
300 lz->avail = 0;
301 lz->out = lz->obuf;
302 lz->eout = &lz->obuf[DeflateOut];
303 lz->prevlen = MinMatch - 1;
304 lz->prevoff = 0;
305 lz->now = MaxOff + 1;
306 lz->dot = lz->now;
307 lz->bits = 0;
308 lz->nbits = 0;
309 lz->maxcheck = (1 << level);
310 lz->maxcheck -= lz->maxcheck >> 2;
311 if(lz->maxcheck < 2)
312 lz->maxcheck = 2;
313 else if(lz->maxcheck > 1024)
314 lz->maxcheck = 1024;
315
316 lz->debug = debug;
317 }
318
319 int
deflate(void * wr,int (* w)(void *,void *,int),void * rr,int (* r)(void *,void *,int),int level,int debug)320 deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug)
321 {
322 LZstate *lz;
323 LZblock *lzb;
324 int ok;
325
326 lz = malloc(sizeof *lz + sizeof *lzb);
327 if(lz == nil)
328 return FlateNoMem;
329 lzb = (LZblock*)&lz[1];
330
331 deflatereset(lz, level, debug);
332 lz->w = w;
333 lz->wr = wr;
334 lz->wbad = 0;
335 lz->rbad = 0;
336 lz->eof = 0;
337 ok = FlateOk;
338 while(!lz->eof || lz->avail){
339 ok = deflateb(lz, lzb, rr, r);
340 if(ok != FlateOk)
341 break;
342 }
343 if(ok == FlateOk && lz->rbad)
344 ok = FlateInputFail;
345 if(ok == FlateOk && lz->wbad)
346 ok = FlateOutputFail;
347 free(lz);
348 return ok;
349 }
350
351 static int
deflateb(LZstate * lz,LZblock * lzb,void * rr,int (* r)(void *,void *,int))352 deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int))
353 {
354 Dyncode dyncode, hdyncode;
355 Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen];
356 ulong litcount[Nlitlen];
357 long nunc, ndyn, nfix, nhuff;
358 uchar *slop, *hslop;
359 ulong ep;
360 int i, n, m, mm, nslop;
361
362 memset(lzb->litlencount, 0, sizeof lzb->litlencount);
363 memset(lzb->offcount, 0, sizeof lzb->offcount);
364 lzb->litlencount[DeflateEob]++;
365
366 lzb->bytes = 0;
367 lzb->eparse = lzb->parse;
368 lzb->lastv = 0;
369 lzb->excost = 0;
370
371 slop = &lz->hist[lz->pos];
372 n = lz->avail;
373 while(n < DeflateBlock && (!lz->eof || lz->avail)){
374 /*
375 * fill the buffer as much as possible,
376 * while leaving room for MaxOff history behind lz->pos,
377 * and not reading more than we can handle.
378 *
379 * make sure we read at least HistSlop bytes.
380 */
381 if(!lz->eof){
382 ep = lz->pos + lz->avail;
383 if(ep >= HistBlock)
384 ep -= HistBlock;
385 m = HistBlock - MaxOff - lz->avail;
386 if(m > HistBlock - n)
387 m = HistBlock - n;
388 if(m > (HistBlock + HistSlop) - ep)
389 m = (HistBlock + HistSlop) - ep;
390 if(m & ~(BlockSize - 1))
391 m &= ~(BlockSize - 1);
392
393 /*
394 * be nice to the caller: stop reads that are too small.
395 * can only get here when we've already filled the buffer some
396 */
397 if(m < HistSlop){
398 if(!m || !lzb->bytes)
399 return FlateInternal;
400 break;
401 }
402
403 mm = (*r)(rr, &lz->hist[ep], m);
404 if(mm > 0){
405 /*
406 * wrap data to end if we're read it from the beginning
407 * this way, we don't have to wrap searches.
408 *
409 * wrap reads past the end to the beginning.
410 * this way, we can guarantee minimum size reads.
411 */
412 if(ep < HistSlop)
413 memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep);
414 else if(ep + mm > HistBlock)
415 memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock);
416
417 lz->totr += mm;
418 n += mm;
419 lz->avail += mm;
420 }else{
421 if(mm < 0)
422 lz->rbad = 1;
423 lz->eof = 1;
424 }
425 }
426 ep = lz->pos + lz->avail;
427 if(ep > HistSize)
428 ep = HistSize;
429 if(lzb->bytes + ep - lz->pos > DeflateMaxBlock)
430 ep = lz->pos + DeflateMaxBlock - lzb->bytes;
431 m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof);
432 lzb->bytes += m;
433 lz->pos = (lz->pos + m) & (HistBlock - 1);
434 lz->avail -= m;
435 }
436 if(lzb->lastv)
437 *lzb->eparse++ = lzb->lastv;
438 if(lzb->eparse > lzb->parse + nelem(lzb->parse))
439 return FlateInternal;
440 nunc = lzb->bytes;
441
442 if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits)
443 || !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits))
444 return FlateInternal;
445
446 ndyn = huffcodes(&dyncode, dlitlentab, dofftab);
447 if(ndyn < 0)
448 return FlateInternal;
449 ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen)
450 + bitcost(dofftab, lzb->offcount, Noff)
451 + lzb->excost;
452
453 memset(litcount, 0, sizeof litcount);
454
455 nslop = nunc;
456 if(nslop > &lz->hist[HistSize] - slop)
457 nslop = &lz->hist[HistSize] - slop;
458
459 for(i = 0; i < nslop; i++)
460 litcount[slop[i]]++;
461 hslop = &lz->hist[HistSlop - nslop];
462 for(; i < nunc; i++)
463 litcount[hslop[i]]++;
464 litcount[DeflateEob]++;
465
466 if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits))
467 return FlateInternal;
468 nhuff = huffcodes(&hdyncode, hlitlentab, hofftab);
469 if(nhuff < 0)
470 return FlateInternal;
471 nhuff += bitcost(hlitlentab, litcount, Nlitlen);
472
473 nfix = bitcost(litlentab, lzb->litlencount, Nlitlen)
474 + bitcost(offtab, lzb->offcount, Noff)
475 + lzb->excost;
476
477 lzput(lz, lz->eof && !lz->avail, 1);
478
479 if(lz->debug){
480 fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n",
481 nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff);
482 fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail);
483 }
484
485 if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){
486 lzput(lz, DeflateUnc, 2);
487 lzflushbits(lz);
488
489 lzput(lz, nunc & 0xff, 8);
490 lzput(lz, (nunc >> 8) & 0xff, 8);
491 lzput(lz, ~nunc & 0xff, 8);
492 lzput(lz, (~nunc >> 8) & 0xff, 8);
493 lzflush(lz);
494
495 lzwrite(lz, slop, nslop);
496 lzwrite(lz, &lz->hist[HistSlop], nunc - nslop);
497 }else if(ndyn < nfix && ndyn < nhuff){
498 lzput(lz, DeflateDyn, 2);
499
500 wrdyncode(lz, &dyncode);
501 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab);
502 lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits);
503 }else if(nhuff < nfix){
504 lzput(lz, DeflateDyn, 2);
505
506 wrdyncode(lz, &hdyncode);
507
508 m = 0;
509 for(i = nunc; i > MaxLitRun; i -= MaxLitRun)
510 lzb->parse[m++] = MaxLitRun;
511 lzb->parse[m++] = i;
512
513 wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab);
514 lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits);
515 }else{
516 lzput(lz, DeflateFix, 2);
517
518 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab);
519 lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);
520 }
521
522 if(lz->eof && !lz->avail){
523 lzflushbits(lz);
524 lzflush(lz);
525 }
526 return FlateOk;
527 }
528
529 static void
lzwrite(LZstate * lz,void * buf,int n)530 lzwrite(LZstate *lz, void *buf, int n)
531 {
532 int nw;
533
534 if(n && lz->w){
535 nw = (*lz->w)(lz->wr, buf, n);
536 if(nw != n){
537 lz->w = nil;
538 lz->wbad = 1;
539 }else
540 lz->totw += n;
541 }
542 }
543
544 static void
lzflush(LZstate * lz)545 lzflush(LZstate *lz)
546 {
547 lzwrite(lz, lz->obuf, lz->out - lz->obuf);
548 lz->out = lz->obuf;
549 }
550
551 static void
lzput(LZstate * lz,ulong bits,int nbits)552 lzput(LZstate *lz, ulong bits, int nbits)
553 {
554 bits = (bits << lz->nbits) | lz->bits;
555 for(nbits += lz->nbits; nbits >= 8; nbits -= 8){
556 *lz->out++ = bits;
557 if(lz->out == lz->eout)
558 lzflush(lz);
559 bits >>= 8;
560 }
561 lz->bits = bits;
562 lz->nbits = nbits;
563 }
564
565 static void
lzflushbits(LZstate * lz)566 lzflushbits(LZstate *lz)
567 {
568 if(lz->nbits)
569 lzput(lz, 0, 8 - (lz->nbits & 7));
570 }
571
572 /*
573 * write out a block of n samples,
574 * given lz encoding and counts for huffman tables
575 */
576 static void
wrblock(LZstate * out,int litoff,ushort * soff,ushort * eoff,Huff * litlentab,Huff * offtab)577 wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab)
578 {
579 ushort *off;
580 int i, run, offset, lit, len, c;
581
582 if(out->debug > 2){
583 for(off = soff; off < eoff; ){
584 offset = *off++;
585 run = offset & MaxLitRun;
586 if(run){
587 for(i = 0; i < run; i++){
588 lit = out->hist[litoff & (HistBlock - 1)];
589 litoff++;
590 fprint(2, "\tlit %.2ux %c\n", lit, lit);
591 }
592 if(!(offset & LenFlag))
593 continue;
594 len = offset >> LenShift;
595 offset = *off++;
596 }else if(offset & LenFlag){
597 len = offset >> LenShift;
598 offset = *off++;
599 }else{
600 len = 0;
601 offset >>= LenShift;
602 }
603 litoff += len + MinMatch;
604 fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch);
605 }
606 }
607
608 for(off = soff; off < eoff; ){
609 offset = *off++;
610 run = offset & MaxLitRun;
611 if(run){
612 for(i = 0; i < run; i++){
613 lit = out->hist[litoff & (HistBlock - 1)];
614 litoff++;
615 lzput(out, litlentab[lit].encode, litlentab[lit].bits);
616 }
617 if(!(offset & LenFlag))
618 continue;
619 len = offset >> LenShift;
620 offset = *off++;
621 }else if(offset & LenFlag){
622 len = offset >> LenShift;
623 offset = *off++;
624 }else{
625 len = 0;
626 offset >>= LenShift;
627 }
628 litoff += len + MinMatch;
629 c = lencode[len];
630 lzput(out, litlentab[c].encode, litlentab[c].bits);
631 c -= LenStart;
632 if(litlenextra[c])
633 lzput(out, len - litlenbase[c], litlenextra[c]);
634
635 if(offset < MaxOffCode)
636 c = offcode[offset];
637 else
638 c = bigoffcode[offset >> 7];
639 lzput(out, offtab[c].encode, offtab[c].bits);
640 if(offextra[c])
641 lzput(out, offset - offbase[c], offextra[c]);
642 }
643 }
644
645 /*
646 * look for the longest, closest string which matches
647 * the next prefix. the clever part here is looking for
648 * a string 1 longer than the previous best match.
649 *
650 * follows the recommendation of limiting number of chains
651 * which are checked. this appears to be the best heuristic.
652 */
653 static int
lzmatch(int now,int then,uchar * p,uchar * es,ushort * nexts,uchar * hist,int runlen,int check,int * m)654 lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m)
655 {
656 uchar *s, *t;
657 int ml, off, last;
658
659 ml = check;
660 if(runlen >= 8)
661 check >>= 2;
662 *m = 0;
663 if(p + runlen >= es)
664 return runlen;
665 last = 0;
666 for(; check-- > 0; then = nexts[then & (MaxOff-1)]){
667 off = (ushort)(now - then);
668 if(off <= last || off > MaxOff)
669 break;
670 s = p + runlen;
671 t = hist + (((p - hist) - off) & (HistBlock-1));
672 t += runlen;
673 for(; s >= p; s--){
674 if(*s != *t)
675 goto matchloop;
676 t--;
677 }
678
679 /*
680 * we have a new best match.
681 * extend it to it's maximum length
682 */
683 t += runlen + 2;
684 s += runlen + 2;
685 for(; s < es; s++){
686 if(*s != *t)
687 break;
688 t++;
689 }
690 runlen = s - p;
691 *m = off - 1;
692 if(s == es || runlen > ml)
693 break;
694 matchloop:;
695 last = off;
696 }
697 return runlen;
698 }
699
700 static int
lzcomp(LZstate * lz,LZblock * lzb,uchar * ep,ushort * parse,int finish)701 lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish)
702 {
703 ulong cont, excost, *litlencount, *offcount;
704 uchar *p, *q, *s, *es;
705 ushort *nexts, *hash;
706 int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer;
707
708 litlencount = lzb->litlencount;
709 offcount = lzb->offcount;
710 nexts = lz->nexts;
711 hash = lz->hash;
712 now = lz->now;
713
714 p = &lz->hist[lz->pos];
715 if(lz->prevlen != MinMatch - 1)
716 p++;
717
718 /*
719 * hash in the links for any hanging link positions,
720 * and calculate the hash for the current position.
721 */
722 n = MinMatch;
723 if(n > ep - p)
724 n = ep - p;
725 cont = 0;
726 for(i = 0; i < n - 1; i++){
727 m = now - ((MinMatch-1) - i);
728 if(m < lz->dot)
729 continue;
730 s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1));
731
732 cont = (s[0] << 16) | (s[1] << 8) | s[2];
733 h = hashit(cont);
734 prevoff = 0;
735 for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){
736 v = (ushort)(now - then);
737 if(v <= prevoff || v >= (MinMatch-1) - i)
738 break;
739 prevoff = v;
740 }
741 if(then == (ushort)m)
742 continue;
743 nexts[m & (MaxOff-1)] = hash[h];
744 hash[h] = m;
745 }
746 for(i = 0; i < n; i++)
747 cont = (cont << 8) | p[i];
748
749 /*
750 * now must point to the index in the nexts array
751 * corresponding to p's position in the history
752 */
753 prevlen = lz->prevlen;
754 prevoff = lz->prevoff;
755 maxdefer = lz->maxcheck >> 2;
756 excost = 0;
757 v = lzb->lastv;
758 for(;;){
759 es = p + MaxMatch;
760 if(es > ep){
761 if(!finish || p >= ep)
762 break;
763 es = ep;
764 }
765
766 h = hashit(cont);
767 runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m);
768
769 /*
770 * back out of small matches too far in the past
771 */
772 if(runlen == MinMatch && m >= MinMatchMaxOff){
773 runlen = MinMatch - 1;
774 m = 0;
775 }
776
777 /*
778 * record the encoding and increment counts for huffman trees
779 * if we get a match, defer selecting it until we check for
780 * a longer match at the next position.
781 */
782 if(prevlen >= runlen && prevlen != MinMatch - 1){
783 /*
784 * old match at least as good; use that one
785 */
786 n = prevlen - MinMatch;
787 if(v || n){
788 *parse++ = v | LenFlag | (n << LenShift);
789 *parse++ = prevoff;
790 }else
791 *parse++ = prevoff << LenShift;
792 v = 0;
793
794 n = lencode[n];
795 litlencount[n]++;
796 excost += litlenextra[n - LenStart];
797
798 if(prevoff < MaxOffCode)
799 n = offcode[prevoff];
800 else
801 n = bigoffcode[prevoff >> 7];
802 offcount[n]++;
803 excost += offextra[n];
804
805 runlen = prevlen - 1;
806 prevlen = MinMatch - 1;
807 nmatches++;
808 }else if(runlen == MinMatch - 1){
809 /*
810 * no match; just put out the literal
811 */
812 if(++v == MaxLitRun){
813 *parse++ = v;
814 v = 0;
815 }
816 litlencount[*p]++;
817 nlits++;
818 runlen = 1;
819 }else{
820 if(prevlen != MinMatch - 1){
821 /*
822 * longer match now. output previous literal,
823 * update current match, and try again
824 */
825 if(++v == MaxLitRun){
826 *parse++ = v;
827 v = 0;
828 }
829 litlencount[p[-1]]++;
830 nlits++;
831 }
832
833 prevoff = m;
834
835 if(runlen < maxdefer){
836 prevlen = runlen;
837 runlen = 1;
838 }else{
839 n = runlen - MinMatch;
840 if(v || n){
841 *parse++ = v | LenFlag | (n << LenShift);
842 *parse++ = prevoff;
843 }else
844 *parse++ = prevoff << LenShift;
845 v = 0;
846
847 n = lencode[n];
848 litlencount[n]++;
849 excost += litlenextra[n - LenStart];
850
851 if(prevoff < MaxOffCode)
852 n = offcode[prevoff];
853 else
854 n = bigoffcode[prevoff >> 7];
855 offcount[n]++;
856 excost += offextra[n];
857
858 prevlen = MinMatch - 1;
859 nmatches++;
860 }
861 }
862
863 /*
864 * update the hash for the newly matched data
865 * this is constructed so the link for the old
866 * match in this position must be at the end of a chain,
867 * and will expire when this match is added, ie it will
868 * never be examined by the match loop.
869 * add to the hash chain only if we have the real hash data.
870 */
871 for(q = p + runlen; p != q; p++){
872 if(p + MinMatch <= ep){
873 h = hashit(cont);
874 nexts[now & (MaxOff-1)] = hash[h];
875 hash[h] = now;
876 if(p + MinMatch < ep)
877 cont = (cont << 8) | p[MinMatch];
878 }
879 now++;
880 }
881 }
882
883 /*
884 * we can just store away the lazy state and
885 * pick it up next time. the last block will have finish set
886 * so we won't have any pending matches
887 * however, we need to correct for how much we've encoded
888 */
889 if(prevlen != MinMatch - 1)
890 p--;
891
892 lzb->excost += excost;
893 lzb->eparse = parse;
894 lzb->lastv = v;
895
896 lz->now = now;
897 lz->prevlen = prevlen;
898 lz->prevoff = prevoff;
899
900 return p - &lz->hist[lz->pos];
901 }
902
903 /*
904 * make up the dynamic code tables, and return the number of bits
905 * needed to transmit them.
906 */
907 static int
huffcodes(Dyncode * dc,Huff * littab,Huff * offtab)908 huffcodes(Dyncode *dc, Huff *littab, Huff *offtab)
909 {
910 Huff *codetab;
911 uchar *codes, *codeaux;
912 ulong codecount[Nclen], excost;
913 int i, n, m, v, c, nlit, noff, ncode, nclen;
914
915 codetab = dc->codetab;
916 codes = dc->codes;
917 codeaux = dc->codeaux;
918
919 /*
920 * trim the sizes of the tables
921 */
922 for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--)
923 ;
924 for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--)
925 ;
926
927 /*
928 * make the code-length code
929 */
930 for(i = 0; i < nlit; i++)
931 codes[i] = littab[i].bits;
932 for(i = 0; i < noff; i++)
933 codes[i + nlit] = offtab[i].bits;
934
935 /*
936 * run-length compress the code-length code
937 */
938 excost = 0;
939 c = 0;
940 ncode = nlit+noff;
941 for(i = 0; i < ncode; ){
942 n = i + 1;
943 v = codes[i];
944 while(n < ncode && v == codes[n])
945 n++;
946 n -= i;
947 i += n;
948 if(v == 0){
949 while(n >= 11){
950 m = n;
951 if(m > 138)
952 m = 138;
953 codes[c] = 18;
954 codeaux[c++] = m - 11;
955 n -= m;
956 excost += 7;
957 }
958 if(n >= 3){
959 codes[c] = 17;
960 codeaux[c++] = n - 3;
961 n = 0;
962 excost += 3;
963 }
964 }
965 while(n--){
966 codes[c++] = v;
967 while(n >= 3){
968 m = n;
969 if(m > 6)
970 m = 6;
971 codes[c] = 16;
972 codeaux[c++] = m - 3;
973 n -= m;
974 excost += 3;
975 }
976 }
977 }
978
979 memset(codecount, 0, sizeof codecount);
980 for(i = 0; i < c; i++)
981 codecount[codes[i]]++;
982 if(!mkgzprecode(codetab, codecount, Nclen, 8))
983 return -1;
984
985 for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--)
986 ;
987
988 dc->nlit = nlit;
989 dc->noff = noff;
990 dc->nclen = nclen;
991 dc->ncode = c;
992
993 return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost;
994 }
995
996 static void
wrdyncode(LZstate * out,Dyncode * dc)997 wrdyncode(LZstate *out, Dyncode *dc)
998 {
999 Huff *codetab;
1000 uchar *codes, *codeaux;
1001 int i, v, c;
1002
1003 /*
1004 * write out header, then code length code lengths,
1005 * and code lengths
1006 */
1007 lzput(out, dc->nlit-257, 5);
1008 lzput(out, dc->noff-1, 5);
1009 lzput(out, dc->nclen-4, 4);
1010
1011 codetab = dc->codetab;
1012 for(i = 0; i < dc->nclen; i++)
1013 lzput(out, codetab[clenorder[i]].bits, 3);
1014
1015 codes = dc->codes;
1016 codeaux = dc->codeaux;
1017 c = dc->ncode;
1018 for(i = 0; i < c; i++){
1019 v = codes[i];
1020 lzput(out, codetab[v].encode, codetab[v].bits);
1021 if(v >= 16){
1022 if(v == 16)
1023 lzput(out, codeaux[i], 2);
1024 else if(v == 17)
1025 lzput(out, codeaux[i], 3);
1026 else /* v == 18 */
1027 lzput(out, codeaux[i], 7);
1028 }
1029 }
1030 }
1031
1032 static int
bitcost(Huff * tab,ulong * count,int n)1033 bitcost(Huff *tab, ulong *count, int n)
1034 {
1035 ulong tot;
1036 int i;
1037
1038 tot = 0;
1039 for(i = 0; i < n; i++)
1040 tot += count[i] * tab[i].bits;
1041 return tot;
1042 }
1043
1044 static int
mkgzprecode(Huff * tab,ulong * count,int n,int maxbits)1045 mkgzprecode(Huff *tab, ulong *count, int n, int maxbits)
1046 {
1047 ulong bitcount[MaxHuffBits];
1048 int i, nbits;
1049
1050 nbits = mkprecode(tab, count, n, maxbits, bitcount);
1051 for(i = 0; i < n; i++){
1052 if(tab[i].bits == -1)
1053 tab[i].bits = 0;
1054 else if(tab[i].bits == 0){
1055 if(nbits != 0 || bitcount[0] != 1)
1056 return 0;
1057 bitcount[1] = 1;
1058 bitcount[0] = 0;
1059 nbits = 1;
1060 tab[i].bits = 1;
1061 }
1062 }
1063 if(bitcount[0] != 0)
1064 return 0;
1065 return hufftabinit(tab, n, bitcount, nbits);
1066 }
1067
1068 static int
hufftabinit(Huff * tab,int n,ulong * bitcount,int nbits)1069 hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits)
1070 {
1071 ulong code, nc[MaxHuffBits];
1072 int i, bits;
1073
1074 code = 0;
1075 for(bits = 1; bits <= nbits; bits++){
1076 code = (code + bitcount[bits-1]) << 1;
1077 nc[bits] = code;
1078 }
1079
1080 for(i = 0; i < n; i++){
1081 bits = tab[i].bits;
1082 if(bits){
1083 code = nc[bits]++ << (16 - bits);
1084 if(code & ~0xffff)
1085 return 0;
1086 tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8);
1087 }
1088 }
1089 return 1;
1090 }
1091
1092
1093 /*
1094 * this should be in a library
1095 */
1096 struct Chain
1097 {
1098 ulong count; /* occurances of everything in the chain */
1099 ushort leaf; /* leaves to the left of chain, or leaf value */
1100 char col; /* ref count for collecting unused chains */
1101 char gen; /* need to generate chains for next lower level */
1102 Chain *up; /* Chain up in the lists */
1103 };
1104
1105 struct Chains
1106 {
1107 Chain *lists[(MaxHuffBits - 1) * 2];
1108 ulong leafcount[MaxLeaf]; /* sorted list of leaf counts */
1109 ushort leafmap[MaxLeaf]; /* map to actual leaf number */
1110 int nleaf; /* number of leaves */
1111 Chain chains[ChainMem];
1112 Chain *echains;
1113 Chain *free;
1114 char col;
1115 int nlists;
1116 };
1117
1118 /*
1119 * fast, low space overhead algorithm for max depth huffman type codes
1120 *
1121 * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
1122 * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
1123 * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
1124 * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
1125 * pp 12-21, Springer Verlag, New York, 1995.
1126 */
1127 static int
mkprecode(Huff * tab,ulong * count,int n,int maxbits,ulong * bitcount)1128 mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount)
1129 {
1130 Chains cs;
1131 Chain *c;
1132 int i, m, em, bits;
1133
1134 /*
1135 * set up the sorted list of leaves
1136 */
1137 m = 0;
1138 for(i = 0; i < n; i++){
1139 tab[i].bits = -1;
1140 tab[i].encode = 0;
1141 if(count[i] != 0){
1142 cs.leafcount[m] = count[i];
1143 cs.leafmap[m] = i;
1144 m++;
1145 }
1146 }
1147 if(m < 2){
1148 if(m != 0){
1149 tab[cs.leafmap[0]].bits = 0;
1150 bitcount[0] = 1;
1151 }else
1152 bitcount[0] = 0;
1153 return 0;
1154 }
1155 cs.nleaf = m;
1156 leafsort(cs.leafcount, cs.leafmap, 0, m);
1157
1158 for(i = 0; i < m; i++)
1159 cs.leafcount[i] = count[cs.leafmap[i]];
1160
1161 /*
1162 * set up free list
1163 */
1164 cs.free = &cs.chains[2];
1165 cs.echains = &cs.chains[ChainMem];
1166 cs.col = 1;
1167
1168 /*
1169 * initialize chains for each list
1170 */
1171 c = &cs.chains[0];
1172 c->count = cs.leafcount[0];
1173 c->leaf = 1;
1174 c->col = cs.col;
1175 c->up = nil;
1176 c->gen = 0;
1177 cs.chains[1] = cs.chains[0];
1178 cs.chains[1].leaf = 2;
1179 cs.chains[1].count = cs.leafcount[1];
1180 for(i = 0; i < maxbits-1; i++){
1181 cs.lists[i * 2] = &cs.chains[0];
1182 cs.lists[i * 2 + 1] = &cs.chains[1];
1183 }
1184
1185 cs.nlists = 2 * (maxbits - 1);
1186 m = 2 * m - 2;
1187 for(i = 2; i < m; i++)
1188 nextchain(&cs, cs.nlists - 2);
1189
1190 bits = 0;
1191 bitcount[0] = cs.nleaf;
1192 for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){
1193 m = c->leaf;
1194 bitcount[bits++] -= m;
1195 bitcount[bits] = m;
1196 }
1197 m = 0;
1198 for(i = bits; i >= 0; i--)
1199 for(em = m + bitcount[i]; m < em; m++)
1200 tab[cs.leafmap[m]].bits = i;
1201
1202 return bits;
1203 }
1204
1205 /*
1206 * calculate the next chain on the list
1207 * we can always toss out the old chain
1208 */
1209 static void
nextchain(Chains * cs,int list)1210 nextchain(Chains *cs, int list)
1211 {
1212 Chain *c, *oc;
1213 int i, nleaf, sumc;
1214
1215 oc = cs->lists[list + 1];
1216 cs->lists[list] = oc;
1217 if(oc == nil)
1218 return;
1219
1220 /*
1221 * make sure we have all chains needed to make sumc
1222 * note it is possible to generate only one of these,
1223 * use twice that value for sumc, and then generate
1224 * the second if that preliminary sumc would be chosen.
1225 * however, this appears to be slower on current tests
1226 */
1227 if(oc->gen){
1228 nextchain(cs, list - 2);
1229 nextchain(cs, list - 2);
1230 oc->gen = 0;
1231 }
1232
1233 /*
1234 * pick up the chain we're going to add;
1235 * collect unused chains no free ones are left
1236 */
1237 for(c = cs->free; ; c++){
1238 if(c >= cs->echains){
1239 cs->col++;
1240 for(i = 0; i < cs->nlists; i++)
1241 for(c = cs->lists[i]; c != nil; c = c->up)
1242 c->col = cs->col;
1243 c = cs->chains;
1244 }
1245 if(c->col != cs->col)
1246 break;
1247 }
1248
1249 /*
1250 * pick the cheapest of
1251 * 1) the next package from the previous list
1252 * 2) the next leaf
1253 */
1254 nleaf = oc->leaf;
1255 sumc = 0;
1256 if(list > 0 && cs->lists[list-1] != nil)
1257 sumc = cs->lists[list-2]->count + cs->lists[list-1]->count;
1258 if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){
1259 c->count = sumc;
1260 c->leaf = oc->leaf;
1261 c->up = cs->lists[list-1];
1262 c->gen = 1;
1263 }else if(nleaf >= cs->nleaf){
1264 cs->lists[list + 1] = nil;
1265 return;
1266 }else{
1267 c->leaf = nleaf + 1;
1268 c->count = cs->leafcount[nleaf];
1269 c->up = oc->up;
1270 c->gen = 0;
1271 }
1272 cs->free = c + 1;
1273
1274 cs->lists[list + 1] = c;
1275 c->col = cs->col;
1276 }
1277
1278 static int
pivot(ulong * c,int a,int n)1279 pivot(ulong *c, int a, int n)
1280 {
1281 int j, pi, pj, pk;
1282
1283 j = n/6;
1284 pi = a + j; /* 1/6 */
1285 j += j;
1286 pj = pi + j; /* 1/2 */
1287 pk = pj + j; /* 5/6 */
1288 if(c[pi] < c[pj]){
1289 if(c[pi] < c[pk]){
1290 if(c[pj] < c[pk])
1291 return pj;
1292 return pk;
1293 }
1294 return pi;
1295 }
1296 if(c[pj] < c[pk]){
1297 if(c[pi] < c[pk])
1298 return pi;
1299 return pk;
1300 }
1301 return pj;
1302 }
1303
1304 static void
leafsort(ulong * leafcount,ushort * leafmap,int a,int n)1305 leafsort(ulong *leafcount, ushort *leafmap, int a, int n)
1306 {
1307 ulong t;
1308 int j, pi, pj, pn;
1309
1310 while(n > 1){
1311 if(n > 10){
1312 pi = pivot(leafcount, a, n);
1313 }else
1314 pi = a + (n>>1);
1315
1316 t = leafcount[pi];
1317 leafcount[pi] = leafcount[a];
1318 leafcount[a] = t;
1319 t = leafmap[pi];
1320 leafmap[pi] = leafmap[a];
1321 leafmap[a] = t;
1322 pi = a;
1323 pn = a + n;
1324 pj = pn;
1325 for(;;){
1326 do
1327 pi++;
1328 while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a]));
1329 do
1330 pj--;
1331 while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a]));
1332 if(pj < pi)
1333 break;
1334 t = leafcount[pi];
1335 leafcount[pi] = leafcount[pj];
1336 leafcount[pj] = t;
1337 t = leafmap[pi];
1338 leafmap[pi] = leafmap[pj];
1339 leafmap[pj] = t;
1340 }
1341 t = leafcount[a];
1342 leafcount[a] = leafcount[pj];
1343 leafcount[pj] = t;
1344 t = leafmap[a];
1345 leafmap[a] = leafmap[pj];
1346 leafmap[pj] = t;
1347 j = pj - a;
1348
1349 n = n-j-1;
1350 if(j >= n){
1351 leafsort(leafcount, leafmap, a, j);
1352 a += j+1;
1353 }else{
1354 leafsort(leafcount, leafmap, a + (j+1), n);
1355 n = j;
1356 }
1357 }
1358 }
1359