xref: /netbsd-src/external/zlib/pigz/dist/zopfli/lz77.c (revision 3e407a68a64115001ff3f1b658def92a1368b7e6)
1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 #include "lz77.h"
21 #include "util.h"
22 
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 
ZopfliInitLZ77Store(ZopfliLZ77Store * store)27 void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
28   store->size = 0;
29   store->litlens = 0;
30   store->dists = 0;
31 }
32 
ZopfliCleanLZ77Store(ZopfliLZ77Store * store)33 void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
34   free(store->litlens);
35   free(store->dists);
36 }
37 
ZopfliCopyLZ77Store(const ZopfliLZ77Store * source,ZopfliLZ77Store * dest)38 void ZopfliCopyLZ77Store(
39     const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
40   size_t i;
41   ZopfliCleanLZ77Store(dest);
42   dest->litlens =
43       (unsigned short*)malloc(sizeof(*dest->litlens) * source->size);
44   dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size);
45 
46   if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */
47 
48   dest->size = source->size;
49   for (i = 0; i < source->size; i++) {
50     dest->litlens[i] = source->litlens[i];
51     dest->dists[i] = source->dists[i];
52   }
53 }
54 
55 /*
56 Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store.
57 context must be a ZopfliLZ77Store*.
58 */
ZopfliStoreLitLenDist(unsigned short length,unsigned short dist,ZopfliLZ77Store * store)59 void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
60                            ZopfliLZ77Store* store) {
61   size_t size2 = store->size;  /* Needed for using ZOPFLI_APPEND_DATA twice. */
62   ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
63   ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
64 }
65 
66 /*
67 Gets the value of the length given the distance. Typically, the value of the
68 length is the length, but if the distance is very long, decrease the value of
69 the length a bit to make up for the fact that long distances use large amounts
70 of extra bits.
71 */
GetLengthValue(int length,int distance)72 static int GetLengthValue(int length, int distance) {
73   /*
74   At distance > 1024, using length 3 is no longer good, due to the large amount
75   of extra bits for the distance code. distance > 1024 uses 9+ extra bits, and
76   this seems to be the sweet spot.
77   */
78   return distance > 1024 ? length - 1 : length;
79 }
80 
ZopfliVerifyLenDist(const unsigned char * data,size_t datasize,size_t pos,unsigned short dist,unsigned short length)81 void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
82                          unsigned short dist, unsigned short length) {
83 
84   /* TODO(lode): make this only run in a debug compile, it's for assert only. */
85   size_t i;
86 
87   assert(pos + length <= datasize);
88   for (i = 0; i < length; i++) {
89     if (data[pos - dist + i] != data[pos + i]) {
90       assert(data[pos - dist + i] == data[pos + i]);
91       break;
92     }
93   }
94 }
95 
96 /*
97 Finds how long the match of scan and match is. Can be used to find how many
98 bytes starting from scan, and from match, are equal. Returns the last byte
99 after scan, which is still equal to the correspondinb byte after match.
100 scan is the position to compare
101 match is the earlier position to compare.
102 end is the last possible byte, beyond which to stop looking.
103 safe_end is a few (8) bytes before end, for comparing multiple bytes at once.
104 */
GetMatch(const unsigned char * scan,const unsigned char * match,const unsigned char * end,const unsigned char * safe_end)105 static const unsigned char* GetMatch(const unsigned char* scan,
106                                      const unsigned char* match,
107                                      const unsigned char* end,
108                                      const unsigned char* safe_end) {
109 
110   if (sizeof(size_t) == 8) {
111     /* 8 checks at once per array bounds check (size_t is 64-bit). */
112     while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) {
113       scan += 8;
114       match += 8;
115     }
116   } else if (sizeof(unsigned int) == 4) {
117     /* 4 checks at once per array bounds check (unsigned int is 32-bit). */
118     while (scan < safe_end
119         && *((unsigned int*)scan) == *((unsigned int*)match)) {
120       scan += 4;
121       match += 4;
122     }
123   } else {
124     /* do 8 checks at once per array bounds check. */
125     while (scan < safe_end && *scan == *match && *++scan == *++match
126           && *++scan == *++match && *++scan == *++match
127           && *++scan == *++match && *++scan == *++match
128           && *++scan == *++match && *++scan == *++match) {
129       scan++; match++;
130     }
131   }
132 
133   /* The remaining few bytes. */
134   while (scan != end && *scan == *match) {
135     scan++; match++;
136   }
137 
138   return scan;
139 }
140 
141 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
142 /*
143 Gets distance, length and sublen values from the cache if possible.
144 Returns 1 if it got the values from the cache, 0 if not.
145 Updates the limit value to a smaller one if possible with more limited
146 information from the cache.
147 */
TryGetFromLongestMatchCache(ZopfliBlockState * s,size_t pos,size_t * limit,unsigned short * sublen,unsigned short * distance,unsigned short * length)148 static int TryGetFromLongestMatchCache(ZopfliBlockState* s,
149     size_t pos, size_t* limit,
150     unsigned short* sublen, unsigned short* distance, unsigned short* length) {
151   /* The LMC cache starts at the beginning of the block rather than the
152      beginning of the whole array. */
153   size_t lmcpos = pos - s->blockstart;
154 
155   /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
156      that this cache value is not filled in yet. */
157   unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
158       s->lmc->dist[lmcpos] != 0);
159   unsigned char limit_ok_for_cache = cache_available &&
160       (*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit ||
161       (sublen && ZopfliMaxCachedSublen(s->lmc,
162           lmcpos, s->lmc->length[lmcpos]) >= *limit));
163 
164   if (s->lmc && limit_ok_for_cache && cache_available) {
165     if (!sublen || s->lmc->length[lmcpos]
166         <= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) {
167       *length = s->lmc->length[lmcpos];
168       if (*length > *limit) *length = *limit;
169       if (sublen) {
170         ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen);
171         *distance = sublen[*length];
172         if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) {
173           assert(sublen[*length] == s->lmc->dist[lmcpos]);
174         }
175       } else {
176         *distance = s->lmc->dist[lmcpos];
177       }
178       return 1;
179     }
180     /* Can't use much of the cache, since the "sublens" need to be calculated,
181        but at  least we already know when to stop. */
182     *limit = s->lmc->length[lmcpos];
183   }
184 
185   return 0;
186 }
187 
188 /*
189 Stores the found sublen, distance and length in the longest match cache, if
190 possible.
191 */
StoreInLongestMatchCache(ZopfliBlockState * s,size_t pos,size_t limit,const unsigned short * sublen,unsigned short distance,unsigned short length)192 static void StoreInLongestMatchCache(ZopfliBlockState* s,
193     size_t pos, size_t limit,
194     const unsigned short* sublen,
195     unsigned short distance, unsigned short length) {
196   /* The LMC cache starts at the beginning of the block rather than the
197      beginning of the whole array. */
198   size_t lmcpos = pos - s->blockstart;
199 
200   /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
201      that this cache value is not filled in yet. */
202   unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
203       s->lmc->dist[lmcpos] != 0);
204 
205   if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) {
206     assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0);
207     s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance;
208     s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : length;
209     assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0));
210     ZopfliSublenToCache(sublen, lmcpos, length, s->lmc);
211   }
212 }
213 #endif
214 
ZopfliFindLongestMatch(ZopfliBlockState * s,const ZopfliHash * h,const unsigned char * array,size_t pos,size_t size,size_t limit,unsigned short * sublen,unsigned short * distance,unsigned short * length)215 void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h,
216     const unsigned char* array,
217     size_t pos, size_t size, size_t limit,
218     unsigned short* sublen, unsigned short* distance, unsigned short* length) {
219   unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp;
220   unsigned short bestdist = 0;
221   unsigned short bestlength = 1;
222   const unsigned char* scan;
223   const unsigned char* match;
224   const unsigned char* arrayend;
225   const unsigned char* arrayend_safe;
226 #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
227   int chain_counter = ZOPFLI_MAX_CHAIN_HITS;  /* For quitting early. */
228 #endif
229 
230   unsigned dist = 0;  /* Not unsigned short on purpose. */
231 
232   int* hhead = h->head;
233   unsigned short* hprev = h->prev;
234   int* hhashval = h->hashval;
235   int hval = h->val;
236 
237 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
238   if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) {
239     assert(pos + *length <= size);
240     return;
241   }
242 #endif
243 
244   assert(limit <= ZOPFLI_MAX_MATCH);
245   assert(limit >= ZOPFLI_MIN_MATCH);
246   assert(pos < size);
247 
248   if (size - pos < ZOPFLI_MIN_MATCH) {
249     /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to
250        try. */
251     *length = 0;
252     *distance = 0;
253     return;
254   }
255 
256   if (pos + limit > size) {
257     limit = size - pos;
258   }
259   arrayend = &array[pos] + limit;
260   arrayend_safe = arrayend - 8;
261 
262   assert(hval < 65536);
263 
264   pp = hhead[hval];  /* During the whole loop, p == hprev[pp]. */
265   p = hprev[pp];
266 
267   assert(pp == hpos);
268 
269   dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
270 
271   /* Go through all distances. */
272   while (dist < ZOPFLI_WINDOW_SIZE) {
273     unsigned short currentlength = 0;
274 
275     assert(p < ZOPFLI_WINDOW_SIZE);
276     assert(p == hprev[pp]);
277     assert(hhashval[p] == hval);
278 
279     if (dist > 0) {
280       assert(pos < size);
281       assert(dist <= pos);
282       scan = &array[pos];
283       match = &array[pos - dist];
284 
285       /* Testing the byte at position bestlength first, goes slightly faster. */
286       if (pos + bestlength >= size
287           || *(scan + bestlength) == *(match + bestlength)) {
288 
289 #ifdef ZOPFLI_HASH_SAME
290         unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK];
291         if (same0 > 2 && *scan == *match) {
292           unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK];
293           unsigned short same = same0 < same1 ? same0 : same1;
294           if (same > limit) same = limit;
295           scan += same;
296           match += same;
297         }
298 #endif
299         scan = GetMatch(scan, match, arrayend, arrayend_safe);
300         currentlength = scan - &array[pos];  /* The found length. */
301       }
302 
303       if (currentlength > bestlength) {
304         if (sublen) {
305           unsigned short j;
306           for (j = bestlength + 1; j <= currentlength; j++) {
307             sublen[j] = dist;
308           }
309         }
310         bestdist = dist;
311         bestlength = currentlength;
312         if (currentlength >= limit) break;
313       }
314     }
315 
316 
317 #ifdef ZOPFLI_HASH_SAME_HASH
318     /* Switch to the other hash once this will be more efficient. */
319     if (hhead != h->head2 && bestlength >= h->same[hpos] &&
320         h->val2 == h->hashval2[p]) {
321       /* Now use the hash that encodes the length and first byte. */
322       hhead = h->head2;
323       hprev = h->prev2;
324       hhashval = h->hashval2;
325       hval = h->val2;
326     }
327 #endif
328 
329     pp = p;
330     p = hprev[p];
331     if (p == pp) break;  /* Uninited prev value. */
332 
333     dist += p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
334 
335 #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
336     chain_counter--;
337     if (chain_counter <= 0) break;
338 #endif
339   }
340 
341 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
342   StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength);
343 #endif
344 
345   assert(bestlength <= limit);
346 
347   *distance = bestdist;
348   *length = bestlength;
349   assert(pos + *length <= size);
350 }
351 
ZopfliLZ77Greedy(ZopfliBlockState * s,const unsigned char * in,size_t instart,size_t inend,ZopfliLZ77Store * store)352 void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
353                       size_t instart, size_t inend,
354                       ZopfliLZ77Store* store) {
355   size_t i = 0, j;
356   unsigned short leng;
357   unsigned short dist;
358   int lengvalue;
359   size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
360       ? instart - ZOPFLI_WINDOW_SIZE : 0;
361   unsigned short dummysublen[259];
362 
363   ZopfliHash hash;
364   ZopfliHash* h = &hash;
365 
366 #ifdef ZOPFLI_LAZY_MATCHING
367   /* Lazy matching. */
368   unsigned prev_length = 0;
369   unsigned prev_match = 0;
370   int prevlengvalue;
371   int match_available = 0;
372 #endif
373 
374   if (instart == inend) return;
375 
376   ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
377   ZopfliWarmupHash(in, windowstart, inend, h);
378   for (i = windowstart; i < instart; i++) {
379     ZopfliUpdateHash(in, i, inend, h);
380   }
381 
382   for (i = instart; i < inend; i++) {
383     ZopfliUpdateHash(in, i, inend, h);
384 
385     ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen,
386                            &dist, &leng);
387     lengvalue = GetLengthValue(leng, dist);
388 
389 #ifdef ZOPFLI_LAZY_MATCHING
390     /* Lazy matching. */
391     prevlengvalue = GetLengthValue(prev_length, prev_match);
392     if (match_available) {
393       match_available = 0;
394       if (lengvalue > prevlengvalue + 1) {
395         ZopfliStoreLitLenDist(in[i - 1], 0, store);
396         if (lengvalue >= ZOPFLI_MIN_MATCH && lengvalue < ZOPFLI_MAX_MATCH) {
397           match_available = 1;
398           prev_length = leng;
399           prev_match = dist;
400           continue;
401         }
402       } else {
403         /* Add previous to output. */
404         leng = prev_length;
405         dist = prev_match;
406         lengvalue = prevlengvalue;
407         /* Add to output. */
408         ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
409         ZopfliStoreLitLenDist(leng, dist, store);
410         for (j = 2; j < leng; j++) {
411           assert(i < inend);
412           i++;
413           ZopfliUpdateHash(in, i, inend, h);
414         }
415         continue;
416       }
417     }
418     else if (lengvalue >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
419       match_available = 1;
420       prev_length = leng;
421       prev_match = dist;
422       continue;
423     }
424     /* End of lazy matching. */
425 #endif
426 
427     /* Add to output. */
428     if (lengvalue >= ZOPFLI_MIN_MATCH) {
429       ZopfliVerifyLenDist(in, inend, i, dist, leng);
430       ZopfliStoreLitLenDist(leng, dist, store);
431     } else {
432       leng = 1;
433       ZopfliStoreLitLenDist(in[i], 0, store);
434     }
435     for (j = 1; j < leng; j++) {
436       assert(i < inend);
437       i++;
438       ZopfliUpdateHash(in, i, inend, h);
439     }
440   }
441 
442   ZopfliCleanHash(h);
443 }
444 
ZopfliLZ77Counts(const unsigned short * litlens,const unsigned short * dists,size_t start,size_t end,size_t * ll_count,size_t * d_count)445 void ZopfliLZ77Counts(const unsigned short* litlens,
446                       const unsigned short* dists,
447                       size_t start, size_t end,
448                       size_t* ll_count, size_t* d_count) {
449   size_t i;
450 
451   for (i = 0; i < 288; i++) {
452     ll_count[i] = 0;
453   }
454   for (i = 0; i < 32; i++) {
455     d_count[i] = 0;
456   }
457 
458   for (i = start; i < end; i++) {
459     if (dists[i] == 0) {
460       ll_count[litlens[i]]++;
461     } else {
462       ll_count[ZopfliGetLengthSymbol(litlens[i])]++;
463       d_count[ZopfliGetDistSymbol(dists[i])]++;
464     }
465   }
466 
467   ll_count[256] = 1;  /* End symbol. */
468 }
469