1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19
20 #include "lz77.h"
21 #include "util.h"
22
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26
ZopfliInitLZ77Store(ZopfliLZ77Store * store)27 void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
28 store->size = 0;
29 store->litlens = 0;
30 store->dists = 0;
31 }
32
ZopfliCleanLZ77Store(ZopfliLZ77Store * store)33 void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
34 free(store->litlens);
35 free(store->dists);
36 }
37
ZopfliCopyLZ77Store(const ZopfliLZ77Store * source,ZopfliLZ77Store * dest)38 void ZopfliCopyLZ77Store(
39 const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
40 size_t i;
41 ZopfliCleanLZ77Store(dest);
42 dest->litlens =
43 (unsigned short*)malloc(sizeof(*dest->litlens) * source->size);
44 dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size);
45
46 if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */
47
48 dest->size = source->size;
49 for (i = 0; i < source->size; i++) {
50 dest->litlens[i] = source->litlens[i];
51 dest->dists[i] = source->dists[i];
52 }
53 }
54
55 /*
56 Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store.
57 context must be a ZopfliLZ77Store*.
58 */
ZopfliStoreLitLenDist(unsigned short length,unsigned short dist,ZopfliLZ77Store * store)59 void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
60 ZopfliLZ77Store* store) {
61 size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */
62 ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
63 ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
64 }
65
66 /*
67 Gets the value of the length given the distance. Typically, the value of the
68 length is the length, but if the distance is very long, decrease the value of
69 the length a bit to make up for the fact that long distances use large amounts
70 of extra bits.
71 */
GetLengthValue(int length,int distance)72 static int GetLengthValue(int length, int distance) {
73 /*
74 At distance > 1024, using length 3 is no longer good, due to the large amount
75 of extra bits for the distance code. distance > 1024 uses 9+ extra bits, and
76 this seems to be the sweet spot.
77 */
78 return distance > 1024 ? length - 1 : length;
79 }
80
ZopfliVerifyLenDist(const unsigned char * data,size_t datasize,size_t pos,unsigned short dist,unsigned short length)81 void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
82 unsigned short dist, unsigned short length) {
83
84 /* TODO(lode): make this only run in a debug compile, it's for assert only. */
85 size_t i;
86
87 assert(pos + length <= datasize);
88 for (i = 0; i < length; i++) {
89 if (data[pos - dist + i] != data[pos + i]) {
90 assert(data[pos - dist + i] == data[pos + i]);
91 break;
92 }
93 }
94 }
95
96 /*
97 Finds how long the match of scan and match is. Can be used to find how many
98 bytes starting from scan, and from match, are equal. Returns the last byte
99 after scan, which is still equal to the correspondinb byte after match.
100 scan is the position to compare
101 match is the earlier position to compare.
102 end is the last possible byte, beyond which to stop looking.
103 safe_end is a few (8) bytes before end, for comparing multiple bytes at once.
104 */
GetMatch(const unsigned char * scan,const unsigned char * match,const unsigned char * end,const unsigned char * safe_end)105 static const unsigned char* GetMatch(const unsigned char* scan,
106 const unsigned char* match,
107 const unsigned char* end,
108 const unsigned char* safe_end) {
109
110 if (sizeof(size_t) == 8) {
111 /* 8 checks at once per array bounds check (size_t is 64-bit). */
112 while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) {
113 scan += 8;
114 match += 8;
115 }
116 } else if (sizeof(unsigned int) == 4) {
117 /* 4 checks at once per array bounds check (unsigned int is 32-bit). */
118 while (scan < safe_end
119 && *((unsigned int*)scan) == *((unsigned int*)match)) {
120 scan += 4;
121 match += 4;
122 }
123 } else {
124 /* do 8 checks at once per array bounds check. */
125 while (scan < safe_end && *scan == *match && *++scan == *++match
126 && *++scan == *++match && *++scan == *++match
127 && *++scan == *++match && *++scan == *++match
128 && *++scan == *++match && *++scan == *++match) {
129 scan++; match++;
130 }
131 }
132
133 /* The remaining few bytes. */
134 while (scan != end && *scan == *match) {
135 scan++; match++;
136 }
137
138 return scan;
139 }
140
141 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
142 /*
143 Gets distance, length and sublen values from the cache if possible.
144 Returns 1 if it got the values from the cache, 0 if not.
145 Updates the limit value to a smaller one if possible with more limited
146 information from the cache.
147 */
TryGetFromLongestMatchCache(ZopfliBlockState * s,size_t pos,size_t * limit,unsigned short * sublen,unsigned short * distance,unsigned short * length)148 static int TryGetFromLongestMatchCache(ZopfliBlockState* s,
149 size_t pos, size_t* limit,
150 unsigned short* sublen, unsigned short* distance, unsigned short* length) {
151 /* The LMC cache starts at the beginning of the block rather than the
152 beginning of the whole array. */
153 size_t lmcpos = pos - s->blockstart;
154
155 /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
156 that this cache value is not filled in yet. */
157 unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
158 s->lmc->dist[lmcpos] != 0);
159 unsigned char limit_ok_for_cache = cache_available &&
160 (*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit ||
161 (sublen && ZopfliMaxCachedSublen(s->lmc,
162 lmcpos, s->lmc->length[lmcpos]) >= *limit));
163
164 if (s->lmc && limit_ok_for_cache && cache_available) {
165 if (!sublen || s->lmc->length[lmcpos]
166 <= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) {
167 *length = s->lmc->length[lmcpos];
168 if (*length > *limit) *length = *limit;
169 if (sublen) {
170 ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen);
171 *distance = sublen[*length];
172 if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) {
173 assert(sublen[*length] == s->lmc->dist[lmcpos]);
174 }
175 } else {
176 *distance = s->lmc->dist[lmcpos];
177 }
178 return 1;
179 }
180 /* Can't use much of the cache, since the "sublens" need to be calculated,
181 but at least we already know when to stop. */
182 *limit = s->lmc->length[lmcpos];
183 }
184
185 return 0;
186 }
187
188 /*
189 Stores the found sublen, distance and length in the longest match cache, if
190 possible.
191 */
StoreInLongestMatchCache(ZopfliBlockState * s,size_t pos,size_t limit,const unsigned short * sublen,unsigned short distance,unsigned short length)192 static void StoreInLongestMatchCache(ZopfliBlockState* s,
193 size_t pos, size_t limit,
194 const unsigned short* sublen,
195 unsigned short distance, unsigned short length) {
196 /* The LMC cache starts at the beginning of the block rather than the
197 beginning of the whole array. */
198 size_t lmcpos = pos - s->blockstart;
199
200 /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
201 that this cache value is not filled in yet. */
202 unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
203 s->lmc->dist[lmcpos] != 0);
204
205 if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) {
206 assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0);
207 s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance;
208 s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : length;
209 assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0));
210 ZopfliSublenToCache(sublen, lmcpos, length, s->lmc);
211 }
212 }
213 #endif
214
ZopfliFindLongestMatch(ZopfliBlockState * s,const ZopfliHash * h,const unsigned char * array,size_t pos,size_t size,size_t limit,unsigned short * sublen,unsigned short * distance,unsigned short * length)215 void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h,
216 const unsigned char* array,
217 size_t pos, size_t size, size_t limit,
218 unsigned short* sublen, unsigned short* distance, unsigned short* length) {
219 unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp;
220 unsigned short bestdist = 0;
221 unsigned short bestlength = 1;
222 const unsigned char* scan;
223 const unsigned char* match;
224 const unsigned char* arrayend;
225 const unsigned char* arrayend_safe;
226 #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
227 int chain_counter = ZOPFLI_MAX_CHAIN_HITS; /* For quitting early. */
228 #endif
229
230 unsigned dist = 0; /* Not unsigned short on purpose. */
231
232 int* hhead = h->head;
233 unsigned short* hprev = h->prev;
234 int* hhashval = h->hashval;
235 int hval = h->val;
236
237 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
238 if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) {
239 assert(pos + *length <= size);
240 return;
241 }
242 #endif
243
244 assert(limit <= ZOPFLI_MAX_MATCH);
245 assert(limit >= ZOPFLI_MIN_MATCH);
246 assert(pos < size);
247
248 if (size - pos < ZOPFLI_MIN_MATCH) {
249 /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to
250 try. */
251 *length = 0;
252 *distance = 0;
253 return;
254 }
255
256 if (pos + limit > size) {
257 limit = size - pos;
258 }
259 arrayend = &array[pos] + limit;
260 arrayend_safe = arrayend - 8;
261
262 assert(hval < 65536);
263
264 pp = hhead[hval]; /* During the whole loop, p == hprev[pp]. */
265 p = hprev[pp];
266
267 assert(pp == hpos);
268
269 dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
270
271 /* Go through all distances. */
272 while (dist < ZOPFLI_WINDOW_SIZE) {
273 unsigned short currentlength = 0;
274
275 assert(p < ZOPFLI_WINDOW_SIZE);
276 assert(p == hprev[pp]);
277 assert(hhashval[p] == hval);
278
279 if (dist > 0) {
280 assert(pos < size);
281 assert(dist <= pos);
282 scan = &array[pos];
283 match = &array[pos - dist];
284
285 /* Testing the byte at position bestlength first, goes slightly faster. */
286 if (pos + bestlength >= size
287 || *(scan + bestlength) == *(match + bestlength)) {
288
289 #ifdef ZOPFLI_HASH_SAME
290 unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK];
291 if (same0 > 2 && *scan == *match) {
292 unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK];
293 unsigned short same = same0 < same1 ? same0 : same1;
294 if (same > limit) same = limit;
295 scan += same;
296 match += same;
297 }
298 #endif
299 scan = GetMatch(scan, match, arrayend, arrayend_safe);
300 currentlength = scan - &array[pos]; /* The found length. */
301 }
302
303 if (currentlength > bestlength) {
304 if (sublen) {
305 unsigned short j;
306 for (j = bestlength + 1; j <= currentlength; j++) {
307 sublen[j] = dist;
308 }
309 }
310 bestdist = dist;
311 bestlength = currentlength;
312 if (currentlength >= limit) break;
313 }
314 }
315
316
317 #ifdef ZOPFLI_HASH_SAME_HASH
318 /* Switch to the other hash once this will be more efficient. */
319 if (hhead != h->head2 && bestlength >= h->same[hpos] &&
320 h->val2 == h->hashval2[p]) {
321 /* Now use the hash that encodes the length and first byte. */
322 hhead = h->head2;
323 hprev = h->prev2;
324 hhashval = h->hashval2;
325 hval = h->val2;
326 }
327 #endif
328
329 pp = p;
330 p = hprev[p];
331 if (p == pp) break; /* Uninited prev value. */
332
333 dist += p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
334
335 #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
336 chain_counter--;
337 if (chain_counter <= 0) break;
338 #endif
339 }
340
341 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
342 StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength);
343 #endif
344
345 assert(bestlength <= limit);
346
347 *distance = bestdist;
348 *length = bestlength;
349 assert(pos + *length <= size);
350 }
351
ZopfliLZ77Greedy(ZopfliBlockState * s,const unsigned char * in,size_t instart,size_t inend,ZopfliLZ77Store * store)352 void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
353 size_t instart, size_t inend,
354 ZopfliLZ77Store* store) {
355 size_t i = 0, j;
356 unsigned short leng;
357 unsigned short dist;
358 int lengvalue;
359 size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
360 ? instart - ZOPFLI_WINDOW_SIZE : 0;
361 unsigned short dummysublen[259];
362
363 ZopfliHash hash;
364 ZopfliHash* h = &hash;
365
366 #ifdef ZOPFLI_LAZY_MATCHING
367 /* Lazy matching. */
368 unsigned prev_length = 0;
369 unsigned prev_match = 0;
370 int prevlengvalue;
371 int match_available = 0;
372 #endif
373
374 if (instart == inend) return;
375
376 ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
377 ZopfliWarmupHash(in, windowstart, inend, h);
378 for (i = windowstart; i < instart; i++) {
379 ZopfliUpdateHash(in, i, inend, h);
380 }
381
382 for (i = instart; i < inend; i++) {
383 ZopfliUpdateHash(in, i, inend, h);
384
385 ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen,
386 &dist, &leng);
387 lengvalue = GetLengthValue(leng, dist);
388
389 #ifdef ZOPFLI_LAZY_MATCHING
390 /* Lazy matching. */
391 prevlengvalue = GetLengthValue(prev_length, prev_match);
392 if (match_available) {
393 match_available = 0;
394 if (lengvalue > prevlengvalue + 1) {
395 ZopfliStoreLitLenDist(in[i - 1], 0, store);
396 if (lengvalue >= ZOPFLI_MIN_MATCH && lengvalue < ZOPFLI_MAX_MATCH) {
397 match_available = 1;
398 prev_length = leng;
399 prev_match = dist;
400 continue;
401 }
402 } else {
403 /* Add previous to output. */
404 leng = prev_length;
405 dist = prev_match;
406 lengvalue = prevlengvalue;
407 /* Add to output. */
408 ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
409 ZopfliStoreLitLenDist(leng, dist, store);
410 for (j = 2; j < leng; j++) {
411 assert(i < inend);
412 i++;
413 ZopfliUpdateHash(in, i, inend, h);
414 }
415 continue;
416 }
417 }
418 else if (lengvalue >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
419 match_available = 1;
420 prev_length = leng;
421 prev_match = dist;
422 continue;
423 }
424 /* End of lazy matching. */
425 #endif
426
427 /* Add to output. */
428 if (lengvalue >= ZOPFLI_MIN_MATCH) {
429 ZopfliVerifyLenDist(in, inend, i, dist, leng);
430 ZopfliStoreLitLenDist(leng, dist, store);
431 } else {
432 leng = 1;
433 ZopfliStoreLitLenDist(in[i], 0, store);
434 }
435 for (j = 1; j < leng; j++) {
436 assert(i < inend);
437 i++;
438 ZopfliUpdateHash(in, i, inend, h);
439 }
440 }
441
442 ZopfliCleanHash(h);
443 }
444
ZopfliLZ77Counts(const unsigned short * litlens,const unsigned short * dists,size_t start,size_t end,size_t * ll_count,size_t * d_count)445 void ZopfliLZ77Counts(const unsigned short* litlens,
446 const unsigned short* dists,
447 size_t start, size_t end,
448 size_t* ll_count, size_t* d_count) {
449 size_t i;
450
451 for (i = 0; i < 288; i++) {
452 ll_count[i] = 0;
453 }
454 for (i = 0; i < 32; i++) {
455 d_count[i] = 0;
456 }
457
458 for (i = start; i < end; i++) {
459 if (dists[i] == 0) {
460 ll_count[litlens[i]]++;
461 } else {
462 ll_count[ZopfliGetLengthSymbol(litlens[i])]++;
463 d_count[ZopfliGetDistSymbol(dists[i])]++;
464 }
465 }
466
467 ll_count[256] = 1; /* End symbol. */
468 }
469