1 /* 2 Copyright 2011 Google Inc. All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 16 Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 */ 19 20 #include "lz77.h" 21 #include "util.h" 22 23 #include <assert.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 27 void ZopfliInitLZ77Store(ZopfliLZ77Store* store) { 28 store->size = 0; 29 store->litlens = 0; 30 store->dists = 0; 31 } 32 33 void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) { 34 free(store->litlens); 35 free(store->dists); 36 } 37 38 void ZopfliCopyLZ77Store( 39 const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) { 40 size_t i; 41 ZopfliCleanLZ77Store(dest); 42 dest->litlens = 43 (unsigned short*)malloc(sizeof(*dest->litlens) * source->size); 44 dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size); 45 46 if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */ 47 48 dest->size = source->size; 49 for (i = 0; i < source->size; i++) { 50 dest->litlens[i] = source->litlens[i]; 51 dest->dists[i] = source->dists[i]; 52 } 53 } 54 55 /* 56 Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store. 57 context must be a ZopfliLZ77Store*. 58 */ 59 void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist, 60 ZopfliLZ77Store* store) { 61 size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */ 62 ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size); 63 ZOPFLI_APPEND_DATA(dist, &store->dists, &size2); 64 } 65 66 /* 67 Gets the value of the length given the distance. Typically, the value of the 68 length is the length, but if the distance is very long, decrease the value of 69 the length a bit to make up for the fact that long distances use large amounts 70 of extra bits. 71 */ 72 static int GetLengthValue(int length, int distance) { 73 /* 74 At distance > 1024, using length 3 is no longer good, due to the large amount 75 of extra bits for the distance code. distance > 1024 uses 9+ extra bits, and 76 this seems to be the sweet spot. 77 */ 78 return distance > 1024 ? length - 1 : length; 79 } 80 81 void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos, 82 unsigned short dist, unsigned short length) { 83 84 /* TODO(lode): make this only run in a debug compile, it's for assert only. */ 85 size_t i; 86 87 assert(pos + length <= datasize); 88 for (i = 0; i < length; i++) { 89 if (data[pos - dist + i] != data[pos + i]) { 90 assert(data[pos - dist + i] == data[pos + i]); 91 break; 92 } 93 } 94 } 95 96 /* 97 Finds how long the match of scan and match is. Can be used to find how many 98 bytes starting from scan, and from match, are equal. Returns the last byte 99 after scan, which is still equal to the correspondinb byte after match. 100 scan is the position to compare 101 match is the earlier position to compare. 102 end is the last possible byte, beyond which to stop looking. 103 safe_end is a few (8) bytes before end, for comparing multiple bytes at once. 104 */ 105 static const unsigned char* GetMatch(const unsigned char* scan, 106 const unsigned char* match, 107 const unsigned char* end, 108 const unsigned char* safe_end) { 109 110 if (sizeof(size_t) == 8) { 111 /* 8 checks at once per array bounds check (size_t is 64-bit). */ 112 while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) { 113 scan += 8; 114 match += 8; 115 } 116 } else if (sizeof(unsigned int) == 4) { 117 /* 4 checks at once per array bounds check (unsigned int is 32-bit). */ 118 while (scan < safe_end 119 && *((unsigned int*)scan) == *((unsigned int*)match)) { 120 scan += 4; 121 match += 4; 122 } 123 } else { 124 /* do 8 checks at once per array bounds check. */ 125 while (scan < safe_end && *scan == *match && *++scan == *++match 126 && *++scan == *++match && *++scan == *++match 127 && *++scan == *++match && *++scan == *++match 128 && *++scan == *++match && *++scan == *++match) { 129 scan++; match++; 130 } 131 } 132 133 /* The remaining few bytes. */ 134 while (scan != end && *scan == *match) { 135 scan++; match++; 136 } 137 138 return scan; 139 } 140 141 #ifdef ZOPFLI_LONGEST_MATCH_CACHE 142 /* 143 Gets distance, length and sublen values from the cache if possible. 144 Returns 1 if it got the values from the cache, 0 if not. 145 Updates the limit value to a smaller one if possible with more limited 146 information from the cache. 147 */ 148 static int TryGetFromLongestMatchCache(ZopfliBlockState* s, 149 size_t pos, size_t* limit, 150 unsigned short* sublen, unsigned short* distance, unsigned short* length) { 151 /* The LMC cache starts at the beginning of the block rather than the 152 beginning of the whole array. */ 153 size_t lmcpos = pos - s->blockstart; 154 155 /* Length > 0 and dist 0 is invalid combination, which indicates on purpose 156 that this cache value is not filled in yet. */ 157 unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || 158 s->lmc->dist[lmcpos] != 0); 159 unsigned char limit_ok_for_cache = cache_available && 160 (*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit || 161 (sublen && ZopfliMaxCachedSublen(s->lmc, 162 lmcpos, s->lmc->length[lmcpos]) >= *limit)); 163 164 if (s->lmc && limit_ok_for_cache && cache_available) { 165 if (!sublen || s->lmc->length[lmcpos] 166 <= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) { 167 *length = s->lmc->length[lmcpos]; 168 if (*length > *limit) *length = *limit; 169 if (sublen) { 170 ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen); 171 *distance = sublen[*length]; 172 if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) { 173 assert(sublen[*length] == s->lmc->dist[lmcpos]); 174 } 175 } else { 176 *distance = s->lmc->dist[lmcpos]; 177 } 178 return 1; 179 } 180 /* Can't use much of the cache, since the "sublens" need to be calculated, 181 but at least we already know when to stop. */ 182 *limit = s->lmc->length[lmcpos]; 183 } 184 185 return 0; 186 } 187 188 /* 189 Stores the found sublen, distance and length in the longest match cache, if 190 possible. 191 */ 192 static void StoreInLongestMatchCache(ZopfliBlockState* s, 193 size_t pos, size_t limit, 194 const unsigned short* sublen, 195 unsigned short distance, unsigned short length) { 196 /* The LMC cache starts at the beginning of the block rather than the 197 beginning of the whole array. */ 198 size_t lmcpos = pos - s->blockstart; 199 200 /* Length > 0 and dist 0 is invalid combination, which indicates on purpose 201 that this cache value is not filled in yet. */ 202 unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || 203 s->lmc->dist[lmcpos] != 0); 204 205 if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) { 206 assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0); 207 s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance; 208 s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : length; 209 assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0)); 210 ZopfliSublenToCache(sublen, lmcpos, length, s->lmc); 211 } 212 } 213 #endif 214 215 void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h, 216 const unsigned char* array, 217 size_t pos, size_t size, size_t limit, 218 unsigned short* sublen, unsigned short* distance, unsigned short* length) { 219 unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp; 220 unsigned short bestdist = 0; 221 unsigned short bestlength = 1; 222 const unsigned char* scan; 223 const unsigned char* match; 224 const unsigned char* arrayend; 225 const unsigned char* arrayend_safe; 226 #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE 227 int chain_counter = ZOPFLI_MAX_CHAIN_HITS; /* For quitting early. */ 228 #endif 229 230 unsigned dist = 0; /* Not unsigned short on purpose. */ 231 232 int* hhead = h->head; 233 unsigned short* hprev = h->prev; 234 int* hhashval = h->hashval; 235 int hval = h->val; 236 237 #ifdef ZOPFLI_LONGEST_MATCH_CACHE 238 if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) { 239 assert(pos + *length <= size); 240 return; 241 } 242 #endif 243 244 assert(limit <= ZOPFLI_MAX_MATCH); 245 assert(limit >= ZOPFLI_MIN_MATCH); 246 assert(pos < size); 247 248 if (size - pos < ZOPFLI_MIN_MATCH) { 249 /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to 250 try. */ 251 *length = 0; 252 *distance = 0; 253 return; 254 } 255 256 if (pos + limit > size) { 257 limit = size - pos; 258 } 259 arrayend = &array[pos] + limit; 260 arrayend_safe = arrayend - 8; 261 262 assert(hval < 65536); 263 264 pp = hhead[hval]; /* During the whole loop, p == hprev[pp]. */ 265 p = hprev[pp]; 266 267 assert(pp == hpos); 268 269 dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp); 270 271 /* Go through all distances. */ 272 while (dist < ZOPFLI_WINDOW_SIZE) { 273 unsigned short currentlength = 0; 274 275 assert(p < ZOPFLI_WINDOW_SIZE); 276 assert(p == hprev[pp]); 277 assert(hhashval[p] == hval); 278 279 if (dist > 0) { 280 assert(pos < size); 281 assert(dist <= pos); 282 scan = &array[pos]; 283 match = &array[pos - dist]; 284 285 /* Testing the byte at position bestlength first, goes slightly faster. */ 286 if (pos + bestlength >= size 287 || *(scan + bestlength) == *(match + bestlength)) { 288 289 #ifdef ZOPFLI_HASH_SAME 290 unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK]; 291 if (same0 > 2 && *scan == *match) { 292 unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK]; 293 unsigned short same = same0 < same1 ? same0 : same1; 294 if (same > limit) same = limit; 295 scan += same; 296 match += same; 297 } 298 #endif 299 scan = GetMatch(scan, match, arrayend, arrayend_safe); 300 currentlength = scan - &array[pos]; /* The found length. */ 301 } 302 303 if (currentlength > bestlength) { 304 if (sublen) { 305 unsigned short j; 306 for (j = bestlength + 1; j <= currentlength; j++) { 307 sublen[j] = dist; 308 } 309 } 310 bestdist = dist; 311 bestlength = currentlength; 312 if (currentlength >= limit) break; 313 } 314 } 315 316 317 #ifdef ZOPFLI_HASH_SAME_HASH 318 /* Switch to the other hash once this will be more efficient. */ 319 if (hhead != h->head2 && bestlength >= h->same[hpos] && 320 h->val2 == h->hashval2[p]) { 321 /* Now use the hash that encodes the length and first byte. */ 322 hhead = h->head2; 323 hprev = h->prev2; 324 hhashval = h->hashval2; 325 hval = h->val2; 326 } 327 #endif 328 329 pp = p; 330 p = hprev[p]; 331 if (p == pp) break; /* Uninited prev value. */ 332 333 dist += p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp); 334 335 #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE 336 chain_counter--; 337 if (chain_counter <= 0) break; 338 #endif 339 } 340 341 #ifdef ZOPFLI_LONGEST_MATCH_CACHE 342 StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength); 343 #endif 344 345 assert(bestlength <= limit); 346 347 *distance = bestdist; 348 *length = bestlength; 349 assert(pos + *length <= size); 350 } 351 352 void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in, 353 size_t instart, size_t inend, 354 ZopfliLZ77Store* store) { 355 size_t i = 0, j; 356 unsigned short leng; 357 unsigned short dist; 358 int lengvalue; 359 size_t windowstart = instart > ZOPFLI_WINDOW_SIZE 360 ? instart - ZOPFLI_WINDOW_SIZE : 0; 361 unsigned short dummysublen[259]; 362 363 ZopfliHash hash; 364 ZopfliHash* h = &hash; 365 366 #ifdef ZOPFLI_LAZY_MATCHING 367 /* Lazy matching. */ 368 unsigned prev_length = 0; 369 unsigned prev_match = 0; 370 int prevlengvalue; 371 int match_available = 0; 372 #endif 373 374 if (instart == inend) return; 375 376 ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h); 377 ZopfliWarmupHash(in, windowstart, inend, h); 378 for (i = windowstart; i < instart; i++) { 379 ZopfliUpdateHash(in, i, inend, h); 380 } 381 382 for (i = instart; i < inend; i++) { 383 ZopfliUpdateHash(in, i, inend, h); 384 385 ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen, 386 &dist, &leng); 387 lengvalue = GetLengthValue(leng, dist); 388 389 #ifdef ZOPFLI_LAZY_MATCHING 390 /* Lazy matching. */ 391 prevlengvalue = GetLengthValue(prev_length, prev_match); 392 if (match_available) { 393 match_available = 0; 394 if (lengvalue > prevlengvalue + 1) { 395 ZopfliStoreLitLenDist(in[i - 1], 0, store); 396 if (lengvalue >= ZOPFLI_MIN_MATCH && lengvalue < ZOPFLI_MAX_MATCH) { 397 match_available = 1; 398 prev_length = leng; 399 prev_match = dist; 400 continue; 401 } 402 } else { 403 /* Add previous to output. */ 404 leng = prev_length; 405 dist = prev_match; 406 lengvalue = prevlengvalue; 407 /* Add to output. */ 408 ZopfliVerifyLenDist(in, inend, i - 1, dist, leng); 409 ZopfliStoreLitLenDist(leng, dist, store); 410 for (j = 2; j < leng; j++) { 411 assert(i < inend); 412 i++; 413 ZopfliUpdateHash(in, i, inend, h); 414 } 415 continue; 416 } 417 } 418 else if (lengvalue >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) { 419 match_available = 1; 420 prev_length = leng; 421 prev_match = dist; 422 continue; 423 } 424 /* End of lazy matching. */ 425 #endif 426 427 /* Add to output. */ 428 if (lengvalue >= ZOPFLI_MIN_MATCH) { 429 ZopfliVerifyLenDist(in, inend, i, dist, leng); 430 ZopfliStoreLitLenDist(leng, dist, store); 431 } else { 432 leng = 1; 433 ZopfliStoreLitLenDist(in[i], 0, store); 434 } 435 for (j = 1; j < leng; j++) { 436 assert(i < inend); 437 i++; 438 ZopfliUpdateHash(in, i, inend, h); 439 } 440 } 441 442 ZopfliCleanHash(h); 443 } 444 445 void ZopfliLZ77Counts(const unsigned short* litlens, 446 const unsigned short* dists, 447 size_t start, size_t end, 448 size_t* ll_count, size_t* d_count) { 449 size_t i; 450 451 for (i = 0; i < 288; i++) { 452 ll_count[i] = 0; 453 } 454 for (i = 0; i < 32; i++) { 455 d_count[i] = 0; 456 } 457 458 for (i = start; i < end; i++) { 459 if (dists[i] == 0) { 460 ll_count[litlens[i]]++; 461 } else { 462 ll_count[ZopfliGetLengthSymbol(litlens[i])]++; 463 d_count[ZopfliGetDistSymbol(dists[i])]++; 464 } 465 } 466 467 ll_count[256] = 1; /* End symbol. */ 468 } 469