/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"

#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR

static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0) {
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
            }
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
            }
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}
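
/* Note (editorial addition): the ForCDict variant above hashes with
 * ZSTD_SHORT_CACHE_TAG_BITS extra bits and stores index+tag pairs via
 * ZSTD_writeTaggedIndex(). Assuming the packing used in
 * zstd_compress_internal.h, i.e. roughly:
 *     hashTable[hashAndTag >> TAG_BITS] = (index << TAG_BITS) | (hashAndTag & TAG_MASK);
 * a later lookup can compare the stored tag against the probe's tag and
 * reject most non-matching entries without touching the (likely cold)
 * dictionary content. The ForCCtx variant below runs the same fill loop
 * but stores plain, untagged indices. */
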
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp == ZSTD_tfp_forCDict) {
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
    } else {
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
    }
}


FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
    ip += ((ip - prefixLowest) == 0);
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;
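
        /* Note (editorial addition): hl0/idxl0/matchl0 describe the long-hash
         * candidate at ip and are handed from one inner iteration to the next
         * (see the hl1 -> hl0 hand-off at the bottom of the loop), so each
         * position's long hash is computed only once. When kStepIncr searches
         * in a row find nothing, `step` grows, trading ratio for speed on
         * hard-to-compress regions. */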
        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            if (idxl0 > prefixLowestIndex) {
                /* check prefix long match */
                if (MEM_read64(matchl0) == MEM_read64(ip)) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
                }
            }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            if (idxs0 > prefixLowestIndex) {
                /* check prefix short match */
                if (MEM_read32(matchs0) == MEM_read32(ip)) {
                    goto _search_next_long;
                }
            }

            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
#if defined(__aarch64__)
            PREFETCH_L1(ip+256);
#endif
        } while (ip1 <= ilimit);

_cleanup:
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

_search_next_long:

        /* check prefix long +1 match */
        if (idxl1 > prefixLowestIndex) {
            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
                ip = ip1;
                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
                offset = (U32)(ip-matchl1);
                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
                goto _match_found;
            }
        }

        /* if no long +1 match, explore the short match we found */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */

        /* fall-through */

_match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hashtable when ip1 is
             * greater than or equal to the new ip we will have after we're done
             * processing this match. Rather than perform that test directly
             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler,
             * more predictable test. The minmatch even if we take a short match is
             * 4 bytes, so as long as step, the distance between ip and ip1
             * (initially) is less than 4, we know ip1 < new ip. */
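            /* Editorial example of the test above: ip1 lies at most step <= 3
             * bytes past the match start, while the post-match ip lies
             * mLength >= 4 bytes past it (the catch-up loop moves ip and
             * mLength in lockstep, leaving ip + mLength unchanged), so
             * ip1 < new ip and the write below is safe. */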
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
        }
    }
}


FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
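
    /* Note (editorial addition): the attached CDict's tables hold tagged
     * indices (filled by ZSTD_fillDoubleHashTableForCDict above); when
     * prefetchCDictTables is set, warming them here hides the cache misses
     * that probing an otherwise cold dictionary would incur inside the
     * search loop. */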
    if (ms->prefetchCDictTables) {
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes);
        PREFETCH_AREA(dictHashSmall, chainTableBytes);
    }

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
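
        /* Note (editorial addition) on the "intentional underflow" guard
         * below: the unsigned subtraction wraps to a huge value whenever
         * repIndex >= prefixLowestIndex, so a single compare accepts every
         * in-prefix repIndex, and for dictionary positions it additionally
         * requires repIndex <= prefixLowestIndex-4, i.e. the initial 4-byte
         * read at repMatch lies entirely inside the dictionary segment. */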
        /* check repcode */
        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if (matchIndexL > prefixLowestIndex) {
            /* check prefix long match */
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                goto _match_found;
            }
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

_search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if (matchIndexL3 > prefixLowestIndex) {
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                    goto _match_found;
                }
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }
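
        /* Note (editorial addition): when the selected match lives in the
         * external dictionary, ZSTD_count_2segments() lets it run up to
         * dictEnd and then continue matching into the current prefix, which
         * a single-segment ZSTD_count() could not do. */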
        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional underflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)

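/* Note (editorial addition): ZSTD_GEN_DFAST_FN stamps out one specialization
 * per (dictMode, mls) pair. Passing mls as a compile-time constant lets the
 * force-inlined hash functions specialize on the match length, which is why
 * the dispatchers below branch on ms->cParams.minMatch once per block instead
 * of passing it through at runtime. */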

size_t ZSTD_compressBlock_doubleFast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}


size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}


static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
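
    /* Note (editorial addition): in this variant, match indices below
     * prefixStartIndex refer to the external segment (addressed via dictBase)
     * and indices at or above it to the current prefix (addressed via base);
     * each candidate below therefore selects its base pointer, its lowest
     * valid position, and its segment end (dictEnd vs iend) from that
     * comparison. */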
    /* Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
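
        /* Note (editorial addition): unlike the dictMatchState variant, which
         * asserts its rep offsets valid up front, extDict must also check
         * `offset_1 <= curr+1 - dictStartIndex` below, since offsets inherited
         * from previous blocks can reach below the start of the valid window. */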
        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional underflow : ensure repIndex2 doesn't overlap dict + prefix */
                    & (offset_2 <= current2 - dictStartIndex))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}

#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */