/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"

#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)

#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE     (1<<30)

#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


/*-*************************************
*  Price functions for optimal parser
***************************************/

#if 0    /* approximation at bit level (for tests) */
#  define BITCOST_ACCURACY 0
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy (for tests) */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif

/* ZSTD_bitWeight() :
 * provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}

/* ZSTD_fracWeight() :
 * provide fractional-bit "cost" of a stat,
 * using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    /* Fweight was meant for "Fractional weight"
     * but it's effectively a value between 1 and 2
     * using fixed point arithmetic */
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}

#if (DEBUGLEVEL>=2)
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(int price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
#endif
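/* Illustrative note (added by the editor, not from the original source) :
 * ZSTD_fracWeight() is a fixed-point log2 approximation. Worked example, rawStat = 5 :
 *   stat    = 6
 *   hb      = ZSTD_highbit32(6) = 2
 *   BWeight = 2 * 256 = 512
 *   FWeight = (6 << 8) >> 2 = 384
 *   weight  = 896, i.e. 3.5 in 8-bit fixed point.
 * Prices below are always formed as differences, e.g. WEIGHT(totalSum) - WEIGHT(symbolFreq),
 * so the constant offset cancels and the result approximates -log2(symbolFreq/totalSum),
 * expressed in fractional bits scaled by BITCOST_MULTIPLIER. */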

static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}


static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    size_t n;
    U32 total = 0;
    for (n=0; n<nbElts; n++) {
        total += table[n];
    }
    return total;
}

typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;

static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
            (unsigned)lastEltIndex+1, (unsigned)shift );
    assert(shift < 30);
    for (s=0; s<lastEltIndex+1; s++) {
        unsigned const base = base1 ? 1 : (table[s]>0);
        unsigned const newStat = base + (table[s] >> shift);
        sum += newStat;
        table[s] = newStat;
    }
    return sum;
}

/* ZSTD_scaleStats() :
 * reduce all elt frequencies in table if sum too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
    U32 const factor = prevsum >> logTarget;
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (factor <= 1) return prevsum;
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
}
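/* Illustrative note (added by the editor, not from the original source) :
 * Worked example for ZSTD_scaleStats() with logTarget = 11 (target sum ~2K) :
 *   prevsum = 10000  =>  factor = 10000 >> 11 = 4  =>  shift = ZSTD_highbit32(4) = 2
 *   each stat becomes 1 + (stat >> 2), so the sum lands back near the target
 *   while every symbol keeps a non-zero frequency (base_1guaranteed).
 * If prevsum is already < 2 * (1 << logTarget), factor is <= 1 and the stats are left untouched. */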

/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    and init from zero if there is none,
 *    using src for literals stats, and baseline stats for sequence symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
                  int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */

        /* heuristic: use pre-defined stats for too small inputs */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
            optPtr->priceType = zop_predef;
        }

        assert(optPtr->symbolCosts != NULL);
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {

            /* huffman stats covering the full value set : table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

            if (compressedLiterals) {
                /* generate literals statistics from huffman table */
                unsigned lit;
                assert(optPtr->litFreq != NULL);
                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                    assert(bitCost <= scaleLog);
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litSum += optPtr->litFreq[lit];
            }   }

            {   unsigned ll;
                FSE_CState_t llstate;
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                optPtr->litLengthSum = 0;
                for (ll=0; ll<=MaxLL; ll++) {
                    U32 const scaleLog = 10;   /* scale to 1K */
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                    assert(bitCost < scaleLog);
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
            }   }

            {   unsigned ml;
                FSE_CState_t mlstate;
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                optPtr->matchLengthSum = 0;
                for (ml=0; ml<=MaxML; ml++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                    assert(bitCost < scaleLog);
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
            }   }

            {   unsigned of;
                FSE_CState_t ofstate;
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                optPtr->offCodeSum = 0;
                for (of=0; of<=MaxOff; of++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                    assert(bitCost < scaleLog);
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
            }   }

        } else {  /* first block, no dictionary */

            assert(optPtr->litFreq != NULL);
            if (compressedLiterals) {
                /* base initial cost of literals on direct frequency within src */
                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
            }

            {   unsigned const baseLLfreqs[MaxLL+1] = {
                    4, 2, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
            }

            {   unsigned ml;
                for (ml=0; ml<=MaxML; ml++)
                    optPtr->matchLengthFreq[ml] = 1;
            }
            optPtr->matchLengthSum = MaxML+1;

            {   unsigned const baseOFCfreqs[MaxOff+1] = {
                    6, 2, 1, 1, 2, 3, 4, 4,
                    4, 3, 2, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
            }

        }

    } else {   /* new block : scale down accumulated statistics */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (which length can be 0).
 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel)
{
    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
    if (litLength == 0) return 0;

    if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */

    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistic used */

    /* dynamic statistics */
    {   U32 price = optPtr->litSumBasePrice * litLength;
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
        U32 u;
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
        for (u=0; u < litLength; u++) {
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
            price -= litPrice;
        }
        return price;
    }
}
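/* Illustrative note (added by the editor, not from the original source) :
 * In the dynamic branch above, each literal byte b costs roughly
 *   WEIGHT(litSum) - WEIGHT(litFreq[b])  ~=  -log2(litFreq[b] / litSum)
 * fractional bits (times BITCOST_MULTIPLIER). The loop computes this as
 * litSumBasePrice * litLength minus the per-byte weights; capping each weight
 * at litPriceMax guarantees every byte still contributes at least
 * BITCOST_MULTIPLIER, i.e. the total never drops below ~1 bit per literal. */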

/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    if (optPtr->priceType == zop_predef)
        return WEIGHT(litLength, optLevel);

    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
     * because it isn't representable in the zstd format.
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
     * In such a case, the block would be all literals.
     */
    if (litLength == ZSTD_BLOCKSIZE_MAX)
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
             + optPtr->litLengthSumBasePrice
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    }
}

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence.
 * Must be combined with the literals cost (ZSTD_rawLiteralsCost() + ZSTD_litLengthPrice())
 * to get the full cost of a sequence.
 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
 */
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offBase,
                   U32 const matchLength,
             const optState_t* const optPtr,
                   int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(offBase);
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
        return WEIGHT(mlBase, optLevel)
             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */

    /* dynamic statistics */
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    if ((optLevel<2) /*static*/ && offCode >= 20)
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */

    /* match Length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    }

    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}
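/* Illustrative note (added by the editor, not from the original source) :
 * @offBase is the ZSTD_storeSeq() sumtype : repcodes map to the values 1..3
 * (see REPCODE_TO_OFFBASE usage below), real offsets to offset + ZSTD_REP_NUM,
 * so offCode = ZSTD_highbit32(offBase) is the format's offset code and also
 * the number of extra raw bits the offset consumes.
 * Example (assuming OFFSET_TO_OFFBASE(offset) == offset + 3) :
 *   offBase = 1    (repcode 1)    => offCode = 0, no extra bits
 *   offBase = 2051 (offset 2048)  => offCode = 11, 11 extra bits,
 *   plus the entropy cost of the offset-code symbol itself. */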

/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offBase, U32 matchLength)
{
    /* literals */
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* offset code : follows storeSeq() numeric representation */
    {   U32 const offCode = ZSTD_highbit32(offBase);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match Length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}


/* ZSTD_readMINMATCH() :
 * function safe only for comparisons
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}
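/* Illustrative note (added by the editor, not from the original source) :
 * For length==3, ZSTD_readMINMATCH() reads 4 bytes and shifts by 8 so that the
 * byte outside the 3-byte window no longer influences the result :
 * on little-endian the 4th byte sits in bits 24-31 and is discarded by <<8,
 * on big-endian it sits in bits 0-7 and is discarded by >>8.
 * The returned value is therefore only meaningful for equality comparisons
 * against another ZSTD_readMINMATCH(ptr, 3), never as an actual 3-byte load. */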


/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                       U32* nextToUpdate3,
                                       const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 * @param ip assumed <= iend-8 .
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @return : nb of positions added */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertBt1(
                const ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;   /* Update Hash Table */

    assert(windowLow > 0);
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }    /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32
ZSTD_insertBtAndGetAllMatches (
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                ZSTD_matchState_t* ms,
                U32* nextToUpdate3,
                const BYTE* const ip, const BYTE* const iLimit,
                const ZSTD_dictMode_e dictMode,
                const U32 rep[ZSTD_REP_NUM],
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                const U32 lengthToBeat,
                const U32 mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = curr+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }  /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
        }   }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }   }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}
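/* Illustrative note (added by the editor, not from the original source) :
 * matches[] is filled in order of strictly increasing length : each candidate
 * is only recorded when it beats the current bestLength, so after the call
 * matches[0].len < matches[1].len < ... < matches[nbMatches-1].len.
 * The optimal parser below relies on this ordering : it reads matches[nbMatches-1]
 * as the longest match, and prices lengths from matches[matchNb-1].len+1
 * up to matches[matchNb].len for each candidate when filling the price table. */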

typedef U32 (*ZSTD_getAllMatchesFn)(
    ZSTD_match_t*,
    ZSTD_matchState_t*,
    U32*,
    const BYTE*,
    const BYTE*,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0,
    U32 const lengthToBeat);

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_matchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    if (ip < ms->window.base + ms->nextToUpdate)
        return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}

#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
            ZSTD_match_t* matches,                             \
            ZSTD_matchState_t* ms,                             \
            U32* nextToUpdate3,                                \
            const BYTE* ip,                                    \
            const BYTE* const iHighLimit,                      \
            const U32 rep[ZSTD_REP_NUM],                       \
            U32 const ll0,                                     \
            U32 const lengthToBeat)                            \
    {                                                          \
        return ZSTD_btGetAllMatches_internal(                  \
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    }

#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }

static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}
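/* Illustrative note (added by the editor, not from the original source) :
 * The table above specializes ZSTD_btGetAllMatches_internal() per (dictMode, mls),
 * so dictMode and mls become compile-time constants inside the hot search loop.
 * Example resolution : cParams.minMatch == 5 with ZSTD_noDict selects
 * getAllMatchesFns[0][2], i.e. the generated ZSTD_btGetAllMatches_noDict_5(). */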

/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate */
    U32 endPosInBlock;        /* End position of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}

/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}
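/* Illustrative note (added by the editor, not from the original source) :
 * Worked example : currPosInBlock = 100, blockBytesRemaining = 400,
 * current raw sequence = { litLength = 20, matchLength = 64, offset = 9000 },
 * posInSequence = 0.
 *   => literalsBytesRemaining = 20, matchBytesRemaining = 64
 *   => startPosInBlock = 120, endPosInBlock = 184, offset = 9000
 * and the seq store cursor advances by 84 bytes (the whole sequence),
 * since the candidate ends well before the block end (100 + 400 = 500). */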

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough,
 * based on its 'matchStartPosInBlock' and 'matchEndPosInBlock',
 * into 'matches'. Maintains the correct ordering of 'matches'.
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

    /* Ensure that current block position is not outside of the match */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < MINMATCH) {
        return;
    }

    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
                 candidateOffBase, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffBase;
        (*nbMatches)++;
    }
}

/* ZSTD_optLdm_processMatchCandidate():
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes)
{
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}


/*-*******************************
*  Optimal parser
*********************************/

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        (void)table;
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif

#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                               seqStore_t* seqStore,
                               U32 rep[ZSTD_REP_NUM],
                               const void* src, size_t srcSize,
                               const int optLevel,
                               const ZSTD_dictMode_e dictMode)
{
    optState_t* const optStatePtr = &ms->opt;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    const ZSTD_compressionParameters* const cParams = &ms->cParams;

    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    ZSTD_optimal_t lastStretch;
    ZSTD_optLdm_t optLdm;

    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));

    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;

        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                              (U32)(ip-istart), (U32)(iend-ip));
            if (!nbMatches) {
                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
                ip++;
                continue;
            }

            /* Match found: let's store this solution, and eventually find more candidates.
             * During this forward pass, @opt is used to store stretches,
             * defined as "a match followed by N literals".
             * Note how this is different from a Sequence, which is "N literals followed by a match".
             * Storing stretches allows us to store different match predecessors
             * for each literal position part of a literals run. */

            /* initialize opt[0] */
            opt[0].mlen = 0;  /* there are only literals so far */
            opt[0].litlen = litlen;
            /* No need to include the actual price of the literals before the first match
             * because it is static for the duration of the forward pass, and is included
             * in every subsequent price. But, we include the literal length because
             * the cost variation of litlen depends on the value of litlen.
             */
            opt[0].price = LL_PRICE(litlen);
            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));

            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                U32 const maxOffBase = matches[nbMatches-1].off;
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
                    lastStretch.litlen = 0;
                    lastStretch.mlen = maxML;
                    lastStretch.off = maxOffBase;
                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
                                maxML, sufficient_len);
                    cur = 0;
                    last_pos = maxML;
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting position == 0 */
            assert(opt[0].price >= 0);
            {   U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE;
                    opt[pos].mlen = 0;
                    opt[pos].litlen = litlen + pos;
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offBase = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    for ( ; pos <= end ; pos++ ) {
                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
                        int const sequencePrice = opt[0].price + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                    pos, ZSTD_fCost(sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offBase;
                        opt[pos].litlen = 0; /* end of match */
                        opt[pos].price = sequencePrice + LL_PRICE(0);
                    }
                }
                last_pos = pos-1;
                opt[pos].price = ZSTD_MAX_PRICE;
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur <= ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);

            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = opt[cur-1].litlen + 1;
                int const price = opt[cur-1].price
                                + LIT_PRICE(ip+cur-1)
                                + LL_INCPRICE(litlen);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    ZSTD_optimal_t const prevMatch = opt[cur];
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
                    opt[cur] = opt[cur-1];
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                    if ( (optLevel >= 1) /* additional check only for higher modes */
                      && (prevMatch.litlen == 0) /* replace a match */
                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
                      && LIKELY(ip + cur < iend)
                      ) {
                        /* check next position, in case it would be cheaper */
                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
                        if ( (with1literal < withMoreLiterals)
                          && (with1literal < opt[cur+1].price) ) {
                            /* update offset history - before it disappears */
                            U32 const prev = cur - prevMatch.mlen;
                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
                            assert(cur >= prevMatch.mlen);
                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
                            opt[cur+1] = prevMatch;  /* mlen & offbase */
                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
                            opt[cur+1].litlen = 1;
                            opt[cur+1].price = with1literal;
                            if (last_pos < cur+1) last_pos = cur+1;
                        }
                    }
                } else {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
                }
            }

            /* Offset history is not updated during match comparison.
             * Do it here, now that the match is selected and confirmed.
             */
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
            assert(cur >= opt[cur].mlen);
            if (opt[cur].litlen == 0) {
                /* just finished a match => alter offset history */
                U32 const prev = cur - opt[cur].mlen;
                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
            }

            /* last match must start at a minimum distance of 8 from oend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

            if ( (optLevel==0) /*static_test*/
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
            }

            assert(opt[cur].price >= 0);
            {   U32 const ll0 = (opt[cur].litlen == 0);
                int const previousPrice = opt[cur].price;
                int const basePrice = previousPrice + LL_PRICE(0);
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                U32 matchNb;

                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                                  (U32)(inr-istart), (U32)(iend-inr));

                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {   U32 const longestML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
                                inr-istart, cur, nbMatches, longestML);

                    if ( (longestML > sufficient_len)
                      || (cur + longestML >= ZSTD_OPT_NUM)
                      || (ip + cur + longestML >= iend) ) {
                        lastStretch.mlen = longestML;
                        lastStretch.off = matches[nbMatches-1].off;
                        lastStretch.litlen = 0;
                        last_pos = cur + longestML;
                        goto _shortestPath;
                }   }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);

                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) {
                                /* fill empty positions, for future comparisons */
                                last_pos++;
                                opt[last_pos].price = ZSTD_MAX_PRICE;
                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
                            }
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = 0;
                            opt[pos].price = price;
                        } else {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
            opt[last_pos+1].price = ZSTD_MAX_PRICE;
        }  /* for (cur = 1; cur <= last_pos; cur++) */

        lastStretch = opt[last_pos];
        assert(cur >= lastStretch.mlen);
        cur = last_pos - lastStretch.mlen;

_shortestPath:   /* cur, last_pos and lastStretch have to be set */
        assert(opt[0].mlen == 0);
        assert(last_pos >= lastStretch.mlen);
        assert(cur == last_pos - lastStretch.mlen);

        if (lastStretch.mlen==0) {
            /* no solution : all matches have been converted into literals */
            assert(lastStretch.litlen == (ip - anchor) + last_pos);
            ip += last_pos;
            continue;
        }
        assert(lastStretch.off > 0);

        /* Update offset history */
        if (lastStretch.litlen == 0) {
            /* finishing on a match : update offset history */
            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
            ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
        } else {
            ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
            assert(cur >= lastStretch.litlen);
            cur -= lastStretch.litlen;
        }

        /* Let's write the shortest path solution.
         * It is stored in @opt in reverse order,
         * starting from @storeEnd (==cur+2),
         * effectively overwriting part of @opt.
         * Content is changed too:
         * - So far, @opt stored stretches, aka a match followed by literals
         * - Now, it will store sequences, aka literals followed by a match
         */
        {   U32 const storeEnd = cur + 2;
            U32 storeStart = storeEnd;
            U32 stretchPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                        last_pos, cur); (void)last_pos;
            assert(storeEnd < ZSTD_OPT_SIZE);
            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
            if (lastStretch.litlen > 0) {
                /* last "sequence" is unfinished: just a bunch of literals */
                opt[storeEnd].litlen = lastStretch.litlen;
                opt[storeEnd].mlen = 0;
                storeStart = storeEnd-1;
                opt[storeStart] = lastStretch;
            } else {
                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
                storeStart = storeEnd;
            }
            while (1) {
                ZSTD_optimal_t nextStretch = opt[stretchPos];
                opt[storeStart].litlen = nextStretch.litlen;
                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
                if (nextStretch.mlen == 0) {
                    /* reaching beginning of segment */
                    break;
                }
                storeStart--;
                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
                stretchPos -= nextStretch.litlen + nextStretch.mlen;
            }

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore");
            {   U32 storePos;
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offBase = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
                                anchor - istart, (unsigned)llen, (unsigned)mlen);

                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
                        assert(storePos == storeEnd);   /* must be last sequence */
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;   /* will finish */
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
                    anchor += advance;
                    ip = anchor;
            }   }
            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);

            /* update all costs */
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
#endif /* build exclusions */

#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt0(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}
#endif
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
#endif


#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
 * this function cannot error out; its narrow contract must be respected.
 */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                          seqStore_t* seqStore,
                          U32 rep[ZSTD_REP_NUM],
                          const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */

    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt */

    /* invalidate first scan from history, only keep entropy stats */
    ZSTD_resetSeqStore(seqStore);
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;
}

size_t ZSTD_compressBlock_btultra(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btultra2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-passes strategy:
     * this strategy makes a first pass over the first block to collect statistics,
     * in order to seed the next round's statistics with them.
     * After the 1st pass, the function forgets this history and starts a new block.
     * Consequently, this can only work if no data has been previously loaded into the tables,
     * aka, no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on the first block);
     * the cost is 2x cpu time on the first block.
     * (an illustrative toy model of this stats-seeding idea appears at the end of this file)
     */
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    if ( (ms->opt.litLengthSum==0)                          /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)      /* no dictionary */
      && (curr == ms->window.dictLimit)                     /* start of frame, nothing already loaded nor skipped */
      && (srcSize > ZSTD_PREDEF_THRESHOLD)                  /* input large enough to not employ default stats */
      ) {
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
    }

    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
#endif

#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
#endif

#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
#endif

/* note : no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries,
 * and is specific to the first block (no prefix) */
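
/* Editor's illustrative sketch (not part of upstream zstd) :
 * the 2-passes idea behind ZSTD_compressBlock_btultra2 above, reduced to a toy
 * cost model. In the real code, the first pass is a full opt2 compression whose
 * sequences are discarded (ZSTD_initStats_ultra rewinds the window and keeps only
 * entropy statistics); here, "pass 1" merely counts symbol frequencies, and
 * "pass 2" prices the same input with those seeded stats instead of flat defaults.
 * All names below (toy_stats_t, seed_stats, price) are hypothetical.
 * Guarded with #if 0 so it never affects the build.
 */
#if 0
#include <stdio.h>
#include <string.h>

typedef struct { unsigned freq[256]; unsigned total; } toy_stats_t;

static void seed_stats(toy_stats_t* s, const unsigned char* src, size_t n)
{
    size_t i;
    memset(s, 0, sizeof(*s));
    for (i = 0; i < n; i++) s->freq[src[i]]++;   /* "pass 1": gather statistics only */
    s->total = (unsigned)n;
}

/* rough price, in whole bits, of one symbol under the gathered stats
 * (flat 8 bits if the symbol was never seen) */
static unsigned price(const toy_stats_t* s, unsigned char c)
{
    unsigned bits = 0;
    double p;
    if (s->total == 0 || s->freq[c] == 0) return 8;
    p = (double)s->freq[c] / s->total;
    while (p < 1.0) { p *= 2.0; bits++; }   /* crude ceil(-log2(p)) */
    return bits;
}

int main(void)
{
    const unsigned char msg[] = "abracadabra abracadabra";
    size_t const msgSize = sizeof(msg)-1;
    toy_stats_t stats;
    unsigned cost = 0;
    size_t i;

    seed_stats(&stats, msg, msgSize);          /* pass 1: seed statistics */
    for (i = 0; i < msgSize; i++)              /* pass 2: price with seeded stats */
        cost += price(&stats, msg[i]);
    printf("estimated cost: %u bits for %zu bytes\n", cost, msgSize);
    return 0;
}
#endif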