1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos #include "zstd_compress_internal.h" 12*3117ece4Schristos #include "hist.h" 13*3117ece4Schristos #include "zstd_opt.h" 14*3117ece4Schristos 15*3117ece4Schristos #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ 16*3117ece4Schristos || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ 17*3117ece4Schristos || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) 18*3117ece4Schristos 19*3117ece4Schristos #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ 20*3117ece4Schristos #define ZSTD_MAX_PRICE (1<<30) 21*3117ece4Schristos 22*3117ece4Schristos #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ 23*3117ece4Schristos 24*3117ece4Schristos 25*3117ece4Schristos /*-************************************* 26*3117ece4Schristos * Price functions for optimal parser 27*3117ece4Schristos ***************************************/ 28*3117ece4Schristos 29*3117ece4Schristos #if 0 /* approximation at bit level (for tests) */ 30*3117ece4Schristos # define BITCOST_ACCURACY 0 31*3117ece4Schristos # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) 32*3117ece4Schristos # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat)) 33*3117ece4Schristos #elif 0 /* fractional bit accuracy (for tests) */ 34*3117ece4Schristos # define BITCOST_ACCURACY 8 
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt)   ((void)(opt), ZSTD_fracWeight(stat))
#else   /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt)   ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif

/* ZSTD_bitWeight() :
 * provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}

/* ZSTD_fracWeight() :
 * provide fractional-bit "cost" of a stat,
 * using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;   /* +1 so that stat==0 still has a defined weight */
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    /* Fweight was meant for "Fractional weight"
     * but it's effectively a value between 1 and 2
     * using fixed point arithmetic */
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    /* left shift above must not overflow 31 bits */
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}

#if (DEBUGLEVEL>=2)
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(int price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
#endif

/* @return nonzero when literals are compressed (i.e. literal compression is not disabled) */
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

/* ZSTD_setBasePrices() :
 * refresh the cached base prices (weights of the frequency sums)
 * used as the reference term by the per-symbol price functions */
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}


/* sum of all elements of @table, over @nbElts entries */
static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    size_t n;
    U32 total = 0;
    for (n=0; n<nbElts; n++) {
        total += table[n];
    }
    return total;
}

/* controls whether ZSTD_downscaleStats() guarantees a minimum value of 1 per entry */
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;

/* ZSTD_downscaleStats() :
 * divide all frequencies by 2^shift;
 * when base1==base_1guaranteed, every entry keeps a floor of 1,
 * otherwise entries that were 0 stay 0.
 * @return the new sum of all entries */
static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
            (unsigned)lastEltIndex+1, (unsigned)shift );
    assert(shift < 30);
    for (s=0; s<lastEltIndex+1; s++) {
        unsigned const base = base1 ? 1 : (table[s]>0);
        unsigned const newStat = base + (table[s] >> shift);
        sum += newStat;
        table[s] = newStat;
    }
    return sum;
}

/* ZSTD_scaleStats() :
 * reduce all elt frequencies in table if sum too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
    U32 const factor = prevsum >> logTarget;   /* how many times larger than target the sum is */
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (factor <= 1) return prevsum;   /* already within target : nothing to do */
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
}

/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    and init from zero if there is none,
 *    using src for literals stats, and baseline stats for sequence symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
                  int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */

        /* heuristic: use pre-defined stats for too small inputs */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
            optPtr->priceType = zop_predef;
        }

        assert(optPtr->symbolCosts != NULL);
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {

            /* huffman stats covering the full value set : table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

            if (compressedLiterals) {
                /* generate literals statistics from huffman table :
                 * a symbol coded in fewer bits is presumed more frequent,
                 * so its synthetic frequency is 2^(scaleLog - bitCost) */
                unsigned lit;
                assert(optPtr->litFreq != NULL);
                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                    assert(bitCost <= scaleLog);
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litSum += optPtr->litFreq[lit];
            }   }

            /* derive literal-length statistics from the dictionary's FSE table */
            {   unsigned ll;
                FSE_CState_t llstate;
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                optPtr->litLengthSum = 0;
                for (ll=0; ll<=MaxLL; ll++) {
                    U32 const scaleLog = 10;   /* scale to 1K */
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                    assert(bitCost < scaleLog);
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
            }   }

            /* derive match-length statistics from the dictionary's FSE table */
            {   unsigned ml;
                FSE_CState_t mlstate;
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                optPtr->matchLengthSum = 0;
                for (ml=0; ml<=MaxML; ml++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                    assert(bitCost < scaleLog);
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
            }   }

            /* derive offset-code statistics from the dictionary's FSE table */
            {   unsigned of;
                FSE_CState_t ofstate;
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                optPtr->offCodeSum = 0;
                for (of=0; of<=MaxOff; of++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                    assert(bitCost < scaleLog);
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
            }   }

        } else {  /* first block, no dictionary */

            assert(optPtr->litFreq != NULL);
            if (compressedLiterals) {
                /* base initial cost of literals on direct frequency within src */
                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
            }

            /* baseline literal-length distribution : short lengths presumed more frequent */
            {   unsigned const baseLLfreqs[MaxLL+1] = {
                    4, 2, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
            }

            /* baseline match-length distribution : flat */
            {   unsigned ml;
                for (ml=0; ml<=MaxML; ml++)
                    optPtr->matchLengthFreq[ml] = 1;
            }
            optPtr->matchLengthSum = MaxML+1;

            /* baseline offset-code distribution */
            {   unsigned const baseOFCfreqs[MaxOff+1] = {
                    6, 2, 1, 1, 2, 3, 4, 4,
                    4, 3, 2, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
            }

        }

    } else {   /* new block : scale down accumulated statistics */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (which length can be 0).
265*3117ece4Schristos * does not include price of literalLength symbol */ 266*3117ece4Schristos static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, 267*3117ece4Schristos const optState_t* const optPtr, 268*3117ece4Schristos int optLevel) 269*3117ece4Schristos { 270*3117ece4Schristos DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength); 271*3117ece4Schristos if (litLength == 0) return 0; 272*3117ece4Schristos 273*3117ece4Schristos if (!ZSTD_compressedLiterals(optPtr)) 274*3117ece4Schristos return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ 275*3117ece4Schristos 276*3117ece4Schristos if (optPtr->priceType == zop_predef) 277*3117ece4Schristos return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ 278*3117ece4Schristos 279*3117ece4Schristos /* dynamic statistics */ 280*3117ece4Schristos { U32 price = optPtr->litSumBasePrice * litLength; 281*3117ece4Schristos U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER; 282*3117ece4Schristos U32 u; 283*3117ece4Schristos assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER); 284*3117ece4Schristos for (u=0; u < litLength; u++) { 285*3117ece4Schristos U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel); 286*3117ece4Schristos if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax; 287*3117ece4Schristos price -= litPrice; 288*3117ece4Schristos } 289*3117ece4Schristos return price; 290*3117ece4Schristos } 291*3117ece4Schristos } 292*3117ece4Schristos 293*3117ece4Schristos /* ZSTD_litLengthPrice() : 294*3117ece4Schristos * cost of literalLength symbol */ 295*3117ece4Schristos static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) 296*3117ece4Schristos { 297*3117ece4Schristos assert(litLength <= ZSTD_BLOCKSIZE_MAX); 298*3117ece4Schristos if (optPtr->priceType == zop_predef) 299*3117ece4Schristos return WEIGHT(litLength, optLevel); 300*3117ece4Schristos 
301*3117ece4Schristos /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX 302*3117ece4Schristos * because it isn't representable in the zstd format. 303*3117ece4Schristos * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. 304*3117ece4Schristos * In such a case, the block would be all literals. 305*3117ece4Schristos */ 306*3117ece4Schristos if (litLength == ZSTD_BLOCKSIZE_MAX) 307*3117ece4Schristos return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); 308*3117ece4Schristos 309*3117ece4Schristos /* dynamic statistics */ 310*3117ece4Schristos { U32 const llCode = ZSTD_LLcode(litLength); 311*3117ece4Schristos return (LL_bits[llCode] * BITCOST_MULTIPLIER) 312*3117ece4Schristos + optPtr->litLengthSumBasePrice 313*3117ece4Schristos - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); 314*3117ece4Schristos } 315*3117ece4Schristos } 316*3117ece4Schristos 317*3117ece4Schristos /* ZSTD_getMatchPrice() : 318*3117ece4Schristos * Provides the cost of the match part (offset + matchLength) of a sequence. 319*3117ece4Schristos * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
320*3117ece4Schristos * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq() 321*3117ece4Schristos * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) 322*3117ece4Schristos */ 323*3117ece4Schristos FORCE_INLINE_TEMPLATE U32 324*3117ece4Schristos ZSTD_getMatchPrice(U32 const offBase, 325*3117ece4Schristos U32 const matchLength, 326*3117ece4Schristos const optState_t* const optPtr, 327*3117ece4Schristos int const optLevel) 328*3117ece4Schristos { 329*3117ece4Schristos U32 price; 330*3117ece4Schristos U32 const offCode = ZSTD_highbit32(offBase); 331*3117ece4Schristos U32 const mlBase = matchLength - MINMATCH; 332*3117ece4Schristos assert(matchLength >= MINMATCH); 333*3117ece4Schristos 334*3117ece4Schristos if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */ 335*3117ece4Schristos return WEIGHT(mlBase, optLevel) 336*3117ece4Schristos + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */ 337*3117ece4Schristos 338*3117ece4Schristos /* dynamic statistics */ 339*3117ece4Schristos price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); 340*3117ece4Schristos if ((optLevel<2) /*static*/ && offCode >= 20) 341*3117ece4Schristos price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ 342*3117ece4Schristos 343*3117ece4Schristos /* match Length */ 344*3117ece4Schristos { U32 const mlCode = ZSTD_MLcode(mlBase); 345*3117ece4Schristos price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); 346*3117ece4Schristos } 347*3117ece4Schristos 348*3117ece4Schristos price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ 349*3117ece4Schristos 350*3117ece4Schristos DEBUGLOG(8, 
"ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); 351*3117ece4Schristos return price; 352*3117ece4Schristos } 353*3117ece4Schristos 354*3117ece4Schristos /* ZSTD_updateStats() : 355*3117ece4Schristos * assumption : literals + litLength <= iend */ 356*3117ece4Schristos static void ZSTD_updateStats(optState_t* const optPtr, 357*3117ece4Schristos U32 litLength, const BYTE* literals, 358*3117ece4Schristos U32 offBase, U32 matchLength) 359*3117ece4Schristos { 360*3117ece4Schristos /* literals */ 361*3117ece4Schristos if (ZSTD_compressedLiterals(optPtr)) { 362*3117ece4Schristos U32 u; 363*3117ece4Schristos for (u=0; u < litLength; u++) 364*3117ece4Schristos optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; 365*3117ece4Schristos optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; 366*3117ece4Schristos } 367*3117ece4Schristos 368*3117ece4Schristos /* literal Length */ 369*3117ece4Schristos { U32 const llCode = ZSTD_LLcode(litLength); 370*3117ece4Schristos optPtr->litLengthFreq[llCode]++; 371*3117ece4Schristos optPtr->litLengthSum++; 372*3117ece4Schristos } 373*3117ece4Schristos 374*3117ece4Schristos /* offset code : follows storeSeq() numeric representation */ 375*3117ece4Schristos { U32 const offCode = ZSTD_highbit32(offBase); 376*3117ece4Schristos assert(offCode <= MaxOff); 377*3117ece4Schristos optPtr->offCodeFreq[offCode]++; 378*3117ece4Schristos optPtr->offCodeSum++; 379*3117ece4Schristos } 380*3117ece4Schristos 381*3117ece4Schristos /* match Length */ 382*3117ece4Schristos { U32 const mlBase = matchLength - MINMATCH; 383*3117ece4Schristos U32 const mlCode = ZSTD_MLcode(mlBase); 384*3117ece4Schristos optPtr->matchLengthFreq[mlCode]++; 385*3117ece4Schristos optPtr->matchLengthSum++; 386*3117ece4Schristos } 387*3117ece4Schristos } 388*3117ece4Schristos 389*3117ece4Schristos 390*3117ece4Schristos /* ZSTD_readMINMATCH() : 391*3117ece4Schristos * function safe only for comparisons 392*3117ece4Schristos * assumption : memPtr must be at least 4 bytes before end of buffer */ 
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                /* shift the 4th byte out so only 3 bytes participate in comparisons */
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}


/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict)
   @return : candidate position for a 3-byte match at ip */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                       U32* nextToUpdate3,
                                       const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    /* insert every not-yet-indexed position up to (but excluding) ip */
    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 * @param ip assumed <= iend-8 .
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @return : nb of positions added */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertBt1(
                const ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;   /* lowest index still stored in the tree's roll buffer */
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;   /* Update Hash Table */

    assert(windowLow > 0);
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
/* ZSTD_updateTree_internal() :
 * insert all positions from ms->nextToUpdate up to ip (excluded) into the binary tree,
 * then advance ms->nextToUpdate to ip's index */
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        /* ZSTD_insertBt1() may skip ahead over highly repetitive segments */
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));   /* no U32 overflow while advancing */
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

/* public wrapper : update tree with the default (noDict) mode and configured minMatch */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32
ZSTD_insertBtAndGetAllMatches (
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                ZSTD_matchState_t* ms,
                U32* nextToUpdate3,
                const BYTE* const ip, const BYTE* const iLimit,
                const ZSTD_dictMode_e dictMode,
                const U32 rep[ZSTD_REP_NUM],
                const
U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ 598*3117ece4Schristos const U32 lengthToBeat, 599*3117ece4Schristos const U32 mls /* template */) 600*3117ece4Schristos { 601*3117ece4Schristos const ZSTD_compressionParameters* const cParams = &ms->cParams; 602*3117ece4Schristos U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); 603*3117ece4Schristos const BYTE* const base = ms->window.base; 604*3117ece4Schristos U32 const curr = (U32)(ip-base); 605*3117ece4Schristos U32 const hashLog = cParams->hashLog; 606*3117ece4Schristos U32 const minMatch = (mls==3) ? 3 : 4; 607*3117ece4Schristos U32* const hashTable = ms->hashTable; 608*3117ece4Schristos size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 609*3117ece4Schristos U32 matchIndex = hashTable[h]; 610*3117ece4Schristos U32* const bt = ms->chainTable; 611*3117ece4Schristos U32 const btLog = cParams->chainLog - 1; 612*3117ece4Schristos U32 const btMask= (1U << btLog) - 1; 613*3117ece4Schristos size_t commonLengthSmaller=0, commonLengthLarger=0; 614*3117ece4Schristos const BYTE* const dictBase = ms->window.dictBase; 615*3117ece4Schristos U32 const dictLimit = ms->window.dictLimit; 616*3117ece4Schristos const BYTE* const dictEnd = dictBase + dictLimit; 617*3117ece4Schristos const BYTE* const prefixStart = base + dictLimit; 618*3117ece4Schristos U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; 619*3117ece4Schristos U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); 620*3117ece4Schristos U32 const matchLow = windowLow ? 
windowLow : 1; 621*3117ece4Schristos U32* smallerPtr = bt + 2*(curr&btMask); 622*3117ece4Schristos U32* largerPtr = bt + 2*(curr&btMask) + 1; 623*3117ece4Schristos U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ 624*3117ece4Schristos U32 dummy32; /* to be nullified at the end */ 625*3117ece4Schristos U32 mnum = 0; 626*3117ece4Schristos U32 nbCompares = 1U << cParams->searchLog; 627*3117ece4Schristos 628*3117ece4Schristos const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; 629*3117ece4Schristos const ZSTD_compressionParameters* const dmsCParams = 630*3117ece4Schristos dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; 631*3117ece4Schristos const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; 632*3117ece4Schristos const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; 633*3117ece4Schristos U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; 634*3117ece4Schristos U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; 635*3117ece4Schristos U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; 636*3117ece4Schristos U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; 637*3117ece4Schristos U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; 638*3117ece4Schristos U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; 639*3117ece4Schristos U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? 
dmsHighLimit - dmsBtMask : dmsLowLimit; 640*3117ece4Schristos 641*3117ece4Schristos size_t bestLength = lengthToBeat-1; 642*3117ece4Schristos DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); 643*3117ece4Schristos 644*3117ece4Schristos /* check repCode */ 645*3117ece4Schristos assert(ll0 <= 1); /* necessarily 1 or 0 */ 646*3117ece4Schristos { U32 const lastR = ZSTD_REP_NUM + ll0; 647*3117ece4Schristos U32 repCode; 648*3117ece4Schristos for (repCode = ll0; repCode < lastR; repCode++) { 649*3117ece4Schristos U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; 650*3117ece4Schristos U32 const repIndex = curr - repOffset; 651*3117ece4Schristos U32 repLen = 0; 652*3117ece4Schristos assert(curr >= dictLimit); 653*3117ece4Schristos if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ 654*3117ece4Schristos /* We must validate the repcode offset because when we're using a dictionary the 655*3117ece4Schristos * valid offset range shrinks when the dictionary goes out of bounds. 656*3117ece4Schristos */ 657*3117ece4Schristos if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { 658*3117ece4Schristos repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; 659*3117ece4Schristos } 660*3117ece4Schristos } else { /* repIndex < dictLimit || repIndex >= curr */ 661*3117ece4Schristos const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? 
662*3117ece4Schristos dmsBase + repIndex - dmsIndexDelta : 663*3117ece4Schristos dictBase + repIndex; 664*3117ece4Schristos assert(curr >= windowLow); 665*3117ece4Schristos if ( dictMode == ZSTD_extDict 666*3117ece4Schristos && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ 667*3117ece4Schristos & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) 668*3117ece4Schristos && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { 669*3117ece4Schristos repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; 670*3117ece4Schristos } 671*3117ece4Schristos if (dictMode == ZSTD_dictMatchState 672*3117ece4Schristos && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ 673*3117ece4Schristos & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ 674*3117ece4Schristos && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { 675*3117ece4Schristos repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; 676*3117ece4Schristos } } 677*3117ece4Schristos /* save longer solution */ 678*3117ece4Schristos if (repLen > bestLength) { 679*3117ece4Schristos DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", 680*3117ece4Schristos repCode, ll0, repOffset, repLen); 681*3117ece4Schristos bestLength = repLen; 682*3117ece4Schristos matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */ 683*3117ece4Schristos matches[mnum].len = (U32)repLen; 684*3117ece4Schristos mnum++; 685*3117ece4Schristos if ( (repLen > sufficient_len) 686*3117ece4Schristos | (ip+repLen == iLimit) ) { /* best possible */ 
687*3117ece4Schristos return mnum; 688*3117ece4Schristos } } } } 689*3117ece4Schristos 690*3117ece4Schristos /* HC3 match finder */ 691*3117ece4Schristos if ((mls == 3) /*static*/ && (bestLength < mls)) { 692*3117ece4Schristos U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); 693*3117ece4Schristos if ((matchIndex3 >= matchLow) 694*3117ece4Schristos & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { 695*3117ece4Schristos size_t mlen; 696*3117ece4Schristos if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { 697*3117ece4Schristos const BYTE* const match = base + matchIndex3; 698*3117ece4Schristos mlen = ZSTD_count(ip, match, iLimit); 699*3117ece4Schristos } else { 700*3117ece4Schristos const BYTE* const match = dictBase + matchIndex3; 701*3117ece4Schristos mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); 702*3117ece4Schristos } 703*3117ece4Schristos 704*3117ece4Schristos /* save best solution */ 705*3117ece4Schristos if (mlen >= mls /* == 3 > bestLength */) { 706*3117ece4Schristos DEBUGLOG(8, "found small match with hlog3, of length %u", 707*3117ece4Schristos (U32)mlen); 708*3117ece4Schristos bestLength = mlen; 709*3117ece4Schristos assert(curr > matchIndex3); 710*3117ece4Schristos assert(mnum==0); /* no prior solution */ 711*3117ece4Schristos matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3); 712*3117ece4Schristos matches[0].len = (U32)mlen; 713*3117ece4Schristos mnum = 1; 714*3117ece4Schristos if ( (mlen > sufficient_len) | 715*3117ece4Schristos (ip+mlen == iLimit) ) { /* best possible length */ 716*3117ece4Schristos ms->nextToUpdate = curr+1; /* skip insertion */ 717*3117ece4Schristos return 1; 718*3117ece4Schristos } } } 719*3117ece4Schristos /* no dictMatchState lookup: dicts don't have a populated HC3 table */ 720*3117ece4Schristos } /* if (mls == 3) */ 721*3117ece4Schristos 722*3117ece4Schristos 
hashTable[h] = curr; /* Update Hash Table */ 723*3117ece4Schristos 724*3117ece4Schristos for (; nbCompares && (matchIndex >= matchLow); --nbCompares) { 725*3117ece4Schristos U32* const nextPtr = bt + 2*(matchIndex & btMask); 726*3117ece4Schristos const BYTE* match; 727*3117ece4Schristos size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 728*3117ece4Schristos assert(curr > matchIndex); 729*3117ece4Schristos 730*3117ece4Schristos if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { 731*3117ece4Schristos assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ 732*3117ece4Schristos match = base + matchIndex; 733*3117ece4Schristos if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ 734*3117ece4Schristos matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); 735*3117ece4Schristos } else { 736*3117ece4Schristos match = dictBase + matchIndex; 737*3117ece4Schristos assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ 738*3117ece4Schristos matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); 739*3117ece4Schristos if (matchIndex+matchLength >= dictLimit) 740*3117ece4Schristos match = base + matchIndex; /* prepare for match[matchLength] read */ 741*3117ece4Schristos } 742*3117ece4Schristos 743*3117ece4Schristos if (matchLength > bestLength) { 744*3117ece4Schristos DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)", 745*3117ece4Schristos (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); 746*3117ece4Schristos assert(matchEndIdx > matchIndex); 747*3117ece4Schristos if (matchLength > matchEndIdx - matchIndex) 748*3117ece4Schristos matchEndIdx = matchIndex + (U32)matchLength; 
749*3117ece4Schristos bestLength = matchLength; 750*3117ece4Schristos matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); 751*3117ece4Schristos matches[mnum].len = (U32)matchLength; 752*3117ece4Schristos mnum++; 753*3117ece4Schristos if ( (matchLength > ZSTD_OPT_NUM) 754*3117ece4Schristos | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { 755*3117ece4Schristos if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ 756*3117ece4Schristos break; /* drop, to preserve bt consistency (miss a little bit of compression) */ 757*3117ece4Schristos } } 758*3117ece4Schristos 759*3117ece4Schristos if (match[matchLength] < ip[matchLength]) { 760*3117ece4Schristos /* match smaller than current */ 761*3117ece4Schristos *smallerPtr = matchIndex; /* update smaller idx */ 762*3117ece4Schristos commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 763*3117ece4Schristos if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 764*3117ece4Schristos smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ 765*3117ece4Schristos matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ 766*3117ece4Schristos } else { 767*3117ece4Schristos *largerPtr = matchIndex; 768*3117ece4Schristos commonLengthLarger = matchLength; 769*3117ece4Schristos if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 770*3117ece4Schristos largerPtr = nextPtr; 771*3117ece4Schristos matchIndex = nextPtr[0]; 772*3117ece4Schristos } } 773*3117ece4Schristos 774*3117ece4Schristos *smallerPtr = *largerPtr = 0; 775*3117ece4Schristos 776*3117ece4Schristos assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. 
*/ 777*3117ece4Schristos if (dictMode == ZSTD_dictMatchState && nbCompares) { 778*3117ece4Schristos size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); 779*3117ece4Schristos U32 dictMatchIndex = dms->hashTable[dmsH]; 780*3117ece4Schristos const U32* const dmsBt = dms->chainTable; 781*3117ece4Schristos commonLengthSmaller = commonLengthLarger = 0; 782*3117ece4Schristos for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) { 783*3117ece4Schristos const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); 784*3117ece4Schristos size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 785*3117ece4Schristos const BYTE* match = dmsBase + dictMatchIndex; 786*3117ece4Schristos matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); 787*3117ece4Schristos if (dictMatchIndex+matchLength >= dmsHighLimit) 788*3117ece4Schristos match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ 789*3117ece4Schristos 790*3117ece4Schristos if (matchLength > bestLength) { 791*3117ece4Schristos matchIndex = dictMatchIndex + dmsIndexDelta; 792*3117ece4Schristos DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)", 793*3117ece4Schristos (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); 794*3117ece4Schristos if (matchLength > matchEndIdx - matchIndex) 795*3117ece4Schristos matchEndIdx = matchIndex + (U32)matchLength; 796*3117ece4Schristos bestLength = matchLength; 797*3117ece4Schristos matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); 798*3117ece4Schristos matches[mnum].len = (U32)matchLength; 799*3117ece4Schristos mnum++; 800*3117ece4Schristos if ( (matchLength > ZSTD_OPT_NUM) 801*3117ece4Schristos | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { 802*3117ece4Schristos break; /* drop, to guarantee consistency (miss a little bit of compression) */ 
803*3117ece4Schristos } } 804*3117ece4Schristos 805*3117ece4Schristos if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ 806*3117ece4Schristos if (match[matchLength] < ip[matchLength]) { 807*3117ece4Schristos commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 808*3117ece4Schristos dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 809*3117ece4Schristos } else { 810*3117ece4Schristos /* match is larger than current */ 811*3117ece4Schristos commonLengthLarger = matchLength; 812*3117ece4Schristos dictMatchIndex = nextPtr[0]; 813*3117ece4Schristos } } } /* if (dictMode == ZSTD_dictMatchState) */ 814*3117ece4Schristos 815*3117ece4Schristos assert(matchEndIdx > curr+8); 816*3117ece4Schristos ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ 817*3117ece4Schristos return mnum; 818*3117ece4Schristos } 819*3117ece4Schristos 820*3117ece4Schristos typedef U32 (*ZSTD_getAllMatchesFn)( 821*3117ece4Schristos ZSTD_match_t*, 822*3117ece4Schristos ZSTD_matchState_t*, 823*3117ece4Schristos U32*, 824*3117ece4Schristos const BYTE*, 825*3117ece4Schristos const BYTE*, 826*3117ece4Schristos const U32 rep[ZSTD_REP_NUM], 827*3117ece4Schristos U32 const ll0, 828*3117ece4Schristos U32 const lengthToBeat); 829*3117ece4Schristos 830*3117ece4Schristos FORCE_INLINE_TEMPLATE 831*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 832*3117ece4Schristos U32 ZSTD_btGetAllMatches_internal( 833*3117ece4Schristos ZSTD_match_t* matches, 834*3117ece4Schristos ZSTD_matchState_t* ms, 835*3117ece4Schristos U32* nextToUpdate3, 836*3117ece4Schristos const BYTE* ip, 837*3117ece4Schristos const BYTE* const iHighLimit, 838*3117ece4Schristos const U32 rep[ZSTD_REP_NUM], 839*3117ece4Schristos U32 const ll0, 840*3117ece4Schristos U32 const lengthToBeat, 841*3117ece4Schristos const ZSTD_dictMode_e dictMode, 842*3117ece4Schristos const U32 mls) 843*3117ece4Schristos { 
844*3117ece4Schristos assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls); 845*3117ece4Schristos DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls); 846*3117ece4Schristos if (ip < ms->window.base + ms->nextToUpdate) 847*3117ece4Schristos return 0; /* skipped area */ 848*3117ece4Schristos ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode); 849*3117ece4Schristos return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls); 850*3117ece4Schristos } 851*3117ece4Schristos 852*3117ece4Schristos #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls 853*3117ece4Schristos 854*3117ece4Schristos #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \ 855*3117ece4Schristos static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \ 856*3117ece4Schristos ZSTD_match_t* matches, \ 857*3117ece4Schristos ZSTD_matchState_t* ms, \ 858*3117ece4Schristos U32* nextToUpdate3, \ 859*3117ece4Schristos const BYTE* ip, \ 860*3117ece4Schristos const BYTE* const iHighLimit, \ 861*3117ece4Schristos const U32 rep[ZSTD_REP_NUM], \ 862*3117ece4Schristos U32 const ll0, \ 863*3117ece4Schristos U32 const lengthToBeat) \ 864*3117ece4Schristos { \ 865*3117ece4Schristos return ZSTD_btGetAllMatches_internal( \ 866*3117ece4Schristos matches, ms, nextToUpdate3, ip, iHighLimit, \ 867*3117ece4Schristos rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \ 868*3117ece4Schristos } 869*3117ece4Schristos 870*3117ece4Schristos #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \ 871*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \ 872*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \ 873*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \ 874*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6) 875*3117ece4Schristos 876*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES(noDict) 877*3117ece4Schristos GEN_ZSTD_BT_GET_ALL_MATCHES(extDict) 878*3117ece4Schristos 
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

/* initializer list: the four mls specializations (3..6) for one dictMode */
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }

/* ZSTD_selectBtGetAllMatches() :
 * Returns the specialized match-finder function for the current
 * (dictMode, minMatch) combination; minMatch is clamped into [3,6]. */
static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}

/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate */
    U32 endPosInBlock;        /* End position of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    /* bytes still to consume, counted from the start of the current sequence */
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            /* the whole sequence is consumed : advance to the next one */
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            /* stop partway through the current sequence */
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    /* fully consumed, or ran out of sequences : reset intra-sequence position */
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}

/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    /* posInSequence splits the sequence into "literals still owed" then "match bytes still owed" */
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough,
 * based on its 'matchStartPosInBlock' and 'matchEndPosInBlock',
 * into 'matches'. Maintains the correct ordering of 'matches'.
995*3117ece4Schristos */ 996*3117ece4Schristos static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, 997*3117ece4Schristos const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) 998*3117ece4Schristos { 999*3117ece4Schristos U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; 1000*3117ece4Schristos /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */ 1001*3117ece4Schristos U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; 1002*3117ece4Schristos 1003*3117ece4Schristos /* Ensure that current block position is not outside of the match */ 1004*3117ece4Schristos if (currPosInBlock < optLdm->startPosInBlock 1005*3117ece4Schristos || currPosInBlock >= optLdm->endPosInBlock 1006*3117ece4Schristos || candidateMatchLength < MINMATCH) { 1007*3117ece4Schristos return; 1008*3117ece4Schristos } 1009*3117ece4Schristos 1010*3117ece4Schristos if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { 1011*3117ece4Schristos U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset); 1012*3117ece4Schristos DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u", 1013*3117ece4Schristos candidateOffBase, candidateMatchLength, currPosInBlock); 1014*3117ece4Schristos matches[*nbMatches].len = candidateMatchLength; 1015*3117ece4Schristos matches[*nbMatches].off = candidateOffBase; 1016*3117ece4Schristos (*nbMatches)++; 1017*3117ece4Schristos } 1018*3117ece4Schristos } 1019*3117ece4Schristos 1020*3117ece4Schristos /* ZSTD_optLdm_processMatchCandidate(): 1021*3117ece4Schristos * Wrapper function to update ldm seq store and call ldm functions as necessary. 
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes)
{
    /* no LDM candidates remain for this block : nothing to do */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    /* current candidate exhausted : resync the seq store and fetch the next one */
    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    /* append the candidate to @matches if it is long enough and improving */
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}


/*-*******************************
*  Optimal parser
*********************************/

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
(void)table; 1060*3117ece4Schristos /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ 1061*3117ece4Schristos RAWLOG(2, "%4i,", table[enb]); 1062*3117ece4Schristos } 1063*3117ece4Schristos RAWLOG(2, " \n"); 1064*3117ece4Schristos } 1065*3117ece4Schristos 1066*3117ece4Schristos #endif 1067*3117ece4Schristos 1068*3117ece4Schristos #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel) 1069*3117ece4Schristos #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel) 1070*3117ece4Schristos #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1)) 1071*3117ece4Schristos 1072*3117ece4Schristos FORCE_INLINE_TEMPLATE 1073*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1074*3117ece4Schristos size_t 1075*3117ece4Schristos ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, 1076*3117ece4Schristos seqStore_t* seqStore, 1077*3117ece4Schristos U32 rep[ZSTD_REP_NUM], 1078*3117ece4Schristos const void* src, size_t srcSize, 1079*3117ece4Schristos const int optLevel, 1080*3117ece4Schristos const ZSTD_dictMode_e dictMode) 1081*3117ece4Schristos { 1082*3117ece4Schristos optState_t* const optStatePtr = &ms->opt; 1083*3117ece4Schristos const BYTE* const istart = (const BYTE*)src; 1084*3117ece4Schristos const BYTE* ip = istart; 1085*3117ece4Schristos const BYTE* anchor = istart; 1086*3117ece4Schristos const BYTE* const iend = istart + srcSize; 1087*3117ece4Schristos const BYTE* const ilimit = iend - 8; 1088*3117ece4Schristos const BYTE* const base = ms->window.base; 1089*3117ece4Schristos const BYTE* const prefixStart = base + ms->window.dictLimit; 1090*3117ece4Schristos const ZSTD_compressionParameters* const cParams = &ms->cParams; 1091*3117ece4Schristos 1092*3117ece4Schristos ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode); 1093*3117ece4Schristos 1094*3117ece4Schristos U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); 1095*3117ece4Schristos U32 const minMatch = (cParams->minMatch == 3) ? 
3 : 4; 1096*3117ece4Schristos U32 nextToUpdate3 = ms->nextToUpdate; 1097*3117ece4Schristos 1098*3117ece4Schristos ZSTD_optimal_t* const opt = optStatePtr->priceTable; 1099*3117ece4Schristos ZSTD_match_t* const matches = optStatePtr->matchTable; 1100*3117ece4Schristos ZSTD_optimal_t lastStretch; 1101*3117ece4Schristos ZSTD_optLdm_t optLdm; 1102*3117ece4Schristos 1103*3117ece4Schristos ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t)); 1104*3117ece4Schristos 1105*3117ece4Schristos optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; 1106*3117ece4Schristos optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; 1107*3117ece4Schristos ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); 1108*3117ece4Schristos 1109*3117ece4Schristos /* init */ 1110*3117ece4Schristos DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", 1111*3117ece4Schristos (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); 1112*3117ece4Schristos assert(optLevel <= 2); 1113*3117ece4Schristos ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); 1114*3117ece4Schristos ip += (ip==prefixStart); 1115*3117ece4Schristos 1116*3117ece4Schristos /* Match Loop */ 1117*3117ece4Schristos while (ip < ilimit) { 1118*3117ece4Schristos U32 cur, last_pos = 0; 1119*3117ece4Schristos 1120*3117ece4Schristos /* find first match */ 1121*3117ece4Schristos { U32 const litlen = (U32)(ip - anchor); 1122*3117ece4Schristos U32 const ll0 = !litlen; 1123*3117ece4Schristos U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); 1124*3117ece4Schristos ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, 1125*3117ece4Schristos (U32)(ip-istart), (U32)(iend-ip)); 1126*3117ece4Schristos if (!nbMatches) { 1127*3117ece4Schristos DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart)); 1128*3117ece4Schristos ip++; 1129*3117ece4Schristos continue; 1130*3117ece4Schristos } 
1131*3117ece4Schristos 1132*3117ece4Schristos /* Match found: let's store this solution, and eventually find more candidates. 1133*3117ece4Schristos * During this forward pass, @opt is used to store stretches, 1134*3117ece4Schristos * defined as "a match followed by N literals". 1135*3117ece4Schristos * Note how this is different from a Sequence, which is "N literals followed by a match". 1136*3117ece4Schristos * Storing stretches allows us to store different match predecessors 1137*3117ece4Schristos * for each literal position part of a literals run. */ 1138*3117ece4Schristos 1139*3117ece4Schristos /* initialize opt[0] */ 1140*3117ece4Schristos opt[0].mlen = 0; /* there are only literals so far */ 1141*3117ece4Schristos opt[0].litlen = litlen; 1142*3117ece4Schristos /* No need to include the actual price of the literals before the first match 1143*3117ece4Schristos * because it is static for the duration of the forward pass, and is included 1144*3117ece4Schristos * in every subsequent price. But, we include the literal length because 1145*3117ece4Schristos * the cost variation of litlen depends on the value of litlen. 
1146*3117ece4Schristos */ 1147*3117ece4Schristos opt[0].price = LL_PRICE(litlen); 1148*3117ece4Schristos ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0])); 1149*3117ece4Schristos ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep)); 1150*3117ece4Schristos 1151*3117ece4Schristos /* large match -> immediate encoding */ 1152*3117ece4Schristos { U32 const maxML = matches[nbMatches-1].len; 1153*3117ece4Schristos U32 const maxOffBase = matches[nbMatches-1].off; 1154*3117ece4Schristos DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series", 1155*3117ece4Schristos nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart)); 1156*3117ece4Schristos 1157*3117ece4Schristos if (maxML > sufficient_len) { 1158*3117ece4Schristos lastStretch.litlen = 0; 1159*3117ece4Schristos lastStretch.mlen = maxML; 1160*3117ece4Schristos lastStretch.off = maxOffBase; 1161*3117ece4Schristos DEBUGLOG(6, "large match (%u>%u) => immediate encoding", 1162*3117ece4Schristos maxML, sufficient_len); 1163*3117ece4Schristos cur = 0; 1164*3117ece4Schristos last_pos = maxML; 1165*3117ece4Schristos goto _shortestPath; 1166*3117ece4Schristos } } 1167*3117ece4Schristos 1168*3117ece4Schristos /* set prices for first matches starting position == 0 */ 1169*3117ece4Schristos assert(opt[0].price >= 0); 1170*3117ece4Schristos { U32 pos; 1171*3117ece4Schristos U32 matchNb; 1172*3117ece4Schristos for (pos = 1; pos < minMatch; pos++) { 1173*3117ece4Schristos opt[pos].price = ZSTD_MAX_PRICE; 1174*3117ece4Schristos opt[pos].mlen = 0; 1175*3117ece4Schristos opt[pos].litlen = litlen + pos; 1176*3117ece4Schristos } 1177*3117ece4Schristos for (matchNb = 0; matchNb < nbMatches; matchNb++) { 1178*3117ece4Schristos U32 const offBase = matches[matchNb].off; 1179*3117ece4Schristos U32 const end = matches[matchNb].len; 1180*3117ece4Schristos for ( ; pos <= end ; pos++ ) { 1181*3117ece4Schristos int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); 
1182*3117ece4Schristos int const sequencePrice = opt[0].price + matchPrice; 1183*3117ece4Schristos DEBUGLOG(7, "rPos:%u => set initial price : %.2f", 1184*3117ece4Schristos pos, ZSTD_fCost(sequencePrice)); 1185*3117ece4Schristos opt[pos].mlen = pos; 1186*3117ece4Schristos opt[pos].off = offBase; 1187*3117ece4Schristos opt[pos].litlen = 0; /* end of match */ 1188*3117ece4Schristos opt[pos].price = sequencePrice + LL_PRICE(0); 1189*3117ece4Schristos } 1190*3117ece4Schristos } 1191*3117ece4Schristos last_pos = pos-1; 1192*3117ece4Schristos opt[pos].price = ZSTD_MAX_PRICE; 1193*3117ece4Schristos } 1194*3117ece4Schristos } 1195*3117ece4Schristos 1196*3117ece4Schristos /* check further positions */ 1197*3117ece4Schristos for (cur = 1; cur <= last_pos; cur++) { 1198*3117ece4Schristos const BYTE* const inr = ip + cur; 1199*3117ece4Schristos assert(cur <= ZSTD_OPT_NUM); 1200*3117ece4Schristos DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur); 1201*3117ece4Schristos 1202*3117ece4Schristos /* Fix current position with one literal if cheaper */ 1203*3117ece4Schristos { U32 const litlen = opt[cur-1].litlen + 1; 1204*3117ece4Schristos int const price = opt[cur-1].price 1205*3117ece4Schristos + LIT_PRICE(ip+cur-1) 1206*3117ece4Schristos + LL_INCPRICE(litlen); 1207*3117ece4Schristos assert(price < 1000000000); /* overflow check */ 1208*3117ece4Schristos if (price <= opt[cur].price) { 1209*3117ece4Schristos ZSTD_optimal_t const prevMatch = opt[cur]; 1210*3117ece4Schristos DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", 1211*3117ece4Schristos inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, 1212*3117ece4Schristos opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); 1213*3117ece4Schristos opt[cur] = opt[cur-1]; 1214*3117ece4Schristos opt[cur].litlen = litlen; 1215*3117ece4Schristos opt[cur].price = price; 1216*3117ece4Schristos if ( (optLevel >= 1) /* additional check only for higher modes */ 
1217*3117ece4Schristos && (prevMatch.litlen == 0) /* replace a match */ 1218*3117ece4Schristos && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ 1219*3117ece4Schristos && LIKELY(ip + cur < iend) 1220*3117ece4Schristos ) { 1221*3117ece4Schristos /* check next position, in case it would be cheaper */ 1222*3117ece4Schristos int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1); 1223*3117ece4Schristos int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1); 1224*3117ece4Schristos DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f", 1225*3117ece4Schristos cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); 1226*3117ece4Schristos if ( (with1literal < withMoreLiterals) 1227*3117ece4Schristos && (with1literal < opt[cur+1].price) ) { 1228*3117ece4Schristos /* update offset history - before it disappears */ 1229*3117ece4Schristos U32 const prev = cur - prevMatch.mlen; 1230*3117ece4Schristos repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0); 1231*3117ece4Schristos assert(cur >= prevMatch.mlen); 1232*3117ece4Schristos DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !", 1233*3117ece4Schristos ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals), 1234*3117ece4Schristos newReps.rep[0], newReps.rep[1], newReps.rep[2] ); 1235*3117ece4Schristos opt[cur+1] = prevMatch; /* mlen & offbase */ 1236*3117ece4Schristos ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t)); 1237*3117ece4Schristos opt[cur+1].litlen = 1; 1238*3117ece4Schristos opt[cur+1].price = with1literal; 1239*3117ece4Schristos if (last_pos < cur+1) last_pos = cur+1; 1240*3117ece4Schristos } 1241*3117ece4Schristos } 1242*3117ece4Schristos } else { 1243*3117ece4Schristos DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)", 1244*3117ece4Schristos inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); 1245*3117ece4Schristos } 1246*3117ece4Schristos } 
1247*3117ece4Schristos 1248*3117ece4Schristos /* Offset history is not updated during match comparison. 1249*3117ece4Schristos * Do it here, now that the match is selected and confirmed. 1250*3117ece4Schristos */ 1251*3117ece4Schristos ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); 1252*3117ece4Schristos assert(cur >= opt[cur].mlen); 1253*3117ece4Schristos if (opt[cur].litlen == 0) { 1254*3117ece4Schristos /* just finished a match => alter offset history */ 1255*3117ece4Schristos U32 const prev = cur - opt[cur].mlen; 1256*3117ece4Schristos repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); 1257*3117ece4Schristos ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); 1258*3117ece4Schristos } 1259*3117ece4Schristos 1260*3117ece4Schristos /* last match must start at a minimum distance of 8 from oend */ 1261*3117ece4Schristos if (inr > ilimit) continue; 1262*3117ece4Schristos 1263*3117ece4Schristos if (cur == last_pos) break; 1264*3117ece4Schristos 1265*3117ece4Schristos if ( (optLevel==0) /*static_test*/ 1266*3117ece4Schristos && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { 1267*3117ece4Schristos DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1); 1268*3117ece4Schristos continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ 1269*3117ece4Schristos } 1270*3117ece4Schristos 1271*3117ece4Schristos assert(opt[cur].price >= 0); 1272*3117ece4Schristos { U32 const ll0 = (opt[cur].litlen == 0); 1273*3117ece4Schristos int const previousPrice = opt[cur].price; 1274*3117ece4Schristos int const basePrice = previousPrice + LL_PRICE(0); 1275*3117ece4Schristos U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); 1276*3117ece4Schristos U32 matchNb; 1277*3117ece4Schristos 1278*3117ece4Schristos ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, 1279*3117ece4Schristos (U32)(inr-istart), (U32)(iend-inr)); 
1280*3117ece4Schristos 1281*3117ece4Schristos if (!nbMatches) { 1282*3117ece4Schristos DEBUGLOG(7, "rPos:%u : no match found", cur); 1283*3117ece4Schristos continue; 1284*3117ece4Schristos } 1285*3117ece4Schristos 1286*3117ece4Schristos { U32 const longestML = matches[nbMatches-1].len; 1287*3117ece4Schristos DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u", 1288*3117ece4Schristos inr-istart, cur, nbMatches, longestML); 1289*3117ece4Schristos 1290*3117ece4Schristos if ( (longestML > sufficient_len) 1291*3117ece4Schristos || (cur + longestML >= ZSTD_OPT_NUM) 1292*3117ece4Schristos || (ip + cur + longestML >= iend) ) { 1293*3117ece4Schristos lastStretch.mlen = longestML; 1294*3117ece4Schristos lastStretch.off = matches[nbMatches-1].off; 1295*3117ece4Schristos lastStretch.litlen = 0; 1296*3117ece4Schristos last_pos = cur + longestML; 1297*3117ece4Schristos goto _shortestPath; 1298*3117ece4Schristos } } 1299*3117ece4Schristos 1300*3117ece4Schristos /* set prices using matches found at position == cur */ 1301*3117ece4Schristos for (matchNb = 0; matchNb < nbMatches; matchNb++) { 1302*3117ece4Schristos U32 const offset = matches[matchNb].off; 1303*3117ece4Schristos U32 const lastML = matches[matchNb].len; 1304*3117ece4Schristos U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; 1305*3117ece4Schristos U32 mlen; 1306*3117ece4Schristos 1307*3117ece4Schristos DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u", 1308*3117ece4Schristos matchNb, matches[matchNb].off, lastML, opt[cur].litlen); 1309*3117ece4Schristos 1310*3117ece4Schristos for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ 1311*3117ece4Schristos U32 const pos = cur + mlen; 1312*3117ece4Schristos int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); 1313*3117ece4Schristos 1314*3117ece4Schristos if ((pos > last_pos) || (price < opt[pos].price)) { 1315*3117ece4Schristos DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", 1316*3117ece4Schristos pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); 1317*3117ece4Schristos while (last_pos < pos) { 1318*3117ece4Schristos /* fill empty positions, for future comparisons */ 1319*3117ece4Schristos last_pos++; 1320*3117ece4Schristos opt[last_pos].price = ZSTD_MAX_PRICE; 1321*3117ece4Schristos opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */ 1322*3117ece4Schristos } 1323*3117ece4Schristos opt[pos].mlen = mlen; 1324*3117ece4Schristos opt[pos].off = offset; 1325*3117ece4Schristos opt[pos].litlen = 0; 1326*3117ece4Schristos opt[pos].price = price; 1327*3117ece4Schristos } else { 1328*3117ece4Schristos DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", 1329*3117ece4Schristos pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); 1330*3117ece4Schristos if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ 1331*3117ece4Schristos } 1332*3117ece4Schristos } } } 1333*3117ece4Schristos opt[last_pos+1].price = ZSTD_MAX_PRICE; 1334*3117ece4Schristos } /* for (cur = 1; cur <= last_pos; cur++) */ 1335*3117ece4Schristos 1336*3117ece4Schristos lastStretch = opt[last_pos]; 1337*3117ece4Schristos assert(cur >= lastStretch.mlen); 
1338*3117ece4Schristos cur = last_pos - lastStretch.mlen; 1339*3117ece4Schristos 1340*3117ece4Schristos _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ 1341*3117ece4Schristos assert(opt[0].mlen == 0); 1342*3117ece4Schristos assert(last_pos >= lastStretch.mlen); 1343*3117ece4Schristos assert(cur == last_pos - lastStretch.mlen); 1344*3117ece4Schristos 1345*3117ece4Schristos if (lastStretch.mlen==0) { 1346*3117ece4Schristos /* no solution : all matches have been converted into literals */ 1347*3117ece4Schristos assert(lastStretch.litlen == (ip - anchor) + last_pos); 1348*3117ece4Schristos ip += last_pos; 1349*3117ece4Schristos continue; 1350*3117ece4Schristos } 1351*3117ece4Schristos assert(lastStretch.off > 0); 1352*3117ece4Schristos 1353*3117ece4Schristos /* Update offset history */ 1354*3117ece4Schristos if (lastStretch.litlen == 0) { 1355*3117ece4Schristos /* finishing on a match : update offset history */ 1356*3117ece4Schristos repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0); 1357*3117ece4Schristos ZSTD_memcpy(rep, &reps, sizeof(repcodes_t)); 1358*3117ece4Schristos } else { 1359*3117ece4Schristos ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t)); 1360*3117ece4Schristos assert(cur >= lastStretch.litlen); 1361*3117ece4Schristos cur -= lastStretch.litlen; 1362*3117ece4Schristos } 1363*3117ece4Schristos 1364*3117ece4Schristos /* Let's write the shortest path solution. 1365*3117ece4Schristos * It is stored in @opt in reverse order, 1366*3117ece4Schristos * starting from @storeEnd (==cur+2), 1367*3117ece4Schristos * effectively partially @opt overwriting. 
1368*3117ece4Schristos * Content is changed too: 1369*3117ece4Schristos * - So far, @opt stored stretches, aka a match followed by literals 1370*3117ece4Schristos * - Now, it will store sequences, aka literals followed by a match 1371*3117ece4Schristos */ 1372*3117ece4Schristos { U32 const storeEnd = cur + 2; 1373*3117ece4Schristos U32 storeStart = storeEnd; 1374*3117ece4Schristos U32 stretchPos = cur; 1375*3117ece4Schristos 1376*3117ece4Schristos DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", 1377*3117ece4Schristos last_pos, cur); (void)last_pos; 1378*3117ece4Schristos assert(storeEnd < ZSTD_OPT_SIZE); 1379*3117ece4Schristos DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", 1380*3117ece4Schristos storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off); 1381*3117ece4Schristos if (lastStretch.litlen > 0) { 1382*3117ece4Schristos /* last "sequence" is unfinished: just a bunch of literals */ 1383*3117ece4Schristos opt[storeEnd].litlen = lastStretch.litlen; 1384*3117ece4Schristos opt[storeEnd].mlen = 0; 1385*3117ece4Schristos storeStart = storeEnd-1; 1386*3117ece4Schristos opt[storeStart] = lastStretch; 1387*3117ece4Schristos } { 1388*3117ece4Schristos opt[storeEnd] = lastStretch; /* note: litlen will be fixed */ 1389*3117ece4Schristos storeStart = storeEnd; 1390*3117ece4Schristos } 1391*3117ece4Schristos while (1) { 1392*3117ece4Schristos ZSTD_optimal_t nextStretch = opt[stretchPos]; 1393*3117ece4Schristos opt[storeStart].litlen = nextStretch.litlen; 1394*3117ece4Schristos DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)", 1395*3117ece4Schristos opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off); 1396*3117ece4Schristos if (nextStretch.mlen == 0) { 1397*3117ece4Schristos /* reaching beginning of segment */ 1398*3117ece4Schristos break; 1399*3117ece4Schristos } 1400*3117ece4Schristos storeStart--; 1401*3117ece4Schristos opt[storeStart] = nextStretch; /* note: litlen will be fixed */ 
1402*3117ece4Schristos assert(nextStretch.litlen + nextStretch.mlen <= stretchPos); 1403*3117ece4Schristos stretchPos -= nextStretch.litlen + nextStretch.mlen; 1404*3117ece4Schristos } 1405*3117ece4Schristos 1406*3117ece4Schristos /* save sequences */ 1407*3117ece4Schristos DEBUGLOG(6, "sending selected sequences into seqStore"); 1408*3117ece4Schristos { U32 storePos; 1409*3117ece4Schristos for (storePos=storeStart; storePos <= storeEnd; storePos++) { 1410*3117ece4Schristos U32 const llen = opt[storePos].litlen; 1411*3117ece4Schristos U32 const mlen = opt[storePos].mlen; 1412*3117ece4Schristos U32 const offBase = opt[storePos].off; 1413*3117ece4Schristos U32 const advance = llen + mlen; 1414*3117ece4Schristos DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", 1415*3117ece4Schristos anchor - istart, (unsigned)llen, (unsigned)mlen); 1416*3117ece4Schristos 1417*3117ece4Schristos if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ 1418*3117ece4Schristos assert(storePos == storeEnd); /* must be last sequence */ 1419*3117ece4Schristos ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ 1420*3117ece4Schristos continue; /* will finish */ 1421*3117ece4Schristos } 1422*3117ece4Schristos 1423*3117ece4Schristos assert(anchor + llen <= iend); 1424*3117ece4Schristos ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen); 1425*3117ece4Schristos ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen); 1426*3117ece4Schristos anchor += advance; 1427*3117ece4Schristos ip = anchor; 1428*3117ece4Schristos } } 1429*3117ece4Schristos DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]); 1430*3117ece4Schristos 1431*3117ece4Schristos /* update all costs */ 1432*3117ece4Schristos ZSTD_setBasePrices(optStatePtr, optLevel); 1433*3117ece4Schristos } 1434*3117ece4Schristos } /* while (ip < ilimit) */ 1435*3117ece4Schristos 1436*3117ece4Schristos /* Return the last 
literals size */ 1437*3117ece4Schristos return (size_t)(iend - anchor); 1438*3117ece4Schristos } 1439*3117ece4Schristos #endif /* build exclusions */ 1440*3117ece4Schristos 1441*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1442*3117ece4Schristos static size_t ZSTD_compressBlock_opt0( 1443*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1444*3117ece4Schristos const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) 1445*3117ece4Schristos { 1446*3117ece4Schristos return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); 1447*3117ece4Schristos } 1448*3117ece4Schristos #endif 1449*3117ece4Schristos 1450*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1451*3117ece4Schristos static size_t ZSTD_compressBlock_opt2( 1452*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1453*3117ece4Schristos const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) 1454*3117ece4Schristos { 1455*3117ece4Schristos return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); 1456*3117ece4Schristos } 1457*3117ece4Schristos #endif 1458*3117ece4Schristos 1459*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1460*3117ece4Schristos size_t ZSTD_compressBlock_btopt( 1461*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1462*3117ece4Schristos const void* src, size_t srcSize) 1463*3117ece4Schristos { 1464*3117ece4Schristos DEBUGLOG(5, "ZSTD_compressBlock_btopt"); 1465*3117ece4Schristos return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); 1466*3117ece4Schristos } 1467*3117ece4Schristos #endif 1468*3117ece4Schristos 1469*3117ece4Schristos 1470*3117ece4Schristos 1471*3117ece4Schristos 1472*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1473*3117ece4Schristos /* ZSTD_initStats_ultra(): 1474*3117ece4Schristos * make 
a first compression pass, just to seed stats with more accurate starting values. 1475*3117ece4Schristos * only works on first block, with no dictionary and no ldm. 1476*3117ece4Schristos * this function cannot error out, its narrow contract must be respected. 1477*3117ece4Schristos */ 1478*3117ece4Schristos static 1479*3117ece4Schristos ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1480*3117ece4Schristos void ZSTD_initStats_ultra(ZSTD_matchState_t* ms, 1481*3117ece4Schristos seqStore_t* seqStore, 1482*3117ece4Schristos U32 rep[ZSTD_REP_NUM], 1483*3117ece4Schristos const void* src, size_t srcSize) 1484*3117ece4Schristos { 1485*3117ece4Schristos U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ 1486*3117ece4Schristos ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); 1487*3117ece4Schristos 1488*3117ece4Schristos DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); 1489*3117ece4Schristos assert(ms->opt.litLengthSum == 0); /* first block */ 1490*3117ece4Schristos assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ 1491*3117ece4Schristos assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ 1492*3117ece4Schristos assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ 1493*3117ece4Schristos 1494*3117ece4Schristos ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ 1495*3117ece4Schristos 1496*3117ece4Schristos /* invalidate first scan from history, only keep entropy stats */ 1497*3117ece4Schristos ZSTD_resetSeqStore(seqStore); 1498*3117ece4Schristos ms->window.base -= srcSize; 1499*3117ece4Schristos ms->window.dictLimit += (U32)srcSize; 1500*3117ece4Schristos ms->window.lowLimit = ms->window.dictLimit; 1501*3117ece4Schristos ms->nextToUpdate = ms->window.dictLimit; 1502*3117ece4Schristos 1503*3117ece4Schristos } 1504*3117ece4Schristos 1505*3117ece4Schristos size_t ZSTD_compressBlock_btultra( 1506*3117ece4Schristos 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1507*3117ece4Schristos const void* src, size_t srcSize) 1508*3117ece4Schristos { 1509*3117ece4Schristos DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); 1510*3117ece4Schristos return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); 1511*3117ece4Schristos } 1512*3117ece4Schristos 1513*3117ece4Schristos size_t ZSTD_compressBlock_btultra2( 1514*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1515*3117ece4Schristos const void* src, size_t srcSize) 1516*3117ece4Schristos { 1517*3117ece4Schristos U32 const curr = (U32)((const BYTE*)src - ms->window.base); 1518*3117ece4Schristos DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); 1519*3117ece4Schristos 1520*3117ece4Schristos /* 2-passes strategy: 1521*3117ece4Schristos * this strategy makes a first pass over first block to collect statistics 1522*3117ece4Schristos * in order to seed next round's statistics with it. 1523*3117ece4Schristos * After 1st pass, function forgets history, and starts a new block. 1524*3117ece4Schristos * Consequently, this can only work if no data has been previously loaded in tables, 1525*3117ece4Schristos * aka, no dictionary, no prefix, no ldm preprocessing. 1526*3117ece4Schristos * The compression ratio gain is generally small (~0.5% on first block), 1527*3117ece4Schristos * the cost is 2x cpu time on first block. 
*/ 1528*3117ece4Schristos assert(srcSize <= ZSTD_BLOCKSIZE_MAX); 1529*3117ece4Schristos if ( (ms->opt.litLengthSum==0) /* first block */ 1530*3117ece4Schristos && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ 1531*3117ece4Schristos && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ 1532*3117ece4Schristos && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ 1533*3117ece4Schristos && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */ 1534*3117ece4Schristos ) { 1535*3117ece4Schristos ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); 1536*3117ece4Schristos } 1537*3117ece4Schristos 1538*3117ece4Schristos return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); 1539*3117ece4Schristos } 1540*3117ece4Schristos #endif 1541*3117ece4Schristos 1542*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1543*3117ece4Schristos size_t ZSTD_compressBlock_btopt_dictMatchState( 1544*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1545*3117ece4Schristos const void* src, size_t srcSize) 1546*3117ece4Schristos { 1547*3117ece4Schristos return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); 1548*3117ece4Schristos } 1549*3117ece4Schristos 1550*3117ece4Schristos size_t ZSTD_compressBlock_btopt_extDict( 1551*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1552*3117ece4Schristos const void* src, size_t srcSize) 1553*3117ece4Schristos { 1554*3117ece4Schristos return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); 1555*3117ece4Schristos } 1556*3117ece4Schristos #endif 1557*3117ece4Schristos 1558*3117ece4Schristos #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1559*3117ece4Schristos size_t ZSTD_compressBlock_btultra_dictMatchState( 1560*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 
1561*3117ece4Schristos const void* src, size_t srcSize) 1562*3117ece4Schristos { 1563*3117ece4Schristos return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); 1564*3117ece4Schristos } 1565*3117ece4Schristos 1566*3117ece4Schristos size_t ZSTD_compressBlock_btultra_extDict( 1567*3117ece4Schristos ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1568*3117ece4Schristos const void* src, size_t srcSize) 1569*3117ece4Schristos { 1570*3117ece4Schristos return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); 1571*3117ece4Schristos } 1572*3117ece4Schristos #endif 1573*3117ece4Schristos 1574*3117ece4Schristos /* note : no btultra2 variant for extDict nor dictMatchState, 1575*3117ece4Schristos * because btultra2 is not meant to work with dictionaries 1576*3117ece4Schristos * and is only specific for the first block (no prefix) */ 1577